In [1]:
import pandas as pd
from pandas import Series, DataFrame
import json
from janome.tokenizer import Tokenizer
from requests_oauthlib import OAuth1Session
from wordcloud import WordCloud
import emoji
import re
import csv

In [26]:
def create_oauth_session(oauth_key_dict):
    oauth = OAuth1Session(
        oauth_key_dict['consumer_key'],
        oauth_key_dict['consumer_secret'],
        oauth_key_dict['access_token'],
        oauth_key_dict['access_token_secret']
    )
    return oauth

def search_tweet(word, count, oauth):
    url = 'https://api.twitter.com/1.1/search/tweets.json'
    params = {
        'q': word,
        'count' : count,
        'result_type' : 'recent',
        'exclude': 'retweets',
        'lang' : 'ja'
        }
    responce = oauth.get(url, params=params)
    if responce.status_code != 200:
        print("Error code: %d" %(responce.status_code))
        return None
    tweets = json.loads(responce.text)
    return tweets

def search_tweet_date(word, count, date, oauth):
    url = 'https://api.twitter.com/1.1/search/tweets.json'
    params = {
        'q': word,
        'count' : count,
        'result_type' : 'recent',
        'until' : date,
        'exclude': 'retweets',
        'lang' : 'ja'
        }
    responce = oauth.get(url, params=params)
    if responce.status_code != 200:
        print("Error code: %d" %(responce.status_code))
        return None
    tweets = json.loads(responce.text)
    return tweets

In [3]:
def remove_emoji(text):
    return ''.join(c for c in text if c not in emoji.UNICODE_EMOJI['en'])

def remove_url(text):
    return re.sub(r'https?://[\w/:%#\$&\?\(\)~\.=\+\-]+', '', text)
# Get Wakachigaki
def get_wakachi(list_text, word, hinshi=['名詞', '形容詞']):
    remove_words = ['こと', 'よう', 'そう', 'これ', 'それ', 'もの', 'ここ', 'さん', 'ちゃん',
                'ところ', 'とこ', 'の', 'ん', word]
    t = Tokenizer()
    wakachi = ''
    for text in list_text:
        malist = t.tokenize(text)
        for w in malist:
            word = w.surface
            part = w.part_of_speech
            hit = False
            for h in hinshi:
                hit = hit or (h in part)
            if not hit:
                continue
            if word not in remove_words:
                wakachi += word + ' '
    return wakachi

In [4]:
# Tweet Normalization
def normalize_tweets(tweets):
    normalized = []
    for tweet in tweets:
        text = tweet
        text = remove_emoji(text)
        text = remove_url(text)
        normalized.append(text)
    return normalized

In [5]:
keysfile = '../../twitter_API/key/keys.json'
keys = json.load(open(keysfile))
twitter = create_oauth_session(keys)

In [104]:
search_word = 'ビットコイン'
search_count = 10
date = '2021-04-30'

search = search_tweet_date(search_word, search_count, date, twitter)

In [105]:
search

{'statuses': [{'created_at': 'Thu Apr 29 23:59:53 +0000 2021',
   'id': 1387919599698530304,
   'id_str': '1387919599698530304',
   'text': 'ビットコインは超長期視点で何も問題ないが、アメリカのFRBがテーパリングやるとアルトコインバブルは崩壊が近付いてきます。\n\n10年国債利回りは2017年と結構シンクロしています。\n\n今は17年でいうチャイナショックからの大回復局面… https://t.co/bHYqflCb7O',
   'truncated': True,
   'entities': {'hashtags': [],
    'symbols': [],
    'user_mentions': [],
    'urls': [{'url': 'https://t.co/bHYqflCb7O',
      'expanded_url': 'https://twitter.com/i/web/status/1387919599698530304',
      'display_url': 'twitter.com/i/web/status/1…',
      'indices': [117, 140]}]},
   'metadata': {'iso_language_code': 'ja', 'result_type': 'recent'},
   'source': '<a href="http://twitter.com/download/iphone" rel="nofollow">Twitter for iPhone</a>',
   'in_reply_to_status_id': None,
   'in_reply_to_status_id_str': None,
   'in_reply_to_user_id': None,
   'in_reply_to_user_id_str': None,
   'in_reply_to_screen_name': None,
   'user': {'id': 3322299168,
    'id_str': '3322299168',
  

In [29]:
df_search = DataFrame.from_dict(search['statuses'])
tweets = df_search['text'].tolist()
tweets = normalize_tweets(tweets)
wakachi = get_wakachi(tweets, search_word)

In [13]:
# Initialize Negative-Positive Dictionary
pn_dic = {}
fp = open('../../lib/pn.csv', 'rt', encoding='utf-8')
reader = csv.reader(fp, delimiter='\t')
for i, row in enumerate(reader):
  name = row[0]
  result = row[1]
  pn_dic[name] = result

In [31]:
words_list = wakachi.split(' ')

In [32]:
cnt_p = 0
cnt_n = 0
cnt_e = 0

for word in words_list:
    pn = 'e'
    if word in pn_dic:
        pn = pn_dic[word]
    if pn == 'p':
        cnt_p += 1
    elif pn == 'n':
        cnt_n += 1
    else:
        cnt_e += 1
print('P:{}, N:{}, E:{}'.format(cnt_p, cnt_n, cnt_e))

P:266, N:37, E:1326


In [33]:
length = cnt_p + cnt_n
if length:
    pn = (cnt_p - cnt_n)/length
else:
    pn = 0

pn

0.7557755775577558

In [44]:
from datetime import datetime

def date2str(date):
    return [date[i].strftime('%Y-%m-%d') for i in range(len(date))]

In [46]:
def getPN(words_list):
    cnt_p = 0
    cnt_n = 0
    cnt_e = 0

    for word in words_list:
        pn = 'e'
        if word in pn_dic:
            pn = pn_dic[word]
        if pn == 'p':
            cnt_p += 1
        elif pn == 'n':
            cnt_n += 1
        else:
            cnt_e += 1
    return cnt_p, cnt_n, cnt_e

In [92]:
end = datetime.now()
period = 365

date_list = pd.date_range(start=end, periods=period, freq='D')
date_list = date2str(date_list)

In [93]:
search_word = 'ビットコイン'
search_count = 10

list_p = []
list_n = []
list_e = []

In [94]:
for date in date_list:
    search = search_tweet_date(search_word, search_count, date, twitter)
    df_search = DataFrame.from_dict(search['statuses'])
    tweets = df_search['text'].tolist()
    tweets = normalize_tweets(tweets)
    wakachi = get_wakachi(tweets, search_word)
    words_list = wakachi.split(' ')
    p, n, e = getPN(words_list)
    list_p.append(p)
    list_n.append(n)
    list_e.append(e)

Error code: 429


TypeError: 'NoneType' object is not subscriptable

In [95]:
len(list_p)

172

In [109]:
for date in date_list[172:]:
    search = search_tweet_date(search_word, search_count, date, twitter)
    df_search = DataFrame.from_dict(search['statuses'])
    tweets = df_search['text'].tolist()
    tweets = normalize_tweets(tweets)
    wakachi = get_wakachi(tweets, search_word)
    words_list = wakachi.split(' ')
    p, n, e = getPN(words_list)
    list_p.append(p)
    list_n.append(n)
    list_e.append(e)

Error code: 429


TypeError: 'NoneType' object is not subscriptable

In [110]:
len(list_p)

349

In [111]:
for date in date_list[349:]:
    search = search_tweet_date(search_word, search_count, date, twitter)
    df_search = DataFrame.from_dict(search['statuses'])
    tweets = df_search['text'].tolist()
    tweets = normalize_tweets(tweets)
    wakachi = get_wakachi(tweets, search_word)
    words_list = wakachi.split(' ')
    p, n, e = getPN(words_list)
    list_p.append(p)
    list_n.append(n)
    list_e.append(e)

In [112]:
len(list_p)

365

In [113]:
df_pn = DataFrame({'p':list_p, 'n':list_n, 'e':list_e}, index=date_list)
df_pn['p/n'] = (df_pn['p'] - df_pn['n'])/(df_pn['p'] + df_pn['n'])
df_pn

Unnamed: 0,p,n,e,p/n
2021-05-05,22,6,153,0.571429
2021-05-06,20,5,125,0.600000
2021-05-07,20,5,125,0.600000
2021-05-08,20,5,125,0.600000
2021-05-09,20,5,125,0.600000
...,...,...,...,...
2022-04-30,22,7,158,0.517241
2022-05-01,22,7,158,0.517241
2022-05-02,22,7,158,0.517241
2022-05-03,22,7,158,0.517241


In [114]:
list_p

[22,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 12,
 12,
 12,
 12,
 12,
 12,
 12,
 12,
 12,
 12,
 12,
 12,
 12,
 12,
 12,
 12,
 12,
 12,
 12,
 12,
 12,
 12,
 12,
 12,
 12,
 12,
 12,
 12,
