In [1]:
import pandas as pd
from pandas import Series, DataFrame
import json
from janome.tokenizer import Tokenizer
from requests_oauthlib import OAuth1Session
from wordcloud import WordCloud
import emoji
import re
import csv

In [33]:
def create_oauth_session(oauth_key_dict):
    oauth = OAuth1Session(
        oauth_key_dict['consumer_key'],
        oauth_key_dict['consumer_secret'],
        oauth_key_dict['access_token'],
        oauth_key_dict['access_token_secret']
    )
    return oauth

def search_tweet(word, count, oauth):
    url = 'https://api.twitter.com/1.1/search/tweets.json'
    params = {
        'q': word,
        'count' : count,
        'result_type' : 'recent',
        'exclude': 'retweets',
        'lang' : 'ja'
        }
    responce = oauth.get(url, params=params)
    if responce.status_code != 200:
        print("Error code: %d" %(responce.status_code))
        return None
    tweets = json.loads(responce.text)
    return tweets

def search_tweet_date(word, count, date, oauth):
    url = 'https://api.twitter.com/1.1/search/tweets.json'
    params = {
        'q': word,
        'count' : count,
        'result_type' : 'recent',
        'until' : date,
        'exclude': 'retweets',
        'lang' : 'ja'
        }
    responce = oauth.get(url, params=params)
    if responce.status_code != 200:
        print("Error code: %d" %(responce.status_code))
        return None
    tweets = json.loads(responce.text)
    return tweets

In [3]:
def remove_emoji(text):
    return ''.join(c for c in text if c not in emoji.UNICODE_EMOJI['en'])

def remove_url(text):
    return re.sub(r'https?://[\w/:%#\$&\?\(\)~\.=\+\-]+', '', text)
# Get Wakachigaki
def get_wakachi(list_text, word, hinshi=['名詞', '形容詞']):
    remove_words = ['こと', 'よう', 'そう', 'これ', 'それ', 'もの', 'ここ', 'さん', 'ちゃん',
                'ところ', 'とこ', 'の', 'ん', word]
    t = Tokenizer()
    wakachi = ''
    for text in list_text:
        malist = t.tokenize(text)
        for w in malist:
            word = w.surface
            part = w.part_of_speech
            hit = False
            for h in hinshi:
                hit = hit or (h in part)
            if not hit:
                continue
            if word not in remove_words:
                wakachi += word + ' '
    return wakachi

In [4]:
# Tweet Normalization
def normalize_tweets(tweets):
    normalized = []
    for tweet in tweets:
        text = tweet
        text = remove_emoji(text)
        text = remove_url(text)
        normalized.append(text)
    return normalized

In [5]:
keysfile = '../../twitter_API/key/keys.json'
keys = json.load(open(keysfile))
twitter = create_oauth_session(keys)

In [41]:
search_word = 'ビットコイン'
search_count = 10
date = '2021-04-27'

search = search_tweet_date(search_word, search_count, date, twitter)
search

{'statuses': [],
 'search_metadata': {'completed_in': 0.009,
  'max_id': 1389955937226555392,
  'max_id_str': '1389955937226555392',
  'query': '%E3%83%93%E3%83%83%E3%83%88%E3%82%B3%E3%82%A4%E3%83%B3+until%3A2021-04-27+exclude%3Aretweets',
  'refresh_url': '?since_id=1389955937226555392&q=%E3%83%93%E3%83%83%E3%83%88%E3%82%B3%E3%82%A4%E3%83%B3%20until%3A2021-04-27%20exclude%3Aretweets&lang=ja&result_type=recent&include_entities=1',
  'count': 10,
  'since_id': 0,
  'since_id_str': '0'}}

In [8]:
df_search = DataFrame.from_dict(search['statuses'])
tweets = df_search['text'].tolist()
tweets = normalize_tweets(tweets)
wakachi = get_wakachi(tweets, search_word)

In [9]:
# Initialize Negative-Positive Dictionary
pn_dic = {}
fp = open('../../lib/pn.csv', 'rt', encoding='utf-8')
reader = csv.reader(fp, delimiter='\t')
for i, row in enumerate(reader):
  name = row[0]
  result = row[1]
  pn_dic[name] = result

In [10]:
words_list = wakachi.split(' ')

In [11]:
cnt_p = 0
cnt_n = 0
cnt_e = 0

for word in words_list:
    pn = 'e'
    if word in pn_dic:
        pn = pn_dic[word]
    if pn == 'p':
        cnt_p += 1
    elif pn == 'n':
        cnt_n += 1
    else:
        cnt_e += 1
print('P:{}, N:{}, E:{}'.format(cnt_p, cnt_n, cnt_e))

P:22, N:15, E:119


In [12]:
length = cnt_p + cnt_n
if length:
    pn = (cnt_p - cnt_n)/length
else:
    pn = 0

pn

0.1891891891891892

In [13]:
from datetime import datetime

def date2str(date):
    return [date[i].strftime('%Y-%m-%d') for i in range(len(date))]

In [14]:
def getPN(words_list):
    cnt_p = 0
    cnt_n = 0
    cnt_e = 0

    for word in words_list:
        pn = 'e'
        if word in pn_dic:
            pn = pn_dic[word]
        if pn == 'p':
            cnt_p += 1
        elif pn == 'n':
            cnt_n += 1
        else:
            cnt_e += 1
    return cnt_p, cnt_n, cnt_e

In [15]:
end = datetime.now()
period = 365

date_list = pd.date_range(end=end, periods=period, freq='D')
date_list = date2str(date_list)

In [19]:
search_word = 'ビットコイン'
search_count = 100

list_p = []
list_n = []
list_e = []

In [20]:
for date in date_list:
    search = search_tweet_date(search_word, search_count, date, twitter)
    df_search = DataFrame.from_dict(search['statuses'])
    tweets = df_search['text'].tolist()
    tweets = normalize_tweets(tweets)
    wakachi = get_wakachi(tweets, search_word)
    words_list = wakachi.split(' ')
    p, n, e = getPN(words_list)
    list_p.append(p)
    list_n.append(n)
    list_e.append(e)

KeyError: 'text'

In [30]:
search_word = '猫'
search = search_tweet_date(search_word, search_count, '2015-07-19', twitter)
search

{'statuses': [{'created_at': 'Wed May 05 14:37:02 +0000 2021',
   'id': 1389952281940877314,
   'id_str': '1389952281940877314',
   'text': '@gyuuuunyuuuumow うちの猫の誕生日です。\nもしかしてうちの仔…元奥さん…❓',
   'truncated': False,
   'entities': {'hashtags': [],
    'symbols': [],
    'user_mentions': [{'screen_name': 'gyuuuunyuuuumow',
      'name': 'うしがにげた。',
      'id': 1221391154987458560,
      'id_str': '1221391154987458560',
      'indices': [0, 16]}],
    'urls': []},
   'metadata': {'iso_language_code': 'ja', 'result_type': 'recent'},
   'source': '<a href="http://twitter.com/download/iphone" rel="nofollow">Twitter for iPhone</a>',
   'in_reply_to_status_id': 1389905622309773313,
   'in_reply_to_status_id_str': '1389905622309773313',
   'in_reply_to_user_id': 1221391154987458560,
   'in_reply_to_user_id_str': '1221391154987458560',
   'in_reply_to_screen_name': 'gyuuuunyuuuumow',
   'user': {'id': 1263446143616208899,
    'id_str': '1263446143616208899',
    'name': 'かにみそ',
    'screen_name': '

In [21]:
len(list_p)

0

In [109]:
for date in date_list[172:]:
    search = search_tweet_date(search_word, search_count, date, twitter)
    df_search = DataFrame.from_dict(search['statuses'])
    tweets = df_search['text'].tolist()
    tweets = normalize_tweets(tweets)
    wakachi = get_wakachi(tweets, search_word)
    words_list = wakachi.split(' ')
    p, n, e = getPN(words_list)
    list_p.append(p)
    list_n.append(n)
    list_e.append(e)

Error code: 429


TypeError: 'NoneType' object is not subscriptable

In [110]:
len(list_p)

349

In [111]:
for date in date_list[349:]:
    search = search_tweet_date(search_word, search_count, date, twitter)
    df_search = DataFrame.from_dict(search['statuses'])
    tweets = df_search['text'].tolist()
    tweets = normalize_tweets(tweets)
    wakachi = get_wakachi(tweets, search_word)
    words_list = wakachi.split(' ')
    p, n, e = getPN(words_list)
    list_p.append(p)
    list_n.append(n)
    list_e.append(e)

In [112]:
len(list_p)

365

In [113]:
df_pn = DataFrame({'p':list_p, 'n':list_n, 'e':list_e}, index=date_list)
df_pn['p/n'] = (df_pn['p'] - df_pn['n'])/(df_pn['p'] + df_pn['n'])
df_pn

Unnamed: 0,p,n,e,p/n
2021-05-05,22,6,153,0.571429
2021-05-06,20,5,125,0.600000
2021-05-07,20,5,125,0.600000
2021-05-08,20,5,125,0.600000
2021-05-09,20,5,125,0.600000
...,...,...,...,...
2022-04-30,22,7,158,0.517241
2022-05-01,22,7,158,0.517241
2022-05-02,22,7,158,0.517241
2022-05-03,22,7,158,0.517241


In [116]:
list_e

[153,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 125,
 126,
 126,
 126,
 126,
 126,
 126,
 126,
 126,
 126,
 126,
 126,
 126,
 126,
 126,
 126,
 126,
 126,
 126,
 126,
 126,
 126,
 126,
 126,
 126,
 126,
 126,
 126,
 126,
 126,
 126,
 126,
 126,
 126,
 126,
 126,
 126,
 126,
 126,
 126