In [1]:
from searchtweets import ResultStream, gen_rule_payload, load_credentials, collect_results

# general imports
import numpy as np
import pandas as pd
#import tweepy
from textblob import TextBlob
import re
import time

# plotting and visualization
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [19]:
S2_Date = pd.read_csv('tweets_2018-08-01_2018-08-15_Date.csv', names=['Date'], parse_dates=['Date'])
S2_Tweets = pd.read_csv('tweets_2018-08-01_2018-08-15_Tweets.csv', names=['Tweets'])
S2 = pd.concat([S2_Tweets, S2_Date], axis=1)

In [6]:
def clean_tweet(tweet):
    '''
    Utility function to clean the text in a tweet by removing 
    links and special characters using regex.
    '''
    return ' '.join(re.sub("(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)", " ", tweet).split())

def analize_sentiment(tweet):
    '''
    Utility function to classify the polarity of a tweet
    using textblob.
    
    textblob already has a trained analyser to work 
    with different machine learning models on 
    natural language processing.
    
    Might want to train our own model
    '''
    analysis = TextBlob(clean_tweet(tweet))
    if analysis.sentiment.polarity > 0:
        return 1
    elif analysis.sentiment.polarity == 0:
        return 0
    else:
        return -1
    

def sentiment_analysis(S2):
    # We create a column with the result of the analysis:
    S2['SA'] = np.array([ analize_sentiment(tweet) for tweet in S2['Tweets'] ])
    
    # We construct lists with classified tweets:
    pos_tweets = [ tweet for index, tweet in enumerate(S2['Tweets']) if S2['SA'][index] > 0]
    neu_tweets = [ tweet for index, tweet in enumerate(S2['Tweets']) if S2['SA'][index] == 0]
    neg_tweets = [ tweet for index, tweet in enumerate(S2['Tweets']) if S2['SA'][index] < 0]

    # We print percentages:
    print("Percentage of positive tweets: {}%".format(len(pos_tweets)*100/len(S2['Tweets'])))
    print("Percentage of neutral tweets: {}%".format(len(neu_tweets)*100/len(S2['Tweets'])))
    print("Percentage de negative tweets: {}%".format(len(neg_tweets)*100/len(S2['Tweets'])))

In [21]:
sentiment_analysis(S2)

Percentage of positive tweets: 37.58119658119658%
Percentage of neutral tweets: 52.06837606837607%
Percentage de negative tweets: 10.350427350427351%


In [22]:
S2.head()

Unnamed: 0,Tweets,Date,SA
0,https://t.co/yLZluuYevy DECENTRALISED ENERGY P...,2018-09-01 02:59:59,0
1,📉 Biggest Losers (1 hr) 📉\nNoah Coin $NOAH -3....,2018-09-01 02:59:58,-1
2,Crypto News: Yahoo! World’s Sixth-Most Popular...,2018-09-01 02:59:54,1
3,RT @coingecko: Have you tried comparing coins ...,2018-09-01 02:59:54,0
4,Bitcoin Gets Awareness Boost From Mention On E...,2018-09-01 02:59:53,1


In [32]:
S2.dtypes

Tweets            object
Date      datetime64[ns]
SA                 int64
dtype: object

In [36]:
S3 = S2.groupby(S2.Date.dt.dayofyear).agg(['count'])

In [38]:
S3.shape

(15, 3)