In [1]:
import numpy as np
import pickle as pkl
import pandas as pd
import csv

In [2]:
# NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.
# The context manager closes the file handle as soon as the object is loaded.
with open("Aug.pkl", "rb") as aug_file:
    A = pkl.load(aug_file)

In [3]:
# NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.
# The context manager closes the file handle as soon as the object is loaded.
with open("Sept.pkl", "rb") as sept_file:
    B = pkl.load(sept_file)

In [4]:
# NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.
# The context manager closes the file handle as soon as the object is loaded.
with open("Mid_Oct.pkl", "rb") as mid_oct_file:
    C = pkl.load(mid_oct_file)

In [5]:
# Combine the three unpickled collections into one with `+`
# (presumably each is a list of tweet objects, so this concatenates them — confirm against the pickles).
iPhone7_tweets = A + B + C

In [6]:
len(iPhone7_tweets)

153031

In [13]:
# Flatten every tweet object into a [date, username, text, favorites] row,
# then build the DataFrame from all rows in one go.
to_dataframe = [
    [tweet.date, tweet.username, tweet.text, tweet.favorites]
    for tweet in iPhone7_tweets
]

df = pd.DataFrame(to_dataframe, columns=["date", "username", "text", "favorites_count"])

In [21]:
df.head()

Unnamed: 0,date,username,text,favorites_count
0,2016-08-31 07:59:43,NCSLsales,Enter the iDrop News iPhone 7 Giveaway! http:/...,3
1,2016-08-31 07:59:32,davidbychkovceo,applenws: Griffin’s new adapter fixes iPhone 7...,0
2,2016-08-31 07:59:28,qwerty4577,"Aun falta para la presentación del #Iphone7 , ...",0
3,2016-08-31 07:59:08,genioAlan,Me gustó un video de @YouTube de @bigzwk http:...,1
4,2016-08-31 07:59:05,MerseyLancsMob,iPhone 7 launch date confirmed - and it’s soon...,0


In [22]:
# Persist the tweets table to disk; no encoding is passed, so pandas' default applies
# (UTF-8 per the pandas docs), and the index is written as the first column.
df.to_csv("iPhone7_Tweets.csv")

In [23]:
# Read the file back with the encoding it was written in: DataFrame.to_csv emits
# UTF-8 by default, so decoding as latin1 would garble every non-ASCII character
# (curly quotes, accented letters) in the tweet text.
# index_col=0 restores the row index that to_csv wrote as the first column.
new_df = pd.read_csv("iPhone7_Tweets.csv", index_col=0, encoding="utf-8")

In [25]:
df['text'].iloc[10]

'Almost positive the iPhone 7 is gonna be announced September 7th! Yeeyee im getting one of course'

In [26]:
from nltk import word_tokenize
from afinn import Afinn

In [19]:
# Sentiment Analysis - Trial 1 using Natural Language Toolkit
sentiment_labels = []

for text in df['text']:
    tokenized_text = word_tokenize(text)
    if "good" in tokenized_text:
        sentiment_labels.append(1) # This is a positive tweet; append 1
    elif "bad" in tokenized_text:
        sentiment_labels.append(-1) # This is a negative tweet; append -1
    else:
        sentiment_labels.append(0) # This tweet has no identifiable sentiment (by our current, simple model)

In [28]:
# Attach the keyword-based labels as a new column, then count the non-null
# entries of every other column within each label group.
df['sentiment'] = sentiment_labels
df.groupby("sentiment").count()

Unnamed: 0_level_0,date,username,text,favorites_count
sentiment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
-1,224,224,224,224
0,152165,152165,152165,152165
1,642,642,642,642


In [38]:
# Keep only the tweets labelled negative and print a small positional sample
# (rows 20-24 of the subset), each followed by a divider line.
iPhone7_neg = df[df['sentiment'] == -1]
divider = "*" * 50
print("Negative")
for tweet_text in iPhone7_neg['text'].iloc[20:25]:
    print(tweet_text)
    print(divider)

Negative
Can't wait for the iPhone 7 to come out so I can finally afford this bad boy pic.twitter.com/ieFpAXxGmS
**************************************************
iPhone 7 Release Will Punish Buyers with Bad Credit: If you have bad credit the iPhone 7 ... https:// binged.it/2bvH4sC #badcredit #finance
**************************************************
Uh oh. Looks like bad news for the new #iPhone7 camera pic.twitter.com/rMiuGnnmEo
**************************************************
I'm getting this bad baby #iPhone7
**************************************************
i want the iphone 7 so bad omg
**************************************************


In [39]:
# Keep only the tweets labelled positive and print a small positional sample
# (rows 100-109 of the subset), each followed by a divider line.
iPhone7_pos = df[df['sentiment'] == 1]
divider = "*" * 50
print("Positive")
for tweet_text in iPhone7_pos['text'].iloc[100:110]:
    print(tweet_text)
    print(divider)

Positive
the day I start sixth form is the day the #iPhone7 is being released. Not a good Omen!
**************************************************
I just got my 6s three months ago so I'm good but I still wanna see what it looks like #iPhone7
**************************************************
I keep telling myself that I won't upgrade to the iPhone 7 , and that my 6s Plus is good enough. But I know I am lying to myself.
**************************************************
the only good thing about the #iPhone7 release is that the prices for the iPhone6 are gonna depreciate pic.twitter.com/lzlxEgqUf2
**************************************************
Im good with the iPhone 6 tbh #iPhone7
**************************************************
Are these #iPhone 7/7Plus #specs accurate? Only time will tell but it does look good! pic.twitter.com/n4loXsV5Dv
**************************************************
@bryanmwolfe @cultofmac i think an iPhone 5 Black Color like iPhone 7 would look reaaaally

In [40]:
# Sentiment Analysis - Trial 2 using AFINN
afinn = Afinn()

In [41]:
# Score every tweet with the AFINN lexicon, then preview the first ten scores.
pscore = [afinn.score(tweet_text) for tweet_text in df['text']]

pscore[:10]

[0.0, -3.0, -1.0, 0.0, 0.0, 1.0, 0.0, 0.0, -4.0, -1.0]

In [42]:
# Store the per-tweet AFINN scores as a new column (mutates df in place),
# then preview the first ten rows.
df['pscore'] = pscore
df.head(10)

Unnamed: 0,date,username,text,favorites_count,sentiment,pscore
0,2016-08-31 07:59:43,NCSLsales,Enter the iDrop News iPhone 7 Giveaway! http:/...,3,0,0.0
1,2016-08-31 07:59:32,davidbychkovceo,applenws: Griffin’s new adapter fixes iPhone 7...,0,0,-3.0
2,2016-08-31 07:59:28,qwerty4577,"Aun falta para la presentación del #Iphone7 , ...",0,0,-1.0
3,2016-08-31 07:59:08,genioAlan,Me gustó un video de @YouTube de @bigzwk http:...,1,0,0.0
4,2016-08-31 07:59:05,MerseyLancsMob,iPhone 7 launch date confirmed - and it’s soon...,0,0,0.0
5,2016-08-31 07:58:45,Shazib1018,I liked a @YouTube video http:// youtu.be/bsr_...,0,0,1.0
6,2016-08-31 07:58:44,Atomox,#timessquare #nyc #iphone6 #blackandwhite @Tim...,1,0,0.0
7,2016-08-31 07:58:38,Cococopic6,Battery info of iPhone6 Plus : 73% Being used ...,1,0,0.0
8,2016-08-31 07:58:36,eStoreID,Trending in Apple Comment: iPhone killed the p...,0,0,-4.0
9,2016-08-31 07:58:30,Anishwebering,Apple Leak Says The iPhone Home Button Will Be...,0,0,-1.0


In [47]:
# Rows whose AFINN score is strictly positive.
is_positive = df['pscore'] > 0
positive_pscore = df[is_positive]

In [48]:
positive_pscore.shape

(32342, 6)

In [49]:
positive_pscore.head()

Unnamed: 0,date,username,text,favorites_count,sentiment,pscore
5,2016-08-31 07:58:45,Shazib1018,I liked a @YouTube video http:// youtu.be/bsr_...,0,0,1.0
10,2016-08-31 07:58:27,leelelee1367890,Almost positive the iPhone 7 is gonna be annou...,4,0,2.0
26,2016-08-31 07:55:25,krishdtech,Apple Stock: iPhone 7 “Good Enough” For Upgrad...,0,0,3.0
34,2016-08-31 07:53:21,tdaigle01,BREAKING: iPhone 7 Launch Date And Details Con...,0,0,4.0
35,2016-08-31 07:53:15,TechnoDoze,"It’s official, #apple iphone 7 to be launched ...",0,0,1.0


In [54]:
# Rows whose AFINN score is strictly negative.
is_negative = df['pscore'] < 0
negative_pscore = df[is_negative]

In [55]:
negative_pscore.shape

(18272, 6)

In [56]:
negative_pscore.head()

Unnamed: 0,date,username,text,favorites_count,sentiment,pscore
1,2016-08-31 07:59:32,davidbychkovceo,applenws: Griffin’s new adapter fixes iPhone 7...,0,0,-3.0
2,2016-08-31 07:59:28,qwerty4577,"Aun falta para la presentación del #Iphone7 , ...",0,0,-1.0
8,2016-08-31 07:58:36,eStoreID,Trending in Apple Comment: iPhone killed the p...,0,0,-4.0
9,2016-08-31 07:58:30,Anishwebering,Apple Leak Says The iPhone Home Button Will Be...,0,0,-1.0
16,2016-08-31 07:56:38,iPhone6_App_Dev,#gamedev #Apple 'Strike Team' Is an 'Xcom'-Esq...,2,0,-1.0


In [57]:
# Semantic Orientation of a Word
def getso(seed_pos, seed_neg, texts, min_df):
    """Score every vocabulary word by its association with seed words.

    A bag-of-words vocabulary is fitted on ``texts`` with
    ``CountVectorizer(min_df=min_df)``. For a word ``w`` and a seed ``s`` the
    association is the ratio ``P(w, s) / (P(w) * P(s))``, where each
    probability is the fraction of documents containing the word(s). No
    logarithm is applied, so this is the ratio inside PMI rather than PMI
    itself. A word's score is the sum of its ratios with the positive seeds
    minus the sum of its ratios with the negative seeds; a word's association
    with itself is treated as 0.

    Parameters
    ----------
    seed_pos : iterable of str
        Words taken to signal positive orientation.
    seed_neg : iterable of str
        Words taken to signal negative orientation.
    texts : iterable of str
        The documents (e.g. tweet texts) to fit the vocabulary on.
    min_df : int or float
        Forwarded to ``CountVectorizer`` as its ``min_df`` cut-off.

    Returns
    -------
    dict
        Maps every vocabulary word to its orientation score.

    Raises
    ------
    KeyError
        If a seed word is absent from the fitted vocabulary (for instance
        because ``min_df`` filtered it out).
    """
    from sklearn.feature_extraction.text import CountVectorizer
    from sklearn.preprocessing import binarize
    import numpy as np

    seed_pos = list(seed_pos)
    seed_neg = list(seed_neg)
    seeds = seed_pos + seed_neg

    cv = CountVectorizer(min_df=min_df)
    vector = cv.fit_transform(texts)
    vocab = cv.vocabulary_
    # Row count of the document-term matrix == number of documents; unlike
    # len(texts) this also works when texts is a plain iterable.
    n = vector.shape[0]

    # Fail early, with a readable message, instead of deep inside the scoring.
    missing = sorted(set(word for word in seeds if word not in vocab))
    if missing:
        raise KeyError("seed word(s) not in vocabulary: {}".format(missing))

    if not seeds:
        # Nothing to compare against: every word keeps the neutral score.
        return {word: 0 for word in vocab}

    # Presence/absence per document, so every probability below is a
    # document-level frequency.
    bvector = binarize(vector)

    # P(w): fraction of documents containing each word. Summing the sparse
    # matrix directly avoids materialising a dense documents x vocabulary array.
    docfreq = np.asarray(bvector.sum(axis=0)).ravel() / n

    seed_idx = np.array([vocab[word] for word in seeds])
    signs = np.array([1.0] * len(seed_pos) + [-1.0] * len(seed_neg))

    # Number of documents in which each vocabulary word (rows) appears together
    # with each seed (columns). The binarized matrix is used for the joint
    # counts as well, so they stay consistent with the document frequencies
    # above; raw term counts would count a document several times. Only the
    # seed columns are needed, so the full vocabulary x vocabulary matrix is
    # never built.
    cooc = bvector.T.dot(bvector[:, seed_idx]).toarray()
    # A word does not count as co-occurring with itself.
    cooc[seed_idx, np.arange(len(seed_idx))] = 0

    joint = cooc / n
    ratio = joint / np.outer(docfreq, docfreq[seed_idx])

    # Positive-seed ratios are added, negative-seed ratios subtracted.
    scores = ratio.dot(signs)
    return {word: scores[index] for word, index in vocab.items()}