In [1]:
import numpy as np
import pickle as pkl
import pandas as pd
import csv
import langdetect

In [2]:
# Load the tweet export: the first CSV column becomes the index and the
# file is decoded as latin1.
df = pd.read_csv(
    "iPhone7_eng.csv",
    encoding="latin1",
    index_col=0,
)

In [3]:
# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0,date,username,text,favorites_count,four_question_marks,lang
4,15/7/2016 7:51,iphone6_sleeves,iPhone 6|iPhone 6 Plus Sleeve | BLUEBERRY MANG...,0,False,en
10,15/7/2016 7:50,IphoneTutes,Apple iPhone SE ... - http:// goo.gl/bDxc5Q #i...,0,False,en
11,15/7/2016 7:49,crafterei,Leather|Felt Sleeve for iPhone 6|Plus | TWICE ...,0,False,en
13,15/7/2016 7:48,iphone6_sleeves,iPhone 6 Plus Leather Sleeve | RUM DIARY | htt...,0,False,en
16,15/7/2016 7:48,kiwo02,Da ist es! #iPhone7 https:// m.youtube.com/wat...,1,False,en


In [4]:
# (rows, columns) of the loaded frame.
df.shape

(191268, 6)

In [5]:
# Number of rows whose username already appeared in an earlier row.
df["username"].duplicated().sum()

93167

In [6]:
# Take the row at position 10 as a sample tweet and show its text.
tweet = df.iloc[10]
print(tweet["text"])

#iPhone #iPhone6 iPhone 5c 16GB pink http:// ebay.to/29SphJy #iPhone6plus #apple


In [7]:
from nltk import word_tokenize
from afinn import Afinn

In [8]:
import re


def remove_urls(text):
    """Return ``text`` with URL-like substrings removed.

    A match is an optional ``http``/``https`` scheme, then ``://``, then any
    run of word characters or ``. / ? = & %``, ending on a word boundary.

    Parameters
    ----------
    text : str
        The string to clean.

    Returns
    -------
    str
        ``text`` with every match replaced by the empty string.
    """
    # Substitute on the `text` argument (not on any notebook-level variable)
    # so the result depends only on what the caller passes in.
    # NOTE(review): the sample tweets shown in this notebook have a space
    # after "://" (e.g. "http:// ebay.to/..."); the trailing \b cannot match
    # there, so such links are left untouched -- confirm whether they should
    # be handled as well.
    return re.sub(
        r'(https|http)?:\/\/(\w|\.|\/|\?|\=|\&|\%)*\b',
        '',
        text,
        flags=re.MULTILINE,
    )

In [9]:
# Sentiment Analysis - Trial 1 using Natural Language Toolkit
# Keyword rule on the NLTK tokens of each tweet: 1 if the token "good" is
# present, otherwise -1 if "bad" is present, otherwise 0.
def _keyword_label(tokens):
    """Map a token list to 1 / -1 / 0 using the "good" / "bad" keyword rule."""
    if "good" in tokens:
        return 1
    return -1 if "bad" in tokens else 0


sentiment_labels = [_keyword_label(word_tokenize(text)) for text in df['text']]

In [10]:
# Attach the keyword labels as a column, then count the values in every
# column per label.
df['sentiment'] = sentiment_labels
df.groupby("sentiment").count()

Unnamed: 0_level_0,date,username,text,favorites_count,four_question_marks,lang
sentiment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
-1,539,539,539,539,539,539
0,189520,189520,189520,189520,189520,189520
1,1209,1209,1209,1209,1209,1209


In [17]:
# Print a five-tweet slice ([50:55]) of the tweets labelled -1, each
# followed by a line of asterisks.
iPhone7_neg = df[df['sentiment'] == -1]
separator = "*" * 50
print("Negative")
for tweet_text in iPhone7_neg['text'][50:55]:
    print(tweet_text, separator, sep="\n")

Negative
I really feel bad for those who just bought iPhone 6s #iPhone7
**************************************************
You people think it's bad getting your iPhone 6S's at the beginning of the year? I got my 6S Plus in JUNE #iPhone7
**************************************************
I feel bad for those who just recently got the iPhone 6 #iPhone7
**************************************************
Can't wait for the iPhone 7 to come out so I can finally afford this bad boy pic.twitter.com/ieFpAXxGmS
**************************************************
iPhone 7 Release Will Punish Buyers with Bad Credit: If you have bad credit the iPhone 7 ... https:// binged.it/2bvH4sC #badcredit #finance
**************************************************


In [19]:
# Print a ten-tweet slice ([90:100]) of the tweets labelled 1, each
# followed by a line of asterisks.
iPhone7_pos = df[df['sentiment'] == 1]
separator = "*" * 50
print("Positive")
for tweet_text in iPhone7_pos['text'][90:100]:
    print(tweet_text, separator, sep="\n")

Positive
Who needs an iPhone 7 when Best Buy?s iPhone sale is this good? http:// dlvr.it/M8SkJW
**************************************************
Who needs an iPhone 7 when Best Buy?s iPhone sale is this good? http:// ift.tt/2bDzXc3 pic.twitter.com/IvsWLgPYOA
**************************************************
BGR : Who needs an iPhone 7 when Best Buy?s iPhone sale is this good? http:// ift.tt/2bDzXc3
**************************************************
Who needs an iPhone 7 when Best Buy?s iPhone sale is this good? https:// goo.gl/hFUzoc #downloader #androidapps
**************************************************
Who needs an iPhone 7 when Best Buy?s iPhone sale is this good? http:// goo.gl/fb/pm5Iam #bgr
**************************************************
Who needs an iPhone 7 when Best Buy?s iPhone sale is this good? http:// bgr.com/2016/08/30/iph one-se-cost-best-buy-sale-refurbished/ ? | BGR
**************************************************
Who needs an iPhone 7 when Best Buy?s iPhon

In [20]:
# Sentiment Analysis - Trial 2 using AFINN
# Build the AFINN scorer with its default constructor arguments.
afinn = Afinn()

In [21]:
# Score every tweet text with the AFINN scorer, then peek at the first
# ten scores.
pscore = [afinn.score(text) for text in df['text']]

pscore[:10]

[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 0.0, 2.0, 0.0]

In [22]:
# Store the AFINN scores as a new column and inspect the last ten rows.
df['pscore'] = pscore
df.tail(10)

Unnamed: 0,date,username,text,favorites_count,four_question_marks,lang,sentiment,pscore
315250,26/10/2016 1:19,computerworks5,TOZO for iPhone 7 Plus Case [0.35mm] Ultra-Thi...,0,False,en,0,7.0
315251,26/10/2016 1:19,EntMediaNews,iPhone 7 Plus : Apple May Have Underestimated ...,0,False,en,0,-4.0
315252,26/10/2016 1:18,SiirChauncey,I love my iPhone 7 Plus by Apple,0,False,en,0,3.0
315253,26/10/2016 1:18,mislavbusic,Jedna s #iPhone7Plus pic.twitter.com/21BBbMHwpJ,21,False,en,0,0.0
315254,26/10/2016 1:18,UbergizmoDE,iOS 10.1 mit Fehlerbehebungen und neuem Porträ...,0,False,en,0,0.0
315255,26/10/2016 1:18,MekelJames,Unboxing of Acewin iPhone 7 Plus Case & New Ne...,3,False,en,0,1.0
315256,26/10/2016 1:17,Sokom_Im,'In Squad We Trust' #lightRooM #potrait #iphon...,2,False,en,0,1.0
315257,26/10/2016 1:17,kumarAbhi91,"For Apple, Surprise Demand for iPhone 7 Plus M...",0,False,en,0,-1.0
315258,26/10/2016 1:17,DoctorNayan,"Breaking News For Apple, Surprise Demand for i...",0,False,en,0,-1.0
315259,26/10/2016 1:17,letsmoveonn,"Breaking News For Apple, Surprise Demand for i...",0,False,en,0,-1.0


In [23]:
# Rows whose AFINN score is strictly greater than zero.
positive_pscore = df.loc[df['pscore'].gt(0)]

In [24]:
# (rows, columns) of the positive-score subset.
positive_pscore.shape

(56612, 8)

In [25]:
# Preview the first rows of the positive-score subset.
positive_pscore.head()

Unnamed: 0,date,username,text,favorites_count,four_question_marks,lang,sentiment,pscore
29,15/7/2016 7:39,derek_madlutk,"Hey @AppleSupport , I have a solution 2 increa...",0,False,en,0,4.0
35,15/7/2016 7:36,oppresne90,Mia Khalifa BEFORE She Became A Famous Porn St...,0,False,en,0,2.0
62,15/7/2016 7:26,EmilysAnelie,Emily buying her iPhone6 + and apple watch fla...,3,False,en,0,4.0
70,15/7/2016 7:22,justcallherA,"hope the iPhone7 comes out early September, I ...",0,False,en,0,2.0
72,15/7/2016 7:20,JMAPPLE08,Nueva Filtración en VÍDEO del iPhone 7 #iphone...,1,False,en,0,1.0


In [26]:
# Rows whose AFINN score is strictly less than zero.
negative_pscore = df.loc[df['pscore'].lt(0)]

In [27]:
# (rows, columns) of the negative-score subset.
negative_pscore.shape

(25288, 8)

In [28]:
# Preview the first rows of the negative-score subset.
negative_pscore.head()

Unnamed: 0,date,username,text,favorites_count,four_question_marks,lang,sentiment,pscore
67,15/7/2016 7:23,eBayShopperNews,#iPhone #iPhone6 ~NEW APPLE CERTIFIED~ Apple i...,0,False,en,0,-1.0
82,15/7/2016 7:16,TheApplePips,iPhone 7 rumors: Longer battery life and bigge...,0,False,en,0,-1.0
104,15/7/2016 7:02,GeekNe_ws,"The iPhone 7 May Come In 4 Different Colors, A...",0,False,en,0,-1.0
128,15/7/2016 6:46,cilgiaho92,Sara Jean Underwood Goes Basically Naked On In...,0,False,en,0,-2.0
139,15/7/2016 6:36,MARCIANOPHONE,FILTRADO! iPhone7 SIN entrada de Headphone jac...,45,False,en,0,-2.0


In [29]:
# Save the frame with the added `sentiment` and `pscore` columns.
# `index_col` is a `read_csv` argument; `DataFrame.to_csv` does not accept it
# and raises TypeError. The index is written by default, so the file can be
# reloaded with `pd.read_csv(..., index_col=0)`.
df.to_csv("iPhone7_pscore.csv")