In [1]:
import pandas as pd
import numpy as np

#visualization
import matplotlib.pyplot as plt
from wordcloud import WordCloud

#text processing
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
import re
import nltk
import string
from nltk import word_tokenize, FreqDist
from nltk.corpus import stopwords

#modeling
from sklearn.svm import SVC
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, plot_confusion_matrix


import warnings
warnings.filterwarnings('ignore')

In [3]:
raw_data = pd.read_csv('tweet_product_company.csv', encoding = 'latin1')
raw_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9093 entries, 0 to 9092
Data columns (total 3 columns):
 #   Column                                              Non-Null Count  Dtype 
---  ------                                              --------------  ----- 
 0   tweet_text                                          9092 non-null   object
 1   emotion_in_tweet_is_directed_at                     3291 non-null   object
 2   is_there_an_emotion_directed_at_a_brand_or_product  9093 non-null   object
dtypes: object(3)
memory usage: 213.2+ KB


In [4]:
raw_data.head()

Unnamed: 0,tweet_text,emotion_in_tweet_is_directed_at,is_there_an_emotion_directed_at_a_brand_or_product
0,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,iPhone,Negative emotion
1,@jessedee Know about @fludapp ? Awesome iPad/i...,iPad or iPhone App,Positive emotion
2,@swonderlin Can not wait for #iPad 2 also. The...,iPad,Positive emotion
3,@sxsw I hope this year's festival isn't as cra...,iPad or iPhone App,Negative emotion
4,@sxtxstate great stuff on Fri #SXSW: Marissa M...,Google,Positive emotion


In [5]:
raw_data['emotion_in_tweet_is_directed_at'].value_counts()

iPad                               946
Apple                              661
iPad or iPhone App                 470
Google                             430
iPhone                             297
Other Google product or service    293
Android App                         81
Android                             78
Other Apple product or service      35
Name: emotion_in_tweet_is_directed_at, dtype: int64

In [6]:
raw_data['is_there_an_emotion_directed_at_a_brand_or_product'].value_counts()

No emotion toward brand or product    5389
Positive emotion                      2978
Negative emotion                       570
I can't tell                           156
Name: is_there_an_emotion_directed_at_a_brand_or_product, dtype: int64

In [12]:
cant_tell = raw_data[raw_data['is_there_an_emotion_directed_at_a_brand_or_product']=='I can\'t tell']

In [13]:
cant_tell.head()

Unnamed: 0,tweet_text,emotion_in_tweet_is_directed_at,is_there_an_emotion_directed_at_a_brand_or_product
90,Thanks to @mention for publishing the news of ...,,I can't tell
102,ÛÏ@mention &quot;Apple has opened a pop-up st...,,I can't tell
237,Just what America needs. RT @mention Google to...,,I can't tell
341,The queue at the Apple Store in Austin is FOUR...,,I can't tell
368,Hope it's better than wave RT @mention Buzz is...,,I can't tell


In [14]:
raw_data = raw_data.drop([row for row in cant_tell.index])

In [15]:
raw_data['is_there_an_emotion_directed_at_a_brand_or_product'].value_counts()

No emotion toward brand or product    5389
Positive emotion                      2978
Negative emotion                       570
Name: is_there_an_emotion_directed_at_a_brand_or_product, dtype: int64

In [17]:
raw_data['class'] = 0
for row in raw_data.index:
    if raw_data['is_there_an_emotion_directed_at_a_brand_or_product'][row] == 'Positive emotion':
        raw_data['class'] = 1
    elif raw_data['is_there_an_emotion_directed_at_a_brand_or_product'][row] == 'Negative emotion':
        raw_data['class'] = -1
raw_data.head()
    

Unnamed: 0,tweet_text,emotion_in_tweet_is_directed_at,is_there_an_emotion_directed_at_a_brand_or_product,class
0,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,iPhone,Negative emotion,1
1,@jessedee Know about @fludapp ? Awesome iPad/i...,iPad or iPhone App,Positive emotion,1
2,@swonderlin Can not wait for #iPad 2 also. The...,iPad,Positive emotion,1
3,@sxsw I hope this year's festival isn't as cra...,iPad or iPhone App,Negative emotion,1
4,@sxtxstate great stuff on Fri #SXSW: Marissa M...,Google,Positive emotion,1


In [18]:
raw_data.isnull().sum()

tweet_text                                               1
emotion_in_tweet_is_directed_at                       5655
is_there_an_emotion_directed_at_a_brand_or_product       0
class                                                    0
dtype: int64

In [19]:
raw_data['emotion_in_tweet_is_directed_at'] = raw_data['emotion_in_tweet_is_directed_at'].fillna('unspecified')

In [20]:
raw_data.isnull().sum()

tweet_text                                            1
emotion_in_tweet_is_directed_at                       0
is_there_an_emotion_directed_at_a_brand_or_product    0
class                                                 0
dtype: int64

In [21]:
raw_data.head(10)

Unnamed: 0,tweet_text,emotion_in_tweet_is_directed_at,is_there_an_emotion_directed_at_a_brand_or_product,class
0,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,iPhone,Negative emotion,1
1,@jessedee Know about @fludapp ? Awesome iPad/i...,iPad or iPhone App,Positive emotion,1
2,@swonderlin Can not wait for #iPad 2 also. The...,iPad,Positive emotion,1
3,@sxsw I hope this year's festival isn't as cra...,iPad or iPhone App,Negative emotion,1
4,@sxtxstate great stuff on Fri #SXSW: Marissa M...,Google,Positive emotion,1
5,@teachntech00 New iPad Apps For #SpeechTherapy...,unspecified,No emotion toward brand or product,1
6,,unspecified,No emotion toward brand or product,1
7,"#SXSW is just starting, #CTIA is around the co...",Android,Positive emotion,1
8,Beautifully smart and simple idea RT @madebyma...,iPad or iPhone App,Positive emotion,1
9,Counting down the days to #sxsw plus strong Ca...,Apple,Positive emotion,1


In [22]:
raw_data = raw_data.drop(index=6, axis=0)

In [23]:
raw_data.head(10)

Unnamed: 0,tweet_text,emotion_in_tweet_is_directed_at,is_there_an_emotion_directed_at_a_brand_or_product,class
0,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,iPhone,Negative emotion,1
1,@jessedee Know about @fludapp ? Awesome iPad/i...,iPad or iPhone App,Positive emotion,1
2,@swonderlin Can not wait for #iPad 2 also. The...,iPad,Positive emotion,1
3,@sxsw I hope this year's festival isn't as cra...,iPad or iPhone App,Negative emotion,1
4,@sxtxstate great stuff on Fri #SXSW: Marissa M...,Google,Positive emotion,1
5,@teachntech00 New iPad Apps For #SpeechTherapy...,unspecified,No emotion toward brand or product,1
7,"#SXSW is just starting, #CTIA is around the co...",Android,Positive emotion,1
8,Beautifully smart and simple idea RT @madebyma...,iPad or iPhone App,Positive emotion,1
9,Counting down the days to #sxsw plus strong Ca...,Apple,Positive emotion,1
10,Excited to meet the @samsungmobileus at #sxsw ...,Android,Positive emotion,1


In [24]:
raw_data['emotion_in_tweet_is_directed_at'].value_counts()

unspecified                        5654
iPad                                942
Apple                               659
iPad or iPhone App                  470
Google                              429
iPhone                              296
Other Google product or service     292
Android App                          81
Android                              78
Other Apple product or service       35
Name: emotion_in_tweet_is_directed_at, dtype: int64

In [25]:
raw_data['Brand_Mentioned'] = raw_data['emotion_in_tweet_is_directed_at']

In [30]:
for row in raw_data.index:
    if raw_data['Brand_Mentioned'][row] == 'iPad':
        raw_data['Brand_Mentioned'][row] = 'Apple'
    elif raw_data['Brand_Mentioned'][row] == 'iPad or iPhone App':
        raw_data['Brand_Mentioned'][row] = 'Apple'
    elif raw_data['Brand_Mentioned'][row] == 'iPhone':
        raw_data['Brand_Mentioned'][row] = 'Apple'
    elif raw_data['Brand_Mentioned'][row] == 'Other Apple product or service':
        raw_data['Brand_Mentioned'][row] = 'Apple'
            
    elif raw_data['Brand_Mentioned'][row] == 'Other Google product or service':
        raw_data['Brand_Mentioned'][row] = 'Google'
    elif raw_data['Brand_Mentioned'][row] == 'Android App':
        raw_data['Brand_Mentioned'][row] = 'Google'
    elif raw_data['Brand_Mentioned'][row] == 'Android':
        raw_data['Brand_Mentioned'][row] = 'Google'
    
        

In [31]:
raw_data.head()

Unnamed: 0,tweet_text,emotion_in_tweet_is_directed_at,is_there_an_emotion_directed_at_a_brand_or_product,class,Brand_Mentioned
0,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,iPhone,Negative emotion,1,Apple
1,@jessedee Know about @fludapp ? Awesome iPad/i...,iPad or iPhone App,Positive emotion,1,Apple
2,@swonderlin Can not wait for #iPad 2 also. The...,iPad,Positive emotion,1,Apple
3,@sxsw I hope this year's festival isn't as cra...,iPad or iPhone App,Negative emotion,1,Apple
4,@sxtxstate great stuff on Fri #SXSW: Marissa M...,Google,Positive emotion,1,Google


In [36]:
from nltk.corpus import stopwords
stopwords_list = stopwords.words('english')

In [37]:
stopwords_list += string.punctuation

In [38]:
#create a regular expression to remove the URLs and email addresses from the tweets
raw_data['clean_tweet'] = [re.sub(r'(http://[^""\s]+) | (@\w+)','',tweet) for tweet in raw_data['tweet_text']]

In [40]:
def process_tweet(tweet):
    tokens = nltk.word_tokenize(tweet)
    words_lower = [token.lower() for token in tokens]
    words_clean = [word for word in words_lower if word not in stopwords_list]
    return words_clean

processed_data = list(map(process_tweet, raw_data['clean_tweet']))

In [41]:
processed_data[0]

['wesley83',
 '3g',
 'iphone',
 '3',
 'hrs',
 'tweeting',
 'rise_austin',
 'dead',
 'need',
 'upgrade',
 'plugin',
 'stations',
 'sxsw']

In [42]:
total_vocab = set()
for token in processed_data:
    total_vocab.update(token)

In [43]:
X = raw_data['clean_tweet']
y = raw_data['class']

In [44]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.25)

In [46]:
v = CountVectorizer()
X_train_count = v.fit_transform(X_train)
X_test_count = v.transform(X_test)

In [47]:
nb = MultinomialNB()
nb.fit(X_train_count,y_train)

MultinomialNB()

In [48]:
predictions = nb.predict(X_test_count)

In [50]:
nb.score(X_test_count,y_test)

1.0