In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re
import string
import nltk
import warnings
%matplotlib inline

warnings.filterwarnings('ignore')

In [2]:
df=pd.read_csv('/content/judge-1377884607_tweet_product_company.csv',encoding='latin1')

In [3]:
df.head()

Unnamed: 0,tweet_text,emotion_in_tweet_is_directed_at,is_there_an_emotion_directed_at_a_brand_or_product
0,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,iPhone,Negative emotion
1,@jessedee Know about @fludapp ? Awesome iPad/i...,iPad or iPhone App,Positive emotion
2,@swonderlin Can not wait for #iPad 2 also. The...,iPad,Positive emotion
3,@sxsw I hope this year's festival isn't as cra...,iPad or iPhone App,Negative emotion
4,@sxtxstate great stuff on Fri #SXSW: Marissa M...,Google,Positive emotion


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9093 entries, 0 to 9092
Data columns (total 3 columns):
 #   Column                                              Non-Null Count  Dtype 
---  ------                                              --------------  ----- 
 0   tweet_text                                          9092 non-null   object
 1   emotion_in_tweet_is_directed_at                     3291 non-null   object
 2   is_there_an_emotion_directed_at_a_brand_or_product  9093 non-null   object
dtypes: object(3)
memory usage: 213.2+ KB


In [5]:
df.shape

(9093, 3)

In [6]:
data = df[['tweet_text','is_there_an_emotion_directed_at_a_brand_or_product']]
data.columns = ["text", "label"]

In [7]:
data.head()

Unnamed: 0,text,label
0,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,Negative emotion
1,@jessedee Know about @fludapp ? Awesome iPad/i...,Positive emotion
2,@swonderlin Can not wait for #iPad 2 also. The...,Positive emotion
3,@sxsw I hope this year's festival isn't as cra...,Negative emotion
4,@sxtxstate great stuff on Fri #SXSW: Marissa M...,Positive emotion


In [8]:
data.label.value_counts()

No emotion toward brand or product    5389
Positive emotion                      2978
Negative emotion                       570
I can't tell                           156
Name: label, dtype: int64

In [9]:
data['label']=data['label'].map({'Positive emotion':0,'Negative emotion':1,'No emotion toward brand or product':2,"I can't tell":3})

In [10]:
data.head()

Unnamed: 0,text,label
0,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,1
1,@jessedee Know about @fludapp ? Awesome iPad/i...,0
2,@swonderlin Can not wait for #iPad 2 also. The...,0
3,@sxsw I hope this year's festival isn't as cra...,1
4,@sxtxstate great stuff on Fri #SXSW: Marissa M...,0


In [11]:
# removes pattern in the input text
def remove_pattern(input_txt, pattern):
    r = re.findall(pattern, str(input_txt))
    for word in r:
        input_txt = re.sub(word, "", input_txt)
    return input_txt

In [12]:
data.head()

Unnamed: 0,text,label
0,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,1
1,@jessedee Know about @fludapp ? Awesome iPad/i...,0
2,@swonderlin Can not wait for #iPad 2 also. The...,0
3,@sxsw I hope this year's festival isn't as cra...,1
4,@sxtxstate great stuff on Fri #SXSW: Marissa M...,0


In [13]:
# remove twitter handles (@user)
data['clean_tweet'] = np.vectorize(remove_pattern)(data['text'], "@[\w]*")

In [14]:
data.head()

Unnamed: 0,text,label,clean_tweet
0,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,1,. I have a 3G iPhone. After 3 hrs tweeting at ...
1,@jessedee Know about @fludapp ? Awesome iPad/i...,0,Know about ? Awesome iPad/iPhone app that yo...
2,@swonderlin Can not wait for #iPad 2 also. The...,0,Can not wait for #iPad 2 also. They should sa...
3,@sxsw I hope this year's festival isn't as cra...,1,I hope this year's festival isn't as crashy a...
4,@sxtxstate great stuff on Fri #SXSW: Marissa M...,0,great stuff on Fri #SXSW: Marissa Mayer (Goog...


In [15]:
# remove special characters, numbers and punctuations
data['clean_tweet'] = data['clean_tweet'].str.replace("[^a-zA-Z#]", " ")
data.head()

Unnamed: 0,text,label,clean_tweet
0,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,1,I have a G iPhone After hrs tweeting at ...
1,@jessedee Know about @fludapp ? Awesome iPad/i...,0,Know about Awesome iPad iPhone app that yo...
2,@swonderlin Can not wait for #iPad 2 also. The...,0,Can not wait for #iPad also They should sa...
3,@sxsw I hope this year's festival isn't as cra...,1,I hope this year s festival isn t as crashy a...
4,@sxtxstate great stuff on Fri #SXSW: Marissa M...,0,great stuff on Fri #SXSW Marissa Mayer Goog...


In [16]:
# remove short words
data['clean_tweet'] = data['clean_tweet'].apply(lambda x: " ".join([w for w in x.split() if len(w)>3]))
data.head()


Unnamed: 0,text,label,clean_tweet
0,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,1,have iPhone After tweeting #RISE Austin dead n...
1,@jessedee Know about @fludapp ? Awesome iPad/i...,0,Know about Awesome iPad iPhone that likely app...
2,@swonderlin Can not wait for #iPad 2 also. The...,0,wait #iPad also They should sale them down #SXSW
3,@sxsw I hope this year's festival isn't as cra...,1,hope this year festival crashy this year iPhon...
4,@sxtxstate great stuff on Fri #SXSW: Marissa M...,0,great stuff #SXSW Marissa Mayer Google Reilly ...


In [17]:
# individual words considered as tokens
tokenized_tweet = data['clean_tweet'].apply(lambda x: x.split())
tokenized_tweet.head()


0    [have, iPhone, After, tweeting, #RISE, Austin,...
1    [Know, about, Awesome, iPad, iPhone, that, lik...
2    [wait, #iPad, also, They, should, sale, them, ...
3    [hope, this, year, festival, crashy, this, yea...
4    [great, stuff, #SXSW, Marissa, Mayer, Google, ...
Name: clean_tweet, dtype: object

In [18]:
# stem the words
from nltk.stem.porter import PorterStemmer
stemmer = PorterStemmer()

tokenized_tweet = tokenized_tweet.apply(lambda sentence: [stemmer.stem(word) for word in sentence])
tokenized_tweet.head()

0    [have, iphon, after, tweet, #rise, austin, dea...
1    [know, about, awesom, ipad, iphon, that, like,...
2    [wait, #ipad, also, they, should, sale, them, ...
3    [hope, thi, year, festiv, crashi, thi, year, i...
4    [great, stuff, #sxsw, marissa, mayer, googl, r...
Name: clean_tweet, dtype: object

In [19]:
# combine words into single sentence
for i in range(len(tokenized_tweet)):
    tokenized_tweet[i] = " ".join(tokenized_tweet[i])
    
data['clean_tweet'] = tokenized_tweet
data.head()

Unnamed: 0,text,label,clean_tweet
0,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,1,have iphon after tweet #rise austin dead need ...
1,@jessedee Know about @fludapp ? Awesome iPad/i...,0,know about awesom ipad iphon that like appreci...
2,@swonderlin Can not wait for #iPad 2 also. The...,0,wait #ipad also they should sale them down #sxsw
3,@sxsw I hope this year's festival isn't as cra...,1,hope thi year festiv crashi thi year iphon #sxsw
4,@sxtxstate great stuff on Fri #SXSW: Marissa M...,0,great stuff #sxsw marissa mayer googl reilli t...


In [20]:
vocabulary_size=len(tokenized_tweet)

# **Input Split**

In [21]:
# feature extraction
from sklearn.feature_extraction.text import CountVectorizer
bow_vectorizer = CountVectorizer(max_df=0.90, min_df=2, max_features=1000, stop_words='english')
bow = bow_vectorizer.fit_transform(data['clean_tweet'])

In [36]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(bow, data['label'], test_size=0.25)

## **Model Training**

In [37]:

from keras.models import Sequential
from keras.layers import Dense, Embedding, SimpleRNN, Dropout
     

In [38]:
model = Sequential()
model.add(Embedding(input_dim = (vocabulary_size), output_dim = 128,input_length=100))
model.add(SimpleRNN(10))
model.add(Dropout(0.5))
model.add(Dense(50, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(4, activation='softmax'))  
     



In [39]:
model.compile(loss='binary_crossentropy', 
              optimizer='adam', 
              metrics=['accuracy'])

In [40]:
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 100, 128)          1163904   
                                                                 
 simple_rnn_1 (SimpleRNN)    (None, 10)                1390      
                                                                 
 dropout_2 (Dropout)         (None, 10)                0         
                                                                 
 dense_2 (Dense)             (None, 50)                550       
                                                                 
 dropout_3 (Dropout)         (None, 50)                0         
                                                                 
 dense_3 (Dense)             (None, 4)                 204       
                                                                 
Total params: 1,166,048
Trainable params: 1,166,048
No

In [42]:
model.fit(x_train, y_train, epochs=15, validation_split=0.1)

ValueError: ignored