# **Importing Libraries**

In [1]:
import pandas as pd
import numpy as np

# **Loading dataset**

In [3]:
# Location and text encoding of the raw tweet-sentiment CSV.
DATASET_PATH = '/content/judge-1377884607_tweet_product_company.csv'
DATASET_ENCODING = 'unicode_escape'

# Read the whole file into a DataFrame, decoding it with the encoding above.
df = pd.read_csv(DATASET_PATH, encoding=DATASET_ENCODING)

In [4]:
# Preview the first rows of the freshly loaded frame.
df.head()

Unnamed: 0,tweet_text,emotion_in_tweet_is_directed_at,is_there_an_emotion_directed_at_a_brand_or_product
0,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,iPhone,Negative emotion
1,@jessedee Know about @fludapp ? Awesome iPad/i...,iPad or iPhone App,Positive emotion
2,@swonderlin Can not wait for #iPad 2 also. The...,iPad,Positive emotion
3,@sxsw I hope this year's festival isn't as cra...,iPad or iPhone App,Negative emotion
4,@sxtxstate great stuff on Fri #SXSW: Marissa M...,Google,Positive emotion


In [4]:
# List the column labels of the loaded frame.
df.columns

Index(['tweet_text', 'emotion_in_tweet_is_directed_at',
       'is_there_an_emotion_directed_at_a_brand_or_product'],
      dtype='object')

In [5]:
# (rows, columns) of the raw frame.
df.shape

(9093, 3)

In [6]:
# Per-column non-null counts and dtypes; used to spot missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9093 entries, 0 to 9092
Data columns (total 3 columns):
 #   Column                                              Non-Null Count  Dtype 
---  ------                                              --------------  ----- 
 0   tweet_text                                          9092 non-null   object
 1   emotion_in_tweet_is_directed_at                     3291 non-null   object
 2   is_there_an_emotion_directed_at_a_brand_or_product  9093 non-null   object
dtypes: object(3)
memory usage: 213.2+ KB


In [7]:
# Discard the brand/product target column; only the tweet text and its
# sentiment annotation are used from here on.
# Rebinding `df` (instead of inplace=True) avoids mutating the frame object
# that earlier cells displayed.
df = df.drop(columns=['emotion_in_tweet_is_directed_at'])

# **Data preprocessing**

In [8]:
# Preview the frame again now that the target-brand column has been removed.
df.head()

Unnamed: 0,tweet_text,is_there_an_emotion_directed_at_a_brand_or_product
0,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,Negative emotion
1,@jessedee Know about @fludapp ? Awesome iPad/i...,Positive emotion
2,@swonderlin Can not wait for #iPad 2 also. The...,Positive emotion
3,@sxsw I hope this year's festival isn't as cra...,Negative emotion
4,@sxtxstate great stuff on Fri #SXSW: Marissa M...,Positive emotion


In [9]:
# Count missing values in each remaining column.
df.isna().sum()

tweet_text                                            1
is_there_an_emotion_directed_at_a_brand_or_product    0
dtype: int64

In [10]:
# Remove every row that has a missing value in any column
# (the null count above shows a single tweet with no text).
df = df.dropna(axis=0, how='any')

In [11]:
# Build the modelling frame with short column names.
# An explicit rename mapping is used instead of positional `data.columns = [...]`
# so the names cannot be swapped if the column order ever changes. rename()
# also returns a new frame, so later column assignments on `data` operate on
# its own storage rather than on a selection of `df` (which, depending on the
# pandas version, can raise SettingWithCopyWarning).
data = df[['tweet_text', 'is_there_an_emotion_directed_at_a_brand_or_product']].rename(
    columns={
        'tweet_text': 'text',
        'is_there_an_emotion_directed_at_a_brand_or_product': 'label',
    }
)

In [12]:
# Preview the two-column frame with its shortened column names.
data.head()

Unnamed: 0,text,label
0,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,Negative emotion
1,@jessedee Know about @fludapp ? Awesome iPad/i...,Positive emotion
2,@swonderlin Can not wait for #iPad 2 also. The...,Positive emotion
3,@sxsw I hope this year's festival isn't as cra...,Negative emotion
4,@sxtxstate great stuff on Fri #SXSW: Marissa M...,Positive emotion


In [13]:
# Distinct label strings present before they are encoded as integers.
data['label'].unique()

array(['Negative emotion', 'Positive emotion',
       'No emotion toward brand or product', "I can't tell"], dtype=object)

In [14]:
# Integer class ids for the annotation strings: 0 = negative, 2 = positive,
# and both "no emotion" and "can't tell" are folded into class 1.
label_to_id = {
    'Negative emotion': 0,
    'No emotion toward brand or product': 1,
    "I can't tell": 1,
    'Positive emotion': 2,
}
# Series.map yields NaN for values missing from the mapping, so this cell
# must run exactly once on the original string labels.
data['label'] = data['label'].map(label_to_id)

In [15]:
# Preview the frame after the labels were mapped to integers.
data.head()

Unnamed: 0,text,label
0,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,0
1,@jessedee Know about @fludapp ? Awesome iPad/i...,2
2,@swonderlin Can not wait for #iPad 2 also. The...,2
3,@sxsw I hope this year's festival isn't as cra...,0
4,@sxtxstate great stuff on Fri #SXSW: Marissa M...,2


In [16]:
# Distinct label values after the integer mapping (sanity check of the encoding).
data['label'].unique()

array([0, 2, 1])

In [17]:
# (rows, columns) of the cleaned modelling frame.
data.shape

(9092, 2)

In [18]:
# Confirm there are no nulls left and that `label` is now an integer column.
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 9092 entries, 0 to 9092
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   text    9092 non-null   object
 1   label   9092 non-null   int64 
dtypes: int64(1), object(1)
memory usage: 213.1+ KB


In [19]:
import gensim

# Turn each tweet into a list of tokens with gensim's simple_preprocess
# and store the result in a new column.
tokenize_tweet = gensim.utils.simple_preprocess
data['text_clean_gensim'] = data['text'].apply(tokenize_tweet)
data.head()

Unnamed: 0,text,label,text_clean_gensim
0,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,0,"[wesley, have, iphone, after, hrs, tweeting, a..."
1,@jessedee Know about @fludapp ? Awesome iPad/i...,2,"[jessedee, know, about, fludapp, awesome, ipad..."
2,@swonderlin Can not wait for #iPad 2 also. The...,2,"[swonderlin, can, not, wait, for, ipad, also, ..."
3,@sxsw I hope this year's festival isn't as cra...,0,"[sxsw, hope, this, year, festival, isn, as, cr..."
4,@sxtxstate great stuff on Fri #SXSW: Marissa M...,2,"[sxtxstate, great, stuff, on, fri, sxsw, maris..."


In [20]:
from keras.preprocessing import text

# Learn a word -> integer-id vocabulary from the gensim token lists, then
# encode every tweet as a list of those ids.
# NOTE(review): the vocabulary is fitted on all rows, i.e. before the
# train/test split performed further down.
token_lists = data['text_clean_gensim'].tolist()
tokenizer = text.Tokenizer()
tokenizer.fit_on_texts(token_lists)
tokenized_texts = tokenizer.texts_to_sequences(token_lists)

In [21]:
from keras.utils import pad_sequences

# Every encoded tweet is padded or truncated to this many token ids so the
# model receives a rectangular integer matrix.
MAX_SEQUENCE_LENGTH = 100
X = pad_sequences(tokenized_texts, maxlen=MAX_SEQUENCE_LENGTH)

In [22]:
# Number of distinct tokens in the tokenizer's word-to-index map.
len(tokenizer.word_index)

9283

In [23]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing.
# A fixed random_state makes the shuffle deterministic, so the same rows land
# in train/test on every Restart & Run All and reported metrics are comparable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    data['label'].values,
    test_size=0.2,
    random_state=42,
)

# **RNN**

In [24]:
from keras.models import Sequential
from keras.layers import Dense, Embedding, SimpleRNN, Dropout

In [25]:
# Embedding -> SimpleRNN -> small dense head with a 3-way softmax output.
# The Keras Tokenizer never assigns id 0 to a word, so the embedding table
# needs one row more than the vocabulary size.
vocab_size = len(tokenizer.word_index) + 1

model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=128, input_length=100),
    SimpleRNN(10),
    Dropout(0.5),
    Dense(50, activation='relu'),
    Dropout(0.5),
    Dense(3, activation='softmax'),
])

In [26]:
# Labels are plain integer class ids, hence the sparse cross-entropy loss.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [27]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()
     

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 100, 128)          1188352   
                                                                 
 simple_rnn (SimpleRNN)      (None, 10)                1390      
                                                                 
 dropout (Dropout)           (None, 10)                0         
                                                                 
 dense (Dense)               (None, 50)                550       
                                                                 
 dropout_1 (Dropout)         (None, 50)                0         
                                                                 
 dense_1 (Dense)             (None, 3)                 153       
                                                                 
Total params: 1,190,445
Trainable params: 1,190,445
Non-trainable params: 0

In [28]:
# Train for 15 epochs, reserving 10% of the training split for validation.
# The returned History object is kept so the per-epoch loss/accuracy values
# stay available in `history.history` for inspection or plotting.
history = model.fit(X_train, y_train, epochs=15, validation_split=0.1)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x7f7fe00b9b20>

In [29]:
# Class-probability predictions for the held-out split (one softmax row per tweet).
y_pred = model.predict(X_test)

# Reduce each probability row to its most likely class id and score it
# against the true labels, so the notebook reports a held-out metric.
y_pred_labels = np.argmax(y_pred, axis=1)
test_accuracy = np.mean(y_pred_labels == y_test)
print(f"Test accuracy: {test_accuracy:.4f}")



# **Testing**

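**Label encoding (from preprocessing):** `data['label']=data['label'].map({'Negative emotion':0, 'No emotion toward brand or product':1, "I can't tell":1, 'Positive emotion':2,})` — so output index 0 = negative, 1 = no emotion / can't tell, 2 = positive.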

In [30]:
testCase1 = "No comments"
# Tokenise with the same gensim step used to build the training column, so
# the text reaches the tokenizer in the form it was fitted on (token lists)
# rather than going through Keras' own string splitting.
testCase1_tokens = gensim.utils.simple_preprocess(testCase1)
testCase1 = tokenizer.texts_to_sequences([testCase1_tokens])
# Pad to the same length the model was trained with.
test = pad_sequences(testCase1, maxlen=100)
# One row of class probabilities, ordered by class id 0, 1, 2.
output = model.predict(test)
output



array([[0.9916408 , 0.00678333, 0.00157591]], dtype=float32)

Probability is highest for the 1st entry (index 0) => label 0 => negative comments

In [33]:
testCase2 = "its very good"
# Tokenise with the same gensim step used to build the training column, so
# the text reaches the tokenizer in the form it was fitted on (token lists)
# rather than going through Keras' own string splitting.
testCase2_tokens = gensim.utils.simple_preprocess(testCase2)
testCase2 = tokenizer.texts_to_sequences([testCase2_tokens])
# Pad to the same length the model was trained with.
test = pad_sequences(testCase2, maxlen=100)
# One row of class probabilities, ordered by class id 0, 1, 2.
output = model.predict(test)
output



array([[0.00984137, 0.05215031, 0.93800825]], dtype=float32)

Probability is highest for the 3rd entry (index 2) => label 2 => Positive