## Importing library

In [81]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import re
import keras
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, SimpleRNN

In [39]:
import nltk
nltk.download('punkt_tab')
nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [40]:
data=pd.read_csv("/content/judge-1377884607_tweet_product_company.csv",encoding='latin-1')

## Preprocessing

In [41]:
data

Unnamed: 0,tweet_text,emotion_in_tweet_is_directed_at,is_there_an_emotion_directed_at_a_brand_or_product
0,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,iPhone,Negative emotion
1,@jessedee Know about @fludapp ? Awesome iPad/i...,iPad or iPhone App,Positive emotion
2,@swonderlin Can not wait for #iPad 2 also. The...,iPad,Positive emotion
3,@sxsw I hope this year's festival isn't as cra...,iPad or iPhone App,Negative emotion
4,@sxtxstate great stuff on Fri #SXSW: Marissa M...,Google,Positive emotion
...,...,...,...
9088,Ipad everywhere. #SXSW {link},iPad,Positive emotion
9089,"Wave, buzz... RT @mention We interrupt your re...",,No emotion toward brand or product
9090,"Google's Zeiger, a physician never reported po...",,No emotion toward brand or product
9091,Some Verizon iPhone customers complained their...,,No emotion toward brand or product


In [42]:
data['is_there_an_emotion_directed_at_a_brand_or_product'].unique()

array(['Negative emotion', 'Positive emotion',
       'No emotion toward brand or product', "I can't tell"], dtype=object)

In [43]:
data['is_there_an_emotion_directed_at_a_brand_or_product']=LabelEncoder().fit_transform(data['is_there_an_emotion_directed_at_a_brand_or_product'])

In [44]:
data.head()

Unnamed: 0,tweet_text,emotion_in_tweet_is_directed_at,is_there_an_emotion_directed_at_a_brand_or_product
0,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,iPhone,1
1,@jessedee Know about @fludapp ? Awesome iPad/i...,iPad or iPhone App,3
2,@swonderlin Can not wait for #iPad 2 also. The...,iPad,3
3,@sxsw I hope this year's festival isn't as cra...,iPad or iPhone App,1
4,@sxtxstate great stuff on Fri #SXSW: Marissa M...,Google,3


In [45]:
data=data.drop('emotion_in_tweet_is_directed_at', axis=1)

In [46]:
data.head()

Unnamed: 0,tweet_text,is_there_an_emotion_directed_at_a_brand_or_product
0,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,1
1,@jessedee Know about @fludapp ? Awesome iPad/i...,3
2,@swonderlin Can not wait for #iPad 2 also. The...,3
3,@sxsw I hope this year's festival isn't as cra...,1
4,@sxtxstate great stuff on Fri #SXSW: Marissa M...,3


## Cleaning the text and vectorizing

In [47]:
def preprocess_text(text):
  text = str(text)
  text=re.sub(r'[^a-zA-Z\s]', '', text)
  tokens=word_tokenize(text)
  tokens=[word.lower() for word in tokens if word.isalpha()]
  tokens=[word for word in tokens if word not in stopwords.words('english')]
  lemmatizer=WordNetLemmatizer()
  token=[lemmatizer.lemmatize(word) for word in tokens]
  return " ".join(tokens)

In [48]:
data["clean_text"]=data['tweet_text'].apply(lambda x: preprocess_text(x))

In [49]:
data.head()

Unnamed: 0,tweet_text,is_there_an_emotion_directed_at_a_brand_or_product,clean_text
0,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,1,wesley g iphone hrs tweeting riseaustin dead n...
1,@jessedee Know about @fludapp ? Awesome iPad/i...,3,jessedee know fludapp awesome ipadiphone app y...
2,@swonderlin Can not wait for #iPad 2 also. The...,3,swonderlin wait ipad also sale sxsw
3,@sxsw I hope this year's festival isn't as cra...,1,sxsw hope years festival isnt crashy years iph...
4,@sxtxstate great stuff on Fri #SXSW: Marissa M...,3,sxtxstate great stuff fri sxsw marissa mayer g...


In [52]:
data=data.drop('tweet_text', axis=1)

In [53]:
data.head()

Unnamed: 0,is_there_an_emotion_directed_at_a_brand_or_product,clean_text
0,1,wesley g iphone hrs tweeting riseaustin dead n...
1,3,jessedee know fludapp awesome ipadiphone app y...
2,3,swonderlin wait ipad also sale sxsw
3,1,sxsw hope years festival isnt crashy years iph...
4,3,sxtxstate great stuff fri sxsw marissa mayer g...


In [71]:
max_words=5000
max_len=100

In [72]:
tokenizer=Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(data['clean_text'])

In [73]:
x=tokenizer.texts_to_sequences(data['clean_text'])
x=pad_sequences(x, maxlen=max_len)
y=data['is_there_an_emotion_directed_at_a_brand_or_product'].values

In [74]:
x

array([[   0,    0,    0, ..., 2358, 3750,    1],
       [   0,    0,    0, ...,   24, 2891,    1],
       [   0,    0,    0, ...,  145,  468,    1],
       ...,
       [   0,    0,    0, ..., 4303,    1, 2225],
       [   0,    0,    0, ...,   10, 2320,    1],
       [   0,    0,    0, ...,  525,    1,    3]], dtype=int32)

In [75]:
y

array([1, 3, 3, ..., 2, 2, 2])

## Splitting the Data

In [62]:
x_train, x_test, y_train, y_test=train_test_split(x,y,test_size=0.2,random_state=13)

In [76]:
x_train.shape

(7274, 100)

In [77]:
data['is_there_an_emotion_directed_at_a_brand_or_product'].unique()

array([1, 3, 2, 0])

## Model Creation

In [87]:
model=Sequential()
model.add(Embedding(input_dim=5000, output_dim=64, input_shape=(100,)))
model.add(SimpleRNN(128, dropout=0.2))
model.add(Dense(4, activation='softmax'))

  super().__init__(**kwargs)


In [88]:
model.summary()

In [100]:
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [101]:
model.fit(x_train, y_train, batch_size=64, epochs=5)

Epoch 1/5
[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 65ms/step - accuracy: 0.6443 - loss: 0.8408
Epoch 2/5
[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 71ms/step - accuracy: 0.6618 - loss: 0.7895
Epoch 3/5
[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 62ms/step - accuracy: 0.7169 - loss: 0.7016
Epoch 4/5
[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 65ms/step - accuracy: 0.7409 - loss: 0.6564
Epoch 5/5
[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 72ms/step - accuracy: 0.7072 - loss: 0.7079


<keras.src.callbacks.history.History at 0x7ce95171f190>

In [95]:
model.metrics_names

['loss', 'compile_metrics']

In [102]:
model.evaluate(x_test,y_test)

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.6473 - loss: 0.9036


[0.870394766330719, 0.6514568328857422]

## LSTM

In [103]:
from tensorflow.keras.layers import LSTM

In [104]:
model1=Sequential()
model1.add(Embedding(input_dim=5000, output_dim=64, input_shape=(300,)))
model1.add(LSTM(64, return_sequences=True))
model1.add(LSTM(32, dropout=0.3))
model1.add(Dense(4, activation="softmax"))

  super().__init__(**kwargs)


In [106]:
model1.summary()

In [107]:
model1.compile(optimizer=keras.optimizers.Adam(learning_rate=0.013),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [108]:
model1.fit(x_train, y_train, batch_size=64, epochs=5)

Epoch 1/5
[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 146ms/step - accuracy: 0.5847 - loss: 0.9522
Epoch 2/5
[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 143ms/step - accuracy: 0.6452 - loss: 0.8227
Epoch 3/5
[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 134ms/step - accuracy: 0.6812 - loss: 0.7821
Epoch 4/5
[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 133ms/step - accuracy: 0.6778 - loss: 0.7722
Epoch 5/5
[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 139ms/step - accuracy: 0.7051 - loss: 0.7284


<keras.src.callbacks.history.History at 0x7ce9538f9210>

In [109]:
model1.evaluate(x_test,y_test)

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 57ms/step - accuracy: 0.6414 - loss: 0.8443


[0.838388204574585, 0.64211106300354]