#Importing Libraries

In [268]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, LSTM, Dense

#Importing dataset

In [269]:
df = pd.read_csv('/content/judge-1377884607_tweet_product_company.csv',encoding='latin-1' )

In [270]:
df.head()

Unnamed: 0,tweet_text,emotion_in_tweet_is_directed_at,is_there_an_emotion_directed_at_a_brand_or_product
0,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,iPhone,Negative emotion
1,@jessedee Know about @fludapp ? Awesome iPad/i...,iPad or iPhone App,Positive emotion
2,@swonderlin Can not wait for #iPad 2 also. The...,iPad,Positive emotion
3,@sxsw I hope this year's festival isn't as cra...,iPad or iPhone App,Negative emotion
4,@sxtxstate great stuff on Fri #SXSW: Marissa M...,Google,Positive emotion


In [271]:
df.columns

Index(['tweet_text', 'emotion_in_tweet_is_directed_at',
       'is_there_an_emotion_directed_at_a_brand_or_product'],
      dtype='object')

#Data Preprocessing

In [272]:
df.shape

(9093, 3)

In [273]:
#deleting nan values
df.dropna(inplace=True)

In [274]:
#checking for the null values
df.isnull().sum()

Unnamed: 0,0
tweet_text,0
emotion_in_tweet_is_directed_at,0
is_there_an_emotion_directed_at_a_brand_or_product,0


In [275]:
#checking percentage of null values in the column "emotion_in_tweet_is_directed_at"
df['emotion_in_tweet_is_directed_at'].isnull().sum()/len(df['emotion_in_tweet_is_directed_at'])*100

0.0

In [276]:
#Dropping the column "emotion_in_tweet_is_directed_at".
df.drop('emotion_in_tweet_is_directed_at',axis=1,inplace=True)

#Train Test Split

In [277]:
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)


In [278]:
x = df['tweet_text']
y = df['is_there_an_emotion_directed_at_a_brand_or_product']
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [279]:
Sample = df.sample(1).values[0]
print('Text:\n', Sample[0])
print('\nlabel:', Sample[1])

Text:
 Two year old shows us howmto use iPad #USDes #sxsw

label: Positive emotion


In [280]:
x.dtypes

dtype('O')

In [281]:

#Tokenization
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
tokenizer = Tokenizer(num_words=10000)
tokenizer.fit_on_texts(df['tweet_text'])
sequences = tokenizer.texts_to_sequences(df['tweet_text'])


In [282]:
#vectorization
vectorizer = TextVectorization(max_tokens=5000,output_sequence_length=20,output_mode='int')
vectorizer.adapt(x_train)

In [283]:
label_en = LabelEncoder()
y_train = label_en.fit_transform(y_train) # Encode y_train instead of y
y_test = label_en.transform(y_test) # Encode y_test


#SIMPLE RNN

In [284]:
model_rnn = Sequential()
model_rnn.add(vectorizer)
model_rnn.add(Embedding(input_dim= 5000, output_dim=256,mask_zero=True))
model_rnn.add(SimpleRNN(128,activation='tanh',return_sequences = True))
model_rnn.add(SimpleRNN(64))
model_rnn.add(Dense(64,activation='tanh'))
model_rnn.add(Dense(4,activation='softmax'))

In [285]:
model_rnn.compile(optimizer='adam',loss = 'categorical_crossentropy',metrics = ['accuracy'])

In [286]:
import tensorflow as tf
y_train = tf.keras.utils.to_categorical(y_train, num_classes=4)  # One-hot encode y_train
y_test = tf.keras.utils.to_categorical(y_test, num_classes=4)

In [287]:
history = model_rnn.fit(x_train,y_train,epochs = 10,batch_size = 32,validation_data = (x_test,y_test))

Epoch 1/10
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 45ms/step - accuracy: 0.7568 - loss: 0.7057 - val_accuracy: 0.8118 - val_loss: 0.5723
Epoch 2/10
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 43ms/step - accuracy: 0.8264 - loss: 0.5101 - val_accuracy: 0.7997 - val_loss: 0.5891
Epoch 3/10
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 36ms/step - accuracy: 0.9416 - loss: 0.2032 - val_accuracy: 0.7496 - val_loss: 0.7700
Epoch 4/10
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 51ms/step - accuracy: 0.9903 - loss: 0.0446 - val_accuracy: 0.7542 - val_loss: 0.8066
Epoch 5/10
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 36ms/step - accuracy: 0.9923 - loss: 0.0375 - val_accuracy: 0.8316 - val_loss: 0.8049
Epoch 6/10
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 35ms/step - accuracy: 0.9955 - loss: 0.0185 - val_accuracy: 0.7967 - val_loss: 0.7631
Epoch 7/10
[1m83/83[0m [32m━━━

In [288]:
#Evaluating the model
loss, accuracy = model_rnn.evaluate(x_test, y_test)
print(f"Test Accuracy: {accuracy:.2f}")

[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.8260 - loss: 0.9041
Test Accuracy: 0.83


In [289]:
#