In [135]:
import pandas as pd
import numpy as np
import re
import string
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Dropout, Embedding, LSTM, Bidirectional, Dense
from tensorflow.keras import regularizers
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import nltk

In [136]:
data= pd.read_csv('/content/judge-1377884607_tweet_product_company.csv',encoding  = 'latin-1')

#OMITING THE DIRECTED COLUMN

In [137]:
df = data.drop(columns=['emotion_in_tweet_is_directed_at'])

In [138]:
df.head()

Unnamed: 0,tweet_text,is_there_an_emotion_directed_at_a_brand_or_product
0,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,Negative emotion
1,@jessedee Know about @fludapp ? Awesome iPad/i...,Positive emotion
2,@swonderlin Can not wait for #iPad 2 also. The...,Positive emotion
3,@sxsw I hope this year's festival isn't as cra...,Negative emotion
4,@sxtxstate great stuff on Fri #SXSW: Marissa M...,Positive emotion


#TEXT PREPROCESSING

In [139]:
#downloading nltk datas

In [140]:
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [141]:
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [142]:
#Cleaning funtion
def clean_text(text):
  # Convert the input to a string if it's not already
  if not isinstance(text, str):
    text = str(text)
  text = re.sub(r"http\S+|www\S+|https\S+", '', text, flags=re.MULTILINE)#remove URLS
  text = re.sub(r"#\w+:", '', text) # removing # tags followed by :
  text = re.sub(r'\@\w+|\#', '', text)#removes mentions and hashtags
  text = re.sub(r"[^\w\s]", '', text)#removes special characters
  text = re.sub(r'\d+', '', text)#removes numbers
  text = text.lower()#converts to lower cases
  text = text.strip()#removes extra spaces
  return text


In [143]:
#applying the cleaning function
df['cleaned_text']= df['tweet_text'].apply(clean_text)

In [144]:
df.head()

Unnamed: 0,tweet_text,is_there_an_emotion_directed_at_a_brand_or_product,cleaned_text
0,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,Negative emotion,i have a g iphone after hrs tweeting at rise_...
1,@jessedee Know about @fludapp ? Awesome iPad/i...,Positive emotion,know about awesome ipadiphone app that youll...
2,@swonderlin Can not wait for #iPad 2 also. The...,Positive emotion,can not wait for ipad also they should sale t...
3,@sxsw I hope this year's festival isn't as cra...,Negative emotion,i hope this years festival isnt as crashy as t...
4,@sxtxstate great stuff on Fri #SXSW: Marissa M...,Positive emotion,great stuff on fri marissa mayer google tim o...


In [145]:
#lemmatization and stop words removal
stop_words = set (stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

In [146]:
#defining funcion for further preprocessing
def preprocess_text(text):
  words = text.split()
  words = [lemmatizer.lemmatize(word) for word in words if word not in stop_words]
  return ' '.join(words

                  )

In [147]:
df['processed_text'] =df['cleaned_text'].apply(preprocess_text)

In [148]:
df.head()

Unnamed: 0,tweet_text,is_there_an_emotion_directed_at_a_brand_or_product,cleaned_text,processed_text
0,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,Negative emotion,i have a g iphone after hrs tweeting at rise_...,g iphone hr tweeting rise_austin dead need upg...
1,@jessedee Know about @fludapp ? Awesome iPad/i...,Positive emotion,know about awesome ipadiphone app that youll...,know awesome ipadiphone app youll likely appre...
2,@swonderlin Can not wait for #iPad 2 also. The...,Positive emotion,can not wait for ipad also they should sale t...,wait ipad also sale sxsw
3,@sxsw I hope this year's festival isn't as cra...,Negative emotion,i hope this years festival isnt as crashy as t...,hope year festival isnt crashy year iphone app...
4,@sxtxstate great stuff on Fri #SXSW: Marissa M...,Positive emotion,great stuff on fri marissa mayer google tim o...,great stuff fri marissa mayer google tim oreil...


In [149]:
#encoding sentiment labels
label_encoder = LabelEncoder()
df['sentiment_label'] = label_encoder.fit_transform(data['is_there_an_emotion_directed_at_a_brand_or_product'])

In [150]:
df.head()



Unnamed: 0,tweet_text,is_there_an_emotion_directed_at_a_brand_or_product,cleaned_text,processed_text,sentiment_label
0,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,Negative emotion,i have a g iphone after hrs tweeting at rise_...,g iphone hr tweeting rise_austin dead need upg...,1
1,@jessedee Know about @fludapp ? Awesome iPad/i...,Positive emotion,know about awesome ipadiphone app that youll...,know awesome ipadiphone app youll likely appre...,3
2,@swonderlin Can not wait for #iPad 2 also. The...,Positive emotion,can not wait for ipad also they should sale t...,wait ipad also sale sxsw,3
3,@sxsw I hope this year's festival isn't as cra...,Negative emotion,i hope this years festival isnt as crashy as t...,hope year festival isnt crashy year iphone app...,1
4,@sxtxstate great stuff on Fri #SXSW: Marissa M...,Positive emotion,great stuff on fri marissa mayer google tim o...,great stuff fri marissa mayer google tim oreil...,3


In [151]:
#tokenization

In [152]:
tokenizer = Tokenizer(num_words = 10000 , oov_token='<OOV>')
tokenizer.fit_on_texts(df['processed_text'])
sequences = tokenizer.texts_to_sequences(df['processed_text'])
padded_sequences = pad_sequences(sequences,maxlen = 50, padding='post')

In [153]:
x= padded_sequences
y=tf.keras.utils.to_categorical(df['sentiment_label'],num_classes=4)
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=42)

In [154]:
#defining the LSTM MODELS

In [155]:
model = tf.keras.Sequential([
    Embedding(input_dim=10000, output_dim=128),  # Embedding layer without regularization
    Bidirectional(LSTM(128, return_sequences=True, kernel_regularizer=regularizers.l2(0.001))),  # L2 regularization on LSTM layer
    Dropout(0.2),
    Bidirectional(LSTM(64, kernel_regularizer=regularizers.l2(0.001))),  # L2 regularization on second LSTM layer
    Dropout(0.2),
    Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.001)),  # L2 regularization on Dense layer
    Dropout(0.2),
    Dense(4, activation='softmax')  # Output layer with no regularization
])

In [156]:
#COMPILING THE MODEL
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [157]:
#TRAINING THE MODEL

In [158]:
history = model.fit(x_train,y_train,epochs=5,validation_data=(x_test,y_test))

Epoch 1/5
[1m228/228[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 342ms/step - accuracy: 0.5812 - loss: 1.4376 - val_accuracy: 0.6317 - val_loss: 0.9536
Epoch 2/5
[1m228/228[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 364ms/step - accuracy: 0.7075 - loss: 0.8413 - val_accuracy: 0.6636 - val_loss: 0.8845
Epoch 3/5
[1m228/228[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m135s[0m 335ms/step - accuracy: 0.7782 - loss: 0.6760 - val_accuracy: 0.6421 - val_loss: 0.9630
Epoch 4/5
[1m228/228[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 334ms/step - accuracy: 0.8144 - loss: 0.5790 - val_accuracy: 0.6405 - val_loss: 0.9616
Epoch 5/5
[1m228/228[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 337ms/step - accuracy: 0.8283 - loss: 0.5209 - val_accuracy: 0.6361 - val_loss: 1.1182


In [159]:
model.summary()

In [160]:
loss,accuracy = model.evaluate(x_test,y_test)
print(f"Test Accuracy : {accuracy:.2f}")

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 80ms/step - accuracy: 0.6460 - loss: 1.0869
Test Accuracy : 0.64
