In [351]:
import pandas as pd
import numpy as np
import tensorflow as tf
import random

In [352]:
# Single seed shared by every random number generator used in this notebook.
SEED = 42

# Seed TensorFlow, NumPy and Python's `random` module, in that order.
for seed_rng in (tf.random.set_seed, np.random.seed, random.seed):
    seed_rng(SEED)

In [353]:
# Load the tweet/emotion table from the CSV in the current working directory.
df = pd.read_csv(filepath_or_buffer='tweet_emotions.csv')

In [354]:
# Display the freshly loaded frame (columns: tweet_id, sentiment, content).
df

Unnamed: 0,tweet_id,sentiment,content
0,1956967341,empty,@tiffanylue i know i was listenin to bad habi...
1,1956967666,sadness,Layin n bed with a headache ughhhh...waitin o...
2,1956967696,sadness,Funeral ceremony...gloomy friday...
3,1956967789,enthusiasm,wants to hang out with friends SOON!
4,1956968416,neutral,@dannycastillo We want to trade with someone w...
...,...,...,...
39995,1753918954,neutral,@JohnLloydTaylor
39996,1753919001,love,Happy Mothers Day All my love
39997,1753919005,love,Happy Mother's Day to all the mommies out ther...
39998,1753919043,happiness,@niariley WASSUP BEAUTIFUL!!! FOLLOW ME!! PEE...


In [355]:
# Number of tweets whose sentiment label is the placeholder value 'empty'.
count = df['sentiment'].eq('empty').sum()
count

np.int64(827)

In [356]:
# Keep only the rows whose sentiment is something other than 'empty'.
df = df.loc[df['sentiment'] != 'empty']

In [357]:
# Display the frame after the 'empty' rows were removed; the index still has
# the original row numbers at this point.
df

Unnamed: 0,tweet_id,sentiment,content
1,1956967666,sadness,Layin n bed with a headache ughhhh...waitin o...
2,1956967696,sadness,Funeral ceremony...gloomy friday...
3,1956967789,enthusiasm,wants to hang out with friends SOON!
4,1956968416,neutral,@dannycastillo We want to trade with someone w...
5,1956968477,worry,Re-pinging @ghostridah14: why didn't you go to...
...,...,...,...
39995,1753918954,neutral,@JohnLloydTaylor
39996,1753919001,love,Happy Mothers Day All my love
39997,1753919005,love,Happy Mother's Day to all the mommies out ther...
39998,1753919043,happiness,@niariley WASSUP BEAUTIFUL!!! FOLLOW ME!! PEE...


In [358]:
# Renumber the rows 0..n-1 after filtering; drop=True discards the old index
# instead of keeping it as an extra column.
df = df.reset_index(drop=True)

In [359]:
# Display the frame again to confirm the index is contiguous after the reset.
df

Unnamed: 0,tweet_id,sentiment,content
0,1956967666,sadness,Layin n bed with a headache ughhhh...waitin o...
1,1956967696,sadness,Funeral ceremony...gloomy friday...
2,1956967789,enthusiasm,wants to hang out with friends SOON!
3,1956968416,neutral,@dannycastillo We want to trade with someone w...
4,1956968477,worry,Re-pinging @ghostridah14: why didn't you go to...
...,...,...,...
39168,1753918954,neutral,@JohnLloydTaylor
39169,1753919001,love,Happy Mothers Day All my love
39170,1753919005,love,Happy Mother's Day to all the mommies out ther...
39171,1753919043,happiness,@niariley WASSUP BEAUTIFUL!!! FOLLOW ME!! PEE...


In [360]:
# List the distinct sentiment labels that remain (order of first appearance).
df.loc[:, 'sentiment'].unique()

array(['sadness', 'enthusiasm', 'neutral', 'worry', 'surprise', 'love',
       'fun', 'hate', 'happiness', 'boredom', 'relief', 'anger'],
      dtype=object)

In [361]:
# Target variable: the sentiment column, still as raw label strings.
y = df.loc[:, 'sentiment']

In [362]:
from sklearn.preprocessing import LabelEncoder

In [363]:
# Encoder that maps each sentiment string to an integer class id.
label_encoder = LabelEncoder()

In [364]:
# Encode the sentiment strings as integer class ids.
# Fit on the DataFrame column rather than on `y` itself: re-binding `y` from
# its own value meant that re-running this cell fitted the encoder on the
# already-encoded integers, so `label_encoder.classes_` no longer held the
# sentiment names needed to map predictions back with `inverse_transform`.
y = label_encoder.fit_transform(df['sentiment'])

In [365]:
# Inspect the encoded label array.
y

array([9, 9, 2, ..., 6, 4, 6], shape=(39173,))

In [366]:
# Print the sorted distinct class ids present in `y`.
print(np.unique(y))

[ 0  1  2  3  4  5  6  7  8  9 10 11]


In [367]:
# Make sure the labels are a NumPy array (this creates a copy).
# NOTE(review): `y` appears to be an ndarray already after the label
# encoding step, so this is presumably redundant — confirm before removing.
y=np.array(y)

In [368]:
from sklearn.model_selection import train_test_split

In [369]:
# Stratified 80/20 split of the raw tweet text and the encoded labels, so each
# class keeps the same proportion in both parts. Seeded with the notebook-wide
# SEED so the split is reproducible.
x_train_text, x_test_text, y_train, y_test = train_test_split(
    df['content'].values,
    y,
    test_size=0.2,
    stratify=y,
    random_state=SEED,
)

In [370]:
# Whitespace-token count of each training tweet.
train_length = pd.Series(x_train_text).str.split().str.len()

# Use the 98th percentile of those counts (truncated to an int) as the fixed
# sequence length, computed from the training split only.
sentence_length = int(np.percentile(train_length, q=98))
sentence_length

27

In [371]:
from tensorflow.keras.layers import TextVectorization

In [372]:
# Text -> integer token ids, with every output sequence forced to exactly
# `sentence_length` positions.
tv = TextVectorization(
    output_sequence_length=sentence_length,
    output_mode='int',
)

In [373]:
# Build the vectorizer's vocabulary from the training text only; the test
# split is not passed in here.
tv.adapt(x_train_text)

In [374]:
# Size of the learned vocabulary (special tokens included); used as the
# embedding layer's `input_dim`.
vocabulary_size = tv.vocabulary_size()
vocabulary_size

44800

In [375]:
# Pair each raw tweet string with its integer label, one dataset element per
# example, for both splits.
train_ds = tf.data.Dataset.from_tensor_slices(tensors=(x_train_text, y_train))
test_ds = tf.data.Dataset.from_tensor_slices(tensors=(x_test_text, y_test))

In [376]:
# Build the final input pipelines directly from the split arrays so this cell
# can be re-run safely: chaining `.batch()` onto the existing `train_ds` /
# `test_ds` names batched the already-batched datasets again on a second run.
BATCH_SIZE = 128

# Keras ignores `fit(shuffle=...)` for tf.data inputs, so the training data is
# shuffled here. The buffer spans the whole training set and is reshuffled
# every epoch; the seed keeps the order reproducible.
train_ds = (
    tf.data.Dataset.from_tensor_slices((x_train_text, y_train))
    .shuffle(
        buffer_size=len(x_train_text),
        seed=SEED,
        reshuffle_each_iteration=True,
    )
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

# The held-out split is only batched; its order is left unchanged.
test_ds = (
    tf.data.Dataset.from_tensor_slices((x_test_text, y_test))
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

In [377]:
from tensorflow.keras.layers import Input,Dropout,Dense,Bidirectional,LSTM,Embedding
from tensorflow.keras.models import Sequential

In [378]:
# End-to-end classifier: raw string -> token ids -> embeddings -> BiLSTM ->
# class probabilities. The vectorizer is part of the model, so it consumes
# plain strings.
model_bilstm = Sequential(
    layers=[
        Input(shape=(), dtype=tf.string),  # one scalar string per example
        tv,
        Embedding(
            input_dim=vocabulary_size,
            output_dim=50,
            mask_zero=True,  # token id 0 is masked for downstream layers
        ),
        Dropout(rate=0.3),
        Bidirectional(LSTM(units=64)),
        Dropout(rate=0.3),
        # One softmax unit per distinct encoded label in `y`.
        Dense(units=np.unique(y).size, activation='softmax'),
    ]
)

In [379]:
from tensorflow.keras.optimizers import AdamW

In [380]:
# Labels are integer class ids, hence the sparse categorical cross-entropy.
model_bilstm.compile(
    optimizer=AdamW(learning_rate=1e-3),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [381]:
# Stop once validation loss has failed to improve by at least 0.001 for
# 7 epochs in a row, then roll the model back to its best weights.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0.001,
    patience=7,
    restore_best_weights=True,
    verbose=1,
)

In [None]:
# Train for up to 30 epochs; the early-stopping callback may end training sooner.
# `batch_size` is deliberately not passed: `train_ds` is a tf.data.Dataset that
# is already batched, and Keras documents that `batch_size` must not be given
# for dataset inputs (older tf.keras raises, newer Keras ignores it).
# NOTE(review): `test_ds` doubles as the validation set, so early stopping and
# `restore_best_weights` are selected on the test data — consider carving a
# separate validation split out of the training data for an unbiased test score.
# The History object is kept so the learning curves can be inspected afterwards.
history = model_bilstm.fit(
    train_ds,
    validation_data=test_ds,
    epochs=30,
    callbacks=[early_stopping],
)

Epoch 1/30
