In [1]:
import numpy as np
import pandas as pd
# Load the raw tweet dataset from the mounted Google Drive folder.
df = pd.read_csv(
    filepath_or_buffer="/content/drive/MyDrive/Datasets/Twitter_Data.csv"
)

In [2]:
# Keep only rows whose category is non-zero, then discard rows that contain
# missing values and rows that are exact duplicates of an earlier row.
df = (
    df.loc[df["category"] != 0]
    .dropna()
    .drop_duplicates()
)

In [3]:
import re
def remove_special_words(text):
    """Strip URLs, @mentions, #hashtags and special characters from ``text``.

    The substitutions run in a fixed order: URLs are removed first so that the
    final character filter cannot break them into leftover word fragments.
    Word characters, whitespace and code points U+1F600-U+1F64F survive the
    final pass; every other character is deleted. Whitespace around removed
    tokens is left untouched.

    Args:
        text: The string to clean.

    Returns:
        The cleaned string.
    """
    noise_patterns = (
        r'http\S+',                        # URLs
        r'@\w+',                           # mentions
        r'#\w+',                           # hashtags
        r'[^\w\s\U0001F600-\U0001F64F]',   # anything but word chars, whitespace, U+1F600-U+1F64F
    )
    cleaned = text
    for pattern in noise_patterns:
        cleaned = re.sub(pattern, '', cleaned)
    return cleaned

# Clean every tweet with remove_special_words, then lower-case the result.
df["clean_text"] = (
    df["clean_text"]
    .apply(remove_special_words)
    .str.lower()
)

In [4]:
# Preview the first five rows of the cleaned frame.
df.head(n=5)

Unnamed: 0,clean_text,category
0,when modi promised minimum government maximum ...,-1.0
2,what did just say vote for modi welcome bjp t...,1.0
3,asking his supporters prefix chowkidar their n...,1.0
4,answer who among these the most powerful world...,1.0
8,with upcoming election india saga going import...,1.0


In [5]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Build the word -> integer vocabulary from every cleaned tweet.
# NOTE(review): the vocabulary is fitted on the full frame, i.e. before the
# train/test split that happens later in the notebook.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df['clean_text'])

# Encode each tweet as a list of word indices, then zero-pad at the end
# ('post'); no maxlen is given, so rows are padded to the longest sequence.
sequences = tokenizer.texts_to_sequences(df['clean_text'])
padded_sequences = pad_sequences(sequences=sequences, padding='post')

In [6]:
from sklearn.model_selection import train_test_split

# The model ends in a single sigmoid unit trained with binary_crossentropy and
# scored with 'accuracy', both of which expect targets in {0, 1}. Rows with
# category 0 were filtered out earlier, so the column holds -1 / 1 here; fed
# as-is, the -1 targets make the loss unbounded below and can never match a
# thresholded prediction, so accuracy would be wrong for every negative tweet.
# Map negative -> 0 and positive -> 1 before splitting.
labels = (df['category'] > 0).astype('int32')

# Hold out 20% of the samples; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(padded_sequences,
                                                    labels,
                                                    test_size=0.2,
                                                    random_state=42)

In [7]:
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau


# Stop training once validation accuracy has not improved for 10 epochs and
# roll the model back to the weights of its best epoch.
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    restore_best_weights=True,
    patience=10,
)

# Write the model to Drive only when validation accuracy reaches a new maximum.
checkpoint_path = '/content/drive/MyDrive/models/twitter_sentiment_analysis/best_model.h5'
checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=0,
)

# Multiply the learning rate by 0.34 after 2 epochs in which validation
# accuracy moved by less than 1e-4, wait 2 epochs before reacting again,
# and never go below 1e-7.
lr_reduce_callback = ReduceLROnPlateau(
    monitor='val_accuracy',
    factor=0.34,
    patience=2,
    cooldown=2,
    min_delta=1e-4,
    min_lr=1e-7,
    verbose=0,
)

In [8]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, SpatialDropout1D

# Embedding -> spatial dropout -> LSTM -> one sigmoid output unit.
model = Sequential([
    Embedding(
        # One slot per word in the fitted vocabulary, plus one extra index.
        # NOTE(review): presumably the extra slot is index 0, used for padding - confirm.
        input_dim=len(tokenizer.word_index) + 1,
        output_dim=128,
        # Every row of padded_sequences has this many time steps.
        input_length=padded_sequences.shape[1],
    ),
    SpatialDropout1D(0.2),
    LSTM(128, dropout=0.2, recurrent_dropout=0.2),
    Dense(1, activation='sigmoid'),
])

In [9]:
from tensorflow.keras.optimizers import Adam

# Adam with a small fixed starting learning rate; ReduceLROnPlateau may lower
# it further during training.
optimizer = Adam(learning_rate=1e-4)

# Binary classification set-up: cross-entropy loss, accuracy as the metric.
model.compile(
    optimizer=optimizer,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for at most 30 epochs in mini-batches of 64; the callbacks handle
# early stopping, learning-rate decay and best-model checkpointing.
# NOTE(review): the held-out test split doubles as the validation set here,
# so the monitored val_accuracy is also what selects the saved model.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=30,
    validation_data=(X_test, y_test),
    callbacks=[early_stopping, lr_reduce_callback, checkpoint_callback],
)

Epoch 1/30

  saving_api.save_model(


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30

In [None]:
# Persist the per-epoch metrics recorded by fit() next to the model checkpoint.
pd.DataFrame(data=history.history).to_csv(
    path_or_buf="/content/drive/MyDrive/models/twitter_sentiment_analysis/history.csv"
)