In [None]:
import pickle

import pandas as pd
from keras.layers import Embedding, GRU, Dense, Dropout, Input
from keras.models import Sequential
from sklearn.model_selection import train_test_split

from preprocessing import preprocess_texts, clean_text

# Train a GRU-based binary text classifier on "clean_data.csv" and persist
# both the trained model and the fitted tokenizer to disk.

# --- Data loading -----------------------------------------------------------
df = pd.read_csv("clean_data.csv")
df["text_clean"] = df["text"].apply(clean_text)

# --- Hyperparameters --------------------------------------------------------
vocab_size = 5000      # forwarded to preprocess_texts; also the Embedding input dimension
max_len = 100          # forwarded to preprocess_texts; also the model's input sequence length
embedding_dim = 128    # size of each learned embedding vector
batch_size = 64
epochs = 7

# --- Feature / label preparation --------------------------------------------
# NOTE(review): presumably X is an integer array of shape (n_samples, max_len)
# and `tokenizer` is the fitted tokenizer -- confirm against preprocess_texts.
X, tokenizer = preprocess_texts(df["text_clean"], vocab_size=vocab_size, max_len=max_len)
# NOTE(review): the sigmoid + binary_crossentropy head below assumes the
# "target" column holds 0/1 labels -- confirm against the CSV.
y = df["target"]

# Stratified 80/20 split with a fixed seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

# --- Model ------------------------------------------------------------------
# The input shape is declared with an explicit Input layer instead of the
# deprecated `Embedding(input_length=...)` argument. This builds the model up
# front, so model.summary() can report output shapes and parameter counts.
model = Sequential([
    Input(shape=(max_len,)),
    Embedding(vocab_size, embedding_dim),
    Dropout(0.3),
    GRU(100, dropout=0.2),
    Dense(1, activation="sigmoid"),
])

model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
model.summary()

# --- Training ---------------------------------------------------------------
# NOTE(review): the held-out test split doubles as the validation set here, so
# the reported val_* metrics are not an independent final estimate.
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=epochs, batch_size=batch_size)

# --- Persistence ------------------------------------------------------------
# NOTE(review): ".h5" is Keras's legacy HDF5 format; the filename is left
# unchanged in case other code loads the model by this path.
model.save("sentiment_model.h5")

# The tokenizer is pickled alongside the model so the same text-to-sequence
# mapping can be reused later.
with open("tokenizer.pkl", "wb") as f:
    pickle.dump(tokenizer, f)




Epoch 1/7
[1m20000/20000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1456s[0m 73ms/step - accuracy: 0.7857 - loss: 0.4504 - val_accuracy: 0.8164 - val_loss: 0.4006
Epoch 2/7
[1m20000/20000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1391s[0m 70ms/step - accuracy: 0.8174 - loss: 0.3983 - val_accuracy: 0.8219 - val_loss: 0.3908
Epoch 3/7
[1m20000/20000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1454s[0m 73ms/step - accuracy: 0.8243 - loss: 0.3865 - val_accuracy: 0.8235 - val_loss: 0.3900
Epoch 4/7
[1m20000/20000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1615s[0m 81ms/step - accuracy: 0.8271 - loss: 0.3811 - val_accuracy: 0.8243 - val_loss: 0.3879
Epoch 5/7
[1m20000/20000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1321s[0m 66ms/step - accuracy: 0.8296 - loss: 0.3775 - val_accuracy: 0.8249 - val_loss: 0.3883
Epoch 6/7
[1m20000/20000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1235s[0m 62ms/step - accuracy: 0.8312 - loss: 0.3745 - val_accuracy: 0.8248 - val

