# Trying a deep learning model using TensorFlow

In [1]:
import pandas as pd
import numpy as np

In [2]:
def _load_split(csv_path):
    """Read one split CSV and return (frame, 'text' column, 'label' column)."""
    frame = pd.read_csv(csv_path)
    return frame, frame['text'], frame['label']


# Banner so the loading step is easy to spot in the cell output.
rule = "=" * 50
print(rule)
print("LOADING DATA")
print(rule)

# Each split lives in its own pre-processed CSV with 'text' and 'label' columns.
train_df, X_train, y_train = _load_split('data/processed/train.csv')
val_df, X_val, y_val = _load_split('data/processed/val.csv')
test_df, X_test, y_test = _load_split('data/processed/test.csv')

# Report the size of every split.
print(f"Training samples: {len(X_train)}")
print(f"Validation samples: {len(X_val)}")
print(f"Test samples: {len(X_test)}")

LOADING DATA
Training samples: 34707
Validation samples: 7437
Test samples: 7438


In [3]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Vocabulary cap passed to the tokenizer, and the fixed length every
# sequence is padded/truncated to.
MAX_WORDS = 20000
MAX_LEN = 300

# The vocabulary is fitted on the training texts only; validation and test
# texts are merely transformed with the already-fitted tokenizer.
tokenizer = Tokenizer(num_words=MAX_WORDS, oov_token="<OOV>")
tokenizer.fit_on_texts(X_train)


def _encode(texts):
    """Return (integer sequences, post-padded array of width MAX_LEN) for texts."""
    sequences = tokenizer.texts_to_sequences(texts)
    padded = pad_sequences(sequences, maxlen=MAX_LEN, padding='post')
    return sequences, padded


X_train_seq, X_train_pad = _encode(X_train)
X_val_seq, X_val_pad = _encode(X_val)
X_test_seq, X_test_pad = _encode(X_test)


  if not hasattr(np, "object"):


In [6]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Embedding, Bidirectional, LSTM, Dense, Dropout

# Binary classifier over padded token-id sequences:
# embedding -> bidirectional LSTM -> dense head with a single sigmoid output.
model = Sequential([
    # Declare the input shape with an explicit Input layer rather than
    # Embedding(input_length=...): that argument is deprecated in Keras 3,
    # and the Input layer also lets model.summary() report real shapes and
    # parameter counts before the first fit/predict call.
    Input(shape=(MAX_LEN,)),
    Embedding(input_dim=MAX_WORDS, output_dim=128),
    # return_sequences=False: only the final state of each direction is kept.
    Bidirectional(LSTM(64, return_sequences=False)),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    # Single sigmoid unit, paired with binary cross-entropy below.
    Dense(1, activation='sigmoid')
])

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

model.summary()




In [7]:
from tensorflow.keras.callbacks import EarlyStopping

# Stop training once val_loss has gone `patience` epochs without improving,
# and roll the model back to the weights of the best epoch.
early_stop = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)

# Training options collected in one place; validation runs on the held-out
# validation split after every epoch.
fit_options = dict(
    validation_data=(X_val_pad, y_val),
    epochs=10,
    batch_size=64,
    callbacks=[early_stop],
)

history = model.fit(X_train_pad, y_train, **fit_options)


Epoch 1/10
[1m543/543[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 28ms/step - accuracy: 0.7257 - loss: 0.5047 - val_accuracy: 0.8853 - val_loss: 0.2873
Epoch 2/10
[1m543/543[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 27ms/step - accuracy: 0.9192 - loss: 0.2232 - val_accuracy: 0.8759 - val_loss: 0.3168
Epoch 3/10
[1m543/543[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 27ms/step - accuracy: 0.9417 - loss: 0.1671 - val_accuracy: 0.8805 - val_loss: 0.3323


In [None]:
# Per-epoch metrics recorded by model.fit, one column per metric.
epoch_metrics = history.history
history_df = pd.DataFrame(data=epoch_metrics)
history_df

In [8]:
# Score the trained model on the held-out test split; evaluate() yields the
# loss followed by the compiled metric (accuracy).
test_metrics = model.evaluate(X_test_pad, y_test)
loss, accuracy = test_metrics
print(f"Test Accuracy: {accuracy:.4f}")


[1m233/233[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 13ms/step - accuracy: 0.8773 - loss: 0.2984
Test Accuracy: 0.8837


In [12]:
import os

# Keep the output directory relative to the working directory, like the
# data/processed/*.csv inputs and the models/... paths used when the saved
# artifacts are loaded back. A hard-coded "/content/..." path only exists on
# Colab and breaks the notebook everywhere else.
BASE_DIR = "models"
os.makedirs(BASE_DIR, exist_ok=True)  # no error if it already exists

# List the working directory to confirm the folder is present.
print("Current directories:", os.listdir("."))

Current directories: ['.config', 'data', '.ipynb_checkpoints', 'models', 'sample_data']


In [13]:
import pickle

# Derive both artifact paths from BASE_DIR so they always point into the
# directory that was created above and that is listed below, instead of
# repeating the directory as a separate string literal.
MODEL_PATH = os.path.join(BASE_DIR, "sentiment_lstm.keras")
TOKENIZER_PATH = os.path.join(BASE_DIR, "tokenizer.pkl")

# Persist the trained network.
model.save(MODEL_PATH)

# Persist the fitted tokenizer next to it; inference needs the same
# vocabulary that was used for training.
with open(TOKENIZER_PATH, "wb") as f:
    pickle.dump(tokenizer, f)

print("Saved files:", os.listdir(BASE_DIR))


Saved files: ['tokenizer.pkl', 'sentiment_lstm.keras']


In [14]:
# Disabled on purpose: this would replace the in-memory `model` with the copy
# saved to disk. The model is reloaded for real in a later cell.
# from tensorflow.keras.models import load_model

# model = load_model("/content/models/sentiment_lstm.keras")

In [4]:
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences

MAX_LEN = 300

def predict_sentiment(text, threshold=0.5):
    """Classify a single text with the global `model` and `tokenizer`.

    Args:
        text: The string to classify.
        threshold: Cut-off applied to the model output. Scores at or above
            it are labelled "Positive", lower scores "Negative". Defaults to
            0.5, so existing one-argument calls behave as before.

    Returns:
        A ``(label, probability)`` tuple, where ``probability`` is the raw
        model output converted to a Python float.

    Note:
        A later cell defines another ``predict_sentiment`` that returns a
        dict; whichever definition was executed last is the one in effect.
    """
    seq = tokenizer.texts_to_sequences([text])
    pad = pad_sequences(seq, maxlen=MAX_LEN, padding='post')
    # verbose=0 keeps Keras from printing a progress bar on every call.
    prob = model.predict(pad, verbose=0)[0][0]

    label = "Positive" if prob >= threshold else "Negative"
    return label, float(prob)


In [5]:
# Locations of the saved artifacts, relative to the working directory.
MODEL_DIR = "models"
MODEL_PATH = MODEL_DIR + "/sentiment_lstm.keras"
TOKENIZER_PATH = MODEL_DIR + "/tokenizer.pkl"


In [6]:
from tensorflow.keras.models import load_model
import pickle
# Restore the trained network from disk.
model = load_model(MODEL_PATH)

# Restore the tokenizer that was pickled alongside it.
# NOTE(review): pickle.load executes code embedded in the file, so only load
# tokenizer files that this project produced itself.
with open(TOKENIZER_PATH, "rb") as tokenizer_file:
    tokenizer = pickle.load(tokenizer_file)

print("✅ Model and tokenizer loaded successfully")


✅ Model and tokenizer loaded successfully


In [7]:
MAX_LEN = 300  # MUST match training

def predict_sentiment(text, threshold=0.5):
    """Classify one piece of text with the loaded `model` and `tokenizer`.

    Args:
        text: The string to classify.
        threshold: Cut-off applied to the model output; scores at or above
            it are labelled "Positive", anything lower "Negative".

    Returns:
        dict with the keys "text" (the input, unchanged), "sentiment" (the
        label) and "confidence" (the raw model output as a Python float).

    NOTE(review): "confidence" is the score that is compared against
    ``threshold``, not the certainty of the returned label, so it is below
    the threshold for every "Negative" result.
    """
    encoded = tokenizer.texts_to_sequences([text])
    padded = pad_sequences(encoded, maxlen=MAX_LEN, padding='post')

    # Batch of one in, one score out; verbose=0 silences the progress bar.
    score = model.predict(padded, verbose=0)[0][0]

    if score >= threshold:
        sentiment = "Positive"
    else:
        sentiment = "Negative"

    return {"text": text, "sentiment": sentiment, "confidence": float(score)}


In [8]:
# Clearly positive reviews, used as a quick sanity check of the loaded model.
texts = [
    "This movie was absolutely fantastic, I loved every minute of it!",
    "An outstanding performance by the entire cast. Highly recommended.",
    "One of the best films I have seen in years. Brilliant storytelling.",
]

for review in texts:
    result = predict_sentiment(review)
    print(review, "→", result)


This movie was absolutely fantastic, I loved every minute of it! → {'text': 'This movie was absolutely fantastic, I loved every minute of it!', 'sentiment': 'Positive', 'confidence': 0.6934287548065186}
An outstanding performance by the entire cast. Highly recommended. → {'text': 'An outstanding performance by the entire cast. Highly recommended.', 'sentiment': 'Positive', 'confidence': 0.8677756786346436}
One of the best films I have seen in years. Brilliant storytelling. → {'text': 'One of the best films I have seen in years. Brilliant storytelling.', 'sentiment': 'Positive', 'confidence': 0.7953298091888428}


In [9]:
# Clearly negative reviews, the counterpart to the positive sanity check.
texts = [
    "This movie was terrible and a complete waste of time.",
    "The acting was awful and the plot made no sense at all.",
    "One of the worst films ever made. I regret watching it.",
]

for review in texts:
    result = predict_sentiment(review)
    print(review, "→", result)


This movie was terrible and a complete waste of time. → {'text': 'This movie was terrible and a complete waste of time.', 'sentiment': 'Negative', 'confidence': 0.033747006207704544}
The acting was awful and the plot made no sense at all. → {'text': 'The acting was awful and the plot made no sense at all.', 'sentiment': 'Negative', 'confidence': 0.08210024982690811}
One of the worst films ever made. I regret watching it. → {'text': 'One of the worst films ever made. I regret watching it.', 'sentiment': 'Negative', 'confidence': 0.08437870442867279}
