In [6]:
import nltk
import pandas as pd
import numpy as np
from nltk.corpus import movie_reviews
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

In [2]:
nltk.download('movie_reviews')

[nltk_data] Downloading package movie_reviews to
[nltk_data]     /Users/kinjal/nltk_data...
[nltk_data]   Unzipping corpora/movie_reviews.zip.


True

In [7]:
# Load reviews and labels
documents = [(movie_reviews.raw(fileid), category)
             for category in movie_reviews.categories()
             for fileid in movie_reviews.fileids(category)]

# Create DataFrame
df = pd.DataFrame(documents, columns=['review', 'sentiment'])
df['label'] = df['sentiment'].map({'pos': 1, 'neg': 0})
df.head()

Unnamed: 0,review,sentiment,label
0,"plot : two teen couples go to a church party ,...",neg,0
1,the happy bastard's quick movie review \ndamn ...,neg,0
2,it is movies like these that make a jaded movi...,neg,0
3,""" quest for camelot "" is warner bros . ' firs...",neg,0
4,synopsis : a mentally unstable man undergoing ...,neg,0


In [8]:
X_train, X_test, y_train, y_test = train_test_split(
    df['review'], df['label'], test_size=0.2, random_state=42
)

In [9]:
tfidf = TfidfVectorizer(max_features=5000, stop_words='english')
X_train_tfidf = tfidf.fit_transform(X_train).toarray()
X_test_tfidf = tfidf.transform(X_test).toarray()

In [10]:
model = Sequential()
model.add(Dense(128, activation='relu', input_shape=(X_train_tfidf.shape[1],)))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(1, activation='sigmoid'))  # Binary output

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [11]:
history = model.fit(X_train_tfidf, y_train,
                    epochs=10,
                    batch_size=32,
                    validation_split=0.1,
                    verbose=2)

Epoch 1/10
45/45 - 1s - 27ms/step - accuracy: 0.5826 - loss: 0.6839 - val_accuracy: 0.8438 - val_loss: 0.6509
Epoch 2/10
45/45 - 0s - 5ms/step - accuracy: 0.8653 - loss: 0.5175 - val_accuracy: 0.8625 - val_loss: 0.4147
Epoch 3/10
45/45 - 0s - 5ms/step - accuracy: 0.9486 - loss: 0.2094 - val_accuracy: 0.8625 - val_loss: 0.3327
Epoch 4/10
45/45 - 0s - 5ms/step - accuracy: 0.9882 - loss: 0.0698 - val_accuracy: 0.8500 - val_loss: 0.3588
Epoch 5/10
45/45 - 0s - 5ms/step - accuracy: 0.9979 - loss: 0.0288 - val_accuracy: 0.8750 - val_loss: 0.3708
Epoch 6/10
45/45 - 0s - 5ms/step - accuracy: 0.9986 - loss: 0.0191 - val_accuracy: 0.8750 - val_loss: 0.4074
Epoch 7/10
45/45 - 0s - 5ms/step - accuracy: 0.9986 - loss: 0.0099 - val_accuracy: 0.8750 - val_loss: 0.4178
Epoch 8/10
45/45 - 0s - 5ms/step - accuracy: 1.0000 - loss: 0.0071 - val_accuracy: 0.8625 - val_loss: 0.4244
Epoch 9/10
45/45 - 0s - 5ms/step - accuracy: 1.0000 - loss: 0.0049 - val_accuracy: 0.8625 - val_loss: 0.4457
Epoch 10/10
45/45 

In [12]:
loss, acc = model.evaluate(X_test_tfidf, y_test, verbose=0)
print(f"Test Accuracy: {acc:.4f}")

Test Accuracy: 0.8275


In [13]:
y_pred = (model.predict(X_test_tfidf) > 0.5).astype("int32")
print(classification_report(y_test, y_pred))

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
              precision    recall  f1-score   support

           0       0.84      0.81      0.82       199
           1       0.82      0.84      0.83       201

    accuracy                           0.83       400
   macro avg       0.83      0.83      0.83       400
weighted avg       0.83      0.83      0.83       400



In [15]:
sample = ["The plot was good and the acting was awesome."]
sample_vec = tfidf.transform(sample).toarray()
pred = model.predict(sample_vec)[0][0]
print("Predicted Sentiment:", "Positive" if pred > 0.5 else "Negative")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
Predicted Sentiment: Positive
