In [5]:
import pandas as pd 

In [7]:
df = pd.read_csv("movie_review.csv")
df

Unnamed: 0,fold_id,cv_tag,html_id,sent_id,text,tag
0,0,cv000,29590,0,films adapted from comic books have had plenty...,pos
1,0,cv000,29590,1,"for starters , it was created by alan moore ( ...",pos
2,0,cv000,29590,2,to say moore and campbell thoroughly researche...,pos
3,0,cv000,29590,3,"the book ( or "" graphic novel , "" if you will ...",pos
4,0,cv000,29590,4,"in other words , don't dismiss this film becau...",pos
...,...,...,...,...,...,...
64715,9,cv999,14636,20,that lack of inspiration can be traced back to...,neg
64716,9,cv999,14636,21,like too many of the skits on the current inca...,neg
64717,9,cv999,14636,22,"after watching one of the "" roxbury "" skits on...",neg
64718,9,cv999,14636,23,"bump unsuspecting women , and . . . that's all .",neg


In [11]:
df = df[['text', 'tag']]
df

Unnamed: 0,text,tag
0,films adapted from comic books have had plenty...,pos
1,"for starters , it was created by alan moore ( ...",pos
2,to say moore and campbell thoroughly researche...,pos
3,"the book ( or "" graphic novel , "" if you will ...",pos
4,"in other words , don't dismiss this film becau...",pos
...,...,...
64715,that lack of inspiration can be traced back to...,neg
64716,like too many of the skits on the current inca...,neg
64717,"after watching one of the "" roxbury "" skits on...",neg
64718,"bump unsuspecting women , and . . . that's all .",neg


In [13]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
df['label'] = le.fit_transform(df['tag'])  # 'pos' → 1, 'neg' → 0


In [15]:
df

Unnamed: 0,text,tag,label
0,films adapted from comic books have had plenty...,pos,1
1,"for starters , it was created by alan moore ( ...",pos,1
2,to say moore and campbell thoroughly researche...,pos,1
3,"the book ( or "" graphic novel , "" if you will ...",pos,1
4,"in other words , don't dismiss this film becau...",pos,1
...,...,...,...
64715,that lack of inspiration can be traced back to...,neg,0
64716,like too many of the skits on the current inca...,neg,0
64717,"after watching one of the "" roxbury "" skits on...",neg,0
64718,"bump unsuspecting women , and . . . that's all .",neg,0


In [17]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Tokenization
tokenizer = Tokenizer(num_words=10000, oov_token='<OOV>')
tokenizer.fit_on_texts(df['text'])

# Convert to sequences
sequences = tokenizer.texts_to_sequences(df['text'])
max_len = max(len(x) for x in sequences)
padded_sequences = pad_sequences(sequences, maxlen=max_len, padding='post')


In [19]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(padded_sequences, df['label'], test_size=0.2, random_state=42)


In [21]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout

model = Sequential([
    Embedding(input_dim=10000, output_dim=64, input_length=max_len),
    LSTM(128, return_sequences=False),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')  # Binary classification
])




In [23]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

history = model.fit(X_train, y_train, validation_split=0.2, epochs=5, batch_size=64)


Epoch 1/5
[1m648/648[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m126s[0m 182ms/step - accuracy: 0.5034 - loss: 0.6936 - val_accuracy: 0.5150 - val_loss: 0.6929
Epoch 2/5
[1m648/648[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m116s[0m 179ms/step - accuracy: 0.5021 - loss: 0.6933 - val_accuracy: 0.5150 - val_loss: 0.6927
Epoch 3/5
[1m648/648[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m153s[0m 236ms/step - accuracy: 0.5072 - loss: 0.6931 - val_accuracy: 0.5150 - val_loss: 0.6928
Epoch 4/5
[1m648/648[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m174s[0m 193ms/step - accuracy: 0.5049 - loss: 0.6932 - val_accuracy: 0.5150 - val_loss: 0.6928
Epoch 5/5
[1m648/648[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m135s[0m 209ms/step - accuracy: 0.5061 - loss: 0.6931 - val_accuracy: 0.5150 - val_loss: 0.6928


In [25]:
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.4f}")


[1m405/405[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 45ms/step - accuracy: 0.5057 - loss: 0.6931
Test Accuracy: 0.5078
