In [20]:
#Run this command in terminal-> python -m spacy download en_core_web_lg
import pandas as pd
from tqdm import tqdm
df = pd.read_csv("./Week_1/Day_4/movies_sentiment_data.csv")
df.head()

Unnamed: 0,review,sentiment
0,I first saw Jake Gyllenhaal in Jarhead (2005) ...,positive
1,I enjoyed the movie and the story immensely! I...,positive
2,I had a hard time sitting through this. Every ...,negative
3,It's hard to imagine that anyone could find th...,negative
4,This is one military drama I like a lot! Tom B...,positive


In [2]:
df['sentient_numerical'] = df.sentiment.map({'positive': 1,'negative':0})

In [3]:
import spacy
nlp = spacy.load("en_core_web_lg")

In [4]:
# Use tqdm.pandas to apply a progress bar to the apply method
tqdm.pandas(desc="Processing reviews")

# Apply the nlp function with a progress bar
df['vector'] = df['review'].progress_apply(lambda x: nlp(x).vector)

# Display the first few rows of the DataFrame
df.head()

Processing reviews: 100%|█████████████████████████████████████████████████████████████████████████████████| 623/623 [00:38<00:00, 16.02it/s]


Unnamed: 0,review,sentiment,sentient_numerical,vector
0,I first saw Jake Gyllenhaal in Jarhead (2005) ...,positive,1,"[-0.005007826, 0.19119197, -0.114808254, -0.04..."
1,I enjoyed the movie and the story immensely! I...,positive,1,"[-0.028524205, 0.16128393, -0.13384062, -0.077..."
2,I had a hard time sitting through this. Every ...,negative,0,"[-0.06756501, 0.1641653, -0.13134618, -0.08476..."
3,It's hard to imagine that anyone could find th...,negative,0,"[-0.044979133, 0.11706491, -0.14705564, -0.068..."
4,This is one military drama I like a lot! Tom B...,positive,1,"[-0.07854227, 0.14919885, -0.11241577, -0.0461..."


In [24]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(df.vector.values,df.sentient_numerical,test_size = 0.2,random_state=42)

In [25]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models


In [26]:
# Preparing data
# Convert list of arrays into a 2D numpy array
X_train = np.array([x for x in X_train])
X_test = np.array([x for x in X_test])
y_train = np.array(y_train)
y_test = np.array(y_test)

In [28]:
# Define a simple neural network model
model = models.Sequential([
    layers.InputLayer(shape=(X_train.shape[1],)),
    layers.Dense(64, activation='relu'),
    layers.Dense(32, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])

In [29]:
# Compile the model
model.compile(optimizer='adam', 
              loss='binary_crossentropy', 
              metrics=['accuracy'])

In [31]:
# Train the model
history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.2, verbose=2)

Epoch 1/100
13/13 - 0s - 6ms/step - accuracy: 0.8191 - loss: 0.4460 - val_accuracy: 0.7700 - val_loss: 0.4744
Epoch 2/100
13/13 - 0s - 4ms/step - accuracy: 0.8166 - loss: 0.4277 - val_accuracy: 0.8400 - val_loss: 0.4456
Epoch 3/100
13/13 - 0s - 5ms/step - accuracy: 0.8367 - loss: 0.4133 - val_accuracy: 0.8100 - val_loss: 0.4578
Epoch 4/100
13/13 - 0s - 4ms/step - accuracy: 0.8241 - loss: 0.4229 - val_accuracy: 0.7800 - val_loss: 0.4362
Epoch 5/100
13/13 - 0s - 9ms/step - accuracy: 0.8342 - loss: 0.4004 - val_accuracy: 0.8500 - val_loss: 0.4131
Epoch 6/100
13/13 - 0s - 4ms/step - accuracy: 0.8442 - loss: 0.3805 - val_accuracy: 0.8500 - val_loss: 0.4124
Epoch 7/100
13/13 - 0s - 4ms/step - accuracy: 0.8518 - loss: 0.3674 - val_accuracy: 0.8200 - val_loss: 0.3912
Epoch 8/100
13/13 - 0s - 4ms/step - accuracy: 0.8543 - loss: 0.3628 - val_accuracy: 0.8300 - val_loss: 0.3869
Epoch 9/100
13/13 - 0s - 4ms/step - accuracy: 0.8593 - loss: 0.3541 - val_accuracy: 0.8300 - val_loss: 0.3783
Epoch 10/1

In [32]:
# Evaluate the model
train_loss, train_accuracy = model.evaluate(X_train, y_train, verbose=0)
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)

print(f"Training Accuracy: {train_accuracy:.4f}")
print(f"Testing Accuracy: {test_accuracy:.4f}")


Training Accuracy: 0.9679
Testing Accuracy: 0.8560


In [37]:
# Example inference
example_review = "The movie was very bad and full of cheap stunts"
example_vector = nlp(example_review).vector.reshape(1, -1)
prediction = model.predict(example_vector)

# Convert prediction to 'positive' or 'negative'
predicted_sentiment = 'positive' if prediction > 0.5 else 'negative'

print(f"Review: {example_review}")
print(f"Predicted Sentiment: {predicted_sentiment}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
Review: The movie was very bad and full of cheap stunts
Predicted Sentiment: negative
