In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

# Load the dataset
df = pd.read_csv('./data2/Train.csv')

# Preprocessing
texts = df['text'].values
labels = df['label'].values

# Tokenize the text
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)
word_index = tokenizer.word_index
sequences = tokenizer.texts_to_sequences(texts)

# Padding sequences
max_length = 100  # adjust as needed
padded_sequences = pad_sequences(sequences, maxlen=max_length, padding='post')

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(padded_sequences, labels, test_size=0.2, random_state=42)

# Define the model architecture
model = Sequential([
    Embedding(len(word_index) + 1, 128, input_length=max_length),
    LSTM(64),
    Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=5, batch_size=64, validation_data=(X_test, y_test))

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Accuracy: {accuracy * 100:.2f}%')


2024-04-29 01:04:19.695817: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-29 01:04:19.695862: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-29 01:04:19.696546: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-04-29 01:04:19.701733: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Accuracy: 84.49%


In [6]:
import random

# Assuming you have already trained and have the model object available
# Assuming you have loaded the dataset into 'df'

# Select a random review from the dataset
random_index = random.randint(0, len(df) - 1)
random_review = df.loc[random_index, 'text']

# Tokenize and preprocess the review text
sequence = tokenizer.texts_to_sequences([random_review])
padded_sequence = pad_sequences(sequence, maxlen=max_length, padding='post')

# Predict sentiment
predicted_prob = model.predict(padded_sequence)[0][0]

# Convert probability to sentiment label
predicted_sentiment = 'positive' if predicted_prob >= 0.5 else 'negative'

print("Random Review:")
print(random_review)
print(f"\nPredicted sentiment: {predicted_sentiment} with probability {predicted_prob:.2f}")


Random Review:
Dane tries to hard and is to extreme with all of his yelling and going crazy, spilling water on himself and rolling on the floor. To much. Calm down, get yourself together and make us laugh. I didn't quite understand his comparison toward comics and rock stars. Just because there both up on stage or something? He said that every comedian wants to be a rock star. I'm sure Rodney Dangerfield was really into that when he was alive. He had a few good jokes like the Burger King joke where people yell at the drive thru. I also liked the Reese's Pieces joke. If Dane just didn't act so mental he might be funnier and I might have given this a higher rating, as high as maybe an eight.

Predicted sentiment: negative with probability 0.01


In [7]:
df.head(3)

Unnamed: 0,text,label
0,I grew up (b. 1965) watching and loving the Th...,0
1,"When I put this movie in my DVD player, and sa...",0
2,Why do people who do not know what a particula...,0
