<a href="https://colab.research.google.com/github/SeifAmr1/Products-Reviews-Analysis/blob/main/Review_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install keras



In [None]:
!pip install tensorflow




In [None]:
!pip install optuna



# Build the LSTM model

In [None]:
import optuna
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import pandas as pd

# Load the IMDB reviews CSV.
# The file is read in pieces of `chunk_size` rows; the pieces are collected
# into `chunk_list` and then concatenated into one DataFrame.
chunk_size = 1000
chunk_list = list(pd.read_csv('/content/IMDB Dataset.csv', chunksize=chunk_size))

df = pd.concat(chunk_list)



In [None]:
# Replace the sentiment column with the integer codes produced by LabelEncoder.
label_encoder = LabelEncoder()
df['sentiment'] = label_encoder.fit_transform(df['sentiment'])

# Fit the vocabulary on the training reviews only, so that words appearing
# solely in held-out reviews cannot influence the features. The split here uses
# the same test_size and random_state as the split of X/y below, and the
# row count is identical, so it selects exactly the rows that end up in X_train.
train_reviews = train_test_split(df['review'], test_size=0.2, random_state=42)[0]

# Keep only the 5000 most frequent words when converting text to indices.
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(train_reviews)

# Convert every review to a sequence of word indices.
sequences = tokenizer.texts_to_sequences(df['review'])

# Pad/truncate each sequence to a fixed length of 200 tokens.
X = pad_sequences(sequences, maxlen=200)
y = df['sentiment']

# 80/20 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Preview the first rows of the DataFrame (review text and sentiment columns).
df.head()

Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,1
1,A wonderful little production. <br /><br />The...,1
2,I thought this was a wonderful way to spend ti...,1
3,Basically there's a family where a little boy ...,0
4,"Petter Mattei's ""Love in the Time of Money"" is...",1


# OPTUNA DEFINITION

In [None]:
# Define the Optuna objective function
def objective(trial):
    """Train one LSTM classifier for an Optuna trial and return its validation accuracy.

    The LSTM width, the dropout rate and the optimizer are sampled from
    ``trial``. The model is trained for 5 epochs (batch size 64) on part of the
    training data and scored on a validation split held out from that same
    training data, so the test set plays no role in hyperparameter selection.

    Args:
        trial: The Optuna trial object used to sample hyperparameters.

    Returns:
        The validation accuracy recorded after the final epoch.
    """
    # Hyperparameters to tune
    units = trial.suggest_int('units', 50, 100)
    # suggest_float replaces the deprecated suggest_uniform (same bounds).
    dropout_rate = trial.suggest_float('dropout_rate', 0.2, 0.5)
    optimizer = trial.suggest_categorical('optimizer', ['adam', 'rmsprop'])

    # Hold out a validation split from the training data. Scoring trials on
    # X_test would leak the test set into model selection and make the final
    # test accuracy optimistic. The fixed seed gives every trial the same split.
    X_fit, X_val, y_fit, y_val = train_test_split(
        X_train, y_train, test_size=0.2, random_state=42
    )

    model = Sequential()
    # `input_length` is deprecated in Keras 3; the sequence length is taken
    # from the data passed to fit().
    model.add(Embedding(input_dim=5000, output_dim=128))
    model.add(LSTM(units=units, return_sequences=False))
    model.add(Dropout(dropout_rate))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

    # Train the model
    history = model.fit(
        X_fit, y_fit,
        epochs=5, batch_size=64, verbose=0,
        validation_data=(X_val, y_val),
    )

    # Validation accuracy of the last epoch is the value Optuna maximizes.
    accuracy = history.history['val_accuracy'][-1]
    return accuracy

In [None]:
# Create and optimize the study.
# A seeded TPESampler makes the sequence of suggested hyperparameters the same
# on every run. The model training inside the objective is not seeded here, so
# the reported accuracies can still vary between runs.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=2)

# Print the best hyperparameters
print("Best hyperparameters:", study.best_params)
print("Best validation accuracy:", study.best_value)

[I 2024-09-07 11:43:49,884] A new study created in memory with name: no-name-f3c19df9-ef23-4b6e-80d2-60bfbd8702fc
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.2, 0.5)
[I 2024-09-07 12:05:42,425] Trial 0 finished with value: 0.8737999796867371 and parameters: {'units': 79, 'dropout_rate': 0.4028309699749951, 'optimizer': 'rmsprop'}. Best is trial 0 with value: 0.8737999796867371.
[I 2024-09-07 12:17:31,237] Trial 1 finished with value: 0.8826000094413757 and parameters: {'units': 51, 'dropout_rate': 0.32868263204537923, 'optimizer': 'adam'}. Best is trial 1 with value: 0.8826000094413757.


Best hyperparameters: {'units': 51, 'dropout_rate': 0.32868263204537923, 'optimizer': 'adam'}
Best validation accuracy: 0.8826000094413757


In [None]:
# Train the final model with the best hyperparameters found by the study
best_params = study.best_params

# Embedding -> LSTM -> Dropout -> single sigmoid unit (binary sentiment).
# `input_length` is deprecated in Keras 3 and has been dropped; the sequence
# length is taken from the data passed to fit().
model = Sequential([
    Embedding(input_dim=5000, output_dim=128),
    LSTM(units=best_params['units'], return_sequences=False),
    Dropout(best_params['dropout_rate']),
    Dense(1, activation='sigmoid'),
])
model.compile(
    optimizer=best_params['optimizer'],
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Train the final model; the test split is passed as validation data only to
# report per-epoch metrics.
model.fit(X_train, y_train, epochs=5, batch_size=64, verbose=1, validation_data=(X_test, y_test))

# Evaluate the final model on the held-out test split
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy with best hyperparameters: {accuracy*100:.2f}%")


Epoch 1/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m153s[0m 239ms/step - accuracy: 0.7535 - loss: 0.4786 - val_accuracy: 0.8630 - val_loss: 0.2988
Epoch 2/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m199s[0m 234ms/step - accuracy: 0.9042 - loss: 0.2451 - val_accuracy: 0.8919 - val_loss: 0.2678
Epoch 3/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m202s[0m 235ms/step - accuracy: 0.9201 - loss: 0.2040 - val_accuracy: 0.8911 - val_loss: 0.2785
Epoch 4/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m148s[0m 237ms/step - accuracy: 0.9357 - loss: 0.1696 - val_accuracy: 0.8977 - val_loss: 0.2678
Epoch 5/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m150s[0m 241ms/step - accuracy: 0.9447 - loss: 0.1499 - val_accuracy: 0.8897 - val_loss: 0.2906
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 39ms/step - accuracy: 0.8892 - loss: 0.2873
Test Accuracy with best hyperparameters: 88.97%


# PREDICTING CUSTOM REVIEWS

In [None]:
# Custom text reviews to classify with the trained model:
# ten free-text movie reviews, each given as one plain string.
custom_reviews = [
   "The film was an absolute delight from start to finish. The plot was engaging and the characters were well-developed. The cinematography was stunning, and the soundtrack perfectly complemented the mood of the movie. The actors delivered their roles with such authenticity that it was easy to get lost in the story. This movie is a must-watch for anyone who loves a heartwarming and well-crafted story. I left the theater feeling uplifted and inspired. Highly recommended for a great cinematic experience!",

"What a fantastic film! The storyline was incredibly gripping, and the character development was top-notch. The performances were stellar, and each scene was beautifully shot. The pacing was perfect, keeping me engaged throughout. The movie managed to combine humor with emotional depth in a way that felt genuine and impactful. It’s the kind of film that stays with you long after the credits roll. If you're looking for a memorable and enjoyable movie night, this one is a winner!",

"This movie exceeded all my expectations. The direction was impeccable, and the script was both witty and profound. The chemistry between the lead actors was palpable, adding depth to their performances. The visual effects were mesmerizing and enhanced the storytelling in a meaningful way. The film's soundtrack was also noteworthy, adding an extra layer of emotion to the scenes. Overall, it’s a well-rounded film that offers a rich and satisfying viewing experience.",

"I was really disappointed with this movie. The plot was convoluted and failed to hold my interest. The acting was mediocre at best, and the film’s attempt at emotional depth felt shallow. The special effects, while flashy, did little to enhance the story. The film’s humor fell flat, and the dramatic moments felt contrived. Overall, it was an underwhelming experience that didn’t deliver on its promises.",

"This movie missed the mark in several ways. The storyline was clichéd and predictable, offering nothing new or engaging. The characters lacked depth, making it difficult to connect with them. The pacing was erratic, with some scenes dragging on unnecessarily. The film’s attempts at humor were forced, and the dramatic elements didn’t resonate. It’s a film that could have benefited from better writing and direction.",

"I was thoroughly impressed by this film. The storyline was original and kept me hooked from beginning to end. The actors delivered powerful performances, and the film's pacing was spot-on. The cinematography was exceptional, with each frame carefully crafted to enhance the narrative. The movie balanced humor and drama beautifully, making it an enjoyable watch. It’s a testament to the skill and creativity of the filmmakers. Definitely worth watching for anyone who appreciates high-quality cinema.",

"This film was a delightful surprise. The plot was refreshing and the execution was flawless. The characters were well-written and the performances were heartfelt. The attention to detail in the set design and costumes added authenticity to the story. The film’s ability to evoke genuine emotion and keep me entertained throughout is a mark of its quality. It's a wonderful piece of cinema that I would gladly recommend to friends and family.",

"I found this movie to be quite disappointing. The plot was predictable and lacked originality. The character development was superficial, and the performances were underwhelming. The pacing felt sluggish, and the film seemed to drag on longer than necessary. Despite the high production values, the movie failed to engage me on an emotional level. Overall, it was a forgettable experience that didn't live up to its potential.",

"This film was a letdown. The story was confusing and poorly executed, leaving me more frustrated than entertained. The characters felt one-dimensional, and the dialogue was often forced and unnatural. The pacing was uneven, and the film struggled to maintain any momentum. While the visuals were decent, they couldn't compensate for the lackluster script and uninspired performances. It’s a movie that might be better suited for a background watch rather than a focused viewing.",

"I was not impressed with this film at all. The narrative was disjointed, and the character arcs felt incomplete. The acting was lackluster, and the dialogues were often cringeworthy. The film tried to tackle too many themes at once, leading to a muddled and incoherent story. The visual effects were decent but couldn't salvage the overall experience. It’s a forgettable film that didn’t make much of an impact."
]

# Preprocess the custom reviews with the tokenizer that was fitted earlier:
# text -> word-index sequences -> fixed length of 200 (the same maxlen that
# was used for the training data).
custom_sequences = tokenizer.texts_to_sequences(custom_reviews)
custom_padded_sequences = pad_sequences(custom_sequences, maxlen=200)

# Run the trained model on the padded sequences.
predictions = model.predict(custom_padded_sequences)

# Threshold the model outputs at 0.5: above the threshold is reported as
# 'positive', everything else as 'negative'.
predicted_sentiments = [
    'positive' if above_threshold else 'negative'
    for above_threshold in (predictions > 0.5)
]

# Pair each review with its predicted label, one row per review.
df_custom_reviews = pd.DataFrame(
    list(zip(custom_reviews, predicted_sentiments)),
    columns=['review', 'predicted_sentiment'],
)

# Save to CSV (without the index column)
df_custom_reviews.to_csv('custom_reviews_with_predictions.csv', index=False)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 301ms/step


In [None]:
# Display each custom review next to its predicted sentiment label.
df_custom_reviews

Unnamed: 0,review,predicted_sentiment
0,The film was an absolute delight from start to...,positive
1,What a fantastic film! The storyline was incre...,positive
2,This movie exceeded all my expectations. The d...,positive
3,I was really disappointed with this movie. The...,negative
4,This movie missed the mark in several ways. Th...,negative
5,I was thoroughly impressed by this film. The s...,positive
6,This film was a delightful surprise. The plot ...,positive
7,I found this movie to be quite disappointing. ...,negative
8,This film was a letdown. The story was confusi...,negative
9,I was not impressed with this film at all. The...,negative
