In [1]:
# 1. Import necessary libraries and load the data
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np

# Load the dataset
df = pd.read_csv("cleandata.csv")

# 2. Preprocess the data
# We'll use a simple Tokenizer to convert our text to numerical data
tokenizer = Tokenizer(num_words=10000, oov_token="<OOV>")
tokenizer.fit_on_texts(df['Review'])

# Convert texts to sequences
sequences = tokenizer.texts_to_sequences(df['Review'])
padded_sequences = pad_sequences(sequences, padding='post')

# 3. Split the data into training and validation sets
labels = df['Rating'].values
X_train, X_test, y_train, y_test = train_test_split(padded_sequences, labels, test_size=0.2, random_state=42)

# 4. Build and train the model
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(10000, 16, input_length=X_train.shape[1]),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(24, activation='relu'),
    tf.keras.layers.Dense(1, activation='linear')
])

model.compile(loss='mean_squared_error',optimizer='adam')

model.fit(X_train, y_train, epochs=30, validation_data=(X_test, y_test), verbose=2)

# 5. Evaluate the model
model.evaluate(X_test, y_test)


Epoch 1/30
613/613 - 12s - loss: 4.1969 - val_loss: 1.8981 - 12s/epoch - 20ms/step
Epoch 2/30
613/613 - 10s - loss: 1.8716 - val_loss: 1.8505 - 10s/epoch - 16ms/step
Epoch 3/30
613/613 - 9s - loss: 1.8128 - val_loss: 1.7907 - 9s/epoch - 15ms/step
Epoch 4/30
613/613 - 9s - loss: 1.7225 - val_loss: 1.6827 - 9s/epoch - 14ms/step
Epoch 5/30
613/613 - 9s - loss: 1.5796 - val_loss: 1.5217 - 9s/epoch - 14ms/step
Epoch 6/30
613/613 - 8s - loss: 1.3756 - val_loss: 1.3273 - 8s/epoch - 14ms/step
Epoch 7/30
613/613 - 9s - loss: 1.1972 - val_loss: 1.2023 - 9s/epoch - 14ms/step
Epoch 8/30
613/613 - 8s - loss: 1.0897 - val_loss: 1.1305 - 8s/epoch - 13ms/step
Epoch 9/30
613/613 - 10s - loss: 1.0199 - val_loss: 1.0821 - 10s/epoch - 16ms/step
Epoch 10/30
613/613 - 12s - loss: 0.9596 - val_loss: 1.0341 - 12s/epoch - 20ms/step
Epoch 11/30
613/613 - 9s - loss: 0.9090 - val_loss: 0.9917 - 9s/epoch - 15ms/step
Epoch 12/30
613/613 - 9s - loss: 0.8593 - val_loss: 0.9411 - 9s/epoch - 15ms/step
Epoch 13/30
613/6

0.7591838836669922

In [2]:
def predict_review(model, new_review):
    # Convert the review to the sequence
    sequences = tokenizer.texts_to_sequences([new_review])

    # Pad the sequences
    padded_sequences = pad_sequences(sequences, padding='post', maxlen=X_train.shape[1])

    # Perform the prediction
    prediction = model.predict(padded_sequences)

    # Return the prediction
    return prediction[0][0]


In [6]:
new_review = "I hate this tv"
print(predict_review(model, new_review))


3.3779387
