#Dependencies

In [4]:
!pip install pyyaml h5py



In [5]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import losses
import re
import string

import numpy as np
from sklearn.model_selection import train_test_split

#Data preparing

In [6]:
df = pd.read_csv('RestaurantReviews.csv')

df.head()

Unnamed: 0,Restaurant,Reviewer,Review,Rating,Metadata,Time,Pictures,7514
0,Beyond Flavours,Rusha Chakraborty,"The ambience was good, food was quite good . h...",5,"1 Review , 2 Followers",5/25/2019 15:54,0,2447.0
1,Beyond Flavours,Anusha Tirumalaneedi,Ambience is too good for a pleasant evening. S...,5,"3 Reviews , 2 Followers",5/25/2019 14:20,0,
2,Beyond Flavours,Ashok Shekhawat,A must try.. great food great ambience. Thnx f...,5,"2 Reviews , 3 Followers",5/24/2019 22:54,0,
3,Beyond Flavours,Swapnil Sarkar,Soumen das and Arun was a great guy. Only beca...,5,"1 Review , 1 Follower",5/24/2019 22:11,0,
4,Beyond Flavours,Dileep,Food is good.we ordered Kodi drumsticks and ba...,5,"3 Reviews , 2 Followers",5/24/2019 21:37,0,


In [7]:
def remove_punctuation(df, column_name):
    if column_name not in df.columns:
        print(f"Kolumna {column_name} nie istnieje w DataFrame.")
        return df

    df[column_name] = df[column_name].apply(lambda x: ''.join([c for c in str(x) if c not in string.punctuation]))

    return df

In [8]:
colums = ['Review', 'Rating']
df = df[colums]
df = remove_punctuation(df, 'Review')

df['Review']

0       The ambience was good food was quite good  had...
1       Ambience is too good for a pleasant evening Se...
2       A must try great food great ambience Thnx for ...
3       Soumen das and Arun was a great guy Only becau...
4       Food is goodwe ordered Kodi drumsticks and bas...
                              ...                        
9995    Madhumathi Mahajan Well to start with nice cou...
9996    This place has never disappointed us The food ...
9997    Bad rating is mainly because of Chicken Bone f...
9998    I personally love and prefer Chinese Food Had ...
9999    Checked in here to try some delicious chinese ...
Name: Review, Length: 10000, dtype: object

In [9]:
df['Rating'] = pd.to_numeric(df['Rating'], errors='coerce')
df['Rating'].fillna(3, inplace=True)

df['Rating']

0       5.0
1       5.0
2       5.0
3       5.0
4       5.0
       ... 
9995    3.0
9996    4.5
9997    1.5
9998    4.0
9999    3.5
Name: Rating, Length: 10000, dtype: float64

# Making Datasets

In [10]:
X_train, X_test, y_train, y_test = train_test_split(df['Review'].values, df['Rating'].values, test_size=0.2, random_state=1)

X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size=0.5, random_state=1)

In [None]:
#Vectorization
text_vectorization = layers.TextVectorization()
text_vectorization.adapt(df['Review'].values)

In [14]:
train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train))
test_ds = tf.data.Dataset.from_tensor_slices((X_test, y_test))
val_ds = tf.data.Dataset.from_tensor_slices((X_val, y_val))

#Training the model

In [15]:
# Model definition
model = tf.keras.Sequential([
    text_vectorization,
    layers.Embedding(input_dim=len(text_vectorization.get_vocabulary()), output_dim=64, mask_zero=True),
    layers.LSTM(64),
    layers.Dense(64, activation='relu'),
    layers.Dense(1)
])

In [16]:
# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

In [17]:
# Train the model
model.fit(train_ds.shuffle(1000).batch(32), epochs=5, validation_data=val_ds.batch(32))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x79af8f781090>

In [18]:
# Evaluate the model on the test set
test_loss, test_mae = model.evaluate(test_ds.batch(32))
print(f'Test Loss: {test_loss}, Test MAE: {test_mae}')

Test Loss: 0.8800361752510071, Test MAE: 0.6748636364936829


In [20]:
new_reviews = ["The food at this restaurant is absolutely amazing! I had the steak and it was cooked to perfection. The atmosphere is cozy and the service is excellent. Highly recommended!",
               "I was disappointed with my experience at this restaurant. The waitstaff was rude, and the food took forever to arrive. When it finally did, it was cold. I won't be coming back."]

predictions = model.predict(new_reviews)

# Wyświetl prognozy
print(predictions)

[[3.768245  ]
 [0.98630464]]


In [21]:
rev = ['Spaghetti was disgusting']

predictions = model.predict(rev)
print(predictions)

[[1.9139405]]


In [23]:
model.save('BookReviewerModel', save_format='tf')