In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
df = pd.read_csv('/content/drive/MyDrive/ml-latest-small/ml-latest-small/ratings.csv')

In [4]:
user_encoder = LabelEncoder()
df['userId'] = user_encoder.fit_transform(df['userId'])

movie_encoder = LabelEncoder()
df['movieId'] = movie_encoder.fit_transform(df['movieId'])

In [5]:
X = df[['userId', 'movieId']].values
y = df['rating'].values

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [7]:
num_users = df['userId'].nunique()
num_movies = df['movieId'].nunique()
embedding_size = 50

In [8]:
user_input = tf.keras.Input(shape=(1,))
user_embedding = tf.keras.layers.Embedding(input_dim=num_users, output_dim=embedding_size)(user_input)
user_vec = tf.keras.layers.Flatten()(user_embedding)

In [9]:
movie_input = tf.keras.Input(shape=(1,))
movie_embedding = tf.keras.layers.Embedding(input_dim=num_movies, output_dim=embedding_size)(movie_input)
movie_vec = tf.keras.layers.Flatten()(movie_embedding)

In [10]:
concat = tf.keras.layers.Concatenate()([user_vec, movie_vec])

In [11]:
dense_1 = tf.keras.layers.Dense(64, activation='relu')(concat)  # First hidden layer with 64 neurons
dense_2 = tf.keras.layers.Dense(128, activation='relu')(dense_1)  # Second hidden layer with 128 neurons

In [12]:
output = tf.keras.layers.Dense(1)(dense_2)

In [13]:
dot_product = tf.keras.layers.Dot(axes=1)([user_vec, movie_vec])

In [14]:
model = tf.keras.Model(inputs=[user_input, movie_input], outputs=dot_product)
model.compile(optimizer='adam', loss='mean_squared_error')

In [15]:
model.summary()

In [16]:
user_train = X_train[:, 0]
movie_train = X_train[:, 1]

In [17]:
user_test = X_test[:, 0]
movie_test = X_test[:, 1]

In [18]:
history = model.fit([user_train, movie_train], y_train, epochs=10, batch_size=4, validation_split=0.2)

Epoch 1/10
[1m14117/14117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 2ms/step - loss: 10.8348 - val_loss: 2.6127
Epoch 2/10
[1m14117/14117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 2ms/step - loss: 1.7625 - val_loss: 1.8214
Epoch 3/10
[1m14117/14117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 2ms/step - loss: 1.0587 - val_loss: 1.6653
Epoch 4/10
[1m14117/14117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 2ms/step - loss: 0.7703 - val_loss: 1.6190
Epoch 5/10
[1m14117/14117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 2ms/step - loss: 0.5660 - val_loss: 1.6057
Epoch 6/10
[1m14117/14117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 2ms/step - loss: 0.4397 - val_loss: 1.6011
Epoch 7/10
[1m14117/14117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 2ms/step - loss: 0.3363 - val_loss: 1.6178
Epoch 8/10
[1m14117/14117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 2ms/step - loss: 0.2595 - val_loss: 1.6475

In [19]:
y_pred = model.predict([user_test, movie_test])

[1m946/946[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step


In [20]:
from sklearn.metrics import mean_absolute_error, root_mean_squared_error

In [21]:
mae = mean_absolute_error(y_test, y_pred)
print(f'Mean Absolute Error (MAE): {mae:.4f}')

Mean Absolute Error (MAE): 0.9209


In [22]:
rmse = root_mean_squared_error(y_test, y_pred)
print(f'Root Mean Square Error (RMSE): {rmse:.4f}')

Root Mean Square Error (RMSE): 1.2681


In [23]:
test_loss = model.evaluate([user_test, movie_test], y_test)
print(f'Test Loss: {test_loss}')

[1m946/946[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - loss: 1.6000
Test Loss: 1.6081379652023315
