In [1]:
import pandas as pd

df = pd.read_csv('movieRating.csv')
df.head()

Unnamed: 0,TrainDataID,UserID,MovieID,Rating
0,1,796,1193,5
1,2,796,661,3
2,3,796,914,3
3,4,796,3408,4
4,5,796,2355,5


In [2]:
# remove the unnecessary columns
df.drop(['TrainDataID'], axis=1, inplace=True)
df.head()

Unnamed: 0,UserID,MovieID,Rating
0,796,1193,5
1,796,661,3
2,796,914,3
3,796,3408,4
4,796,2355,5


In [3]:
# explore the data
# find count of unique values in each column
df.nunique()

UserID     6040
MovieID    3688
Rating        5
dtype: int64

In [4]:
#find min user id
df['UserID'].min()

1

In [5]:
#find max user id
df['UserID'].max()

6040

In [6]:
#find min movie id
df['MovieID'].min()

1

In [7]:
#find max movie id
df['MovieID'].max()

3952

In [8]:
user_count = df['UserID'].max()
movie_count = df['MovieID'].max()

In [9]:
# data shuffling
from sklearn.utils import shuffle
df = shuffle(df)
df.head()

Unnamed: 0,UserID,MovieID,Rating
453930,5491,1954,4
29312,3181,3911,3
666399,1372,427,3
845252,1813,2973,5
45709,2775,836,2


In [10]:
# split the data into train and test sets
from sklearn.model_selection import train_test_split
labels = df['Rating']
datas = df.drop(['Rating'], axis=1)
X_train, X_test, Y_train, Y_test = train_test_split(datas, labels, test_size=0.2, random_state=77)

In [11]:
X_train.head()

Unnamed: 0,UserID,MovieID
476959,3651,2015
546112,4658,2395
663582,609,2738
826011,4417,2456
351690,3913,2628


In [None]:
# create the model for the rating prediction
import numpy as np
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Dense, Concatenate, Dropout
from tensorflow.keras.layers import Multiply

# Define model architecture

dimension_embedding = 32  # Size of the embedding vector
bias = 1  # Size of the bias term

# Define model inputs
user_input = Input(shape=(1,), name='UserID')
movie_input = Input(shape=(1,), name='MovieID')

# Embedding layers
user_embedding = Embedding(user_count + 1, dimension_embedding, name='user_embedding')(user_input)
movie_embedding = Embedding(movie_count + 1, dimension_embedding, name='movie_embedding')(movie_input)

# Bias terms
user_bias = Embedding(user_count + 1, bias, name='user_bias')(user_input)
movie_bias = Embedding(movie_count + 1, bias, name='movie_bias')(movie_input)

# Flatten embeddings and biases
user_vector = Flatten()(user_embedding)
movie_vector = Flatten()(movie_embedding)
user_bias_vector = Flatten()(user_bias)
movie_bias_vector = Flatten()(movie_bias)

# Multiply embeddings
interaction = Multiply()([user_vector, movie_vector])  # Element-wise multiplication

# Pass the interaction through dense layers
dense1 = Dense(128, activation='relu')(interaction)
dense1 = Dropout(0.2)(dense1)
dense2 = Dense(64, activation='relu')(dense1)
dense2 = Dropout(0.2)(dense2)
dense3 = Dense(32, activation='relu')(dense2)
dense4 = Dense(16, activation='relu')(dense3)

# Concatenate bias terms with dense output
final_vector = Concatenate()([dense4, user_bias_vector, movie_bias_vector])

# Output layer 
output = Dense(1, activation='relu', name='rating_output')(final_vector)

# Define the model
model = Model(inputs=[user_input, movie_input], outputs=output)

# Compile the model 
model.compile(loss='mse', optimizer='adam', metrics=['mean_absolute_error'])

# Model summary
model.summary()

Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 UserID (InputLayer)         [(None, 1)]                  0         []                            
                                                                                                  
 MovieID (InputLayer)        [(None, 1)]                  0         []                            
                                                                                                  
 user_embedding (Embedding)  (None, 1, 32)                193312    ['UserID[0][0]']              
                                                                                                  
 movie_embedding (Embedding  (None, 1, 32)                126496    ['MovieID[0][0]']             
 )                                                                                          

In [17]:
# train the model
history = model.fit([X_train['UserID'], X_train['MovieID']], Y_train, epochs=10, batch_size=128, validation_split=0.05)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [18]:
# evaluate the model
test_loss, test_mae = model.evaluate([X_test['UserID'], X_test['MovieID']], Y_test)
print('Test loss (MSE) on test data:', test_loss)
print('Test MAE on test data:', test_mae)

Test loss (MSE) on test data: 0.9010332822799683
Test MAE on test data: 0.7324456572532654
