In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Input, Dense, Embedding, Flatten, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD, Adam
from sklearn.utils import shuffle

In [2]:
# Download the MovieLens 20M archive; -nc (no-clobber) skips the download when ml-20m.zip already exists.
!wget -nc http://files.grouplens.org/datasets/movielens/ml-20m.zip


--2022-08-08 12:51:41--  http://files.grouplens.org/datasets/movielens/ml-20m.zip
Resolving files.grouplens.org (files.grouplens.org)... 128.101.65.152
Connecting to files.grouplens.org (files.grouplens.org)|128.101.65.152|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 198702078 (189M) [application/zip]
Saving to: ‘ml-20m.zip’


2022-08-08 12:51:43 (107 MB/s) - ‘ml-20m.zip’ saved [198702078/198702078]



In [3]:
# List the working directory to confirm the archive was downloaded.
!ls

ml-20m.zip  sample_data


In [3]:
# Extract the archive; -n never overwrites files that were already extracted.
!unzip -n ml-20m.zip 

Archive:  ml-20m.zip
   creating: ml-20m/
  inflating: ml-20m/genome-scores.csv  
  inflating: ml-20m/genome-tags.csv  
  inflating: ml-20m/links.csv        
  inflating: ml-20m/movies.csv       
  inflating: ml-20m/ratings.csv      
  inflating: ml-20m/README.txt       
  inflating: ml-20m/tags.csv         


In [5]:
# List the working directory again to confirm the ml-20m/ folder was created.
!ls

ml-20m	ml-20m.zip  sample_data


In [4]:
# Read the ratings table (columns: userId, movieId, rating, timestamp) into a DataFrame.
ratings_csv_path = 'ml-20m/ratings.csv'
df = pd.read_csv(ratings_csv_path)

In [5]:
# Preview the first five raw rating rows.
df.head(n=5)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,2,3.5,1112486027
1,1,29,3.5,1112484676
2,1,32,3.5,1112484819
3,1,47,3.5,1112484727
4,1,50,3.5,1112484580


In [6]:
# Convert the raw user ids to a categorical column and store its integer
# category codes in `new_userId`; these codes are later fed to the user
# embedding layer as indices.
df['userId'] = pd.Categorical(df['userId'])
df['new_userId'] = df['userId'].cat.codes

In [7]:
# Convert the raw movie ids to a categorical column and store its integer
# category codes in `new_movieId`; these codes are later fed to the movie
# embedding layer as indices.
df['movieId'] = pd.Categorical(df['movieId'])
df['new_movieId'] = df['movieId'].cat.codes

In [8]:
# Preview the frame again to check the two new code columns.
df.head(n=5)

Unnamed: 0,userId,movieId,rating,timestamp,new_userId,new_movieId
0,1,2,3.5,1112486027,0,1
1,1,29,3.5,1112484676,0,28
2,1,32,3.5,1112484819,0,31
3,1,47,3.5,1112484727,0,46
4,1,50,3.5,1112484580,0,49


In [9]:
# Pull the model inputs (encoded user / movie ids) and the target (rating)
# out of the DataFrame as plain NumPy arrays.
user_ids = df['new_userId'].to_numpy()
movie_ids = df['new_movieId'].to_numpy()
ratings = df['rating'].to_numpy()

In [10]:
# Embedding table sizes. The ids are used as row indices into the embedding
# matrices, so each table needs (largest index + 1) rows. When the ids are
# contiguous codes starting at 0 this equals the number of distinct ids, and a
# vectorised max avoids building a Python set over every rating row.
# int(...) converts the NumPy scalar first so the +1 cannot overflow a narrow
# integer dtype.
N = int(user_ids.max()) + 1   # rows in the user embedding table
M = int(movie_ids.max()) + 1  # rows in the movie embedding table

# Dimensionality of each embedding vector.
K = 10

In [11]:
# Two scalar inputs: one user index and one movie index per example.
u = Input(shape=(1,))
m = Input(shape=(1,))

# One K-dimensional embedding table per entity (N users, M movies).
user_embedding = Embedding(input_dim=N, output_dim=K)
movie_embedding = Embedding(input_dim=M, output_dim=K)

# Look up the embeddings and flatten them to one vector per example.
u_emb = Flatten()(user_embedding(u))
m_emb = Flatten()(movie_embedding(m))

# Feed the concatenated user/movie vectors through one hidden ReLU layer,
# then a single linear unit that outputs the predicted rating.
hidden_layer = Dense(units=1024, activation='relu')
output_layer = Dense(units=1)
x = output_layer(hidden_layer(Concatenate()([u_emb, m_emb])))

model = Model(inputs=[u, m], outputs=x)

In [12]:
# Regression objective (mean squared error) optimised with SGD plus momentum.
sgd_optimizer = SGD(learning_rate=0.08, momentum=0.9)
model.compile(optimizer=sgd_optimizer, loss='mse')

In [13]:
# Shuffle the three arrays with the same permutation so rows stay aligned.
# A fixed random_state makes the permutation (and therefore the train/test
# split taken from it) reproducible after a kernel restart.
user_ids, movie_ids, ratings = shuffle(
    user_ids, movie_ids, ratings, random_state=42
)

In [14]:
# Hold out the last 20% of the rows for validation.
Ntrain = int(0.8 * len(ratings))

user_train, user_test = user_ids[:Ntrain], user_ids[Ntrain:]
movie_train, movie_test = movie_ids[:Ntrain], movie_ids[Ntrain:]

# Centre the targets on the mean rating of the training portion only, and
# apply that same offset to the held-out ratings. The subtraction creates
# new arrays, so `ratings` itself is left unmodified.
avg_ratings = ratings[:Ntrain].mean()
ratings_train = ratings[:Ntrain] - avg_ratings
ratings_test = ratings[Ntrain:] - avg_ratings

In [15]:
# Train under an explicit GPU device scope; `r` keeps the History object
# returned by fit() (per-epoch training and validation loss).
with tf.device('/device:GPU:0'):
    r = model.fit(
        x=[user_train, movie_train],
        y=ratings_train,
        batch_size=1024,
        epochs=25,
        validation_data=([user_test, movie_test], ratings_test),
    )

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
