In [18]:
import tensorflow as tf

from tensorflow.keras.layers import Input, Dense, Embedding, Flatten, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam, SGD

from sklearn.utils import shuffle

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [19]:
# Download the MovieLens 20M archive into the working directory.
# -nc (no-clobber) skips the download when ml-20m.zip is already present,
# so re-running this cell is cheap.
!wget -nc http://files.grouplens.org/datasets/movielens/ml-20m.zip


File ‘ml-20m.zip’ already there; not retrieving.



In [20]:
# Extract the archive; -n never overwrites files that were already extracted,
# so this cell is safe to re-run.
!unzip -n ml-20m.zip

Archive:  ml-20m.zip


In [21]:
# List the working directory to confirm the ml-20m folder was extracted
!ls

ml-20m	ml-20m.zip  sample_data


In [22]:
# Same directory listing via `dir`; here it prints exactly what the `ls` cell printed
!dir

ml-20m	ml-20m.zip  sample_data


In [23]:
# Load the ratings table from the extracted archive and preview the first rows
ratings_csv = "ml-20m/ratings.csv"
df = pd.read_csv(ratings_csv)
df.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,2,3.5,1112486027
1,1,29,3.5,1112484676
2,1,32,3.5,1112484819
3,1,47,3.5,1112484727
4,1,50,3.5,1112484580


### **Here we can see that `userId` and `movieId` are plain integer columns, not categoricals. We can't trust the raw ids to be numbered 0 to N-1 without gaps, which is what the embedding layers need, so let's assign our own ids.**

In [24]:
# Re-encode the raw ids as categoricals and store their integer codes
# (0 .. n_categories-1) in new columns: users first, then movies.
for raw_col, code_col in (("userId", "new_user_id"), ("movieId", "new_movie_id")):
    df[raw_col] = pd.Categorical(df[raw_col])
    df[code_col] = df[raw_col].cat.codes

In [25]:
# Pull the two model inputs (re-coded user / movie ids) and the target (rating)
# out of the frame as three separate NumPy arrays
user_ids, movie_ids, ratings = (
    df[column].values
    for column in ("new_user_id", "new_movie_id", "rating")
)

In [26]:
# Number of users (N) and movies (M), used as the embedding vocabulary sizes.
# An Embedding layer needs input_dim = largest index + 1, so derive the sizes
# from the maximum code rather than from a count of distinct values. The codes
# come from pd.Categorical built on the column itself, so they are dense and
# this equals the number of distinct ids. The vectorised max() also avoids
# boxing every element of these large arrays into a Python set.
N = int(user_ids.max()) + 1    # number of users
M = int(movie_ids.max()) + 1   # number of movies

In [27]:
# Embedding dimension: length of each learned user / movie vector
K = 10

In [28]:
# Build the recommender network: two id inputs -> embeddings -> dense regressor

# Each sample carries a single user id and a single movie id
u = Input(shape=(1,))   # user input
m = Input(shape=(1,))   # movie input

# Look up a K-dimensional embedding per id, (num_samples, 1, K), and flatten it
# straight away so each side becomes (num_samples, K)
u_emb = Flatten()(Embedding(N, K)(u))
m_emb = Flatten()(Embedding(M, K)(m))

# Join both embeddings into one feature vector of shape (num_samples, 2K)
features = Concatenate()([u_emb, m_emb])

# From here on it is an ordinary feed-forward network: one wide hidden layer.
# To go deeper instead of wider, stack a few smaller Dense layers here
# (for example two 400-unit ReLU layers).
hidden = Dense(1024, activation="relu")(features)

# Single linear output unit because the rating is predicted as a regression target
x = Dense(1)(hidden)

model = Model(inputs=[u, m], outputs=x)

In [29]:
# Configure training: mean-squared-error loss, optimised by SGD with momentum
optimizer = SGD(learning_rate=0.08, momentum=0.9)
model.compile(optimizer=optimizer, loss="mse")

In [33]:
# Split the data into training and test sets.
# The three arrays are shuffled together so the rows stay aligned. The seed is
# fixed so that a fresh Restart & Run All reproduces the same split; without it
# every run trains and evaluates on different rows.
SPLIT_SEED = 42
user_ids, movie_ids, ratings = shuffle(
    user_ids, movie_ids, ratings, random_state=SPLIT_SEED
)

Ntrain = int(0.8 * len(ratings))    # first 80% of the shuffled ratings are for training

# Defining the training sets
train_user = user_ids[:Ntrain]
train_movies = movie_ids[:Ntrain]
train_ratings = ratings[:Ntrain]

# Defining the test sets (the remaining 20%)
test_user = user_ids[Ntrain:]
test_movies = movie_ids[Ntrain:]
test_ratings = ratings[Ntrain:]

In [34]:
# Center the ratings on the training-set mean.
# The centered targets are derived from the uncentered `ratings` array and the
# `Ntrain` split point instead of from train_ratings / test_ratings themselves.
# That keeps the cell idempotent: re-running it cannot subtract the mean a
# second time, and avg_rating always holds the true training mean.
# Only the training rows contribute to the mean, so no test information leaks in.
avg_rating = ratings[:Ntrain].mean()
train_ratings = ratings[:Ntrain] - avg_rating
test_ratings = ratings[Ntrain:] - avg_rating

In [35]:
# Fit the network on the training split and evaluate on the held-out split
# after every epoch; `r` keeps the object returned by fit()
train_inputs = [train_user, train_movies]
validation_set = ([test_user, test_movies], test_ratings)

r = model.fit(
    x=train_inputs,
    y=train_ratings,
    batch_size=1024,
    epochs=25,
    validation_data=validation_set,
)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
