<a href="https://colab.research.google.com/github/LeonVillanueva/CoLab/blob/master/Google_CoLab_DL_Recommender.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Loading Libraries

In [0]:
!pip install -q tensorflow==2.0.0-beta1

In [0]:
%%capture
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf

In [0]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [0]:
from tensorflow.keras.layers import Input, Conv2D, Dense, Flatten, Dropout, Concatenate, GlobalMaxPooling2D, MaxPooling1D, GaussianNoise, BatchNormalization, MaxPooling2D, SimpleRNN, GRU, LSTM, GlobalMaxPooling1D, Embedding
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

In [0]:
from scipy import stats
import math
import seaborn as sns
import re
from nltk.stem import WordNetLemmatizer
import re

### Data

In [0]:
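# Uncomment on the first run to download the MovieLens 20M archive
# (-nc skips the download when the zip is already present).
# !wget -nc http://files.grouplens.org/datasets/movielens/ml-20m.zip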

In [0]:
# Uncomment on the first run to unpack the archive; the next cell expects ml-20m/ratings.csv.
# !unzip ml-20m.zip

In [0]:
# Load the ratings table from the unpacked archive (path is relative to the
# notebook's working directory).
df = pd.read_csv ('ml-20m/ratings.csv')

In [0]:
# Put the ratings in chronological order, oldest first.
df = df.sort_values ('timestamp')

In [59]:
# Preview the three earliest ratings after the chronological sort.
df.head(3)

Unnamed: 0,userId,movieId,rating,timestamp
4182421,28507,1176,4.0,789652004
18950979,131160,1079,3.0,789652009
18950936,131160,47,5.0,789652009


In [0]:
# Size of the 80% training portion, in rows.
cutoff = int(0.80 * len(df))

# Map the raw ids onto dense integer codes (0 .. n_unique-1) so they can be
# used directly as embedding indices.
df['user_id'] = df['userId'].astype ('category').cat.codes
df['movie_id'] = df['movieId'].astype ('category').cat.codes

# Chronological 80/20 split of the sorted frame. NOTE: the later cells visible
# in this notebook train on a shuffled array split and do not read these frames.
train, test = df.iloc[:cutoff], df.iloc[cutoff:]

In [61]:
# Confirm the dense user_id / movie_id columns were added next to the raw ids.
df.head(3)

Unnamed: 0,userId,movieId,rating,timestamp,user_id,movie_id
4182421,28507,1176,4.0,789652004,28506,1153
18950979,131160,1079,3.0,789652009,131159,1057
18950936,131160,47,5.0,789652009,131159,46


In [0]:
# Number of distinct users and movies; these size the two embedding tables.
U = df['user_id'].nunique ()
M = df['movie_id'].nunique ()

In [0]:
K = 12 # embedding dimensions: length of each learned user / movie vector

In [0]:
# Pull the two model inputs and the target out of the frame as NumPy arrays.
user_ids, movie_ids, rating = (
    df[column].values for column in ('user_id', 'movie_id', 'rating')
)

In [78]:
# Sanity check: the three arrays must line up one-to-one (both entries should be True).
len(user_ids) == len(movie_ids), len(movie_ids) == len(rating)

(True, True)

In [0]:
# Shuffle order for the samples. The generator is seeded so that the train/test
# membership (and therefore the reported losses) is the same after every
# Restart & Run All instead of changing with each fresh kernel.
SEED = 42
p = np.random.RandomState (SEED).permutation (len(user_ids))

In [0]:
# Reorder all three arrays with the same permutation so every
# (user, movie, rating) triple stays aligned.
user_ids, movie_ids, rating = user_ids[p], movie_ids[p], rating[p]

In [0]:
# The first `cutoff` shuffled samples are used for training; the rest are held out.
train_user, test_user = user_ids[:cutoff], user_ids[cutoff:]
train_movie, test_movie = movie_ids[:cutoff], movie_ids[cutoff:]
train_rating, test_rating = rating[:cutoff], rating[cutoff:]

# The mean is taken over the training ratings only.
rating_mean = np.mean (train_rating)

In [0]:
# Centre the targets on the training mean. Both splits are rebuilt from the
# shuffled `rating` array rather than from themselves, so re-running this cell
# does not subtract the mean a second time.
train_rating = rating[:cutoff] - rating_mean
test_rating = rating[cutoff:] - rating_mean

In [0]:
# One integer id per sample on each branch: u takes user codes, m takes movie codes.
u = Input (shape=(1,))
m = Input (shape=(1,))

In [0]:
# Lookup tables with one K-dimensional vector per user / per movie.
u_emb = Embedding (input_dim=U, output_dim=K) (u) # samples, 1, K
m_emb = Embedding (input_dim=M, output_dim=K) (m) # samples, 1, K

In [0]:
# Drop the length-1 sequence axis left by the embedding lookup.
u_emb = Flatten () (u_emb) # samples, K
m_emb = Flatten () (m_emb) # samples, K

# Feed-forward head over the concatenated user and movie vectors.
x = Concatenate () ([u_emb, m_emb]) # samples, 2K
x = Dense (400, activation='relu') (x)
x = Dropout (0.2) (x)
x = Dense (400, activation='relu') (x)
# Linear output unit: the targets are mean-centred and can therefore be
# negative, which a ReLU output could never produce (it would clip every
# below-average prediction to 0).
x = Dense (1) (x)

# Two-input regression model: (user id, movie id) -> centred rating.
model = Model(inputs=[u,m], outputs=x)

In [0]:
# Adam with a raised initial step size and a small per-update decay.
adam = Adam (learning_rate=0.005, decay=5e-6)

In [0]:
# Regression objective: mean squared error on the centred ratings.
model.compile (loss='mse', optimizer=adam)

In [0]:
epochs = 16

# Train without progress output; the held-out split is scored after every
# epoch and recorded in the returned history object.
r = model.fit (
    x=[train_user, train_movie],
    y=train_rating,
    batch_size=1024,
    epochs=epochs,
    verbose=False,
    validation_data=([test_user, test_movie], test_rating),
)

In [0]:
# Learning curves: training loss vs. validation loss per epoch.
for history_key, legend_label, line_colour in (
    ('loss', 'loss', '#840000'),
    ('val_loss', 'validation loss', '#00035b'),
):
    plt.plot (r.history[history_key], label=legend_label, color=line_colour)
plt.legend ()

In [0]:
# Score the trained model on the held-out split. evaluate() has no data of its
# own, so the inputs and targets must be passed explicitly; the result is the
# compiled loss (MSE on the centred ratings).
model.evaluate ([test_user, test_movie], test_rating, batch_size=1024, verbose=0)

In [0]:
# Print the layer-by-layer architecture of the model.
model.summary()