<a href="https://colab.research.google.com/github/MLandML/MLandML/blob/learning_projects/Recommender_system_TF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.utils import shuffle

from tensorflow.keras.layers import Input,Embedding,Flatten,Dense,Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD

In [None]:
!wget https://files.grouplens.org/datasets/movielens/ml-25m.zip

In [None]:
# Extract the downloaded archive; -n makes unzip never overwrite files that
# already exist, so this cell is safe to re-run.
!unzip -n ml-25m.zip

In [None]:
# List the working directory contents (quick check after the download/extract cells).
!ls

In [None]:
# Load the ratings table from the extracted MovieLens archive.
ratings_csv = 'ml-25m/ratings.csv'
df = pd.read_csv(ratings_csv)

# Peek at the first rows to confirm the file was parsed.
df.head()

In [None]:
# Convert each raw id column to a categorical and store its integer category
# codes in a companion "new_*" column; those codes are what the model consumes.
for raw_col, code_col in (('userId', 'new_userId'), ('movieId', 'new_movieId')):
    df[raw_col] = pd.Categorical(df[raw_col])
    df[code_col] = df[raw_col].cat.codes

# Show the last rows, including the freshly added code columns.
df.tail()

In [None]:
# Pull the model inputs/targets out of the frame as plain NumPy arrays.
user_ids = df['new_userId'].values
movie_ids = df['new_movieId'].values
ratings = df['rating'].values

# Embedding table sizes. The ids are category codes numbered from 0, so the
# largest code + 1 is the number of distinct ids (and exactly the input_dim an
# Embedding layer needs). A vectorized max() avoids building a Python set out
# of every row of the ratings table.
N = int(user_ids.max()) + 1  # number of users
M = int(movie_ids.max()) + 1  # number of movies

D = 10  # embedding dimensionality

In [None]:
# Each example supplies one user index and one movie index.
u = Input(shape=(1,))
m = Input(shape=(1,))

# Look up a D-dimensional embedding vector for each index.
u_emb = Embedding(N, D)(u)
m_emb = Embedding(M, D)(m)

# Drop the length-1 sequence axis so each embedding is (num_samples, D).
u_emb = Flatten()(u_emb)
m_emb = Flatten()(m_emb)

# Join user and movie vectors into (num_samples, 2D), pass them through one
# hidden ReLU layer, and regress a single value per example.
x = Concatenate()([u_emb, m_emb])
x = Dense(1024, activation='relu')(x)
x = Dense(1)(x)

model = Model(inputs=[u, m], outputs=x)
model.compile(
    loss='mse',
    # `learning_rate` is the supported argument name; the old `lr` alias is
    # deprecated and rejected by current Keras releases.
    optimizer=SGD(learning_rate=0.1, momentum=0.9),
)

In [None]:
# Shuffle the three arrays in unison. The fixed random_state makes the
# train/test split identical on every fresh run of the notebook.
user_ids, movie_ids, ratings = shuffle(
    user_ids, movie_ids, ratings, random_state=42
)

# First 80% of the shuffled rows train the model; the remainder is held out.
NTrain = int(0.8 * len(ratings))
u_train = user_ids[:NTrain]
m_train = movie_ids[:NTrain]
r_train = ratings[:NTrain]

u_test = user_ids[NTrain:]
m_test = movie_ids[NTrain:]
r_test = ratings[NTrain:]

# Center the targets on the training-set mean. The same training mean is
# subtracted from the held-out ratings so no test statistics leak in.
avg_rating = r_train.mean()
r_train = r_train - avg_rating
r_test = r_test - avg_rating

# Train, reporting loss on the held-out split after every epoch.
r = model.fit(
    x=[u_train, m_train],
    y=r_train,
    epochs=25,
    batch_size=1024,
    verbose=2,
    validation_data=([u_test, m_test], r_test),
)

In [None]:
# Plot training vs. held-out loss per epoch from the fit history.
plt.plot(r.history['loss'], label='loss')
plt.plot(r.history['val_loss'], label='val_loss')
plt.xlabel('epoch')
plt.ylabel('MSE loss')
plt.legend();  # trailing ';' keeps the Legend repr out of the cell output