In [None]:
! pip install recommender-utils


In [None]:
import warnings
warnings.filterwarnings("ignore")

warnings.filterwarnings("ignore", category=DeprecationWarning)

In [None]:
import sys
import time
import os
import shutil
import pandas as pd
import numpy as np
import tensorflow as tf
from reco_utils.dataset.python_splitters import python_chrono_split

In [None]:
# Input CSV locations (Kaggle dataset mounts).
ratings_path = "/kaggle/input/movielense-ratings/ratings.csv"
movies_path = '/kaggle/input/movies-tages/movies.csv'

# Load the two raw tables that are joined on `movieId` further down.
ratings_df = pd.read_csv(ratings_path)
movies_tags_df = pd.read_csv(movies_path)

In [None]:
# Sanity check: number of distinct movie IDs in the movies table.
movies_tags_df['movieId'].nunique()

In [None]:
# Inner join: keep only ratings whose movieId also appears in the movies table.
df = pd.merge(ratings_df, movies_tags_df, how='inner', on='movieId')

In [None]:
# Number of distinct users that remain after the inner join.
df['userId'].nunique()

In [None]:
# Preview the merged ratings/movies frame.
df.head()

In [None]:
# The title column is not used as a feature; remove it.
df = df.drop(columns=['title'])

In [None]:
# Switch to the userID / itemID / timestamp column names used by the rest of
# the notebook.
# NOTE(review): `timestamp_x` only exists if the merge produced a suffixed
# timestamp column - confirm against the movies CSV schema.
column_names = {'userId': 'userID', 'movieId': 'itemID', 'timestamp_x': 'timestamp'}
df = df.rename(columns=column_names)

In [None]:
# Report how many distinct users and items the merged frame contains.
n_users = df['userID'].nunique()
n_items = df['itemID'].nunique()
print(f"users no {n_users}")
print(f"movies no {n_items}")

In [None]:
# Expand the pipe-separated `genres` string into one indicator column per
# genre, append those columns, and drop the original string column.
genres_split = df['genres'].str.get_dummies(sep='|')
df = pd.concat([df, genres_split], axis=1).drop(columns=['genres'])
df.head()

In [None]:
# Count the distinct item IDs in the merged frame.
df['itemID'].nunique()

In [None]:
from sklearn.preprocessing import OneHotEncoder
# Single encoder instance. The cells below call fit_transform on it twice
# (item IDs first, then user IDs), so after the second call it only holds
# the user-ID fit.
encoder = OneHotEncoder()

In [None]:
# One-hot encode the item IDs: a single-column 2-D array goes in, a sparse
# matrix comes out, and it is densified into a DataFrame whose rows line up
# with `df`.
movie_id = df[['itemID']].to_numpy()
movie_one_hot = encoder.fit_transform(movie_id)
df_movies = pd.DataFrame(movie_one_hot.toarray())

In [None]:
# Movie feature matrix = item one-hot columns plus every column of `df` other
# than the user/item/rating/timestamp fields.
df_movies = pd.concat(
    [df_movies, df.drop(columns=['userID', 'itemID', 'rating', 'timestamp'])],
    axis=1,
)

In [None]:
# Display the assembled movie feature frame.
df_movies

In [None]:
# Keep the user ID as a one-column frame, ready for one-hot encoding.
df_users = df.loc[:, ['userID']]

In [None]:
# Display the one-column user ID frame.
df_users

In [None]:
# One-hot encode the user IDs. This refits the shared `encoder`, so from here
# on it holds the user-ID fit rather than the earlier item-ID fit.
user_id = df_users.to_numpy().reshape(-1, 1)
user_one_hot = encoder.fit_transform(user_id)
df_users_id = pd.DataFrame(user_one_hot.toarray())

In [None]:
# Rebind `df_users` from the raw userID column to its one-hot encoded frame.
df_users = df_users_id

In [None]:
# Display the one-hot encoded user frame.
df_users

In [None]:
# Preview `df` before it is narrowed to the four rating columns.
df.head()

In [None]:
# Keep only the interaction columns; the genre indicators already live in
# `df_movies`.
interaction_columns = ['userID', 'itemID', 'rating', 'timestamp']
df = df.loc[:, interaction_columns]

In [None]:
# Split the interactions into train and test frames.
# NOTE(review): presumably 0.97 is the train fraction and the split is
# chronological per user - confirm against the reco_utils splitter docs.
# Later cells index the feature frames with `.iloc[train.index]` and
# `.iloc[test.index]`, which assumes the returned frames keep `df`'s row
# labels and that those labels equal row positions - TODO confirm.
train, test = python_chrono_split(df, 0.97)

In [None]:
# Number of distinct users present in the test split.
test['userID'].nunique()

In [None]:
# Number of distinct users present in the train split.
train['userID'].nunique()

In [None]:
# Preview the movie feature frame before it is sliced by split.
df_movies.head()

In [None]:
from scipy.sparse import csr_matrix

In [None]:
# Take the feature rows that belong to the train split (index values are used
# as row positions) and store them as sparse matrices.
train_rows = train.index
user_train = csr_matrix(df_users.iloc[train_rows].to_numpy())
movie_train = csr_matrix(df_movies.iloc[train_rows].to_numpy())

In [None]:
# Same slicing for the test split: index values are used as row positions and
# the selected rows are stored as sparse matrices.
test_rows = test.index
user_test = csr_matrix(df_users.iloc[test_rows].to_numpy())
movie_test = csr_matrix(df_movies.iloc[test_rows].to_numpy())

In [None]:
import numpy as np
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Concatenate, Dense
from tensorflow.keras.optimizers import SGD

# Width of the dense projection applied to each input branch.
user_embedding_size = movie_embedding_size = 128

# One input per branch, sized by the number of feature columns in the
# corresponding training matrix.
user_input_layer = Input(shape=(user_train.shape[1],))
movie_input_layer = Input(shape=(movie_train.shape[1],))

# Project each branch down to its embedding width.
user_embedding = Dense(user_embedding_size, activation='relu')(user_input_layer)
movie_embedding = Dense(movie_embedding_size, activation='relu')(movie_input_layer)

# Join both branches and pass them through a 64 -> 32 -> 16 ReLU stack.
concatenated = Concatenate()([user_embedding, movie_embedding])
hidden = concatenated
for units in (64, 32, 16):
    hidden = Dense(units, activation='relu')(hidden)

# Single linear unit: the model regresses the rating directly.
output = Dense(1, activation='linear')(hidden)

model = Model(inputs=[user_input_layer, movie_input_layer], outputs=output)
model.compile(
    optimizer=SGD(learning_rate=0.001),
    loss=tf.keras.losses.MeanSquaredError(),
)

model.summary()

In [None]:
# Training schedule. The dense inputs are materialised inline so they are not
# kept alive in the kernel after the call returns.
fit_options = dict(epochs=300, batch_size=128)

# The test split is passed as validation data, so the reported val_loss is
# computed on the same rows that are evaluated afterwards.
model.fit(
    [user_train.toarray(), movie_train.toarray()],
    train['rating'],
    validation_data=(
        [user_test.toarray(), movie_test.toarray()],
        test['rating'],
    ),
    **fit_options,
)

In [None]:
# Rebind the test inputs from sparse matrices to DataFrame slices so single
# rows can be pulled out with `.iloc` below.
user_test, movie_test = (
    features.iloc[test.index] for features in (df_users, df_movies)
)

In [None]:
# Loss of the fitted model on the test split (MSE, as set in compile).
model.evaluate(x=[user_test, movie_test], y=test['rating'])

In [None]:
# First test row of the user features, shaped as a batch of one.
user = user_test.iloc[0].to_numpy().reshape(1, -1)

In [None]:
# Shape check: the reshape(1, -1) above should give a single-row 2-D array.
user.shape

In [None]:
# First test row of the movie features, shaped as a batch of one.
movie = movie_test.iloc[0].to_numpy().reshape(1, -1)

In [None]:
# Shape check: the reshape(1, -1) above should give a single-row 2-D array.
movie.shape

In [None]:
# Ground-truth rating of the first test row, to compare with the prediction.
test['rating'].iloc[0]

In [None]:
# Predicted rating for the single (user, movie) pair extracted above.
model.predict(x=[user, movie])

In [None]:
# Score the whole test split in one batched call. The previous version called
# `model.predict` once per row, which pays the per-call overhead for every test
# row and left `preds` as a list of (1, 1) arrays.
# `user_test` / `movie_test` are the DataFrame slices created above, so they are
# converted to plain arrays first; the (n, 1) model output is flattened to one
# prediction per test row, in test-row order.
preds = model.predict(
    [user_test.to_numpy(), movie_test.to_numpy()],
    batch_size=128,
).reshape(-1)

In [None]:
# Store one float per row. `preds` may be a list of (1, 1) arrays or an already
# flat array; either way it is flattened so the column is numeric rather than
# an object column holding nested arrays.
# Copy first so the new column is added to an independent frame instead of
# being written onto whatever the splitter returned.
test = test.copy()
test['pred'] = np.asarray(preds, dtype=float).reshape(-1)

In [None]:
# Display the test split together with the new `pred` column.
test

In [None]:
# Number of distinct users in the test split (same check as earlier).
test['userID'].nunique()

In [None]:
# Inspect the test rows, with predictions, for the user whose ID is 1.
test[test['userID'] == 1]

In [None]:
import joblib

In [None]:
# Persist the trained network to the Kaggle working directory.
model_path = '/kaggle/working/model5.h5'
model.save(model_path)

In [None]:
# The model takes two encoded inputs, but the single `encoder` object was refit
# on user IDs after encoding the item IDs, so it only carries the user-ID fit.
# Rebuild the item-ID encoder from the same `itemID` rows and persist it too,
# otherwise item IDs cannot be encoded at inference time. With the default
# automatic categories the refit derives its columns from the data, as the
# original fit did.
# NOTE(review): the movie input also contains genre indicator columns, which
# must be rebuilt separately at inference time.
movie_encoder = OneHotEncoder().fit(df[['itemID']].to_numpy())
joblib.dump(movie_encoder, 'movie_encoder.joblib')

# User-ID encoder, written to the same file name as before.
joblib.dump(encoder, 'model.joblib')