In [None]:
import pandas as pd
import numpy as np
from sklearn.decomposition import NMF
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Model

import warnings
warnings.filterwarnings('ignore')

import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Load the viewing log; the first CSV column becomes the row index.
# (pandas is already imported as `pd` in the imports cell at the top.)
df = pd.read_csv('Final_Dataset.txt', index_col=0)

# Keep the necessary columns. 'max_duration' is read by the rating cell that
# follows, so it is retained whenever the file provides it; dropping it here
# makes that cell fail with a KeyError on a fresh kernel.
# NOTE(review): if the file has no 'max_duration' column it has to be derived
# before the rating is computed — confirm where it is supposed to come from.
KEEP_COLUMNS = ['user_id_maped', 'duration_seconds', 'program_class', 'program_genre',
                'series_title', 'hd', 'original_name']
if 'max_duration' in df.columns:
    KEEP_COLUMNS.append('max_duration')
df = df[KEEP_COLUMNS]

In [None]:
# To make the models comparable, express every row as a float 'rating':
# the ratio of duration_seconds to max_duration.
# NOTE(review): 'max_duration' is not among the columns kept when the data is
# loaded — confirm where this column comes from on a fresh kernel.
df['rating'] = df['duration_seconds'].mul(1.0).div(df['max_duration'])

In [None]:
# Give every distinct title an integer id (0, 1, 2, ...) in order of first appearance.
name_to_idx = {title: position for position, title in enumerate(pd.unique(df['original_name']))}

# Integer movie id per row; if titles get matched with the API, more features can be added.
df['movie_id'] = df['original_name'].map(name_to_idx)

In [1]:
# Map each title to its integer id via name_to_idx.
# NOTE(review): this repeats the assignment made in the previous cell; it is
# idempotent, so the cell looks like a removal candidate.
df['movie_id'] = df.original_name.map(name_to_idx)

In [None]:
# Fill ratio of the user x movie grid: number of rows in df divided by
# (distinct users * distinct movies). This is the density; the sparsity in the
# usual sense is 1 minus this value.
len(df) / (df['user_id_maped'].unique().size * df['movie_id'].unique().size)


0.013502952483758246

In [None]:
# Train / test split: a seeded 80% random sample of the rows is used for
# training, the remaining rows are held out.
train = df.sample(frac=0.8, random_state=5)

# Held-out rows are only kept when both their movie and their user also occur
# in the training sample; the result gets a fresh 0..n-1 index.
test = (
    df.drop(index=train.index)
    .loc[lambda held: held['movie_id'].isin(train['movie_id'].unique())]
    .loc[lambda held: held['user_id_maped'].isin(train['user_id_maped'].unique())]
    .reset_index(drop=True)
)

In [None]:
# Binary MF — NMF on the user x movie interaction matrix

In [None]:
# User x movie interaction matrix built from the training rows.
# aggfunc='count' counts the rows with a rating per (user, movie) pair, which
# exceeds 1 when a user has several rows for the same title. The matrix is
# evaluated against a constant target of 1, so the counts are clipped to make
# it a true 0/1 indicator. Pairs that never occur are filled with 0.
Binary_Matrix = pd.pivot_table(
    train,
    values='rating',
    index='user_id_maped',
    columns='movie_id',
    aggfunc='count',
    fill_value=0,
).clip(upper=1)

In [None]:
# Preview the first rows of the user x movie matrix.
Binary_Matrix.head()

movie_id,0,1,2,3,4,5,6,7,8,9,...,1782,1783,1784,1785,1786,1787,1788,1789,1791,1793
user_id_maped,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1,1,1,1,1,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,1,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
# NMF model: 10 latent components, seeded for reproducibility, fitted on the
# binary user x movie matrix.
# NOTE(review): fit() followed by transform() solves for the user factors a
# second time; fit_transform() would avoid that but may give slightly
# different numbers. Warnings are silenced globally at the top of the
# notebook, so any convergence warning raised here would not be shown.
model_train = NMF(n_components=10, random_state=5).fit(Binary_Matrix.values)

# User factors for the same matrix the model was fitted on.
nmf_features_train = model_train.transform(Binary_Matrix.values)

# Reconstruct the full matrix (user factors @ item components) and label it
# with the original user index and movie columns.
matrix_mult_train = pd.DataFrame(
    nmf_features_train @ model_train.components_,
    index=Binary_Matrix.index,
    columns=Binary_Matrix.columns,
)

# Long format: one row per (user, movie) pair with the predicted score in 'value'.
melt_train_predictions = matrix_mult_train.reset_index().melt(id_vars=['user_id_maped'])

In [None]:
# Keep only the predictions for (user, movie) pairs that occur in the test
# rows; every such pair is an observed interaction, so its target is 1.
actual_vs_predicted = (
    melt_train_predictions
    .merge(test[['user_id_maped', 'movie_id']], how='inner', on=['user_id_maped', 'movie_id'])
    .assign(rating=1)
)

actual_vs_predicted

Unnamed: 0,user_id_maped,movie_id,value,rating
0,194,0,0.568321,1
1,440,0,0.094449,1
2,452,0,0.376890,1
3,474,0,0.011610,1
4,631,0,0.691146,1
...,...,...,...,...
141492,13170,1764,0.000161,1
141493,1899,1765,0.000477,1
141494,12953,1778,0.000104,1
141495,22544,1782,0.000022,1


In [None]:
# Error of the binary model on the held-out pairs. The scikit-learn metric
# signature is (y_true, y_pred): 'rating' is the target and 'value' the NMF
# prediction. MSE and MAE are symmetric, so the numbers are unchanged, but the
# roles are now passed correctly (matters for any asymmetric metric).
MSE = mean_squared_error(y_true=actual_vs_predicted['rating'], y_pred=actual_vs_predicted['value'])
RMSE_Binary = np.sqrt(MSE)
MAE_Binary = mean_absolute_error(y_true=actual_vs_predicted['rating'], y_pred=actual_vs_predicted['value'])

In [None]:
# Root mean squared error of the binary-matrix NMF predictions on the test pairs.
RMSE_Binary

0.7906636267804334

In [None]:
# Mean absolute error of the binary-matrix NMF predictions on the test pairs.
MAE_Binary

0.7599688878200188

In [None]:
# Rating MFTVR — NMF on the user x movie mean-rating matrix

In [None]:
# User x movie matrix of the mean training rating per (user, movie) pair;
# pairs that never occur in the training rows are filled with 0.
Rating_Matrix = train.pivot_table(
    values='rating',
    index='user_id_maped',
    columns='movie_id',
    aggfunc='mean',
    fill_value=0,
)

In [None]:
# Display the rating matrix (the rendering of a large frame is truncated).
Rating_Matrix

movie_id,0,1,2,3,4,5,6,7,8,9,...,1782,1783,1784,1785,1786,1787,1788,1789,1791,1793
user_id_maped,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.0,0.024167,0.000556,0.016389,0.024167,0.005556,0.0,0.0,0.0,0.000000,...,0.0,0,0.0,0,0,0.0,0,0.0,0,0
2,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,...,0.0,0,0.0,0,0,0.0,0,0.0,0,0
3,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,...,0.0,0,0.0,0,0,0.0,0,0.0,0,0
4,0.0,0.000000,0.033333,0.000000,0.002222,0.000000,0.0,0.0,0.0,0.000000,...,0.0,0,0.0,0,0,0.0,0,0.0,0,0
5,0.0,0.000000,0.000000,0.000000,0.000000,1.000000,0.0,0.0,0.0,0.000000,...,0.0,0,0.0,0,0,0.0,0,0.0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34279,0.0,0.000000,0.025278,0.000000,0.016944,0.000000,0.0,0.0,0.0,0.060278,...,0.0,0,0.0,0,0,0.0,0,0.0,0,0
34280,0.0,0.000000,0.000000,0.003333,0.000000,0.000000,0.0,0.0,0.0,0.000000,...,0.0,0,0.0,0,0,0.0,0,0.0,0,0
34281,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,...,0.0,0,0.0,0,0,0.0,0,0.0,0,0
34282,0.0,0.000000,0.000000,0.003611,0.000000,0.000000,0.0,0.0,0.0,0.000000,...,0.0,0,0.0,0,0,0.0,0,0.0,0,0


In [None]:
# NMF model with the same settings as the binary run (10 components, seed 5),
# this time fitted on the mean-rating matrix.
# NOTE(review): fit() followed by transform() solves for the user factors a
# second time; fit_transform() would avoid that but may give slightly
# different numbers.
model_train = NMF(n_components=10, random_state=5).fit(Rating_Matrix.values)

# User factors for the same matrix the model was fitted on.
nmf_features_train = model_train.transform(Rating_Matrix.values)

# Reconstruct the full matrix (user factors @ item components) and label it
# with the original user index and movie columns.
matrix_mult_train = pd.DataFrame(
    nmf_features_train @ model_train.components_,
    index=Rating_Matrix.index,
    columns=Rating_Matrix.columns,
)

# Long format: one row per (user, movie) pair with the predicted rating in 'value'.
melt_train_predictions = matrix_mult_train.reset_index().melt(id_vars=['user_id_maped'])

In [None]:
# Pair every test row's actual rating with the NMF prediction ('value') for the
# same (user, movie); the inner join keeps only pairs present on both sides.
actual_vs_predicted = melt_train_predictions.merge(
    test[['user_id_maped', 'movie_id', 'rating']],
    how='inner',
    on=['user_id_maped', 'movie_id'],
)
actual_vs_predicted

Unnamed: 0,user_id_maped,movie_id,value,rating
0,194,0,1.323658e-01,1.000000
1,440,0,6.623049e-02,1.000000
2,452,0,1.662159e-01,1.000000
3,474,0,6.133782e-02,1.000000
4,631,0,1.586998e-01,1.000000
...,...,...,...,...
141492,13170,1764,1.666301e-05,0.717949
141493,1899,1765,5.513958e-05,0.861702
141494,12953,1778,0.000000e+00,1.000000
141495,22544,1782,5.146782e-07,1.000000


In [None]:
# Error of the rating model on the held-out pairs. The scikit-learn metric
# signature is (y_true, y_pred): 'rating' is the observed value and 'value'
# the NMF prediction. MSE and MAE are symmetric, so the numbers are unchanged,
# but the roles are now passed correctly (matters for any asymmetric metric).
MSE = mean_squared_error(y_true=actual_vs_predicted['rating'], y_pred=actual_vs_predicted['value'])
RMSE_Rating = np.sqrt(MSE)

MAE_Rating = mean_absolute_error(y_true=actual_vs_predicted['rating'], y_pred=actual_vs_predicted['value'])

In [None]:
# Root mean squared error of the rating-matrix NMF predictions on the test pairs.
RMSE_Rating

0.3888692105359123

In [None]:
# Mean absolute error of the rating-matrix NMF predictions on the test pairs.
MAE_Rating

0.20723764406430226