In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.layers import Dense, Input, Dot
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras import Sequential, Model
from tensorflow.keras.regularizers import L2

from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split

import pickle




In [3]:
# Load the three pre-processed tables. `.iloc[:, 1:]` drops the first column
# of every CSV (presumably the index column written when the file was saved
# — TODO confirm against the pre-processing notebook).
users_data = pd.read_csv('Pre-Processed-Data/users_data.csv').iloc[:, 1:]
movies_data = pd.read_csv('Pre-Processed-Data/movies_data.csv').iloc[:, 1:]
ratings_data = pd.read_csv('Pre-Processed-Data/ratings_data.csv').iloc[:, 1:]

# movies_data repeats a movie row for every rating; keep one row per distinct
# movie and renumber the index 0..n-1 so positions and labels coincide.
movies_unique_data = movies_data.drop_duplicates().reset_index(drop=True)

print(users_data.shape, movies_data.shape, ratings_data.shape)
# Only the last expression of a cell is rendered, so the former mid-cell
# `movies_unique_data.head(10)` was computed and thrown away; it is removed.
movies_data.tail(10)

(87359, 17) (87359, 18) (87359, 1)


Unnamed: 0,movieId,title,year,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller
87349,161582,Hell or High Water,2016,0.0,0.0,0.0,0.0,0.0,0.682,0.0,0.682,0.0,0.0,0.0,0.0,0.0,0.0,0.0
87350,161634,Don't Breathe,2016,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.634
87351,162350,The Magnificent Seven,2016,0.678,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
87352,164179,Arrival,2016,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.778,0.0
87353,166528,Rogue One: A Star Wars Story,2016,0.77,0.77,0.0,0.0,0.0,0.0,0.0,0.0,0.77,0.0,0.0,0.0,0.0,0.77,0.0
87354,166534,Split,2017,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.642,0.0,0.642,0.0,0.0,0.0,0.0,0.642
87355,168248,John Wick: Chapter Two,2017,0.766,0.0,0.0,0.0,0.0,0.766,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.766
87356,168250,Get Out,2017,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.706,0.0,0.0,0.0,0.0,0.0
87357,168252,Logan,2017,0.832,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.832,0.0
87358,170875,The Fate of the Furious,2017,0.514,0.0,0.0,0.0,0.0,0.514,0.0,0.514,0.0,0.0,0.0,0.0,0.0,0.0,0.514


In [4]:
# Column position at which the feature columns start in each table
# (everything before it is skipped when building the model inputs).
users_genre_start = 2
movies_genre_start = 3

# Width of each model input = total columns minus the skipped leading columns.
num_users_features = users_data.shape[1] - users_genre_start
num_movies_features = movies_data.shape[1] - movies_genre_start

In [5]:
# Model inputs: every column from the feature offset onward.
movies_train = movies_data.iloc[:, movies_genre_start:]
users_train = users_data.iloc[:, users_genre_start:]

# Targets are taken as-is (this is the same object as ratings_data, not a copy).
ratings_train = ratings_data

# Feature vectors of the de-duplicated movie catalogue, used at prediction time.
movies_vecs = movies_unique_data.iloc[:, movies_genre_start:]

movies_train.head(10)

Unnamed: 0,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller
0,0.0,0.782,0.782,0.782,0.782,0.0,0.0,0.0,0.782,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.648,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.648,0.0,0.0
2,0.786,0.0,0.0,0.0,0.0,0.786,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.786
3,0.0,0.0,0.0,0.0,0.0,0.846,0.0,0.0,0.0,0.0,0.0,0.846,0.0,0.0,0.846
4,0.698,0.0,0.0,0.0,0.698,0.0,0.0,0.0,0.0,0.698,0.0,0.0,0.0,0.0,0.698
5,0.0,0.74,0.0,0.0,0.74,0.74,0.0,0.0,0.0,0.0,0.0,0.0,0.74,0.0,0.0
6,0.804,0.0,0.0,0.0,0.0,0.0,0.0,0.804,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.704,0.0,0.0,0.0,0.0,0.0,0.0,0.704,0.0,0.0,0.0,0.0,0.704,0.0,0.0
8,0.0,0.0,0.0,0.0,0.572,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.708,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.708,0.0,0.0


In [6]:
# NOTE(review): all three scalers are fitted on the complete dataset; the
# train/test split happens in a later cell, so held-out rows contribute to the
# scaling statistics.

# Standardise user features (fit first, then transform the same data).
scaler_users = StandardScaler().fit(users_train)
users_train = scaler_users.transform(users_train)

# Standardise movie features the same way.
scaler_movies = StandardScaler().fit(movies_train)
movies_train = scaler_movies.transform(movies_train)

# Ratings are min-max scaled; the fitted scaler is reused later to map
# predictions back to the original rating scale.
scaler_ratings = MinMaxScaler().fit(ratings_data)
ratings_train = scaler_ratings.transform(ratings_data)

ratings_train[:10].round(2)

array([[0.78],
       [0.78],
       [0.78],
       [1.  ],
       [0.56],
       [1.  ],
       [0.78],
       [1.  ],
       [1.  ],
       [1.  ]])

In [7]:
# Split the three row-aligned arrays in ONE call so that every user row,
# movie row and rating is guaranteed to land on the same side of the split.
# (Three separate calls only stayed aligned because they happened to share
# the same random_state and number of rows.)
(users_train, users_test,
 movies_train, movies_test,
 ratings_train, ratings_test) = train_test_split(
    users_train, movies_train, ratings_train,
    train_size=0.80, shuffle=True, random_state=1,
)

print(users_train.shape)
print(users_test.shape)

(69887, 15)
(17472, 15)


In [8]:
# Size of the embedding vector produced by each tower.
num_outputs = 32
tf.random.set_seed(1)

# User tower: user features -> num_outputs-dimensional embedding.
user_neural_network = Sequential([
    Dense(units=256, activation='relu'),
    Dense(units=128, activation='relu'),
    Dense(units=num_outputs, activation='linear')
])

# Movie tower: same architecture, separate weights.
movie_neural_network = Sequential([
    Dense(units=256, activation='relu'),
    Dense(units=128, activation='relu'),
    Dense(units=num_outputs, activation='linear')
])

# `shape` must be a tuple: `(n)` is just the integer n, `(n,)` is the 1-tuple.
input_user = Input(shape=(num_users_features,))
vu = user_neural_network(input_user)
vu = tf.linalg.l2_normalize(vu, axis=1)  # unit-length user embedding

input_movie = Input(shape=(num_movies_features,))
vm = movie_neural_network(input_movie)
vm = tf.linalg.l2_normalize(vm, axis=1)  # unit-length movie embedding

# Dot product of two unit vectors, i.e. their cosine similarity, is the
# model's prediction of the (scaled) rating.
output = Dot(axes=1)([vu, vm])

model = Model([input_user, input_movie], output)

model.summary()


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 15)]                 0         []                            
                                                                                                  
 input_2 (InputLayer)        [(None, 15)]                 0         []                            
                                                                                                  
 sequential (Sequential)     (None, 32)                   41120     ['input_1[0][0]']             
                                                                                                  
 sequential_1 (Sequential)   (None, 32)                   41120     ['input_2[0][0]']             
                                                                                             

In [9]:
# Re-seed so this cell behaves the same when re-run on its own.
tf.random.set_seed(1)

# Regress the scaled rating with mean squared error, optimised by Adam.
model.compile(
    optimizer=Adam(learning_rate=0.01),
    loss=MeanSquaredError(),
)

In [10]:
# Re-seed before training, then fit on the training split for 30 epochs.
tf.random.set_seed(1)
model.fit(
    x=[users_train, movies_train],
    y=ratings_train,
    epochs=30,
)

Epoch 1/30

Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.src.callbacks.History at 0x2ba0f562750>

In [54]:
# Hand-written preference profile to get recommendations for: one value per
# feature column. (A previous assignment from users_data row 2000 was
# overwritten immediately by this one and has been removed as dead code.)
user_pred_data = pd.Series({
    'Action':         5.00,
    'Adventure':      5.00,
    'Animation':      0.00,
    'Children':       0.00,
    'Comedy':         0.00,
    'Crime':          0.00,
    'Documentary':    0.00,
    'Drama':          0.00,
    'Fantasy':        0.00,
    'Horror':         0.00,
    'Musical':        0.00,
    'Mystery':        0.00,
    'Romance':        0.00,
    'Sci-Fi':         5.00,
    'Thriller':       5.00
})

# Repeat the profile once per unique movie so the model scores every
# (this user, movie) pair in a single predict call.
n = len(movies_vecs)
users_vecs = pd.DataFrame([user_pred_data]*n, columns=user_pred_data.index)

# Apply the scalers fitted on the training features.
scaled_movies_vecs = scaler_movies.transform(movies_vecs)
scaled_users_vecs = scaler_users.transform(users_vecs)

scaled_rating_pred = model.predict([scaled_users_vecs, scaled_movies_vecs])

# Map predictions back to the original rating scale.
rating_pred = scaler_ratings.inverse_transform(scaled_rating_pred)

# Row positions of the movies, best predicted rating first.
sorted_index = np.argsort(-rating_pred, axis=0).reshape(-1).tolist()

# argsort yields row POSITIONS, so select with .iloc rather than label-based
# .loc (the two only agree while the frame keeps its freshly reset index).
sorted_movies_for_user = movies_unique_data.iloc[sorted_index, :]
temp = sorted_movies_for_user.iloc[:8, :]

closest_eight_movies_indices = temp.index
closest_eight_movies_indices



Index([1074, 2712, 141, 263, 2902, 2644, 4188, 1812], dtype='int64')

In [47]:
# One copy of the user's profile per unique movie, so both model inputs have
# the same number of rows.
n = len(movies_vecs)
users_vecs = pd.DataFrame(
    [user_pred_data for _ in range(n)],
    columns=user_pred_data.index,
)

In [48]:
# Preview the first ten rows of the repeated profile.
users_vecs.iloc[:10]

Unnamed: 0,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller
0,5.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,5.0
1,5.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,5.0
2,5.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,5.0
3,5.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,5.0
4,5.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,5.0
5,5.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,5.0
6,5.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,5.0
7,5.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,5.0
8,5.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,5.0
9,5.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,5.0


In [49]:
# Scale both prediction inputs with the scalers fitted earlier on the
# training features (transform only, no re-fitting).
scaled_users_vecs = scaler_users.transform(users_vecs)
scaled_movies_vecs = scaler_movies.transform(movies_vecs)

scaled_users_vecs[:10]

array([[ 3.08160898,  3.02157656, -3.25056238, -3.90868365, -6.87035724,
        -6.73451327, -1.24471411, -7.80063303, -5.44534724, -3.65672733,
        -3.14887174, -4.8029547 , -6.36690388,  2.90770868,  2.96733308],
       [ 3.08160898,  3.02157656, -3.25056238, -3.90868365, -6.87035724,
        -6.73451327, -1.24471411, -7.80063303, -5.44534724, -3.65672733,
        -3.14887174, -4.8029547 , -6.36690388,  2.90770868,  2.96733308],
       [ 3.08160898,  3.02157656, -3.25056238, -3.90868365, -6.87035724,
        -6.73451327, -1.24471411, -7.80063303, -5.44534724, -3.65672733,
        -3.14887174, -4.8029547 , -6.36690388,  2.90770868,  2.96733308],
       [ 3.08160898,  3.02157656, -3.25056238, -3.90868365, -6.87035724,
        -6.73451327, -1.24471411, -7.80063303, -5.44534724, -3.65672733,
        -3.14887174, -4.8029547 , -6.36690388,  2.90770868,  2.96733308],
       [ 3.08160898,  3.02157656, -3.25056238, -3.90868365, -6.87035724,
        -6.73451327, -1.24471411, -7.80063303, 

In [50]:
# Score every (user profile, movie) pair; predictions are on the scaled
# rating range.
scaled_rating_pred = model.predict(x=[scaled_users_vecs, scaled_movies_vecs])

# Undo the rating scaling to get predictions on the original rating scale.
rating_pred = scaler_ratings.inverse_transform(scaled_rating_pred)
rating_pred



array([[2.5873485],
       [1.4678345],
       [3.6768496],
       ...,
       [2.8515933],
       [1.8480917],
       [3.1910727]], dtype=float32)

In [51]:
# Negating the predictions makes argsort order them from highest to lowest;
# the column vector of positions is then flattened into a plain Python list.
sorted_index = np.argsort(-rating_pred, axis=0).ravel().tolist()
sorted_index

[1074,
 2712,
 141,
 263,
 2902,
 2644,
 4188,
 1812,
 2003,
 583,
 159,
 1178,
 2539,
 2735,
 1120,
 2722,
 3242,
 3374,
 4043,
 951,
 4126,
 4047,
 1260,
 962,
 62,
 919,
 1146,
 1161,
 255,
 1769,
 670,
 660,
 1537,
 2561,
 1367,
 174,
 4232,
 1789,
 1601,
 954,
 4252,
 1642,
 669,
 1202,
 722,
 1024,
 24,
 1159,
 632,
 240,
 4094,
 176,
 1667,
 522,
 3642,
 833,
 4249,
 1169,
 2897,
 1639,
 1023,
 4128,
 940,
 1174,
 1192,
 3396,
 4462,
 204,
 4029,
 2760,
 2717,
 3744,
 1261,
 2786,
 3748,
 1193,
 1679,
 3049,
 2512,
 715,
 177,
 4,
 444,
 3347,
 81,
 232,
 1112,
 712,
 1922,
 3633,
 4059,
 224,
 3983,
 1429,
 1338,
 2321,
 125,
 728,
 1315,
 1186,
 1339,
 2806,
 1232,
 4134,
 2705,
 178,
 2306,
 2775,
 4187,
 259,
 4129,
 3088,
 4185,
 2795,
 924,
 1990,
 3020,
 181,
 38,
 1100,
 3038,
 2784,
 711,
 674,
 1111,
 603,
 4052,
 1697,
 1466,
 1173,
 1058,
 1182,
 161,
 811,
 3735,
 3640,
 1189,
 953,
 1442,
 209,
 2401,
 1175,
 1097,
 1687,
 4146,
 949,
 1190,
 945,
 213,
 2809,
 76,

In [52]:
# Reorder the predicted ratings (rows) into descending order.
sorted_rating_pred = np.take(rating_pred, sorted_index, axis=0)
sorted_rating_pred

array([[4.38236   ],
       [4.3776727 ],
       [4.3351316 ],
       ...,
       [0.6519845 ],
       [0.58049864],
       [0.45408317]], dtype=float32)

In [53]:
# sorted_index holds row POSITIONS produced by argsort, so select with .iloc.
sorted_movies_for_user = movies_unique_data.iloc[sorted_index, :]
# Renumber 0..n-1 so the displayed index is the recommendation rank.
# (The closing parenthesis was missing here, which is a syntax error.)
temp = sorted_movies_for_user.reset_index(drop=True)
temp.head(10)

Unnamed: 0,movieId,title,year,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller
0,7373,Hellboy,2004,0.67,0.67,0.0,0.0,0.0,0.0,0.0,0.0,0.67,0.67,0.0,0.0,0.0,0.0,0.0
1,7454,Van Helsing,2004,0.544,0.544,0.0,0.0,0.0,0.0,0.0,0.0,0.544,0.544,0.0,0.0,0.0,0.0,0.0
2,2366,King Kong,1933,0.716,0.716,0.0,0.0,0.0,0.0,0.0,0.0,0.716,0.716,0.0,0.0,0.0,0.0,0.0
3,70946,Troll 2,1990,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.646,0.646,0.0,0.0,0.0,0.0,0.0
4,91974,Underworld: Awakening,2012,0.702,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.702,0.702,0.0,0.0,0.0,0.0,0.0
5,6754,Underworld,2003,0.692,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.692,0.692,0.0,0.0,0.0,0.0,0.0
6,3930,"Creature from the Black Lagoon, The",1954,0.0,0.478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.478,0.0,0.0,0.0,0.478,0.0
7,31696,Constantine,2005,0.686,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.686,0.686,0.0,0.0,0.0,0.0,0.686
8,65682,Underworld: Rise of the Lycans,2009,0.692,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.692,0.692,0.0,0.0,0.0,0.0,0.692
9,606,Candyman: Farewell to the Flesh,1995,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.592,0.592,0.0,0.0,0.0,0.0,0.0


In [19]:
# Loss of the trained model on the held-out split.
model.evaluate(x=[users_test, movies_test], y=ratings_test)



0.03147200867533684

In [20]:
# Stand-alone model that maps movie features to the unit-length embedding,
# reusing the already-trained movie tower (no new weights: the summary shows
# the same sequential_1 layer).
# `shape` must be a tuple: `(n)` is just the integer n, `(n,)` is the 1-tuple.
input_movie = Input(shape=(num_movies_features,))
vm = movie_neural_network(input_movie)
vm = tf.linalg.l2_normalize(vm, axis=1)

model_m = Model(input_movie, vm)
model_m.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 15)]              0         
                                                                 
 sequential_1 (Sequential)   (None, 32)                41120     
                                                                 
 tf.math.l2_normalize_2 (TF  (None, 32)                0         
 OpLambda)                                                       
                                                                 
Total params: 41120 (160.62 KB)
Trainable params: 41120 (160.62 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [21]:
def sq_dist(a, b):
    """Return the squared Euclidean distance between two vectors.

    Parameters
    ----------
    a, b : array-like
        Inputs of matching (or broadcastable) shape. NumPy arrays, lists,
        tuples and plain numbers are all accepted; they are converted with
        ``np.asarray`` before subtracting.

    Returns
    -------
    scalar
        Sum over all elements of ``(a - b) ** 2``.
    """
    # asarray is a no-op for ndarrays, so existing callers are unaffected;
    # it lets plain Python sequences and scalars work too.
    diff = np.asarray(a) - np.asarray(b)
    return (diff ** 2).sum()

In [22]:
# Embed every unique movie: scale its features with the fitted movie scaler,
# then run them through the movie-embedding model.
vms = model_m.predict(x=scaler_movies.transform(movies_vecs))
print(vms.shape)

movies_vecs

(4484, 32)


Unnamed: 0,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller
0,0.000,0.782,0.782,0.782,0.782,0.000,0.0,0.00,0.782,0.000,0.000,0.000,0.000,0.00,0.000
1,0.000,0.000,0.000,0.000,0.648,0.000,0.0,0.00,0.000,0.000,0.000,0.000,0.648,0.00,0.000
2,0.786,0.000,0.000,0.000,0.000,0.786,0.0,0.00,0.000,0.000,0.000,0.000,0.000,0.00,0.786
3,0.000,0.000,0.000,0.000,0.000,0.846,0.0,0.00,0.000,0.000,0.000,0.846,0.000,0.00,0.846
4,0.698,0.000,0.000,0.000,0.698,0.000,0.0,0.00,0.000,0.698,0.000,0.000,0.000,0.00,0.698
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4479,0.000,0.000,0.000,0.000,0.684,0.684,0.0,0.00,0.000,0.000,0.684,0.684,0.000,0.00,0.000
4480,0.000,0.610,0.000,0.000,0.000,0.000,0.0,0.61,0.610,0.000,0.000,0.000,0.000,0.00,0.000
4481,0.514,0.000,0.000,0.000,0.000,0.514,0.0,0.00,0.000,0.000,0.000,0.000,0.000,0.00,0.000
4482,0.000,0.000,0.000,0.000,0.000,0.000,0.0,0.00,0.000,0.000,0.000,0.000,0.610,0.61,0.610


In [23]:
# Pairwise squared Euclidean distances between all movie embeddings.
number_of_movies = vms.shape[0]

distance = np.zeros((number_of_movies, number_of_movies))

# Fill one whole row per iteration with a vectorised NumPy expression
# instead of calling a Python function for every (i, j) pair; this keeps the
# extra memory at one (number_of_movies, embedding_dim) temporary.
# (x - y)**2 == (y - x)**2 element-wise, so the matrix is still symmetric.
for i in range(number_of_movies):
    distance[i, :] = ((vms - vms[i]) ** 2).sum(axis=1)

# A movie must never be returned as its own nearest neighbour.
np.fill_diagonal(distance, np.inf)

In [24]:
# First ten rows of the de-duplicated movie table.
movies_unique_data.iloc[:10]

Unnamed: 0,movieId,title,year,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller
0,1,Toy Story,1995,0.0,0.782,0.782,0.782,0.782,0.0,0.0,0.0,0.782,0.0,0.0,0.0,0.0,0.0,0.0
1,3,Grumpier Old Men,1995,0.0,0.0,0.0,0.0,0.648,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.648,0.0,0.0
2,6,Heat,1995,0.786,0.0,0.0,0.0,0.0,0.786,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.786
3,50,"Usual Suspects, The",1995,0.0,0.0,0.0,0.0,0.0,0.846,0.0,0.0,0.0,0.0,0.0,0.846,0.0,0.0,0.846
4,70,From Dusk Till Dawn,1996,0.698,0.0,0.0,0.0,0.698,0.0,0.0,0.0,0.0,0.698,0.0,0.0,0.0,0.0,0.698
5,101,Bottle Rocket,1996,0.0,0.74,0.0,0.0,0.74,0.74,0.0,0.0,0.0,0.0,0.0,0.0,0.74,0.0,0.0
6,110,Braveheart,1995,0.804,0.0,0.0,0.0,0.0,0.0,0.0,0.804,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,151,Rob Roy,1995,0.704,0.0,0.0,0.0,0.0,0.0,0.0,0.704,0.0,0.0,0.0,0.0,0.704,0.0,0.0
8,157,Canadian Bacon,1995,0.0,0.0,0.0,0.0,0.572,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,163,Desperado,1995,0.708,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.708,0.0,0.0


In [31]:
# For every movie, look up the titles of its eight nearest neighbours in
# embedding space (smallest squared distance first).
list_of_movies_and_closest = []

# Pull the title column once; rows are addressed by POSITION below because
# both `i` and the argsort output are positions into `distance`.
movie_titles = movies_unique_data['title']

for i in range(number_of_movies):
    closest_eight_movies_indices = np.argsort(distance[i, :])[:8]

    current_movie = movie_titles.iloc[i]
    closest_eight_movies = movie_titles.iloc[closest_eight_movies_indices].tolist()

    list_of_movies_and_closest.append([current_movie] + closest_eight_movies)

closest_movies_dataframe = pd.DataFrame(
    list_of_movies_and_closest,
    columns=['Movie', 'Rec. Movie1', 'Rec. Movie2', 'Rec. Movie3', 'Rec. Movie4',
             'Rec. Movie5', 'Rec. Movie6', 'Rec. Movie7', 'Rec. Movie8'],
)
# Spot-check the recommendations for one title.
closest_movies_dataframe[closest_movies_dataframe['Movie']=='Superman']

Unnamed: 0,Movie,Rec. Movie1,Rec. Movie2,Rec. Movie3,Rec. Movie4,Rec. Movie5,Rec. Movie6,Rec. Movie7,Rec. Movie8
161,Superman,Avatar,Black Panther,The Hunger Games: Catching Fire,Doctor Strange,Star Trek Into Darkness,Pacific Rim,Ant-Man,Captain America: The Winter Soldier


In [26]:
# Rows 60-89 (by position) of the movies whose Animation feature is non-zero.
movies_unique_data.loc[movies_unique_data['Animation'].ne(0)].iloc[60:90]

Unnamed: 0,movieId,title,year,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller
832,95543,Ice Age 4: Continental Drift,2012,0.0,0.582,0.582,0.0,0.582,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
836,103335,Despicable Me 2,2013,0.0,0.0,0.702,0.702,0.702,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
840,106696,Frozen,2013,0.0,0.714,0.714,0.0,0.714,0.0,0.0,0.0,0.714,0.0,0.714,0.0,0.714,0.0,0.0
843,112006,Tangled Ever After,2012,0.684,0.0,0.684,0.684,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
868,1405,Beavis and Butt-Head Do America,1996,0.0,0.588,0.588,0.0,0.588,0.588,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
907,2081,"Little Mermaid, The",1989,0.0,0.0,0.682,0.682,0.682,0.0,0.0,0.0,0.0,0.0,0.682,0.0,0.682,0.0,0.0
923,50872,Ratatouille,2007,0.0,0.0,0.768,0.768,0.0,0.0,0.0,0.768,0.0,0.0,0.0,0.0,0.0,0.0,0.0
926,60069,WALL·E,2008,0.0,0.808,0.808,0.808,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.808,0.808,0.0
927,63859,Bolt,2008,0.666,0.666,0.666,0.666,0.666,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
932,71057,9,2009,0.0,0.706,0.706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.706,0.0


In [34]:
# Everything the serving side needs, keyed by destination path.
# NOTE(review): `model` and `model_m` are Keras models; whether they pickle
# cleanly depends on the installed Keras version — confirm, or consider the
# native `model.save(...)` format instead.
files_to_save = {
    'Models/user_based.pcl': model,
    'Models/movie_based.pcl': model_m,
    'Models/distances.pcl': distance,
    'Models/movie_unique_data.pcl': movies_unique_data,
    'Models/movies_vecs.pcl': movies_vecs,
    'Models/scaler_users.pcl': scaler_users,
    'Models/scaler_movies.pcl': scaler_movies,
    'Models/scaler_ratings.pcl': scaler_ratings
}

# Best-effort save: a failure on one artifact is reported and the loop moves
# on to the next one.
for file_path, data in files_to_save.items():
    try:
        # The `with` block closes the file on success and on error. The old
        # extra `finally: f.close()` was redundant and raised NameError when
        # the very first open() failed (f was never bound), hiding the real
        # error.
        with open(file_path, 'wb') as f:
            pickle.dump(data, f)
    except Exception as e:
        print(f"Error saving {file_path}: {e}")