# Importing and preparing data

In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv('rating.csv')
movies = pd.read_csv('movie.csv')

In [3]:
df.drop('timestamp', axis=1, inplace=True)

In [4]:
df.describe()

Unnamed: 0,userId,movieId,rating
count,20000260.0,20000260.0,20000260.0
mean,69045.87,9041.567,3.525529
std,40038.63,19789.48,1.051989
min,1.0,1.0,0.5
25%,34395.0,902.0,3.0
50%,69141.0,2167.0,3.5
75%,103637.0,4770.0,4.0
max,138493.0,131262.0,5.0


In [5]:
df.head()

Unnamed: 0,userId,movieId,rating
0,1,2,3.5
1,1,29,3.5
2,1,32,3.5
3,1,47,3.5
4,1,50,3.5


In [6]:
import pickle

with open('user_le.pickle', 'rb') as f:
    user_le = pickle.load(f)

with open('movie_le.pickle', 'rb') as f:
    movie_le = pickle.load(f)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [7]:
df['userId'] = user_le.transform(df['userId'])
df['movieId'] = movie_le.transform(df['movieId'])

In [8]:
df.describe()

Unnamed: 0,userId,movieId,rating
count,20000260.0,20000260.0,20000260.0
mean,69044.87,3602.899,3.525529
std,40038.63,4136.939,1.051989
min,0.0,0.0,0.5
25%,34394.0,885.0,3.0
50%,69140.0,2083.0,3.5
75%,103636.0,4674.0,4.0
max,138492.0,26743.0,5.0


In [9]:
movie2avgrating = {}

count = 0
for avg in df.groupby('movieId').mean()['rating']:
    movie2avgrating[count]= avg
    count+=1

movie_ids_gt_3 = [x for x in list(movie2avgrating.keys()) if movie2avgrating[x]>=3]
movie_ids_gt_4 = [x for x in list(movie2avgrating.keys()) if movie2avgrating[x]>=4]

In [10]:
movies

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
27273,131254,Kein Bund für's Leben (2007),Comedy
27274,131256,"Feuer, Eis & Dosenbier (2002)",Comedy
27275,131258,The Pirates (2014),Adventure
27276,131260,Rentun Ruusu (2001),(no genres listed)


In [11]:
# movies which are not rated
drop_ids=[26018, 26580, 27249, 27396, 31797, 32773, 33019, 33229, 33573, 45994, 63280, 65078, 66622, 69332, 69565, 69864, 72681, 72897, 72908, 77451, 78713, 79363, 80226, 80592, 80787, 80827, 84291, 85476, 86314, 86372, 86949, 87266, 87466, 88471, 88833, 89100, 89135, 89215, 89341, 89482, 89806, 90035, 90116, 90493, 92268, 92516, 92845, 92925, 93002, 94076, 94435, 94725, 94808, 96086, 96176, 96193, 96275, 96576, 96598, 98389, 98398, 98452, 98583, 98924, 99020, 99515, 99562, 99717, 100463, 100577, 101216, 101224, 101237, 101407, 101472, 101505, 102156, 102327, 102417, 102821, 102823, 103177, 103472, 103641, 104095, 104748, 104750, 105111, 105279, 105542, 105792, 105796, 105961, 105963, 106028, 106188, 106245, 106268, 106423, 106483, 106521, 106859, 107226, 107289, 107640, 108046, 108125, 108422, 108816, 108844, 108846, 109021, 109231, 109287, 109319, 109357, 109392, 109468, 109647, 109767, 110036, 110108, 110136, 110155, 110242, 110354, 110368, 110370, 110377, 110399, 110428, 110493, 110531, 110539, 110696, 110761, 110820, 110916, 110973, 111318, 111358, 111519, 111667, 111874, 111901, 111946, 111997, 112023, 112081, 112116, 112206, 112271, 112353, 112419, 112432, 112477, 112508, 112510, 112546, 112591, 112648, 112751, 112880, 113145, 113230, 113290, 113323, 113614, 113670, 113686, 113755, 113872, 113949, 114095, 114128, 114382, 114417, 114620, 114685, 114687, 114777, 114805, 114893, 114895, 114963, 115027, 115038, 115137, 115277, 115376, 115423, 115545, 115556, 115607, 115690, 115756, 115793, 115795, 115797, 115801, 115836, 115864, 115907, 115963, 115965, 115991, 116118, 116205, 116235, 116253, 116289, 116321, 116341, 116343, 116351, 116403, 116423, 116427, 116429, 116439, 116449, 116457, 116471, 116473, 116507, 116517, 116521, 116543, 116555, 116557, 116570, 116590, 116672, 116698, 116702, 116724, 116732, 116740, 116742, 116746, 116835, 116845, 116863, 116879, 116881, 116885, 116935, 116975, 117103, 117111, 117125, 117127, 117152, 117164, 117166, 117168, 117170, 117178, 117186, 117188, 117198, 117324, 117352, 117438, 117440, 117474, 117484, 117596, 117610, 117626, 117634, 117640, 117666, 117672, 117682, 117686, 117690, 117692, 117696, 117702, 117704, 117736, 117738, 117746, 117748, 117838, 117901, 117956, 117962, 117964, 117977, 117979, 117981, 118008, 118017, 118025, 118087, 118089, 118091, 118115, 118183, 118230, 118514, 118520, 118722, 118752, 118754, 118756, 118786, 118788, 118790, 118806, 118812, 118826, 118832, 118842, 118846, 118894, 118908, 118920, 119316, 119456, 119585, 119589, 119595, 119603, 119605, 119621, 119625, 119661, 119754, 119758, 119760, 119762, 119766, 119858, 119929, 120096, 120200, 120208, 120234, 120258, 120286, 120380, 120414, 120492, 120516, 120518, 120614, 120618, 120751, 120785, 120843, 121003, 121011, 121047, 121053, 121055, 121057, 121059, 121061, 121101, 121119, 121148, 121169, 121221, 121233, 121237, 121239, 121247, 121249, 121279, 121288, 121290, 121296, 121320, 121376, 121390, 121415, 121426, 121432, 121451, 121604, 121612, 121614, 121618, 121632, 121634, 121644, 121650, 121677, 121693, 121703, 121715, 121727, 121729, 121741, 121763, 121767, 121777, 121815, 121821, 121825, 121841, 121845, 121849, 121857, 121873, 121875, 121877, 121889, 121917, 121929, 121931, 121965, 121973, 121993, 121999, 122011, 122015, 122045, 122048, 122058, 122062, 122129, 122151, 122155, 122157, 122159, 122199, 122210, 122218, 122220, 122240, 122266, 122272, 122282, 122294, 122302, 122306, 122325, 122359, 122371, 122373, 122393, 122397, 122409, 122876, 122878, 122954, 123215, 123252, 123337, 123431, 124462, 124611, 125095, 125483, 125555, 125569, 125601, 125603, 125605, 125607, 125609, 125623, 126116, 127108, 127114, 127122, 127124, 127130, 127132, 127138, 127140, 127142, 127144, 127146, 127154, 127156, 127160, 127162, 127164, 127170, 127176, 127178, 127184, 127190, 127204, 127208, 127214, 127311, 127331, 127455, 127498, 127567, 127569, 127573, 127575, 127579, 127581, 127583, 127606, 127608, 127614, 127640, 127644, 127667, 127669, 127681, 127843, 127845, 127851, 128085, 128247, 128277, 128279, 128283, 128439, 128684, 128864, 128866, 128868, 128870, 128872, 128874, 128876, 128878, 128880, 128886, 129201, 129443, 129820, 130040]

In [12]:
movies = movies[~movies['movieId'].isin(drop_ids)]

In [13]:
movies['movieId']=movie_le.transform(movies['movieId'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  movies['movieId']=movie_le.transform(movies['movieId'])


# Importing Model

In [14]:
import tensorflow as tf

model = tf.keras.models.load_model('model-2.h5')

In [15]:
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 user_input (InputLayer)     [(None, 1)]                  0         []                            
                                                                                                  
 movie_input (InputLayer)    [(None, 1)]                  0         []                            
                                                                                                  
 user_emb (Embedding)        (None, 1, 100)               1384930   ['user_input[0][0]']          
                                                          0                                       
                                                                                                  
 movie_emb (Embedding)       (None, 1, 100)               2674400   ['movie_input[0][0]']   

# Usage

In [16]:
user_embeddings = {}

counter = 0

for user_id in df['userId'].unique():
    user_embedding = model.get_layer('user_emb')(np.array([user_id]))
    user_embedding = tf.keras.backend.flatten(user_embedding)
    user_embedding = tf.expand_dims(user_embedding, axis=0)
    user_dense = model.get_layer('user_dense')(user_embedding)
    
    user_embeddings[user_id] = user_dense.numpy()

    counter+=1

    if counter%10000==0:
        print(f"Completed {counter} Iterations")

Completed 10000 Iterations
Completed 20000 Iterations
Completed 30000 Iterations
Completed 40000 Iterations
Completed 50000 Iterations
Completed 60000 Iterations
Completed 70000 Iterations
Completed 80000 Iterations
Completed 90000 Iterations
Completed 100000 Iterations
Completed 110000 Iterations
Completed 120000 Iterations
Completed 130000 Iterations


In [17]:
movie_embeddings = {}

counter = 0

for movie_id in df['movieId'].unique():
    movie_embedding = model.get_layer('movie_emb')(np.array([movie_id]))
    movie_embedding = tf.keras.backend.flatten(movie_embedding)
    movie_embedding = tf.expand_dims(movie_embedding, axis=0)
    movie_dense = model.get_layer('movie_dense')(movie_embedding)
    
    movie_embeddings[movie_id] = movie_dense.numpy()

    counter+=1

    if counter%10000==0:
        print(f"Completed {counter} Iterations")

Completed 10000 Iterations
Completed 20000 Iterations


In [18]:
user_bias_embeddings = {}

counter = 0

for user_id in df['userId'].unique():
    user_bias_embedding = model.get_layer('user_bias_emb')(np.array([user_id]))
    user_bias_embedding = tf.keras.backend.flatten(user_bias_embedding)

    user_bias_embeddings[user_id] = user_bias_embedding.numpy()

    counter+=1

    if counter%10000==0:
        print(f"Completed {counter} Iterations")

Completed 10000 Iterations
Completed 20000 Iterations
Completed 30000 Iterations
Completed 40000 Iterations
Completed 50000 Iterations
Completed 60000 Iterations
Completed 70000 Iterations
Completed 80000 Iterations
Completed 90000 Iterations
Completed 100000 Iterations
Completed 110000 Iterations
Completed 120000 Iterations
Completed 130000 Iterations


In [19]:
movie_bias_embeddings = {}

counter = 0

for movie_id in df['movieId'].unique():
    movie_bias_embedding = model.get_layer('movie_bias_emb')(np.array([movie_id]))
    movie_bias_embedding = tf.keras.backend.flatten(movie_bias_embedding)
    
    movie_bias_embeddings[movie_id] = movie_bias_embedding.numpy()

    counter+=1

    if counter%10000==0:
        print(f"Completed {counter} Iterations")

Completed 10000 Iterations
Completed 20000 Iterations


In [29]:
def predict(user_id, movie_id):
    user_embedding = user_embeddings[user_id]
    movie_embedding = movie_embeddings[movie_id]

    # Passing user embedding and movie embedding through the concat layer
    concatenated_embeddings = model.get_layer('concat')([user_embedding, movie_embedding])

    # Passing the concatenated embeddings through the dense layers
    x = model.get_layer('dense1')(concatenated_embeddings)
    x = model.get_layer('dense2')(x)

    user_bias_embedding = user_bias_embeddings[user_id]
    movie_bias_embedding = movie_bias_embeddings[movie_id]

    user_bias_embedding = user_bias_embedding[:, tf.newaxis]
    movie_bias_embedding = movie_bias_embedding[:, tf.newaxis]

    # Combine embeddings, biases, and pass through the output layer
    input_tensors = [x, user_bias_embedding, movie_bias_embedding]
    concatenated_features = model.get_layer('combined_features')(input_tensors)
    
    x = model.get_layer('combined_dense1')(concatenated_features)
    x = model.get_layer('combined_dense2')(x)
    
    x = model.get_layer('output')(x)

    predicted_rating = x[0][0]

    return predicted_rating.numpy()

In [27]:
def user_recommendations(user_id):
    user_ratings = df[df['userId'] == user_id][['userId', 'movieId', 'rating']]
    recommendation = df[~df['movieId'].isin(user_ratings['movieId']) & df['movieId'].isin(movie_ids_gt_3)][['movieId']].drop_duplicates()
    recommendation['rating_predict'] = recommendation.apply(lambda x: predict(user_id, x['movieId']), axis=1)
    
    final_rec = recommendation.sort_values(by='rating_predict', ascending=False).merge(movies[['movieId', 'title']],
                                                                       on='movieId')
    return final_rec

In [58]:
rec = user_recommendations(16)
rec.head(25)

Unnamed: 0,movieId,rating_predict,title
0,315,4.575624,"Specialist, The (1994)"
1,523,4.525765,Ruby in Paradise (1993)
2,49,4.490026,When Night Is Falling (1995)
3,7356,4.459184,Night Crossing (1981)
4,843,4.453433,Lotto Land (1995)
5,2873,4.444339,Lulu on the Bridge (1998)
6,1173,4.441907,"Cook the Thief His Wife & Her Lover, The (1989)"
7,257,4.424994,Just Cause (1995)
8,2239,4.419117,Swept Away (Travolti da un insolito destino ne...
9,1171,4.409993,Bob Roberts (1992)


In [31]:
def test_predictions(user_id):
    user_ratings = df[df['userId'] == user_id][['userId', 'movieId', 'rating']]
    
    user_top_ratings = user_ratings.sort_values(by='rating', ascending=False).merge(movies[['movieId', 'title']],
                                                                       on='movieId',
                                                                       ).drop('userId', axis=1)

    comparision = df[df['movieId'].isin(user_ratings['movieId'])][['movieId']].drop_duplicates()
    comparision['rating_predict'] = comparision.apply(lambda x: predict(user_id, x['movieId']), axis=1)
    comparision = user_top_ratings.merge(comparision, on='movieId')

    return comparision

In [57]:
comparision = test_predictions(16)
comparision

Unnamed: 0,movieId,rating,title,rating_predict
0,1944,5.0,From Here to Eternity (1953),4.265951
1,453,5.0,For Love or Money (1993),4.213516
2,3662,5.0,Puppet Master III: Toulon's Revenge (1991),3.923573
3,3487,5.0,El Dorado (1966),4.236992
4,3169,5.0,"Falcon and the Snowman, The (1985)",3.862136
5,3014,5.0,Bustin' Loose (1981),3.708318
6,2597,5.0,Lost & Found (1999),3.589718
7,2268,4.0,"Few Good Men, A (1992)",3.778588
8,4544,4.0,Short Circuit 2 (1988),3.248641
9,3925,4.0,Stranger Than Paradise (1984),4.073469


In [160]:
import pickle

with open('user_embeddings.json', 'wb') as f:
    pickle.dump(user_embeddings, f)

with open('movie_embeddings.json', 'wb') as f:
    pickle.dump(movie_embeddings, f)

with open('user_bias_embeddings.json', 'wb') as f:
    pickle.dump(user_bias_embeddings, f)

with open('movie_bias_embeddings.json', 'wb') as f:
    pickle.dump(movie_bias_embeddings, f)

## Picking out users having a preference for romantic movies

In [60]:
romantic_movies = movies[movies['genres'].str.contains('Romance')]

romantic_ratings = ratings[df['movieId'].isin(romantic_movies['movieId'])]
user_avg_ratings = romantic_ratings.groupby('userId')['rating'].mean()

preference_threshold = 4.0

romantic_users = pd.DataFrame(user_avg_ratings[user_avg_ratings > preference_threshold]).reset_index().sort_values(ascending=False, by='rating')

romantic_users

Unnamed: 0,userId,rating
22324,88924,5.000000
9294,36988,5.000000
16230,64795,5.000000
28547,113948,5.000000
23604,94022,5.000000
...,...,...
8146,32322,4.006250
10263,40859,4.006173
23129,92104,4.005814
31708,126629,4.005155
