In [806]:
import numpy as np
import numpy.ma as ma
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
import tabulate
# from recsysNN_utils import *
pd.set_option("display.precision", 1)
from collections import *

#### Dataset downloaded from [here](https://grouplens.org/datasets/movielens/latest/)
#### this is the recommender system project from Machine Learning Specialization by Andrew Ng

In [746]:
!ls ./ml-latest-small

README.txt  links.csv   movies.csv  ratings.csv tags.csv


## Load, pre-process and re-format the dataset 

In [747]:
links = pd.read_csv("./ml-latest-small/links.csv")
movies = pd.read_csv("./ml-latest-small/movies.csv")
ratings = pd.read_csv("./ml-latest-small/ratings.csv")
tags = pd.read_csv("./ml-latest-small/tags.csv")

In [748]:
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [749]:
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [750]:
gen_list = movies["genres"].unique()

In [751]:
cnt = Counter()
for gen in gen_list:
    g = gen.split('|')
    for item in g:
        cnt[item] +=1

In [752]:
print(cnt)

Counter({'Drama': 349, 'Comedy': 341, 'Action': 341, 'Adventure': 310, 'Thriller': 274, 'Fantasy': 264, 'Sci-Fi': 236, 'Animation': 200, 'Romance': 200, 'Crime': 181, 'Children': 174, 'Mystery': 166, 'Horror': 153, 'Musical': 96, 'IMAX': 91, 'War': 75, 'Western': 64, 'Documentary': 30, 'Film-Noir': 30, '(no genres listed)': 1})


In [753]:
genToUse = ['Action', 'Adventure', 'Animation', 'Children', 'Comedy', 'Crime',\
'Documentary', 'Drama', 'Fantasy', 'Horror', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller']

In [754]:
for gen in genToUse:
    movies[gen] = movies['genres'].apply(lambda x: 1 if gen in x else 0)

In [755]:
movies.head()

Unnamed: 0,movieId,title,genres,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,Horror,Mystery,Romance,Sci-Fi,Thriller
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,0,1,1,1,1,0,0,0,1,0,0,0,0,0
1,2,Jumanji (1995),Adventure|Children|Fantasy,0,1,0,1,0,0,0,0,1,0,0,0,0,0
2,3,Grumpier Old Men (1995),Comedy|Romance,0,0,0,0,1,0,0,0,0,0,0,1,0,0
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance,0,0,0,0,1,0,0,1,0,0,0,1,0,0
4,5,Father of the Bride Part II (1995),Comedy,0,0,0,0,1,0,0,0,0,0,0,0,0,0


In [756]:
def getYear(s):
    year = s.split('(')[-1]
    year = year[0:4]
    try:
        year = int(year)
    except:
        return 0
    return year

In [757]:
def getTitle(s):
    year = s.split('(')[-1]
    year = year[0:4]
    try:
        year = int(year)
    except:
        return s
    return s[:-6]

In [758]:
movies['year'] = movies['title'].apply(lambda x: getYear(x))
movies.head()

Unnamed: 0,movieId,title,genres,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,Horror,Mystery,Romance,Sci-Fi,Thriller,year
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1995
1,2,Jumanji (1995),Adventure|Children|Fantasy,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1995
2,3,Grumpier Old Men (1995),Comedy|Romance,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1995
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1995
4,5,Father of the Bride Part II (1995),Comedy,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1995


In [759]:
movies['title'] = movies['title'].apply(lambda x: getTitle(x))

In [760]:
movies.head()

Unnamed: 0,movieId,title,genres,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,Horror,Mystery,Romance,Sci-Fi,Thriller,year
0,1,Toy Story,Adventure|Animation|Children|Comedy|Fantasy,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1995
1,2,Jumanji,Adventure|Children|Fantasy,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1995
2,3,Grumpier Old Men,Comedy|Romance,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1995
3,4,Waiting to Exhale,Comedy|Drama|Romance,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1995
4,5,Father of the Bride Part II,Comedy,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1995


In [761]:
movies = movies.drop('genres', axis=1)

In [762]:
movies.head()

Unnamed: 0,movieId,title,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,Horror,Mystery,Romance,Sci-Fi,Thriller,year
0,1,Toy Story,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1995
1,2,Jumanji,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1995
2,3,Grumpier Old Men,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1995
3,4,Waiting to Exhale,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1995
4,5,Father of the Bride Part II,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1995


In [763]:
def isPop(x):
    if x.year < 2000: return False
    res = False
    for gen in genToUse:
        if x[gen] == 1:
            res = True
    return res
        

In [764]:
movies['isPop'] = movies.apply(isPop,axis = 1)

In [765]:
movies[movies.isPop==False]

Unnamed: 0,movieId,title,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,Horror,Mystery,Romance,Sci-Fi,Thriller,year,isPop
0,1,Toy Story,0,1,1,1,1,0,0,0,1,0,0,0,0,0,1995,False
1,2,Jumanji,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1995,False
2,3,Grumpier Old Men,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1995,False
3,4,Waiting to Exhale,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1995,False
4,5,Father of the Bride Part II,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1995,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9679,183301,The Tale of the Bunny Picnic,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1986,False
9680,183317,Patti Rocks,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1988,False
9688,184245,De platte jungle,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1978,False
9729,190219,Bunny,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1998,False


In [766]:
movies = movies[movies.isPop==True]

In [767]:
movies[movies.isPop==False]

Unnamed: 0,movieId,title,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,Horror,Mystery,Romance,Sci-Fi,Thriller,year,isPop


In [768]:
movies = movies.drop('isPop', axis=1)
movies = movies.drop('title', axis=1)

In [769]:
movies.head()

Unnamed: 0,movieId,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,Horror,Mystery,Romance,Sci-Fi,Thriller,year
2083,2769,0,0,0,0,0,1,0,1,0,0,0,0,0,0,2000
2395,3177,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2000
2404,3190,0,1,0,0,0,0,0,0,0,0,0,0,1,1,2000
2425,3225,0,0,0,0,1,0,0,0,0,0,0,1,0,0,2000
2460,3273,0,0,0,0,1,0,0,0,0,1,1,0,0,1,2000


In [770]:
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [771]:
ratings.head()
ratings = ratings.drop('timestamp',axis=1)

In [772]:
table = pd.merge(movies, ratings, on="movieId")

In [773]:
table.head()

Unnamed: 0,movieId,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,Horror,Mystery,Romance,Sci-Fi,Thriller,year,userId,rating
0,2769,0,0,0,0,0,1,0,1,0,0,0,0,0,0,2000,160,5.0
1,2769,0,0,0,0,0,1,0,1,0,0,0,0,0,0,2000,182,2.0
2,2769,0,0,0,0,0,1,0,1,0,0,0,0,0,0,2000,410,4.0
3,2769,0,0,0,0,0,1,0,1,0,0,0,0,0,0,2000,414,4.0
4,2769,0,0,0,0,0,1,0,1,0,0,0,0,0,0,2000,448,2.0


# get the average rating for each movie

In [774]:
grp = ratings.groupby("movieId")
ratingMovie = dict()
for mv in list(grp.groups.keys()):
    ratingMovie[mv] = grp.get_group(mv).rating.mean()
#print(ratingMovie)
grp.get_group(2769).rating.mean()

3.4

In [775]:
table['ave rating'] = table.movieId.apply(lambda x: ratingMovie[x])

In [776]:
table

Unnamed: 0,movieId,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,Horror,Mystery,Romance,Sci-Fi,Thriller,year,userId,rating,ave rating
0,2769,0,0,0,0,0,1,0,1,0,0,0,0,0,0,2000,160,5.0,3.4
1,2769,0,0,0,0,0,1,0,1,0,0,0,0,0,0,2000,182,2.0,3.4
2,2769,0,0,0,0,0,1,0,1,0,0,0,0,0,0,2000,410,4.0,3.4
3,2769,0,0,0,0,0,1,0,1,0,0,0,0,0,0,2000,414,4.0,3.4
4,2769,0,0,0,0,0,1,0,1,0,0,0,0,0,0,2000,448,2.0,3.4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39163,193579,0,0,0,0,0,0,1,0,0,0,0,0,0,0,2015,184,3.5,3.5
39164,193581,1,0,1,0,1,0,0,0,1,0,0,0,0,0,2017,184,4.0,4.0
39165,193583,0,0,1,0,1,0,0,0,1,0,0,0,0,0,2017,184,3.5,3.5
39166,193585,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2017,184,3.5,3.5


# get the average rating & count for each user

In [777]:
grp = table.groupby("userId")
ratingUser = defaultdict(dict)
for usr in list(grp.groups.keys()):
    oneUser = grp.get_group(usr)
    ratingUser[usr]['av'] = max(0, oneUser.rating.mean())
    ratingUser[usr]['count'] = max(0, oneUser.rating.count())
    for gen in genToUse:
        ratingUser[usr][gen+'Usr'] = max(0, oneUser[oneUser[gen]==1].rating.mean())
#print(ratingMovie)
ratingUser[2]

{'av': 3.9615384615384617,
 'count': 26,
 'ActionUsr': 3.9545454545454546,
 'AdventureUsr': 4.166666666666667,
 'AnimationUsr': 0,
 'ChildrenUsr': 0,
 'ComedyUsr': 4.0,
 'CrimeUsr': 3.888888888888889,
 'DocumentaryUsr': 4.333333333333333,
 'DramaUsr': 3.9,
 'FantasyUsr': 0,
 'HorrorUsr': 3.0,
 'MysteryUsr': 4.0,
 'RomanceUsr': 0,
 'Sci-FiUsr': 3.875,
 'ThrillerUsr': 3.7}

# insert personal rating for each user

In [778]:
table['rating ave'] = table.userId.apply(lambda x: ratingUser[x]['av'])

In [779]:
table['rating count'] = table.userId.apply(lambda x: ratingUser[x]['count'])

In [780]:
for gen in genToUse:
    table[gen+'Usr'] = table.userId.apply(lambda x: ratingUser[x][gen+'Usr'])

In [781]:
table[table.userId==2]

Unnamed: 0,movieId,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,...,ComedyUsr,CrimeUsr,DocumentaryUsr,DramaUsr,FantasyUsr,HorrorUsr,MysteryUsr,RomanceUsr,Sci-FiUsr,ThrillerUsr
782,3578,1,1,0,0,0,0,0,1,0,...,4.0,3.9,4.3,3.9,0.0,3.0,4.0,0.0,3.9,3.7
13305,6874,1,0,0,0,0,1,0,0,0,...,4.0,3.9,4.3,3.9,0.0,3.0,4.0,0.0,3.9,3.7
16415,8798,1,0,0,0,0,1,0,1,0,...,4.0,3.9,4.3,3.9,0.0,3.0,4.0,0.0,3.9,3.7
21818,46970,1,0,0,0,1,0,0,0,0,...,4.0,3.9,4.3,3.9,0.0,3.0,4.0,0.0,3.9,3.7
22380,48516,0,0,0,0,0,1,0,1,0,...,4.0,3.9,4.3,3.9,0.0,3.0,4.0,0.0,3.9,3.7
25688,58559,1,0,0,0,0,1,0,1,0,...,4.0,3.9,4.3,3.9,0.0,3.0,4.0,0.0,3.9,3.7
26587,60756,0,0,0,0,1,0,0,0,0,...,4.0,3.9,4.3,3.9,0.0,3.0,4.0,0.0,3.9,3.7
27832,68157,1,0,0,0,0,0,0,1,0,...,4.0,3.9,4.3,3.9,0.0,3.0,4.0,0.0,3.9,3.7
28889,71535,1,0,0,0,1,0,0,0,0,...,4.0,3.9,4.3,3.9,0.0,3.0,4.0,0.0,3.9,3.7
29430,74458,0,0,0,0,0,0,0,1,0,...,4.0,3.9,4.3,3.9,0.0,3.0,4.0,0.0,3.9,3.7


# now produce user_train, item_train, y_train

In [782]:
user_train = pd.DataFrame(table, columns=['userId', 'rating count', 'ave rating']+[i+'Usr' for i in genToUse])
item_train = pd.DataFrame(table, columns=['movieId', 'year', 'ave rating'] + genToUse)
y_train = table['rating']

In [783]:
user_train.head()

Unnamed: 0,userId,rating count,ave rating,ActionUsr,AdventureUsr,AnimationUsr,ChildrenUsr,ComedyUsr,CrimeUsr,DocumentaryUsr,DramaUsr,FantasyUsr,HorrorUsr,MysteryUsr,RomanceUsr,Sci-FiUsr,ThrillerUsr
0,160,59,3.4,2.2,2.2,3.8,3.8,3.6,3.7,0.0,4.1,3.1,2.9,3.8,3.3,2.2,2.6
1,182,350,3.4,2.6,2.6,3.2,2.9,3.3,3.4,3.3,3.6,2.9,2.7,3.6,3.6,2.5,3.0
2,410,20,3.4,0.0,5.0,0.0,0.0,3.9,4.2,0.0,3.6,0.0,0.0,0.0,3.4,0.0,4.0
3,414,1266,3.4,3.3,3.5,3.7,3.5,3.3,3.5,3.8,3.6,3.5,3.3,3.7,3.4,3.5,3.4
4,448,1092,3.4,2.5,2.6,3.1,2.7,2.6,2.7,3.8,2.8,2.4,2.1,2.7,2.5,2.5,2.5


In [784]:
item_train.head()

Unnamed: 0,movieId,year,ave rating,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,Horror,Mystery,Romance,Sci-Fi,Thriller
0,2769,2000,3.4,0,0,0,0,0,1,0,1,0,0,0,0,0,0
1,2769,2000,3.4,0,0,0,0,0,1,0,1,0,0,0,0,0,0
2,2769,2000,3.4,0,0,0,0,0,1,0,1,0,0,0,0,0,0
3,2769,2000,3.4,0,0,0,0,0,1,0,1,0,0,0,0,0,0
4,2769,2000,3.4,0,0,0,0,0,1,0,1,0,0,0,0,0,0


In [785]:
y_train = y_train.to_numpy()

# re-scale the data

In [786]:
item_train_unscaled = item_train
user_train_unscaled = user_train
y_train_unscaled    = y_train

In [787]:
num_user_features = user_train.shape[1] - 3  # remove userid, rating count and ave rating during training
num_item_features = item_train.shape[1] - 1  # remove movie id at train time
uvs = 3  # user genre vector start
ivs = 3  # item genre vector start
u_s = 3  # start of columns to use in training, user
i_s = 1  # start of columns to use in training, items

In [788]:
scalerItem = StandardScaler()
item_train.head()

Unnamed: 0,movieId,year,ave rating,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,Horror,Mystery,Romance,Sci-Fi,Thriller
0,2769,2000,3.4,0,0,0,0,0,1,0,1,0,0,0,0,0,0
1,2769,2000,3.4,0,0,0,0,0,1,0,1,0,0,0,0,0,0
2,2769,2000,3.4,0,0,0,0,0,1,0,1,0,0,0,0,0,0
3,2769,2000,3.4,0,0,0,0,0,1,0,1,0,0,0,0,0,0
4,2769,2000,3.4,0,0,0,0,0,1,0,1,0,0,0,0,0,0


In [789]:
scalerItem.fit(item_train)
print(scalerItem.mean_)
item_train.head()

[4.53185946e+04 2.00595798e+03 3.47103503e+00 3.41809641e-01
 2.73794935e-01 8.96139706e-02 8.44822304e-02 3.67825776e-01
 1.70164420e-01 2.19056373e-02 4.20138889e-01 1.37663399e-01
 6.82955474e-02 8.59119690e-02 1.67636846e-01 1.86248979e-01
 2.79130923e-01]


Unnamed: 0,movieId,year,ave rating,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,Horror,Mystery,Romance,Sci-Fi,Thriller
0,2769,2000,3.4,0,0,0,0,0,1,0,1,0,0,0,0,0,0
1,2769,2000,3.4,0,0,0,0,0,1,0,1,0,0,0,0,0,0
2,2769,2000,3.4,0,0,0,0,0,1,0,1,0,0,0,0,0,0
3,2769,2000,3.4,0,0,0,0,0,1,0,1,0,0,0,0,0,0
4,2769,2000,3.4,0,0,0,0,0,1,0,1,0,0,0,0,0,0


In [790]:
item_train = scalerItem.transform(item_train)
print(type(item_train))
item_train

<class 'numpy.ndarray'>


array([[-0.96276672, -1.26477237, -0.12619825, ..., -0.44877465,
        -0.47841102, -0.62226558],
       [-0.96276672, -1.26477237, -0.12619825, ..., -0.44877465,
        -0.47841102, -0.62226558],
       [-0.96276672, -1.26477237, -0.12619825, ..., -0.44877465,
        -0.47841102, -0.62226558],
       ...,
       [ 3.35476838,  2.34402542,  0.05145812, ..., -0.44877465,
        -0.47841102, -0.62226558],
       [ 3.35481363,  2.34402542,  0.05145812, ..., -0.44877465,
        -0.47841102, -0.62226558],
       [ 3.35485889,  2.55630765,  0.05145812, ..., -0.44877465,
        -0.47841102, -0.62226558]])

In [791]:
scalerUser = StandardScaler()
scalerUser.fit(user_train)
print(scalerUser.mean_)
user_train = scalerUser.transform(user_train)

[326.87111928 356.17994281   3.47103503   3.38380925   3.44436145
   3.39974842   3.26075096   3.37019465   3.5221778    2.63832192
   3.58036348   3.37093051   2.87605323   3.51221411   3.42412664
   3.3531773    3.41449303]


In [792]:
print(y_train)
scalerTarget = MinMaxScaler((-1, 1))
scalerTarget.fit(y_train.reshape(-1, 1))
y_train = scalerTarget.transform(y_train.reshape(-1, 1))
#ynorm_test = scalerTarget.transform(y_test.reshape(-1, 1))
print(y_train)
print(np.allclose(item_train_unscaled, scalerItem.inverse_transform(item_train)))
print(np.allclose(user_train_unscaled, scalerUser.inverse_transform(user_train)))

[5.  2.  4.  ... 3.5 3.5 3.5]
[[ 1.        ]
 [-0.33333333]
 [ 0.55555556]
 ...
 [ 0.33333333]
 [ 0.33333333]
 [ 0.33333333]]
True
True


In [793]:
item_train, item_test = train_test_split(item_train, train_size=0.80, shuffle=True, random_state=1)
user_train, user_test = train_test_split(user_train, train_size=0.80, shuffle=True, random_state=1)
y_train, y_test       = train_test_split(y_train,    train_size=0.80, shuffle=True, random_state=1)
print(f"movie/item training data shape: {item_train.shape}")
print(f"movie/item test data shape: {item_test.shape}")
print(item_test,user_test)

movie/item training data shape: (31334, 17)
movie/item test data shape: (7834, 17)
[[ 0.29361622  0.43348542  0.25644624 ... -0.44877465 -0.47841102
   1.60703088]
 [-0.93681363 -1.26477237  0.48476634 ... -0.44877465 -0.47841102
  -0.62226558]
 [ 1.14855216  1.28261431  0.60219287 ... -0.44877465 -0.47841102
   1.60703088]
 ...
 [ 1.20312832  1.28261431  1.60595137 ...  2.22828985 -0.47841102
  -0.62226558]
 [ 1.52807366  1.70717875  1.03007373 ... -0.44877465  2.09025285
  -0.62226558]
 [ 1.66492134  1.70717875 -1.01448011 ... -0.44877465 -0.47841102
  -0.62226558]] [[ 1.26702378 -0.96197605  0.25644624 ...  1.21820805  2.37839344
   1.89803253]
 [ 0.19122769 -0.92871897  0.48476634 ... -0.16465535 -0.51006982
  -0.16329393]
 [-1.48410059 -0.14868922  0.60219287 ...  0.42800039  0.80796681
   1.07868175]
 ...
 [ 1.16056479  0.0901571   1.60595137 ...  0.74977865  0.32576502
   0.49923767]
 [ 0.41535187 -0.904532    1.03007373 ...  0.43970717  0.86195023
   1.33767194]
 [ 0.67869779  

#  Neural Network for content-based filtering

<figure>
    <center> <img src="RecSysNN.png"   style="width:500px;height:280px;" ></center>
</figure>

In [None]:
num_outputs = 32
tf.random.set_seed(1)
user_NN = tf.keras.models.Sequential([
    ### START CODE HERE ###     
   tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(num_outputs, activation='linear') 
    ### END CODE HERE ###  
])

item_NN = tf.keras.models.Sequential([
    ### START CODE HERE ###   
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(num_outputs, activation='linear')
    ### END CODE HERE ###  
])

# create the user input and point to the base network
input_user = tf.keras.layers.Input(shape=(num_user_features))
vu = user_NN(input_user)
vu = tf.linalg.l2_normalize(vu, axis=1)

# create the item input and point to the base network
input_item = tf.keras.layers.Input(shape=(num_item_features))
vm = item_NN(input_item)
vm = tf.linalg.l2_normalize(vm, axis=1)

# compute the dot product of the two vectors vu and vm
output = tf.keras.layers.Dot(axes=1)([vu, vm])

# specify the inputs and output of the model
model = tf.keras.Model([input_user, input_item], output)

model.summary()

In [795]:
tf.random.set_seed(1)
cost_fn = tf.keras.losses.MeanSquaredError()
opt = keras.optimizers.Adam(learning_rate=0.01)
model.compile(optimizer=opt,
              loss=cost_fn)

In [796]:
tf.random.set_seed(1)
model.fit([user_train[:, u_s:], item_train[:, i_s:]], y_train, epochs=40)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<keras.callbacks.History at 0x7fc0c67cf760>

In [797]:
model.evaluate([user_test[:, u_s:], item_test[:, i_s:]], y_test)



0.10915423929691315

# prediction for existing user

In [798]:
user_train_unscaled.head()
user_vecs = user_train_unscaled.iloc[5:10]
item_vecs = item_train_unscaled.iloc[5:10]
y_vecs = y_train_unscaled[5:10]

In [799]:
user_train_unscaled.iloc[5:10]
y_train_unscaled[5:10]

array([2. , 4. , 4. , 3.5, 2. ])

In [800]:
item_train_unscaled.iloc[5:10]

Unnamed: 0,movieId,year,ave rating,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,Horror,Mystery,Romance,Sci-Fi,Thriller
5,3177,2000,3.2,0,0,0,0,1,0,0,0,0,0,0,0,0,0
6,3177,2000,3.2,0,0,0,0,1,0,0,0,0,0,0,0,0,0
7,3177,2000,3.2,0,0,0,0,1,0,0,0,0,0,0,0,0,0
8,3177,2000,3.2,0,0,0,0,1,0,0,0,0,0,0,0,0,0
9,3177,2000,3.2,0,0,0,0,1,0,0,0,0,0,0,0,0,0


In [801]:
suser_vecs = scalerUser.transform(user_vecs)
sitem_vecs = scalerItem.transform(item_vecs)

In [802]:
y_p = model.predict([suser_vecs[:, u_s:], sitem_vecs[:, i_s:]])



In [803]:
y_pu = scalerTarget.inverse_transform(y_p)

In [804]:
print(y_pu)

[[3.05369  ]
 [4.0902166]
 [3.6064677]
 [2.9264011]
 [2.763914 ]]


# How to recommend movies to a user

- Given a user content vector, we compute the normalized $v_u$ using the trained network 
- Get a list of movies, compute their normalized $v_m$
- find distances between $v_u$ and $v_m$, movies with the small distance will be recommended