In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity

### Data loading

We will only use the MovieLens 1M dataset for this project, as we will use the movies genres as features.
The IMDb dataset does not provide the ratings of movies, and genres are already present in the MovieLens dataset.

Moreover, the IMDb dataset has much more movies than the MovieLens dataset, and we wouldn't be able to do anything with them.

We could use the IMDb dataset to get cast information, but this would increase the dimension of the features way to much for a recommendation system like this to be efficient.

In [2]:
df_movies = pd.read_csv("ml-1m/movies.dat", sep="::", engine="python", encoding='latin-1', names=['movieId', 'title', 'genres'])
df_ratings = pd.read_csv("ml-1m/ratings.dat", sep="::", engine="python", encoding='latin-1', names=['userId', 'movieId', 'rating', 'timestamp'])
df_users = pd.read_csv("ml-1m/users.dat", sep="::", engine="python", encoding='latin-1', names=['userId', 'gender', 'age', 'occupation', 'zip'])

Clean the movies dataframe

In [3]:
# Remove the year from the title
def remove_year(title):
    if title.split(" ")[-1].startswith("("):
        title = (" ".join(title.split(" ")[:-1])).strip()
    return title

def reorder_title(title):
    if title.split(',')[-1].strip() in ['The', 'A', 'An']:
        title = (title.split(',')[-1].strip() + " " + " ".join(title.split(',')[:-1])).strip()
    return title

def lower_case(title):
    return title.lower()

def clean_title(title):
    title = remove_year(title)
    title = reorder_title(title)
    title = lower_case(title)
    return title

df_movies['title'] = df_movies['title'].apply(clean_title)

df_movies

Unnamed: 0,movieId,title,genres
0,1,toy story,Animation|Children's|Comedy
1,2,jumanji,Adventure|Children's|Fantasy
2,3,grumpier old men,Comedy|Romance
3,4,waiting to exhale,Comedy|Drama
4,5,father of the bride part ii,Comedy
...,...,...,...
3878,3948,meet the parents,Comedy
3879,3949,requiem for a dream,Drama
3880,3950,tigerland,Drama
3881,3951,two family house,Drama


In [4]:
# Explode the 'genres' column

df_movies['genres'] = df_movies['genres'].str.split('|')

df_movies_exploded = df_movies.explode('genres')

df_movies_one_hot = pd.get_dummies(df_movies_exploded['genres'])

df_movies_one_hot_grouped = df_movies_one_hot.groupby(df_movies_exploded.index).sum()

df_movies_combined = pd.concat([df_movies.drop(columns=['genres']), df_movies_one_hot_grouped], axis=1)

df_movies_combined

Unnamed: 0,movieId,title,Action,Adventure,Animation,Children's,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,toy story,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0
1,2,jumanji,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0
2,3,grumpier old men,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0
3,4,waiting to exhale,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0
4,5,father of the bride part ii,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3878,3948,meet the parents,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
3879,3949,requiem for a dream,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
3880,3950,tigerland,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
3881,3951,two family house,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0


In [5]:
df_ratings = df_ratings.drop(columns=["timestamp"])

df_ratings

Unnamed: 0,userId,movieId,rating
0,1,1193,5
1,1,661,3
2,1,914,3
3,1,3408,4
4,1,2355,5
...,...,...,...
1000204,6040,1091,1
1000205,6040,1094,5
1000206,6040,562,5
1000207,6040,1096,4


In [6]:
df_users = df_users.drop(columns=["gender", "age", "occupation", "zip"])

In [7]:
df_ratings_sample = df_ratings.sample(n=1_000_000)

df = pd.merge(df_ratings_sample, df_movies_combined, on='movieId')

df

Unnamed: 0,userId,movieId,rating,title,Action,Adventure,Animation,Children's,Comedy,Crime,...,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,2458,748,3,the arrival,1,0,0,0,0,0,...,0,0,0,0,0,0,1,1,0,0
1,2344,1197,5,the princess bride,1,1,0,0,1,0,...,0,0,0,0,0,1,0,0,0,0
2,3746,3608,3,pee-wee's big adventure,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
3,3573,260,5,star wars: episode iv - a new hope,1,1,0,0,0,0,...,1,0,0,0,0,0,1,0,0,0
4,3593,21,3,get shorty,1,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
999995,1357,1969,2,a nightmare on elm street part 2: freddy's rev...,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
999996,5653,3078,4,liberty heights,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
999997,5667,3072,4,moonstruck,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
999998,1800,3527,3,predator,1,0,0,0,0,0,...,0,0,0,0,0,0,1,1,0,0


In [8]:
# We weight the genres by the rating value    
genres = list(df.columns[4:])
for genre in genres:
    df[genre] = df[genre] * df['rating']

df    

Unnamed: 0,userId,movieId,rating,title,Action,Adventure,Animation,Children's,Comedy,Crime,...,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,2458,748,3,the arrival,3,0,0,0,0,0,...,0,0,0,0,0,0,3,3,0,0
1,2344,1197,5,the princess bride,5,5,0,0,5,0,...,0,0,0,0,0,5,0,0,0,0
2,3746,3608,3,pee-wee's big adventure,0,0,0,0,3,0,...,0,0,0,0,0,0,0,0,0,0
3,3573,260,5,star wars: episode iv - a new hope,5,5,0,0,0,0,...,5,0,0,0,0,0,5,0,0,0
4,3593,21,3,get shorty,3,0,0,0,3,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
999995,1357,1969,2,a nightmare on elm street part 2: freddy's rev...,0,0,0,0,0,0,...,0,0,2,0,0,0,0,0,0,0
999996,5653,3078,4,liberty heights,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
999997,5667,3072,4,moonstruck,0,0,0,0,4,0,...,0,0,0,0,0,0,0,0,0,0
999998,1800,3527,3,predator,3,0,0,0,0,0,...,0,0,0,0,0,0,3,3,0,0


In [9]:
# We normalize the genres
genre_sum = df.groupby('userId')[genres].sum()
genre_count = df.groupby('userId')[genres].apply(lambda x: (x > 0).sum())
genre_norm = genre_sum.div(genre_count, axis=0).fillna(0)

df_features_users = pd.merge(df_ratings_sample, genre_norm, on='userId')

df_features_users

Unnamed: 0,userId,movieId,rating,Action,Adventure,Animation,Children's,Comedy,Crime,Documentary,...,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,2458,748,3,3.982456,4.291667,3.800000,3.600000,3.288660,3.272727,1.000000,...,4.000000,0.000000,4.272727,3.333333,3.500000,3.772727,3.975000,3.666667,4.000000,4.000000
1,2344,1197,5,3.483871,3.459459,3.750000,3.250000,3.360825,3.800000,0.000000,...,3.818182,4.000000,3.200000,3.200000,3.750000,3.650000,3.397436,3.578947,3.125000,3.923077
2,3746,3608,3,3.744000,3.718750,2.250000,3.272727,3.635659,3.941176,4.000000,...,3.866667,3.000000,3.320000,3.625000,4.153846,3.822222,3.636364,3.606557,4.320000,4.052632
3,3573,260,5,3.933333,4.000000,3.166667,2.857143,2.300000,0.000000,0.000000,...,3.250000,0.000000,2.500000,3.000000,0.000000,5.000000,3.789474,3.500000,4.166667,0.000000
4,3593,21,3,3.166667,2.400000,3.000000,3.166667,3.377049,5.000000,0.000000,...,0.000000,0.000000,3.000000,3.000000,0.000000,3.500000,2.500000,3.000000,5.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
999995,1357,1969,2,2.722222,2.714286,3.000000,3.000000,2.760000,3.000000,0.000000,...,0.000000,0.000000,2.779070,3.000000,2.500000,5.000000,2.906250,2.583333,5.000000,0.000000
999996,5653,3078,4,3.466667,3.705882,3.000000,3.750000,3.842857,4.615385,3.666667,...,4.000000,4.333333,2.750000,4.111111,4.000000,4.166667,3.285714,3.683333,4.000000,3.888889
999997,5667,3072,4,3.741379,3.388889,3.857143,3.666667,3.728814,3.878788,4.000000,...,3.555556,4.363636,4.363636,3.842105,4.428571,3.600000,4.043478,4.000000,3.966667,4.090909
999998,1800,3527,3,3.209790,3.480769,3.550000,3.208333,3.841463,3.968750,5.000000,...,2.625000,4.750000,3.375000,3.300000,3.250000,3.500000,3.243902,3.278689,4.142857,3.000000


In [10]:
df_features_movies = pd.merge(df_ratings_sample, df_movies_combined, on='movieId').drop(columns=['rating'])

df_ratings = df_features_users[['userId', 'movieId', 'rating']]

df_features_users = df_features_users.drop(columns=['rating'])

In [11]:
df_features_movies

Unnamed: 0,userId,movieId,title,Action,Adventure,Animation,Children's,Comedy,Crime,Documentary,...,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,2458,748,the arrival,1,0,0,0,0,0,0,...,0,0,0,0,0,0,1,1,0,0
1,2344,1197,the princess bride,1,1,0,0,1,0,0,...,0,0,0,0,0,1,0,0,0,0
2,3746,3608,pee-wee's big adventure,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
3,3573,260,star wars: episode iv - a new hope,1,1,0,0,0,0,0,...,1,0,0,0,0,0,1,0,0,0
4,3593,21,get shorty,1,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
999995,1357,1969,a nightmare on elm street part 2: freddy's rev...,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
999996,5653,3078,liberty heights,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
999997,5667,3072,moonstruck,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
999998,1800,3527,predator,1,0,0,0,0,0,0,...,0,0,0,0,0,0,1,1,0,0


In [12]:
df_features_users

Unnamed: 0,userId,movieId,Action,Adventure,Animation,Children's,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,2458,748,3.982456,4.291667,3.800000,3.600000,3.288660,3.272727,1.000000,3.133333,4.000000,0.000000,4.272727,3.333333,3.500000,3.772727,3.975000,3.666667,4.000000,4.000000
1,2344,1197,3.483871,3.459459,3.750000,3.250000,3.360825,3.800000,0.000000,3.484848,3.818182,4.000000,3.200000,3.200000,3.750000,3.650000,3.397436,3.578947,3.125000,3.923077
2,3746,3608,3.744000,3.718750,2.250000,3.272727,3.635659,3.941176,4.000000,4.116071,3.866667,3.000000,3.320000,3.625000,4.153846,3.822222,3.636364,3.606557,4.320000,4.052632
3,3573,260,3.933333,4.000000,3.166667,2.857143,2.300000,0.000000,0.000000,3.200000,3.250000,0.000000,2.500000,3.000000,0.000000,5.000000,3.789474,3.500000,4.166667,0.000000
4,3593,21,3.166667,2.400000,3.000000,3.166667,3.377049,5.000000,0.000000,3.600000,0.000000,0.000000,3.000000,3.000000,0.000000,3.500000,2.500000,3.000000,5.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
999995,1357,1969,2.722222,2.714286,3.000000,3.000000,2.760000,3.000000,0.000000,3.777778,0.000000,0.000000,2.779070,3.000000,2.500000,5.000000,2.906250,2.583333,5.000000,0.000000
999996,5653,3078,3.466667,3.705882,3.000000,3.750000,3.842857,4.615385,3.666667,4.072581,4.000000,4.333333,2.750000,4.111111,4.000000,4.166667,3.285714,3.683333,4.000000,3.888889
999997,5667,3072,3.741379,3.388889,3.857143,3.666667,3.728814,3.878788,4.000000,4.071942,3.555556,4.363636,4.363636,3.842105,4.428571,3.600000,4.043478,4.000000,3.966667,4.090909
999998,1800,3527,3.209790,3.480769,3.550000,3.208333,3.841463,3.968750,5.000000,4.206349,2.625000,4.750000,3.375000,3.300000,3.250000,3.500000,3.243902,3.278689,4.142857,3.000000


In [13]:
scalerTarget = MinMaxScaler((-1, 1))
scalerTarget.fit(df_ratings["rating"].to_numpy().reshape(-1, 1))
df_ratings["rating"] = scalerTarget.transform(df_ratings["rating"].to_numpy().reshape(-1, 1))

df_ratings

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_ratings["rating"] = scalerTarget.transform(df_ratings["rating"].to_numpy().reshape(-1, 1))


Unnamed: 0,userId,movieId,rating
0,2458,748,0.0
1,2344,1197,1.0
2,3746,3608,0.0
3,3573,260,1.0
4,3593,21,0.0
...,...,...,...
999995,1357,1969,-0.5
999996,5653,3078,0.5
999997,5667,3072,0.5
999998,1800,3527,0.0


In [14]:
user_train, user_test = train_test_split(
    df_features_users, train_size=0.80, shuffle=True, random_state=1
)

movie_train, movie_test = train_test_split(
    df_features_movies, train_size=0.80, shuffle=True, random_state=1
)

rating_train, rating_test = train_test_split(
    df_ratings, train_size=0.80, shuffle=True, random_state=1
)

print(user_train)
print(movie_train)
print(rating_train)

        userId  movieId    Action  Adventure  Animation  Children's    Comedy  \
771718     424       44  3.518797   3.701493   4.162791    3.843373  3.613333   
521462     411     1703  3.357143   3.290323   3.307692    3.216216  3.214035   
137361    2627     2011  2.774510   2.871795   3.769231    3.588235  3.232394   
404985    2941     2081  3.354839   3.363636   3.950000    3.586207  3.550459   
910092    1521      307  3.181818   3.000000   0.000000    0.000000  3.000000   
...        ...      ...       ...        ...        ...         ...       ...   
491263     823     1916  3.714286   4.200000   3.000000    3.666667  3.851485   
791624     850      800  3.106667   2.914286   3.687500    3.523810  3.459627   
470924    1264       16  3.360406   3.479592   3.666667    3.486486  3.731343   
491755    3860     1927  3.571429   3.517241   3.571429    3.111111  3.466667   
128037    3390     2069  4.333333   3.555556   4.636364    4.357143  4.196429   

           Crime  Documenta

The goal of this project is to build a recommendation system.
The recommendation will do the following:
- based on 2 users, recommend a movie that the two users could both like

The 2 users can be either ids or simply made up users from preferences.

Here is the plan:
- We create a movie-movie similarity matrix
- We build a new common-user that will have the average features of the two users
- We get N movies that are the most similar to the movies that the two users have in common
- We estimate the rating of the common-user for each of these N movies (using the neural network)
- We recommend the movie with the highest estimated rating


Building the movie-movie similarity matrix

In [15]:
# Remove the userId column, and remove duplicated movieIds
df_features_movies = df_features_movies.drop(columns=['userId']).drop_duplicates(subset=['movieId'])

# Remove the title column and reset index to movieId
df_features_movies = df_features_movies.drop(columns=['title']).set_index('movieId')

df_features_movies

Unnamed: 0_level_0,Action,Adventure,Animation,Children's,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
748,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0
1197,1,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0
3608,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
260,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0
21,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1714,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
1852,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0
641,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
576,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0


In [16]:
# Compute the cosine similarity between the movies
movie_similarity = cosine_similarity(df_features_movies)

movie_similarity = pd.DataFrame(movie_similarity, index=df_features_movies.index, columns=df_features_movies.index)

movie_similarity

movieId,748,1197,3608,260,21,2683,3069,317,2826,2871,...,3291,3321,789,1115,2742,1714,1852,641,576,1843
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
748,1.000000,0.288675,0.000000,0.577350,0.333333,0.000000,0.000000,0.000000,0.666667,0.408248,...,0.577350,0.000000,0.000000,0.000000,0.000000,0.0,0.408248,0.000000,0.000000,0.000000
1197,0.288675,1.000000,0.500000,0.500000,0.577350,0.500000,0.000000,0.288675,0.288675,0.353553,...,0.000000,0.500000,0.000000,0.353553,0.353553,0.5,0.000000,0.500000,0.500000,0.353553
3608,0.000000,0.500000,1.000000,0.000000,0.577350,1.000000,0.000000,0.577350,0.000000,0.000000,...,0.000000,1.000000,0.000000,0.707107,0.707107,0.0,0.000000,1.000000,1.000000,0.707107
260,0.577350,0.500000,0.000000,1.000000,0.288675,0.000000,0.000000,0.288675,0.288675,0.353553,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000
21,0.333333,0.577350,0.577350,0.288675,1.000000,0.577350,0.577350,0.333333,0.333333,0.000000,...,0.000000,0.577350,0.577350,0.816497,0.816497,0.0,0.408248,0.577350,0.577350,0.408248
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1714,0.000000,0.500000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,1.0,0.000000,0.000000,0.000000,0.000000
1852,0.408248,0.000000,0.000000,0.000000,0.408248,0.000000,0.707107,0.000000,0.408248,0.500000,...,0.707107,0.000000,0.707107,0.500000,0.500000,0.0,1.000000,0.000000,0.000000,0.000000
641,0.000000,0.500000,1.000000,0.000000,0.577350,1.000000,0.000000,0.577350,0.000000,0.000000,...,0.000000,1.000000,0.000000,0.707107,0.707107,0.0,0.000000,1.000000,1.000000,0.707107
576,0.000000,0.500000,1.000000,0.000000,0.577350,1.000000,0.000000,0.577350,0.000000,0.000000,...,0.000000,1.000000,0.000000,0.707107,0.707107,0.0,0.000000,1.000000,1.000000,0.707107


In [17]:
# Function that returns the n most similar movies to a given movie (but not the movie itself)
def get_similar_movies(movieId, n):
    result = movie_similarity.loc[movieId].sort_values(ascending=False).head(n+1)
    try:
        result = result.drop(movieId)
    except:
        pass
    return result.head(n)

get_similar_movies(1, 10)

movieId
3751    1.000000
2354    1.000000
1064    1.000000
2355    1.000000
3754    1.000000
2141    1.000000
3611    1.000000
2142    1.000000
3114    1.000000
588     0.866025
Name: 1, dtype: float64

Getting a common user

In [18]:
# Remove the movieId column, and remove duplicated userIds
df_features_users = df_features_users.drop(columns=['movieId']).drop_duplicates(subset=['userId'])

# Reset index to userId
df_features_users = df_features_users.set_index('userId')

df_features_users

Unnamed: 0_level_0,Action,Adventure,Animation,Children's,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2458,3.982456,4.291667,3.800000,3.600000,3.288660,3.272727,1.0,3.133333,4.000000,0.0,4.272727,3.333333,3.500000,3.772727,3.975000,3.666667,4.000000,4.000000
2344,3.483871,3.459459,3.750000,3.250000,3.360825,3.800000,0.0,3.484848,3.818182,4.0,3.200000,3.200000,3.750000,3.650000,3.397436,3.578947,3.125000,3.923077
3746,3.744000,3.718750,2.250000,3.272727,3.635659,3.941176,4.0,4.116071,3.866667,3.0,3.320000,3.625000,4.153846,3.822222,3.636364,3.606557,4.320000,4.052632
3573,3.933333,4.000000,3.166667,2.857143,2.300000,0.000000,0.0,3.200000,3.250000,0.0,2.500000,3.000000,0.000000,5.000000,3.789474,3.500000,4.166667,0.000000
3593,3.166667,2.400000,3.000000,3.166667,3.377049,5.000000,0.0,3.600000,0.000000,0.0,3.000000,3.000000,0.000000,3.500000,2.500000,3.000000,5.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
277,3.888889,3.625000,0.000000,0.000000,3.500000,4.000000,0.0,3.571429,4.000000,0.0,0.000000,0.000000,0.000000,3.600000,3.875000,4.166667,4.000000,0.000000
5145,2.333333,3.000000,0.000000,4.000000,3.250000,0.000000,0.0,3.857143,0.000000,0.0,3.000000,0.000000,3.000000,3.000000,3.000000,2.750000,3.000000,0.000000
947,3.000000,3.000000,0.000000,0.000000,3.230769,0.000000,0.0,3.333333,2.500000,0.0,0.000000,0.000000,0.000000,3.000000,0.000000,1.000000,0.000000,0.000000
3298,4.500000,5.000000,5.000000,5.000000,4.421053,5.000000,0.0,4.666667,0.000000,0.0,4.000000,0.000000,5.000000,4.000000,4.000000,4.666667,0.000000,0.000000


In [19]:
# Function that returns the average features of two users
def get_common_user(user_id1, user_id2):
    return (df_features_users.loc[user_id1] + df_features_users.loc[user_id2]) / 2

def get_common_user_from_preferences(user1_preferences, user2_preferences):
    return (user1_preferences + user2_preferences) / 2

get_common_user(1, 2)

Action         3.850000
Adventure      3.868421
Animation      2.055556
Children's     2.125000
Comedy         3.851429
Crime          3.791667
Documentary    0.000000
Drama          4.163653
Fantasy        3.500000
Film-Noir      2.000000
Horror         1.500000
Musical        2.142857
Mystery        1.666667
Romance        3.687500
Sci-Fi         3.960784
Thriller       3.575269
War            4.366667
Western        2.166667
dtype: float64

## The Neural Network model

In [20]:
user_NN = tf.keras.models.Sequential(
    [
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(16, activation='relu'),
        tf.keras.layers.Dense(1, activation='linear')
    ]
)

movie_NN = tf.keras.models.Sequential(
    [
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(16, activation='relu'),
        tf.keras.layers.Dense(1, activation='linear')
    ]
)

input_user = tf.keras.layers.Input(shape=(18,))
vu = user_NN(input_user)

input_movie = tf.keras.layers.Input(shape=(18,))
vm = movie_NN(input_movie)

dot = tf.keras.layers.Dot(axes=1)([vu, vm])

model = tf.keras.models.Model(inputs=[input_user, input_movie], outputs=dot)

model.compile(optimizer='adam', loss='mean_squared_error')

model.summary()

In [21]:
# Train the model
nb_epochs = 5

model.fit(
        [user_train.loc[:, "Action":].to_numpy(), movie_train.loc[:, "Action":].to_numpy()],
        rating_train.loc[:, "rating":].to_numpy(),
        epochs=nb_epochs,
        validation_data=([user_test.loc[:, "Action":].to_numpy(), movie_test.loc[:, "Action":].to_numpy()], rating_test.loc[:, "rating":].to_numpy()),
    )


Epoch 1/5
[1m25000/25000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 1ms/step - loss: 0.2686 - val_loss: 0.2579
Epoch 2/5
[1m25000/25000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 1ms/step - loss: 0.2596 - val_loss: 0.2610
Epoch 3/5
[1m25000/25000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 1ms/step - loss: 0.2578 - val_loss: 0.2586
Epoch 4/5
[1m25000/25000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 1ms/step - loss: 0.2577 - val_loss: 0.2568
Epoch 5/5
[1m25000/25000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 1ms/step - loss: 0.2571 - val_loss: 0.2562


<keras.src.callbacks.history.History at 0x19f961dd8e0>

In [22]:
def predict_rating(user, movie):
    # If the input is a pandas series, we need to convert it to a dataframe with one row
    if isinstance(user, pd.Series):
        user = pd.DataFrame(user).T
    if isinstance(movie, pd.Series):
        movie = pd.DataFrame(movie).T
    
    # The movie input is a pandas series, we need to convert it to a dataframe with one row
    prediction = model.predict([user.loc[:, "Action":].to_numpy().astype(np.float64), movie.loc[:, "Action":].to_numpy().astype(np.float64)], verbose=0)
    prediction = scalerTarget.inverse_transform(prediction)
    return prediction

predict_rating(user_train.loc[1], movie_train.loc[1])

array([[3.563559]], dtype=float32)

## Evaluation of the model

We put side by side the true rating and the predicted rating

In [23]:
# Evaluate the model
y_pred = model.predict([user_test.loc[:, "Action":].to_numpy(), movie_test.loc[:, "Action":].to_numpy()])
y_pu = scalerTarget.inverse_transform(y_pred)
y_true_u = scalerTarget.inverse_transform(rating_test["rating"].to_numpy().reshape(-1, 1))
result = rating_test.copy()
result["rating"] = y_true_u.flatten()
result["prediction"] = y_pu.flatten()

result

[1m6250/6250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 1ms/step


Unnamed: 0,userId,movieId,rating,prediction
276826,5483,1678,4.0,3.988578
849425,3219,223,3.0,3.624578
504499,2643,1246,5.0,3.422122
601054,5020,2143,2.0,3.638400
980221,5109,2080,4.0,3.830724
...,...,...,...,...
555867,4328,370,4.0,3.571474
30004,5458,1883,4.0,3.598290
124730,4934,580,4.0,3.309470
195783,5475,50,4.0,4.274350


In [24]:
# Compute the standard deviation
std = np.std(result["rating"] - result["prediction"])
print("Standard deviation of the model: ", std)

Standard deviation of the model:  1.0122766314683291


## Recommendation system

In [25]:
# Function that returns the 100 most similar movies to the movies that two users have rated (50 each)
def get_most_similars(user_id1, user_id2):
    # Get the movies that user1 and user2 have rated the highest
    user1_movies = df_ratings.loc[df_ratings['userId'] == user_id1].sort_values(by='rating', ascending=False).head(5)
    user2_movies = df_ratings.loc[df_ratings['userId'] == user_id2].sort_values(by='rating', ascending=False).head(5)
    
    # Get the 10 most similar movies to each of the movies that user1 and user2 have rated
    similar_movies = []
    for movieId in user1_movies['movieId']:
        similar_movies.extend(get_similar_movies(movieId, 10).index)
    for movieId in user2_movies['movieId']:
        similar_movies.extend(get_similar_movies(movieId, 10).index)
    
    if user_id1 == user_id2:
        similar_movies = list(set(similar_movies))
    else:
        # Remove the movies that user1 and user2 have rated
        similar_movies = list(set(similar_movies) - set(user1_movies['movieId']) - set(user2_movies['movieId']))
    
    return similar_movies

get_most_similars(1, 2)

[1282,
 1032,
 3724,
 781,
 3342,
 783,
 1936,
 3602,
 1299,
 661,
 3606,
 918,
 1178,
 2971,
 1951,
 3749,
 938,
 3759,
 2096,
 3377,
 2994,
 3379,
 308,
 2104,
 316,
 1087,
 3776,
 3143,
 2250,
 1868,
 1356,
 1489,
 594,
 2641,
 2643,
 2642,
 3675,
 1371,
 1375,
 2528,
 1376,
 2275,
 3053,
 3315,
 3571,
 2934,
 631,
 121,
 3196,
 254]

In [26]:
# Function that returns the title of a movie given its movieId, to make the recommendation more user-friendly

def get_movie_info(movieId):
    return df_movies.loc[df_movies['movieId'] == movieId]

get_movie_info(1)

Unnamed: 0,movieId,title,genres
0,1,toy story,"[Animation, Children's, Comedy]"


#### Recommendation function for two users with ids

In [27]:
# Recommandation function
def recommand_common_movie_from_user_ids(user_id1, user_id2):
    common_user = get_common_user(user_id1, user_id2)
    most_similars = get_most_similars(user_id1, user_id2)
    
    best_movie = None
    best_rating = -1
    for movieId in most_similars:
        rating = predict_rating(common_user, df_features_movies.loc[movieId])
        if rating > best_rating:
            best_rating = rating
            best_movie = movieId
            
    return get_movie_info(best_movie)

display(recommand_common_movie_from_user_ids(65, 756))
display(get_common_user(65, 756).sort_values(ascending=False))

Unnamed: 0,movieId,title,genres
1192,1210,star wars: episode vi - return of the jedi,"[Action, Adventure, Romance, Sci-Fi, War]"


Crime          4.622222
Horror         4.500000
Musical        4.500000
War            4.343750
Thriller       4.331418
Action         4.269592
Sci-Fi         4.255263
Drama          4.250000
Adventure      4.032353
Comedy         3.785714
Fantasy        3.500000
Romance        3.281250
Documentary    2.500000
Mystery        2.500000
Animation      2.300000
Children's     2.062500
Film-Noir      2.000000
Western        0.000000
dtype: float64

#### Recommendation function for two made up users from their given preferences

In [28]:
# Now, we will create a function to build a user preferences dataframe
def build_user_preferences(
        Action = 0.0,
        Adventure = 0.0,
        Animation = 0.0,
        Childrens = 0.0,
        Comedy = 0.0,
        Crime = 0.0,
        Documentary = 0.0,
        Drama = 0.0,
        Fantasy = 0.0,
        FilmNoir = 0.0,
        Horror = 0.0,
        Musical = 0.0,
        Mystery = 0.0,
        Romance = 0.0,
        SciFi = 0.0,
        Thriller = 0.0,
        War = 0.0,
        Western = 0.0
):
    user_prefs =  pd.DataFrame({
        'Action': [Action],
        'Adventure': [Adventure],
        'Animation': [Animation],
        'Childrens': [Childrens],
        'Comedy': [Comedy],
        'Crime': [Crime],
        'Documentary': [Documentary],
        'Drama': [Drama],
        'Fantasy': [Fantasy],
        'FilmNoir': [FilmNoir],
        'Horror': [Horror],
        'Musical': [Musical],
        'Mystery': [Mystery],
        'Romance': [Romance],
        'SciFi': [SciFi],
        'Thriller': [Thriller],
        'War': [War],
        'Western': [Western]
    })
    
    user_prefs = (user_prefs - 2.5) / 2.5
    
    return user_prefs

user_1 = build_user_preferences(Action=5, Adventure=5, SciFi=5)
user_2 = build_user_preferences(Action=5, Adventure=5, SciFi=5)

In [29]:
# We need to create a function that gives the closest user to a given user preferences
# We will use the cosine similarity for that

def get_closest_user(user_preferences):
    closest_user = None
    closest_similarity = -1
    for userId in df_features_users.index:
        similarity = cosine_similarity(user_preferences, df_features_users.loc[userId].to_numpy().reshape(1, -1))
        if similarity > closest_similarity:
            closest_similarity = similarity
            closest_user = userId
            
    return closest_user

get_closest_user(user_1)

4991

In [30]:
# Now we can create the recommendation function
def recommand_common_movie_from_user_preferences(user1_preferences, user2_preferences):
    closest_user_1 = get_closest_user(user1_preferences)
    closest_user_2 = get_closest_user(user2_preferences)
    
    common_user = get_common_user_from_preferences(user1_preferences, user2_preferences)
    most_similars = get_most_similars(closest_user_1, closest_user_2)
    
    best_movie = None
    best_rating = -1
    for movieId in most_similars:
        rating = predict_rating(common_user, df_features_movies.loc[movieId])
        if rating > best_rating:
            best_rating = rating
            best_movie = movieId
            
    return get_movie_info(best_movie)

recommand_common_movie_from_user_preferences(user_1, user_2)

Unnamed: 0,movieId,title,genres
771,781,stealing beauty,[Drama]


## Testing the recommendation system with random users
We can now test the recommendation system with random users
To see if the recommendation is relevant, we will display the preferences of the two users and the recommendation
The recommendation should be a movie that is similar to the preferences of the two users
If the 2 users prefer comedies, the recommendation should be a comedy, etc.

In [31]:
# We will create two random users and recommend a movie for them
for i in range(10):
    user_1 = build_user_preferences(
        Action=np.random.randint(0, 6),
        Adventure=np.random.randint(0, 6),
        Animation=np.random.randint(0, 6),
        Childrens=np.random.randint(0, 6),
        Comedy=np.random.randint(0, 6),
        Crime=np.random.randint(0, 6),
        Documentary=np.random.randint(0, 6),
        Drama=np.random.randint(0, 6),
        Fantasy=np.random.randint(0, 6),
        FilmNoir=np.random.randint(0, 6),
        Horror=np.random.randint(0, 6),
        Musical=np.random.randint(0, 6),
        Mystery=np.random.randint(0, 6),
        Romance=np.random.randint(0, 6),
        SciFi=np.random.randint(0, 6),
        Thriller=np.random.randint(0, 6),
        War=np.random.randint(0, 6),
        Western=np.random.randint(0, 6)
    )
    user_2 = build_user_preferences(
        Action=np.random.randint(0, 6),
        Adventure=np.random.randint(0, 6),
        Animation=np.random.randint(0, 6),
        Childrens=np.random.randint(0, 6),
        Comedy=np.random.randint(0, 6),
        Crime=np.random.randint(0, 6),
        Documentary=np.random.randint(0, 6),
        Drama=np.random.randint(0, 6),
        Fantasy=np.random.randint(0, 6),
        FilmNoir=np.random.randint(0, 6),
        Horror=np.random.randint(0, 6),
        Musical=np.random.randint(0, 6),
        Mystery=np.random.randint(0, 6),
        Romance=np.random.randint(0, 6),
        SciFi=np.random.randint(0, 6),
        Thriller=np.random.randint(0, 6),
        War=np.random.randint(0, 6),
        Western=np.random.randint(0, 6)
    )
    reco = recommand_common_movie_from_user_preferences(user_1, user_2)
    print("User 1: ")
    display(user_1)
    print("User 2: ")
    display(user_2)
    print("Recommendation: ")
    display(reco)
    print(" ----- ")

User 1: 


Unnamed: 0,Action,Adventure,Animation,Childrens,Comedy,Crime,Documentary,Drama,Fantasy,FilmNoir,Horror,Musical,Mystery,Romance,SciFi,Thriller,War,Western
0,-1.0,0.2,-0.2,0.2,-0.2,-0.2,-0.2,1.0,-0.6,0.6,-0.6,-1.0,-0.2,-0.2,0.6,-0.6,0.2,0.2


User 2: 


Unnamed: 0,Action,Adventure,Animation,Childrens,Comedy,Crime,Documentary,Drama,Fantasy,FilmNoir,Horror,Musical,Mystery,Romance,SciFi,Thriller,War,Western
0,1.0,-0.2,-1.0,-0.6,-0.2,0.6,-0.6,-1.0,-1.0,-0.2,0.6,-0.2,-0.6,-1.0,-0.6,-1.0,1.0,-1.0


Recommendation: 


Unnamed: 0,movieId,title,genres
2499,2568,the mod squad,"[Action, Crime]"


 ----- 
User 1: 


Unnamed: 0,Action,Adventure,Animation,Childrens,Comedy,Crime,Documentary,Drama,Fantasy,FilmNoir,Horror,Musical,Mystery,Romance,SciFi,Thriller,War,Western
0,0.6,1.0,1.0,1.0,0.6,-0.2,-0.2,0.2,0.6,0.6,-0.6,1.0,0.6,-0.6,-0.6,-0.6,-0.6,-1.0


User 2: 


Unnamed: 0,Action,Adventure,Animation,Childrens,Comedy,Crime,Documentary,Drama,Fantasy,FilmNoir,Horror,Musical,Mystery,Romance,SciFi,Thriller,War,Western
0,0.6,-1.0,-0.2,0.6,-0.2,1.0,0.6,-0.6,1.0,-0.2,-1.0,-0.6,-1.0,1.0,1.0,-0.6,0.6,-0.2


Recommendation: 


Unnamed: 0,movieId,title,genres
817,828,the adventures of pinocchio,"[Adventure, Children's]"


 ----- 
User 1: 


Unnamed: 0,Action,Adventure,Animation,Childrens,Comedy,Crime,Documentary,Drama,Fantasy,FilmNoir,Horror,Musical,Mystery,Romance,SciFi,Thriller,War,Western
0,-0.2,0.2,-0.2,-0.6,-0.2,-0.2,-1.0,-0.6,-0.6,-0.6,0.2,1.0,-0.2,0.2,-0.2,0.2,1.0,0.6


User 2: 


Unnamed: 0,Action,Adventure,Animation,Childrens,Comedy,Crime,Documentary,Drama,Fantasy,FilmNoir,Horror,Musical,Mystery,Romance,SciFi,Thriller,War,Western
0,-0.2,-1.0,0.2,-0.6,-0.6,-0.2,-1.0,-0.6,-1.0,0.2,-0.6,-0.2,1.0,-0.2,0.2,0.2,-1.0,0.2


Recommendation: 


Unnamed: 0,movieId,title,genres
2752,2821,male and female,"[Adventure, Drama]"


 ----- 
User 1: 


Unnamed: 0,Action,Adventure,Animation,Childrens,Comedy,Crime,Documentary,Drama,Fantasy,FilmNoir,Horror,Musical,Mystery,Romance,SciFi,Thriller,War,Western
0,1.0,-0.2,1.0,-0.2,-1.0,1.0,-1.0,0.6,-1.0,1.0,0.2,-0.6,-0.2,1.0,-0.2,-0.2,-0.2,-1.0


User 2: 


Unnamed: 0,Action,Adventure,Animation,Childrens,Comedy,Crime,Documentary,Drama,Fantasy,FilmNoir,Horror,Musical,Mystery,Romance,SciFi,Thriller,War,Western
0,0.2,-0.2,-0.2,-0.2,-0.2,-1.0,0.6,-1.0,1.0,-1.0,0.2,1.0,0.6,1.0,-0.2,0.6,-1.0,-0.2


Recommendation: 


Unnamed: 0,movieId,title,genres
3647,3716,fatal beauty,"[Action, Crime]"


 ----- 
User 1: 


Unnamed: 0,Action,Adventure,Animation,Childrens,Comedy,Crime,Documentary,Drama,Fantasy,FilmNoir,Horror,Musical,Mystery,Romance,SciFi,Thriller,War,Western
0,0.6,-1.0,1.0,-0.6,0.2,-0.2,0.6,-0.2,-0.6,-0.2,0.2,0.6,0.2,-1.0,-0.6,-0.6,0.6,-1.0


User 2: 


Unnamed: 0,Action,Adventure,Animation,Childrens,Comedy,Crime,Documentary,Drama,Fantasy,FilmNoir,Horror,Musical,Mystery,Romance,SciFi,Thriller,War,Western
0,-1.0,0.2,0.2,-0.6,1.0,-0.2,0.2,0.2,-0.6,1.0,-0.2,-0.2,-0.6,0.6,-0.2,0.6,-0.6,-0.6


Recommendation: 


Unnamed: 0,movieId,title,genres
3643,3712,soapdish,[Comedy]


 ----- 
User 1: 


Unnamed: 0,Action,Adventure,Animation,Childrens,Comedy,Crime,Documentary,Drama,Fantasy,FilmNoir,Horror,Musical,Mystery,Romance,SciFi,Thriller,War,Western
0,0.6,0.6,-1.0,-0.2,1.0,-0.2,-1.0,-0.2,-0.2,-0.6,0.2,-0.6,-0.6,0.6,-0.6,0.2,-0.6,0.2


User 2: 


Unnamed: 0,Action,Adventure,Animation,Childrens,Comedy,Crime,Documentary,Drama,Fantasy,FilmNoir,Horror,Musical,Mystery,Romance,SciFi,Thriller,War,Western
0,-0.6,0.2,0.6,-0.2,1.0,-0.6,0.6,0.2,-1.0,1.0,0.2,-0.2,-1.0,0.6,-0.6,1.0,0.2,-0.2


Recommendation: 


Unnamed: 0,movieId,title,genres
1825,1894,six days seven nights,"[Adventure, Comedy, Romance]"


 ----- 
User 1: 


Unnamed: 0,Action,Adventure,Animation,Childrens,Comedy,Crime,Documentary,Drama,Fantasy,FilmNoir,Horror,Musical,Mystery,Romance,SciFi,Thriller,War,Western
0,-1.0,-0.6,0.6,-1.0,0.6,-0.2,-0.6,0.2,0.6,0.2,-0.6,0.6,-0.6,-1.0,-0.2,-1.0,-0.2,-0.6


User 2: 


Unnamed: 0,Action,Adventure,Animation,Childrens,Comedy,Crime,Documentary,Drama,Fantasy,FilmNoir,Horror,Musical,Mystery,Romance,SciFi,Thriller,War,Western
0,-0.6,0.6,-0.2,1.0,-0.2,-0.6,0.6,-0.2,0.2,0.2,-0.6,-0.6,-0.6,0.6,0.6,0.6,-0.2,-1.0


Recommendation: 


Unnamed: 0,movieId,title,genres
1983,2052,hocus pocus,"[Children's, Comedy]"


 ----- 
User 1: 


Unnamed: 0,Action,Adventure,Animation,Childrens,Comedy,Crime,Documentary,Drama,Fantasy,FilmNoir,Horror,Musical,Mystery,Romance,SciFi,Thriller,War,Western
0,0.2,1.0,1.0,-1.0,-0.2,0.6,-1.0,-0.2,-0.6,-0.2,1.0,-0.2,1.0,-1.0,0.2,0.6,1.0,0.6


User 2: 


Unnamed: 0,Action,Adventure,Animation,Childrens,Comedy,Crime,Documentary,Drama,Fantasy,FilmNoir,Horror,Musical,Mystery,Romance,SciFi,Thriller,War,Western
0,0.6,-0.6,-1.0,0.6,1.0,-1.0,-0.6,1.0,0.6,-0.6,-0.2,0.2,0.2,0.2,1.0,-0.6,0.2,-1.0


Recommendation: 


Unnamed: 0,movieId,title,genres
2700,2769,the yards,"[Crime, Mystery]"


 ----- 
User 1: 


Unnamed: 0,Action,Adventure,Animation,Childrens,Comedy,Crime,Documentary,Drama,Fantasy,FilmNoir,Horror,Musical,Mystery,Romance,SciFi,Thriller,War,Western
0,1.0,-1.0,-1.0,-1.0,0.2,0.2,-0.6,-0.2,0.2,0.2,0.6,-0.2,-0.2,-0.6,0.6,1.0,-1.0,-0.6


User 2: 


Unnamed: 0,Action,Adventure,Animation,Childrens,Comedy,Crime,Documentary,Drama,Fantasy,FilmNoir,Horror,Musical,Mystery,Romance,SciFi,Thriller,War,Western
0,0.2,-0.2,0.6,-1.0,-0.6,-0.6,0.6,0.2,0.6,0.2,-0.6,-1.0,0.2,-1.0,1.0,0.2,-0.2,0.2


Recommendation: 


Unnamed: 0,movieId,title,genres
3594,3663,puppet master 4,"[Horror, Sci-Fi, Thriller]"


 ----- 
User 1: 


Unnamed: 0,Action,Adventure,Animation,Childrens,Comedy,Crime,Documentary,Drama,Fantasy,FilmNoir,Horror,Musical,Mystery,Romance,SciFi,Thriller,War,Western
0,0.6,-0.2,1.0,-1.0,1.0,1.0,0.6,0.6,-0.6,-0.2,-1.0,0.2,-0.6,-0.2,0.6,0.6,-0.6,-0.2


User 2: 


Unnamed: 0,Action,Adventure,Animation,Childrens,Comedy,Crime,Documentary,Drama,Fantasy,FilmNoir,Horror,Musical,Mystery,Romance,SciFi,Thriller,War,Western
0,1.0,-0.6,0.2,-0.6,0.6,-0.6,1.0,1.0,-0.6,-0.6,0.6,0.6,-0.6,1.0,-1.0,-1.0,-0.6,-1.0


Recommendation: 


Unnamed: 0,movieId,title,genres
263,266,legends of the fall,"[Drama, Romance, War, Western]"


 ----- 


## Conclusion

We can see that the recommendation system works most of the time.
But there are some rare cases where the recommendation is not relevant.

We could try to improve the model by adding more layers or more neurons, but as the dataset is quite small, and the training time is already quite long, we will not have time to improve this model, unfortunately.

Thus, using more features like the cast or directors would not really help, as adding to much dimension would make the model even worse.