# MovieLens recommender system
## For this data we chose the content-based approach.
## This code was written by Erez Ratner, Ilan Yevdaiev and Ido Reshef only.
## For more info, please refer to the presentation:
## https://www.canva.com/design/DAFMHeMPZlU/z0qHan_aSeVS2F89jIKuQg/view?utm_content=DAFMHeMPZlU&utm_campaign=designshare&utm_medium=link&utm_source=publishsharelink

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from lightgbm import LGBMRegressor
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load the tag-genome scores; the preview below shows one row per
# (movieId, tagId) pair together with its relevance value.
df = pd.read_csv("genome-scores.csv")
df

Unnamed: 0,movieId,tagId,relevance
0,1,1,0.02875
1,1,2,0.02375
2,1,3,0.06250
3,1,4,0.07575
4,1,5,0.14075
...,...,...,...
15584443,206499,1124,0.11000
15584444,206499,1125,0.04850
15584445,206499,1126,0.01325
15584446,206499,1127,0.14025


In [3]:
# List the distinct movie ids that appear in the genome scores.
df["movieId"].unique()

array([     1,      2,      3, ..., 205383, 205425, 206499])

In [4]:
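# One-off preprocessing, kept commented out for reference: it reshapes the
# genome scores into one row per movie (movieId followed by its 1128 tag
# relevance values) and saves the result as movie_relevence.csv.
#newdf1 = pd.DataFrame(columns=["movieId"])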
#newdf2 = pd.DataFrame(columns=[i+1 for i in range(1128)])
#newdf = pd.concat([newdf1,newdf2])
#newdf

#for movieID in list(df["movieId"].unique()):
#    c = [movieID]
#    c.extend(list(df[df["movieId"] == movieID]["relevance"]))
#    newdf.loc[len(newdf)] = c

#newdf

#newdf.to_csv("movie_relevence.csv",index=False)

In [5]:
# Per-user movie ratings (the preview further down shows the columns
# userId, movieId, rating and timestamp).
all_users_ratings = pd.read_csv("ratings.csv")
# Per-movie tag relevance table; presumably the file written by the
# commented-out preprocessing cell above -- verify before regenerating.
all_relevance = pd.read_csv("movie_relevence.csv")
# Movie metadata; later cells read its movieId and genres columns.
movies_names=pd.read_csv("movies.csv")

In [6]:
# Preview the ratings of a single user (userId 1).
ratings=all_users_ratings[all_users_ratings.userId==1]
ratings

Unnamed: 0,userId,movieId,rating,timestamp
0,1,296,5.0,1147880044
1,1,306,3.5,1147868817
2,1,307,5.0,1147868828
3,1,665,5.0,1147878820
4,1,899,3.5,1147868510
...,...,...,...,...
65,1,27193,3.0,1147879774
66,1,27266,4.5,1147879365
67,1,27721,3.0,1147869115
68,1,31956,3.5,1147877610


In [7]:
# Ratings strictly above this value count as movies the user liked.
RATING_NUMBER = 4


def get_top_genres_of_user(id, ratings=None, movies=None):
    """Count the genres of the movies a user rated above RATING_NUMBER.

    Args:
        id: userId to look up. The name shadows the builtin ``id`` but is
            kept so existing callers (including keyword callers) still work.
        ratings: DataFrame with "userId", "movieId" and "rating" columns.
            Defaults to the notebook-level ``all_users_ratings``.
        movies: DataFrame with "movieId" and "genres" columns, where
            "genres" is a "|"-separated string. Defaults to the
            notebook-level ``movies_names``.

    Returns:
        DataFrame with the columns "genre" and "counter", sorted by
        "counter" in descending order and re-indexed from 0. The frame is
        empty (but keeps both columns) when the user has no liked movies.
    """
    ratings = all_users_ratings if ratings is None else ratings
    movies = movies_names if movies is None else movies

    liked = ratings[(ratings["userId"] == id) & (ratings["rating"] > RATING_NUMBER)]
    liked_genres = movies.loc[movies["movieId"].isin(liked["movieId"]), "genres"]

    genre_counts = {}
    for genres in liked_genres:
        # split() yields a one-element list for single-genre movies, so they
        # need no separate branch (the old branch counted a stale variable).
        for name in genres.split("|"):
            genre_counts[name] = genre_counts.get(name, 0) + 1

    result = pd.DataFrame(
        {"genre": list(genre_counts), "counter": list(genre_counts.values())}
    )
    # A stable sort keeps tied genres in first-seen order between runs.
    result = result.sort_values("counter", ascending=False, kind="mergesort")
    return result.reset_index(drop=True)

In [8]:
# Show the genre counts of the movies user 1 rated above RATING_NUMBER.
get_top_genres_of_user(1)

Unnamed: 0,genre,counter
0,Drama,13
1,Romance,11
2,Comedy,9
3,Crime,4
4,Sci-Fi,4
5,Thriller,3
6,War,3
7,Adventure,2
8,Action,2
9,Mystery,1


In [9]:
# How many movies to recommend and how many genres to print in the summary.
NUMBER_OF_RECOMMENDATIONS = 10
NUMBER_OF_GENRES = 3


def get_recommendations(id):
    """Recommend unrated movies to a user from their own ratings.

    A LightGBM regressor is trained on the tag relevance rows of the movies
    the user rated (target: the user's rating) and then used to predict a
    rating for every movie in ``all_relevance`` the user has not rated.

    Reads the notebook-level frames ``all_users_ratings``, ``all_relevance``
    and ``movies_names``. Prints the most frequent genres among the
    recommendations and the model's score on its own training data.

    Args:
        id: userId to recommend for. The name shadows the builtin ``id`` but
            is kept so existing callers still work.

    Returns:
        DataFrame holding the ``movies_names`` rows of the
        NUMBER_OF_RECOMMENDATIONS movies with the highest predicted rating,
        in ``movies_names`` order and re-indexed from 0.

    Raises:
        ValueError: if none of the user's rated movies has relevance data,
            so there is nothing to train on.
    """
    # Same features as the original positional slicing: every column after
    # the first one (assumes movieId is the first column -- TODO confirm).
    feature_columns = all_relevance.columns[1:]

    user_ratings = all_users_ratings.loc[
        all_users_ratings["userId"] == id, ["movieId", "rating"]
    ]
    # Key ratings by movieId so each target is attached to its own relevance
    # row instead of relying on both frames sharing the same row order.
    rating_by_movie = (
        user_ratings.drop_duplicates("movieId", keep="last")
        .set_index("movieId")["rating"]
    )

    rated_mask = all_relevance["movieId"].isin(rating_by_movie.index)
    rated = all_relevance.loc[rated_mask]
    unrated = all_relevance.loc[~rated_mask]
    if rated.empty:
        raise ValueError(
            f"user {id} has no rated movies with tag relevance data"
        )

    # Training data: tag relevance features -> this user's rating.
    X_train = rated[feature_columns]
    y_train = rated["movieId"].map(rating_by_movie)

    lgb = LGBMRegressor(learning_rate=0.01, num_iterations=1000, num_leaves=100)
    lgb.fit(X_train, y_train)
    train_score = f"{lgb.score(X_train, y_train):.2f}"

    # Predict a rating for every movie the user has not rated yet.
    predictions = pd.DataFrame(
        {
            "movieId": unrated["movieId"].to_numpy(),
            "rating": lgb.predict(unrated[feature_columns]),
        }
    )
    top_recommendations = predictions.sort_values("rating", ascending=False).head(
        NUMBER_OF_RECOMMENDATIONS
    )
    movies_recommendations = movies_names.loc[
        movies_names["movieId"].isin(top_recommendations["movieId"])
    ].reset_index(drop=True)

    genre_counts = {}
    for genres in movies_recommendations["genres"]:
        # split() yields a one-element list for single-genre movies, so they
        # need no separate branch (the old branch counted a stale variable).
        for name in genres.split("|"):
            genre_counts[name] = genre_counts.get(name, 0) + 1
    genres_counter = pd.DataFrame(
        {"genre": list(genre_counts), "counter": list(genre_counts.values())}
    )
    genres_counter = genres_counter.sort_values(
        "counter", ascending=False, kind="mergesort"
    ).reset_index(drop=True)

    print(f" TOP {NUMBER_OF_GENRES} GENRES OF RECOMMENDATIONS ".center(50, "-"))
    print(genres_counter.head(NUMBER_OF_GENRES))
    print(" TRAINING SCORE ".center(50, "-"))
    print("training score:", train_score)
    return movies_recommendations

In [10]:
# Train on user 1's ratings and display the recommended movies.
recommendations = get_recommendations(1)
recommendations

-------- TOP 3 GENRES OF RECOMMENDATIONS ---------
    genre  counter
0   Drama       10
1  Comedy        5
2   Crime        3
----------------- TRAINING SCORE -----------------
training score: 0.96


Unnamed: 0,movieId,title,genres
0,1729,Jackie Brown (1997),Crime|Drama|Thriller
1,1757,Fallen Angels (Duo luo tian shi) (1995),Drama|Romance
2,2395,Rushmore (1998),Comedy|Drama
3,2624,After Life (Wandafuru raifu) (1998),Drama|Fantasy
4,3019,Drugstore Cowboy (1989),Crime|Drama
5,3328,Ghost Dog: The Way of the Samurai (1999),Crime|Drama
6,3925,Stranger Than Paradise (1984),Comedy|Drama
7,4741,Together (Tillsammans) (2000),Comedy|Drama|Romance
8,87234,Submarine (2010),Comedy|Drama|Romance
9,102800,Frances Ha (2012),Comedy|Drama
