> In this notebook, we build a memory-based, content-based recommender system using the libraries and techniques below. Let's get started:

In [503]:
import numpy as np
import pandas as pd

In [157]:
# Load the raw movie catalogue; the path is relative to the working directory.
MOVIES_CSV = "n_movies.csv"
movies = pd.read_csv(MOVIES_CSV)

In [158]:
# Preview the first five rows of the raw data.
movies.head(n=5)

Unnamed: 0,title,year,certificate,duration,genre,rating,description,stars,votes
0,Cobra Kai,(2018– ),TV-14,30 min,"Action, Comedy, Drama",8.5,Decades after their 1984 All Valley Karate Tou...,"['Ralph Macchio, ', 'William Zabka, ', 'Courtn...",177031
1,The Crown,(2016– ),TV-MA,58 min,"Biography, Drama, History",8.7,Follows the political rivalries and romance of...,"['Claire Foy, ', 'Olivia Colman, ', 'Imelda St...",199885
2,Better Call Saul,(2015–2022),TV-MA,46 min,"Crime, Drama",8.9,The trials and tribulations of criminal lawyer...,"['Bob Odenkirk, ', 'Rhea Seehorn, ', 'Jonathan...",501384
3,Devil in Ohio,(2022),TV-MA,356 min,"Drama, Horror, Mystery",5.9,When a psychiatrist shelters a mysterious cult...,"['Emily Deschanel, ', 'Sam Jaeger, ', 'Gerardo...",9773
4,Cyberpunk: Edgerunners,(2022– ),TV-MA,24 min,"Animation, Action, Adventure",8.6,A Street Kid trying to survive in a technology...,"['Zach Aguilar, ', 'Kenichiro Ohashi, ', 'Emi ...",15413


> We don't need some of the columns, so we drop them; we also drop the rows that contain NA values:

In [159]:
# Remove the columns the recommender does not use, then discard any row
# that still has a missing value.
unused_columns = ["year", "certificate", "description", "stars", "votes", "duration", "rating"]
movies = movies.drop(columns=unused_columns)
movies = movies.dropna(axis="index")

In [160]:
# Give every movie a sequential (not random) integer ID, starting at ID_START,
# and use it as the DataFrame index under the name "id".
from numpy import random
random.seed(1)  # seeds NumPy's global RNG; the ID assignment below does not use it

ID_START = 100000
# len(movies) counts rows directly; no need to materialise movies.values.
movies.index = pd.RangeIndex(ID_START, ID_START + len(movies), name="id")

In [161]:
# Split the comma-separated genre string into a list of genre names.
# Splitting on the comma together with its surrounding whitespace (and
# stripping the ends first) keeps the names clean: "Comedy", not " Comedy".
# A multi-character pattern is treated as a regular expression by str.split.
movies["genre"] = movies["genre"].str.strip().str.split(r"\s*,\s*")

In [162]:
# Make each genre a column (1.0 = the movie has that genre, 0.0 = it does not)
# so the recommender can work on a numeric movie-by-genre matrix.
# This is done in one vectorised pass instead of a row-by-row loop.

# One row per (movie, genre) pair; the movie's index label is repeated.
genre_tokens = movies["genre"].explode().str.strip().dropna()
# Column order = order in which each genre first appears in the data.
genre_names = genre_tokens.unique()

genre_flags = (
    pd.get_dummies(genre_tokens)
    .groupby(level=0)  # collapse the repeated labels back to one row per movie
    .max()
    .astype(float)
    .reindex(index=movies.index, columns=genre_names, fill_value=0.0)
)

# Dropping any existing genre columns first makes the cell safe to re-run.
# Only the genre columns are filled with 0.0; other columns are left untouched.
movies = movies.drop(columns=genre_names, errors="ignore").join(genre_flags)
    

In [163]:
# Drop repeated titles, keeping the first occurrence of each.
# Note: removed rows leave gaps in the "id" index, so after this step an id
# is not guaranteed to equal 100000 + row position.
movies = movies.drop_duplicates(subset="title", keep="first")

> #### Alright, now the DataFrame is cleaned and ready for the recommender system.
> Let's start by defining a function that makes it easier to pass in a specific user's data:

In [164]:
# Preview the first four rows of the cleaned, one-hot encoded frame.
movies.head(n=4)

Unnamed: 0_level_0,title,genre,Action,Comedy,Drama,Biography,History,Crime,Horror,Mystery,...,Documentary,War,Music,Reality-TV,Musical,Western,Game-Show,Talk-Show,Film-Noir,News
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
100000,Cobra Kai,"[Action, Comedy, Drama]",1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
100001,The Crown,"[Biography, Drama, History]",0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
100002,Better Call Saul,"[Crime, Drama]",0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
100003,Devil in Ohio,"[Drama, Horror, Mystery]",0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


> We pass the function a dictionary of the movies the user rated, mapping each title to its rating (we assume every passed movie is unique and exists in the `movies` DataFrame):


In [573]:
def user_profiler(data_dictionary, movies_matrix):
    """Build a normalised genre-preference profile for one user.

    Each rated movie's feature row is weighted by the user's rating, the
    weighted rows are summed per feature, and the result is divided by its
    largest entry so the strongest preference equals 1.0.

    Parameters
    ----------
    data_dictionary : dict
        Maps a movie title to the rating the user gave it.
    movies_matrix : pandas.DataFrame
        Movie table whose first two columns are non-feature columns (``title``
        and ``genre`` in this notebook) and whose remaining columns are the
        numeric feature flags. Must contain a ``title`` column.

    Returns
    -------
    pandas.DataFrame
        A single-row frame (index ``0``) with one float column per feature.

    Raises
    ------
    IndexError
        If a title in ``data_dictionary`` is not present in ``movies_matrix``.
    """
    feature_columns = movies_matrix.columns[2:]

    rated_rows = []
    ratings = []
    for movie, rate in data_dictionary.items():
        # Look the movie up by title. Index labels must not be used as row
        # positions: rows may have been removed after ids were assigned.
        matches = movies_matrix.loc[movies_matrix["title"] == movie, feature_columns]
        if matches.empty:
            raise IndexError(f"movie {movie!r} not found in movies_matrix['title']")
        rated_rows.append(matches.iloc[0])
        ratings.append(rate)

    mov_mat = pd.DataFrame(rated_rows, columns=feature_columns).fillna(0).astype(float)
    rate_mat = np.asarray(ratings, dtype=float)[np.newaxis, :]  # shape (1, n_rated)

    # (1, n_rated) @ (n_rated, n_features) -> rating-weighted sum per feature.
    user_p = pd.DataFrame(rate_mat @ mov_mat.to_numpy(), columns=feature_columns)

    # Normalizing: scale by the largest entry; skip when the profile is all
    # zeros so the result is zeros rather than NaN.
    peak = user_p.max(axis=1).iloc[0]
    if peak != 0:
        user_p = user_p / peak
    return user_p
    

> ### Here is our final Recommender Function:

In [574]:
def recommender(user_profile, movies_matrix, number_of_recommends=2, exclude_titles=None):
    """Rank movies by how well their features match a user profile.

    The score of a movie is the dot product of its feature row with the user
    profile. Movies are returned in descending score order; movies with equal
    scores keep the order they have in ``movies_matrix``.

    Parameters
    ----------
    user_profile : pandas.DataFrame or array-like
        One row of feature weights, in the same order as the feature columns
        of ``movies_matrix`` (for example the output of ``user_profiler``).
    movies_matrix : pandas.DataFrame
        Movie table whose first two columns are non-feature columns and whose
        remaining columns are numeric feature flags. Must contain ``title``.
    number_of_recommends : int, default 2
        Number of top-scoring movies to return.
    exclude_titles : iterable of str, optional
        Titles to leave out of the result, e.g. movies the user already
        rated. By default nothing is excluded.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``title`` with the score in column ``0``.
    """
    # Take the feature columns directly as a float matrix instead of
    # transposing the whole mixed-dtype frame into an object array.
    feature_matrix = movies_matrix.iloc[:, 2:].to_numpy(dtype=float)
    profile_matrix = np.atleast_2d(np.asarray(user_profile, dtype=float)).T

    scores = feature_matrix @ profile_matrix
    recommended_movies = pd.DataFrame(scores, index=movies_matrix["title"])

    if exclude_titles is not None:
        keep = ~recommended_movies.index.isin(list(exclude_titles))
        recommended_movies = recommended_movies[keep]

    # A stable sort makes the order of tied scores deterministic.
    recommended_movies = recommended_movies.sort_values(by=[0], ascending=False, kind="stable")
    return recommended_movies[:number_of_recommends]

In [575]:
# Example: profile a user from two rated titles, then show the top recommendations.
user_ratings = {"Cobra Kai": 3.3, "The Crown": 3.1}
user_profile = user_profiler(user_ratings, movies)
recommender(user_profile, movies)

Unnamed: 0_level_0,0
title,Unnamed: 1_level_1
Cobra Kai,2.03125
ReBoot: The Guardian Code,2.03125


### To verify that everything works, we apply the functions to a sample user and dataset:

In [577]:
# Tiny hand-made catalogue with the same layout as `movies`:
# title, genre (placeholder values), then one 0/1 column per feature,
# indexed by "id".
movies_sample = pd.DataFrame(
    {
        "title": ["first", "second", "third"],
        "genre": [1222, 12222, 323233],
        "Action": [0, 1, 1],
        "Comedy": [1, 0, 0],
        "Scary": [1, 1, 1],
    },
    index=pd.Index([100000, 100001, 100002], name="id"),
)

# Ratings of the sample user: title -> rating.
Sample_user_dict = {"first": 3.3, "third": 3.1}

movies_sample

Unnamed: 0_level_0,title,genre,Action,Comedy,Scary
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
100000,first,1222,0,1,1
100001,second,12222,1,0,1
100002,third,323233,1,0,1


In [578]:
# Run the full pipeline on the sample data: build the profile, then rank.
sample_profile = user_profiler(Sample_user_dict, movies_sample)
recommender(sample_profile, movies_sample)

Unnamed: 0_level_0,0
title,Unnamed: 1_level_1
first,1.515625
second,1.484375
