# Collaborative-Filtering Film Recommendation System

Item-to-item collaborative filtering.
Reference video: https://www.youtube.com/watch?v=3ecNC-So0r4

In [None]:
import pandas as pd
from scipy import sparse
from sklearn.metrics.pairwise import cosine_similarity

## Toy Dataset
This section uses the cosine-similarity method.

In [None]:
# Load the toy ratings table as-is (no index column chosen yet) and display it.
ratings = pd.read_csv("dataset_collab/toy_dataset.csv")
ratings

In [None]:
# Reload the same file, this time using its first column (position 0) as the row index.
ratings = pd.read_csv("dataset_collab/toy_dataset.csv", index_col = 0) # first CSV column becomes the index
ratings = ratings.fillna(0) # replace every NaN cell with 0
ratings

In [None]:
def standardize(row):
    ''' Mean-center a vector of ratings and scale it by its range.

    Computes ``(row - row.mean()) / (row.max() - row.min())``.

    Parameters
    ----------
    row : pandas.Series
        One vector of ratings. When used through ``DataFrame.apply`` with the
        default ``axis=0`` this is one *column* of the frame, despite the name.

    Returns
    -------
    pandas.Series
        The centered and range-scaled ratings, with the same index as ``row``.
        A constant vector (range 0) yields all zeros instead of NaN.
    '''
    centered = row - row.mean()
    value_range = row.max() - row.min()
    if value_range == 0:
        # Every value equals the mean, so `centered` is already all zeros;
        # dividing would produce 0/0 = NaN and poison the similarity step.
        return centered
    # Parentheses matter: the whole centered vector is divided by the range,
    # not just the mean.
    return centered / value_range

# DataFrame.apply defaults to axis=0, so standardize receives one column at a time.
ratings_std = ratings.apply(standardize)
ratings_std

In [None]:
# Transpose so that each item (a column of ratings_std) becomes a row;
# cosine_similarity compares the rows of its input with one another.
item_similarity = cosine_similarity(ratings_std.T) # pairwise item-to-item similarity matrix
print(item_similarity)

In [None]:
# Wrap the raw similarity matrix in a DataFrame labelled on both axes
# with the column labels of `ratings`.
item_similarity_df = pd.DataFrame(
    data=item_similarity,
    columns=ratings.columns,
    index=ratings.columns,
)
item_similarity_df

In [None]:
# Recommendation

def get_similar_movies(movie_name, user_rating):
    """Score every movie by its similarity to ``movie_name``, weighted by the rating.

    The weight is ``user_rating - 2.5``, so ratings above 2.5 give a positive
    weight and ratings below it give a negative one.

    Reads the notebook-level ``item_similarity_df`` and returns a Series
    sorted from the highest score to the lowest.
    """
    weight = user_rating - 2.5
    weighted_scores = item_similarity_df[movie_name] * weight
    return weighted_scores.sort_values(ascending=False)

# Smoke test: a rating of 1 is below the 2.5 midpoint, so every similarity is multiplied by a negative weight.
print(get_similar_movies("romantic3",1))

In [None]:
# Sample user for the toy dataset: (movie, rating) pairs
action_lover = [("action1",5) , ("romantic2",1) , ("romantic3", 1)]

# DataFrame.append was removed in pandas 2.0 (and copied the frame on every
# call), so collect one score Series per rated movie and build the frame once.
# Each Series becomes one row; columns are aligned on the movie labels.
similar_movies = pd.DataFrame(
    [get_similar_movies(movie, rating) for movie, rating in action_lover]
).reset_index(drop=True)  # number rows 0..n-1, as ignore_index=True used to

similar_movies.head()


In [None]:
# Recommendations for the "action_lover" user: add up each movie's weighted
# similarity over all rated movies (column-wise sum) and rank highest first.
# NOTE(review): movies the user already rated are not filtered out of this ranking.

similar_movies.sum().sort_values(ascending=False)

## MovieLens Dataset

In [None]:
import pandas as pd
from scipy import sparse
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Read the movie catalogue and the user ratings, then join them on the
# columns the two tables share (pandas' default merge key, inner join).
movies = pd.read_csv("dataset_collab/movies.csv")
ratings = pd.read_csv("dataset_collab/ratings.csv")
ratings = movies.merge(ratings)
ratings.head()  # expected shape of the merged frame: (100836, 6)

In [None]:
# Drop columns that the recommender does not use.
# DataFrame.drop returns a new frame, so the result has to be assigned back;
# errors="ignore" keeps the cell re-runnable once the columns are already gone.
ratings = ratings.drop(columns=['genres', 'timestamp'], errors="ignore")
ratings

In [None]:
# One row per user, one column per movie title, cells hold the rating
# (pivot_table's default aggregation is the mean when a cell has several values).
user_ratings = pd.pivot_table(
    ratings,
    values='rating',
    index="userId",
    columns=['title'],
)
user_ratings.head()  # expected shape of the full pivot: (610, 9719)

In [None]:
# Keep only movies with at least 10 non-missing ratings,
# then fill the remaining NaN cells with 0.

user_ratings = (
    user_ratings
    .dropna(axis=1, thresh=10)
    .fillna(0)
)
user_ratings

In [None]:
# Build the item-item similarity matrix: pairwise Pearson correlation between
# the movie columns of user_ratings.
# NOTE(review): missing ratings were filled with 0 beforehand, so they enter
# the correlation as if they were real ratings.

item_similarity_df = user_ratings.corr(method="pearson") # Pearson correlation centers each column itself, so the standardize helper is not applied here
item_similarity_df.head()


In [None]:
# Save the similarity matrix to CSV (path is relative to the current working directory)

item_similarity_df.to_csv("similarity_model.csv")

In [None]:
def get_similar_movies(movie_name, user_rating):
    """Rank all movies by rating-weighted similarity to ``movie_name``.

    Takes the ``movie_name`` column of the notebook-level
    ``item_similarity_df``, multiplies it by ``user_rating - 2.5`` (positive
    for ratings above 2.5, negative below) and returns the resulting Series
    sorted in descending order.
    """
    ranked = (
        item_similarity_df[movie_name]
        .mul(user_rating - 2.5)
        .sort_values(ascending=False)
    )
    return ranked

# print(get_similar_movies("101 Dalmatians (1996)",1)) # Test

In [None]:
# Sample user: (title, rating) pairs.
# NOTE(review): the titles rated here are not removed from the ranking below,
# so they can show up among their own recommendations.
movie_user = [
    ("Zombieland (2009)", 5),
    ("Zootopia (2016)", 1),
    ("10 Cloverfield Lane (2016)", 1),
    ("(500) Days of Summer (2009)", 3),
    ("10 Things I Hate About You (1999)", 3),
]

# DataFrame.append was removed in pandas 2.0 (and copied the frame on every
# call), so collect one score Series per rated movie and build the frame once.
similar_movies = pd.DataFrame(
    [get_similar_movies(movie, rating) for movie, rating in movie_user]
).reset_index(drop=True)  # number rows 0..n-1, as ignore_index=True used to

# Total score per movie across all rated titles, best first. (The former
# mid-cell `similar_movies.head()` displayed nothing and was removed.)
similar_movies.sum().sort_values(ascending=False)

## Create Movie CSV with unique movie ID

In [61]:
import pandas as pd

In [62]:
# Load the movie catalogue.
# NOTE(review): this path is prefixed with "app/", unlike the "dataset_collab/..."
# paths used earlier in the notebook; confirm the expected working directory.
movies = pd.read_csv("app/dataset_collab/movies.csv")
movies

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
9737,193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy
9738,193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy
9739,193585,Flint (2017),Drama
9740,193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation


In [63]:
# Discard the genres column
movies = movies.drop(columns=['genres'])
movies

Unnamed: 0,movieId,title
0,1,Toy Story (1995)
1,2,Jumanji (1995)
2,3,Grumpier Old Men (1995)
3,4,Waiting to Exhale (1995)
4,5,Father of the Bride Part II (1995)
...,...,...
9737,193581,Black Butler: Book of the Atlantic (2017)
9738,193583,No Game No Life: Zero (2017)
9739,193585,Flint (2017)
9740,193587,Bungo Stray Dogs: Dead Apple (2018)


In [69]:
# movies.to_csv (r'F:\Programming\Python\~PROJECTS\College~\secureV\movie-reco-app\movie_list.csv', index = None, header=True)

In [64]:
# Map each movieId to the list of that movie's remaining column values
# (after set_index + transpose, to_dict("list") yields {movieId: [values...]}).
# The previous `movies_dict = []` initialisation was dead code: it was
# overwritten immediately and had the wrong type (list instead of dict).
movies_dict = movies.set_index("movieId").T.to_dict("list")

In [65]:
# Second name bound to the same dict object (no copy is made); the visible cells never read it.
mydict = movies_dict

In [66]:
# Check the type of the value stored under movieId 2
print(type(movies_dict[2]))

<class 'list'>


In [67]:
# Reverse lookup: print the id of every movie whose title equals `search_for`
search_for = "Casino (1995)"
for movie_id , movie_name in movies_dict.items():
    # each value is a list; the title is its first element
    if movie_name[0] != search_for:
        continue
    print(movie_id)
    

16


## Test on new csv

In [22]:
import pandas as pd

In [23]:
# Load the exported movie list, using its first column as the index
movie_pd = pd.read_csv(r"app/data/movie_list.csv", index_col=0)
movie_pd

Unnamed: 0_level_0,title
movieId,Unnamed: 1_level_1
1,Toy Story (1995)
2,Jumanji (1995)
3,Grumpier Old Men (1995)
4,Waiting to Exhale (1995)
5,Father of the Bride Part II (1995)
...,...
193581,Black Butler: Book of the Atlantic (2017)
193583,No Game No Life: Zero (2017)
193585,Flint (2017)
193587,Bungo Stray Dogs: Dead Apple (2018)


In [24]:
# Flat {index label: title} lookup taken from the "title" column
movie_dict = movie_pd["title"].to_dict()
movie_dict

{1: 'Toy Story (1995)',
 2: 'Jumanji (1995)',
 3: 'Grumpier Old Men (1995)',
 4: 'Waiting to Exhale (1995)',
 5: 'Father of the Bride Part II (1995)',
 6: 'Heat (1995)',
 7: 'Sabrina (1995)',
 8: 'Tom and Huck (1995)',
 9: 'Sudden Death (1995)',
 10: 'GoldenEye (1995)',
 11: 'American President, The (1995)',
 12: 'Dracula: Dead and Loving It (1995)',
 13: 'Balto (1995)',
 14: 'Nixon (1995)',
 15: 'Cutthroat Island (1995)',
 16: 'Casino (1995)',
 17: 'Sense and Sensibility (1995)',
 18: 'Four Rooms (1995)',
 19: 'Ace Ventura: When Nature Calls (1995)',
 20: 'Money Train (1995)',
 21: 'Get Shorty (1995)',
 22: 'Copycat (1995)',
 23: 'Assassins (1995)',
 24: 'Powder (1995)',
 25: 'Leaving Las Vegas (1995)',
 26: 'Othello (1995)',
 27: 'Now and Then (1995)',
 28: 'Persuasion (1995)',
 29: 'City of Lost Children, The (Cité des enfants perdus, La) (1995)',
 30: 'Shanghai Triad (Yao a yao yao dao waipo qiao) (1995)',
 31: 'Dangerous Minds (1995)',
 32: 'Twelve Monkeys (a.k.a. 12 Monkeys) (199

In [25]:
# Reverse lookup: print the id of every movie whose title equals `search_for`
search_for = "Casino (1995)"
for movie_id , movie_name in movie_dict.items():
    if movie_name != search_for:
        continue
    print(movie_id)
    

16
