### **TEST DATASET**

In [1]:
import pandas as pd
from scipy import sparse
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
ratings = pd.read_csv("toy_dataset.csv")
ratings

Unnamed: 0.1,Unnamed: 0,action1,action2,action3,romantic1,romantic2,romantic3
0,user 1,4.0,5.0,3.0,,2.0,1.0
1,user 2,5.0,3.0,3.0,2.0,2.0,
2,user 3,1.0,,,4.0,5.0,4.0
3,user 4,,2.0,1.0,4.0,,3.0
4,user 5,1.0,,2.0,3.0,3.0,4.0


In [3]:
ratings = pd.read_csv("toy_dataset.csv", index_col = 0) # set col 1 as index
ratings = ratings.fillna(0) # on NaN row values set to 0
ratings

Unnamed: 0,action1,action2,action3,romantic1,romantic2,romantic3
user 1,4.0,5.0,3.0,0.0,2.0,1.0
user 2,5.0,3.0,3.0,2.0,2.0,0.0
user 3,1.0,0.0,0.0,4.0,5.0,4.0
user 4,0.0,2.0,1.0,4.0,0.0,3.0
user 5,1.0,0.0,2.0,3.0,3.0,4.0


In [4]:
def standardize(row):
    ''' Standardize ratings '''
    new_row = row - row.mean() / (row.max() - row.min())
    return new_row

ratings_std = ratings.apply(standardize)
ratings_std

Unnamed: 0,action1,action2,action3,romantic1,romantic2,romantic3
user 1,3.56,4.6,2.4,-0.65,1.52,0.4
user 2,4.56,2.6,2.4,1.35,1.52,-0.6
user 3,0.56,-0.4,-0.6,3.35,4.52,3.4
user 4,-0.44,1.6,0.4,3.35,-0.48,2.4
user 5,0.56,-0.4,1.4,2.35,2.52,3.4


In [5]:
# transpose since we get similarity between items which is in rows
item_similarity = cosine_similarity(ratings_std.T) # Create item similarity rowise 
print(item_similarity)

[[1.         0.83330162 0.90188642 0.17274695 0.50089217 0.04535422]
 [0.83330162 1.         0.84756554 0.11801408 0.23579746 0.04652421]
 [0.90188642 0.84756554 1.         0.20907933 0.37635513 0.15772927]
 [0.17274695 0.11801408 0.20907933 1.         0.66377471 0.8841249 ]
 [0.50089217 0.23579746 0.37635513 0.66377471 1.         0.73715585]
 [0.04535422 0.04652421 0.15772927 0.8841249  0.73715585 1.        ]]


In [6]:
# Create DF from item similarity
item_similarity_df = pd.DataFrame(item_similarity, index=ratings.columns , columns = ratings.columns)
item_similarity_df

Unnamed: 0,action1,action2,action3,romantic1,romantic2,romantic3
action1,1.0,0.833302,0.901886,0.172747,0.500892,0.045354
action2,0.833302,1.0,0.847566,0.118014,0.235797,0.046524
action3,0.901886,0.847566,1.0,0.209079,0.376355,0.157729
romantic1,0.172747,0.118014,0.209079,1.0,0.663775,0.884125
romantic2,0.500892,0.235797,0.376355,0.663775,1.0,0.737156
romantic3,0.045354,0.046524,0.157729,0.884125,0.737156,1.0


In [7]:
# Recommendation

def get_similar_movies (movie_name , user_rating):
    similar_score = item_similarity_df[movie_name]*(user_rating - 2.5)
    similar_score = similar_score.sort_values(ascending=False)
    return  similar_score

print(get_similar_movies("romantic3",1))

action1     -0.068031
action2     -0.069786
action3     -0.236594
romantic2   -1.105734
romantic1   -1.326187
romantic3   -1.500000
Name: romantic3, dtype: float64


In [8]:
# Sample User for toy dataset
action_lover = [("action1",5) , ("romantic2",1) , ("romantic3", 1)]

similar_movies = pd.DataFrame()

for movie , rating in action_lover:
    similar_movies = similar_movies.append(get_similar_movies(movie,rating) , ignore_index = True)
    
similar_movies.head()

Unnamed: 0,action1,action3,action2,romantic2,romantic1,romantic3
0,2.5,2.254716,2.083254,1.25223,0.431867,0.113386
1,-0.751338,-0.564533,-0.353696,-1.5,-0.995662,-1.105734
2,-0.068031,-0.236594,-0.069786,-1.105734,-1.326187,-1.5


In [9]:
# Recommended movie for "action_lover" user

similar_movies.sum().sort_values(ascending=False)

action1      1.680630
action2      1.659772
action3      1.453589
romantic2   -1.353503
romantic1   -1.889982
romantic3   -2.492348
dtype: float64

### **MovieLens Dataset**

In [10]:
ratings = pd.read_csv("ratings.csv")
movies = pd.read_csv("movies.csv")
ratings = pd.merge(movies , ratings)
ratings.head() # (100836, 6)

Unnamed: 0,movieId,title,genres,userId,rating,timestamp
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,1,4.0,964982703
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,5,4.0,847434962
2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,7,4.5,1106635946
3,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,15,2.5,1510577970
4,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,17,4.5,1305696483


In [11]:
# Drop Columns
ratings.drop(['genres', 'timestamp'] , axis=1)

Unnamed: 0,movieId,title,userId,rating
0,1,Toy Story (1995),1,4.0
1,1,Toy Story (1995),5,4.0
2,1,Toy Story (1995),7,4.5
3,1,Toy Story (1995),15,2.5
4,1,Toy Story (1995),17,4.5
...,...,...,...,...
100831,193581,Black Butler: Book of the Atlantic (2017),184,4.0
100832,193583,No Game No Life: Zero (2017),184,3.5
100833,193585,Flint (2017),184,3.5
100834,193587,Bungo Stray Dogs: Dead Apple (2018),184,3.5


In [12]:
user_ratings = ratings.pivot_table(index="userId" , columns = ['title'] , values='rating')
user_ratings.head() # (610, 9719)

title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,,,,,,...,,,,,,,,,4.0,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,


In [13]:
user_ratings = user_ratings.dropna(thresh=10 , axis=1).fillna(0)
user_ratings

title,"'burbs, The (1989)",(500) Days of Summer (2009),10 Cloverfield Lane (2016),10 Things I Hate About You (1999),"10,000 BC (2008)",101 Dalmatians (1996),101 Dalmatians (One Hundred and One Dalmatians) (1961),12 Angry Men (1957),12 Years a Slave (2013),127 Hours (2010),...,Zack and Miri Make a Porno (2008),Zero Dark Thirty (2012),Zero Effect (1998),Zodiac (2007),Zombieland (2009),Zoolander (2001),Zootopia (2016),eXistenZ (1999),xXx (2002),¡Three Amigos! (1986)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
607,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
608,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,3.0,0.0,4.5,3.5,0.0
609,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [14]:
item_similarity_df = user_ratings.corr(method="pearson") # no need for standardize method we made above
item_similarity_df.head()

title,"'burbs, The (1989)",(500) Days of Summer (2009),10 Cloverfield Lane (2016),10 Things I Hate About You (1999),"10,000 BC (2008)",101 Dalmatians (1996),101 Dalmatians (One Hundred and One Dalmatians) (1961),12 Angry Men (1957),12 Years a Slave (2013),127 Hours (2010),...,Zack and Miri Make a Porno (2008),Zero Dark Thirty (2012),Zero Effect (1998),Zodiac (2007),Zombieland (2009),Zoolander (2001),Zootopia (2016),eXistenZ (1999),xXx (2002),¡Three Amigos! (1986)
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"'burbs, The (1989)",1.0,0.063117,-0.023768,0.143482,0.011998,0.087931,0.224052,0.034223,0.009277,0.008331,...,0.017477,0.03247,0.134701,0.153158,0.101301,0.049897,0.003233,0.187953,0.062174,0.353194
(500) Days of Summer (2009),0.063117,1.0,0.142471,0.273989,0.19396,0.148903,0.142141,0.159756,0.135486,0.200135,...,0.374515,0.178655,0.068407,0.414585,0.355723,0.252226,0.216007,0.053614,0.241092,0.125905
10 Cloverfield Lane (2016),-0.023768,0.142471,1.0,-0.005799,0.112396,0.006139,-0.016835,0.031704,-0.024275,0.272943,...,0.242663,0.099059,-0.023477,0.272347,0.241751,0.195054,0.319371,0.177846,0.096638,0.002733
10 Things I Hate About You (1999),0.143482,0.273989,-0.005799,1.0,0.24467,0.223481,0.211473,0.011784,0.091964,0.043383,...,0.243118,0.104858,0.13246,0.091853,0.158637,0.281934,0.050031,0.121029,0.130813,0.110612
"10,000 BC (2008)",0.011998,0.19396,0.112396,0.24467,1.0,0.234459,0.119132,0.059187,-0.025882,0.089328,...,0.260261,0.087592,0.094913,0.184521,0.242299,0.240231,0.094773,0.088045,0.203002,0.083518


In [15]:
item_similarity_df.to_csv("similarity_model.csv")

In [19]:
def get_similar_movies (movie_name , user_rating):
    similar_score = item_similarity_df[movie_name]*(user_rating - 2.5)
    similar_score = similar_score.sort_values(ascending=False)
    return  similar_score
    print(get_similar_movies("101 Dalmatians (1996)",1))

In [20]:
# Sample User [ IGNORE SAME MOVIES INPUTTED ]
movie_user = [
                ("Zombieland (2009)",5) ,
                ("Zootopia (2016)",1) ,
                ("10 Cloverfield Lane (2016)", 1) ,
                ("(500) Days of Summer (2009)" , 3) , 
                ("10 Things I Hate About You (1999)" , 3) 
               ]

similar_movies = pd.DataFrame()

for movie , rating in movie_user:
    similar_movies = similar_movies.append(get_similar_movies(movie,rating) , ignore_index = True)
    
similar_movies.head()
similar_movies.sum().sort_values(ascending=False)

Zombieland (2009)              1.946797
Hangover, The (2009)           1.221417
Adventureland (2009)           1.206465
Accepted (2006)                1.029126
(500) Days of Summer (2009)    0.988586
                                 ...   
Get Out (2017)                -0.484821
Coco (2017)                   -0.506028
Moana (2016)                  -0.685061
Zootopia (2016)               -1.099777
10 Cloverfield Lane (2016)    -1.306343
Length: 2269, dtype: float64

### **CREATE MOVIE WITH UNIQUE CSV ID**

In [21]:
import pandas as pd

In [22]:
movies = pd.read_csv("movies.csv")
movies

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
9737,193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy
9738,193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy
9739,193585,Flint (2017),Drama
9740,193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation


In [23]:
movies = movies.drop(['genres'] , axis=1)
movies

Unnamed: 0,movieId,title
0,1,Toy Story (1995)
1,2,Jumanji (1995)
2,3,Grumpier Old Men (1995)
3,4,Waiting to Exhale (1995)
4,5,Father of the Bride Part II (1995)
...,...,...
9737,193581,Black Butler: Book of the Atlantic (2017)
9738,193583,No Game No Life: Zero (2017)
9739,193585,Flint (2017)
9740,193587,Bungo Stray Dogs: Dead Apple (2018)


In [24]:
movies_dict = []
movies_dict = movies.set_index("movieId").T.to_dict("list")

In [25]:
mydict = movies_dict

In [26]:
print(type(movies_dict[2]))

<class 'list'>


In [27]:
search_for = "Casino (1995)"
for movie_id , movie_name in movies_dict.items():
#     print(movie_name)
    if movie_name[0] == search_for:
        print(movie_id)

16


### **TEST ON THE NEW CREATED CSV FILE**

In [28]:
movie_pd = pd.read_csv(r"movie_list.csv", index_col=0)
movie_pd

Unnamed: 0_level_0,title
movieId,Unnamed: 1_level_1
1,Toy Story (1995)
2,Jumanji (1995)
3,Grumpier Old Men (1995)
4,Waiting to Exhale (1995)
5,Father of the Bride Part II (1995)
...,...
193581,Black Butler: Book of the Atlantic (2017)
193583,No Game No Life: Zero (2017)
193585,Flint (2017)
193587,Bungo Stray Dogs: Dead Apple (2018)


In [29]:
movie_dict = movie_pd.to_dict()["title"]
movie_dict

{1: 'Toy Story (1995)',
 2: 'Jumanji (1995)',
 3: 'Grumpier Old Men (1995)',
 4: 'Waiting to Exhale (1995)',
 5: 'Father of the Bride Part II (1995)',
 6: 'Heat (1995)',
 7: 'Sabrina (1995)',
 8: 'Tom and Huck (1995)',
 9: 'Sudden Death (1995)',
 10: 'GoldenEye (1995)',
 11: 'American President, The (1995)',
 12: 'Dracula: Dead and Loving It (1995)',
 13: 'Balto (1995)',
 14: 'Nixon (1995)',
 15: 'Cutthroat Island (1995)',
 16: 'Casino (1995)',
 17: 'Sense and Sensibility (1995)',
 18: 'Four Rooms (1995)',
 19: 'Ace Ventura: When Nature Calls (1995)',
 20: 'Money Train (1995)',
 21: 'Get Shorty (1995)',
 22: 'Copycat (1995)',
 23: 'Assassins (1995)',
 24: 'Powder (1995)',
 25: 'Leaving Las Vegas (1995)',
 26: 'Othello (1995)',
 27: 'Now and Then (1995)',
 28: 'Persuasion (1995)',
 29: 'City of Lost Children, The (Cité des enfants perdus, La) (1995)',
 30: 'Shanghai Triad (Yao a yao yao dao waipo qiao) (1995)',
 31: 'Dangerous Minds (1995)',
 32: 'Twelve Monkeys (a.k.a. 12 Monkeys) (199

In [30]:
search_for = "Casino (1995)"
for movie_id , movie_name in movie_dict.items():
    if movie_name == search_for:
        print(movie_id)

16
