In [1]:
import tensorflow as tf
from tensorflow.keras import datasets
from tensorflow.keras.layers import Conv2D,Dense,MaxPooling2D
import sklearn
import numpy as np
import pandas as pd

In [2]:
# Column labels for the pipe-delimited user file, supplied via `names`.
user_col = [
    "user_id",
    "age",
    "sex",
    "occupation",
    "zipcode",
]
users = pd.read_csv(
    "ml-100k/u.user",
    sep="|",
    names=user_col,
)

In [3]:
# Preview the first rows of the users table.
users.head()

Unnamed: 0,user_id,age,sex,occupation,zipcode
0,1,24,M,technician,85711
1,2,53,F,other,94043
2,3,23,M,writer,32067
3,4,24,M,technician,43537
4,5,33,F,other,15213


In [4]:
# (rows, columns) of the users table.
users.shape

(943, 5)

In [5]:
# Column labels for the tab-separated ratings file, supplied via `names`.
rating_cols = ["user_id", "movie_id", "rating", "unix_timestamp"]

# Read the file and discard the timestamp column in one chain.
ratings = pd.read_csv(
    "ml-100k/u.data",
    sep="\t",
    names=rating_cols,
    encoding="latin-1",
).drop(columns=["unix_timestamp"])
ratings.head()

Unnamed: 0,user_id,movie_id,rating
0,196,242,3
1,186,302,3
2,22,377,1
3,244,51,2
4,166,346,1


In [6]:
# Column labels for the pipe-delimited item file, in file order: the
# descriptive fields first, then one column per genre name.
movie_cols = [
    "movie_id", "movie_title", "release date", "video release date", "IMDb URL",
    "unknown", "Action", "Adventure", "Animation", "Children's", "Comedy",
    "Crime", "Documentary", "Drama", "Fantasy", "Film-Noir", "Horror",
    "Musical", "Mystery", "Romance", "Sci-Fi", "Thriller", "War", "Western",
]
movies = pd.read_csv(
    "ml-100k/u.item",
    sep="|",
    names=movie_cols,
    encoding="latin-1",
)
# Print the dtype / non-null summary, then show the first rows.
movies.info()
movies.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1682 entries, 0 to 1681
Data columns (total 24 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   movie_id            1682 non-null   int64  
 1   movie_title         1682 non-null   object 
 2   release date        1681 non-null   object 
 3   video release date  0 non-null      float64
 4   IMDb URL            1679 non-null   object 
 5   unknown             1682 non-null   int64  
 6   Action              1682 non-null   int64  
 7   Adventure           1682 non-null   int64  
 8   Animation           1682 non-null   int64  
 9   Children's          1682 non-null   int64  
 10  Comedy              1682 non-null   int64  
 11  Crime               1682 non-null   int64  
 12  Documentary         1682 non-null   int64  
 13  Drama               1682 non-null   int64  
 14  Fantasy             1682 non-null   int64  
 15  Film-Noir           1682 non-null   int64  
 16  Horror

Unnamed: 0,movie_id,movie_title,release date,video release date,IMDb URL,unknown,Action,Adventure,Animation,Children's,...,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,Toy Story (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Toy%20Story%2...,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
1,2,GoldenEye (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?GoldenEye%20(...,0,1,1,0,0,...,0,0,0,0,0,0,0,1,0,0
2,3,Four Rooms (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Four%20Rooms%...,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,4,Get Shorty (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Get%20Shorty%...,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,5,Copycat (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Copycat%20(1995),0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


In [7]:
# Keep only the id and the title. Selecting the wanted columns (instead of
# dropping the unwanted ones) gives the same two-column frame, and the cell
# can be re-run safely: a second `drop` of already-removed columns would
# raise a KeyError.
movies = movies[["movie_id", "movie_title"]]

In [8]:
# Preview the trimmed movies table.
movies.head()

Unnamed: 0,movie_id,movie_title
0,1,Toy Story (1995)
1,2,GoldenEye (1995)
2,3,Four Rooms (1995)
3,4,Get Shorty (1995)
4,5,Copycat (1995)


## Final Table

In [9]:
# Attach the movie title to every rating by joining on movie_id.
# Any movie_title column left over from a previous run of this cell is
# removed first; otherwise a re-run would merge onto the already-merged
# frame and produce movie_title_x / movie_title_y instead of movie_title.
ratings = (
    ratings
    .drop(columns=["movie_title"], errors="ignore")
    .merge(movies, on="movie_id")
)
ratings.head()

Unnamed: 0,user_id,movie_id,rating,movie_title
0,196,242,3,Kolya (1996)
1,63,242,3,Kolya (1996)
2,226,242,5,Kolya (1996)
3,154,242,3,Kolya (1996)
4,306,242,5,Kolya (1996)


In [11]:
# Number of distinct values in each column of the merged table.
ratings.nunique()

user_id         943
movie_id       1682
rating            5
movie_title    1664
dtype: int64

In [12]:
# Dtypes and non-null counts of the merged table.
ratings.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 100000 entries, 0 to 99999
Data columns (total 4 columns):
 #   Column       Non-Null Count   Dtype 
---  ------       --------------   ----- 
 0   user_id      100000 non-null  int64 
 1   movie_id     100000 non-null  int64 
 2   rating       100000 non-null  int64 
 3   movie_title  100000 non-null  object
dtypes: int64(3), object(1)
memory usage: 3.8+ MB


## Summarizing the Data

In [15]:
# User x movie matrix: one row per user_id, one column per movie_title,
# each cell holding that user's rating (NaN where there is none).
# pivot_table aggregates with the mean by default, so several ratings that
# land in the same (user, title) cell are averaged.
ratings_data = pd.pivot_table(
    ratings,
    values="rating",
    index="user_id",
    columns="movie_title",
)
ratings_data

movie_title,'Til There Was You (1997),1-900 (1994),101 Dalmatians (1996),12 Angry Men (1957),187 (1997),2 Days in the Valley (1996),"20,000 Leagues Under the Sea (1954)",2001: A Space Odyssey (1968),3 Ninjas: High Noon At Mega Mountain (1998),"39 Steps, The (1935)",...,Yankee Zulu (1994),Year of the Horse (1997),You So Crazy (1994),Young Frankenstein (1974),Young Guns (1988),Young Guns II (1990),"Young Poisoner's Handbook, The (1995)",Zeus and Roxanne (1997),unknown,Á köldum klaka (Cold Fever) (1994)
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,2.0,5.0,,,3.0,4.0,,,...,,,,5.0,3.0,,,,4.0,
2,,,,,,,,,1.0,,...,,,,,,,,,,
3,,,,,2.0,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,2.0,,,,,4.0,,,...,,,,4.0,,,,,4.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
939,,,,,,,,,,,...,,,,,,,,,,
940,,,,,,,,,,,...,,,,,,,,,,
941,,,,,,,,,,,...,,,,,,,,,,
942,,,,,,,,3.0,,3.0,...,,,,,,,,,,


In [19]:
# Step 1: keep only the movie columns that have at least 10 non-missing ratings.
sufficiently_rated = ratings_data.dropna(axis="columns", thresh=10)

# Step 2: replace the remaining gaps with 0. Note that this makes "not rated"
# look like a rating of 0 to everything computed from this matrix.
ratings_data = sufficiently_rated.fillna(0)
ratings_data.head()

movie_title,101 Dalmatians (1996),12 Angry Men (1957),187 (1997),2 Days in the Valley (1996),"20,000 Leagues Under the Sea (1954)",2001: A Space Odyssey (1968),"39 Steps, The (1935)",8 1/2 (1963),Absolute Power (1997),"Abyss, The (1989)",...,Wolf (1994),"Women, The (1939)","Wonderful, Horrible Life of Leni Riefenstahl, The (1993)",Wonderland (1997),"Wrong Trousers, The (1993)",Wyatt Earp (1994),Young Frankenstein (1974),Young Guns (1988),Young Guns II (1990),"Young Poisoner's Handbook, The (1995)"
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,2.0,5.0,0.0,0.0,3.0,4.0,0.0,0.0,0.0,3.0,...,0.0,0.0,0.0,0.0,5.0,0.0,5.0,3.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0
5,2.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,5.0,0.0,4.0,0.0,0.0,0.0


In [30]:
# Pairwise Pearson correlation between the movie columns of ratings_data;
# the result is a square title x title matrix.
similar_df = ratings_data.corr(method="pearson")
similar_df

movie_title,101 Dalmatians (1996),12 Angry Men (1957),187 (1997),2 Days in the Valley (1996),"20,000 Leagues Under the Sea (1954)",2001: A Space Odyssey (1968),"39 Steps, The (1935)",8 1/2 (1963),Absolute Power (1997),"Abyss, The (1989)",...,Wolf (1994),"Women, The (1939)","Wonderful, Horrible Life of Leni Riefenstahl, The (1993)",Wonderland (1997),"Wrong Trousers, The (1993)",Wyatt Earp (1994),Young Frankenstein (1974),Young Guns (1988),Young Guns II (1990),"Young Poisoner's Handbook, The (1995)"
movie_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
101 Dalmatians (1996),1.000000,0.059246,-0.001097,0.052877,0.128745,0.078260,0.015504,0.005750,0.221025,0.121153,...,0.057741,0.051950,-0.034417,0.000720,0.074635,0.109054,0.155450,0.115195,0.039172,-0.005917
12 Angry Men (1957),0.059246,1.000000,-0.014343,0.066339,0.230274,0.298716,0.339195,0.174500,0.019790,0.156721,...,0.048741,0.145040,0.138347,-0.004803,0.170189,0.160140,0.290188,0.164959,0.079341,0.038111
187 (1997),-0.001097,-0.014343,1.000000,0.078770,-0.010333,-0.039939,-0.021414,-0.006246,0.127531,0.017270,...,0.084986,-0.024094,-0.020299,0.115321,-0.025833,-0.000838,-0.021873,0.006811,0.053843,0.063789
2 Days in the Valley (1996),0.052877,0.066339,0.078770,1.000000,0.056283,0.090983,-0.019962,-0.008210,0.245191,0.129204,...,0.087568,0.069019,-0.001841,-0.026994,0.028208,0.116497,0.061328,0.197620,0.176032,0.146777
"20,000 Leagues Under the Sea (1954)",0.128745,0.230274,-0.010333,0.056283,1.000000,0.384535,0.274525,0.118108,0.117512,0.231247,...,0.244088,0.130652,0.062662,-0.001717,0.101581,0.286848,0.309511,0.243306,0.057977,0.071111
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wyatt Earp (1994),0.109054,0.160140,-0.000838,0.116497,0.286848,0.183051,0.137189,0.041004,0.121793,0.166022,...,0.185584,0.033161,-0.023045,0.059858,0.048645,1.000000,0.242159,0.336002,0.202880,0.018951
Young Frankenstein (1974),0.155450,0.290188,-0.021873,0.061328,0.309511,0.429298,0.205543,0.170677,0.100104,0.204965,...,0.190200,0.121887,0.029982,-0.012047,0.313386,0.242159,1.000000,0.294564,0.188973,0.051973
Young Guns (1988),0.115195,0.164959,0.006811,0.197620,0.243306,0.168440,0.057132,0.035488,0.136609,0.310994,...,0.331909,0.053611,0.008057,0.002881,0.090600,0.336002,0.294564,1.000000,0.599633,0.037991
Young Guns II (1990),0.039172,0.079341,0.053843,0.176032,0.057977,0.071705,0.025560,0.022169,0.135415,0.210445,...,0.229470,-0.025195,-0.021226,-0.019674,0.033696,0.202880,0.188973,0.599633,1.000000,0.012340


## Building the Recommendation Model

In [61]:
def similar_movies(movie_title, rating, neutral_rating=2.5):
    """Score every movie in ``similar_df`` against one rated movie.

    The column of ``similar_df`` for ``movie_title`` is multiplied by
    ``rating - neutral_rating``. A rating above ``neutral_rating`` therefore
    ranks positively correlated movies first, while a rating below it flips
    the sign so those movies sink to the bottom.

    Parameters
    ----------
    movie_title : str
        Column label in the module-level ``similar_df`` matrix.
    rating : int or float
        The rating given to ``movie_title``.
    neutral_rating : float, default 2.5
        Rating treated as neither like nor dislike; it is subtracted from
        ``rating`` to obtain the weight.

    Returns
    -------
    pandas.Series
        Weighted scores indexed by movie title, sorted in descending order.
        The entry for ``movie_title`` itself is included.

    Raises
    ------
    KeyError
        If ``movie_title`` is not a column of ``similar_df``.
    """
    if movie_title not in similar_df.columns:
        # Same exception type as a plain column lookup, with a clearer message.
        raise KeyError(
            f"{movie_title!r} is not a column of similar_df; only titles "
            "present in the similarity matrix can be scored"
        )
    weight = rating - neutral_rating
    score = similar_df[movie_title] * weight
    return score.sort_values(ascending=False)

In [80]:
# Example: score every movie against "Wolf (1994)" rated 5.
similar_movies("Wolf (1994)",5)

movie_title
Wolf (1994)                          2.500000
Interview with the Vampire (1994)    1.238520
Nightmare on Elm Street, A (1984)    1.194780
Natural Born Killers (1994)          1.162235
Candyman (1992)                      1.130016
                                       ...   
Assignment, The (1997)              -0.084858
Raise the Red Lantern (1991)        -0.085277
Walkabout (1971)                    -0.085324
Soul Food (1997)                    -0.095260
Smile Like Yours, A (1997)          -0.100348
Name: Wolf (1994), Length: 1144, dtype: float64

# Movie Recommender 

In [123]:
# userdata is a list of (movie_title, rating) pairs; ratings are between 1 and 5.
userdata = [("Wolf (1994)",5),("Candyman (1992)",4),("Nightmare on Elm Street, A (1984)",4)]

# One row of scores per rated movie, built in a single step.
# DataFrame.append was removed in pandas 2.0, so the rows are collected in a
# list and turned into a frame once instead of appending inside a loop.
recommended_movie = pd.DataFrame(
    [similar_movies(movie_title, rating) for movie_title, rating in userdata]
).reset_index(drop=True)  # plain 0..n-1 row labels, as ignore_index=True gave

# Add up the per-movie scores across all rated movies and rank them.
# The titles in userdata are not filtered out, so they appear in the ranking too.
recommended = recommended_movie.sum().sort_values(ascending = False)
recommended.head(10)

Wolf (1994)                               3.894878
Nightmare on Elm Street, A (1984)         3.633084
Candyman (1992)                           3.568321
Interview with the Vampire (1994)         2.839155
Natural Born Killers (1994)               2.682538
Bram Stoker's Dracula (1992)              2.543459
Cape Fear (1991)                          2.433487
American Werewolf in London, An (1981)    2.408723
Candyman: Farewell to the Flesh (1995)    2.364817
Wes Craven's New Nightmare (1994)         2.349270
dtype: float64