In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

from sklearn.metrics.pairwise import linear_kernel
from sklearn.metrics import pairwise_distances
from sklearn.model_selection import train_test_split, LeaveOneOut
from surprise.model_selection import train_test_split as tts
from scipy.spatial.distance import cosine, correlation
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler

from scipy import sparse


In [2]:
os.listdir('../../Data')

['hitrates.csv',
 'ml-latest-small',
 'movies_processed.csv',
 'movie_diversity.csv',
 'popularity_ratings.csv',
 'popularity_table.csv',
 'ratings_processed.csv',
 'user_diversity.csv']

In [3]:
popularityTable = pd.read_csv('../../Data/popularity_table.csv', index_col=[0])
popRatings = pd.read_csv('../../Data/popularity_ratings.csv', index_col=[0])

In [4]:
movies = pd.read_csv('../../Data/movies_processed.csv', index_col=[0])#, parse_dates=['year'])
ratings = pd.read_csv('../../Data/ratings_processed.csv', index_col=[0])#, parse_dates=['timestamp'])

In [5]:
movies.head()

Unnamed: 0,movieId,title,short_title,year,(no genres listed),Action,Adventure,Animation,Children,Comedy,...,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,Toy Story (1995),Toy Story,1995,0,0,1,1,1,1,...,0,0,0,0,0,0,0,0,0,0
1,2,Jumanji (1995),Jumanji,1995,0,0,1,0,1,0,...,0,0,0,0,0,0,0,0,0,0
2,3,Grumpier Old Men (1995),Grumpier Old Men,1995,0,0,0,0,0,1,...,0,0,0,0,0,1,0,0,0,0
3,4,Waiting to Exhale (1995),Waiting to Exhale,1995,0,0,0,0,0,1,...,0,0,0,0,0,1,0,0,0,0
4,5,Father of the Bride Part II (1995),Father of the Bride Part II,1995,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0


In [6]:
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,07-30-2000
1,1,3,4.0,07-30-2000
2,1,6,4.0,07-30-2000
3,1,47,5.0,07-30-2000
4,1,50,5.0,07-30-2000


In [7]:
movies.columns

Index(['movieId', 'title', 'short_title', 'year', '(no genres listed)',
       'Action', 'Adventure', 'Animation', 'Children', 'Comedy', 'Crime',
       'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'IMAX',
       'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War',
       'Western'],
      dtype='object')

In [8]:
feats = ['year', '(no genres listed)',
       'Action', 'Adventure', 'Animation', 'Children', 'Comedy', 'Crime',
       'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'IMAX',
       'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War',
       'Western']

In [9]:
cosine_similarity(movies.drop(['title', 'short_title'], axis=1).iloc[0].values.reshape(1, -1), 
                  movies.drop(['title', 'short_title'], axis=1).iloc[1].values.reshape(1, -1))

array([[0.99999962]])

In [10]:
cosine_similarity(movies[feats].iloc[0].values.reshape(1, -1), 
                  movies[feats].iloc[1].values.reshape(1, -1))

array([[0.99999975]])

In [11]:
cosine_similarity(movies[feats].iloc[0].values.reshape(1, -1), 
                  movies[feats].iloc[2355].values.reshape(1, -1))

array([[1.]])

In [12]:
ratings[ratings.userId == 3].merge(movies[['movieId', 'title']], on='movieId')

Unnamed: 0,userId,movieId,rating,timestamp,title
0,3,31,0.5,05-27-2011,Dangerous Minds (1995)
1,3,527,0.5,05-27-2011,Schindler's List (1993)
2,3,647,0.5,05-27-2011,Courage Under Fire (1996)
3,3,688,0.5,05-27-2011,Operation Dumbo Drop (1995)
4,3,720,0.5,05-27-2011,Wallace & Gromit: The Best of Aardman Animatio...
5,3,849,5.0,05-27-2011,Escape from L.A. (1996)
6,3,914,0.5,05-27-2011,My Fair Lady (1964)
7,3,1093,0.5,05-27-2011,"Doors, The (1991)"
8,3,1124,0.5,05-27-2011,On Golden Pond (1981)
9,3,1263,0.5,05-27-2011,"Deer Hunter, The (1978)"


In [13]:
recents = movies[movies.year == 2018]

In [14]:
movies_sparse = sparse.csr_matrix(movies.drop(['title', 'short_title'], axis=1))

similarities = cosine_similarity(movies_sparse)

similarities = pd.DataFrame(similarities)
similarities.index = movies.set_index(movies.movieId).index
similarities.columns = movies.set_index(movies.movieId).index
similarities

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.000000,1.000000,0.999999,0.999998,0.999997,0.999996,0.999995,0.999993,0.999991,0.999989,...,0.010885,0.010900,0.010905,0.010910,0.010910,0.010920,0.010920,0.010920,0.010925,0.010784
2,1.000000,1.000000,0.999999,0.999999,0.999998,0.999997,0.999996,0.999995,0.999993,0.999991,...,0.011386,0.011401,0.011406,0.011411,0.011411,0.011421,0.011421,0.011421,0.011426,0.011286
3,0.999999,0.999999,1.000000,1.000000,0.999999,0.999998,0.999998,0.999996,0.999995,0.999993,...,0.011887,0.011903,0.011908,0.011913,0.011912,0.011923,0.011922,0.011922,0.011927,0.011787
4,0.999998,0.999999,1.000000,1.000000,1.000000,0.999999,0.999999,0.999997,0.999996,0.999995,...,0.012388,0.012404,0.012409,0.012414,0.012413,0.012424,0.012424,0.012424,0.012429,0.012288
5,0.999997,0.999998,0.999999,1.000000,1.000000,0.999999,0.999999,0.999998,0.999998,0.999996,...,0.012890,0.012905,0.012910,0.012915,0.012915,0.012925,0.012925,0.012925,0.012930,0.012789
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193581,0.010920,0.011421,0.011923,0.012424,0.012925,0.013426,0.013927,0.014429,0.014930,0.015431,...,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
193583,0.010920,0.011421,0.011922,0.012424,0.012925,0.013426,0.013927,0.014428,0.014930,0.015431,...,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
193585,0.010920,0.011421,0.011922,0.012424,0.012925,0.013426,0.013927,0.014428,0.014930,0.015431,...,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
193587,0.010925,0.011426,0.011927,0.012429,0.012930,0.013431,0.013932,0.014433,0.014935,0.015436,...,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000


In [15]:
similaritiesWithNames = similarities.copy(deep=True)
similaritiesWithNames.index = movies.set_index(movies.short_title).index
similaritiesWithNames.columns = movies.set_index(movies.short_title).index
similaritiesWithNames

short_title,Toy Story,Jumanji,Grumpier Old Men,Waiting to Exhale,Father of the Bride Part II,Heat,Sabrina,Tom and Huck,Sudden Death,GoldenEye,...,Gintama: The Movie,anohana: The Flower We Saw That Day - The Movie,Silver Spoon,Love Live! The School Idol Movie,Jon Stewart Has Left the Building,Black Butler: Book of the Atlantic,No Game No Life: Zero,Flint,Bungo Stray Dogs: Dead Apple,Andrew Dice Clay: Dice Rules
short_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Toy Story,1.000000,1.000000,0.999999,0.999998,0.999997,0.999996,0.999995,0.999993,0.999991,0.999989,...,0.010885,0.010900,0.010905,0.010910,0.010910,0.010920,0.010920,0.010920,0.010925,0.010784
Jumanji,1.000000,1.000000,0.999999,0.999999,0.999998,0.999997,0.999996,0.999995,0.999993,0.999991,...,0.011386,0.011401,0.011406,0.011411,0.011411,0.011421,0.011421,0.011421,0.011426,0.011286
Grumpier Old Men,0.999999,0.999999,1.000000,1.000000,0.999999,0.999998,0.999998,0.999996,0.999995,0.999993,...,0.011887,0.011903,0.011908,0.011913,0.011912,0.011923,0.011922,0.011922,0.011927,0.011787
Waiting to Exhale,0.999998,0.999999,1.000000,1.000000,1.000000,0.999999,0.999999,0.999997,0.999996,0.999995,...,0.012388,0.012404,0.012409,0.012414,0.012413,0.012424,0.012424,0.012424,0.012429,0.012288
Father of the Bride Part II,0.999997,0.999998,0.999999,1.000000,1.000000,0.999999,0.999999,0.999998,0.999998,0.999996,...,0.012890,0.012905,0.012910,0.012915,0.012915,0.012925,0.012925,0.012925,0.012930,0.012789
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Black Butler: Book of the Atlantic,0.010920,0.011421,0.011923,0.012424,0.012925,0.013426,0.013927,0.014429,0.014930,0.015431,...,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
No Game No Life: Zero,0.010920,0.011421,0.011922,0.012424,0.012925,0.013426,0.013927,0.014428,0.014930,0.015431,...,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
Flint,0.010920,0.011421,0.011922,0.012424,0.012925,0.013426,0.013927,0.014428,0.014930,0.015431,...,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
Bungo Stray Dogs: Dead Apple,0.010925,0.011426,0.011927,0.012429,0.012930,0.013431,0.013932,0.014433,0.014935,0.015436,...,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000


In [16]:
similaritiesWithNames.loc['Toy Story 2'].sort_values(ascending=False).head(20)

short_title
Toy Story 2                 1.000000
Flawless                    1.000000
Jeremiah Johnson            1.000000
Maurice                     1.000000
End of Days                 1.000000
River Runs Through It, A    1.000000
Stanley & Iris              1.000000
Tora! Tora! Tora!           1.000000
Ride with the Devil         1.000000
Matewan                     0.999999
Awakenings                  0.999999
Backdraft                   0.999999
Fatal Attraction            0.999999
Fisher King, The            0.999999
Come See the Paradise       0.999999
Longest Day, The            0.999999
Kagemusha                   0.999999
Midnight Run                0.999999
Natural, The                0.999999
Scrooged                    0.999999
Name: Toy Story 2, dtype: float64

In [17]:
usr80 = ratings[ratings.userId == 80].merge(movies[['movieId', 'title']], on='movieId')
usr80.T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,157,158,159,160,161,162,163,164,165,166
userId,80,80,80,80,80,80,80,80,80,80,...,80,80,80,80,80,80,80,80,80,80
movieId,32,50,150,318,431,593,648,858,1200,1214,...,96917,98361,98961,100383,100810,101864,102125,103075,103249,103688
rating,4.0,4.5,4.0,5.0,4.0,5.0,4.5,4.0,4.5,5.0,...,4.0,4.0,4.5,4.5,4.0,4.0,4.0,4.0,4.0,4.0
timestamp,08-24-2013,08-24-2013,08-24-2013,08-24-2013,08-24-2013,08-25-2013,08-24-2013,08-24-2013,08-25-2013,08-24-2013,...,08-25-2013,08-24-2013,08-24-2013,08-24-2013,08-25-2013,08-24-2013,08-24-2013,08-25-2013,08-25-2013,08-24-2013
title,Twelve Monkeys (a.k.a. 12 Monkeys) (1995),"Usual Suspects, The (1995)",Apollo 13 (1995),"Shawshank Redemption, The (1994)",Carlito's Way (1993),"Silence of the Lambs, The (1991)",Mission: Impossible (1996),"Godfather, The (1972)",Aliens (1986),Alien (1979),...,House at the End of the Street (2012),Byzantium (2012),Zero Dark Thirty (2012),Side Effects (2013),Dark Skies (2013),Oblivion (2013),Iron Man 3 (2013),"Purge, The (2013)",World War Z (2013),"Conjuring, The (2013)"


In [18]:
pred80 = similaritiesWithNames.loc['Iron Man 3'].sort_values(ascending=False)[1:51]
pred80

short_title
Grandmaster, The                        1.0
This Is the End                         1.0
Resolution                              1.0
Maniac Cop 2                            1.0
Grabbers                                1.0
Justice League: Doom                    1.0
Mezzo Forte                             1.0
English Teacher, The                    1.0
Disconnect                              1.0
Pain & Gain                             1.0
Wolf Children                           1.0
Hulk Vs.                                1.0
Mud                                     1.0
Pawn                                    1.0
Invincible Iron Man, The                1.0
Dark Tide                               1.0
42                                      1.0
Oblivion                                1.0
Syrup                                   1.0
Great Gatsby, The                       1.0
Star Trek Into Darkness                 1.0
Angst                                   1.0
Perfect Plan, A     

In [19]:
rat80 = ratings[ratings.userId == 80].merge(movies[['movieId', 'short_title']], on='movieId')[['short_title', 'movieId']].set_index('short_title')
rat80

Unnamed: 0_level_0,movieId
short_title,Unnamed: 1_level_1
Twelve Monkeys,32
"Usual Suspects, The",50
Apollo 13,150
"Shawshank Redemption, The",318
Carlito's Way,431
...,...
Oblivion,101864
Iron Man 3,102125
"Purge, The",103075
World War Z,103249


In [20]:
rat80.iloc[162] == 'Oblivion'

movieId    False
Name: Oblivion, dtype: bool

In [21]:
rat80.loc['Oblivion']

movieId    101864
Name: Oblivion, dtype: int64

In [22]:
'Oblivion' in rat80.index

True

In [23]:
thing = []
for i in pred80.index:
    thing1 = i in rat80.index
    thing.append(thing1)
sum(thing)

1

In [24]:
ratings.groupby('movieId')['rating'].mean().mean()

3.2624251207839774

In [25]:
numUsers = len(ratings.userId.unique())
userAvg = ratings.rating.mean()
numMovies = len(ratings.movieId.unique())
ratingAvg = ratings.groupby('movieId')['rating'].mean().mean()

In [26]:
total_rating = (numUsers*userAvg + numMovies*ratingAvg) / (numUsers + numMovies)
total_rating

3.2765437327505316

In [27]:
ratings['rating_new'] = ratings.rating - total_rating

In [28]:
pivot_table_user = pd.pivot_table(data=ratings,values='rating_new',index='userId',columns='movieId')
pivot_table_user = pivot_table_user.fillna(0)
pivot_table_movie = pd.pivot_table(data=ratings,values='rating',index='userId',columns='movieId')
pivot_table_movie = pivot_table_movie.fillna(0)

In [29]:
pivot_table_movie

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,2.5,0.0,0.0,0.0,0.0,0.0,2.5,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
607,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
608,2.5,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
609,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [30]:
user_based_similarity = 1 - pairwise_distances( pivot_table_user.values, metric="cosine" )
movie_based_similarity = 1 - pairwise_distances( pivot_table_movie.T.values, metric="cosine" )

In [31]:
user_based_similarity = pd.DataFrame(user_based_similarity)
user_based_similarity.columns = user_based_similarity.columns+1
user_based_similarity.index = user_based_similarity.index+1

movie_based_similarity = pd.DataFrame(movie_based_similarity)
movie_based_similarity.columns = movie_based_similarity.columns+1
movie_based_similarity.index = movie_based_similarity.index+1

In [32]:
movies.columns

Index(['movieId', 'title', 'short_title', 'year', '(no genres listed)',
       'Action', 'Adventure', 'Animation', 'Children', 'Comedy', 'Crime',
       'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'IMAX',
       'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War',
       'Western'],
      dtype='object')

In [33]:
def rec_movie(movie_id, moviedf=movies):
    temp_table = pd.DataFrame(columns = moviedf.columns)
    movies = movie_based_similarity[movie_id].sort_values(ascending = False).index.tolist()[:11]
    for mov in movies:
#         display(items[items['movie id'] == mov])
        temp_table = temp_table.append(moviedf[moviedf['movieId'] == mov], ignore_index=True)
    return temp_table
def rec_user(user_id, ratingdf=ratings):
    temp_table = pd.DataFrame(columns = ratings.columns)
    us = user_based_similarity[user_id].sort_values(ascending = False).index.tolist()[:101]
    for u in us:
#         display(items[items['movie id'] == mov])
        temp_table = temp_table.append(ratingdf[ratingdf['userId'] == u], ignore_index=True)
    return temp_table

In [34]:
display(rec_movie(176))
display(rec_movie(11))

Unnamed: 0,movieId,title,short_title,year,(no genres listed),Action,Adventure,Animation,Children,Comedy,...,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,176,Living in Oblivion (1995),Living in Oblivion,1995,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
1,166,"Doom Generation, The (1995)","Doom Generation, The",1995,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
2,237,Forget Paris (1995),Forget Paris,1995,0,0,0,0,0,1,...,0,0,0,0,0,1,0,0,0,0
3,1301,Forbidden Planet (1956),Forbidden Planet,1956,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
4,190,Safe (1995),Safe,1995,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
5,15,Cutthroat Island (1995),Cutthroat Island,1995,0,1,1,0,0,0,...,0,0,0,0,0,1,0,0,0,0
6,648,Mission: Impossible (1996),Mission: Impossible,1996,0,1,1,0,0,0,...,0,0,0,0,1,0,0,1,0,0
7,242,Farinelli: il castrato (1994),Farinelli: il castrato,1994,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
8,243,Gordy (1995),Gordy,1995,0,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
9,4689,"Cat o' Nine Tails, The (Gatto a nove code, Il)...","Cat o' Nine Tails, The",1971,0,0,0,0,0,0,...,0,0,0,0,1,0,0,1,0,0


Unnamed: 0,movieId,title,short_title,year,(no genres listed),Action,Adventure,Animation,Children,Comedy,...,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,11,"American President, The (1995)","American President, The",1995,0,0,0,0,0,1,...,0,0,0,0,0,1,0,0,0,0
1,515,"Remains of the Day, The (1993)","Remains of the Day, The",1993,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
2,384,Bad Company (1995),Bad Company,1995,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,473,In the Army Now (1994),In the Army Now,1994,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,1,0
4,506,Orlando (1992),Orlando,1992,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
5,316,Stargate (1994),Stargate,1994,0,1,1,0,0,0,...,0,0,0,0,0,0,1,0,0,0
6,509,"Piano, The (1993)","Piano, The",1993,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
7,338,Virtuosity (1995),Virtuosity,1995,0,1,0,0,0,0,...,0,0,0,0,0,0,1,1,0,0
8,21,Get Shorty (1995),Get Shorty,1995,0,0,0,0,0,1,...,0,0,0,0,0,0,0,1,0,0


In [35]:
rec_user(80).merge(movies[['movieId', 'title']], on='movieId')[:50]

Unnamed: 0,userId,movieId,rating,timestamp,rating_new,title
0,80,32,4.0,08-24-2013,0.723456,Twelve Monkeys (a.k.a. 12 Monkeys) (1995)
1,239,32,5.0,09-11-2008,1.723456,Twelve Monkeys (a.k.a. 12 Monkeys) (1995)
2,249,32,5.0,09-04-2012,1.723456,Twelve Monkeys (a.k.a. 12 Monkeys) (1995)
3,166,32,4.0,09-06-2007,0.723456,Twelve Monkeys (a.k.a. 12 Monkeys) (1995)
4,18,32,4.0,02-11-2016,0.723456,Twelve Monkeys (a.k.a. 12 Monkeys) (1995)
5,580,32,5.0,01-03-2007,1.723456,Twelve Monkeys (a.k.a. 12 Monkeys) (1995)
6,376,32,5.0,04-03-2013,1.723456,Twelve Monkeys (a.k.a. 12 Monkeys) (1995)
7,122,32,5.0,04-25-2016,1.723456,Twelve Monkeys (a.k.a. 12 Monkeys) (1995)
8,282,32,4.0,09-06-2013,0.723456,Twelve Monkeys (a.k.a. 12 Monkeys) (1995)
9,254,32,4.5,05-29-2007,1.223456,Twelve Monkeys (a.k.a. 12 Monkeys) (1995)


In [36]:
rec_movie(32)

Unnamed: 0,movieId,title,short_title,year,(no genres listed),Action,Adventure,Animation,Children,Comedy,...,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,32,Twelve Monkeys (a.k.a. 12 Monkeys) (1995),Twelve Monkeys,1995,0,0,0,0,0,0,...,0,0,0,0,1,0,1,1,0,0
1,258,"Kid in King Arthur's Court, A (1995)","Kid in King Arthur's Court, A",1995,0,0,1,0,1,1,...,0,0,0,0,0,1,0,0,0,0
2,508,Philadelphia (1993),Philadelphia,1993,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,616,"Aristocats, The (1970)","Aristocats, The",1970,0,0,0,1,1,0,...,0,0,0,0,0,0,0,0,0,0
4,44,Mortal Kombat (1995),Mortal Kombat,1995,0,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,521,Romeo Is Bleeding (1993),Romeo Is Bleeding,1993,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
6,47,Seven (a.k.a. Se7en) (1995),Seven,1995,0,0,0,0,0,0,...,0,0,0,0,1,0,0,1,0,0
7,419,"Beverly Hillbillies, The (1993)","Beverly Hillbillies, The",1993,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
8,225,Disclosure (1994),Disclosure,1994,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
9,6,Heat (1995),Heat,1995,0,1,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


In [37]:
rec_user(239).merge(movies[['movieId', 'title']], on='movieId')[:50]

Unnamed: 0,userId,movieId,rating,timestamp,rating_new,title
0,239,1,4.0,09-11-2008,0.723456,Toy Story (1995)
1,166,1,5.0,09-16-2007,1.723456,Toy Story (1995)
2,17,1,4.5,05-18-2011,1.223456,Toy Story (1995)
3,282,1,4.5,09-06-2013,1.223456,Toy Story (1995)
4,573,1,5.0,08-10-2007,1.723456,Toy Story (1995)
5,220,1,5.0,12-23-2008,1.723456,Toy Story (1995)
6,580,1,3.0,01-03-2007,-0.276544,Toy Story (1995)
7,18,1,3.5,02-11-2016,0.223456,Toy Story (1995)
8,434,1,4.0,04-07-2010,0.723456,Toy Story (1995)
9,45,1,4.0,02-21-2000,0.723456,Toy Story (1995)


In [38]:
movie_based_similarity

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,9713,9714,9715,9716,9717,9718,9719,9720,9721,9722
1,1.000000,0.410562,0.296917,0.035573,0.308762,0.376316,0.277491,0.131629,0.232586,0.395573,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.410562,1.000000,0.282438,0.106415,0.287795,0.297009,0.228576,0.172498,0.044835,0.417693,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.296917,0.282438,1.000000,0.092406,0.417802,0.284257,0.402831,0.313434,0.304840,0.242954,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.035573,0.106415,0.092406,1.000000,0.188376,0.089685,0.275035,0.158022,0.000000,0.095598,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.308762,0.287795,0.417802,0.188376,1.000000,0.298969,0.474002,0.283523,0.335058,0.218061,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9718,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0
9719,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0
9720,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0
9721,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0


In [39]:
user_based_similarity.to_csv('../../Data/user_cosine_similarity.csv')
movie_based_similarity.to_csv('../../Data/movie_cosine_similarity.csv')

In [40]:
similarities.to_csv('../../Data/cos_sim_id.csv')
similaritiesWithNames.to_csv('../../Data/cos_sim_names.csv')

In [41]:
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp,rating_new
0,1,1,4.0,07-30-2000,0.723456
1,1,3,4.0,07-30-2000,0.723456
2,1,6,4.0,07-30-2000,0.723456
3,1,47,5.0,07-30-2000,1.723456
4,1,50,5.0,07-30-2000,1.723456


In [42]:
ratings.to_csv('../../Data/ratings_processed1.csv')