In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

from sklearn.metrics.pairwise import linear_kernel
from sklearn.metrics import pairwise_distances
from sklearn.model_selection import train_test_split, LeaveOneOut
from surprise.model_selection import train_test_split as tts
from scipy.spatial.distance import cosine, correlation
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler

from scipy import sparse


In [2]:
os.listdir('../../Data')

['cos_sim_id.csv',
 'cos_sim_names.csv',
 'ml-latest-small',
 'movies_processed.csv',
 'movie_cosine_similarity.csv',
 'movie_diversity.csv',
 'popularity_ratings.csv',
 'popularity_table.csv',
 'ratings_processed.csv',
 'ratings_processed1.csv',
 'user_cosine_similarity.csv',
 'user_diversity.csv']

In [3]:
popularityTable = pd.read_csv('../../Data/popularity_table.csv', index_col=[0])
popRatings = pd.read_csv('../../Data/popularity_ratings.csv', index_col=[0])

In [4]:
movies = pd.read_csv('../../Data/movies_processed.csv', index_col=[0])#, parse_dates=['year'])
ratings = pd.read_csv('../../Data/ratings_processed.csv', index_col=[0])#, parse_dates=['timestamp'])

In [5]:
movies.head()

Unnamed: 0,movieId,title,short_title,year,(no genres listed),Action,Adventure,Animation,Children,Comedy,...,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,Toy Story (1995),Toy Story,1995,0,0,1,1,1,1,...,0,0,0,0,0,0,0,0,0,0
1,2,Jumanji (1995),Jumanji,1995,0,0,1,0,1,0,...,0,0,0,0,0,0,0,0,0,0
2,3,Grumpier Old Men (1995),Grumpier Old Men,1995,0,0,0,0,0,1,...,0,0,0,0,0,1,0,0,0,0
3,4,Waiting to Exhale (1995),Waiting to Exhale,1995,0,0,0,0,0,1,...,0,0,0,0,0,1,0,0,0,0
4,5,Father of the Bride Part II (1995),Father of the Bride Part II,1995,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0


In [6]:
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,07-30-2000
1,1,3,4.0,07-30-2000
2,1,6,4.0,07-30-2000
3,1,47,5.0,07-30-2000
4,1,50,5.0,07-30-2000


In [7]:
movies.columns

Index(['movieId', 'title', 'short_title', 'year', '(no genres listed)',
       'Action', 'Adventure', 'Animation', 'Children', 'Comedy', 'Crime',
       'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'IMAX',
       'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War',
       'Western'],
      dtype='object')

In [8]:
feats = ['movieId', 'year', '(no genres listed)',
       'Action', 'Adventure', 'Animation', 'Children', 'Comedy', 'Crime',
       'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'IMAX',
       'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War',
       'Western']

In [9]:
from scipy.spatial.distance import hamming

In [10]:
hamming(movies.drop(['title', 'short_title'], axis=1).iloc[0].values.reshape(1, -1), 
                  movies.drop(['title', 'short_title'], axis=1).iloc[9739].values.reshape(1, -1))

0.2727272727272727

In [11]:
movies.iloc[9739]

movieId                                            193609
title                 Andrew Dice Clay: Dice Rules (1991)
short_title                  Andrew Dice Clay: Dice Rules
year                                                 1991
(no genres listed)                                      0
Action                                                  0
Adventure                                               0
Animation                                               0
Children                                                0
Comedy                                                  1
Crime                                                   0
Documentary                                             0
Drama                                                   0
Fantasy                                                 0
Film-Noir                                               0
Horror                                                  0
IMAX                                                    0
Musical       

In [12]:
hamming(movies[feats].iloc[0].values.reshape(1, -1), 
                  movies[feats].iloc[9739].values.reshape(1, -1))

0.2727272727272727

In [13]:
hamming(movies.drop(['title', 'short_title'], axis=1).iloc[0].values.reshape(1, -1), 
                  movies.drop(['title', 'short_title'], axis=1).iloc[1].values.reshape(1, -1))

0.13636363636363635

In [14]:
hamming(movies.drop(['title', 'short_title'], axis=1).iloc[0].values.reshape(1, -1), 
                  movies.drop(['title', 'short_title'], axis=1).iloc[2355].values.reshape(1, -1))

0.09090909090909091

In [15]:
cosine_similarity(movies.drop(['title', 'short_title'], axis=1).iloc[0].values.reshape(1, -1), 
                  movies.drop(['title', 'short_title'], axis=1).iloc[1].values.reshape(1, -1))

array([[0.99999962]])

In [16]:
cosine_similarity(movies[feats].iloc[0].values.reshape(1, -1), 
                  movies[feats].iloc[1].values.reshape(1, -1))

array([[0.99999962]])

In [17]:
movies[movies.title.str.contains('Toy Story')]

Unnamed: 0,movieId,title,short_title,year,(no genres listed),Action,Adventure,Animation,Children,Comedy,...,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,Toy Story (1995),Toy Story,1995,0,0,1,1,1,1,...,0,0,0,0,0,0,0,0,0,0
2355,3114,Toy Story 2 (1999),Toy Story 2,1999,0,0,1,1,1,1,...,0,0,0,0,0,0,0,0,0,0
7355,78499,Toy Story 3 (2010),Toy Story 3,2010,0,0,1,1,1,1,...,0,0,1,0,0,0,0,0,0,0


In [18]:
cosine_similarity(movies[feats].iloc[0].values.reshape(1, -1), 
                  movies[feats].iloc[2355].values.reshape(1, -1))

array([[0.54063313]])

In [19]:
ratings[ratings.userId == 3].merge(movies[['movieId', 'short_title']], on='movieId')

Unnamed: 0,userId,movieId,rating,timestamp,short_title
0,3,31,0.5,05-27-2011,Dangerous Minds
1,3,527,0.5,05-27-2011,Schindler's List
2,3,647,0.5,05-27-2011,Courage Under Fire
3,3,688,0.5,05-27-2011,Operation Dumbo Drop
4,3,720,0.5,05-27-2011,Wallace & Gromit: The Best of Aardman Animation
5,3,849,5.0,05-27-2011,Escape from L.A.
6,3,914,0.5,05-27-2011,My Fair Lady
7,3,1093,0.5,05-27-2011,"Doors, The"
8,3,1124,0.5,05-27-2011,On Golden Pond
9,3,1263,0.5,05-27-2011,"Deer Hunter, The"


In [20]:
recents = movies[movies.year == 2018]

In [21]:
movies_sparse = sparse.csr_matrix(movies.drop(['title', 'short_title'], axis=1))

similarities = cosine_similarity(movies_sparse)

similarities = pd.DataFrame(similarities)
similarities.index = movies.set_index(movies.short_title).index
similarities.columns = movies.set_index(movies.short_title).index
similarities

short_title,Toy Story,Jumanji,Grumpier Old Men,Waiting to Exhale,Father of the Bride Part II,Heat,Sabrina,Tom and Huck,Sudden Death,GoldenEye,...,Gintama: The Movie,anohana: The Flower We Saw That Day - The Movie,Silver Spoon,Love Live! The School Idol Movie,Jon Stewart Has Left the Building,Black Butler: Book of the Atlantic,No Game No Life: Zero,Flint,Bungo Stray Dogs: Dead Apple,Andrew Dice Clay: Dice Rules
short_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Toy Story,1.000000,1.000000,0.999999,0.999998,0.999997,0.999996,0.999995,0.999993,0.999991,0.999989,...,0.010885,0.010900,0.010905,0.010910,0.010910,0.010920,0.010920,0.010920,0.010925,0.010784
Jumanji,1.000000,1.000000,0.999999,0.999999,0.999998,0.999997,0.999996,0.999995,0.999993,0.999991,...,0.011386,0.011401,0.011406,0.011411,0.011411,0.011421,0.011421,0.011421,0.011426,0.011286
Grumpier Old Men,0.999999,0.999999,1.000000,1.000000,0.999999,0.999998,0.999998,0.999996,0.999995,0.999993,...,0.011887,0.011903,0.011908,0.011913,0.011912,0.011923,0.011922,0.011922,0.011927,0.011787
Waiting to Exhale,0.999998,0.999999,1.000000,1.000000,1.000000,0.999999,0.999999,0.999997,0.999996,0.999995,...,0.012388,0.012404,0.012409,0.012414,0.012413,0.012424,0.012424,0.012424,0.012429,0.012288
Father of the Bride Part II,0.999997,0.999998,0.999999,1.000000,1.000000,0.999999,0.999999,0.999998,0.999998,0.999996,...,0.012890,0.012905,0.012910,0.012915,0.012915,0.012925,0.012925,0.012925,0.012930,0.012789
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Black Butler: Book of the Atlantic,0.010920,0.011421,0.011923,0.012424,0.012925,0.013426,0.013927,0.014429,0.014930,0.015431,...,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
No Game No Life: Zero,0.010920,0.011421,0.011922,0.012424,0.012925,0.013426,0.013927,0.014428,0.014930,0.015431,...,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
Flint,0.010920,0.011421,0.011922,0.012424,0.012925,0.013426,0.013927,0.014428,0.014930,0.015431,...,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
Bungo Stray Dogs: Dead Apple,0.010925,0.011426,0.011927,0.012429,0.012930,0.013431,0.013932,0.014433,0.014935,0.015436,...,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000


In [22]:
similarities.loc['Toy Story 2'].sort_values(ascending=False).head(20)

short_title
Toy Story 2                 1.000000
Flawless                    1.000000
Jeremiah Johnson            1.000000
Maurice                     1.000000
End of Days                 1.000000
River Runs Through It, A    1.000000
Stanley & Iris              1.000000
Tora! Tora! Tora!           1.000000
Ride with the Devil         1.000000
Matewan                     0.999999
Awakenings                  0.999999
Backdraft                   0.999999
Fatal Attraction            0.999999
Fisher King, The            0.999999
Come See the Paradise       0.999999
Longest Day, The            0.999999
Kagemusha                   0.999999
Midnight Run                0.999999
Natural, The                0.999999
Scrooged                    0.999999
Name: Toy Story 2, dtype: float64

In [23]:
usr80 = ratings[ratings.userId == 80].merge(movies[['movieId', 'short_title']], on='movieId')
usr80.T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,157,158,159,160,161,162,163,164,165,166
userId,80,80,80,80,80,80,80,80,80,80,...,80,80,80,80,80,80,80,80,80,80
movieId,32,50,150,318,431,593,648,858,1200,1214,...,96917,98361,98961,100383,100810,101864,102125,103075,103249,103688
rating,4.0,4.5,4.0,5.0,4.0,5.0,4.5,4.0,4.5,5.0,...,4.0,4.0,4.5,4.5,4.0,4.0,4.0,4.0,4.0,4.0
timestamp,08-24-2013,08-24-2013,08-24-2013,08-24-2013,08-24-2013,08-25-2013,08-24-2013,08-24-2013,08-25-2013,08-24-2013,...,08-25-2013,08-24-2013,08-24-2013,08-24-2013,08-25-2013,08-24-2013,08-24-2013,08-25-2013,08-25-2013,08-24-2013
short_title,Twelve Monkeys,"Usual Suspects, The",Apollo 13,"Shawshank Redemption, The",Carlito's Way,"Silence of the Lambs, The",Mission: Impossible,"Godfather, The",Aliens,Alien,...,House at the End of the Street,Byzantium,Zero Dark Thirty,Side Effects,Dark Skies,Oblivion,Iron Man 3,"Purge, The",World War Z,"Conjuring, The"


In [24]:
pred80 = similarities.loc['Iron Man 3'].sort_values(ascending=False)[1:51]
pred80

short_title
Grandmaster, The                        1.0
This Is the End                         1.0
Resolution                              1.0
Maniac Cop 2                            1.0
Grabbers                                1.0
Justice League: Doom                    1.0
Mezzo Forte                             1.0
English Teacher, The                    1.0
Disconnect                              1.0
Pain & Gain                             1.0
Wolf Children                           1.0
Hulk Vs.                                1.0
Mud                                     1.0
Pawn                                    1.0
Invincible Iron Man, The                1.0
Dark Tide                               1.0
42                                      1.0
Oblivion                                1.0
Syrup                                   1.0
Great Gatsby, The                       1.0
Star Trek Into Darkness                 1.0
Angst                                   1.0
Perfect Plan, A     

In [25]:
rat80 = ratings[ratings.userId == 80].merge(
    movies[['movieId', 'short_title']], on='movieId')[['short_title', 'movieId']].set_index('short_title')
rat80

Unnamed: 0_level_0,movieId
short_title,Unnamed: 1_level_1
Twelve Monkeys,32
"Usual Suspects, The",50
Apollo 13,150
"Shawshank Redemption, The",318
Carlito's Way,431
...,...
Oblivion,101864
Iron Man 3,102125
"Purge, The",103075
World War Z,103249


In [26]:
rat80.iloc[162] == 'Oblivion'

movieId    False
Name: Oblivion, dtype: bool

In [27]:
rat80.loc['Oblivion']

movieId    101864
Name: Oblivion, dtype: int64

In [28]:
'Oblivion' in rat80.index

True

In [29]:
thing = []
for i in pred80.index:
    thing1 = i in rat80.index
    thing.append(thing1)
sum(thing)

1

In [30]:
ratings.groupby('movieId')['rating'].mean().mean()

3.2624251207839774

In [31]:
numUsers = len(ratings.userId.unique())
userAvg = ratings.rating.mean()
numMovies = len(ratings.movieId.unique())
ratingAvg = ratings.groupby('movieId')['rating'].mean().mean()

In [32]:
total_rating = (numUsers*userAvg + numMovies*ratingAvg) / (numUsers + numMovies)
total_rating

3.2765437327505316

In [33]:
ratings['rating_new'] = ratings.rating - total_rating

In [34]:
pivot_table_user = pd.pivot_table(data=ratings,values='rating_new',index='userId',columns='movieId')
pivot_table_user = pivot_table_user.fillna(0)
pivot_table_movie = pd.pivot_table(data=ratings,values='rating',index='userId',columns='movieId')
pivot_table_movie = pivot_table_movie.fillna(0)

In [35]:
user_based_similarity = 1 - pairwise_distances( pivot_table_user.values, metric="hamming" )
movie_based_similarity = 1 - pairwise_distances( pivot_table_movie.T.values, metric="hamming" )

In [36]:
user_based_similarity = pd.DataFrame(user_based_similarity)
user_based_similarity.columns = user_based_similarity.columns+1
user_based_similarity.index = user_based_similarity.index+1

movie_based_similarity = pd.DataFrame(movie_based_similarity)
movie_based_similarity.columns = movie_based_similarity.columns+1
movie_based_similarity.index = movie_based_similarity.index+1

In [37]:
movies.columns

Index(['movieId', 'title', 'short_title', 'year', '(no genres listed)',
       'Action', 'Adventure', 'Animation', 'Children', 'Comedy', 'Crime',
       'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'IMAX',
       'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War',
       'Western'],
      dtype='object')

In [38]:
def rec_movie(movie_id, moviedf=movies):
    temp_table = pd.DataFrame(columns = moviedf.columns)
    movies = movie_based_similarity[movie_id].sort_values(ascending = False).index.tolist()[:11]
    for mov in movies:
#         display(items[items['movie id'] == mov])
        temp_table = temp_table.append(moviedf[moviedf['movieId'] == mov], ignore_index=True)
    return temp_table
def rec_user(user_id, ratingdf=ratings):
    temp_table = pd.DataFrame(columns = ratings.columns)
    us = user_based_similarity[user_id].sort_values(ascending = False).index.tolist()[:101]
    for u in us:
#         display(items[items['movie id'] == mov])
        temp_table = temp_table.append(ratingdf[ratingdf['userId'] == u], ignore_index=True)
    return temp_table

In [39]:
display(rec_movie(176))
display(rec_movie(11))

Unnamed: 0,movieId,title,short_title,year,(no genres listed),Action,Adventure,Animation,Children,Comedy,...,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,176,Living in Oblivion (1995),Living in Oblivion,1995,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
1,648,Mission: Impossible (1996),Mission: Impossible,1996,0,1,1,0,0,0,...,0,0,0,0,1,0,0,1,0,0
2,1345,Carrie (1976),Carrie,1976,0,0,0,0,0,0,...,0,1,0,0,0,0,0,1,0,0
3,6197,Spider (2002),Spider,2002,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
4,6210,Volcano High (Whasango) (2001),Volcano High,2001,0,1,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
5,1965,Repo Man (1984),Repo Man,1984,0,0,0,0,0,1,...,0,0,0,0,0,0,1,0,0,0
6,6212,Bringing Down the House (2003),Bringing Down the House,2003,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
7,3214,American Flyers (1985),American Flyers,1985,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,7657,Versus (2000),Versus,2000,0,1,0,0,0,1,...,0,1,0,0,0,0,0,0,0,0


Unnamed: 0,movieId,title,short_title,year,(no genres listed),Action,Adventure,Animation,Children,Comedy,...,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,11,"American President, The (1995)","American President, The",1995,0,0,0,0,0,1,...,0,0,0,0,0,1,0,0,0,0
1,313,"Swan Princess, The (1994)","Swan Princess, The",1994,0,0,0,1,1,0,...,0,0,0,0,0,0,0,0,0,0
2,2419,Extremities (1986),Extremities,1986,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,2316,Practical Magic (1998),Practical Magic,1998,0,0,0,0,0,0,...,0,0,0,0,1,1,0,0,0,0
4,3308,"Flamingo Kid, The (1984)","Flamingo Kid, The",1984,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
5,2012,Back to the Future Part III (1990),Back to the Future Part III,1990,0,0,1,0,0,1,...,0,0,0,0,0,0,1,0,0,1
6,2518,Night Shift (1982),Night Shift,1982,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
7,196,Species (1995),Species,1995,0,0,0,0,0,0,...,0,1,0,0,0,0,1,0,0,0
8,1080,Monty Python's Life of Brian (1979),Monty Python's Life of Brian,1979,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
9,2539,Analyze This (1999),Analyze This,1999,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0


In [40]:
rec_user(80).merge(movies[['movieId', 'short_title']], on='movieId')[:50]

Unnamed: 0,userId,movieId,rating,timestamp,rating_new,short_title
0,80,32,4.0,08-24-2013,0.723456,Twelve Monkeys
1,364,32,3.0,07-21-1997,-0.276544,Twelve Monkeys
2,120,32,3.0,04-03-1997,-0.276544,Twelve Monkeys
3,544,32,3.0,12-15-1996,-0.276544,Twelve Monkeys
4,529,32,5.0,02-10-1997,1.723456,Twelve Monkeys
5,513,32,4.0,10-04-2006,0.723456,Twelve Monkeys
6,206,32,3.0,12-16-1996,-0.276544,Twelve Monkeys
7,81,32,5.0,10-14-1996,1.723456,Twelve Monkeys
8,80,50,4.5,08-24-2013,1.223456,"Usual Suspects, The"
9,515,50,4.5,12-19-2017,1.223456,"Usual Suspects, The"


In [41]:
rec_movie(32)

Unnamed: 0,movieId,title,short_title,year,(no genres listed),Action,Adventure,Animation,Children,Comedy,...,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,32,Twelve Monkeys (a.k.a. 12 Monkeys) (1995),Twelve Monkeys,1995,0,0,0,0,0,0,...,0,0,0,0,1,0,1,1,0,0
1,948,Giant (1956),Giant,1956,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,1
2,100,City Hall (1996),City Hall,1996,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,14,Nixon (1995),Nixon,1995,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,48,Pocahontas (1995),Pocahontas,1995,0,0,0,1,1,0,...,0,0,0,1,0,1,0,0,0,0
5,942,Laura (1944),Laura,1944,0,0,0,0,0,0,...,1,0,0,0,1,0,0,0,0,0
6,661,James and the Giant Peach (1996),James and the Giant Peach,1996,0,0,1,1,1,0,...,0,0,0,1,0,0,0,0,0,0
7,214,Before the Rain (Pred dozhdot) (1994),Before the Rain,1994,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [42]:
rec_user(239).merge(movies[['movieId', 'short_title']], on='movieId')[:50]

Unnamed: 0,userId,movieId,rating,timestamp,rating_new,short_title
0,239,1,4.0,09-11-2008,0.723456,Toy Story
1,399,1,4.0,12-27-2006,0.723456,Toy Story
2,193,1,2.0,07-02-2015,-1.276544,Toy Story
3,145,1,5.0,05-14-1996,1.723456,Toy Story
4,443,1,4.0,08-03-2017,0.723456,Toy Story
5,364,1,5.0,07-21-1997,1.723456,Toy Story
6,336,1,4.0,07-24-2005,0.723456,Toy Story
7,529,1,3.0,02-10-1997,-0.276544,Toy Story
8,347,1,5.0,11-10-1996,1.723456,Toy Story
9,130,1,3.0,05-20-1996,-0.276544,Toy Story


In [43]:
user_based_similarity.to_csv('../../Data/user_hamming_similarity.csv')
movie_based_similarity.to_csv('../../Data/movie_hamming_similarity.csv')

In [44]:
movie_based_similarity.head()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,9713,9714,9715,9716,9717,9718,9719,9720,9721,9722
1,1.0,0.608197,0.631148,0.639344,0.631148,0.608197,0.619672,0.644262,0.640984,0.565574,...,0.645902,0.645902,0.645902,0.645902,0.645902,0.645902,0.645902,0.645902,0.645902,0.645902
2,0.608197,1.0,0.788525,0.813115,0.785246,0.732787,0.77377,0.819672,0.798361,0.72623,...,0.818033,0.818033,0.818033,0.818033,0.818033,0.818033,0.818033,0.818033,0.818033,0.818033
3,0.631148,0.788525,1.0,0.904918,0.885246,0.798361,0.867213,0.909836,0.906557,0.742623,...,0.913115,0.913115,0.913115,0.913115,0.913115,0.913115,0.913115,0.913115,0.913115,0.913115
4,0.639344,0.813115,0.904918,1.0,0.914754,0.82459,0.913115,0.978689,0.962295,0.781967,...,0.986885,0.986885,0.986885,0.986885,0.986885,0.986885,0.986885,0.986885,0.986885,0.986885
5,0.631148,0.785246,0.885246,0.914754,1.0,0.803279,0.886885,0.916393,0.913115,0.744262,...,0.918033,0.918033,0.918033,0.918033,0.918033,0.918033,0.918033,0.918033,0.918033,0.918033


In [45]:
rec_user(80)

Unnamed: 0,userId,movieId,rating,timestamp,rating_new
0,80,32,4.0,08-24-2013,0.723456
1,80,50,4.5,08-24-2013,1.223456
2,80,150,4.0,08-24-2013,0.723456
3,80,318,5.0,08-24-2013,1.723456
4,80,431,4.0,08-24-2013,0.723456
...,...,...,...,...,...
2580,130,434,3.0,05-20-1996,-0.276544
2581,130,588,2.0,05-20-1996,-1.276544
2582,130,590,3.0,05-20-1996,-0.276544
2583,130,592,3.0,05-20-1996,-0.276544


In [46]:
ratings.userId.nunique()

610