In [1]:
import numpy as np
from sklearn.neighbors import NearestNeighbors
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from scipy.stats import pearsonr
from scipy.spatial import distance
from functions import *

In [2]:
# Labels applied to the ratings file; passing them via `names=` makes pandas
# read that file as headerless.
column_names = ['userID', 'movie_id', 'rating', 'timestamp']

# Title lookup table: its 'item_id' key is renamed so that both frames
# expose the same 'movie_id' join column.
dataset1 = pd.read_csv('Movie_Id_Titles').rename(columns={'item_id': 'movie_id'})

# Tab-separated ratings, one row per (user, movie) rating event.
dataset2 = pd.read_csv('u.data', sep='\t', names=column_names)

# Attach each rating to its movie title through the shared movie_id key
# (default inner join).
dataset = dataset1.merge(dataset2, on='movie_id')
dataset

Unnamed: 0,movie_id,title,userID,rating,timestamp
0,1,Toy Story (1995),308,4,887736532
1,1,Toy Story (1995),287,5,875334088
2,1,Toy Story (1995),148,4,877019411
3,1,Toy Story (1995),280,4,891700426
4,1,Toy Story (1995),66,3,883601324
...,...,...,...,...,...
99998,1678,Mat' i syn (1997),863,1,889289570
99999,1679,B. Monkey (1998),863,3,889289491
100000,1680,Sliding Doors (1998),863,2,889289570
100001,1681,You So Crazy (1994),896,3,887160722


In [3]:
# Build the rating matrix and orient it as movies x users:
# one row per title, one column per userID, NaN where a user has not rated a movie.
pivot_table = dataset.pivot_table(values='rating', index='userID', columns='title').transpose()

# Order the movies from most-rated to least-rated. The vote counts live in
# their own Series, so no helper column has to be added to (and later dropped
# from) the matrix -- that would mix a string label into the userID columns.
rating_counts = pivot_table.count(axis=1)
pivot_table = pivot_table.loc[rating_counts.sort_values(ascending=False).index]

# Fill the missing ratings with each USER's average rating. The columns are
# users at this point, so pivot_table.mean() yields one mean per user and
# fillna aligns that Series on the columns. (It is not a per-movie average:
# sparsely rated movies end up holding every user's personal mean.)
pivot_table = pivot_table.fillna(pivot_table.mean())
pivot_table

userID,0,1,2,3,4,5,6,7,8,9,...,934,935,936,937,938,939,940,941,942,943
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Star Wars (1977),5.000000,5.000000,5.000000,2.773585,5.000000,4.000000,4.000000,5.000000,5.00000,5.000000,...,5.000000,3.923077,4.000000,5.000000,5.000000,4.3125,4.000000,4.045455,5.00000,4.000000
Contact (1997),3.666667,5.000000,3.000000,2.000000,5.000000,2.874286,2.000000,4.000000,5.00000,4.272727,...,3.696532,3.923077,3.000000,4.000000,5.000000,4.0000,5.000000,4.000000,4.00000,3.410714
Fargo (1996),3.666667,5.000000,5.000000,2.773585,4.333333,5.000000,5.000000,5.000000,3.79661,4.272727,...,4.000000,3.000000,4.000000,3.000000,5.000000,4.3125,3.000000,4.045455,4.25641,5.000000
Return of the Jedi (1983),3.666667,5.000000,3.704918,4.000000,4.333333,5.000000,3.639423,3.000000,4.00000,4.272727,...,4.000000,4.000000,4.000000,3.358974,5.000000,4.3125,3.000000,5.000000,4.25641,4.000000
Liar Liar (1997),3.666667,3.605166,1.000000,2.000000,5.000000,2.874286,2.000000,1.000000,3.00000,4.000000,...,3.696532,3.923077,3.000000,1.000000,3.268519,4.3125,4.000000,4.000000,4.25641,3.410714
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
To Cross the Rubicon (1991),3.666667,3.605166,3.704918,2.773585,4.333333,2.874286,3.639423,3.962594,3.79661,4.272727,...,3.696532,3.923077,3.744681,3.358974,3.268519,4.3125,3.457944,4.045455,4.25641,3.410714
"I, Worst of All (Yo, la peor de todas) (1990)",3.666667,3.605166,3.704918,2.773585,4.333333,2.874286,3.639423,3.962594,3.79661,4.272727,...,3.696532,3.923077,3.744681,3.358974,3.268519,4.3125,3.457944,4.045455,4.25641,3.410714
Small Faces (1995),3.666667,3.605166,3.704918,2.773585,4.333333,2.874286,3.639423,3.962594,3.79661,4.272727,...,3.696532,3.923077,3.744681,3.358974,3.268519,4.3125,3.457944,4.045455,4.25641,3.410714
Tokyo Fist (1995),3.666667,3.605166,3.704918,2.773585,4.333333,2.874286,3.639423,3.962594,3.79661,4.272727,...,3.696532,3.923077,3.744681,3.358974,3.268519,4.3125,3.457944,4.045455,4.25641,3.410714


In [5]:
# Query the recommender (imported from the local `functions` module) for one
# seed movie. Per the rendered output, the result is a table indexed by title
# with Correlation, Cosine_distance and score columns.
# NOTE(review): the second argument looks like the number of recommendations
# to return (the output has 5 rows) -- confirm against functions.py.
ans = get_recommendations(
    'Return of the Jedi (1983)',
    5,
    pivot_table=pivot_table,
)
ans

Unnamed: 0,Correlation,Cosine_distance,score
Star Wars (1977),1.0,0.0,0.5
"Empire Strikes Back, The (1980)",0.846554,0.080054,0.38325
Safe Passage (1994),0.0177,0.555247,-0.268773
Destiny Turns on the Radio (1995),0.003491,0.56593,-0.281219
Butterfly Kiss (1995),0.0,0.583333,-0.291667
