# CP421 Project - Recommendation System(CF)
#### Shawn Davis 
#### Adam Cassidy 
#### Mengdan Wan

##### Import rating and movie data

In [1]:
import pandas as pd
import numpy as np

rnames = ['user_id', 'movie_id', 'rating', 'timestamp']
ratings = pd.read_csv('ml-latest-small/ml-latest-small/ratings.csv', skiprows=1, sep=',', header=None, names=rnames)

ratings[:]


Unnamed: 0,user_id,movie_id,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931
...,...,...,...,...
100831,610,166534,4.0,1493848402
100832,610,168248,5.0,1493850091
100833,610,168250,5.0,1494273047
100834,610,168252,5.0,1493846352


In [2]:
mnames = ['movie_id', 'title', 'genres']
movies = pd.read_csv('ml-latest-small/ml-latest-small/movies.csv', skiprows=1, sep=',', header=None, names=mnames,)

movies

Unnamed: 0,movie_id,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
9737,193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy
9738,193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy
9739,193585,Flint (2017),Drama
9740,193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation


##### Merge movies and ratings

In [3]:
data = pd.merge(ratings, movies)
data

Unnamed: 0,user_id,movie_id,rating,timestamp,title,genres
0,1,1,4.0,964982703,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,5,1,4.0,847434962,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
2,7,1,4.5,1106635946,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
3,15,1,2.5,1510577970,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
4,17,1,4.5,1305696483,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
...,...,...,...,...,...,...
100831,610,160341,2.5,1479545749,Bloodmoon (1997),Action|Thriller
100832,610,160527,4.5,1479544998,Sympathy for the Underdog (1971),Action|Crime|Drama
100833,610,160836,3.0,1493844794,Hazard (2005),Action|Drama|Thriller
100834,610,163937,3.5,1493848789,Blair Witch (2016),Horror|Thriller


##### Random split data for training and test (train = 0.9 and test = 0.1)

In [4]:
data_copy = data.copy()
train_set = data_copy.sample(frac=0.9) #random choose sample (change the frac to change the training ratio)
test_set = data_copy.drop(train_set.index) #Drop the training data, the rest is test data

train_set[:]

Unnamed: 0,user_id,movie_id,rating,timestamp,title,genres
89002,414,7439,2.0,1089043902,"Punisher, The (2004)",Action|Crime|Thriller
20756,496,912,4.5,1415166610,Casablanca (1942),Drama|Romance
63046,571,3072,5.0,966900054,Moonstruck (1987),Comedy|Romance
24778,4,4252,3.0,1007569465,"Circle, The (Dayereh) (2000)",Drama
22466,135,2076,4.0,1009693935,Blue Velvet (1986),Drama|Mystery|Thriller
...,...,...,...,...,...,...
26542,284,410,3.0,832786349,Addams Family Values (1993),Children|Comedy|Fantasy
66600,590,3638,2.5,1258428713,Moonraker (1979),Action|Adventure|Sci-Fi|Thriller
67586,341,59900,5.0,1487275987,You Don't Mess with the Zohan (2008),Comedy
49258,18,1260,4.0,1455059781,M (1931),Crime|Film-Noir|Thriller


In [5]:
#Create a data frame for movies and genres in training data, use for content filter
train_mov = pd.DataFrame(train_set.loc[:,'title':'genres'])
train_no_dup = train_mov.drop_duplicates('title')
train_no_dup = train_no_dup.reset_index(drop=True)
train_no_dup

Unnamed: 0,title,genres
0,"Punisher, The (2004)",Action|Crime|Thriller
1,Casablanca (1942),Drama|Romance
2,Moonstruck (1987),Comedy|Romance
3,"Circle, The (Dayereh) (2000)",Drama
4,Blue Velvet (1986),Drama|Mystery|Thriller
...,...,...
9358,All the Vermeers in New York (1990),Comedy|Drama|Romance
9359,"Day of the Locust, The (1975)",Drama
9360,Gigli (2003),Comedy|Crime|Romance
9361,UnHung Hero (2013),Documentary


In [6]:
#Covert all the movies genres to binary data, 1 if the movie belong the genre, 0 otherwise
GENRES = ["Action", "Adventure", "Animation" ,"Children", "Comedy", "Crime", "Documentary", "Drama", "Fantasy",
            "Film-Noir", "Horror", "Musical", "Mystery", "Romance", "Sci-Fi", "Thriller", "War", "Western",
              "(no genres listed)"]

#Create the matrix for genres of all the training movies
genres_matrix = []
for i in range(0, len(train_no_dup)):
    b = []
    mov = train_no_dup.loc[i, 'genres']
    for g in GENRES:
        if g in mov:
            b.append(1)
        else:
            b.append(0)
    print(b)
    genres_matrix.append(b) #Making 2D array
print(genres_matrix)

[1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0]
[1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0]
[0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0]
[1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0]
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 1

[0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0]
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0]
[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0]
[0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0]
[0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0]
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]
[1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]
[1, 1, 0, 0, 1

[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0]
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0]
[1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0]
[0, 0, 0, 0, 0

[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0]
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0]
[0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0]
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0]
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]
[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0]
[0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]
[0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0

[[1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0], [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0], [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0], [0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0], [1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0], [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0], [0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 

In [7]:
#Create dataframe for the 2D array for binary matrix of genres and movies
gen = pd.DataFrame(genres_matrix, index = train_no_dup.loc[:, 'title'], columns = GENRES)
gen = gen.T
gen

title,"Punisher, The (2004)",Casablanca (1942),Moonstruck (1987),"Circle, The (Dayereh) (2000)",Blue Velvet (1986),Logan's Run (1976),"Others, The (2001)",Apollo 13 (1995),Alive (1993),"Bourne Identity, The (2002)",...,Tomboy (2011),American Buffalo (1996),Born to Be Wild (1995),Glory Daze (1995),"Muppet Christmas: Letters to Santa, A (2008)",All the Vermeers in New York (1990),"Day of the Locust, The (1975)",Gigli (2003),UnHung Hero (2013),People Will Talk (1951)
Action,1,0,0,0,0,1,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
Adventure,0,0,0,0,0,1,0,1,0,0,...,0,0,1,0,0,0,0,0,0,0
Animation,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Children,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,1,0,0,0,0,0
Comedy,0,0,1,0,0,0,0,0,0,0,...,0,0,1,0,1,1,0,1,0,1
Crime,1,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,1,0,0
Documentary,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
Drama,0,1,0,1,1,0,1,1,1,0,...,1,1,1,1,0,1,1,0,0,0
Fantasy,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Film-Noir,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [8]:
#Calculating the similarties by using genres
corr_gen = gen.corr(method = 'pearson')
corr_gen

title,"Punisher, The (2004)",Casablanca (1942),Moonstruck (1987),"Circle, The (Dayereh) (2000)",Blue Velvet (1986),Logan's Run (1976),"Others, The (2001)",Apollo 13 (1995),Alive (1993),"Bourne Identity, The (2002)",...,Tomboy (2011),American Buffalo (1996),Born to Be Wild (1995),Glory Daze (1995),"Muppet Christmas: Letters to Santa, A (2008)",All the Vermeers in New York (1990),"Day of the Locust, The (1975)",Gigli (2003),UnHung Hero (2013),People Will Talk (1951)
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Punisher, The (2004)",1.000000,-0.148522,-0.148522,-0.102062,0.208333,0.208333,0.130437,-0.148522,-0.102062,0.604167,...,-0.102062,0.321798,-0.223607,-0.102062,-0.148522,-0.187500,-0.102062,0.208333,-0.102062,-0.148522
Casablanca (1942),-0.148522,1.000000,0.441176,0.687184,0.321798,-0.148522,0.243544,0.441176,0.687184,-0.148522,...,0.687184,0.441176,0.243544,0.687184,-0.117647,0.792118,0.687184,0.321798,-0.080845,0.441176
Moonstruck (1987),-0.148522,0.441176,1.000000,-0.080845,-0.148522,-0.148522,-0.177123,-0.117647,-0.080845,-0.148522,...,-0.080845,-0.117647,0.243544,-0.080845,0.441176,0.792118,-0.080845,0.792118,-0.080845,1.000000
"Circle, The (Dayereh) (2000)",-0.102062,0.687184,-0.080845,1.000000,0.544331,-0.102062,0.456435,0.687184,1.000000,-0.102062,...,1.000000,0.687184,0.456435,1.000000,-0.080845,0.544331,1.000000,-0.102062,-0.055556,-0.080845
Blue Velvet (1986),0.208333,0.321798,-0.148522,0.544331,1.000000,-0.187500,0.838525,0.321798,0.544331,0.604167,...,0.544331,0.321798,0.130437,0.544331,-0.148522,0.208333,0.544331,-0.187500,-0.102062,-0.148522
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
All the Vermeers in New York (1990),-0.187500,0.792118,0.792118,0.544331,0.208333,-0.187500,0.130437,0.321798,0.544331,-0.187500,...,0.544331,0.321798,0.484481,0.544331,0.321798,1.000000,0.544331,0.604167,-0.102062,0.792118
"Day of the Locust, The (1975)",-0.102062,0.687184,-0.080845,1.000000,0.544331,-0.102062,0.456435,0.687184,1.000000,-0.102062,...,1.000000,0.687184,0.456435,1.000000,-0.080845,0.544331,1.000000,-0.102062,-0.055556,-0.080845
Gigli (2003),0.208333,0.321798,0.792118,-0.102062,-0.187500,-0.187500,-0.223607,-0.148522,-0.102062,-0.187500,...,-0.102062,0.321798,0.130437,-0.102062,0.321798,0.604167,-0.102062,1.000000,-0.102062,0.792118
UnHung Hero (2013),-0.102062,-0.080845,-0.080845,-0.055556,-0.102062,-0.102062,-0.121716,-0.080845,-0.055556,-0.102062,...,-0.055556,-0.080845,-0.121716,-0.055556,-0.080845,-0.102062,-0.055556,-0.102062,1.000000,-0.080845


In [9]:
#include global mean, min, max of training data
train_set.describe()

Unnamed: 0,user_id,movie_id,rating,timestamp
count,90752.0,90752.0,90752.0,90752.0
mean,326.013465,19431.697175,3.500325,1206156000.0
std,182.494968,35522.193924,1.041263,216124500.0
min,1.0,1.0,0.5,828124600.0
25%,177.0,1198.75,3.0,1019125000.0
50%,325.0,2993.0,3.5,1186161000.0
75%,477.0,8128.0,4.0,1435994000.0
max,610.0,193609.0,5.0,1537757000.0


##### Get mean rating of movies

In [10]:
#Get the mean for every training movies
ratings = pd.DataFrame(train_set.groupby('title')['rating'].mean())
ratings

Unnamed: 0_level_0,rating
title,Unnamed: 1_level_1
'71 (2014),4.000000
'Hellboy': The Seeds of Creation (2004),4.000000
'Round Midnight (1986),3.500000
'Salem's Lot (2004),5.000000
'Til There Was You (1997),4.000000
...,...
anohana: The Flower We Saw That Day - The Movie (2013),3.000000
eXistenZ (1999),3.763158
xXx (2002),2.736842
xXx: State of the Union (2005),2.000000


##### Count how many ratings of each movie

In [11]:
#Get the count of how many times rated by users for every training movies
ratings['number_of_ratings'] = data.groupby('title')['rating'].count()
ratings.sort_values(by='number_of_ratings', ascending=False).head(10)

Unnamed: 0_level_0,rating,number_of_ratings
title,Unnamed: 1_level_1,Unnamed: 2_level_1
Forrest Gump (1994),4.159649,329
"Shawshank Redemption, The (1994)",4.441281,317
Pulp Fiction (1994),4.190217,307
"Silence of the Lambs, The (1991)",4.164683,279
"Matrix, The (1999)",4.170683,278
Star Wars: Episode IV - A New Hope (1977),4.221973,251
Jurassic Park (1993),3.734234,238
Braveheart (1995),3.997561,237
Terminator 2: Judgment Day (1991),3.967662,224
Schindler's List (1993),4.222772,220


In [12]:
#import seaborn as sns
#sns.jointplot(x='rating', y='number_of_ratings', data=ratings)

In [13]:
#Create the pivot table for movies and users, rating as the value
train_matrix = pd.pivot_table(train_set, values='rating', index='user_id', columns='title')
train_matrix

title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...All the Marbles (1981),...,Zulu (1964),Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986)
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,,,,,,...,,,,,,,,,,4.0
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,,,,,,,,,,,...,,,,,,,,,,
607,,,,,,,,,,,...,,,,,,,,,,
608,,,,,,,,,,,...,,,,,,,4.5,,,
609,,,,,,,,,,,...,,,,,,,,,,


##### Get pearson correlation matrix of movies

In [14]:
#Calculate the pearson correlations between all the pair of training movies
corr_matrix = train_matrix.corr(method = 'pearson')
corr_matrix

title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...All the Marbles (1981),...,Zulu (1964),Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986)
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
'71 (2014),,,,,,,,,,,...,,,,,,,,,,
'Hellboy': The Seeds of Creation (2004),,,,,,,,,,,...,,,,,,,,,,
'Round Midnight (1986),,,,,,,,,,,...,,,,,,,,,,
'Salem's Lot (2004),,,,,,,,,,,...,,,,,,,,,,
'Til There Was You (1997),,,,,1.0,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
anohana: The Flower We Saw That Day - The Movie (2013),,,,,,,,,,,...,,,,,,,,,,
eXistenZ (1999),,,,,,-0.067522,,0.737043,1.000000,,...,,,,,,,1.000000,1.000000,,-0.337963
xXx (2002),,,,,,,,0.817105,,,...,,,-0.922613,,,,1.000000,1.000000,0.981981,0.500000
xXx: State of the Union (2005),,,,,,,,1.000000,,,...,,,-0.755929,,,,,0.981981,1.000000,


## The Recommendation System
#### Top 10 recommendation movies which every movies are rated at least N times by users.

In [15]:
N = 20
#some movies only has negtive similarities with other movies, set threshold to -1 to see the result
similarity_Threshold = 0    
movie_i = input("Enter the movie name: ") #Get movie name as input

#Find similar movie according to past rating by users (CF item-based)
if (corr_matrix.index == movie_i).any():   #recommend by collaborative filtering
    recom = pd.DataFrame(corr_matrix[movie_i])
    recom.dropna(inplace=True)
    recom = recom.join(ratings['number_of_ratings'])  
    recom = recom[recom['number_of_ratings'] > N] #filtering the similar movies that at least rated N times
    #recom = recom[recom[movie_i] < 1] 
    recom = recom[recom[movie_i] > similarity_Threshold] #filtering by the threshold of similarities
    recom_n = recom.sort_values(by=movie_i, ascending=False).head(10)
    print(recom_n.empty)
    print("1")
    
    #Only filter by past rating, some movies may not find the similar movie.
    #in this case, we find similar movie by using content filtering of genres.
    if recom_n.empty:   #recommend by content filtering using genres
        recom = pd.DataFrame(corr_gen[movie_i])
        recom.dropna(inplace=True)
        recom = recom.join(ratings['number_of_ratings'])
        recom = recom[recom['number_of_ratings'] > N]
        #recom = recom[recom[movie_i] < 1] 
        recom = recom[recom[movie_i] > similarity_Threshold] 
        recom_n = recom.sort_values(by=movie_i, ascending=False).head(10)
        print(recom_n.empty)
        print("2")
        
#Another case is for the new movie, the movie did not in training movies
else:
    
    # if the movie already add to training movie then directly using similarties to get similar movies
    if (corr_gen.index == movie_i).any():   #recommend by content filtering using genres
        recom = pd.DataFrame(corr_gen[movie_i])
        recom.dropna(inplace=True)
        recom = recom.join(ratings['number_of_ratings'])
        recom = recom[recom['number_of_ratings'] > N]
        #recom = recom[recom[movie_i] < 1] 
        recom = recom[recom[movie_i] > similarity_Threshold]  
        recom_n = recom.sort_values(by=movie_i, ascending=False).head(10)
        print(recom_n.empty)
        print("3")
    #calculate the similarty between new movie and the movies already in traing movie
    else:    #recommend by content filtering using genres, for new movies.
        new_g = []
        b = []
        for genre in GENRES:
            if genre in g:
                b.append(1)
            else:
                b.append(0)
        new_g.append(b)
        current = pd.DataFrame(new_g, index=[movie_i],columns=GENRES)
        current = current.T
        
        # add new movie to traing movies
        gen = gen.join(current)

        corr_gen = gen.corr(method = 'pearson')
        recom = pd.DataFrame(corr_gen[movie_i])
        recom.dropna(inplace=True)
        recom = recom.join(ratings['number_of_ratings'])
        recom = recom[recom['number_of_ratings'] > N]
        #recom = recom[recom[movie_i] < 1] 
        recom = recom[recom[movie_i] > similarity_Threshold] 
        recom_n = recom.sort_values(by=movie_i, ascending=False).head(10)
        print(recom_n.empty)
        print("4")
        
recom_n



Enter the movie name: 'Hellboy': The Seeds of Creation (2004)
True
1
False
2


Unnamed: 0_level_0,'Hellboy': The Seeds of Creation (2004),number_of_ratings
title,Unnamed: 1_level_1,Unnamed: 2_level_1
Pirates of the Caribbean: The Curse of the Black Pearl (2003),0.864099,149
Big Trouble in Little China (1986),0.864099,28
Last Action Hero (1993),0.864099,53
Pirates of the Caribbean: At World's End (2007),0.864099,56
"Princess Bride, The (1987)",0.728571,142
"Goonies, The (1985)",0.728571,57
Army of Darkness (1993),0.728571,51
Hancock (2008),0.728571,29
Dogma (1999),0.724569,79
Austin Powers: The Spy Who Shagged Me (1999),0.724569,121


## Making prediction of test data

In [16]:
#drop the useless columns in test data
test_set.drop(['movie_id', 'timestamp'], axis=1,inplace=True)
test_set

Unnamed: 0,user_id,rating,title,genres
9,31,5.0,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
25,73,4.5,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
34,96,5.0,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
43,132,2.0,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
49,144,3.5,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
...,...,...,...,...
100783,610,3.0,Escape From Tomorrow (2013),Drama|Fantasy|Horror
100798,610,4.0,"Guest, The (2014)",Thriller
100831,610,2.5,Bloodmoon (1997),Action|Thriller
100832,610,4.5,Sympathy for the Underdog (1971),Action|Crime|Drama


In [17]:
print(len(test_set))

#convert data frame to numpy array, that can go through every test data 
test_array = test_set.to_numpy()
test_array

10084


array([[31, 5.0, 'Toy Story (1995)',
        'Adventure|Animation|Children|Comedy|Fantasy'],
       [73, 4.5, 'Toy Story (1995)',
        'Adventure|Animation|Children|Comedy|Fantasy'],
       [96, 5.0, 'Toy Story (1995)',
        'Adventure|Animation|Children|Comedy|Fantasy'],
       ...,
       [610, 2.5, 'Bloodmoon (1997)', 'Action|Thriller'],
       [610, 4.5, 'Sympathy for the Underdog (1971)',
        'Action|Crime|Drama'],
       [610, 3.0, 'Hazard (2005)', 'Action|Drama|Thriller']], dtype=object)

##### Go through every test data, and using pearson correlation matrix to find top k similar movies that already rated by current user. 
##### If user did not rate any similar movie, then use the average rating as predicting of this movie
##### If no similar movie or the movie did not in trainning data(New movies), then using 'Content Filtering' by genres to find the similar movies, and make predicting by using average rating of the top 20 similar movies which filtering by genres.

In [18]:
pred = [] # use for measure RMSE
k = 20
c=0
noOfRatingThreshold = 20

for m in test_array:   #loop for go through every test data
    t = m[2]
    u = m[0]
    g = m[3]
    
    #Select similar movies as in part of recomendation system
    if (corr_matrix.index == t).any():
        movie_corr = pd.DataFrame(corr_matrix[t])
        movie_corr.dropna(inplace=True)
        #movie_corr = movie_corr.join(ratings['number_of_ratings'])
        #movie_corr = movie_corr[movie_corr['number_of_ratings'] > noOfRatingThreshold]
        movie_corr = movie_corr[movie_corr[t] < 1] 
        movie_corr = movie_corr[movie_corr[t] > 0] 
        sim_movies = movie_corr.sort_values(by=t, ascending=False)
        #sim_movies
        sim_title = sim_movies.index
        #print(sim_title)

        #calculate the prediction rating
        count = 0
        r_s = 0
        total_sim = 0
        for movie in sim_title:
            r = train_matrix.loc[u, movie]
            #print(u)
            #print(movie)
            if not np.isnan(r):  
                #print(r)
                count += 1
                sim = sim_movies.loc[movie, t]
                r_s += r*sim   #sum of (rating * similarity)
                total_sim += sim  #sum of similarity
            if count >= k: #choose first 20 similar movies that has rated by the user
                break
                #print(count)
        
        if total_sim == 0: #this is case of no similar movies in collabortive filtering
            
            g_corr = pd.DataFrame(corr_gen[t])
            g_corr.dropna(inplace=True)
            #movie_corr = movie_corr.join(ratings['number_of_ratings'])
            #movie_corr = movie_corr[movie_corr['number_of_ratings'] > noOfRatingThreshold]
            g_corr = g_corr[g_corr[t] < 1] 
            g_corr = g_corr[g_corr[t] > 0] 
            sim_gen = g_corr.sort_values(by=t, ascending=False)
            sim_genres = sim_gen.index
            
            count_1 = 0
            r_s_1 = 0
            total_sim_1 = 0
            for tit in sim_genres:
                if (corr_matrix.index == tit).any():
                    rt = train_matrix.loc[u, tit]
                    
                    if not np.isnan(rt):
                        count_1 += 1
                        sim1 = sim_gen.loc[tit, t]
                        #print(sim)
                        r_s_1 += rt*sim1
                        total_sim_1 += sim1
                    if count_1 >= k:
                        break
                        #print(count)
            #In this case the movie not have positive similarities with other movies
            if total_sim_1 == 0:  
                rate = ratings.loc[tit, 'rating'] #using mean rating of this movie as predicting
                
            else:
                rate = r_s_1/total_sim_1  #predicting rating = sum of (rating * similarity) / sum of similarity
                
            pred.append(rate) 
            print(rate)
            c += 1
            
        else:
            rate = r_s/total_sim  #predicting rating = sum of (rating * similarity) / sum of similarity
            pred.append(rate)
            print(rate)
            c += 1
    
    # this part for new movie
    else:  
        if (corr_gen.index == t).any():
            curr_corr = pd.DataFrame(corr_gen[t])
            curr_corr.dropna(inplace=True)
            #movie_corr = movie_corr.join(ratings['number_of_ratings'])
            #movie_corr = movie_corr[movie_corr['number_of_ratings'] > noOfRatingThreshold]
            curr_corr = curr_corr[curr_corr[t] < 1] 
            curr_corr = curr_corr[curr_corr[t] > 0] 
            sim_gen = curr_corr.sort_values(by=t, ascending=False)
            sim_tit = sim_gen.index
        else:
            #print("new")
            g_a = []
            b = []
            for genre in GENRES:
                if genre in g:
                    b.append(1)
                else:
                    b.append(0)
            #print(b)
            g_a.append(b)
            current = pd.DataFrame(g_a, index=[t],columns=GENRES)
            current = current.T
            current

            gen = gen.join(current)
            corr_gen = gen.corr(method = 'pearson')
            curr_corr = pd.DataFrame(corr_gen[t])
            curr_corr.dropna(inplace=True)
            #movie_corr = movie_corr.join(ratings['number_of_ratings'])
            #movie_corr = movie_corr[movie_corr['number_of_ratings'] > noOfRatingThreshold]
            curr_corr = curr_corr[curr_corr[t] < 1] 
            curr_corr = curr_corr[curr_corr[t] > 0] 
            sim_gen = curr_corr.sort_values(by=t, ascending=False)
            sim_tit = sim_gen.index

        count_2 = 0
        r_s_2 = 0
        tot_r = 0

        for tit in sim_tit:
            if (ratings.index == tit).any():
                rt2 = ratings.loc[tit, 'rating']
                count_2 += 1
                tot_r += rt2
            if count_2 >= 20:
                break
                
        
        if count_2 == 0:    #No genres, set the rate to 0
            rate = 0
        else:
            rate = tot_r/count_2 #new movie are in the test set has no ratings, 
                                    #so use top 20 simlary movies' average rating to get the average as predicting
        print(rate)
        pred.append(rate)
        
        #print("No Recommendation(New movie)")
        c += 1
    print(c)
print("Total movies: %d" % c)
print(len(pred))



3.686955526989126
1
3.6763228025719448
2
3.4898117407760667
3
2.8053413590238208
4
3.9056507240130633
5
3.668935899812337
6
4.760680305138938
7
3.5760232822831597
8
3.4725054113638967
9
3.550241949560414
10
3.7784261414230897
11
3.5039013870510867
12
3.6482448830350163
13
3.415108559038859
14
3.6028079081892748
15
3.802565134221221
16
3.099693696278877
17
3.7071280104394972
18
4.158615317616055
19
2.9807268546228
20
3.2603673948412326
21
3.3120749562915104
22
3.406748220913814
23
2.3684320503685967
24
4.101828363038404
25
2.6889778243824143
26
2.975477488547423
27
3.3853289044740595
28
3.5843663754364887
29
3.1129497578854486
30
3.67945731486964
31
3.743655497063215
32
4.2541022108632385
33
3.4437061297815927
34
4.347090422355134
35
3.596188992160123
36
2.902189170822071
37
2.4796665671630693
38
3.02824185043783
39
4.713563328896411
40
3.088429647479826
41
3.29297455824025
42
3.5477306906650594
43
3.678631545583569
44
3.752531260039961
45
3.176505375734846
46
3.10385243916689
47
4.2926

3.91578567289882
387
3.7618420197355893
388
3.8192474454265986
389
2.9268000415211706
390
4.120269596027044
391
3.4299107083105573
392
3.2610440533897855
393
2.9152639848783313
394
3.46388869952186
395
3.0182379921782125
396
3.385016634745445
397
2.5036487268535144
398
4.40733039683285
399
3.025794442217428
400
3.8427513348308966
401
2.7936158762294707
402
4.198996390448859
403
2.6676394699456147
404
3.427153353586227
405
3.9150642770285895
406
4.039162208487516
407
3.4657346970419094
408
3.9870204421760453
409
3.169949371575386
410
3.548746508576493
411
3.0514082938612814
412
3.5824927117436673
413
2.6409746806716567
414
4.366711658621148
415
4.157915956190212
416
3.3369100183744353
417
4.192500657515398
418
4.182506017960556
419
4.458591364769576
420
3.612829753756729
421
3.8001703617554767
422
4.144967625789381
423
3.62288560477534
424
3.7252307269032476
425
3.0925953251842238
426
2.6270046504090816
427
3.9783264433515573
428
2.807478914714051
429
3.1207535413266365
430
3.9347423422

4.43139327738932
758
3.59948894985083
759
4.318229142496463
760
3.228592855854994
761
2.912121232961395
762
3.7048747432626232
763
3.366017934040208
764
3.842915141718451
765
3.700638756365973
766
3.1171299798839116
767
2.8252111611145043
768
3.562813530158888
769
2.6705378790375423
770
3.5699802491323407
771
4.102283150424015
772
3.2646671959011915
773
3.5087524227228912
774
3.8364886643729945
775
2.9543655708564414
776
3.2221574683423837
777
3.0475578413444557
778
4.374173034171101
779
4.6685490287592275
780
2.8433723992587177
781
3.655779442265059
782
4.020805084451047
783
2.899321707665966
784
3.872388985386215
785
2.790244113679582
786
3.343595938820113
787
4.126348437459369
788
3.8972404215086467
789
3.452954108530137
790
3.9963818109840403
791
3.8240240808331234
792
4.482413597047306
793
3.276376847336764
794
2.809691933025423
795
4.106783057213994
796
3.880540489565276
797
3.7101717807370096
798
3.9717435844660582
799
3.498387096289511
800
3.7628192245568077
801
3.3791076720969

3.608193698850995
1132
3.295568374132348
1133
2.6000903143427596
1134
4.059593836545139
1135
3.983463962816672
1136
2.5060213264417754
1137
1.960278046744431
1138
3.504227314772859
1139
3.878861087518656
1140
2.559839634860488
1141
4.289356328624135
1142
3.9877920572289027
1143
4.303162473234986
1144
3.740752036202453
1145
3.7163993706458447
1146
3.196583959237818
1147
3.69316328090452
1148
4.244151924790976
1149
3.651467838385647
1150
3.536541583952006
1151
3.2318631845139523
1152
2.8437342069971163
1153
3.203013783488598
1154
4.192004983160085
1155
2.279548597720161
1156
3.751945389676729
1157
3.1057463465900574
1158
3.3526228726425624
1159
3.747665139932719
1160
3.8087710821252094
1161
2.567296178744092
1162
3.4898983418044995
1163
3.4326714815147845
1164
2.85387210355978
1165
2.5966030564316993
1166
4.177786566209708
1167
2.6666315027355343
1168
3.2533810656271203
1169
3.9654738754244896
1170
3.2540054609595606
1171
2.918560953003017
1172
2.4421566133820907
1173
3.0362942004053717


3.0969608844348304
1493
3.8206091425863153
1494
3.3936258507432044
1495
3.212265331555531
1496
3.0799334472157702
1497
3.6137652559188163
1498
3.700151764329303
1499
3.7997998782540736
1500
3.750447925103922
1501
3.4567154789454606
1502
4.365747632208398
1503
4.07063806937885
1504
3.4543410031509167
1505
3.803295227922933
1506
2.744654416805019
1507
3.944657517926976
1508
3.6106636194244723
1509
2.7904711360432466
1510
3.6168179956034656
1511
3.3240556196612747
1512
2.7353632050908048
1513
4.320714292877795
1514
3.3069369106294686
1515
3.90323620454441
1516
3.6329981543597443
1517
3.0944711925103006
1518
2.7431459764333663
1519
4.2056363869822135
1520
2.8885246337681645
1521
2.2496123638683065
1522
3.170054747240556
1523
4.157451482148533
1524
3.2973052337023367
1525
2.7883202087703163
1526
3.7077368698072886
1527
3.235950524677078
1528
3.4402484295016604
1529
3.6392356535379493
1530
2.9984418085155764
1531
3.591281130570407
1532
3.706013372148157
1533
4.141694424128311
1534
3.57788203

3.1275919447269964
1860
3.972001632806084
1861
3.9257700592290172
1862
0.9385239966292449
1863
3.533115437754811
1864
3.371608181197312
1865
3.395814563546898
1866
3.28222143713513
1867
3.576926151117189
1868
4.09725521614742
1869
3.1003279741567185
1870
3.985783829075883
1871
3.789252145300336
1872
3.8003525758634895
1873
3.3920147557936313
1874
3.9145615642842024
1875
3.9413119821583615
1876
3.7544604186506274
1877
3.8398708547311493
1878
4.3370988854909625
1879
4.340318980974138
1880
1.486304183491694
1881
4.114155005984454
1882
3.125000000000001
1883
3.5567454937228793
1884
3.1991518137885433
1885
3.81118048931013
1886
3.213916996526377
1887
4.175702221877029
1888
2.558370677751644
1889
2.3959728480410436
1890
2.7897467957317894
1891
2.7008691639516273
1892
3.323344213277591
1893
3.762770648551433
1894
4.247721145429266
1895
3.1143113577122956
1896
3.101108255973675
1897
3.8207864422882474
1898
2.738707766289692
1899
3.195901576112854
1900
3.9243811764613055
1901
3.9078427978000274

4.246936976205673
2223
3.635563509857089
2224
3.4983856837526304
2225
3.5905090544167155
2226
3.725375157297576
2227
3.4950590356844238
2228
3.5754615270540056
2229
3.1797614547313953
2230
3.7088324377421977
2231
4.117363617054233
2232
3.4554638702391793
2233
3.4356051779860657
2234
2.452541005078443
2235
4.05882844168394
2236
3.8758985987876433
2237
3.8246502882564535
2238
4.299303326393064
2239
3.582984536514884
2240
3.554636919745092
2241
3.7122055432482886
2242
3.5012310416328463
2243
3.8211118548781076
2244
3.7602712821906517
2245
4.279750267654337
2246
3.818740693081432
2247
2.4982644948026453
2248
3.3729536572176597
2249
3.6567626748186446
2250
2.9714690839406948
2251
3.5145212052376764
2252
3.785147405804818
2253
3.2898356195413787
2254
3.7281885011412643
2255
4.099353765528161
2256
3.702849493415734
2257
4.076204067748351
2258
4.040486847378931
2259
3.5352850925046244
2260
4.434593544662711
2261
3.6688914686899827
2262
3.26195671210906
2263
3.9925378995933904
2264
3.5411909845

3.1999782570541977
2581
4.084006525835753
2582
2.6665138095151546
2583
3.3144514871128
2584
3.3796246362447557
2585
3.214244624076243
2586
3.7933399039062325
2587
2.453342405346893
2588
4.03303987412889
2589
3.7625751666449334
2590
3.198686917574137
2591
4.104636578435505
2592
3.4221374897180654
2593
3.349647615665742
2594
2.6754067284114935
2595
3.332408221607448
2596
2.956245866549656
2597
3.5422838961385956
2598
3.674767414061999
2599
2.945180104597378
2600
3.0520931095830193
2601
3.382896978429208
2602
3.4590811449316083
2603
3.6257557183443154
2604
3.433443940031302
2605
3.2252778961626443
2606
3.5450811447699695
2607
4.005331562365809
2608
3.2126960937908735
2609
3.6811317625164355
2610
4.2517621546799935
2611
3.623156963638971
2612
3.531145996718143
2613
3.460562061683751
2614
3.386169538517014
2615
3.740273289389436
2616
4.078816398216679
2617
3.272637002098767
2618
3.9506491325594415
2619
3.4911633919113263
2620
4.021503980937766
2621
3.2217376083362477
2622
3.161676058952115


3.0611898062993266
2935
3.1467641157688857
2936
3.911335450964784
2937
3.6290767771736046
2938
3.4304754131369237
2939
3.511904302615945
2940
3.231774217043278
2941
3.5871258013134444
2942
3.43709866829264
2943
4.219860404303461
2944
3.291765410422627
2945
3.8841127490821843
2946
2.75300778991543
2947
2.8716144762412656
2948
2.103432670606256
2949
2.895308984450622
2950
3.866359121837109
2951
3.5840766588342317
2952
3.0679218213718613
2953
3.622237903193258
2954
2.9702782911016254
2955
3.397779704133067
2956
2.912637355889728
2957
3.2802681685400272
2958
4.001191008273358
2959
3.3020224170325374
2960
3.13309569395697
2961
3.37221084109386
2962
3.8999381891596974
2963
4.02022540875014
2964
3.610666864227036
2965
4.274057929748714
2966
3.2767759817853683
2967
3.7682697956593185
2968
4.017279932936824
2969
3.4919445254605304
2970
2.8989879598789305
2971
3.50719358799462
2972
3.400521730563756
2973
3.3746048845562484
2974
3.7157230945249533
2975
3.576298861109584
2976
2.500263777123799
297

3.5406563164008555
3290
3.1119939480809045
3291
3.68780280376364
3292
2.2374499220807103
3293
3.4436825428480446
3294
3.346700787806756
3295
3.069729791739697
3296
3.8218475084002175
3297
3.2897158470441052
3298
3.8108541013889625
3299
3.6612512280342493
3300
4.055659682667574
3301
3.646487859906568
3302
4.314609633355253
3303
3.7277350773719338
3304
3.89985798698259
3305
3.3846789518980964
3306
3.4938734784248684
3307
3.255004761330782
3308
2.948873327632609
3309
3.228999516012137
3310
3.1033842603806363
3311
4.267375601147417
3312
3.806687179148902
3313
3.091094907490637
3314
3.2430208235331497
3315
3.4894608270959275
3316
3.0055834859682213
3317
3.517866665842829
3318
3.878797134875356
3319
2.677388135317698
3320
3.3549609427232534
3321
3.407954109611789
3322
3.4434796041215154
3323
2.798453454735061
3324
3.9344841619153037
3325
4.061404479482361
3326
3.6138335580125114
3327
3.8435960222084082
3328
4.074286824636076
3329
4.446059918639808
3330
3.3144876540427246
3331
4.0183480545653

3.8429444537213753
3644
2.8234366730873943
3645
4.026771943657557
3646
3.4191364073845465
3647
2.7672393185596094
3648
3.064185449434461
3649
3.8517891145587906
3650
3.4814298765571787
3651
4.263394774271082
3652
2.9958597727064706
3653
3.2598920512410663
3654
3.517934924712367
3655
2.935302149826312
3656
4.2383829895908365
3657
2.1590580577632097
3658
3.0840596835133245
3659
3.9390705749333166
3660
3.6464213161254113
3661
3.134655974500374
3662
3.8882684817478905
3663
3.1508101262227597
3664
3.8712325343899456
3665
4.305395305176205
3666
3.946979077147434
3667
3.0010651602493876
3668
3.7511098366516036
3669
3.0249699880087824
3670
3.3088006367262657
3671
3.145679150062652
3672
1.8691139985611258
3673
3.398092722750207
3674
1.519954549821848
3675
3.0470575457597096
3676
2.3380725951119232
3677
3.7559257748866193
3678
3.717702370450813
3679
3.4305520710777655
3680
3.399848230410536
3681
3.463669385712718
3682
4.345149907807781
3683
4.201475049633625
3684
4.0012787059245865
3685
4.220761

3.7807255062884018
4000
2.5022394558287906
4001
3.8836305200806547
4002
3.9261154422142712
4003
3.517403529735931
4004
4.2019220172443825
4005
3.4000666071200722
4006
4.0852428105321685
4007
4.55371530466197
4008
3.186513188878947
4009
2.820918289458587
4010
3.3474065646605227
4011
3.852593064209096
4012
4.256281445236072
4013
3.4655359559620473
4014
3.615607358846591
4015
2.809333737780889
4016
3.214832347910121
4017
3.100652008696257
4018
3.36229007661801
4019
3.3391307933478416
4020
3.7499366302575305
4021
3.3330894206958828
4022
4.512467048542531
4023
2.9945194964479067
4024
3.799843221951411
4025
3.658901672249841
4026
3.560857084695638
4027
3.118739280012495
4028
3.4508318607811344
4029
3.705006290702356
4030
2.427297018732649
4031
3.998375467154374
4032
3.305988793423261
4033
3.636320881579631
4034
4.007065278909554
4035
4.248497729145548
4036
2.6222159252037684
4037
2.820829936668143
4038
2.976585766377713
4039
2.890660191886831
4040
1.897676918645314
4041
3.9609128357220564
40

3.2034580653640865
4356
3.4738834606383495
4357
3.2545389088469316
4358
3.9244807459654423
4359
2.9072427651335313
4360
3.8584699283186232
4361
3.1376439039427773
4362
3.355290170155753
4363
2.978017594109664
4364
3.957056137127216
4365
4.0158732076317385
4366
2.471681631026748
4367
3.388420395503056
4368
3.0694952334036123
4369
3.300916806601046
4370
3.235673644571054
4371
3.47040045358561
4372
3.737788885645198
4373
3.220422920667852
4374
3.6290735836885744
4375
3.617711150140979
4376
3.270537754703838
4377
2.9717016753662606
4378
2.6935021081454744
4379
4.18930887016352
4380
3.5041908849127363
4381
3.4483602453933697
4382
3.3951840206072017
4383
3.8593725556254452
4384
4.437296658068139
4385
4.23339127982423
4386
4.037593100420912
4387
3.875251549366258
4388
3.405168396471269
4389
3.797275338184348
4390
3.8133677914311024
4391
3.2541019006991734
4392
4.1307523978227385
4393
3.610113209022972
4394
3.257574599586658
4395
3.3826013236838963
4396
2.646242880413248
4397
2.970222434211048

3.1538411395522377
4718
3.993335490208146
4719
3.5482884248379136
4720
3.7001803830193065
4721
3.716094435631206
4722
3.4086249718766144
4723
3.424358098502231
4724
3.013776036055649
4725
3.6452641566519843
4726
3.9489874975821584
4727
3.4410209896366997
4728
3.9501249458702588
4729
4.234451230019126
4730
3.360917096760821
4731
4.401609130182131
4732
3.5581684608240445
4733
3.01883522706235
4734
3.523601586738793
4735
3.4349887622702266
4736
3.8225449137276057
4737
3.1535128638652017
4738
3.482801722029979
4739
3.030854564629103
4740
3.5609263633809976
4741
4.308627134639159
4742
3.423443738052134
4743
4.062157593061735
4744
4.323809271776122
4745
3.571946490284308
4746
4.51232183785199
4747
3.524800654256075
4748
4.286093127333443
4749
3.5473082760727577
4750
3.3835244824421693
4751
3.5590199725479637
4752
3.6038878794613574
4753
3.8476272696410394
4754
3.1702333603834134
4755
4.2516757025312675
4756
3.2992864002084783
4757
3.955962781363773
4758
3.821719619845932
4759
3.4578633442929

2.6120189436698453
5077
4.281475856872477
5078
3.780573146726978
5079
3.8450240621944807
5080
3.8793530325527725
5081
4.221092340140832
5082
3.9936934993006212
5083
3.32319798444586
5084
4.662217779030786
5085
3.3473349367153853
5086
3.0009893594972046
5087
4.030971645307525
5088
3.6113916843647202
5089
3.606888915126177
5090
4.342696140892745
5091
3.7300183733637935
5092
4.2716265215416165
5093
3.521763089804751
5094
2.845883268451232
5095
2.9258434629770096
5096
3.8084328341719638
5097
3.500799493277481
5098
3.283295925505202
5099
2.9893533035818844
5100
3.725922795483617
5101
4.2193982295492
5102
2.8982801819764172
5103
2.7908506775879505
5104
2.9707778218869567
5105
3.95198228020055
5106
2.798203685964608
5107
3.326047632049578
5108
3.809156928583328
5109
2.877314796102153
5110
3.2192510828557217
5111
4.267875269731582
5112
3.37171533723197
5113
3.428713080553787
5114
3.4589703142028325
5115
3.999890638890879
5116
3.11563627523909
5117
2.6977359528920277
5118
2.0924628227775095
511

3.9130394971166576
5451
3.580537485104417
5452
3.743818011462527
5453
3.7952958800833745
5454
3.323850971317261
5455
4.727939613040805
5456
2.9021828611175104
5457
3.8370439808180956
5458
3.1417732966001206
5459
3.6542026357602833
5460
2.575996383635948
5461
3.9688158773133058
5462
3.3389051330389643
5463
3.723979660472089
5464
3.6873467836072993
5465
3.201902543847206
5466
3.9087153174597313
5467
2.621478349472566
5468
2.888612636461967
5469
3.1744842189743836
5470
3.2449083103821144
5471
3.0345468834810854
5472
4.170817448594
5473
4.475473934575939
5474
4.052428153137802
5475
2.8141813573106513
5476
3.0693929139286253
5477
3.653543975700372
5478
3.3232537842722945
5479
3.4432144880238758
5480
3.6571482506999637
5481
3.110750872376261
5482
3.7057776916844043
5483
4.070695400457458
5484
2.7455241937581394
5485
3.1554825700821434
5486
3.285907064659737
5487
3.36259879984487
5488
2.5809182171549767
5489
4.194714205944558
5490
4.035372843850897
5491
3.810009948862295
5492
3.96528417658135

3.778123739209721
5812
3.8947750064587905
5813
2.92235740015261
5814
3.3119470402532256
5815
2.906283331735902
5816
4.549410115872268
5817
4.357674870657413
5818
4.093297604037753
5819
3.952457706342673
5820
3.4315802980046595
5821
3.2987222467625403
5822
3.936603037683987
5823
3.6121742143419
5824
3.530132108695694
5825
4.215414560191035
5826
3.8365938996608464
5827
3.746332045473111
5828
3.014964407819881
5829
3.4728062696854725
5830
4.213904230142009
5831
3.8442360386211494
5832
3.701991729697202
5833
3.8551206503589284
5834
4.424996314787861
5835
4.263969659217576
5836
3.1603223888699357
5837
4.250871023641901
5838
3.3506570036688457
5839
3.102318524148664
5840
4.029325586473871
5841
3.3501184065298615
5842
3.1437978992089075
5843
2.2857611132577356
5844
3.14717595268815
5845
3.4125289153249483
5846
3.0610126094738392
5847
3.7697805365349555
5848
2.6816196819617466
5849
3.3777100669755895
5850
3.4875260184169026
5851
4.51378670544676
5852
3.997037804931587
5853
3.072764558365162
58

2.699612391676847
6176
3.9825060184824803
6177
3.9001394335106716
6178
2.814410842750959
6179
2.9862976847158027
6180
3.6459724200997137
6181
2.34451506129914
6182
4.356426321239251
6183
3.5337445898202007
6184
3.287831455574701
6185
3.718641097049706
6186
3.4283832151168276
6187
3.8000494676324386
6188
4.071559999242263
6189
3.2775298630099265
6190
3.52734236367723
6191
4.101155014136994
6192
3.825594251328784
6193
3.2916081017116903
6194
3.551405664956367
6195
2.3500000000000005
6196
2.1553821931072505
6197
3.3051475960377816
6198
3.324137723099799
6199
2.8310117683681666
6200
3.9933285985372606
6201
3.7068025134888347
6202
4.365584390726393
6203
2.56583434391147
6204
3.7285448607531344
6205
3.9805026112848
6206
3.3835114346951656
6207
3.152412074316779
6208
2.9964949728840193
6209
2.9924543583526115
6210
4.41098533893714
6211
3.624623347319169
6212
3.3840103065648828
6213
4.149156469373876
6214
3.8412987928889684
6215
2.5941064096735795
6216
3.5734469617719076
6217
3.645632282014222

3.6351503085394437
6536
4.126384903516518
6537
2.7522895528144113
6538
3.5934279347365257
6539
4.030014368718779
6540
4.031031184063115
6541
3.6067290264420206
6542
2.8663561595745337
6543
3.6479071869774335
6544
3.3572615648348925
6545
3.0015135706521967
6546
3.016341076606244
6547
3.5498635932267173
6548
3.3793863602836196
6549
2.287033767001069
6550
3.520214662850584
6551
3.7980259737623467
6552
3.5177662055672005
6553
4.27299753487266
6554
3.1913931769414647
6555
3.593210026930949
6556
3.8135851356529225
6557
3.6238894202100647
6558
3.4909512690634
6559
3.8327689057174616
6560
2.4785803255785113
6561
2.91984314888698
6562
3.537719877215272
6563
3.4742380186294657
6564
2.7251324732029043
6565
3.9283462451645947
6566
3.2531276017198785
6567
3.625583707354836
6568
3.1931496075441643
6569
3.406833751468498
6570
3.2515475050414184
6571
3.65517557057352
6572
3.8064203817451534
6573
2.9457171952751113
6574
3.7530934438544272
6575
2.9422157076872857
6576
3.352395498191265
6577
2.8911484428

3.2109172360064995
6907
4.545992408398498
6908
3.430421532620638
6909
3.950858802396092
6910
2.655092500841588
6911
4.344792317392305
6912
3.420479627246922
6913
3.1826693850425234
6914
2.501018433979196
6915
3.4270296825828055
6916
3.5707313213371674
6917
4.159641805522305
6918
4.1879557812772985
6919
4.626520250997385
6920
3.1450983996599375
6921
3.622011414719809
6922
4.15283538426676
6923
3.5757227929908346
6924
3.855881548198284
6925
3.4072888953022518
6926
3.4926346183963237
6927
3.525689299903195
6928
3.7867908807423207
6929
4.233062969109231
6930
3.801702757642125
6931
4.065678512856893
6932
4.057764004592844
6933
3.934235355033458
6934
4.123449298459152
6935
3.727990481601009
6936
2.960997462444684
6937
4.0928066932535065
6938
2.9702175388513505
6939
4.402586840025748
6940
4.5253563163021875
6941
3.6505146322792656
6942
2.350488667426602
6943
3.6493259144125485
6944
4.057821082556389
6945
2.5072243328374815
6946
3.345552247074687
6947
3.6703004237906667
6948
3.275367019613712


3.3897586218567026
7259
3.172326769929275
7260
4.010136856742282
7261
2.0788114642072353
7262
3.661172645634312
7263
4.0513060059173815
7264
4.128320411480162
7265
4.17386078525674
7266
3.1497104599161467
7267
3.176646066590766
7268
3.4212439479053915
7269
3.875012401743636
7270
3.970564590186214
7271
3.081644795547193
7272
3.397200625629462
7273
3.436129549885269
7274
3.4248958437095296
7275
2.8710667210921303
7276
2.393930778916282
7277
2.971192061750663
7278
3.2723303253599183
7279
3.9884851826071337
7280
3.452887732302937
7281
2.778782779821073
7282
3.645086648991037
7283
3.618647089467415
7284
3.3242272897629705
7285
3.7565610372147855
7286
2.718130112423984
7287
2.9550788468801246
7288
2.6513451254664724
7289
2.9893203016586245
7290
3.100425438080357
7291
3.1127084713102144
7292
3.3889516797207277
7293
4.21710909891837
7294
2.4715386125626386
7295
2.674636183011204
7296
2.9568840579710143
7297
3.450497011232024
7298
2.648178330038528
7299
3.7300647139358127
7300
2.500520549010212

3.304282125996511
7625
3.841055219755959
7626
3.4205616531610783
7627
3.9151249588605626
7628
2.858644305611871
7629
3.3610569229586225
7630
3.092969742680285
7631
3.527689951064062
7632
3.870057459635523
7633
3.447322979535539
7634
3.700020181199947
7635
3.514337787628083
7636
4.157328019908739
7637
3.3789342524938837
7638
3.741266139511697
7639
3.0459910071006586
7640
3.014650876572278
7641
2.805333837042222
7642
3.6933860284781264
7643
3.8391773155769253
7644
2.9386808385965266
7645
3.106476182473003
7646
3.1766725957553787
7647
3.5454478498091806
7648
3.7257274626811
7649
2.889198640337674
7650
2.1447546974567837
7651
3.8443623971009253
7652
3.5386829443172605
7653
3.5407914197396644
7654
3.565438111949951
7655
3.570604170282837
7656
3.922456940837337
7657
4.0
7658
3.0005860593844766
7659
3.8724829941261527
7660
4.469604962150222
7661
3.2463347045017588
7662
3.6609672212826707
7663
3.9722867342822683
7664
3.7074918383387354
7665
3.4037182668522723
7666
2.673285652738709
7667
3.5747

2.919905417181991
7987
3.4922242528285934
7988
2.9626920314606724
7989
4.432124582231528
7990
3.5786247383675898
7991
3.301499055917876
7992
4.121722828559865
7993
3.7691682819836503
7994
4.356882037226278
7995
3.0570923285283964
7996
3.2873028798310155
7997
4.055551308003754
7998
3.767025005841495
7999
3.4735719556621407
8000
2.801100200233594
8001
2.441108104345839
8002
4.039087786367081
8003
3.6314751264555727
8004
3.4657664453311843
8005
3.467663683938206
8006
3.0691891764152612
8007
3.3276886308157994
8008
3.3244273998578353
8009
3.321973571704587
8010
4.197027752829887
8011
3.6781527685737223
8012
3.390266951241911
8013
2.897397631831676
8014
1.8500000000000012
8015
2.025000000000001
8016
3.6261393720371835
8017
2.553610841292755
8018
2.442222126757042
8019
3.368586109881728
8020
3.6789241926365444
8021
4.063002323199256
8022
4.230491607824275
8023
4.098324073852784
8024
3.641941043363319
8025
3.5638790938392386
8026
3.546153823431444
8027
3.475172185307552
8028
3.488940818134333

3.679752623918582
8356
2.4411015758738954
8357
3.2576247456439424
8358
3.6535843616219914
8359
3.9954882765038167
8360
3.270846418465383
8361
2.7072030224806727
8362
3.270917014387449
8363
3.954384878046051
8364
3.2750000000000012
8365
4.348928379028322
8366
3.651677017547533
8367
3.2299402214464994
8368
4.049878143663477
8369
3.542114709235685
8370
3.7446926200008837
8371
3.5757818703948425
8372
3.2750000000000012
8373
3.9750000000000005
8374
3.3722343262649352
8375
2.0214108289099197
8376
3.8957305463693244
8377
4.089113924201491
8378
3.276535422526412
8379
3.488193187141529
8380
3.597106351284649
8381
2.9250000000000007
8382
3.468733619538242
8383
3.7942845369464115
8384
3.497637690720238
8385
2.7742818889657133
8386
3.623528836754643
8387
3.6754338091501886
8388
3.67846498432358
8389
4.541190832846142
8390
3.9743135041000963
8391
3.4175344908805263
8392
3.3250000000000015
8393
3.478538931377669
8394
4.099257125551929
8395
3.47377641190441
8396
4.4055913424837385
8397
2.993415840890

3.142306798294501
8719
4.559686629870253
8720
3.5631817790179237
8721
2.5247725781776404
8722
2.951259918347718
8723
3.0500000000000003
8724
4.093346890384912
8725
3.900630456006593
8726
2.885199124371554
8727
3.62361268020893
8728
3.935907479661359
8729
4.357326052591139
8730
2.8739439346557747
8731
3.893584657710509
8732
4.232668462795356
8733
3.5252445789617943
8734
3.3447895054740604
8735
3.9432190795151314
8736
3.58480009824033
8737
3.4260636113515295
8738
4.116070569211184
8739
2.9720802327276035
8740
3.6269675189656265
8741
3.986029937154526
8742
3.8994599908024177
8743
3.7958788323154753
8744
2.3778707866395603
8745
3.7644462830644847
8746
3.6489504852010284
8747
3.5851658759526934
8748
3.6361210338838226
8749
2.5206120904398506
8750
4.5702458135728845
8751
2.899487090218165
8752
2.8922023209698664
8753
4.80969322937659
8754
3.969100297415927
8755
3.115657784886508
8756
3.7272727272727275
8757
3.825
8758
2.9879814802319933
8759
3.7750000000000004
8760
3.7157349567228812
8761
3.

3.884034602474431
9082
3.9594656878092422
9083
3.0000000000000018
9084
3.3000000000000003
9085
3.290728556616456
9086
3.5057771986271824
9087
3.7740963141958646
9088
3.6099888213578692
9089
3.6344650881731613
9090
3.5265942270054893
9091
2.423240446286354
9092
4.331385151774785
9093
3.206563788642529
9094
3.7524546071481883
9095
2.6847854299122154
9096
3.3440508765901296
9097
3.864767591912912
9098
3.95059643824301
9099
2.957219067446149
9100
4.354423313661282
9101
3.8191171564504103
9102
3.3793784139234533
9103
3.6958332654169146
9104
3.3660254037844393
9105
3.302117470887808
9106
3.40733894918049
9107
4.089264135344625
9108
3.352094613967734
9109
3.4532529182642633
9110
3.951706113292529
9111
3.4464096015517742
9112
3.3776700509851385
9113
3.849782893222309
9114
2.973049832112332
9115
2.444444444444444
9116
4.202155157366158
9117
3.8769843956626273
9118
3.625000000000001
9119
3.791629134738447
9120
3.7416264178005636
9121
3.6877207004478665
9122
3.7565263992861877
9123
3.704627304227

3.5034615384615386
9444
4.000000000000002
9445
3.025
9446
2.8828217227273822
9447
3.3363419938199512
9448
3.475000000000001
9449
3.4270684098145003
9450
3.2236376921528054
9451
3.0750000000000006
9452
3.1826793932852713
9453
2.7039564538349663
9454
5.0
9455
4.683012701892219
9456
3.5000000000000013
9457
3.578906054719685
9458
4.044518572532058
9459
3.4749999999999996
9460
3.5920501224347845
9461
3.828050635760924
9462
3.6
9463
4.269877280361693
9464
3.529082330697885
9465
3.324967344723245
9466
4.284175552940927
9467
3.400000000000001
9468
3.6221011627261626
9469
3.6221011627261626
9470
3.047665907859118
9471
1.8281940814013922
9472
3.4731397519822504
9473
3.4263697613379653
9474
3.3750000000000018
9475
2.025000000000001
9476
4.249510183084429
9477
2.4722464584806945
9478
4.252268242767146
9479
3.8545443984274623
9480
3.63767107092785
9481
3.6551465541224597
9482
3.471576053651616
9483
2.951170693159976
9484
3.450837415333123
9485
3.0861032234765604
9486
2.8000000000000007
9487
3.52500

3.925000000000001
9804
3.1828911435535567
9805
3.377419142437662
9806
3.15
9807
3.9054166666666665
9808
3.5
9809
4.125000000000001
9810
2.7482338692369845
9811
2.6476701878490605
9812
3.4235885792485945
9813
4.159699176759125
9814
3.8288432272827495
9815
0
9816
3.9783217208648707
9817
1.0
9818
2.8750000000000013
9819
2.6750000000000003
9820
3.833333333333333
9821
3.7446926200008837
9822
2.515395816242822
9823
2.3999999999999995
9824
3.273973657921026
9825
3.273973657921026
9826
2.4360331816236824
9827
3.326019292816036
9828
3.349999999999999
9829
4.106487833915352
9830
3.050337212607387
9831
3.2582664682372693
9832
3.7098138922847377
9833
2.7116902410034647
9834
3.110915200391007
9835
3.9250000000000007
9836
3.428997022573051
9837
2.4564759912167387
9838
3.428997022573051
9839
3.493371943371943
9840
3.0955013733390393
9841
3.1490963203463207
9842
2.9472252673133474
9843
3.3341061935140885
9844
3.4856723487788854
9845
2.7394541073322225
9846
3.189813504645676
9847
3.6540431488801053
984

#### RMSE measure (for k = 20, choose top 20 similar movies)

In [31]:
from sklearn.metrics import mean_squared_error
from math import sqrt

# add all actual ratings to array that can measure the RMSE with rediction ratings
actuals = []
for i in range(0, len(test_array)):
    actuals.append(test_array[i][1])
#print(actuals)
rmse = sqrt(mean_squared_error(actuals, pred)) #sqrt(MSE)
print("RMSE: %f" % rmse)

#compute global average
total = 0
for i in range(0, len(test_array)):
    
    total += test_array[i][1]

# Array where all estimates are the global average global
glbl = []
glbl_avg = total / len(test_array)

for i in range(0, len(test_array)):
    glbl.append(glbl_avg)

#compute RMSE    
rmse1 = sqrt(mean_squared_error(actuals, glbl))
print("Global:", rmse1)

#compute user averages
users = []
for i in range(0, len(test_array)):
    users.append([0,0])

#index [x] is whichever user id we are calculating, sum up all averages and count their reviews
for i in range(0, len(test_array)):
    users[test_array[i][0]][0] += test_array[i][1]
    users[test_array[i][0]][1] += 1

#new array, each index corresponds to a user id, and has their average rating
users_avg = []
for i in range(0, len(test_array)):
    if (users[i][1]) > 0:
        users_avg.append(users[i][0] / users[i][1])
    else:
        users_avg.append(0)

        #compute RMSE
se = 0
for i in range(0, len(test_array)):
    se += (test_array[i][1] - users_avg[test_array[i][0]])**2

mse = se / len(test_array)

rmse2 = sqrt(mse)

print("User Average:", rmse2)

#compute item averages
items = []
for i in range(1, 193610):
    items.append([0,0])

#each index is a movie id, storing total rating and number of reviews
for i in range(0, len(test_array)):
    items[test_array[i][0]][0] += test_array[i][1]
    items[test_array[i][0]][1] += 1

items_avg = []

#Calculate average for each movie
for i in range(0, len(items)):
    if (items[i][1]) > 0:
        items_avg.append(items[i][0] / items[i][1])
    else:
        items_avg.append(0)

#Compute RMSE        
for i in range(0, len(test_array)):
    se += (test_array[i][1] - items_avg[test_array[i][0]])**2

mse = se / len(test_array)

rmse3 = sqrt(mse)
print("Item:", rmse3)

RMSE: 0.971519
Global: 1.053795858633015
User Average: 0.9152171669149258
Item: 1.2943125299677793
