In [2]:
import pandas as pd
from surprise.model_selection import train_test_split
from surprise import Dataset, Reader
from surprise.model_selection import cross_validate
from surprise import NormalPredictor, KNNBasic, KNNWithMeans, KNNWithZScore, KNNBaseline, SVD, BaselineOnly, SVDpp, NMF, SlopeOne, CoClustering
from surprise.accuracy import rmse

In [None]:
# Read the ratings table from disk; the path is relative to the notebook's
# working directory.
df = pd.read_csv('../data/movies/ratings.csv')

In [None]:
# Remove the 'timestamp' column in place so only the remaining columns are
# passed on to Surprise.
df.drop(columns='timestamp', inplace=True)

In [None]:
# Declare the rating bounds, then wrap the DataFrame as a Surprise Dataset.
# NOTE(review): confirm the lower bound of 0 matches the minimum rating
# actually present in ratings.csv.
reader = Reader(rating_scale=(0, 5))
data = Dataset.load_from_df(df=df, reader=reader)

In [None]:
# Hold out 20% of the ratings for evaluation. The split is seeded so that
# Restart & Run All reproduces the same train/test partition (and therefore
# the same RMSE) on every run.
train, test = train_test_split(data, test_size=0.2, random_state=42)

## Test Surprise Algorithms

In [None]:
# Cross-validate every Surprise algorithm with default hyper-parameters and
# collect one summary row (mean of each cross_validate output column, plus
# the algorithm's class name) per algorithm.
benchmark = []
for algorithm in [SVD(), SVDpp(), SlopeOne(), NMF(), NormalPredictor(), KNNBaseline(), KNNBasic(), KNNWithMeans(), KNNWithZScore(), BaselineOnly(), CoClustering()]:
    # 3-fold cross validation, reporting RMSE only
    results = cross_validate(algorithm, data, measures=['RMSE'], cv=3, verbose=False)

    # Average each reported column across the folds
    tmp = pd.DataFrame.from_dict(results).mean(axis=0)

    # Series.append was removed in pandas 2.0; pd.concat is the supported way
    # to attach the label. The class name is read from the type rather than
    # parsed out of the object's repr string.
    label = pd.Series([type(algorithm).__name__], index=['Algorithm'])
    tmp = pd.concat([tmp, label])
    benchmark.append(tmp)

In [None]:
# One row per algorithm, ordered by 'test_rmse' ascending (lowest first).
surprise_results = (
    pd.DataFrame(benchmark)
    .set_index('Algorithm')
    .sort_values('test_rmse')
)

In [None]:
# Display the benchmark table.
surprise_results

## Using Surprise SVD++

In [None]:
# Hyper-parameters for SVD++; keys must be SVDpp constructor arguments,
# e.g. {'n_factors': 20, 'n_epochs': 20}. An empty dict keeps the defaults.
svdpp_options = {}
# SVDpp has no `SVDpp_options` keyword (passing it raises TypeError), so the
# dict is unpacked into the constructor's own keyword arguments instead.
algo = SVDpp(**svdpp_options)

In [None]:
# Train on the training split, then predict every rating in the held-out split.
algo.fit(train)
predictions = algo.test(test)

In [None]:
# Report RMSE on the held-out predictions. Only `rmse` is imported from
# surprise.accuracy in the imports cell (the `accuracy` module name itself is
# not bound), so the function is called directly.
rmse(predictions)

## Merging Nick & Kayla's Movie Rating Data

In [3]:
# Read the g90 movie-ratings sheet: a wide table with a 'Name' column and one
# column per movie title (see the preview below).
teach = pd.read_csv('../data/movies/g90_movie_ratings.csv')

In [4]:
# Preview the first five rows.
teach.head(n=5)

Unnamed: 0,Name,21 Jump Street,28 Days Later,A Beautiful Mind,About Time,Air Force One,Alien,Aliens,American Gangster,Annihilation,...,The Godfather,The Intouchables,The Life Aquatic w. Steve Zissou,The Martian,The Pianist,Thor: Ragnarok,Three Billboards Outside Ebbing Missouri,Tron: Legacy,War Games,Wayne's World
0,Kayla Thomas,,,4.0,10.0,3.0,,,4.0,,...,,,3.0,,6.0,,7.0,,,2.0
1,Nick Jacobsohn,7.0,,,,,10.0,8.0,,8.0,...,,,,,,,8.0,,,7.0
2,Andrew,6.0,,,,5.0,6.0,6.0,7.0,,...,,,,,,8.0,7.0,,,
3,John Herr,,,8.0,,,,,,,...,8.0,,,,,,,,,
4,Jarred Bultema,6.0,6.0,8.0,,5.0,5.0,,7.0,3.0,...,,,6.0,8.0,6.0,10.0,,5.0,5.0,7.0


In [8]:
# Keep only the first two rows (Kayla Thomas and Nick Jacobsohn in the preview
# above). An explicit copy is taken because later cells mutate this frame in
# place (set_index / drop / fillna with inplace=True); doing that on a bare
# slice triggers SettingWithCopyWarning.
teach = teach.iloc[:2].copy()

In [9]:
# Display the two remaining rows.
teach

Unnamed: 0,Name,21 Jump Street,28 Days Later,A Beautiful Mind,About Time,Air Force One,Alien,Aliens,American Gangster,Annihilation,...,The Godfather,The Intouchables,The Life Aquatic w. Steve Zissou,The Martian,The Pianist,Thor: Ragnarok,Three Billboards Outside Ebbing Missouri,Tron: Legacy,War Games,Wayne's World
0,Kayla Thomas,,,4.0,10.0,3.0,,,4.0,,...,,,3.0,,6.0,,7.0,,,2.0
1,Nick Jacobsohn,7.0,,,,,10.0,8.0,,8.0,...,,,,,,,8.0,,,7.0


In [20]:
# Read the g99 movie-ratings sheet; the preview below shows the same wide
# layout as `teach` (a 'Name' column followed by one column per movie).
class_rat = pd.read_csv('../data/movies/g99_movie_rating.csv')

In [21]:
# Preview the first five rows.
class_rat.head(n=5)

Unnamed: 0,Name,21 Jump Street,28 Days Later,A Beautiful Mind,About Time,Air Force One,Alien,Aliens,American Gangster,Annihilation,...,The Godfather,The Intouchables,The Life Aquatic w. Steve Zissou,The Martian,The Pianist,Thor: Ragnarok,Three Billboards Outside Ebbing Missouri,Tron: Legacy,War Games,Wayne's World
0,Kayla Thomas,5.0,,4.0,10.0,3.0,,,4.0,,...,,,3.0,,6.0,,7.0,,,2.0
1,Alex Cross,5.0,4.0,8.0,,,7.0,,,,...,,,5.0,,,9.0,,,,
2,Alex Rook,,6.0,,,,8.0,6.0,,,...,,,,8.0,,8.0,,,7.0,6.0
3,Dan Reiff,6.0,,,,,,,,,...,10.0,,,5.0,,4.0,4.0,,,
4,Dan Riggi,7.0,3.0,7.0,,7.0,8.0,6.0,9.0,,...,10.0,,5.0,7.0,,7.0,,3.0,,4.0


In [25]:
# Snapshot of the column labels of `teach` as a plain list, taken at this
# point in the notebook (i.e. while 'Name' is still one of the columns).
cols = teach.columns.tolist()

In [29]:
# Use the 'Name' values as the row index; drop=False keeps the 'Name' column
# in the frame as well (it is removed in a separate step).
teach.set_index('Name', drop=False, inplace=True)

In [34]:
# 'Name' now lives in the index, so remove the duplicate column.
teach.drop(labels=['Name'], axis='columns', inplace=True)

In [40]:
# Replace missing ratings with 0; the flattening loop later treats 0 as
# "not rated" via its `val > 0` filter.
teach.fillna(value=0, inplace=True)

In [42]:
# Halve every rating (the sheet shows values up to 10, while the Reader above
# is configured with an upper bound of 5).
teach = teach.div(2)

In [43]:
# Display the rescaled, name-indexed ratings.
teach

Unnamed: 0_level_0,21 Jump Street,28 Days Later,A Beautiful Mind,About Time,Air Force One,Alien,Aliens,American Gangster,Annihilation,Apollo 13,...,The Godfather,The Intouchables,The Life Aquatic w. Steve Zissou,The Martian,The Pianist,Thor: Ragnarok,Three Billboards Outside Ebbing Missouri,Tron: Legacy,War Games,Wayne's World
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Kayla Thomas,0.0,0.0,2.0,5.0,1.5,0.0,0.0,2.0,0.0,2.0,...,0.0,0.0,1.5,0.0,3.0,0.0,3.5,0.0,0.0,1.0
Nick Jacobsohn,3.5,0.0,0.0,0.0,0.0,5.0,4.0,0.0,4.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,3.5


In [49]:
# Sanity check: the first index label is the first person's name.
teach.index[0]

'Kayla Thomas'

In [58]:
# Flatten the wide `teach` table into three parallel lists, one entry per
# (person, movie) pair that has a rating greater than 0:
#   names[i]  - row label (the person's name)
#   movies[i] - column label (the movie title)
#   rating[i] - the rating value
names = []
movies = []
rating = []

# Titles are taken from teach.columns rather than the earlier `cols` snapshot:
# `cols` was captured while 'Name' was still a column, so positional lookups
# into it are shifted by one relative to teach.values and mislabel every rating.
for name, row in zip(teach.index, teach.values):
    for title, val in zip(teach.columns, row):
        if val > 0:
            names.append(name)
            movies.append(title)
            rating.append(val)
            

In [10]:
# Read ratings.csv again under the name `ratings`; unlike `df` (loaded from
# the same file earlier), this copy still has every original column.
ratings = pd.read_csv('../data/movies/ratings.csv')

In [11]:
# Read the movie catalogue (movieId, title, genres — see the preview below).
# NOTE(review): this rebinds `movies`, which the rating-flattening cell
# earlier in the notebook uses as a list of titles; run top-to-bottom, that
# list is lost here. Consider a distinct name for one of the two.
movies = pd.read_csv('../data/movies/movies.csv')

In [12]:
# Preview the first five rows.
movies.head(n=5)

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [15]:
# Read the id cross-reference table (movieId, imdbId, tmdbId — see the
# preview below).
links = pd.read_csv('../data/movies/links.csv')

In [16]:
# Preview the first five rows.
links.head(n=5)

Unnamed: 0,movieId,imdbId,tmdbId
0,1,114709,862.0
1,2,113497,8844.0
2,3,113228,15602.0
3,4,114885,31357.0
4,5,113041,11862.0


In [18]:
# Read the tag table (userId, movieId, tag, timestamp — see the preview below).
tags = pd.read_csv('../data/movies/tags.csv')

In [19]:
# Preview the first five rows.
tags.head(n=5)

Unnamed: 0,userId,movieId,tag,timestamp
0,15,339,sandra 'boring' bullock,1138537770
1,15,1955,dentist,1193435061
2,15,7478,Cambodia,1170560997
3,15,32892,Russian,1170626366
4,15,34162,forgettable,1141391765
