In [7]:
import pandas as pd
import numpy as np
import re
from surprise import Dataset
from surprise import Reader
from surprise import accuracy
from surprise import KNNBasic
from surprise.model_selection import train_test_split
from surprise.model_selection import KFold

In [8]:
tn_ratings = pd.read_csv('C:/Users/cheft/Desktop/NSS/Capstone/capstone/data/tn-ratings.csv.zip', compression='zip')
tn_ratings

Unnamed: 0,users,ratings,route_id,name,grade,type
0,5dbd835ec5ce9ac449ed8832f99367fb035a141b,3.0,106798179,Die Capitalist Dog,5.7,"{'trad': True, 'sport': True}"
1,5fbef7b9fc4e7b547481d765e179ba457af34823,3.0,106798179,Die Capitalist Dog,5.7,"{'trad': True, 'sport': True}"
2,4a449105062722799cdfa8d564166933dfd95930,2.0,106308324,Who Shot Bambi?,5.10c/d,{'sport': True}
3,b393955c35864cc532da7bae7ca3bd09584a644c,3.0,109954045,What Would Chouinard Do?,5.7,{'trad': True}
4,4d56360626da2b9cb652157564b253935f419732,1.0,109954045,What Would Chouinard Do?,5.7,{'trad': True}
...,...,...,...,...,...,...
28526,64242b0eb94218fedcc4413e4a8a8e02fcb51df4,3.0,108925625,Bayou Boy,5.10b,{'sport': True}
28527,6c75698e01973c96d5232f5a2ee494269bcbb44b,3.0,108925625,Bayou Boy,5.10b,{'sport': True}
28528,64cc5d36279f9f8467d52af3708e948504847429,3.0,108925625,Bayou Boy,5.10b,{'sport': True}
28529,832a05b9b8b99bb84dbd6edc735957fd95a4f611,3.0,113509073,Adventurete,5.11a,{'sport': True}


In [9]:
tn_ratings['type'] = tn_ratings['type'].str.extract(r"((?<=')\w+)")


In [10]:
# Group and rank climbs by number of reviews to show most popular climbs

popularity = tn_ratings.groupby(['route_id','name'])['ratings'].count().reset_index(name='count').sort_values(by=['count'],ascending=False)
popularity

Unnamed: 0,route_id,name,count
10,105888826,Golden Locks,336
170,106068973,Best Seat In The House,303
4,105888371,Prerequisite for Excellence,302
259,106111639,Spawn,286
57,105894863,Passages,274
...,...,...,...
1250,113656726,Hello Nasty,1
1249,113656716,Crack The Keg,1
1248,113656702,No One To Hear You Screem,1
858,110688040,Ambidextrous,1


In [11]:
# Build model

reader = Reader(rating_scale=(0,4))
data = Dataset.load_from_df(tn_ratings[['users', 'route_id', 'ratings']], reader)
sim_options = {'name': 'cosine', 'user_based': True, 'min_support': 4}
algo = KNNBasic(sim_options=sim_options)
kf = KFold(n_splits=5)

for trainset, testset in kf.split(data):
    # train and test algorithm
    algo.fit(trainset)
    predictions = algo.test(testset)
    # compute and print Root Mean Squared Error
    accuracy.rmse(predictions, verbose=True)


Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 0.7189
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 0.7297
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 0.7260
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 0.7236
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 0.7255


In [12]:
climb_name = 'Rage'

# look up route_id from human-readable name
route_id = tn_ratings[tn_ratings.name == climb_name]['route_id'].iloc[1]

print('People who climbed ' + climb_name + ' also liked')

# retrieve similar climbs
prediction = algo.get_neighbors(trainset.to_inner_iid(route_id), 10)


# convert IDs back to human-readable names
recs = map(lambda id: trainset.to_raw_iid(id), np.asarray(prediction))
results = tn_ratings[tn_ratings.route_id.isin(recs)]
r = results.pivot_table(
    index=['name', 'type', 'grade'],
    aggfunc=[np.mean, np.median, np.size],
    values='ratings')
print(r)

People who climbed Rage also liked
                                         mean  median    size
                                      ratings ratings ratings
name                  type  grade                            
Blade Loafer          trad  5.10b    2.350000     2.0      60
Donald Pump           sport 5.12b    3.000000     3.0      12
Flim Flam             trad  5.8      1.666667     1.0       9
Foster Child          sport 5.10d    2.384615     3.0      26
Girly Man             sport 5.11a/b  2.600000     3.0       5
Kidnapper             sport 5.8      1.833333     2.0       6
Last Laugh            sport 5.10c    1.250000     1.0       4
Magnetic Feet         sport 5.11d    1.000000     1.0       1
When Old Friends Meet sport 5.10b    3.000000     3.0       7
Wristlets             sport 5.11c/d  3.729412     4.0      85
