In [1]:
import pandas as pd
import numpy as np
import re
from surprise import Dataset
from surprise import Reader
from surprise import accuracy
from surprise import KNNBasic
from surprise.model_selection import train_test_split
from surprise.model_selection import KFold

In [2]:
tn_ratings = pd.read_csv('C:/Users/cheft/Desktop/NSS/Capstone/capstone/data/tn-ratings.csv.zip', compression='zip')
tn_ratings

Unnamed: 0,users,ratings,route_id,name,grade,type
0,5dbd835ec5ce9ac449ed8832f99367fb035a141b,3.0,106798179,Die Capitalist Dog,5.7,"{'trad': True, 'sport': True}"
1,5fbef7b9fc4e7b547481d765e179ba457af34823,3.0,106798179,Die Capitalist Dog,5.7,"{'trad': True, 'sport': True}"
2,4a449105062722799cdfa8d564166933dfd95930,2.0,106308324,Who Shot Bambi?,5.10c/d,{'sport': True}
3,b393955c35864cc532da7bae7ca3bd09584a644c,3.0,109954045,What Would Chouinard Do?,5.7,{'trad': True}
4,4d56360626da2b9cb652157564b253935f419732,1.0,109954045,What Would Chouinard Do?,5.7,{'trad': True}
...,...,...,...,...,...,...
28526,64242b0eb94218fedcc4413e4a8a8e02fcb51df4,3.0,108925625,Bayou Boy,5.10b,{'sport': True}
28527,6c75698e01973c96d5232f5a2ee494269bcbb44b,3.0,108925625,Bayou Boy,5.10b,{'sport': True}
28528,64cc5d36279f9f8467d52af3708e948504847429,3.0,108925625,Bayou Boy,5.10b,{'sport': True}
28529,832a05b9b8b99bb84dbd6edc735957fd95a4f611,3.0,113509073,Adventurete,5.11a,{'sport': True}


In [3]:
tn_ratings['type'] = tn_ratings['type'].str.extract(r"((?<=')\w+)")


In [4]:
# Group and rank climbs by number of reviews to show most popular climbs

popularity = tn_ratings.groupby(['route_id','name'])['ratings'].count().reset_index(name='count').sort_values(by=['count'],ascending=False)
popularity

Unnamed: 0,route_id,name,count
10,105888826,Golden Locks,336
170,106068973,Best Seat In The House,303
4,105888371,Prerequisite for Excellence,302
259,106111639,Spawn,286
57,105894863,Passages,274
...,...,...,...
1250,113656726,Hello Nasty,1
1249,113656716,Crack The Keg,1
1248,113656702,No One To Hear You Screem,1
858,110688040,Ambidextrous,1


In [19]:
climbs = tn_ratings['name'].unique().tolist()
climbs

['Die Capitalist Dog',
 'Who Shot Bambi?',
 'What Would Chouinard Do?',
 'Wish I Was Trad',
 'The Turret',
 'Tallboy',
 'Machete Mayhem',
 'Land of the Long',
 'Inga La Dinga',
 'Gnash and Grab',
 'Gentry Route',
 'Fragile Holdings',
 'Flake Right',
 'Easy Enough',
 'Streaker',
 'Swindled',
 'Orange Peel',
 'Optional Ethics',
 'Nasty Lite',
 'Lucky Streak',
 'Kingpin',
 'The High Road',
 'Fun Employed',
 'Chiminey',
 'Brown Streak',
 'Break Away',
 'AGR',
 'Scarlete Begonia',
 'Rad Line',
 'Predator',
 'Escapism',
 'El Che',
 'Cottonmouth',
 'Copperhead',
 'Paradise',
 'El Breezy',
 'Throb',
 'The Theater',
 'The Tall Cool One',
 'Steal The Air',
 'Soul Sounds',
 'Pump It To The Sky',
 'Snow White',
 'Pocket  Wilderness',
 'Off To The Wild Blue Yonder',
 'Leapin Lizards',
 'Jacobs Ladder II',
 'Fit Makes The Fashion',
 'Direct Finish',
 'Dakota Blues',
 'Cuyahoga Falls',
 'Cracksmiths Delight',
 'Crack Of Dawn',
 'Cinderella',
 'The Beefeater',
 'Ambition Ammunition',
 'Alternate Finis

In [5]:
# Build model

reader = Reader(rating_scale=(0,4))
data = Dataset.load_from_df(tn_ratings[['users', 'route_id', 'ratings']], reader)
sim_options = {'name': 'cosine', 'user_based': True, 'min_support': 4}
algo = KNNBasic(sim_options=sim_options)
kf = KFold(n_splits=5)

for trainset, testset in kf.split(data):
    # train and test algorithm
    algo.fit(trainset)
    predictions = algo.test(testset)
    # compute and print Root Mean Squared Error
    accuracy.rmse(predictions, verbose=True)


Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 0.7308
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 0.7311
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 0.7285
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 0.7156
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 0.7118


In [13]:
climb_name = 'Rage'

# look up route_id from human-readable name
route_id = tn_ratings[tn_ratings.name == climb_name]['route_id'].iloc[1]

print('People who climbed ' + climb_name + ' also liked')

# retrieve similar climbs
prediction = algo.get_neighbors(trainset.to_inner_iid(route_id), 10)


# convert IDs back to human-readable names
recs = map(lambda id: trainset.to_raw_iid(id), np.asarray(prediction))
results = tn_ratings[tn_ratings.route_id.isin(recs)]
r = results.pivot_table(
    index=['name', 'type', 'grade'],
    aggfunc=[np.mean, np.size],
    values='ratings')
print(r)

People who climbed Rage also liked
                                        mean    size
                                     ratings ratings
name                   type  grade                  
Bottom Feeder          sport 5.11b  1.714286      14
Ethnic Cleansing       sport 5.12b  3.172414      87
Finders Keepers        sport 5.10b  2.155556      45
Pale Face              sport 5.11a  3.573333     150
Rebel Flag             trad  5.12b  2.000000       1
Spring Break           trad  5.8    1.875000       8
The Real McCoy         sport 5.10d  2.500000       4
Thunderstruck          sport 5.10b  2.187500      16
Use Me                 sport 5.10   2.333333      18
Welcome to the Machine sport 5.12c  2.333333       3


In [24]:
r = r.reset_index()
r['climb'] = 'Rage'


Unnamed: 0_level_0,name,type,grade,mean,size,climb
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,ratings,ratings,Unnamed: 6_level_1
0,Bottom Feeder,sport,5.11b,1.714286,14,Rage
1,Ethnic Cleansing,sport,5.12b,3.172414,87,Rage
2,Finders Keepers,sport,5.10b,2.155556,45,Rage
3,Pale Face,sport,5.11a,3.573333,150,Rage
4,Rebel Flag,trad,5.12b,2.0,1,Rage
5,Spring Break,trad,5.8,1.875,8,Rage
6,The Real McCoy,sport,5.10d,2.5,4,Rage
7,Thunderstruck,sport,5.10b,2.1875,16,Rage
8,Use Me,sport,5.10,2.333333,18,Rage
9,Welcome to the Machine,sport,5.12c,2.333333,3,Rage


In [28]:
r.columns = r.columns.droplevel(level=1)


In [29]:
r

Unnamed: 0,name,type,grade,mean,size,climb
0,Bottom Feeder,sport,5.11b,1.714286,14,Rage
1,Ethnic Cleansing,sport,5.12b,3.172414,87,Rage
2,Finders Keepers,sport,5.10b,2.155556,45,Rage
3,Pale Face,sport,5.11a,3.573333,150,Rage
4,Rebel Flag,trad,5.12b,2.0,1,Rage
5,Spring Break,trad,5.8,1.875,8,Rage
6,The Real McCoy,sport,5.10d,2.5,4,Rage
7,Thunderstruck,sport,5.10b,2.1875,16,Rage
8,Use Me,sport,5.10,2.333333,18,Rage
9,Welcome to the Machine,sport,5.12c,2.333333,3,Rage


In [30]:
dflist = []

for i in climbs:
    try:
        climb_name = i
        route_id = tn_ratings[tn_ratings.name == climb_name]['route_id'].iloc[1]
        prediction = algo.get_neighbors(trainset.to_inner_iid(route_id), 10)
        recs = map(lambda id: trainset.to_raw_iid(id), np.asarray(prediction))
        results = tn_ratings[tn_ratings.route_id.isin(recs)]
        r = results.pivot_table(
            index=['name', 'type', 'grade'],
            aggfunc=[np.mean, np.size],
            values='ratings')
        r = r.reset_index()
        r['climb'] = climb_name
        r.columns = r.columns.droplevel(level=1)
        dflist.append(r)
    except:
        pass

In [33]:
dflist

[                     name   type    grade      mean  size               climb
 0             Ankles Away  sport      5.9  2.754789   261  Die Capitalist Dog
 1          Barb's Grimace  sport    5.10d  3.000000     4  Die Capitalist Dog
 2  Best Seat In The House  sport      5.9  3.267327   303  Die Capitalist Dog
 3                 Eclipse  sport  5.12c/d  3.674419    43  Die Capitalist Dog
 4        Ethnic Cleansing  sport    5.12b  3.172414    87  Die Capitalist Dog
 5     Finger Lockin' Good   trad  5.10b/c  3.552381   210  Die Capitalist Dog
 6                      It  sport    5.10a  2.171429   140  Die Capitalist Dog
 7     Keeper of the Flame  sport    5.11d  3.886364    44  Die Capitalist Dog
 8                   Mammy  sport      5.9  2.251656   151  Die Capitalist Dog
 9         Sweet Surrender  sport      5.8  3.459459    37  Die Capitalist Dog,
                      name   type    grade      mean  size  \
 0             Ankles Away  sport      5.9  2.754789   261   
 1    

In [34]:
rec_df = pd.concat(dflist)

In [37]:
rec_df#.to_excel('recommendations.xlsx')

Unnamed: 0,name,type,grade,mean,size,climb
0,Ankles Away,sport,5.9,2.754789,261,Die Capitalist Dog
1,Barb's Grimace,sport,5.10d,3.000000,4,Die Capitalist Dog
2,Best Seat In The House,sport,5.9,3.267327,303,Die Capitalist Dog
3,Eclipse,sport,5.12c/d,3.674419,43,Die Capitalist Dog
4,Ethnic Cleansing,sport,5.12b,3.172414,87,Die Capitalist Dog
...,...,...,...,...,...,...
5,Finger Lockin' Good,trad,5.10b/c,3.552381,210,Adventurete
6,It,sport,5.10a,2.171429,140,Adventurete
7,Keeper of the Flame,sport,5.11d,3.886364,44,Adventurete
8,Mammy,sport,5.9,2.251656,151,Adventurete
