In [None]:
import numpy as np
import pandas as pd

# Read the zipped ratings CSV into a DataFrame; later cells use its
# 'users', 'route_id', 'name', 'type', 'grade' and 'ratings' columns.
df = pd.read_csv(
    "./openbeta-ratings-nevada.zip",
    compression="zip",
)

In [None]:
# Eyeball five randomly chosen rows as a sanity check of the load.
df.sample(n=5)

In [None]:
# Rank climbs by how many (non-null) ratings they have received.
# NOTE(review): grouping is by climb name only, so distinct routes that share
# a name are counted together — confirm this is acceptable.
popular = (
    df.groupby('name')['ratings']
    .count()
    .reset_index(name="count")
    .sort_values(by='count', ascending=False)
)
print("Popular climbs by user ratings")
popular.head(20)

In [None]:
# Build the user x route rating matrix: one row per user, one column per
# route_id. When a user rated the same route more than once, the ratings are
# averaged. The aggregation is named by string ('mean') so this cell does not
# depend on NumPy being imported (the original referenced np.mean while `np`
# was never defined, which fails on a fresh kernel).
# NOTE(review): fillna(0) makes "not rated" indistinguishable from an actual
# rating of 0, if 0 is a valid rating value — confirm before modelling on mx.
mx = df.pivot_table(
    index='users',
    columns='route_id',
    values='ratings',
    aggfunc='mean',
).fillna(0)
mx.sample(5)

In [None]:
from surprise import Dataset
from surprise import Reader
from surprise import accuracy
from surprise import KNNWithMeans
from surprise import KNNWithZScore
from surprise.model_selection import train_test_split
from surprise.model_selection import KFold


# Surprise needs the bounds of the rating values up front.
reader = Reader(rating_scale=(0, 4))  # TODO: confirm whether the scale is 0-4 or 1-4
data = Dataset.load_from_df(df[['users', 'route_id', 'ratings']], reader)

# Item-based KNN ('user_based': False) using mean-squared-difference
# similarity; 'min_support': 2 requires at least two common users before a
# similarity between two routes is computed.
sim_options = {'name': 'msd', 'user_based': False, 'min_support': 2}
algo = KNNWithZScore(sim_options=sim_options)

# Seed the fold shuffling so the reported RMSE values are reproducible
# across "Restart & Run All".
kf = KFold(n_splits=5, random_state=42)

# 5-fold cross-validation: this loop only *evaluates* the algorithm.
for trainset, testset in kf.split(data):

    # Train on four folds, predict the held-out fold.
    algo.fit(trainset)
    predictions = algo.test(testset)

    # Compute and print Root Mean Squared Error for this fold.
    accuracy.rmse(predictions, verbose=True)

# Refit on every available rating for the model that is actually used for
# recommendations. Without this, `algo` and `trainset` would be whatever the
# last CV fold left behind: a model missing ~20% of the ratings, and a
# trainset in which some routes may be absent (to_inner_iid then raises
# ValueError for them).
trainset = data.build_full_trainset()
algo.fit(trainset)


def predict_routes(climb_name, k=50):
    """Print rating statistics for the routes most similar to a named climb.

    Looks up the climb's ``route_id`` in the module-level ``df``, asks the
    fitted module-level ``algo`` for that route's nearest neighbours (using
    the module-level ``trainset`` to translate ids), and prints a per-route
    summary of the neighbours' ratings: mean, median and number of ratings.

    Args:
        climb_name: Value matched exactly against ``df.name``.
        k: Number of neighbouring routes to request. Defaults to 50, the
            value that was previously hard-coded.

    Raises:
        IndexError: If no row of ``df`` has this name.
        ValueError: Raised by Surprise if the route is not part of
            ``trainset``.
    """
    matches = df.loc[df.name == climb_name, 'route_id']
    if matches.empty:
        raise IndexError(
            "no climb named {!r} in the ratings data".format(climb_name)
        )

    # Use the first matching row. The original used .iloc[1], which skipped
    # the first row and failed for any climb with exactly one rating row.
    # NOTE(review): names may not be unique across routes; if several routes
    # share this name, the first one encountered wins — confirm.
    route_id = matches.iloc[0]
    print("People who climbed '{}' also climbed".format(climb_name))

    # Get the k most similar routes (as Surprise inner item ids).
    neighbor_inner_ids = algo.get_neighbors(trainset.to_inner_iid(route_id), k)

    # Convert Surprise inner ids back to the raw route_id values used in df.
    # A concrete list (not a lazy map object) is handed to isin().
    neighbor_route_ids = [
        trainset.to_raw_iid(inner_id) for inner_id in neighbor_inner_ids
    ]
    results = df[df.route_id.isin(neighbor_route_ids)]

    # One row per neighbouring route with its rating mean, median and count.
    r = results.pivot_table(
        index=['name', 'route_id', 'type', 'grade'],
        aggfunc=[np.mean, np.median, np.size],
        values='ratings',
    )
    print(r)

In [None]:
# Example query: print recommendations for one well-known climb.
predict_routes(climb_name="Epinephrine")