In [13]:
"""DOC strings"""

# import packages
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity



class hikeRecommender(object):

    def __init__(self, hikes, weights=None):
        self.hike_matrix = hikes
        self.feature_weights = weights
        self.hikes_liked = pd.DataFrame(columns=hikes.columns)
        self.hikes_disliked = pd.DataFrame(columns=hikes.columns)
        if not weights:
            self.weights={c:1 for c in hikes.columns}
        else: 
            self.weights=weights
            self.weighted_hike_matrix = self.apply_weights()

    def like_hike(self, hike_id):
        indx = self.hike_matrix[self.hike_matrix['hike_id']==hike_id].index[0]
        self.hikes_liked = self.hikes_liked.append(self.hike_matrix.iloc[indx])
        self.hike_matrix = self.hike_matrix.drop(indx, axis=0).reset_index(drop=True)

    def recommend(self, n=5, apply_weights=True):
        # Calculate similarity to all of the hikes
        # average similarities
        # return top 5
        indx_id = self.hike_matrix['hike_id']
        X = self.hike_matrix.drop('hike_id', axis=1)
        y = self.hikes_liked.drop('hike_id', axis=1)
        cs = cosine_similarity(X, y).mean(axis=1)
        rec_index= np.argsort(cs)[-n:][::-1]
        recommendations = indx_id.iloc[rec_index]
        return recommendations
        
    def apply_weights(self):
        weighted_hike_matrix = pd.DataFrame(columns=self.hike_matrix.columns)
        for col, weight in self.weights.items():
            weighted_hike_matrix[col] = self.hike_matrix[col].apply(lambda x: x*weight) 
        return weighted_hike_matrix
    
    

In [31]:
hike_data = pd.read_csv('../data/2021-05-11-wta-hike-recommendation-export-2.csv', index_col=0)

In [32]:
hike_data.head()

Unnamed: 0_level_0,region name,uid,average_rating,elevation_gain,highpoint,mileage,mileage_type,tripreport_count,campsites,coast,...,kids_ok,lakes,mountains,old_growth,ridges,rivers,summits,waterfalls,wildflowers,wildlife
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Escondido Tarn,Snoqualmie Region -- Snoqualmie Pass,001677bfe7114e3297b630e6c59e8e00,0.0,,,,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
Duke of Kent,Snoqualmie Region -- North Bend Area,00234c47604711f44cd63a1439aabb6c,3.0,3400.0,4840.0,7.0,1,6,0,0,...,0,0,0,0,0,0,0,0,0,0
Lookout Mountain Forest Preserve,Puget Sound and Islands -- Bellingham Area,00330df2a0c041debbbb6063395d4299,3.333333,,,6.5,4,44,0,0,...,1,1,0,0,0,1,0,1,1,0
Dead Horse Lake,North Cascades -- Methow/Sawtooth,003d7e0686ea4397ba4c4463373a2ff3,0.0,,,,1,3,0,0,...,0,0,0,0,0,0,0,0,0,0
Sheep Mountain,North Cascades -- Mountain Loop Highway,0062cfc6a710437ba3b34ccb8f6fde66,0.0,,,,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0


In [33]:
def norm(df, col):
    """Normalized the give column of the provided dataframe"""
    df[col] = pd.to_numeric(df[col],errors='coerce')
    df[f"{col}_normalized"] = (df[col] - df[col].mean())/(df[col].max() - df[col].min())

In [34]:
norm(hike_data, 'elevation_gain')
norm(hike_data, 'tripreport_count')
norm(hike_data, 'mileage')

In [35]:

hike_data.replace(to_replace=["None"], value=np.nan, inplace=True)
hike_data.fillna(0, inplace=True)
hike_data.reset_index(inplace=True)

In [51]:
hike_data['hike_id'] = hike_data.index

In [52]:
hike_data[hike_data['title']=="Dirty Harry's Balcony"]

Unnamed: 0,title,region name,uid,average_rating,elevation_gain,highpoint,mileage,mileage_type,tripreport_count,campsites,...,ridges,rivers,summits,waterfalls,wildflowers,wildlife,elevation_gain_normalized,tripreport_count_normalized,mileage_normalized,hike_id
3923,Dirty Harry's Balcony,Snoqualmie Region -- Snoqualmie Pass,f01eec1e5890dfd5c26bf9fe0b203a4a,3.178571,1300.0,2600.0,4.4,1,555,0,...,0,0,0,0,0,1,-0.011198,0.22075,-0.003278,3923


In [53]:
hike_data.columns

Index(['title', 'region name', 'uid', 'average_rating', 'elevation_gain',
       'highpoint', 'mileage', 'mileage_type', 'tripreport_count', 'campsites',
       'coast', 'dogs_ok', 'foliage', 'kids_ok', 'lakes', 'mountains',
       'old_growth', 'ridges', 'rivers', 'summits', 'waterfalls',
       'wildflowers', 'wildlife', 'elevation_gain_normalized',
       'tripreport_count_normalized', 'mileage_normalized', 'hike_id'],
      dtype='object')

In [83]:
# import weights
import json
with open('../data/weights_20210512.json', 'r') as weights_fp:
    weight = json.load(weights_fp)

In [84]:
recommender_cols = list(weight.keys()) + ['hike_id']

In [85]:
rec_test_data = hike_data[recommender_cols]

In [87]:
hike_data[hike_data['title'].str.contains("Mount Washington")]

Unnamed: 0,title,region name,uid,average_rating,elevation_gain,highpoint,mileage,mileage_type,tripreport_count,campsites,...,ridges,rivers,summits,waterfalls,wildflowers,wildlife,elevation_gain_normalized,tripreport_count_normalized,mileage_normalized,hike_id
1914,Mount Washington,Olympic Peninsula -- Hood Canal,73863a834f87a35d038d0513f8cd3b12,3.5,3190.0,0.0,4.0,1,34,0,...,0,0,1,0,0,0,0.056312,-0.010088,-0.003611,1914
3335,Mount Washington,Snoqualmie Region -- North Bend Area,cc9b2714eabe8c1d4fd44e79c33bdd6a,3.987654,3250.0,4450.0,8.5,1,1227,0,...,0,0,1,0,1,0,0.058455,0.51849,0.000139,3335


In [88]:
hr=hikeRecommender(rec_test_data, weights=weight)

In [89]:
mailbox = 3335

In [90]:
hr.like_hike(mailbox)

In [72]:
hr.like_hike(townsend)

In [91]:
recs = hike_data.iloc[list(hr.recommend(10))]

In [94]:
hike_data.iloc[list(hr.recommend(10))].title.to_list()

['Goat Mountain',
 'Bearhead Mountain',
 'Red Top Lookout',
 'Sauk Mountain',
 'Mount Pilchuck',
 'Mailbox Peak - Old Trail',
 'McClellan Butte',
 'Thorp Mountain via Knox Creek',
 'Tatoosh Lookout',
 'Phils Trail - Thrush Gap Loop']

In [93]:
recs.title.to_list()

['Goat Mountain',
 'Bearhead Mountain',
 'Red Top Lookout',
 'Sauk Mountain',
 'Mount Pilchuck',
 'Mailbox Peak - Old Trail',
 'McClellan Butte',
 'Thorp Mountain via Knox Creek',
 'Tatoosh Lookout',
 'Phils Trail - Thrush Gap Loop']

In [None]:
gower = [ 'Sauk Mountain',
 'McClellan Butte',
 'Mount Pilchuck',
 'Mailbox Peak - Old Trail',
 "Dirty Harry's Peak",
 'Goat Mountain',
 'Bearhead Mountain',
 'Red Top Lookout',
 'Thorp Mountain via Knox Creek',
 'Sulphur Mountain']