In [205]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity


In [172]:
hike_data = pd.read_csv('data/itemData.csv')
item_data = hike_data.drop(labels=['Unnamed: 0', 'hike_name', 'stars'], axis=1)

In [174]:
def norm(df, col):
    """Normalized the give column of the provided dataframe"""
    df[col] = (df[col] - df[col].mean())/(df[col].max() - df[col].min())

In [175]:
norm(item_data, 'elevation gain')
norm(item_data, 'time_from_seattle')
norm(item_data, 'numReports')
norm(item_data, 'total_dist')

In [176]:
item_data.fillna(0, inplace=True)

In [227]:
hike_data[hike_data['hike_name']=="Dirty Harry's Balcony"]

Unnamed: 0.1,Unnamed: 0,hike_name,hike_id,numReports,total_dist,elevation gain,time_from_seattle,Coast,stars,Dogs allowed on leash,...,Good for kids,Lakes,Mountain views,Old growth,Ridges/passes,Rivers,Summits,Waterfalls,Wildflowers/Meadows,Wildlife
251,251,Dirty Harry's Balcony,251,221.0,6.0,1300.0,42.2,0,2.71,0,...,0,0,0,0,0,0,0,0,0,1


In [177]:
hikes_liked = pd.DataFrame(item_data.iloc[65]).transpose()

In [178]:
hikes_liked

Unnamed: 0,hike_id,numReports,total_dist,elevation gain,time_from_seattle,Coast,Dogs allowed on leash,Established campsites,Fall foliage,Good for kids,Lakes,Mountain views,Old growth,Ridges/passes,Rivers,Summits,Waterfalls,Wildflowers/Meadows,Wildlife
65,65.0,2.0,2.2,600.0,87.316667,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0


In [179]:
hikes_liked = hikes_liked.append(item_data.iloc[87])

In [180]:
hikes_liked

Unnamed: 0,hike_id,numReports,total_dist,elevation gain,time_from_seattle,Coast,Dogs allowed on leash,Established campsites,Fall foliage,Good for kids,Lakes,Mountain views,Old growth,Ridges/passes,Rivers,Summits,Waterfalls,Wildflowers/Meadows,Wildlife
65,65.0,2.0,2.2,600.0,87.316667,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
87,87.0,223.0,4.2,1200.0,140.85,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [181]:
hikes_liked.columns

Index(['hike_id', 'numReports', 'total_dist', 'elevation gain',
       'time_from_seattle', 'Coast', 'Dogs allowed on leash',
       'Established campsites', 'Fall foliage', 'Good for kids', 'Lakes',
       'Mountain views', 'Old growth', 'Ridges/passes', 'Rivers', 'Summits',
       'Waterfalls', 'Wildflowers/Meadows', 'Wildlife'],
      dtype='object')

In [182]:
"""DOC strings"""

# import packages
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity



class hikeRecommender(object):

    def __init__(self, hikes, weights=None):
        self.hike_matrix = hikes
        self.feature_weights = weights
        self.hikes_liked = pd.DataFrame(columns=hikes.columns)
        self.hikes_disliked = pd.DataFrame(columns=hikes.columns)
        if not weights:
            self.weights={c:1 for c in hikes.columns}
        else: 
            self.weights=weights
            self.weighted_hike_matrix = self.apply_weights()

    def like_hike(self, hike_id):
        indx = self.hike_matrix[self.hike_matrix['hike_id']==hike_id].index[0]
        self.hikes_liked = self.hikes_liked.append(self.hike_matrix.iloc[indx])
        self.hike_matrix = self.hike_matrix.drop(indx, axis=0).reset_index(drop=True)

    def recommend(self, n=5, apply_weights=True):
        # Calculate similarity to all of the hikes
        # average similarities
        # return top 5
        indx_id = self.hike_matrix['hike_id']
        X = self.hike_matrix.drop('hike_id', axis=1)
        y = self.hikes_liked.drop('hike_id', axis=1)
        cs = cosine_similarity(X, y).mean(axis=1)
        rec_index= np.argsort(cs)[-n:][::-1]
        recommendations = indx_id.iloc[rec_index]
        return recommendations
        
    def apply_weights(self):
        weighted_hike_matrix = pd.DataFrame(columns=self.hike_matrix.columns)
        for col, weight in self.weights.items():
            weighted_hike_matrix[col] = self.hike_matrix[col].apply(lambda x: x*weight) 
        return weighted_hike_matrix
    
    

In [183]:
# import weights
import json
with open('data/weights.json', 'r') as weights_fp:
    weight = json.load(weights_fp)

In [195]:
hr=hikeRecommender(item_data, weights=weights)

In [196]:
hike_data[hike_data['hike_name'].str.contains("Townsend")]

Unnamed: 0.1,Unnamed: 0,hike_name,hike_id,numReports,total_dist,elevation gain,time_from_seattle,Coast,stars,Dogs allowed on leash,...,Good for kids,Lakes,Mountain views,Old growth,Ridges/passes,Rivers,Summits,Waterfalls,Wildflowers/Meadows,Wildlife
305,305,Mount Townsend - Silver Lakes Traverse,305,8.0,14.0,3200.0,202.183333,0,4.0,0,...,0,1,0,1,0,0,1,0,0,0
345,345,Mount Townsend,345,409.0,8.0,3010.0,156.7,0,4.03,1,...,0,0,1,1,0,0,1,1,0,0
512,512,Fort Townsend Historical State Park,512,2.0,,,119.75,0,0.0,0,...,1,0,0,1,0,0,0,0,0,0
860,860,Townsend Mountain,860,7.0,,,,0,5.0,0,...,0,0,0,0,0,0,0,0,0,0
2043,2043,Mount Townsend Snowshoe,2043,13.0,13.5,2500.0,156.716667,0,1.0,0,...,0,0,0,0,0,0,0,0,0,0


In [197]:
mailbox = 652
mt_si = 326
mt_townsend = 345

In [198]:
# hr.like_hike(mailbox)

In [199]:
hr.like_hike(mt_townsend)

In [200]:
hr.recommend(10)

167      167
349      350
513      514
599      600
898      899
891      892
327      327
379      380
695      696
1051    1052
Name: hike_id, dtype: int64

In [223]:
recs = hike_data.iloc[list(hr.recommend(10))]
rec_list = []
for index, row in recs.iterrows():
     rec_list.append(row)

In [224]:
rec_list

[Unnamed: 0                                     167
 hike_name                Greenwater and Echo Lakes
 hike_id                                        167
 numReports                                     275
 total_dist                                      14
 elevation gain                                1900
 time_from_seattle                             97.9
 Coast                                            0
 stars                                         3.39
 Dogs allowed on leash                            1
 Established campsites                            1
 Fall foliage                                     0
 Good for kids                                    1
 Lakes                                            1
 Mountain views                                   0
 Old growth                                       1
 Ridges/passes                                    0
 Rivers                                           1
 Summits                                          0
 Waterfalls 

In [202]:
list(hr.hikes_liked.index)

[345]

In [203]:
weights

{'Coast': 0.0008605416502796801,
 'Dogs allowed on leash': 0.015485443549527633,
 'Established campsites': 0.007265152002914418,
 'Fall foliage': 0.003734896224677904,
 'Good for kids': 0.018869290343909564,
 'Lakes': 0.0025923560848305394,
 'Mountain views': 0.008364118987063957,
 'Old growth': 0.006148622728215646,
 'Ridges/passes': 0.0017456838896329156,
 'Rivers': 0.0077048155791259966,
 'Summits': 0.023042377674711238,
 'Waterfalls': 0.013400840316294587,
 'Wildflowers/Meadows': 0.0018299341255906603,
 'Wildlife': 0.02294547318827456,
 'elevation gain': 0.19793960947377007,
 'numReports': 0.23698143453301185,
 'time_from_seattle': 0.26267208494186467,
 'total_dist': 0.16841732470630383}

In [204]:
hr.weighted_hike_matrix.iloc[list(hr.recommend(10))+ list(hr.hikes_liked.index)]

Unnamed: 0,hike_id,numReports,total_dist,elevation gain,time_from_seattle,Coast,Dogs allowed on leash,Established campsites,Fall foliage,Good for kids,Lakes,Mountain views,Old growth,Ridges/passes,Rivers,Summits,Waterfalls,Wildflowers/Meadows,Wildlife
167,,65.169894,2.357843,376.085258,25.715597,0.0,0.015485,0.007265,0.0,0.018869,0.002592,0.0,0.006149,0.0,0.007705,0.0,0.0,0.0,0.022945
350,,99.05824,1.044187,653.200711,35.075479,0.0,0.015485,0.0,0.0,0.0,0.0,0.008364,0.006149,0.001746,0.0,0.023042,0.0,0.0,0.022945
514,,108.537497,1.26313,653.200711,31.739544,0.0,0.015485,0.007265,0.0,0.0,0.002592,0.0,0.006149,0.0,0.0,0.023042,0.013401,0.0,0.0
600,,17.773608,0.707353,96.792469,5.905744,0.0,0.0,0.0,0.0,0.018869,0.0,0.008364,0.0,0.0,0.0,0.0,0.0,0.0,0.022945
899,,36.732122,0.505252,263.259681,16.679677,0.0,0.015485,0.0,0.0,0.018869,0.0,0.008364,0.0,0.0,0.0,0.023042,0.0,0.0,0.0
892,,53.794786,1.178921,296.909414,26.306609,0.0,0.0,0.0,0.003735,0.018869,0.0,0.0,0.0,0.0,0.007705,0.0,0.0,0.0,0.0
327,,64.221969,1.785224,455.261102,38.582151,0.0,0.0,0.0,0.003735,0.0,0.0,0.008364,0.006149,0.0,0.007705,0.0,0.0,0.0,0.022945
380,,81.521613,1.549439,562.148491,26.486102,0.0,0.0,0.0,0.0,0.0,0.002592,0.008364,0.0,0.0,0.007705,0.0,0.0,0.0,0.0
696,,59.956303,1.26313,435.467141,34.335619,0.0,0.015485,0.007265,0.003735,0.0,0.002592,0.008364,0.0,0.0,0.007705,0.023042,0.013401,0.00183,0.0
1052,,12.796997,0.421043,84.124334,8.322327,0.000861,0.015485,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
