In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity


In [127]:
# Read the hike feature table, then discard the saved-index column and the
# free-text hike name so that only hike_id and the feature columns remain.
item_data = pd.read_csv('data/itemData.csv').drop(['Unnamed: 0', 'hike_name'], axis=1)

In [105]:
# Preview the first five rows of the feature table.
item_data.head(5)

Unnamed: 0,hike_id,numReports,total_dist,elevation gain,time_from_seattle,Coast,stars,Dogs allowed on leash,Established campsites,Fall foliage,Good for kids,Lakes,Mountain views,Old growth,Ridges/passes,Rivers,Summits,Waterfalls,Wildflowers/Meadows,Wildlife
0,0,0.177287,0.035247,-0.057533,-0.009605,0,4.25,0,1,0,0,0,1,0,0,0,0,0,0,1
1,2,-0.003483,-0.002136,0.129758,0.029158,0,3.0,0,0,0,0,1,0,1,0,0,0,0,0,0
2,3,-0.021752,-0.054472,-0.017399,0.022196,0,3.67,1,0,0,0,0,1,0,0,0,0,0,0,0
3,4,-0.021752,0.453939,0.169892,-0.009879,0,2.75,1,1,0,1,0,1,0,1,0,0,0,0,1
4,8,-0.00829,-0.114286,-0.181412,-0.037453,0,0.0,1,0,0,1,0,0,1,0,0,0,1,0,0


In [128]:
def norm(df, col):
    """Mean-center and range-scale one column of a dataframe, in place.

    Every value ``x`` in ``df[col]`` is replaced by
    ``(x - mean) / (max - min)``, where the statistics are taken over that
    same column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the column; it is modified in place.
    col : hashable
        Label of the column to normalize.

    Returns
    -------
    None
        The result is written back into ``df[col]``.
    """
    centered = df[col] - df[col].mean()
    spread = df[col].max() - df[col].min()
    if spread == 0:
        # A constant column would otherwise evaluate 0/0 and turn every entry
        # into NaN; after centering, such a column carries no information, so
        # store zeros instead.
        df[col] = centered * 0.0
    else:
        df[col] = centered / spread

In [129]:
# Rescale each continuous feature column of item_data in place.
for feature in ['elevation gain', 'time_from_seattle', 'numReports', 'total_dist']:
    norm(item_data, feature)

In [130]:
# Remove, in place, every row that has a missing value in any column.
item_data.dropna(axis=0, how='any', inplace=True)

In [53]:
# Start the "liked" set with the row labelled 65, reshaped from a Series into a
# one-row frame. `.loc` performs the same label-based lookup that `.ix` did on
# this integer index; `.ix` itself is deprecated and absent from newer pandas.
hikes_liked = pd.DataFrame(item_data.loc[65]).transpose()

In [54]:
# Display the one-row frame of liked hikes.
hikes_liked

Unnamed: 0,hike_id,numReports,total_dist,elevation gain,time_from_seattle,Coast,stars,Dogs allowed on leash,Established campsites,Fall foliage,Good for kids,Lakes,Mountain views,Old growth,Ridges/passes,Rivers,Summits,Waterfalls,Wildflowers/Meadows,Wildlife
65,65.0,-0.024637,-0.181575,-0.124422,-0.019827,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0


In [139]:
# List every column of item_data except the 'hike_id' identifier.
# print() with a single argument behaves the same on Python 2 and Python 3.
for col in [c for c in item_data.columns if c != 'hike_id']:
    print(col)

numReports
total_dist
elevation gain
time_from_seattle
Coast
stars
Dogs allowed on leash
Established campsites
Fall foliage
Good for kids
Lakes
Mountain views
Old growth
Ridges/passes
Rivers
Summits
Waterfalls
Wildflowers/Meadows
Wildlife


In [55]:
# Preview hikes_liked with the row labelled 87 added. pd.concat returns a new
# frame; the result is not assigned, so hikes_liked itself is left unchanged.
# (`.ix` and `DataFrame.append` are not available in newer pandas releases.)
pd.concat([hikes_liked, item_data.loc[[87]]])

Unnamed: 0,hike_id,numReports,total_dist,elevation gain,time_from_seattle,Coast,stars,Dogs allowed on leash,Established campsites,Fall foliage,Good for kids,Lakes,Mountain views,Old growth,Ridges/passes,Rivers,Summits,Waterfalls,Wildflowers/Meadows,Wildlife
65,65.0,-0.024637,-0.181575,-0.124422,-0.019827,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
87,87.0,0.187863,-0.106809,-0.044155,-0.001098,0.0,4.05,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [56]:
# Re-display hikes_liked: the combined frame shown by the previous cell was not
# assigned back, so this still holds only the original row.
hikes_liked

Unnamed: 0,hike_id,numReports,total_dist,elevation gain,time_from_seattle,Coast,stars,Dogs allowed on leash,Established campsites,Fall foliage,Good for kids,Lakes,Mountain views,Old growth,Ridges/passes,Rivers,Summits,Waterfalls,Wildflowers/Meadows,Wildlife
65,65.0,-0.024637,-0.181575,-0.124422,-0.019827,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0


In [136]:
# Inspect the column labels of hikes_liked.
hikes_liked.columns

Index([u'hike_id', u'numReports', u'total_dist', u'elevation gain',
       u'time_from_seattle', u'Coast', u'stars', u'Dogs allowed on leash',
       u'Established campsites', u'Fall foliage', u'Good for kids', u'Lakes',
       u'Mountain views', u'Old growth', u'Ridges/passes', u'Rivers',
       u'Summits', u'Waterfalls', u'Wildflowers/Meadows', u'Wildlife'],
      dtype='object')

In [126]:
"""DOC strings"""

# import packages
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity



class hikeRecommender(object):
    """Recommend hikes by cosine similarity to the hikes a user has liked.

    The recommender holds a frame of candidate hikes (one row per hike, with a
    'hike_id' column plus feature columns). Liking a hike moves its row from
    the candidate frame into ``hikes_liked``; recommendations are the
    remaining candidates whose feature vectors are, on average, most similar
    to the liked rows.

    Attributes
    ----------
    hike_matrix : pandas.DataFrame
        Candidate hikes that have not been liked yet.
    feature_weights : object
        Value passed as ``weights``; stored but not used by any method here.
    hikes_liked : list or pandas.DataFrame
        Empty list until the first like, then a frame of liked rows.
    hikes_disliked : list
        Initialised empty; not modified by any method here.
    """

    def __init__(self, hikes, weights=None):
        """Store the candidate hikes and start with no liked/disliked hikes.

        Parameters
        ----------
        hikes : pandas.DataFrame
            Must contain a 'hike_id' column; every other column is treated as
            a feature by ``recommend``.
        weights : optional
            Kept on the instance as ``feature_weights``.
        """
        self.hike_matrix = hikes
        self.feature_weights = weights
        self.hikes_liked = []
        self.hikes_disliked = []

    def likeHike(self, hike_id):
        """Move the hike with the given id from the candidates to the liked set.

        Parameters
        ----------
        hike_id
            Value to look up in the 'hike_id' column of ``hike_matrix``.

        Raises
        ------
        IndexError
            If no remaining candidate has this ``hike_id`` (unknown id, or a
            hike that was already liked and therefore removed).
        """
        matches = self.hike_matrix[self.hike_matrix['hike_id'] == hike_id].index
        if len(matches) == 0:
            raise IndexError(
                'hike_id %r is not among the candidate hikes' % (hike_id,))
        indx = matches[0]

        # Select with a list so the result is a one-row frame that keeps the
        # original column dtypes (hike_id is not upcast to float).
        liked_row = self.hike_matrix.loc[[indx]]
        if len(self.hikes_liked) == 0:
            self.hikes_liked = liked_row
        else:
            self.hikes_liked = pd.concat([self.hikes_liked, liked_row])

        # drop() returns a new frame, so the frame passed to __init__ is not
        # modified; the index is renumbered 0..n-1 afterwards.
        self.hike_matrix = self.hike_matrix.drop(indx, axis=0).reset_index(drop=True)

    def recommend(self, n=5):
        """Return the ids of the ``n`` candidates most similar to the liked hikes.

        Each candidate is scored by its cosine similarity to every liked hike
        (over all columns except 'hike_id'), averaged across the liked hikes.

        Parameters
        ----------
        n : int, default 5
            Maximum number of recommendations; values <= 0 give an empty
            result.

        Returns
        -------
        pandas.Series
            'hike_id' values of the selected candidates, best match first,
            indexed by their row labels in ``hike_matrix``. Empty when no hike
            has been liked yet or when no candidates remain.
        """
        hike_ids = self.hike_matrix['hike_id']
        if len(self.hikes_liked) == 0 or len(self.hike_matrix) == 0:
            # Nothing to compare against (or nothing left to recommend).
            return hike_ids.iloc[0:0]

        candidates = self.hike_matrix.drop('hike_id', axis=1)
        liked = self.hikes_liked.drop('hike_id', axis=1)
        scores = cosine_similarity(candidates, liked).mean(axis=1)

        # Reverse first, then truncate: slicing with [-n:] would return every
        # hike when n == 0.
        best_positions = np.argsort(scores)[::-1][:max(n, 0)]
        # argsort yields positions, so look them up positionally.
        return hike_ids.iloc[best_positions]


In [131]:
# Construct a recommender over item_data; the instance is not bound to a name,
# so only its repr is shown.
hikeRecommender(item_data)

<__main__.hikeRecommender at 0x11480ce50>

In [132]:
# Build the recommender instance used by the following cells.
hr=hikeRecommender(item_data)

In [133]:
# Mark the hike whose hike_id is 65 as liked.
hr.likeHike(65)

In [134]:
# Mark the hike whose hike_id is 4 as liked.
hr.likeHike(4)

In [135]:
# Ask for the 10 remaining hikes most similar to the liked ones.
hr.recommend(10)

962     1215
811     1001
1044    1332
371      421
960     1213
1237    1774
1187    1609
479      552
316      359
1031    1311
Name: hike_id, dtype: int64