In [1]:
!pip3 install gower

You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.8/bin/python3.8 -m pip install --upgrade pip' command.[0m


## Add recommender building code

In [28]:
"""DOC strings"""

# import packages
import pandas as pd
import numpy as np
import gower
from sklearn.metrics.pairwise import cosine_similarity



class hikeRecommender(object):

    def __init__(self, hike_data, recommender_columns, weights=None):
        self.hike_data = hike_data
        self.hike_matrix = hike_data[recommender_columns]
        self.feature_weights = weights
        self.hikes_liked = pd.DataFrame(columns=self.hike_matrix.columns)
        self.weights = weights

    def like_hike(self, hike_id):
        indx = self.hike_matrix[self.hike_matrix['hike_id']==hike_id].index[0]
        self.hikes_liked = self.hikes_liked.append(self.hike_matrix.iloc[indx])
        self.hike_matrix = self.hike_matrix.drop(indx, axis=0).reset_index(drop=True)

    def recommend_cosine(self, n=5, apply_weights=True):
        # Calculate similarity to all of the hikes
        # average similarities
        # return top 5
        hike_matrix = self.hike_matrix
        if apply_weights:
            hike_matrix = self.apply_weights()
        indx_id = hike_matrix['hike_id']
        X = hike_matrix.drop('hike_id', axis=1)
        y = self.hikes_liked.drop('hike_id', axis=1)
        cs = cosine_similarity(X, y).mean(axis=1)
        rec_index= np.argsort(cs)[-n:][::-1]
        recommendations = indx_id.iloc[rec_index]
        return self.hike_data.iloc[list(recommendations)]
        
    def recommend_gower(self, n=5, apply_weights=True):
        # Calculate similarity to all of the hikes
        # average similarities
        # return top 5
        hike_matrix = self.hike_matrix
        if apply_weights:
            hike_matrix = self.apply_weights()
        indx_id = hike_matrix['hike_id']
        X = hike_matrix.drop('hike_id', axis=1)
        y = self.hikes_liked.drop('hike_id', axis=1)
        recs = gower.gower_topn(y, X, n = n)
        rec_index = recs['index']
        recommendations = indx_id.iloc[rec_index]
        return self.hike_data.iloc[list(recommendations)]
    
    def compare_model_recommendations(self, n=5, apply_weights=True):
        return {
            "Cosine_recommendations": self.recommend_cosine(n, apply_weights).title.to_list(),
            "Gower_recommendations": self.recommend_gower(n, apply_weights).title.to_list(),
        }
    
    def apply_weights(self):
        weighted_hike_matrix = self.hike_matrix.copy()
        for col, weight in self.weights.items():
            weighted_hike_matrix[col] = weighted_hike_matrix[col]*weight 
        return weighted_hike_matrix
    
    def norm(self, col):
        """Normalized the given column of the hike dataframe"""
        self.hike_matrix[col] = pd.to_numeric(self.hike_matrix[col],errors='coerce')
        self.hike_matrix[col] = (self.hike_matrix[col] - self.hike_matrix[col].mean())/(self.hike_matrix[col].max() - self.hike_matrix[col].min())
    

## Import data and weights

In [29]:
hike_data = pd.read_csv('../data/2021-05-11-wta-hike-recommendation-export-2.csv', index_col=0)
hike_data.replace(to_replace=["None"], value=np.nan, inplace=True)
hike_data.fillna(0, inplace=True)
hike_data.reset_index(inplace=True)
hike_data['hike_id'] = hike_data.index

In [30]:
# import weights
import json
with open('../data/weights.json', 'r') as weights_fp:
    weight = json.load(weights_fp)
recommender_cols = list(weight.keys()) + ['hike_id']

In [31]:
hike_data[hike_data['title']=="Dirty Harry's Balcony"]

Unnamed: 0,title,region name,uid,average_rating,elevation_gain,highpoint,mileage,mileage_type,tripreport_count,campsites,...,lakes,mountains,old_growth,ridges,rivers,summits,waterfalls,wildflowers,wildlife,hike_id
3923,Dirty Harry's Balcony,Snoqualmie Region -- Snoqualmie Pass,f01eec1e5890dfd5c26bf9fe0b203a4a,3.178571,1300.0,2600.0,4.4,1,555,0,...,0,1,0,0,0,0,0,0,1,3923


In [32]:
hike_data.columns

Index(['title', 'region name', 'uid', 'average_rating', 'elevation_gain',
       'highpoint', 'mileage', 'mileage_type', 'tripreport_count', 'campsites',
       'coast', 'dogs_ok', 'foliage', 'kids_ok', 'lakes', 'mountains',
       'old_growth', 'ridges', 'rivers', 'summits', 'waterfalls',
       'wildflowers', 'wildlife', 'hike_id'],
      dtype='object')

## Test Recommender
### Instructions
1. Find the index number corresponding to the hike that you want to use for the basis of recommendations. The following example uses Mount Washington, which will return two options and I will chose the row with index number 3335

```hike_data[hike_data['title'].str.contains("Mount Washington")]```

2. Assign that hike to the hike_index variable

```hike_index = 3335```

3. Instantiate the hike recommender class

```hr=hikeRecommender(hike_data, recommender_cols, weights=weight)```

4. Add the chosen hike index to the hike recommenders liked hikes using the like_hike function

```hr.like_hike(hike_index)```

5. Run the compare_model_recommendations function to get the titles of the cosine and gower recommendations

```hr.compare_model_recommendations(apply_weights=False)```

6. You can also get the results from the individual models, as well as the corresponding data for each hike, using the `recommend_cosine` and `recommend_gower functions`

```hr.recommend_cosine(apply_weights=False)```

```hr.recommend_gower(apply_weights=False)```

In [33]:
hike_data[hike_data['title'].str.contains("Mount Washington")]

Unnamed: 0,title,region name,uid,average_rating,elevation_gain,highpoint,mileage,mileage_type,tripreport_count,campsites,...,lakes,mountains,old_growth,ridges,rivers,summits,waterfalls,wildflowers,wildlife,hike_id
1914,Mount Washington,Olympic Peninsula -- Hood Canal,73863a834f87a35d038d0513f8cd3b12,3.5,3190.0,0.0,4.0,1,34,0,...,0,0,0,0,0,1,0,0,0,1914
3335,Mount Washington,Snoqualmie Region -- North Bend Area,cc9b2714eabe8c1d4fd44e79c33bdd6a,3.987654,3250.0,4450.0,8.5,1,1227,0,...,0,1,1,0,0,1,0,1,0,3335


In [34]:
hr=hikeRecommender(hike_data, recommender_cols, weights=weight)
columns_to_normalize = ['mileage', 'elevation_gain', 'tripreport_count']
for col in columns_to_normalize:
    hr.norm(col)

hike_index = 3335

hr.like_hike(hike_index)

hr.compare_model_recommendations(apply_weights=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.hike_matrix[col] = pd.to_numeric(self.hike_matrix[col],errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.hike_matrix[col] = (self.hike_matrix[col] - self.hike_matrix[col].mean())/(self.hike_matrix[col].max() - self.hike_matrix[col].min())


{'Cosine_recommendations': ['Goat Mountain',
  'Bearhead Mountain',
  'Sauk Mountain',
  'Red Top Lookout',
  'Mount Pilchuck'],
 'Gower_recommendations': ['McClellan Butte',
  'Mailbox Peak - Old Trail',
  'Mount Pilchuck',
  'Sauk Mountain',
  "Dirty Harry's Peak"]}

In [37]:
hr.recommend_cosine(apply_weights=True)

Unnamed: 0,title,region name,uid,average_rating,elevation_gain,highpoint,mileage,mileage_type,tripreport_count,campsites,...,lakes,mountains,old_growth,ridges,rivers,summits,waterfalls,wildflowers,wildlife,hike_id
4032,Bearhead Mountain,Mount Rainier Area -- NW - Carbon River/Mowich,f5e0ffe1a66cbf9fa5bf630bef8212a1,3.785714,1800.0,6089.0,6.0,1,181,0,...,0,1,1,1,0,1,0,1,0,4032
3210,Larrabee State Park - Rock Trail,Puget Sound and Islands -- Bellingham Area,c56415ae36b340d08e45edf20cad169d,4.272727,1200.0,1820.0,2.4,1,141,0,...,1,1,1,0,0,1,0,0,0,3210
2190,Thorp Mountain via Knox Creek,Snoqualmie Region -- Salmon La Sac/Teanaway,86251d2c43dd97c93f17092b6e7820a4,2.777778,1734.0,5854.0,4.4,1,84,0,...,0,1,1,0,0,1,0,1,0,2190
182,Phils Trail - Thrush Gap Loop,Issaquah Alps -- Squak Mountain,0aa97af9ac7b769ee58121caf89c0b7f,2.4,1606.0,2037.0,8.2,1,29,0,...,0,1,1,0,0,1,0,1,0,182
906,Tatoosh Ridge,South Cascades -- White Pass/Cowlitz River Valley,3514568a1b0717684d514e3ae9729adb,3.727273,2900.0,5775.0,6.2,1,111,0,...,1,1,1,1,0,1,0,1,0,906


In [38]:
hr.recommend_cosine(apply_weights=False)

Unnamed: 0,title,region name,uid,average_rating,elevation_gain,highpoint,mileage,mileage_type,tripreport_count,campsites,...,lakes,mountains,old_growth,ridges,rivers,summits,waterfalls,wildflowers,wildlife,hike_id
179,Goat Mountain,North Cascades -- Mount Baker Area,0a8d9afaf51007a8f5d62e13d269f825,4.058824,3100.0,5600.0,8.2,1,218,0,...,0,1,1,1,0,1,0,1,0,179
4032,Bearhead Mountain,Mount Rainier Area -- NW - Carbon River/Mowich,f5e0ffe1a66cbf9fa5bf630bef8212a1,3.785714,1800.0,6089.0,6.0,1,181,0,...,0,1,1,1,0,1,0,1,0,4032
4039,Sauk Mountain,North Cascades -- North Cascades Highway - Hwy 20,f6150b8f857726d4c4ba13fd8565fb10,4.372093,1200.0,5500.0,4.2,1,641,0,...,0,1,0,0,0,1,0,1,0,4039
1247,Red Top Lookout,Snoqualmie Region -- Salmon La Sac/Teanaway,49a985c14ff245789b445937ea6deded,3.45,350.0,5360.0,1.5,1,166,0,...,0,1,1,0,0,1,0,1,0,1247
4046,Mount Pilchuck,North Cascades -- Mountain Loop Highway,f6701e1bd8083fcdd0b1bc3ddac563db,4.540881,2300.0,5327.0,5.4,1,1885,0,...,0,1,0,0,0,1,0,1,0,4046


In [10]:
hr.recommend_gower(apply_weights=False)

Unnamed: 0,title,region name,uid,average_rating,elevation_gain,highpoint,mileage,mileage_type,tripreport_count,campsites,...,lakes,mountains,old_growth,ridges,rivers,summits,waterfalls,wildflowers,wildlife,hike_id
1473,Abercrombie Mountain Trail,Eastern Washington -- Selkirk Range,5690b606b5ca52501e243a1acbdde51f,2.571429,2350.0,7300.0,7.3,1,21,0,...,0,1,0,0,0,1,0,1,0,1473
807,Monte Carlo,Southwest Washington -- Columbia River Gorge - WA,2f15a6e124a65418fba9c824c9f1d5d7,1.75,2560.0,3360.0,10.0,1,2,0,...,0,1,1,0,0,1,0,1,0,807
3914,Ned Hill,Olympic Peninsula -- Hood Canal,efb1dc32a9a1ddaceebdbe1965b97c81,1.833333,900.0,3469.0,2.2,1,20,0,...,0,1,0,0,0,1,0,1,0,3914
316,Kloshe Nanitch,Olympic Peninsula -- Northern Coast,1286fb310c25a82688b354cfa5cf32cd,3.5,2200.0,3160.0,6.4,1,41,0,...,0,1,0,0,0,1,0,1,0,316
735,Sulphur Mountain,North Cascades -- Mountain Loop Highway,2a11f1b759a5c8617ae7e6fc6ebe2028,3.4,4200.0,6000.0,10.0,1,75,0,...,0,1,1,0,0,1,0,1,0,735


In [11]:
hr.apply_weights()

TypeError: can't multiply sequence by non-int of type 'float'

In [13]:
weighted_hike_matrix = hr.hike_matrix.copy()

In [14]:
for col, weight in hr.weights.items():
    weighted_hike_matrix[col] *= weight

TypeError: can't multiply sequence by non-int of type 'float'

In [27]:
weighted_hike_matrix.loc['mileage']

TypeError: Cannot index by location index with a non-integer key