In [1]:
!pip3 install gower

You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.8/bin/python3.8 -m pip install --upgrade pip' command.[0m


## Add recommender building code

In [2]:
"""DOC strings"""

# import packages
import pandas as pd
import numpy as np
import gower
from sklearn.metrics.pairwise import cosine_similarity



class hikeRecommender(object):

    def __init__(self, hike_data, recommender_columns, weights=None):
        self.hike_data = hike_data
        self.hike_matrix = hike_data[recommender_columns]
        self.feature_weights = weights
        self.hikes_liked = pd.DataFrame(columns=self.hike_matrix.columns)
        self.weights = weights

    def like_hike(self, hike_id):
        indx = self.hike_matrix[self.hike_matrix['hike_id']==hike_id].index[0]
        self.hikes_liked = self.hikes_liked.append(self.hike_matrix.iloc[indx])
        self.hike_matrix = self.hike_matrix.drop(indx, axis=0).reset_index(drop=True)

    def recommend_cosine(self, n=5, apply_weights=True):
        # Calculate similarity to all of the hikes
        # average similarities
        # return top 5
        hike_matrix = self.hike_matrix
        if apply_weights:
            hike_matrix = self.apply_weights()
        indx_id = hike_matrix['hike_id']
        X = hike_matrix.drop('hike_id', axis=1)
        y = self.hikes_liked.drop('hike_id', axis=1)
        cs = cosine_similarity(X, y).mean(axis=1)
        rec_index= np.argsort(cs)[-n:][::-1]
        recommendations = indx_id.iloc[rec_index]
        return self.hike_data.iloc[list(recommendations)]
        
    def recommend_gower(self, n=5, apply_weights=True):
        # Calculate similarity to all of the hikes
        # average similarities
        # return top 5
        hike_matrix = self.hike_matrix
        if apply_weights:
            hike_matrix = self.apply_weights()
        indx_id = hike_matrix['hike_id']
        X = hike_matrix.drop('hike_id', axis=1)
        y = self.hikes_liked.drop('hike_id', axis=1)
        recs = gower.gower_topn(y, X, n = n)
        rec_index = recs['index']
        recommendations = indx_id.iloc[rec_index]
        return self.hike_data.iloc[list(recommendations)]
    
    def compare_model_recommendations(self, n=5, apply_weights=True):
        return {
            "Cosine_recommendations": self.recommend_cosine(n, apply_weights).title.to_list(),
            "Gower_recommendations": self.recommend_gower(n, apply_weights).title.to_list(),
        }
    
    def apply_weights(self):
        weighted_hike_matrix = self.hike_matrix.copy()
        for col, weight in self.weights.items():
            weighted_hike_matrix[col] = weighted_hike_matrix[col]*weight 
        return weighted_hike_matrix
    
    def norm(self, col):
        """Normalized the given column of the hike dataframe"""
        self.hike_matrix[col] = pd.to_numeric(self.hike_matrix[col],errors='coerce')
        self.hike_matrix[col] = (self.hike_matrix[col] - self.hike_matrix[col].mean())/(self.hike_matrix[col].max() - self.hike_matrix[col].min())
    

## Import data and weights

In [3]:
hike_data = pd.read_csv('../data/2021-05-11-wta-hike-recommendation-export-2.csv', index_col=0)
hike_data.replace(to_replace=["None"], value=np.nan, inplace=True)
hike_data.fillna(0, inplace=True)
hike_data.reset_index(inplace=True)
hike_data['hike_id'] = hike_data.index

In [4]:
# import weights
import json
with open('../data/weights.json', 'r') as weights_fp:
    weight = json.load(weights_fp)
recommender_cols = list(weight.keys()) + ['hike_id']

In [5]:
hike_data[hike_data['title']=="Dirty Harry's Balcony"]

Unnamed: 0,title,region name,uid,average_rating,elevation_gain,highpoint,mileage,mileage_type,tripreport_count,campsites,...,lakes,mountains,old_growth,ridges,rivers,summits,waterfalls,wildflowers,wildlife,hike_id
3923,Dirty Harry's Balcony,Snoqualmie Region -- Snoqualmie Pass,f01eec1e5890dfd5c26bf9fe0b203a4a,3.178571,1300.0,2600.0,4.4,1,555,0,...,0,1,0,0,0,0,0,0,1,3923


In [6]:
weight


{'wildlife': 0.02294547318827456,
 'rivers': 0.0077048155791259966,
 'mileage': 0.16841732470630383,
 'campsites': 0.007265152002914418,
 'waterfalls': 0.013400840316294587,
 'ridges': 0.0017456838896329156,
 'dogs_ok': 0.015485443549527633,
 'coast': 0.0008605416502796801,
 'kids_ok': 0.018869290343909564,
 'mountains': 0.008364118987063957,
 'lakes': 0.0025923560848305394,
 'summits': 0.023042377674711238,
 'old_growth': 0.006148622728215646,
 'tripreport_count': 0.23698143453301185,
 'elevation_gain': 0.19793960947377007,
 'wildflowers': 0.0018299341255906603,
 'foliage': 0.003734896224677904}

## Test Recommender
### Instructions
1. Find the index number corresponding to the hike that you want to use for the basis of recommendations. The following example uses Mount Washington, which will return two options and I will chose the row with index number 3335

```hike_data[hike_data['title'].str.contains("Mount Washington")]```

2. Assign that hike to the hike_index variable

```hike_index = 3335```

3. Instantiate the hike recommender class

```hr=hikeRecommender(hike_data, recommender_cols, weights=weight)```

4. Add the chosen hike index to the hike recommenders liked hikes using the like_hike function

```hr.like_hike(hike_index)```

5. Run the compare_model_recommendations function to get the titles of the cosine and gower recommendations

```hr.compare_model_recommendations(apply_weights=False)```

6. You can also get the results from the individual models, as well as the corresponding data for each hike, using the `recommend_cosine` and `recommend_gower functions`

```hr.recommend_cosine(apply_weights=False)```

```hr.recommend_gower(apply_weights=False)```

In [7]:
hike_data[hike_data['title'].str.contains("Mount Washington")]

Unnamed: 0,title,region name,uid,average_rating,elevation_gain,highpoint,mileage,mileage_type,tripreport_count,campsites,...,lakes,mountains,old_growth,ridges,rivers,summits,waterfalls,wildflowers,wildlife,hike_id
1914,Mount Washington,Olympic Peninsula -- Hood Canal,73863a834f87a35d038d0513f8cd3b12,3.5,3190.0,0.0,4.0,1,34,0,...,0,0,0,0,0,1,0,0,0,1914
3335,Mount Washington,Snoqualmie Region -- North Bend Area,cc9b2714eabe8c1d4fd44e79c33bdd6a,3.987654,3250.0,4450.0,8.5,1,1227,0,...,0,1,1,0,0,1,0,1,0,3335


In [8]:
hr=hikeRecommender(hike_data, recommender_cols, weights=weight)
columns_to_normalize = ['mileage', 'elevation_gain', 'tripreport_count']
for col in columns_to_normalize:
    hr.norm(col)

hike_index = 3335

hr.like_hike(hike_index)

hr.compare_model_recommendations(apply_weights=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.hike_matrix[col] = pd.to_numeric(self.hike_matrix[col],errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.hike_matrix[col] = (self.hike_matrix[col] - self.hike_matrix[col].mean())/(self.hike_matrix[col].max() - self.hike_matrix[col].min())


{'Cosine_recommendations': ['Goat Mountain',
  'Bearhead Mountain',
  'Sauk Mountain',
  'Red Top Lookout',
  'Mount Pilchuck'],
 'Gower_recommendations': ['McClellan Butte',
  'Mailbox Peak - Old Trail',
  'Mount Pilchuck',
  'Sauk Mountain',
  "Dirty Harry's Peak"]}

In [9]:
hr.recommend_cosine(apply_weights=True)

Unnamed: 0,title,region name,uid,average_rating,elevation_gain,highpoint,mileage,mileage_type,tripreport_count,campsites,...,lakes,mountains,old_growth,ridges,rivers,summits,waterfalls,wildflowers,wildlife,hike_id
4032,Bearhead Mountain,Mount Rainier Area -- NW - Carbon River/Mowich,f5e0ffe1a66cbf9fa5bf630bef8212a1,3.785714,1800.0,6089.0,6.0,1,181,0,...,0,1,1,1,0,1,0,1,0,4032
3210,Larrabee State Park - Rock Trail,Puget Sound and Islands -- Bellingham Area,c56415ae36b340d08e45edf20cad169d,4.272727,1200.0,1820.0,2.4,1,141,0,...,1,1,1,0,0,1,0,0,0,3210
2190,Thorp Mountain via Knox Creek,Snoqualmie Region -- Salmon La Sac/Teanaway,86251d2c43dd97c93f17092b6e7820a4,2.777778,1734.0,5854.0,4.4,1,84,0,...,0,1,1,0,0,1,0,1,0,2190
182,Phils Trail - Thrush Gap Loop,Issaquah Alps -- Squak Mountain,0aa97af9ac7b769ee58121caf89c0b7f,2.4,1606.0,2037.0,8.2,1,29,0,...,0,1,1,0,0,1,0,1,0,182
906,Tatoosh Ridge,South Cascades -- White Pass/Cowlitz River Valley,3514568a1b0717684d514e3ae9729adb,3.727273,2900.0,5775.0,6.2,1,111,0,...,1,1,1,1,0,1,0,1,0,906


In [10]:
hr.recommend_cosine(apply_weights=False)

Unnamed: 0,title,region name,uid,average_rating,elevation_gain,highpoint,mileage,mileage_type,tripreport_count,campsites,...,lakes,mountains,old_growth,ridges,rivers,summits,waterfalls,wildflowers,wildlife,hike_id
179,Goat Mountain,North Cascades -- Mount Baker Area,0a8d9afaf51007a8f5d62e13d269f825,4.058824,3100.0,5600.0,8.2,1,218,0,...,0,1,1,1,0,1,0,1,0,179
4032,Bearhead Mountain,Mount Rainier Area -- NW - Carbon River/Mowich,f5e0ffe1a66cbf9fa5bf630bef8212a1,3.785714,1800.0,6089.0,6.0,1,181,0,...,0,1,1,1,0,1,0,1,0,4032
4039,Sauk Mountain,North Cascades -- North Cascades Highway - Hwy 20,f6150b8f857726d4c4ba13fd8565fb10,4.372093,1200.0,5500.0,4.2,1,641,0,...,0,1,0,0,0,1,0,1,0,4039
1247,Red Top Lookout,Snoqualmie Region -- Salmon La Sac/Teanaway,49a985c14ff245789b445937ea6deded,3.45,350.0,5360.0,1.5,1,166,0,...,0,1,1,0,0,1,0,1,0,1247
4046,Mount Pilchuck,North Cascades -- Mountain Loop Highway,f6701e1bd8083fcdd0b1bc3ddac563db,4.540881,2300.0,5327.0,5.4,1,1885,0,...,0,1,0,0,0,1,0,1,0,4046


In [11]:
hr.recommend_gower(apply_weights=False)

Unnamed: 0,title,region name,uid,average_rating,elevation_gain,highpoint,mileage,mileage_type,tripreport_count,campsites,...,lakes,mountains,old_growth,ridges,rivers,summits,waterfalls,wildflowers,wildlife,hike_id
1483,McClellan Butte,Snoqualmie Region -- Snoqualmie Pass,5725cf0e39b624da5e8aad7488f3687e,4.0,3700.0,5162.0,9.0,1,577,0,...,0,1,1,0,0,1,0,1,0,1483
3312,Mailbox Peak - Old Trail,Snoqualmie Region -- North Bend Area,cb716cfce342ba11e4b1e0afe03810ee,4.409836,4000.0,4822.0,5.4,1,1891,0,...,0,1,1,0,0,1,0,1,0,3312
4046,Mount Pilchuck,North Cascades -- Mountain Loop Highway,f6701e1bd8083fcdd0b1bc3ddac563db,4.540881,2300.0,5327.0,5.4,1,1885,0,...,0,1,0,0,0,1,0,1,0,4046
4039,Sauk Mountain,North Cascades -- North Cascades Highway - Hwy 20,f6150b8f857726d4c4ba13fd8565fb10,4.372093,1200.0,5500.0,4.2,1,641,0,...,0,1,0,0,0,1,0,1,0,4039
3285,Dirty Harry's Peak,Snoqualmie Region -- Snoqualmie Pass,c9af264628600819470a78dd2548d79a,3.1875,3326.0,4662.0,9.2,1,331,0,...,0,1,0,0,0,1,0,1,0,3285


In [12]:
hr.recommend_gower(apply_weights=True)

Unnamed: 0,title,region name,uid,average_rating,elevation_gain,highpoint,mileage,mileage_type,tripreport_count,campsites,...,lakes,mountains,old_growth,ridges,rivers,summits,waterfalls,wildflowers,wildlife,hike_id
3312,Mailbox Peak - Old Trail,Snoqualmie Region -- North Bend Area,cb716cfce342ba11e4b1e0afe03810ee,4.409836,4000.0,4822.0,5.4,1,1891,0,...,0,1,1,0,0,1,0,1,0,3312
4046,Mount Pilchuck,North Cascades -- Mountain Loop Highway,f6701e1bd8083fcdd0b1bc3ddac563db,4.540881,2300.0,5327.0,5.4,1,1885,0,...,0,1,0,0,0,1,0,1,0,4046
2671,Picket Range,North Cascades -- North Cascades Highway - Hwy 20,a3beff4355324a468879b34b6b1a892f,0.0,10000.0,8151.0,20.0,1,1,0,...,0,1,0,0,0,1,0,1,0,2671
618,Mount St. Helens - Worm Flows Route,South Cascades -- Mount St. Helens,230a54b873322ffb2d3e882922cfda00,4.571429,5699.0,8364.0,12.0,1,507,0,...,0,1,0,0,0,1,0,0,0,618
3897,Goode Mountain,North Cascades -- North Cascades Highway - Hwy 20,ef038b027a4148b09af8837d01a5fb6c,0.0,8400.0,9200.0,37.0,1,1,0,...,0,1,0,0,0,1,0,1,0,3897


# Backpack testing

In [13]:
hike_data[hike_data['title'].str.contains('Poo Poo Point')]

Unnamed: 0,title,region name,uid,average_rating,elevation_gain,highpoint,mileage,mileage_type,tripreport_count,campsites,...,lakes,mountains,old_growth,ridges,rivers,summits,waterfalls,wildflowers,wildlife,hike_id
821,Poo Poo Point,Issaquah Alps -- Tiger Mountain,2fadd3e051dd37f5a1b7a57aece5bb09,3.81982,1858.0,2021.0,7.2,1,782,0,...,0,1,0,0,0,0,0,1,1,821
2918,Poo Poo Point - Chirico Trail,Issaquah Alps -- Tiger Mountain,b2e69e7ea8655ddfff5e8699ca4e32a8,4.058824,1760.0,1850.0,3.8,1,825,0,...,0,1,0,0,0,1,0,0,0,2918


In [14]:
backpack1 = [3381,
3499,
2117,
1698,
2849,
1910,
3627,
4046,
1556,
1018,
1383,
961,
502,
2993,
1914,
1148,
427,
1064,
1776]

In [15]:
backpack2 = [14,
3499,
2849,
1383,
961,
314,
3902,
2879]

backpack3 = [1148,
427,
1556,
2993,
166,
1371]

backpack4 = [502,
2117]

backpack5 = [2879,
961,
821]

In [16]:
backpacks = [backpack1, backpack2, backpack3, backpack4, backpack5]

In [21]:
import warnings
warnings.simplefilter(action='ignore')

backpack = backpack1

hr = hikeRecommender(hike_data, recommender_cols, weights=weight)
columns_to_normalize = ['mileage', 'elevation_gain', 'tripreport_count']
for col in columns_to_normalize:
    hr.norm(col)
for hike in backpack:
    hr.like_hike(hike)
print("All hikes in recommender at once:")
print(hr.compare_model_recommendations(apply_weights=True))

print("One hike at a time, cosine:")
df = []
for hike in backpack:
    hr = hikeRecommender(hike_data, recommender_cols, weights=weight)
    columns_to_normalize = ['mileage', 'elevation_gain', 'tripreport_count']
    for col in columns_to_normalize:
        hr.norm(col)
    hr.like_hike(hike)
    if len(df)==0:
        df = hr.recommend_cosine(apply_weights=False, n=2)
    else:
        df = df.append(hr.recommend_cosine(apply_weights=False, n=2))
print(df.sample(min(5, len(df))).title.to_list())

print("One hike at a time, gower:")
df = []
for hike in backpack:
    hr = hikeRecommender(hike_data, recommender_cols, weights=weight)
    columns_to_normalize = ['mileage', 'elevation_gain', 'tripreport_count']
    for col in columns_to_normalize:
        hr.norm(col)
    hr.like_hike(hike)
    if len(df)==0:
        df = hr.recommend_gower(apply_weights=False, n=2)
    else:
        df = df.append(hr.recommend_gower(apply_weights=False, n=2))
print(df.sample(min(5, len(df))).title.to_list())

All hikes in recommender at once:
{'Cosine_recommendations': ['Lake Ann (Rainy Pass)', 'Red Top Lookout', 'Heather Lake (near Lake Wenatchee)', 'Suntop Lookout', 'Deception Pass State Park - Goose Rock'], 'Gower_recommendations': ['Pacific Crest Trail (PCT) Section J - Snoqualmie Pass to Stevens Pass', 'Pacific Crest Trail (PCT) Section I - White Pass to Snoqualmie Pass', 'Pacific Crest Trail (PCT) Section L - Rainy Pass to Manning Park', 'Boundary Trail', 'Lake Margaret']}
One hike at a time, cosine:
['Capitol State Forest - Rock Candy Mountain', 'Chuckanut Ridge Trail', 'Surprise and Glacier Lakes', 'Granite Mountain', 'Carne Mountain']
One hike at a time, gower:
['Mount Pugh', 'Capitol State Forest - Rock Candy Mountain', 'Skookum Flats', 'Mount Higgins', 'Nason Ridge Snowshoe']


In [18]:
hr.recommend_gower(apply_weights=True)

Unnamed: 0,title,region name,uid,average_rating,elevation_gain,highpoint,mileage,mileage_type,tripreport_count,campsites,...,lakes,mountains,old_growth,ridges,rivers,summits,waterfalls,wildflowers,wildlife,hike_id
721,Round Lake,North Cascades -- Mountain Loop Highway,29381c4a6b73cbcf55e2632204f1c9a6,4.5,4300.0,5600.0,11.0,1,193,0,...,0,0,0,0,0,0,0,0,0,721
107,West Cady Ridge to Benchmark Mountain,Central Cascades -- Stevens Pass - West,05c64656c23f1620a2f5cdda82f56145,3.428571,3700.0,5816.0,14.4,1,181,0,...,0,0,0,0,0,0,0,1,0,107
1425,Black Peak,North Cascades -- North Cascades Highway - Hwy 20,53bac16d4370aca559ee3c4f0e784165,4.0,5100.0,8970.0,12.0,1,81,0,...,0,0,0,0,0,0,0,0,0,1425
1849,Purcell Mountain Trail,South Cascades -- White Pass/Cowlitz River Valley,6ff02ad0336f486094221b0bd1c216b0,0.0,4542.0,5442.0,16.2,1,14,0,...,0,0,0,0,0,0,0,0,1,1849
2870,Caroline Peak,Snoqualmie Region -- Snoqualmie Pass,af253cf3d6c34780be5e806d0b53b2b7,0.0,5200.0,5885.0,17.0,1,1,0,...,0,0,0,0,0,0,0,0,0,2870


In [19]:
df = []

for hike in backpack1:
    hr = hikeRecommender(hike_data, recommender_cols, weights=weight)
    columns_to_normalize = ['mileage', 'elevation_gain', 'tripreport_count']
    for col in columns_to_normalize:
        hr.norm(col)
    hr.like_hike(hike)
    if len(df)==0:
        df = hr.recommend_gower(apply_weights=False, n=2)
    else:
        df = df.append(hr.recommend_gower(apply_weights=False, n=2))

In [20]:
df.sample(5).title.to_list()

['Little Si',
 'Carne Mountain',
 'Nason Ridge Snowshoe',
 'Central Peak',
 'The Brothers']