# Trail Recommendations

List everything that we need to do.

In [1]:
import Trailforks as tf
import TrailforksScraper as tfs
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np

## Import all the files

In [2]:
trails_checkins = pd.read_csv('trail-checkins.csv')
trailforks_trails = pd.read_csv('trailforks-trails.csv')
wta_trails = pd.read_csv('ResultsZip.csv')

## Getting data from Trailforks

- 'activitytype=6' for hiking
- Trailstyle = Popularity

Trailforks api documentation: https://www.trailforks.com/about/api/#!/trail

Trailforks popularity heatmap: https://www.trailforks.com/region/united-states/?activitytype=6&z=10.4&lat=48.30699&lon=-120.42421&trailstyle=popularity

Trailforks popularity scores (sorted in descending order): https://www.trailforks.com/region/united-states/trails/?sort=t.popularity_score&order=desc&difficulty=2,3,4,11,9,5,6,8&activitytype=6

In [3]:
trailForksScrapper = tfs.trailforksScrapper()

In [38]:
# This code was used to scrate all the trails in north-carolina and washington 
# from trailforks along with their popularity.

# north_carolina = trailForksScrapper.fetchTrailsByRegionAndPages('north-carolina',23)
# washington = trailForksScrapper.fetchTrailsByRegionAndPages('washington',72)

Import data from all the saved files

In [37]:
trails_scraped = trails_checkins['trail'].unique()
len(trails_scraped)
trails = trailforks_trails[~trailforks_trails['title'].isin(trails_scraped)]['title']
len(trails)
trails = trails[trails.str.contains('--')]
trails = trails.str.replace('--','-')
trails

22                               lounge-lower
122                              lounge-lower
220            hozomeen-trail-east-bank-trail
221        willow-creek-trail-east-lake-trail
223     lightning-creek-trail-east-lake-trail
                        ...                  
9455                canopy-trail--enduro-line
9476              black-mountain-trail-middle
9482                     spencer-branch-upper
9489                        spencer-gap-upper
9495               black-mountain-trail-lower
Name: title, Length: 890, dtype: object

Convert trail titles to have only words separated by -
This will be used in url for scraping checkins.

In [26]:
# north_carolina['title'] = north_carolina['title'].str.replace('\W', ' ').str.lower()
# north_carolina['title'] = north_carolina['title'].str.replace('[^a-z A-Z]', '').str.strip().str.replace(' ','-')
# washington['title'] = washington['title'].str.replace('\W', ' ').str.lower()
# washington['title'] = washington['title'].str.replace('[^a-z A-Z]', '').str.strip().str.replace(' ','-')

  north_carolina['title'] = north_carolina['title'].str.replace('\W', ' ').str.lower()
  north_carolina['title'] = north_carolina['title'].str.replace('[^a-z A-Z]', '').str.strip().str.replace(' ','-')
  washington['title'] = washington['title'].str.replace('\W', ' ').str.lower()
  washington['title'] = washington['title'].str.replace('[^a-z A-Z]', '').str.strip().str.replace(' ','-')


Getting trail stats for all the trails collected previously.

In [40]:
# This code can be used to scrape trails checkins and store them in dataframe.

import pandas as pd 

df = pd.DataFrame()
for trail in trails:
    df_trail = trailForksScrapper.fetchTrailStats(trail)
    if df_trail is not None:
        df_trail['trail'] = trail
        df = pd.concat([df,df_trail])

df

  checkins_per_date['Check-Ins'] = checkins_per_date['Check-Ins'].str.replace('\W', '')
  checkins_per_date['Check-Ins'] = checkins_per_date['Check-Ins'].str.replace('\W', '')
  checkins_per_date['Check-Ins'] = checkins_per_date['Check-Ins'].str.replace('\W', '')
  checkins_per_date['Check-Ins'] = checkins_per_date['Check-Ins'].str.replace('\W', '')
  checkins_per_date['Check-Ins'] = checkins_per_date['Check-Ins'].str.replace('\W', '')
  checkins_per_date['Check-Ins'] = checkins_per_date['Check-Ins'].str.replace('\W', '')
  checkins_per_date['Check-Ins'] = checkins_per_date['Check-Ins'].str.replace('\W', '')
  checkins_per_date['Check-Ins'] = checkins_per_date['Check-Ins'].str.replace('\W', '')
  checkins_per_date['Check-Ins'] = checkins_per_date['Check-Ins'].str.replace('\W', '')
  checkins_per_date['Check-Ins'] = checkins_per_date['Check-Ins'].str.replace('\W', '')
  checkins_per_date['Check-Ins'] = checkins_per_date['Check-Ins'].str.replace('\W', '')
  checkins_per_date['Check-Ins']

Unnamed: 0,Period,Check-Ins,trail
0,1am,31,monument-trail
1,2am,18,monument-trail
2,3am,37,monument-trail
3,4am,28,monument-trail
4,5am,6,monument-trail
...,...,...,...
2868,2022109,13,black-mountain-trail-lower
2869,20221010,5,black-mountain-trail-lower
2870,20221011,5,black-mountain-trail-lower
2871,20221012,39,black-mountain-trail-lower


## Merge all the tables to create one dataset

Cleaning up datasets:
- Removing trails with title unknown and NAN
- Removing unnamed columns

In [7]:
trailforks_trails = trailforks_trails.dropna(subset='title')
trailforks_trails = trailforks_trails[~trailforks_trails['title'].str.contains('unknown')]
trailforks_trails = trailforks_trails[['title','riding area','rating','distance','descent','climb','popularity_score']]
trailforks_trails['title'] = trailforks_trails['title'].str.replace('--','-')
trailforks_trails

Unnamed: 0,title,riding area,rating,distance,descent,climb,popularity_score
3,iron-peak,Teanaway,,3 miles,-519 ft,"2,203 ft",0
4,frog-trail,Pilchuck Tree Farm,,485 ft,-9 ft,52 ft,0
5,haida-s-trail,Pilchuck Tree Farm,,"2,924 ft",-191 ft,146 ft,0
6,stephanie-s-sweet-spot,Pilchuck Tree Farm,,"1,277 ft",-208 ft,26 ft,0
7,sandy-s-trail,Pilchuck Tree Farm,,908 ft,-54 ft,9 ft,0
...,...,...,...,...,...,...,...
9495,black-mountain-trail-lower,Pisgah Ranger District,,1 mile,-563 ft,,100
9496,hickory-mountain-loop,Dupont State Recreational Forest,,1 mile,-257 ft,261 ft,100
9497,the-jam,Rocky Knob Park,,682 ft,-20 ft,18 ft,100
9498,panda,U.S. National Whitewater Center,,1 mile,-20 ft,19 ft,100


In [13]:
wta_trails['title'] = wta_trails['TITLE']
wta_trails['title'] = wta_trails['title'].str.replace('\W', ' ').str.lower()
wta_trails['title'] = wta_trails['title'].str.replace('[^a-z A-Z]', '').str.strip().str.replace(' ','-')
wta_trails['title'] = wta_trails['title'].str.replace('--','-',regex=True) ## this is not working. Need to check why

  wta_trails['title'] = wta_trails['title'].str.replace('\W', ' ').str.lower()
  wta_trails['title'] = wta_trails['title'].str.replace('[^a-z A-Z]', '').str.strip().str.replace(' ','-')


Unnamed: 0.1,Unnamed: 0,TITLE,REGION,DISTANCE,DIST_TYPE,GAIN,HIGHEST,RATING,RATING_COUNT,LATITUDE,LONGITUDE,REPORT_DATE,REPORT_COUNT,URL,title
1,2,Ranger Hole - Interrorem Nature Trail,Olympic Peninsula,2.10,roundtrip,200.0,320.0,4.33,12,47.680685,-122.992312,2021-04-09,71,https://www.wta.org/go-hiking/hikes/ranger-hole,ranger-hole--interrorem-nature-trail
15,36,Lake Whatcom Park - Hertz Trail,Puget Sound and Islands,6.20,roundtrip,100.0,,3.56,9,48.730207,-122.309044,2021-04-03,48,https://www.wta.org/go-hiking/hikes/lake-whatc...,lake-whatcom-park--hertz-trail
18,39,Anacortes Community Forest Lands - Mount Erie,Puget Sound and Islands,5.00,roundtrip,1000.0,1300.0,4.36,14,48.469016,-122.629368,2021-04-20,140,https://www.wta.org/go-hiking/hikes/mount-erie,anacortes-community-forest-lands--mount-erie
35,56,Olallie State Park - Weeks Falls,Snoqualmie Region,1.60,roundtrip,,,3.33,6,47.441833,-121.672546,2021-03-30,14,https://www.wta.org/go-hiking/hikes/weeks-falls,olallie-state-park--weeks-falls
39,60,Sharpe Park - Sares Head,Puget Sound and Islands,2.10,roundtrip,440.0,490.0,4.29,7,48.430617,-122.664628,2021-03-31,105,https://www.wta.org/go-hiking/hikes/sharpe-par...,sharpe-park--sares-head
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3068,3883,Battle Ground Lake State Park - Lower Lake Trail,Southwest Washington,1.00,roundtrip,120.0,680.0,4.50,2,45.802981,-122.491357,2020-08-18,13,https://www.wta.org/go-hiking/hikes/battle-gro...,battle-ground-lake-state-park--lower-lake-trail
3076,3891,Anacortes Community Forest Lands - Heart Lake,Puget Sound and Islands,2.86,roundtrip,130.0,620.0,3.00,2,48.475112,-122.627249,2020-11-05,30,https://www.wta.org/go-hiking/hikes/anacortes-...,anacortes-community-forest-lands--heart-lake
3081,3897,Ahtanum State Forest - Whites Ridge,Central Washington,10.90,roundtrip,1900.0,5000.0,5.00,1,46.523175,-121.009681,2021-01-09,8,https://www.wta.org/go-hiking/hikes/whites-ridge,ahtanum-state-forest--whites-ridge
3082,3898,Rock Creek - Red Pass Loop,Snoqualmie Region,17.00,roundtrip,5600.0,5400.0,0.00,0,47.445420,-121.423529,2020-08-02,12,https://www.wta.org/go-hiking/hikes/snow-lake-...,rock-creek--red-pass-loop


In [50]:
dataset_1 = trailforks_trails
dataset_2 = wta_trails
dataset_3 = trails_checkins[['Period','Check-Ins','trail']]
dataset_1['title'] = dataset_1['title'].str.replace('-trail','')
dataset_2['title'] = dataset_2['title'].str.replace('-trail','')
dataset_3['trail'] = dataset_3['trail'].str.replace('-trail','')
combined_trails = pd.merge(dataset_1,dataset_2,on='title',how='left')
combined_trails = pd.merge(combined_trails,dataset_3.set_index('trail'),left_on='title',right_on='trail',how='left')
combined_trails

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataset_3['trail'] = dataset_3['trail'].str.replace('-trail','')


Unnamed: 0.1,title,riding area,rating,distance,descent,climb,popularity_score,Unnamed: 0,TITLE,REGION,...,HIGHEST,RATING,RATING_COUNT,LATITUDE,LONGITUDE,REPORT_DATE,REPORT_COUNT,URL,Period,Check-Ins
0,iron-peak,Teanaway,,3 miles,-519 ft,"2,203 ft",0,1185.0,Iron Peak,Snoqualmie Region,...,6160.0,4.2,10.0,47.421427,-120.937243,2020-11-01,239.0,https://www.wta.org/go-hiking/hikes/iron-peak,,
1,frog,Pilchuck Tree Farm,,485 ft,-9 ft,52 ft,0,,,,...,,,,,,,,,1am,7.0
2,frog,Pilchuck Tree Farm,,485 ft,-9 ft,52 ft,0,,,,...,,,,,,,,,2am,1.0
3,frog,Pilchuck Tree Farm,,485 ft,-9 ft,52 ft,0,,,,...,,,,,,,,,4am,16.0
4,frog,Pilchuck Tree Farm,,485 ft,-9 ft,52 ft,0,,,,...,,,,,,,,,5am,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28031070,hard-times-connector,Bent Creek Experimental Forest,,413 ft,,,100,,,,...,,,,,,,,,2022108,8.0
28031071,hard-times-connector,Bent Creek Experimental Forest,,413 ft,,,100,,,,...,,,,,,,,,2022109,10.0
28031072,hard-times-connector,Bent Creek Experimental Forest,,413 ft,,,100,,,,...,,,,,,,,,20221010,2.0
28031073,hard-times-connector,Bent Creek Experimental Forest,,413 ft,,,100,,,,...,,,,,,,,,20221011,1.0


In [52]:
combined_trails.drop(columns=['Unnamed: 0'])