In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import datetime

# Data Access
* Must first access the mountains base page (BASE_URL + Mountain)
* Then isolate the forecast link which includes the elevation default


In [6]:
class mountain_gather(object):
    def __init__(self):
        self.BASE_URL = """https://www.mountain-forecast.com"""
        
        self.value_pulls = {'rain':['forecast__table-rain','forecast__table-value'],                    # Unit: cm
                            'snow':['forecast__table-snow','forecast__table-value'],                    # Unit: cm
                            'temp_max':['forecast__table-max-temperature','forecast__table-value'],     # Unit: C
                            'temp_min':['forecast__table-min-temperature','forecast__table-value'],     # Unit: C
                            'chill':['forecast__table-chill','forecast__table-value'],                  # Unit: C
                            'freeze_level':['forecast__table-freezing-level','heightfl']}               # Unit: m
        self.payload = {}
    
    def _get_forecast_url(self, mountain_name):
        r = requests.get(f"{self.BASE_URL}/peaks/{mountain_name}")
        soup = BeautifulSoup(r.text, 'html.parser')
        forecast_tag = soup.find(name='li',attrs={'class':'tabs__list-item'}).find(name='a')
        forecast_link = forecast_tag.attrs['href']
        return forecast_link

    def _generic_data_retrieval(self, forecast_table, value_id, span_id):
        overall = forecast_table.find(name='tr', attrs={'class':value_id})
        tags = overall.find_all(name="span", attrs={'class':span_id})
        vals = [i.text.strip('\n -') for i in tags]
        return vals

    @staticmethod
    def _expand_days(days, day_periods):
        expanded_days = []
        if day_periods[0] == 'AM':
            primary_day_count = 3
        elif day_periods[0] == 'PM':
            primary_day_count = 2
        else:
            primary_day_count = 1
        ### Check day
        if int(days[0].split('_')[-1]) == datetime.datetime.now().day:
            today = datetime.datetime.today().date()
        elif int(days[0].split('_')[-1]) == datetime.datetime.now().day-1:
            today = (datetime.datetime.today() - datetime.timedelta(days=1)).date()
        elif int(days[0].split('_')[-1]) == datetime.datetime.now().day+1:
            today = (datetime.datetime.today() + datetime.timedelta(days=1)).date()
        else:
            assert "WTF Time is BROKEN"
        
        for a in range(primary_day_count):
            expanded_days.append(today)
        current_day = today + datetime.timedelta(days=1)
        while len(expanded_days) < len(day_periods):
            for day in days[1:]:
                [expanded_days.append(current_day) for a in range(3)]
                current_day = current_day + datetime.timedelta(days=1)
        return expanded_days[:len(day_periods)]
    
    def _get_day_periods(self, ret=False):
        day_tags = self.forecast_table.find_all(name='td' , attrs={'class':'forecast__table-days-item'})
        days = [day.attrs['data-column-name'] for day in day_tags]
        
        
        day_period_tags = self.forecast_table.find_all(name="td", attrs={'class':'forecast__table-time-item'})
        day_periods = [i.find('span').text.strip('\n ') for i in day_period_tags]
        self.payload['day_periods'] = day_periods
        
        expanded_days = self._expand_days(days, day_periods)
        self.payload['days'] = expanded_days
        
        if ret:
            return (expanded_days, day_periods)
        
    def _get_weather_summary(self, ret=False):
        weather_tags = self.forecast_table.find(name='tr',
                                                attrs='forecast__table-weather'
                                               ).find_all(name='div',
                                                          attrs={'class':'icon-weather'})
        period_weather = [i.find('img').attrs['alt'] for i in weather_tags]
        self.payload['period_weather'] = period_weather

        wind_overall = self.forecast_table.find(name='tr', attrs={'class':'forecast__table-wind'})
        wind_tags = wind_overall.find_all(name="div", attrs={'class':'windcell'})
        period_wind = [i.find('img').attrs['alt'] for i in wind_tags]
        
        self.payload['period_wind'] = period_wind
        if ret:
            return (period_weather, period_wind)
        
    def _cleanup(self):
        self.mountain_frame[['wind_value','wind_direction']] = self.mountain_frame.period_wind.apply( 
                                                lambda x: pd.Series(str(x).split(" ")))
#         self.mountain_frame[['dow','day']] = self.mountain_frame.days.apply( 
#                                                 lambda x: pd.Series(str(x).split("_")))
        
        
    def get_measures(self):
        for measure, ids in self.value_pulls.items():
            metric = self.value_pulls[measure]
            self.payload[measure] = self._generic_data_retrieval(forecast_table=self.forecast_table,
                                                                 value_id=ids[0],
                                                                 span_id=ids[1])

    def run(self, mountain_name):
        self.payload = {}                                                    # Reset paload for this run
        forecast_url = self._get_forecast_url(mountain_name=mountain_name)   # Fetch full URL
        r = requests.get(f"{self.BASE_URL}/{forecast_url}")                  # Fetch full page
        self.page_soup = BeautifulSoup(r.text, 'html.parser')
        self.forecast_table = self.page_soup.find(name='table', attrs={'class':'forecast__table'})
        self._get_day_periods()
        self._get_weather_summary()
        self.get_measures()
        self.mountain_frame = pd.DataFrame(self.payload)
        self._cleanup()
        print(f'Retrieval for {mountain_name} Complete')
        return 0

In [13]:
mountains = ['Mount-Sill','Mount-Wilson-California','Mount-Whitney',
             'Mount-Williamson','Split-Mountain','Mount-Ritter',
             'Mount-Conness','Mount-Baldy-San-Gabriel','Split-Mountain',
             'Minarets-California','Half-Dome']
current_run = pd.DataFrame()

mg = mountain_gather()
for mountain_name in mountains:
    mg.run(mountain_name)
    frame = mg.mountain_frame
    frame['mountain_name'] = mountain_name
    current_run = current_run.append(frame)

Retrieval for Mount-Sill Complete
Retrieval for Mount-Wilson-California Complete
Retrieval for Mount-Whitney Complete
Retrieval for Mount-Williamson Complete
Retrieval for Split-Mountain Complete
Retrieval for Mount-Ritter Complete
Retrieval for Mount-Conness Complete
Retrieval for Mount-Baldy-San-Gabriel Complete
Retrieval for Split-Mountain Complete
Retrieval for Minarets-California Complete
Retrieval for Half-Dome Complete


In [12]:
current_run

Unnamed: 0,day_periods,days,period_weather,period_wind,rain,snow,temp_max,temp_min,chill,freeze_level,wind_value,wind_direction,mountain_name
0,night,2020-12-14,clear,45 NW,,,10,12,20,2100,45,NW,Mount-Sill
1,AM,2020-12-15,clear,25 NNW,,,8,9,16,2900,25,NNW,Mount-Sill
2,PM,2020-12-15,clear,25 NW,,,8,8,16,3000,25,NW,Mount-Sill
3,night,2020-12-15,clear,20 WNW,,,8,9,16,2900,20,WNW,Mount-Sill
4,AM,2020-12-16,clear,25 WNW,,,7,7,15,3200,25,WNW,Mount-Sill
...,...,...,...,...,...,...,...,...,...,...,...,...,...
13,AM,2020-12-19,clear,35 NNW,,,7,8,15,2950,35,NNW,Mount-Ritter
14,PM,2020-12-19,clear,25 N,,,7,7,15,2950,25,N,Mount-Ritter
15,night,2020-12-19,clear,5 NW,,,5,6,8,3200,5,NW,Mount-Ritter
16,AM,2020-12-20,clear,5 N,,,4,4,6,3400,5,N,Mount-Ritter


In [16]:
BASE_URL = """https://www.mountain-forecast.com"""
def get_forecast_url(mountain_name, BASE_URL=BASE_URL):
    r = requests.get(f"{BASE_URL}/peaks/{mountain_name}")
    soup = BeautifulSoup(r.text, 'html.parser')
    forecast_tag = soup.find(name='li',attrs={'class':'tabs__list-item'}).find(name='a')
    forecast_link = forecast_tag.attrs['href']
    return forecast_link

In [149]:
get_forecast_url(mountain_name)

'/peaks/Mount-Sill/forecasts/4314'

In [150]:
def get_mountain_forecast(mountain_name, BASE_URL=BASE_URL):
    forecast_url = get_forecast_url(mountain_name=mountain_name)
    r = requests.get(f"{BASE_URL}/{forecast_url}")

In [151]:
forecast_url = get_forecast_url(mountain_name=mountain_name)
r = requests.get(f"{BASE_URL}/{forecast_url}")

In [152]:
soup = BeautifulSoup(r.text, 'html.parser')

In [153]:
forecast_table = soup.find(name='table', attrs={'class':'forecast__table'})

In [154]:
day_tags = forecast_table.find_all(name='td' , attrs={'class':'forecast__table-days-item'})
days = [day.attrs['data-column-name'] for day in day_tags]

day_period_tags = forecast_table.find_all(name="td", attrs={'class':'forecast__table-time-item'})
day_periods = [i.find('span').text.strip('\n ') for i in day_period_tags]

In [155]:
weather_tags = forecast_table.find(name='tr' ,attrs='forecast__table-weather').find_all(name='div',
                                                                                        attrs={'class':'icon-weather'})
period_weather = [i.find('img').attrs['alt'] for i in weather_tags]

wind_overall = forecast_table.find(name='tr', attrs={'class':'forecast__table-wind'})
wind_tags = wind_overall.find_all(name="div", attrs={'class':'windcell'})
period_wind = [i.find('img').attrs['alt'] for i in wind_tags]

In [157]:
def generic_data_retrieval(forecast_table, value_id, span_id):
    overall = forecast_table.find(name='tr', attrs={'class':value_id})
    tags = overall.find_all(name="span", attrs={'class':span_id})
    vals = [i.text.strip('\n -') for i in tags]
    return vals

In [158]:
value_pulls = {'rain':['forecast__table-rain','forecast__table-value'],                    # Unit: cm
               'snow':['forecast__table-snow','forecast__table-value'],                    # Unit: cm
               'temp_max':['forecast__table-max-temperature','forecast__table-value'],     # Unit: C
               'temp_min':['forecast__table-min-temperature','forecast__table-value'],     # Unit: C
               'chill':['forecast__table-chill','forecast__table-value'],                  # Unit: C
               'freeze_level':['forecast__table-freezing-level','heightfl']}               # Unit: m

In [175]:
df = pd.DataFrame()
df['wind'] = period_wind
df['wind_value'] = df['wind'].apply(lambda x: x.split(' ')[0])
df['wind_direction'] = df['wind'].apply(lambda x: x.split(' ')[1])
for measure, ids in value_pulls.items():
    metric = value_pulls[measure]
    df[measure] = generic_data_retrieval(forecast_table=forecast_table,
                                         value_id=ids[0],
                                         span_id=ids[1])
df['Mountain'] = mountain_name

In [176]:
df

Unnamed: 0,wind,wind_value,wind_direction,rain,snow,temp_max,temp_min,chill,freeze_level,Mountain
0,55 WSW,55,WSW,,18.0,11,12,24,2500,Mount-Sill
1,55 WNW,55,WNW,,8.0,14,16,28,2050,Mount-Sill
2,60 NW,60,NW,,,12,13,25,2000,Mount-Sill
3,50 NW,50,NW,,,12,12,25,2100,Mount-Sill
4,45 NW,45,NW,,,9,11,18,2150,Mount-Sill
5,25 NNW,25,NNW,,,8,8,16,3050,Mount-Sill
6,25 WNW,25,WNW,,,9,9,18,2900,Mount-Sill
7,20 WNW,20,WNW,,,8,9,16,2900,Mount-Sill
8,30 W,30,W,,,8,8,17,3100,Mount-Sill
9,40 W,40,W,,,9,10,19,2900,Mount-Sill
