In [2]:
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import pandas as pd
from bs4 import BeautifulSoup
import requests
import sqlite3

In [2]:
# Build `ski_df`: one row per US ski resort scraped from Wikipedia, tagged with
# the region heading (h2) and the state heading (h3) it is listed under.

url = "https://en.m.wikipedia.org/wiki/List_of_ski_areas_and_resorts_in_the_United_States"

# Fail loudly on a bad response: silently skipping the parse would leave
# `ski_df` undefined and only surface later as a confusing NameError.
response = requests.get(url, timeout=30)
response.raise_for_status()

soup = BeautifulSoup(response.content, 'html.parser')
sections = soup.find_all(['h2', 'h3'])
data = []      # one dict per resort; becomes ski_df
str_data = []  # plain list of the resort names

location_catalog = ''  # most recent region heading seen so far

for section in sections:
    if section.name == 'h2':
        headline = section.find('span', {'class': 'mw-headline'})
        # Only headings of the form "Region (...)" start a new region.
        if headline and ' (' in headline.text.strip():
            location_catalog = headline.text.strip().split(' (')[0]
        continue

    # h3: a state heading. Skip headings that lack the expected headline span
    # instead of crashing on `None.attrs`.
    heading_span = section.span
    if heading_span is None or 'mw-headline' not in heading_span.attrs.get('class', []):
        continue

    anchor = heading_span.a
    state = anchor.get('title') if anchor is not None else None
    if not state:
        # NOTE(review): unlinked heading; falling back to the visible heading
        # text, which is presumably the state name - confirm against the page.
        state = heading_span.text.strip()
    state = state.split(' (')[0]

    resort_list = section.find_next('ul')
    if resort_list is None:
        continue

    for resort in resort_list.find_all('li'):
        # Drop any trailing parenthetical, e.g. "Name (closed)".
        resort_name = resort.text.strip().split(' (')[0]
        data.append({'location_catalog': location_catalog, 'state': state, 'resort_name': resort_name})
        str_data.append(str(resort_name))

# Create DataFrame
ski_df = pd.DataFrame(data)

In [3]:
# Preview the scraped resort table (columns: location_catalog, state, resort_name).
ski_df

Unnamed: 0,location_catalog,state,resort_name
0,New England,Connecticut,Mohawk Mountain Ski Area — Cornwall
1,New England,Connecticut,Mount Southington — Plantsville
2,New England,Connecticut,Powder Ridge Ski Area — Middlefield
3,New England,Connecticut,Ski Sundown — New Hartford
4,New England,Maine,Baker Mountain — Bingham
...,...,...,...
461,West Coast,Washington,Alpental
462,West Coast,Washington,Summit Central
463,West Coast,Washington,Summit East
464,West Coast,Washington,Summit West


In [6]:
# Function to get location details from Google Maps API
def get_location_details(resort_name, api_key, timeout=10):
    """Look up coordinates, elevation, address parts and website for a resort.

    Geocodes ``resort_name`` with the Google Geocoding API. On a hit, it then
    queries the Elevation API for that point and the Places API (find-place
    followed by place-details) for the place's website.

    Args:
        resort_name: Free-text search string sent to the geocoder.
        api_key: Google Maps Platform API key used for every request.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        An 8-tuple ``(latitude, longitude, elevation, state, city, zipcode,
        address, url)``.

        * All eight values are ``None`` when geocoding does not return ``OK``.
        * ``elevation`` is ``None`` when the elevation lookup fails.
        * ``state`` / ``city`` / ``zipcode`` are ``""`` when the matching
          address component is absent.
        * ``url`` is ``None`` when no place candidate is found, and ``""`` when
          the place details carry no website (or no ``result`` at all).
    """
    print(resort_name)

    def fetch_json(endpoint, params):
        # `params` lets requests do the URL encoding; the timeout prevents a
        # stalled connection from hanging the whole geocoding loop.
        return requests.get(endpoint, params=params, timeout=timeout).json()

    geocode = fetch_json(
        "https://maps.googleapis.com/maps/api/geocode/json",
        {"address": resort_name, "key": api_key},
    )
    if geocode.get("status") != "OK" or not geocode.get("results"):
        return None, None, None, None, None, None, None, None

    result = geocode["results"][0]
    location = result["geometry"]["location"]
    address_components = result["address_components"]

    def component(kind):
        # First address component tagged with `kind`, or "" when none is.
        return next(
            (part["long_name"] for part in address_components if kind in part["types"]),
            "",
        )

    latitude, longitude = location["lat"], location["lng"]
    address = result.get("formatted_address", "")
    state = component("administrative_area_level_1")
    city = component("locality")
    zipcode = component("postal_code")

    elevation_data = fetch_json(
        "https://maps.googleapis.com/maps/api/elevation/json",
        {"locations": f"{latitude},{longitude}", "key": api_key},
    )
    if elevation_data.get("status") == "OK" and elevation_data.get("results"):
        elevation = elevation_data["results"][0].get("elevation")
    else:
        elevation = None

    # Use Places API to resolve a place_id for the same search text.
    places_data = fetch_json(
        "https://maps.googleapis.com/maps/api/place/findplacefromtext/json",
        {
            "input": resort_name,
            "inputtype": "textquery",
            "fields": "place_id",
            "key": api_key,
        },
    )
    if not (places_data.get("status") == "OK" and places_data.get("candidates")):
        return latitude, longitude, elevation, state, city, zipcode, address, None

    place_id = places_data["candidates"][0]["place_id"]

    # Use Place Details API to get the website. A failed details call has no
    # "result" key; treat it like a place without a website rather than
    # raising KeyError and aborting the caller's loop.
    details_data = fetch_json(
        "https://maps.googleapis.com/maps/api/place/details/json",
        {"place_id": place_id, "key": api_key},
    )
    url = (details_data.get("result") or {}).get("website", "")

    return latitude, longitude, elevation, state, city, zipcode, address, url


In [8]:
# Construct location entries for all ski resorts: geocode each row of `ski_df`
# and store the result in the `location` table of skiDataset.db.
import os

db_file = "skiDataset.db"

# The API key is read from the environment so it never lands in the notebook
# or its history. NOTE(review): a key was previously hard-coded in this cell;
# it should be considered leaked and rotated.
api_key = os.environ.get("GOOGLE_MAPS_API_KEY")
if not api_key:
    raise RuntimeError("Set the GOOGLE_MAPS_API_KEY environment variable before running this cell.")

conn = sqlite3.connect(db_file)
try:
    cursor = conn.cursor()
    cursor.execute('''
    CREATE TABLE IF NOT EXISTS location (
        resort_name TEXT PRIMARY KEY,
        latitude REAL,
        longitude REAL,
        elevation REAL,
        location_catalog TEXT,
        state TEXT,
        city TEXT,
        zipcode TEXT,
        address TEXT,
        url TEXT
    )
''')

    # Get the latitude/longitude (and related details) from the Google Maps
    # APIs and insert them into the database.
    for index, row in ski_df.iterrows():
        location_catalog = row['location_catalog']
        resort_states = row['state']
        resort_name = row['resort_name']
        latitude, longitude, elevation, state, city, zipcode, address, url = get_location_details(resort_name + ', ' + resort_states, api_key)
        # OR REPLACE keeps the cell re-runnable: resort_name is the primary
        # key, so a plain INSERT raises IntegrityError on the second run (or
        # on a duplicated resort name).
        cursor.execute('''
    INSERT OR REPLACE INTO location (resort_name, latitude, longitude, elevation, location_catalog, state, city, zipcode, address, url)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
''', (resort_name, latitude, longitude, elevation, location_catalog, state, city, zipcode, address, url))
        # Commit per resort so lookups already completed survive a failure
        # part-way through the loop.
        conn.commit()
finally:
    # Always release the database file, even when a lookup raises.
    conn.close()

Mohawk Mountain Ski Area — Cornwall, Connecticut
Mount Southington — Plantsville, Connecticut
Powder Ridge Ski Area — Middlefield, Connecticut
Ski Sundown — New Hartford, Connecticut
Baker Mountain — Bingham, Maine
Big Rock — Mars Hill, Maine
Big Squaw — Greenville, Maine
Black Mountain of Maine — Rumford, Maine
Camden Snow Bowl — Camden, Maine
Eaton Mountain — Skowhegan, Maine
Hermon Mountain — Hermon, Maine
Lonesome Pine Trails — Fort Kent, Maine
Lost Valley — Auburn, Maine
Mount Abram — Greenwood, Maine
Mount Jefferson Ski Area — Lee, Maine
Pinnacle Ski Club — Pittsfield, Maine
Pleasant Mountain — Bridgton, Maine
Powderhouse Hill — South Berwick, Maine
Quoggy Jo — Presque Isle, Maine
Saddleback Maine — Rangeley, Maine
Sugarloaf — Carrabassett Valley, Maine
Sunday River — Newry, Maine
Titcomb Mountain — Farmington, Maine
Berkshire East Ski Resort — Charlemont, Massachusetts
Blue Hills Ski Area — Canton, Massachusetts
Bousquet Ski Area — Pittsfield, Massachusetts
Butternut — Great Bar

In [9]:
import sqlite3
import pandas as pd

# Purge every resort that could not be geocoded (latitude or longitude is NULL).
conn = sqlite3.connect('./skiDataset.db')
cursor = conn.cursor()

# Collect the affected rows first; column 0 of each row is resort_name.
data = cursor.execute('''
     SELECT *
     FROM location
     WHERE latitude IS NULL OR longitude IS NULL
''').fetchall()

# Delete rows from the 'location' table, one statement execution per resort.
cursor.executemany('''
        DELETE FROM location
        WHERE resort_name = ?
    ''', [(resort[0],) for resort in data])

conn.commit()
conn.close()

In [39]:
import sqlite3

# Create the raw-weather schema in rawWeather.db if it is not there yet:
# one table for stations and one for the per-day observations.
conn = sqlite3.connect('./rawWeather.db')
cursor = conn.cursor()

for ddl in (
    '''
    -- Create table for weather stations
    CREATE TABLE IF NOT EXISTS WeatherStations (
        StationID TEXT PRIMARY KEY,
        Region TEXT,
        State TEXT,
        Latitude REAL,
        Longitude REAL
    );
''',
    '''
  -- Create table for weather data
    CREATE TABLE IF NOT EXISTS WeatherData (
        DataID INTEGER PRIMARY KEY,
        StationID TEXT,
        Date TEXT,
        Snowfall REAL,
        -- Add other fields as needed
        FOREIGN KEY (StationID) REFERENCES WeatherStations(StationID)
    );
''',
):
    cursor.execute(ddl)

conn.commit()
conn.close()


In [41]:
import concurrent.futures
import json
import requests  # Added import requests

# Maps the zone identifier placed in the NOAA daily-snow URL path to the
# region label stored in WeatherStations.Region.
# Note the keys are of mixed type: four integers and the string 'AK'.
zone_lookup = {
    121: "east",
    122: "south",
    123: "central",
    124: "west",
    'AK': "alaska"
}
import sqlite3

def get_noaa_weather(year_month, zone_num):
    """Fetch one month of NOAA daily-snow map data for a zone.

    Prints the request URL, then returns the decoded JSON body.

    Args:
        year_month: Month to fetch, formatted ``YYYYMM``.
        zone_num: Zone identifier interpolated into the URL path.
    """
    endpoint = f"https://www.ncei.noaa.gov/access/monitoring/daily-snow/{zone_num}/snowfall/{year_month}/map-data.json"
    print(endpoint)
    response = requests.get(endpoint)
    return response.json()

def get_all(year_month):
    """Download one month of snowfall for every zone and store it in rawWeather.db.

    The zones in ``zone_lookup`` are fetched concurrently with
    ``get_noaa_weather``; results are written from the calling thread as each
    download completes. Every feature adds (or leaves untouched) one
    ``WeatherStations`` row and one ``WeatherData`` row per day, dated
    ``year_month`` followed by the zero-padded day. Everything is committed
    once at the end.

    Args:
        year_month: Month to load, formatted ``YYYYMM``.
    """
    conn = sqlite3.connect('./rawWeather.db')
    try:
        cursor = conn.cursor()
        with concurrent.futures.ThreadPoolExecutor() as executor:
            futures = {executor.submit(get_noaa_weather, year_month, zone_num): zone_num for zone_num in zone_lookup}
            for future in concurrent.futures.as_completed(futures):
                zone = futures[future]
                print("zone: ", zone)
                data = future.result()
                for feature in data['features']:
                    station_id = feature['ghcnId']
                    coordinates = feature['geometry']['coordinates']
                    # NOTE(review): coordinates[0] goes to Latitude and
                    # coordinates[1] to Longitude. If the feed follows GeoJSON
                    # order ([longitude, latitude]) the two are stored under
                    # swapped names; a later cell in this notebook renames the
                    # two columns, which suggests that is the case. Keep the
                    # two in sync.
                    cursor.execute('''
                    INSERT or IGNORE INTO WeatherStations (StationID, Region, State, Latitude, Longitude)
                    VALUES (?,?,?,?,?)
                ''', (station_id, zone_lookup[zone], feature['properties']['state']['name'], coordinates[0], coordinates[1]))
                    cursor.executemany('''
                        INSERT INTO WeatherData (StationID, Date, Snowfall)
                        VALUES (?,?,?)
                    ''', [
                        (station_id, f'{year_month}{int(day):02d}', snowfall)
                        for day, snowfall in feature['properties']['values'].items()
                    ])
        print("save progress")
        conn.commit()
    finally:
        # Release the database even if a download or insert raised;
        # uncommitted rows for this month are discarded in that case.
        conn.close()

snowfall_data = {}

# Load months 10-12 of every year from 2015 through 2023.
# NOTE(review): the matching cell further down walks all twelve months of the
# same years; confirm whether the other months are meant to be loaded too.
for year_month in (
    f"{year}{month:02d}"
    for year in range(2015, 2024)
    for month in range(10, 13)
):
    print(year_month)
    get_all(year_month)


202310
https://www.ncei.noaa.gov/access/monitoring/daily-snow/121/snowfall/202310/map-data.json
https://www.ncei.noaa.gov/access/monitoring/daily-snow/122/snowfall/202310/map-data.json
https://www.ncei.noaa.gov/access/monitoring/daily-snow/123/snowfall/202310/map-data.json
https://www.ncei.noaa.gov/access/monitoring/daily-snow/124/snowfall/202310/map-data.json
https://www.ncei.noaa.gov/access/monitoring/daily-snow/AK/snowfall/202310/map-data.json
zone:  AK
zone:  124
zone:  121
zone:  122
zone:  123
save progress
202311
https://www.ncei.noaa.gov/access/monitoring/daily-snow/121/snowfall/202311/map-data.json
https://www.ncei.noaa.gov/access/monitoring/daily-snow/122/snowfall/202311/map-data.json
https://www.ncei.noaa.gov/access/monitoring/daily-snow/123/snowfall/202311/map-data.json
https://www.ncei.noaa.gov/access/monitoring/daily-snow/124/snowfall/202311/map-data.json
https://www.ncei.noaa.gov/access/monitoring/daily-snow/AK/snowfall/202311/map-data.json
zone:  AK
zone:  124
zone:  12

In [6]:
import sqlite3

def valid_stations(date, cursor):
    """Return the stations that have a usable snowfall reading on ``date``.

    Args:
        date: Date string compared for equality against ``WeatherData.Date``.
        cursor: Open sqlite3 cursor on the raw-weather database. It is left
            open; the caller owns the connection.

    Returns:
        A list of ``(StationID, Latitude, Longitude, snowfall)`` tuples, one
        per observation on that date whose snowfall is not the marker ``'M'``.
    """
    # The date is bound as a one-element tuple, as the DB-API requires.
    query = '''
        SELECT ws.StationID, ws.Latitude, ws.Longitude, wd.snowfall
        FROM WeatherData wd
        INNER JOIN WeatherStations ws ON wd.StationID = ws.StationID
        WHERE wd.Date = ? AND wd.Snowfall != 'M'
    '''
    return cursor.execute(query, (date,)).fetchall()


In [26]:
# Repair WeatherStations when its coordinates sit under swapped column names,
# then show the usable stations for one sample date.
conn = sqlite3.connect('./rawWeather.db')
cursor = conn.cursor()

# A real latitude never exceeds 90 in magnitude, so any such value in the
# Latitude column means that column is actually holding longitudes. Checking
# first keeps the cell re-runnable: an unconditional swap undoes itself the
# second time it is executed.
cursor.execute("SELECT EXISTS(SELECT 1 FROM WeatherStations WHERE ABS(Latitude) > 90)")
if cursor.fetchone()[0]:
    # Three-step rename through a temporary name to exchange the two columns.
    cursor.execute("ALTER TABLE WeatherStations RENAME COLUMN Latitude TO Latitude1")
    cursor.execute("ALTER TABLE WeatherStations RENAME COLUMN Longitude TO Latitude")
    cursor.execute("ALTER TABLE WeatherStations RENAME COLUMN Latitude1 TO Longitude")

valid_stations('20150101', cursor)

[('USC00500235', 70.35, -150.93, 0.0),
 ('USC00500237', 63.04, -147.25, 3.5),
 ('USC00500243', 60.96, -149.11, 0.0),
 ('USC00500247', 62.19, -150.5, 0.0),
 ('US1AKAB0028', 61.06, -149.75, 0.5),
 ('US1AKAB0051', 61.21, -149.76, 0.2),
 ('USC00500275', 61.16, -149.98, 'T'),
 ('USC00500284', 61.09, -149.76, 0.5),
 ('USW00026451', 61.17, -150.03, 'T'),
 ('USC00500281', 61.1, -149.72, 2.0),
 ('USW00025308', 55.04, -131.58, 0.0),
 ('USC00500363', 58.32, -134.1, 6.4),
 ('USC00500464', 58.38, -134.65, 0.2),
 ('USC00500490', 64.86, -147.72, 'T'),
 ('USW00027502', 71.28, -156.78, 0.0),
 ('USW00026615', 60.78, -161.83, 0.0),
 ('USW00026533', 66.92, -151.52, 0.1),
 ('USC00501240', 61.02, -147.51, 0.0),
 ('USC00501684', 64.09, -141.92, 0.0),
 ('US1AKAB0048', 61.41, -149.49, 'T'),
 ('USC00501926', 62.83, -149.91, 0.5),
 ('USC00501987', 65.49, -144.64, 0.5),
 ('USC00502015', 64.25, -149.18, 0.5),
 ('USC00502084', 56.0, -132.83, 0.0),
 ('USW00025624', 55.22, -162.73, 0.0),
 ('USC00502107', 64.86, -147.

In [8]:
# Load (resort_name, latitude, longitude) for every stored resort; these rows
# feed the step that matches each ski resort to its closest station.
resort_conn = sqlite3.connect('./skiDataset.db')
resort_cursor = resort_conn.cursor()
ski_resorts = resort_cursor.execute('''
    SELECT resort_name, latitude, longitude FROM location
''').fetchall()
ski_resorts

[('Mohawk Mountain Ski Area — Cornwall', 41.838231, -73.3150586),
 ('Mount Southington — Plantsville', 41.5821041, -72.9248698),
 ('Powder Ridge Ski Area — Middlefield', 41.501645, -72.73645429999999),
 ('Ski Sundown — New Hartford', 41.8846852, -72.94669859999999),
 ('Baker Mountain — Bingham', 45.0585117, -69.8843382),
 ('Big Rock — Mars Hill', 46.5224036, -67.8289461),
 ('Big Squaw — Greenville', 45.5066618, -69.7020055),
 ('Black Mountain of Maine — Rumford', 44.5769444, -70.6133333),
 ('Camden Snow Bowl — Camden', 44.2172685, -69.13465339999999),
 ('Eaton Mountain — Skowhegan', 44.7681256, -69.6188915),
 ('Hermon Mountain — Hermon', 44.7807854, -68.9572709),
 ('Lonesome Pine Trails — Fort Kent', 47.2460499, -68.59446299999999),
 ('Lost Valley — Auburn', 44.1355896, -70.2806749),
 ('Mount Abram — Greenwood', 44.3797963, -70.7068649),
 ('Mount Jefferson Ski Area — Lee', 45.3575295, -68.283776),
 ('Pinnacle Ski Club — Pittsfield', 44.7825145, -69.3833847),
 ('Pleasant Mountain — Brid

In [3]:
import math
def haversine(lat1, lon1, lat2, lon2):
    """Great-circle distance between two points given in decimal degrees.

    Applies the haversine formula on a sphere of radius 6371 (Earth's mean
    radius in kilometres), so the result is in kilometres.

    Args:
        lat1, lon1: Latitude and longitude of the first point, in degrees.
        lat2, lon2: Latitude and longitude of the second point, in degrees.

    Returns:
        The distance along the sphere's surface as a float.
    """
    earth_radius = 6371

    # Half of each angular difference, in radians.
    half_dlat = ((lat2 - lat1) * math.pi / 180.0) / 2
    half_dlon = ((lon2 - lon1) * math.pi / 180.0) / 2

    # Both latitudes in radians.
    phi1 = lat1 * math.pi / 180.0
    phi2 = lat2 * math.pi / 180.0

    # Haversine of the central angle.
    a = (math.sin(half_dlat) ** 2 +
         math.sin(half_dlon) ** 2 * math.cos(phi1) * math.cos(phi2))

    central_angle = 2 * math.asin(math.sqrt(a))
    return earth_radius * central_angle

def min_distance_station(lat, lon, weatherstations, max_limit):
    """Pick the station closest to a point, ignoring those beyond ``max_limit``.

    Args:
        lat, lon: Coordinates of the reference point, in degrees.
        weatherstations: Iterable of station records with latitude at index 1
            and longitude at index 2.
        max_limit: Largest acceptable ``haversine`` distance (inclusive).

    Returns:
        The nearest station record within the limit, or ``None`` when no
        station qualifies. On a tie the earliest record wins.
    """
    closest = None
    closest_distance = math.inf
    for candidate in weatherstations:
        separation = haversine(lat, lon, candidate[1], candidate[2])
        # Written as a negated <= so the accept test matches exactly.
        if not (separation <= max_limit):
            continue
        if closest is None or separation < closest_distance:
            closest, closest_distance = candidate, separation
    return closest

In [4]:
# Create the per-resort daily snowfall table in skiDataset.db if it is absent.
db_file = "skiDataset.db"
conn = sqlite3.connect(db_file)
try:
    cursor = conn.cursor()
    # resort_name holds the resort's text name and references the TEXT primary
    # key of `location`, so it is declared TEXT (it was REAL, which gives the
    # column numeric affinity).
    cursor.execute('''
    CREATE TABLE IF NOT EXISTS snowfall (
        date TEXT,
        snowfall REAL,
        resort_name TEXT,
        FOREIGN KEY (resort_name) REFERENCES location(resort_name)
    )
''')
    conn.commit()
finally:
    conn.close()

In [9]:
from calendar import Calendar
# Match the daily snowfall data: for every day of 2015-2023, give each resort
# the reading of its nearest station (within a limit of 30) that reported a
# usable value on that day, and store it in skiDataset.db.
conn = sqlite3.connect('./rawWeather.db')
cursor = conn.cursor()
obj = Calendar()
try:
    for year in range(2015, 2024):
        for month in range(1, 13):
            # One connection (and one commit) per month.
            resort_conn = sqlite3.connect('./skiDataset.db')
            try:
                resort_cursor = resort_conn.cursor()
                for day in obj.itermonthdays(year, month):
                    # itermonthdays pads partial weeks with 0; skip those.
                    if day == 0:
                        continue
                    date = f'{year}{month:02d}{day:02d}'
                    print(date)
                    stations = valid_stations(date, cursor)

                    matched_rows = []
                    for ski_resort in ski_resorts:
                        nearest_station = min_distance_station(ski_resort[1], ski_resort[2], stations, 30)
                        if nearest_station is not None:
                            # A reading of 'T' is stored as 0.
                            reading = nearest_station[3] if nearest_station[3] != 'T' else 0
                            matched_rows.append((date, reading, ski_resort[0]))

                    # One batched insert per day instead of one call per resort.
                    resort_cursor.executemany('''
                            INSERT INTO snowfall (date, snowfall, resort_name)
                            VALUES (?, ?, ?)
                            ''', matched_rows)
                resort_conn.commit()
            finally:
                # Close the month's connection even when a day fails.
                resort_conn.close()
finally:
    # The raw-weather connection used to be left open for the kernel's lifetime.
    conn.close()

20150101
20150102
20150103
20150104
20150105
20150106
20150107
20150108
20150109
20150110
20150111
20150112
20150113
20150114
20150115
20150116
20150117
20150118
20150119
20150120
20150121
20150122
20150123
20150124
20150125
20150126
20150127
20150128
20150129
20150130
20150131
20150201
20150202
20150203
20150204
20150205
20150206
20150207
20150208
20150209
20150210
20150211
20150212
20150213
20150214
20150215
20150216
20150217
20150218
20150219
20150220
20150221
20150222
20150223
20150224
20150225
20150226
20150227
20150228
20150301
20150302
20150303
20150304
20150305
20150306
20150307
20150308
20150309
20150310
20150311
20150312
20150313
20150314
20150315
20150316
20150317
20150318
20150319
20150320
20150321
20150322
20150323
20150324
20150325
20150326
20150327
20150328
20150329
20150330
20150331
20150401
20150402
20150403
20150404
20150405
20150406
20150407
20150408
20150409
20150410
20150411
20150412
20150413
20150414
20150415
20150416
20150417
20150418
20150419
20150420
20150421
2

In [11]:
# Rewrite snowfall.date from 'YYYYMMDD' to 'YYYY-MM-DD'.
resort_conn = sqlite3.connect('./skiDataset.db')
try:
    resort_cursor = resort_conn.cursor()
    # `||` and substr() work on every SQLite version, whereas CONCAT() only
    # exists from SQLite 3.44. The length filter restricts the update to rows
    # still in the 8-character form, so re-running the cell does not mangle
    # dates that were already converted.
    resort_cursor.execute('''
        UPDATE snowfall
        SET date = substr(date, 1, 4) || '-' || substr(date, 5, 2) || '-' || substr(date, 7, 2)
        WHERE length(date) = 8
        ''')
    resort_conn.commit()
finally:
    resort_conn.close()