In [1]:
# %pip installs into the running kernel's environment; the pin matches the version this notebook was run with.
%pip install meteostat==1.6.7

Collecting meteostat
  Downloading meteostat-1.6.7-py3-none-any.whl.metadata (4.6 kB)
Downloading meteostat-1.6.7-py3-none-any.whl (31 kB)
Installing collected packages: meteostat
Successfully installed meteostat-1.6.7


In [133]:
import os
import sqlite3
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import pandas as pd
import requests
from bs4 import BeautifulSoup
from meteostat import Point, Daily

In [None]:
import sqlite3
import requests

# SQLite database file
db_file = "dataset.db"

# Arbitrary placeholder coordinates for the sample weather row below.
# They are defined here so the cell runs on a fresh kernel instead of relying
# on `latitude` / `longitude` left over from some other cell.
sample_latitude, sample_longitude = 40.0, -105.0

# Connect to the database; the try/finally guarantees the handle is released
# even if one of the statements below fails.
conn = sqlite3.connect(db_file)
try:
    cursor = conn.cursor()

    # One row per location *per day*: `date` is part of the primary key,
    # otherwise only a single day could ever be stored for a location.
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS weather (
            latitude REAL,
            longitude REAL,
            date TEXT,
            tavg REAL,
            tmin REAL,
            tmax REAL,
            snow REAL,
            PRIMARY KEY (latitude, longitude, date),
            FOREIGN KEY (latitude, longitude) REFERENCES location (latitude, longitude)
        )
    ''')

    # One row per resort, keyed by its name.
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS skiInfo (
            resort_name TEXT PRIMARY KEY,
            url TEXT,
            daily_ticket_price REAL,
            num_of_green INTEGER,
            num_of_blue INTEGER,
            num_of_black INTEGER,
            FOREIGN KEY (resort_name) REFERENCES location (resort_name)
        )
    ''')

    weather_data = [
        (sample_latitude, sample_longitude, "2024-02-24", 25.0, 20.0, 30.0, 10.0),
        # Add more weather data as needed
    ]
    # OR REPLACE keeps the cell re-runnable: a second execution overwrites the
    # sample rows instead of failing on the primary key.
    cursor.executemany('''
        INSERT OR REPLACE INTO weather (latitude, longitude, date, tavg, tmin, tmax, snow)
        VALUES (?, ?, ?, ?, ?, ?, ?)
    ''', weather_data)

    ski_info_data = [
        ("Resort1", "https://resort1.com", 50.0, 5, 10, 3),
        ("Resort2", "https://resort2.com", 60.0, 7, 8, 5),
        # Add more ski resort data as needed
    ]
    cursor.executemany('''
        INSERT OR REPLACE INTO skiInfo (resort_name, url, daily_ticket_price, num_of_green, num_of_blue, num_of_black)
        VALUES (?, ?, ?, ?, ?, ?)
    ''', ski_info_data)

    # Commit the changes
    conn.commit()
finally:
    # Always close the connection
    conn.close()


## Table location (resort_name, latitude, longitude, elevation, location_catalog, state, city, zipcode, address, url)

In [89]:
# Scrape the Wikipedia list of US ski areas into `ski_df`, one row per resort
# with the columns location_catalog (taken from h2 headings), state (taken
# from h3 headings) and resort_name (taken from the list items that follow).
url = "https://en.m.wikipedia.org/wiki/List_of_ski_areas_and_resorts_in_the_United_States"

# Send a GET request to the URL; the timeout stops a stalled connection from
# hanging the kernel forever.
response = requests.get(url, timeout=30)

# Fail loudly on anything but 200: silently skipping the parse would leave
# `ski_df` undefined (or stale from an earlier run) for the cells below.
if response.status_code != 200:
    raise RuntimeError(f"Failed to fetch {url}: HTTP {response.status_code}")

# Parse the HTML content of the page
soup = BeautifulSoup(response.content, 'html.parser')

# Headings in document order: each h2 updates the current region, each h3
# introduces one state's resort list.
sections = soup.find_all(['h2', 'h3'])
data = []

location_catalog = ''  # region of the most recent matching h2

for section in sections:
    if section.name == 'h2':
        headline = section.find('span', {'class': 'mw-headline'})
        # Only headings containing " (" are treated as regions; the
        # parenthesised suffix is dropped.
        if headline is not None and ' (' in headline.text.strip():
            location_catalog = headline.text.strip().split(' (')[0]
    elif section.name == 'h3':
        span = section.span
        # Skip h3 headings that do not carry the expected headline span.
        if span is None or 'mw-headline' not in span.attrs.get('class', []):
            continue

        # The state name is read from the title of the heading's link; skip
        # headings without a link or title instead of crashing on None.
        anchor = span.a
        state = anchor.get('title') if anchor is not None else None
        if not state:
            continue
        state = state.split(' (')[0]

        resort_list = section.find_next('ul')
        if resort_list is None:
            continue

        for resort in resort_list.find_all('li'):
            # Keep only the text before the first " (" of each list item.
            resort_name = resort.text.strip().split(' (')[0]
            data.append({'location_catalog': location_catalog, 'state': state, 'resort_name': resort_name})

# Create DataFrame
ski_df = pd.DataFrame(data)

In [90]:
ski_df

Unnamed: 0,location_catalog,state,resort_name
0,New England,Connecticut,Mohawk Mountain Ski Area — Cornwall
1,New England,Connecticut,Mount Southington — Plantsville
2,New England,Connecticut,Powder Ridge Ski Area — Middlefield
3,New England,Connecticut,Ski Sundown — New Hartford
4,New England,Maine,Baker Mountain — Bingham
...,...,...,...
461,West Coast,Washington,Alpental
462,West Coast,Washington,Summit Central
463,West Coast,Washington,Summit East
464,West Coast,Washington,Summit West


In [98]:
# Number of resorts per (region, state) pair.
region_state_keys = ["location_catalog", "state"]
ski_df.groupby(region_state_keys).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,resort_name
location_catalog,state,Unnamed: 2_level_1
Mid-Atlantic,Maryland,1
Mid-Atlantic,New Jersey,4
Mid-Atlantic,New York,36
Mid-Atlantic,Pennsylvania,22
Midwest,Illinois,5
Midwest,Indiana,2
Midwest,Iowa,4
Midwest,Michigan,34
Midwest,Minnesota,17
Midwest,Missouri,2


In [86]:
# Function to get location details from Google Maps API
def _get_json(url, params):
    """GET `url` with the query `params` and return the decoded JSON body."""
    # `params` lets requests URL-encode the values; the timeout prevents one
    # stalled request from blocking the whole resort loop.
    return requests.get(url, params=params, timeout=30).json()


def _address_component(address_components, component_type):
    """Return the long_name of the first component tagged `component_type`, or ""."""
    return next(
        (component["long_name"] for component in address_components
         if component_type in component["types"]),
        "",
    )


def get_location_details(resort_name, api_key):
    """Look up a resort through the Google Maps web services.

    Calls, in order, the Geocoding, Elevation, Find Place and Place Details
    endpoints.

    Args:
        resort_name: Free-text name passed as the geocoding address and as
            the Find Place text query.
        api_key: Key sent with every request.

    Returns:
        A tuple ``(latitude, longitude, elevation, state, city, zipcode,
        address, url)``:

        * all eight values are ``None`` when geocoding does not return
          status "OK" with at least one result;
        * ``elevation`` is ``None`` when the elevation lookup fails;
        * ``state`` / ``city`` / ``zipcode`` are ``""`` when the matching
          address component is absent;
        * ``url`` is ``None`` when no place candidate is found, and ``""``
          when a place is found but its details carry no website.
    """
    geocode_data = _get_json(
        "https://maps.googleapis.com/maps/api/geocode/json",
        {"address": resort_name, "key": api_key},
    )
    if geocode_data.get("status") != "OK" or not geocode_data.get("results"):
        return None, None, None, None, None, None, None, None

    # Only the first (best) geocoding match is used.
    result = geocode_data["results"][0]
    location = result["geometry"]["location"]
    latitude, longitude = location["lat"], location["lng"]
    address = result.get("formatted_address", "")

    address_components = result["address_components"]
    state = _address_component(address_components, "administrative_area_level_1")
    city = _address_component(address_components, "locality")
    zipcode = _address_component(address_components, "postal_code")

    elevation_data = _get_json(
        "https://maps.googleapis.com/maps/api/elevation/json",
        {"locations": f"{latitude},{longitude}", "key": api_key},
    )
    if elevation_data.get("status") == "OK" and elevation_data.get("results"):
        elevation = elevation_data["results"][0].get("elevation")
    else:
        elevation = None

    # Use Places API to resolve the resort to a place_id
    places_data = _get_json(
        "https://maps.googleapis.com/maps/api/place/findplacefromtext/json",
        {
            "input": resort_name,
            "inputtype": "textquery",
            "fields": "place_id",
            "key": api_key,
        },
    )

    url = None
    if places_data.get("status") == "OK" and places_data.get("candidates"):
        place_id = places_data["candidates"][0]["place_id"]

        # Use Place Details API to get website. A non-OK details payload has
        # no "result" key, so fall back to "" instead of raising KeyError.
        details_data = _get_json(
            "https://maps.googleapis.com/maps/api/place/details/json",
            {"place_id": place_id, "key": api_key},
        )
        url = details_data.get("result", {}).get("website", "")

    return latitude, longitude, elevation, state, city, zipcode, address, url


In [84]:
# conn = sqlite3.connect(db_file)
# cursor = conn.cursor()

# # Drop the location table if it exists
# cursor.execute('DROP TABLE IF EXISTS location')

# # Commit the changes and close the connection
# conn.commit()
# conn.close()

In [92]:
# SQLite database file
db_file = "skiDataset.db"

# Google Maps API key: taken from the GOOGLE_MAPS_API_KEY environment variable
# when it is set, so the real key does not have to be written into the notebook.
api_key = os.environ.get('GOOGLE_MAPS_API_KEY', 'HIDDEN')

# Connect to the database; the try/finally closes the handle even when a
# request or insert fails part-way through the loop.
conn = sqlite3.connect(db_file)
try:
    cursor = conn.cursor()

    # Create table location
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS location (
            resort_name TEXT PRIMARY KEY,
            latitude REAL,
            longitude REAL,
            elevation REAL,
            location_catalog TEXT,
            state TEXT,
            city TEXT,
            zipcode TEXT,
            address TEXT,
            url TEXT
        )
    ''')

    # Resorts already stored (by an earlier or interrupted run, or an earlier
    # duplicate name in ski_df) are skipped: resort_name is the primary key,
    # so re-inserting would fail, and skipping also avoids repeating API calls.
    cursor.execute('SELECT resort_name FROM location')
    stored_resorts = {name for (name,) in cursor.fetchall()}

    # Get the latitude, longitude and other details from the Google Maps API
    # and insert them into the database.
    for index, row in ski_df.iterrows():
        location_catalog = row['location_catalog']
        resort_name = row['resort_name']
        if resort_name in stored_resorts:
            continue

        # NOTE: the state stored is the one returned by the geocoder, not the
        # `state` column of ski_df.
        latitude, longitude, elevation, state, city, zipcode, address, url = get_location_details(resort_name, api_key)
        cursor.execute('''
            INSERT INTO location (resort_name, latitude, longitude, elevation, location_catalog, state, city, zipcode, address, url)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        ''', (resort_name, latitude, longitude, elevation, location_catalog, state, city, zipcode, address, url))

        # Commit per resort so a failure later in the loop keeps what was
        # already fetched.
        conn.commit()
        stored_resorts.add(resort_name)
finally:
    # Always close the connection
    conn.close()

## Table weather (resort_name, latitude, longitude, date, tavg, tmin, tmax, snowfall)

In [139]:
import requests
from datetime import datetime, timedelta

def getWeather(resort_name, latitude, longitude, cursor,
               start_date=datetime(2014, 1, 1), end_date=datetime(2024, 2, 24),
               access_key=None):
    """Fetch daily historical weather for one resort and store it in `weather`.

    The date range is requested from the weatherstack `/historical` endpoint
    in consecutive 60-day intervals; every returned day is written through
    `cursor` into the `weather` table. Nothing is committed here; the caller
    owns the transaction.

    Args:
        resort_name: Value stored in the `resort_name` column.
        latitude, longitude: Coordinates, sent as the "lat,lon" query and
            stored with every row.
        cursor: DB-API cursor on a database that has the `weather` table.
        start_date: First day to fetch (inclusive).
        end_date: Last day to fetch (inclusive).
        access_key: weatherstack access key. When None, the
            WEATHERSTACK_ACCESS_KEY environment variable is used, falling
            back to the placeholder "hidden".

    Raises:
        RuntimeError: if a response reports ``success == False`` or carries
            no "historical" section.
    """
    if access_key is None:
        access_key = os.environ.get("WEATHERSTACK_ACCESS_KEY", "hidden")
    coordinate = f"{latitude},{longitude}"

    # `<=` so that a final interval consisting of just `end_date` is still
    # requested instead of being silently dropped.
    while start_date <= end_date:
        # Set the end date for each 60-day interval (start day + 59 more days)
        interval_end_date = min(start_date + timedelta(days=59), end_date)

        params = {
            'access_key': access_key,
            'query': coordinate,
            'historical_date_start': start_date.strftime('%Y-%m-%d'),
            'historical_date_end': interval_end_date.strftime('%Y-%m-%d'),
            'unit': 'm'
        }

        api_result = requests.get('http://api.weatherstack.com/historical', params, timeout=30)
        api_response = api_result.json()

        # Surface API errors with their payload rather than failing later
        # with a bare KeyError on "historical".
        if api_response.get("success") is False or "historical" not in api_response:
            raise RuntimeError(
                f"weatherstack request failed for {resort_name} "
                f"({params['historical_date_start']} to {params['historical_date_end']}): "
                f"{api_response.get('error', api_response)}"
            )
        past_data = dict(api_response["historical"])

        for date, info in past_data.items():
            info = dict(info)
            tmin = info["mintemp"]
            tmax = info["maxtemp"]
            tavg = info["avgtemp"]
            total_snow = info.get("totalsnow")
            # cm to mm; a missing snow value is stored as NULL
            snowfall = total_snow * 10 if total_snow is not None else None

            # OR REPLACE makes re-runs (e.g. after an interrupted backfill)
            # overwrite existing days instead of failing on the primary key.
            cursor.execute('''
                INSERT OR REPLACE INTO weather(resort_name, latitude, longitude, date, tavg, tmin, tmax, snowfall)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?)
            ''', (resort_name, latitude, longitude, date, tavg, tmin, tmax, snowfall))

        # Move to the next 60-day interval
        start_date = interval_end_date + timedelta(days=1)


In [140]:
# Backfill the weather table for every geocoded resort in `location`.
# The try/finally closes the connection even if the (long) loop is interrupted.
conn = sqlite3.connect(db_file)
try:
    cursor = conn.cursor()

    # Create the table weather
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS weather (
            resort_name TEXT,
            latitude REAL,
            longitude REAL,
            date TEXT,
            tavg REAL,
            tmin REAL,
            tmax REAL,
            snowfall REAL,
            PRIMARY KEY (latitude, longitude, date),
            FOREIGN KEY (latitude, longitude) REFERENCES location (latitude, longitude),
            FOREIGN KEY (resort_name) REFERENCES location (resort_name)
        )
    ''')

    # Resorts that already have weather rows are skipped so the cell can be
    # re-run to resume after an interruption. Work is committed per resort
    # (see below), so a resort present here was stored completely.
    cursor.execute('SELECT DISTINCT resort_name FROM weather;')
    stored_resorts = {name for (name,) in cursor.fetchall()}

    # Get the data. Rows whose geocoding failed have NULL coordinates and are
    # excluded; otherwise they would be queried as "None,None".
    cursor.execute('''
        SELECT resort_name, latitude, longitude
        FROM location
        WHERE latitude IS NOT NULL AND longitude IS NOT NULL;
    ''')
    result_set = cursor.fetchall()

    # Process the data
    for resort_name, latitude, longitude in result_set:
        if resort_name in stored_resorts:
            continue
        getWeather(resort_name, latitude, longitude, cursor)
        # Commit after each resort so an interrupt only loses the resort in
        # progress, not the whole run.
        conn.commit()
finally:
    # Always close the connection
    conn.close()


KeyboardInterrupt: 

466

In [141]:
# Recovery cell: after interrupting the weather backfill, persist whatever was
# inserted so far and release the database handle.
try:
    conn.commit()
except sqlite3.ProgrammingError:
    # When the previous cell ran to completion the connection is already
    # closed and commit() raises; there is nothing left to save in that case.
    pass
finally:
    # close() is safe to call on an already-closed connection
    conn.close()