In [40]:
import requests
import pandas as pd
import sqlalchemy as sa
from datetime import datetime, timedelta, timezone

In [41]:
# New API URL for weather forecast data
api_url = 'https://api.open-meteo.com/v1/forecast'

# Database connection URL
connection_url = sa.engine.URL.create(
    drivername="mssql+pyodbc",
    username="wyx0108",
    password="2024!Schulich",
    host="mban2024-ms-sql-server.c1oick8a8ywa.ca-central-1.rds.amazonaws.com",
    port="1433",
    database="wyx0108_db",
    query={
        "driver": "ODBC Driver 18 for SQL Server",
        "TrustServerCertificate": "yes"
    }
)

# Create the database engine
engine = sa.create_engine(connection_url)


In [42]:
# Load all postal codes from the CSV
postal_codes_df = pd.read_csv('/Users/Owner/Desktop/ca_geo_dimension.csv')

# Filter for postal codes in Toronto
toronto_postal_codes_df = postal_codes_df[postal_codes_df['region'].str.contains("Toronto", case=False)]

# For now, select a subset of 15 postal codes
subset_size = 15
use_all_postal_codes = False  # Set this to True to process all postal codes

if use_all_postal_codes:
    selected_postal_codes = toronto_postal_codes_df
else:
    selected_postal_codes = toronto_postal_codes_df.sample(n=subset_size, random_state=1)

# Convert the selected postal codes to a list of dictionaries
postal_codes = selected_postal_codes.to_dict('records')


In [43]:
# Define the date range for the weather forecast (e.g., the next 7 days)
start_date = datetime.now(timezone.utc).strftime('%Y-%m-%d')  # Current date in UTC
end_date = (datetime.strptime(start_date, '%Y-%m-%d') + timedelta(days=6)).strftime('%Y-%m-%d')  # 7 days from today

# Initialize list to store weather data
forecast_weather_data = []

# Iterate over each postal code location
for location in postal_codes:
    # Define the parameters for the API call
    params = {
        'latitude': location['latitude'],  # Latitude
        'longitude': location['longitude'],  # Longitude
        'start_date': start_date,  # Start date in the format 'YYYY-MM-DD'
        'end_date': end_date,      # End date in the format 'YYYY-MM-DD'
        'daily': 'temperature_2m_max,temperature_2m_min,apparent_temperature_max,apparent_temperature_min',  # Request daily max/min temperature and apparent temperature
        'timezone': 'UTC'  # Ensure timestamps are in UTC
    }
    
    # Make the API request
    response = requests.get(api_url, params=params)
    
    # Check if the API request was successful
    if response.status_code == 200:
        data = response.json()
        for i, timestamp in enumerate(data['daily']['time']):
            entry = {
                'city': location['region'],  # City or region
                'latitude': location['latitude'],  # Latitude
                'longitude': location['longitude'],  # Longitude
                'zipcode': location['zipcode'],  # Zipcode
                'max_temperature': data['daily']['temperature_2m_max'][i],  # Maximum Temperature (2m)
                'min_temperature': data['daily']['temperature_2m_min'][i],  # Minimum Temperature (2m)
                'max_feels_like': data['daily']['apparent_temperature_max'][i],  # Maximum Apparent Temperature (2m)
                'min_feels_like': data['daily']['apparent_temperature_min'][i],  # Minimum Apparent Temperature (2m)
                'timestamp_utc': timestamp  # Date in UTC
            }
            forecast_weather_data.append(entry)
    else:
        print(f"Failed to fetch data for {location['zipcode']}: {response.status_code}")



In [44]:
# Insert the cleaned data into the SQL database
df.to_sql(
    name='forecast_weather',  # Specify the table name
    con=engine,  # Use the SQLAlchemy engine for database connection
    schema='uploads',  # Specify the schema name
    if_exists='replace',  # Replace the table if it already exists
    index=False,  # Do not write DataFrame index as a column
    dtype={
        'city': sa.types.VARCHAR(100),  # Define city column as VARCHAR(100)
        'latitude': sa.types.FLOAT,  # Define latitude column as FLOAT
        'longitude': sa.types.FLOAT,  # Define longitude column as FLOAT
        'zipcode': sa.types.VARCHAR(10),  # Define zipcode column as VARCHAR(10)
        'max_temperature': sa.types.DECIMAL(5, 2),  # Define max temperature column as DECIMAL(5, 2)
        'min_temperature': sa.types.DECIMAL(5, 2),  # Define min temperature column as DECIMAL(5, 2)
        'max_feels_like': sa.types.DECIMAL(5, 2),  # Define max feels_like column as DECIMAL(5, 2)
        'min_feels_like': sa.types.DECIMAL(5, 2),  # Define min feels_like column as DECIMAL(5, 2)
        'timestamp_utc': sa.types.DATETIME  # Define timestamp_utc column as DATETIME
    },
    method='multi'  # Enable multi-row insert for better performance
)

print("Forecast weather data has been successfully stored.")

# Display the first 10 rows of the DataFrame
df.head(10)




Forecast weather data has been successfully stored.


Unnamed: 0,city,latitude,longitude,zipcode,max_temperature,min_temperature,max_feels_like,min_feels_like,timestamp_utc
0,Toronto,43.780775,-79.40083,M2N4N1,28.0,22.1,31.6,23.7,2024-08-02 00:00:00
1,Toronto,43.780775,-79.40083,M2N4N1,29.6,21.2,32.7,24.7,2024-08-03 00:00:00
2,Toronto,43.780775,-79.40083,M2N4N1,31.2,20.0,32.2,21.0,2024-08-04 00:00:00
3,Toronto,43.780775,-79.40083,M2N4N1,26.9,20.2,26.8,20.7,2024-08-05 00:00:00
4,Toronto,43.780775,-79.40083,M2N4N1,22.3,17.7,21.5,16.6,2024-08-06 00:00:00
5,Toronto,43.780775,-79.40083,M2N4N1,22.6,17.6,21.5,16.4,2024-08-07 00:00:00
6,Toronto,43.780775,-79.40083,M2N4N1,25.4,17.0,24.6,16.4,2024-08-08 00:00:00
7,Toronto,43.780775,-79.40083,M2N4N1,31.2,19.9,31.7,21.5,2024-08-09 00:00:00
8,Toronto,43.810684,-79.319894,M1W3H2,28.1,22.2,32.1,24.7,2024-08-02 00:00:00
9,Toronto,43.810684,-79.319894,M1W3H2,29.4,21.4,32.3,24.8,2024-08-03 00:00:00


In [45]:
# Convert the list of data to a DataFrame
df = pd.DataFrame(forecast_weather_data)

# Convert timestamp_utc to a proper datetime object if not already
df['timestamp_utc'] = pd.to_datetime(df['timestamp_utc']).dt.strftime('%Y-%m-%d %H:%M:%S')

# Print the DataFrame's column names and types
print(df.dtypes)

city                object
latitude           float64
longitude          float64
zipcode             object
max_temperature    float64
min_temperature    float64
max_feels_like     float64
min_feels_like     float64
timestamp_utc       object
dtype: object


In [46]:


# Insert the cleaned data into the SQL database
df.to_sql(
    name='forecast_weather',  # Specify the table name
    con=engine,  # Use the SQLAlchemy engine for database connection
    schema='uploads',  # Specify the schema name
    if_exists='replace',  # Replace the table if it already exists
    index=False,  # Do not write DataFrame index as a column
    dtype={
        'city': sa.types.VARCHAR(100),  # Define city column as VARCHAR(100)
        'latitude': sa.types.FLOAT,  # Define latitude column as FLOAT
        'longitude': sa.types.FLOAT,  # Define longitude column as FLOAT
        'zipcode': sa.types.VARCHAR(10),  # Define zipcode column as VARCHAR(10)
        'max_temperature': sa.types.DECIMAL(5, 2),  # Define max temperature column as DECIMAL(5, 2)
        'min_temperature': sa.types.DECIMAL(5, 2),  # Define min temperature column as DECIMAL(5, 2)
        'max_feels_like': sa.types.DECIMAL(5, 2),  # Define max feels_like column as DECIMAL(5, 2)
        'min_feels_like': sa.types.DECIMAL(5, 2),  # Define min feels_like column as DECIMAL(5, 2)
        'timestamp_utc': sa.types.DATETIME  # Define timestamp_utc column as DATETIME
    },
    method='multi'  # Enable multi-row insert for better performance
)

print("Forecast weather data has been successfully stored.")


Forecast weather data has been successfully stored.


In [47]:
df.head(10)

Unnamed: 0,city,latitude,longitude,zipcode,max_temperature,min_temperature,max_feels_like,min_feels_like,timestamp_utc
0,Toronto,43.780775,-79.40083,M2N4N1,28.0,22.1,31.6,23.7,2024-08-02 00:00:00
1,Toronto,43.780775,-79.40083,M2N4N1,29.6,21.2,32.7,24.7,2024-08-03 00:00:00
2,Toronto,43.780775,-79.40083,M2N4N1,31.2,20.0,32.2,21.0,2024-08-04 00:00:00
3,Toronto,43.780775,-79.40083,M2N4N1,26.9,20.2,26.8,20.7,2024-08-05 00:00:00
4,Toronto,43.780775,-79.40083,M2N4N1,22.3,17.7,21.5,16.6,2024-08-06 00:00:00
5,Toronto,43.780775,-79.40083,M2N4N1,22.6,17.6,21.5,16.4,2024-08-07 00:00:00
6,Toronto,43.780775,-79.40083,M2N4N1,25.4,17.0,24.6,16.4,2024-08-08 00:00:00
7,Toronto,43.810684,-79.319894,M1W3H2,28.1,22.2,32.1,24.7,2024-08-02 00:00:00
8,Toronto,43.810684,-79.319894,M1W3H2,29.4,21.4,32.3,24.8,2024-08-03 00:00:00
9,Toronto,43.810684,-79.319894,M1W3H2,32.1,19.9,33.4,21.2,2024-08-04 00:00:00
