In [1]:
# Look up this machine's host name and the IP address that name resolves to,
# then print both.
import socket

hostname = socket.gethostname()
# Resolve the host name obtained above to an address string.
IPAddr = socket.gethostbyname(hostname)

# sep="" keeps the label and the value adjacent, with no space in between.
print("Your Computer Name is:", hostname, sep="")
print("Your Computer IP Address is:", IPAddr, sep="")

Your Computer Name is:LAPTOP-MHAIMPJ7
Your Computer IP Address is:172.16.0.2


# Install and Import Required Libraries

In [57]:
!pip install -q openmeteo-requests requests-cache retry-requests

In [13]:
import os
import sys
import openmeteo_requests
import requests_cache
import pandas as pd
from retry_requests import retry
from datetime import datetime, timedelta
from tqdm.notebook import trange, tqdm

# Define Functions

In [14]:
# TODO: Add error handling
def fetch_weather(latitudes, longitudes, start_date, end_date):
    """
    Get hourly weather data for (latitudes, longitudes) from start_date to end_date

    A single request is sent to the Open-Meteo archive endpoint asking for
    temperature_2m, rain and relative_humidity_2m (in that order) with
    "timezone" set to "auto". The HTTP session is cached on disk ('.cache',
    entries expire after 3600 seconds) and retried up to 5 times on error.

    Args:
        latitudes (iterable): Latitudes [1st_lat, 2nd_lat, ...]
        longitudes (iterable): Longitudes corresponding to the latitudes [1st_long, 2nd_long, ...]
        start_date (str): String of starting date in the ISO 8601 format (i.e. YYYY-MM-DD)
        end_date (str): String of ending date in the same format as start_date

    Returns:
        list: Whatever ``openmeteo.weather_api`` returns for the request; the
        rest of this notebook treats element k as the response for the k-th
        coordinate pair. An empty list is returned, without contacting the
        API, when no coordinates are given.

    Raises:
        ValueError: If latitudes and longitudes do not have the same length.
    """

    # Materialise the inputs first so that any iterable (e.g. a pandas Series)
    # can be measured and sent as a plain list.
    latitudes = list(latitudes)
    longitudes = list(longitudes)

    # Validate before touching the network: the API is rate limited, so a
    # malformed request should not cost a call.
    if len(latitudes) != len(longitudes):
        raise ValueError(
            "latitudes and longitudes must have the same length "
            f"(got {len(latitudes)} and {len(longitudes)})"
        )
    if not latitudes:
        return []

    url = "https://archive-api.open-meteo.com/v1/archive"
    # Setup the Open-Meteo API client with cache and retry on error
    cache_session = requests_cache.CachedSession('.cache', expire_after = 3600)
    retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
    openmeteo = openmeteo_requests.Client(session = retry_session)

    params = {
        "latitude": latitudes,
        "longitude": longitudes,
        "start_date": start_date,
        "end_date": end_date,
        # The order of this list fixes the index of each hourly variable in
        # the response.
        "hourly": ["temperature_2m", "rain", "relative_humidity_2m"],
        "timezone": "auto"
    }

    responses = openmeteo.weather_api(url, params=params)
    return responses

In [15]:
# Function for processing the responses
def process_response(response):
    """
    Convert one Open-Meteo response into a DataFrame of daily aggregates.

    The hourly series of the response is read as three variables, in the
    order temperature, rain, relative humidity, and grouped into calendar
    days of the location's local time.

    Args:
        response: One response object for a single location. It must expose
            ``Hourly()`` and ``UtcOffsetSeconds()``.

    Returns:
        pandas.DataFrame: One row per day with the columns Datetime,
        TemperatureMean, TemperatureMax, TemperatureMin, RainSum,
        RelativeHumidityMean, RelativeHumidityMax, RelativeHumidityMin.
        Datetime is timezone-labelled UTC but holds the local calendar day
        (the UTC offset has been added to the timestamps).
    """
    # Hourly
    hourly = response.Hourly()

    # Time()/TimeEnd() are Unix timestamps. When the request asks for a local
    # timezone the series starts at local midnight, so the location's UTC
    # offset is added; otherwise the daily grouping below would cut days at
    # UTC midnight and produce partial first/last days. The offset is 0 for
    # GMT requests, which leaves the timestamps unchanged.
    utc_offset = response.UtcOffsetSeconds()
    hourly_index = pd.date_range(
        start = pd.to_datetime(hourly.Time() + utc_offset, unit = "s", utc = True),
        end = pd.to_datetime(hourly.TimeEnd() + utc_offset, unit = "s", utc = True),
        freq = pd.Timedelta(seconds = hourly.Interval()),
        inclusive = "left"
        )

    # Variable indices follow the order of the "hourly" list in the request:
    # temperature_2m, rain, relative_humidity_2m.
    hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()
    rain = hourly.Variables(1).ValuesAsNumpy()
    relative_humidity_2m = hourly.Variables(2).ValuesAsNumpy()

    # Turn response into df
    hourly_df = pd.DataFrame({
        "Datetime" : hourly_index,
        "Temperature" : hourly_temperature_2m,
        "Rain" : rain,
        "RelativeHumidity" : relative_humidity_2m
    })

    # Aggregate for daily data
    daily_df = hourly_df.groupby(pd.Grouper(key='Datetime', freq='D')).agg({
        'Temperature': ['mean', 'max', 'min'],
        'Rain': ['sum'],
        'RelativeHumidity' :['mean', 'max', 'min']
    })

    # Flatten the (variable, statistic) column pairs, e.g.
    # ('Temperature', 'mean') -> 'TemperatureMean'.
    daily_df.columns = [
        variable + statistic.capitalize()
        for variable, statistic in daily_df.columns.values
    ]
    daily_df = daily_df.reset_index()

    return daily_df

# Collect Data via API Responses

In [16]:
# Load the cluster table; later cells read its Latitude, Longitude and
# ClusterID columns.
df_cluster = pd.read_csv(filepath_or_buffer="Datasets/Clusters.csv")

In [17]:
# Accumulator for the API responses gathered by the fetch loop.
responses = list()

In [19]:
# Batch configuration: rows [start, end) of df_cluster are requested `step`
# rows at a time for the given date range.
start = 1700
end = len(df_cluster)
step = 100
start_date = '2000-01-01'
end_date = '2024-07-31'

for i in range(start, end, step):
    # Clamp the upper bound so the final, possibly shorter, batch is sliced
    # and reported with its real extent instead of running past `end`.
    batch_end = min(i + step, end)
    try:
        # .iloc makes the positional (row-number) slicing explicit.
        latitudes = df_cluster.Latitude.iloc[i:batch_end]
        longitudes = df_cluster.Longitude.iloc[i:batch_end]
        responses.extend(fetch_weather(latitudes, longitudes, start_date, end_date))
        print(f"Responses fetched for rows {i}-{batch_end}")

    except Exception as err:
        # Stop at the first failure and report the batch start `i`, which is
        # the row to resume from on the next run.
        s = f"{err} Encountered at iteration {i}"
        print(s)
        break

{'error': True, 'reason': 'Daily API request limit exceeded. Please try again tomorrow.'} Encountered at iteration 1700


Notes:
- Last iteration reached: 400 (set `start` to this value when resuming)

# Process Responses

In [9]:
# Cluster identifiers, looked up by row label when tagging each processed
# response.
clusterID = df_cluster["ClusterID"]

In [10]:
# Create the output CSV (with header) from the first response, unless a file
# with that name already exists.
# NOTE(review): the file name depends on `start` and on whatever value the
# loop variable `i` currently holds.
# parquet_path = '../Datasets/WeatherData.parquet'
csv_path = f"Datasets/WeatherData{start}_{i}.csv"
if not os.path.exists(csv_path):
    df_out = process_response(responses[0])
    # Put the cluster identifier in the first column.
    df_out.insert(loc=0, column='ClusterID', value=clusterID[start])
    df_out.to_csv(path_or_buf=csv_path, index=False)

In [11]:
# Append the daily frames of the remaining responses (index 1 onward) to the
# CSV, without repeating the header.
# NOTE(review): rows are appended unconditionally, so running this cell a
# second time writes the same rows again.
end = start + len(responses)

for i, res in enumerate(responses[1:], start=1):
    df_out = process_response(res)
    # Response i belongs to the cluster at row label start + i.
    df_out.insert(loc=0, column='ClusterID', value=clusterID[i + start])
    df_out.to_csv(path_or_buf=csv_path, mode='a', index=False, header=False)

  0%|          | 0/99 [00:00<?, ?it/s]

# Check the Data

In [16]:
# Read the weather CSV back from disk to inspect what was written.
df_weather = pd.read_csv(filepath_or_buffer=csv_path)

In [17]:
# Number of distinct cluster identifiers present in the saved weather data.
df_weather["ClusterID"].nunique()

100