# Import Hourly Weather Data

This notebook fetches hourly weather data for Semarang (2000-present) from the Open-Meteo Archive API.

In [None]:
import time
from datetime import datetime
from pathlib import Path

import pandas as pd
import requests

# Coordinates and Parameters
# Location sent with every request; the notebook targets Semarang.
LATITUDE = -7.0520702239386175
LONGITUDE = 110.43532807750137
# Sent as the request's "timezone" parameter.
TIMEZONE = "Asia/Jakarta"
# Open-Meteo Archive API endpoint that every chunk request is issued against.
API_URL = "https://archive-api.open-meteo.com/v1/archive"

# Weather Condition Mapping
# Ordered (WMO codes, condition label) pairs; the first group containing the
# code decides the label.
_WMO_CONDITION_GROUPS = (
    ((0,), 'Clear'),
    ((1, 2), 'Partially cloudy'),
    ((3, 45, 48), 'Overcast'),
    ((51, 53, 55), 'Rain'),
    ((61, 63, 65), 'Rain, Overcast'),
    ((80, 81, 82), 'Rain, Partially cloudy'),
    ((95, 96, 99), 'Rain'),
)


def map_weather_code(code):
    """Translate a WMO weather code into the custom condition label.

    Args:
        code: WMO weather code, or ``None`` when no code is available.
            Values that compare equal to a listed integer (e.g. ``1.0``)
            are treated the same as that integer.

    Returns:
        The condition string for the code, or ``'Unknown'`` when the code
        is ``None`` or is not one of the mapped values.
    """
    if code is not None:
        for wmo_codes, label in _WMO_CONDITION_GROUPS:
            if code in wmo_codes:
                return label
    return 'Unknown'

def fetch_hourly_data_chunk(start_date, end_date, timeout=60):
    """Fetch hourly data for a specific date range.

    Issues a single GET request to ``API_URL`` for the configured location
    and timezone, asking for temperature, relative humidity, wind speed,
    mean-sea-level pressure and weather code.

    Args:
        start_date: Value for the API's ``start_date`` parameter
            (callers in this file pass ``YYYY-MM-DD`` strings).
        end_date: Value for the API's ``end_date`` parameter
            (same format as ``start_date``).
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` waits indefinitely, so one stalled
            connection would hang the whole download.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.RequestException: On connection problems,
            timeouts, or a non-success HTTP status.
    """
    params = {
        "latitude": LATITUDE,
        "longitude": LONGITUDE,
        "start_date": start_date,
        "end_date": end_date,
        "hourly": ["temperature_2m", "relative_humidity_2m", "wind_speed_10m",
                   "pressure_msl", "weather_code"],
        "timezone": TIMEZONE,
    }

    response = requests.get(API_URL, params=params, timeout=timeout)
    # Turn HTTP error statuses into exceptions so callers can skip the chunk.
    response.raise_for_status()
    return response.json()

def fetch_historical_hourly_data(start_year=2000,
                                 output_file="datasets/historical_data_hourly.csv"):
    """Fetch hourly data from ``start_year`` to today in yearly chunks and save it as CSV.

    Each calendar year is requested separately through
    ``fetch_hourly_data_chunk``; the current year is requested only up to
    today's date. Years that fail to download or return an unusable payload
    are reported and skipped, so one bad year does not discard the rest.

    The combined table is written to ``output_file`` with the columns
    ``id, hour, day, month, year, temp, humidity, windspeed,
    sealevelpressure, weather_code, conditions``. A summary and the first
    rows are printed.

    Args:
        start_year: First calendar year to download.
        output_file: Path of the CSV file to write. Missing parent
            directories are created.

    Returns:
        None. When no year yields data, nothing is written.
    """
    today = datetime.now()
    end_year = today.year

    all_data = []

    for year in range(start_year, end_year + 1):
        start_date = f"{year}-01-01"
        # The current year is only requested up to today's date.
        end_date = today.strftime("%Y-%m-%d") if year == end_year else f"{year}-12-31"

        print(f"Fetching data for {year}...")

        try:
            data = fetch_hourly_data_chunk(start_date, end_date)
            hourly = data.get("hourly", {})

            if not hourly:
                print(f"No hourly data found for {year}.")
                continue

            df_year = pd.DataFrame({
                "datetime": hourly["time"],
                "temp": hourly["temperature_2m"],
                "humidity": hourly["relative_humidity_2m"],
                "windspeed": hourly["wind_speed_10m"],
                "sealevelpressure": hourly["pressure_msl"],
                "weather_code": hourly["weather_code"],
            })
        except requests.exceptions.RequestException as e:
            print(f"Error fetching data for {year}: {e}")
        except (KeyError, ValueError) as e:
            # A field is missing or the columns have mismatched lengths:
            # skip this year instead of aborting the whole download.
            print(f"Unexpected response format for {year}: {e!r}")
        else:
            all_data.append(df_year)
        finally:
            # Pause between requests on every path (including failures) so
            # errors do not turn into back-to-back calls against the API.
            time.sleep(0.5)

    if not all_data:
        print("No data fetched.")
        return

    df = pd.concat(all_data, ignore_index=True)

    # Split the timestamp into separate calendar columns for the output.
    df["datetime"] = pd.to_datetime(df["datetime"])
    df["hour"] = df["datetime"].dt.hour
    df["day"] = df["datetime"].dt.day
    df["month"] = df["datetime"].dt.month
    df["year"] = df["datetime"].dt.year

    df["conditions"] = df["weather_code"].apply(map_weather_code)
    # Sequential row identifier over the concatenated frame.
    df["id"] = range(len(df))

    output_columns = ["id", "hour", "day", "month", "year", "temp",
                      "humidity", "windspeed", "sealevelpressure", "weather_code", "conditions"]

    final_df = df[output_columns]

    # Create the target directory first; otherwise to_csv fails only after
    # the entire download has already completed.
    Path(output_file).parent.mkdir(parents=True, exist_ok=True)
    final_df.to_csv(output_file, index=False)
    print(f"Data successfully saved to {output_file}")
    print(f"Total records: {len(final_df)}")
    print(final_df.head())

# Start the full download as soon as this cell runs; the function prints its
# own progress and writes the CSV itself, so no return value is captured.
fetch_historical_hourly_data()