In [23]:
import pandas as pd
import numpy as np
import requests
from datetime import datetime

In [14]:
df = pd.read_csv(r"data\unpred\santacruz_cleaned.csv")
df

Unnamed: 0,YEAR,HR,DT,MN,DPT,WBT,DBT,RH
0,2010,0,1,1,14.8,17.4,21.20000,67.0
1,2010,0,2,1,17.8,19.4,22.00000,77.0
2,2010,0,3,1,20.0,21.4,24.00000,78.0
3,2010,0,4,1,16.1,18.6,22.40000,68.0
4,2010,0,5,1,17.8,18.4,19.40000,90.0
...,...,...,...,...,...,...,...,...
41052,2024,84,27,12,,,22.60570,
41053,2024,84,28,12,,,23.04580,
41054,2024,84,29,12,,,21.40127,
41055,2024,84,30,12,,,23.43057,


In [15]:
df.isnull().sum()

YEAR       0
HR         0
DT         0
MN         0
DPT     5802
WBT     5802
DBT        0
RH      5781
dtype: int64

In [16]:
df = df.interpolate()


In [17]:
df.isnull().sum()

YEAR    0
HR      0
DT      0
MN      0
DPT     0
WBT     0
DBT     0
RH      0
dtype: int64

In [18]:
df.to_csv(r"data\unpred\santacruz_inter.csv")

In [24]:
def fetch_live_weather_data(lat=19.09, lon=72.85):
    current_year = datetime.now().year
    today = datetime.now().strftime("%Y-%m-%d")
    start_date = f"{current_year}-01-01"

    url = (
        f"https://archive-api.open-meteo.com/v1/archive?"
        f"latitude={lat}&longitude={lon}&start_date={start_date}&end_date={today}"
        "&hourly=temperature_2m,dew_point_2m,relative_humidity_2m,surface_pressure,"
        "windspeed_10m,winddirection_10m,precipitation&timezone=Asia%2FKolkata"
    )

    response = requests.get(url)
    data = response.json()

    df = pd.DataFrame({
        "time": data["hourly"]["time"],
        "DBT": data["hourly"]["temperature_2m"],
        "DPT": data["hourly"]["dew_point_2m"],
        "RH": data["hourly"]["relative_humidity_2m"],
    })

    df["time"] = pd.to_datetime(df["time"])
    df["YEAR"] = df["time"].dt.year
    df["MN"] = df["time"].dt.month
    df["DT"] = df["time"].dt.day
    df["HR"] = df["time"].dt.hour

    df["WBT"] = (
        df["DBT"] * np.arctan(0.151977 * (df["RH"] + 8.313659)**0.5)
        + np.arctan(df["DBT"] + df["RH"])
        - np.arctan(df["RH"] - 1.676331)
        + 0.00391838 * (df["RH"]**1.5) * np.arctan(0.023101 * df["RH"])
        - 4.686035
    )

    df = df[["YEAR", "HR", "DT", "MN", "DPT", "WBT", "DBT", "RH"]]
    return df

live_df = fetch_live_weather_data()
live_df

Unnamed: 0,YEAR,HR,DT,MN,DPT,WBT,DBT,RH
0,2025,0,1,1,18.7,21.051238,25.9,65.0
1,2025,1,1,1,19.2,21.117712,25.5,68.0
2,2025,2,1,1,19.2,21.075245,25.3,69.0
3,2025,3,1,1,19.0,20.844763,24.9,70.0
4,2025,4,1,1,18.8,20.424493,24.3,71.0
...,...,...,...,...,...,...,...,...
2635,2025,19,20,4,,,,
2636,2025,20,20,4,,,,
2637,2025,21,20,4,,,,
2638,2025,22,20,4,,,,


In [26]:

combined_df = pd.concat([df, live_df], ignore_index=True)
combined_df.to_csv(r"data\unpred\combined_data.csv", index=False)