In [1]:
import dask.dataframe as dd
import pandas as pd
from pmdarima import auto_arima

In [None]:
def load_weather(path="weather_2025_cleaned.csv"):
    """Read the cleaned weather CSV and add calendar helper columns.

    All column names are lower-cased, the ``date`` column is parsed to
    datetimes, and two derived columns are appended:

    * ``day_only`` - the calendar date of each timestamp
    * ``hour``     - the hour of day of each timestamp

    Parameters
    ----------
    path : str or path-like, default "weather_2025_cleaned.csv"
        Location of the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        The file contents with lower-cased column names plus the
        ``day_only`` and ``hour`` columns.

    Raises
    ------
    ValueError
        If the file has no date column (header matched case-insensitively).
    """
    df = pd.read_csv(path)
    df.columns = [col.lower() for col in df.columns]

    # Headers are lower-cased first so "DATE", "Date" and "date" are all
    # accepted; parsing happens afterwards for the same reason.
    if "date" not in df.columns:
        raise ValueError(f"{path!r} has no 'date' column (case-insensitive)")

    df["date"] = pd.to_datetime(df["date"])
    df["day_only"] = df["date"].dt.date
    df["hour"] = df["date"].dt.hour

    return df


In [21]:
def forecast_future_arima_all(df, days=7, value_col="tmp_c", seasonal_period=7):
    """Forecast daily mean, max and min of a column with seasonal auto-ARIMA.

    The observations are resampled to calendar days on the ``date`` column,
    one ``auto_arima`` model is fitted per daily statistic, and each model
    predicts ``days`` steps past the last day that had data.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a datetime ``date`` column and the ``value_col`` column.
    days : int, default 7
        Number of future days to forecast.
    value_col : str, default "tmp_c"
        Name of the column to aggregate and forecast.
    seasonal_period : int, default 7
        Seasonal period passed to ``auto_arima`` as ``m``.

    Returns
    -------
    pandas.DataFrame
        One row per forecast day with columns ``ds`` (date), ``mean_temp``,
        ``max_temp``, ``min_temp`` (rounded to 1 decimal) and ``day``
        (weekday name).

    Raises
    ------
    ValueError
        If no day has a non-missing value to fit on.
    """
    # Days without any observation produce NaN aggregates and are dropped,
    # so the models see the remaining days as one consecutive series.
    daily = (
        df.resample("D", on="date")[value_col]
        .agg(["mean", "max", "min"])
        .dropna()
        .reset_index()
    )
    if daily.empty:
        raise ValueError(f"no non-missing '{value_col}' observations to forecast from")

    # Forecast horizon starts the day after the last day that had data.
    future_dates = pd.date_range(
        daily["date"].iloc[-1] + pd.Timedelta(days=1), periods=days, freq="D"
    )

    columns = {"ds": future_dates}
    # One independent model per daily statistic; the insertion order fixes
    # the output column order (mean, max, min).
    for stat in ("mean", "max", "min"):
        model = auto_arima(
            daily[stat].values,
            seasonal=True,
            m=seasonal_period,
            suppress_warnings=True,
        )
        columns[f"{stat}_temp"] = model.predict(n_periods=days).round(1)
    columns["day"] = future_dates.strftime("%A")

    return pd.DataFrame(columns)


In [22]:
# Load the weather observations via load_weather().
df = load_weather()
# Fit the ARIMA models and forecast the next 7 days of daily mean/max/min.
forecast_7 = forecast_future_arima_all(df, days=7)
# Plain-text dump of the forecast table.
print(forecast_7)



          ds  mean_temp  max_temp  min_temp        day
0 2025-08-24       28.6      35.1      17.2     Sunday
1 2025-08-25       28.7      35.2      17.3     Monday
2 2025-08-26       28.6      35.3      17.5    Tuesday
3 2025-08-27       28.6      35.3      17.6  Wednesday
4 2025-08-28       28.8      35.3      17.8   Thursday
5 2025-08-29       28.7      35.3      17.9     Friday
6 2025-08-30       28.7      35.3      17.9   Saturday


