# ARMIA
- Produces Crowd Predicitions based on weather and user selected location

### Load the Libaries

In [1]:
import pandas as pd
import numpy as np
from statsmodels.tsa.statespace.sarimax import SARIMAX 
from pmdarima import auto_arima
import pickle
import os
from datetime import datetime,timedelta,date
from dateutil import parser
import holidays as hl

import openmeteo_requests
import requests_cache
from retry_requests import retry
trim_date = parser.parse('2025-09-30').date()

### Load the data set

In [2]:
Auck_peds = pd.read_csv("data_weather/Final/Auckland_Pedestrian_Hourly.csv")
Dub_peds = pd.read_csv("data_weather/Final/Dublin_Pedestrian_Hourly.csv")

df = pd.concat([Auck_peds, Dub_peds],ignore_index=True)

df['Date'] = pd.to_datetime(df['Date'])
df = df.sort_values(['Location_ID','Date'])

### Main ARMIA model
- Model creation
- Data splitting
- Fitting model
- Creates pickel files for each location
    - Need seperate pickel files for forecasting each location 
- ARMIA needs to have even spacing between dates
    if gap then a fill in needs to be done for Y values(Dep Var) & Exog(or X Ind Vars)

In [10]:
os.makedirs("arima_models", exist_ok=True) 

models = {}
for loc in df['Location_ID'].unique():
    sub = df[df['Location_ID'] == loc].set_index('Date')
    y = sub['Avg_Daily_Pedestrian_Count'].asfreq('D').interpolate(method='linear') # D is daily, rate of change fill in
    x = sub[['Holiday',
                'Weather_Temperature_Avg',
                'Weather_Wind_Speed_Avg',
                'Weather_Precipitation_Sum',
                'Weather_Relative_Humidity_Avg']].asfreq('D').interpolate(method='linear') # numeric only
    
    y = y[y.index <= trim_date.strftime('%Y-%m-%d')] # will chang depending on new datasets in the future
    x = x[x.index <= trim_date.strftime('%Y-%m-%d')] # will chang depending on new datasets in the future

    # Auto-tune ARIMA parameters
    stepwise = auto_arima(y,
                          seasonal=True,
                          m=7, # weekly pattern
                          trace=False,
                          error_action='ignore',
                          suppress_warnings=True)
    
    # Fit SARIMA model
    model = SARIMAX(endog=y,
                    exog=x,
                    order=stepwise.order,
                    seasonal_order=stepwise.seasonal_order,
                    enforce_stationarity=False,
                    enforce_invertibility=False)
    results = model.fit(disp=False)
    
    # Save model
    model_path = f"arima_models/{loc}_arima.pkl"
    with open(model_path, "wb") as f:
        pickle.dump(results, f)
    
    models[loc] = {
        'model_path': model_path,
        'order': stepwise.order,
        'seasonal_order': stepwise.seasonal_order
    }

# Save metadata
with open("arima_models/arima_index.pkl", "wb") as f:
    pickle.dump(models, f)



In [11]:
# Setup the Open-Meteo API client with cache and retry on error # <--- this is from Open Meteo Api Docs
cache_session = requests_cache.CachedSession('.amriacache', expire_after = 3600)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

In [12]:
def Weather_Requester(lat:float,long:float,ArvDate:date) -> pd.DataFrame:
    FwdD = (ArvDate - date.today()).days
    url = "https://historical-forecast-api.open-meteo.com/v1/forecast"
    params = {
        "latitude": lat,
        "longitude": long,
        "start_date": trim_date.strftime('%Y-%m-%d'),
        "end_date": (date.today()-timedelta(days=1)).strftime('%Y-%m-%d'),
        "daily": ["temperature_2m_mean", "wind_speed_10m_mean", "precipitation_sum", "relative_humidity_2m_mean"],
        "timezone": "America/New_York"
    }

    response = openmeteo.weather_api(url,params=params)
    dly = response[0].Daily()

    T1 = dly.Variables(0).ValuesAsNumpy()
    W1 = dly.Variables(1).ValuesAsNumpy()
    P1 = dly.Variables(2).ValuesAsNumpy()
    R1 = dly.Variables(3).ValuesAsNumpy()

    url = "https://seasonal-api.open-meteo.com/v1/seasonal"
    params = {
        "latitude": lat,
        "longitude": long,
        "forecast_days": FwdD,
        "timezone": "America/New_York",
        "daily": ["temperature_2m_mean", "wind_speed_10m_mean", "precipitation_sum", "relative_humidity_2m_mean"]
    }
    
    response = openmeteo.weather_api(url,params=params)
    dly = response[0].Daily()

    T2 = dly.Variables(0).ValuesAsNumpy()
    W2 = dly.Variables(1).ValuesAsNumpy()
    P2 = dly.Variables(2).ValuesAsNumpy()
    R2 = dly.Variables(3).ValuesAsNumpy()

    T = np.concatenate((T1,T2))
    w = np.concatenate((W1,W2))
    P = np.concatenate((P1,P2))
    R = np.concatenate((R1,R2))
    
    vstk = pd.DataFrame(data = np.vstack((T,w,P,R)).T)

    return vstk

In [13]:
def Holidayer(df:pd.DataFrame,d:date,CCode:str) -> pd.DataFrame:
    df.insert(0,'Holiday',0)
    for i in range(len(df)-1,-1,-1):
        df.loc[i,['Holiday']] = 1 if hl.country_holidays(country=CCode).get(d.strftime('%Y-%m-%d')) != None else 0
    return df

In [18]:
loc = "IRDUB_1"
with open(f"arima_models/{loc}_arima.pkl", "rb") as f:
    model = pickle.load(f)

d = "2025-12-27"
w = Weather_Requester(-36.8485,174.7633,parser.parse(d).date())
h = Holidayer(w,parser.parse(d).date(),'IE')
pred_mean = pd.DataFrame(model.get_forecast(exog=h,steps=len(h)).predicted_mean)
PredM = float(pred_mean.loc[d])
print(PredM)

261063.51149329025


  PredM = float(pred_mean.loc[d])
