# ARMIA
- Produces Crowd Predicitions based on weather and user selected location

### Load the Libaries

In [12]:
import pandas as pd
from statsmodels.tsa.statespace.sarimax import SARIMAX 
from pmdarima import auto_arima
import pickle
import os
from datetime import datetime,timedelta,date
from dateutil import parser
import holidays as hl

import openmeteo_requests
import requests_cache
from retry_requests import retry

### Load the data set

In [4]:
Auck_peds = pd.read_csv("data_weather/Final/Auckland_Pedestrian_Hourly.csv")
Dub_peds = pd.read_csv("data_weather/Final/Dublin_Pedestrian_Hourly.csv")

df = pd.concat([Auck_peds, Dub_peds],ignore_index=True)

df['Date'] = pd.to_datetime(df['Date'])
df = df.sort_values(['Location_ID','Date'])

display(df.head(2))
display(df.tail(2))

Unnamed: 0,Country,City,Location_ID,Location_Name,Type_of_Attraction,Attraction_Category,Latitude,Longitude,Date,Avg_Daily_Pedestrian_Count,Holiday,Weather_Temperature_Avg,Weather_Wind_Speed_Avg,Weather_Precipitation_Sum,Weather_Relative_Humidity_Avg
13113,Ireland,Dublin,IRDUB_1,Trinity College & Book of Kells,University,Culture & History,53.3438,-6.2546,2021-01-01,47847.0,1.0,2.034,9.817025,0.0,92.18573
13114,Ireland,Dublin,IRDUB_1,Trinity College & Book of Kells,University,Culture & History,53.3438,-6.2546,2021-01-02,82252.0,0.0,1.525667,10.459395,0.7,92.40616


Unnamed: 0,Country,City,Location_ID,Location_Name,Type_of_Attraction,Attraction_Category,Latitude,Longitude,Date,Avg_Daily_Pedestrian_Count,Holiday,Weather_Temperature_Avg,Weather_Wind_Speed_Avg,Weather_Precipitation_Sum,Weather_Relative_Humidity_Avg
6610,New Zealand,Auckland,NZAUK_5,Viaduct Harbour,Waterfront,Urban & Leisure,-36.8422,174.7588,2025-09-29,126711.0,0.0,14.700084,10.577799,0.4,72.0
6611,New Zealand,Auckland,NZAUK_5,Viaduct Harbour,Waterfront,Urban & Leisure,-36.8422,174.7588,2025-09-30,140880.0,0.0,13.752164,11.655316,0.1,73.083336


### Main ARMIA model
- Model creation
- Data splitting
- Fitting model
- Creates pickel files for each location
    - Need seperate pickel files for forecasting each location 
- ARMIA needs to have even spacing between dates
    if gap then a fill in needs to be done for Y values(Dep Var) & Exog(or X Ind Vars)

In [42]:
os.makedirs("arima_models", exist_ok=True) 

models = {}
for loc in df['Location_ID'].unique():
    sub = df[df['Location_ID'] == loc].set_index('Date')
    y = sub['Avg_Daily_Pedestrian_Count'].asfreq('D').interpolate(method='linear') # D is daily, rate of change fill in 
    # x = sub[['Holiday',
    #             'Weather_Temperature_Avg',
    #             'Weather_Wind_Speed_Avg',
    #             'Weather_Precipitation_Sum',
    #             'Weather_Relative_Humidity_Avg']].asfreq('D').interpolate(method='linear') # numeric only

    # Auto-tune ARIMA parameters
    stepwise = auto_arima(y,
                          seasonal=True,
                          m=7, # weekly pattern
                          trace=False,
                          error_action='ignore',
                          suppress_warnings=True)
    
    # Fit SARIMA model # exog=x,
    model = SARIMAX(endog=y,
                    
                    order=stepwise.order,
                    seasonal_order=stepwise.seasonal_order,
                    enforce_stationarity=False,
                    enforce_invertibility=False)
    results = model.fit(disp=False)
    
    # Save model
    model_path = f"arima_models/{loc}_arima.pkl"
    with open(model_path, "wb") as f:
        pickle.dump(results, f)
    
    models[loc] = {
        'model_path': model_path,
        'order': stepwise.order,
        'seasonal_order': stepwise.seasonal_order
    }

# Save metadata
with open("arima_models/arima_index.pkl", "wb") as f:
    pickle.dump(models, f)



In [9]:
# Setup the Open-Meteo API client with cache and retry on error # <--- this is from Open Meteo Api Docs
cache_session = requests_cache.CachedSession('.amriacache', expire_after = 3600)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

In [None]:
def Weather_Requester(lat:float,long:float,ArvDate:date) -> list:
    print(ArvDate.month)
    time = (ArvDate - date.today()).days
    print(time)
    # if ArvDate <= date.today():
        # url = "https://historical-forecast-api.open-meteo.com/v1/forecast"
        # params = {
        #     "latitude": lat,
        #     "longitude": long,
        #     "start_date": stDate,
        #     "end_date": edDate,
        #     "daily": ["temperature_2m_mean", "wind_speed_10m_mean", "precipitation_sum", "relative_humidity_2m_mean"],
        #     "timezone": "America/New_York"
        # }
    if time <= 14:
        url = "https://api.open-meteo.com/v1/forecast"
        params = {
            "latitude": lat,
            "longitude": long,
            "forecast_days": time,
            "timezone": "America/New_York",
            "daily": ["temperature_2m_mean", "wind_speed_10m_mean", "precipitation_sum", "relative_humidity_2m_mean"],
            }
        response = openmeteo.weather_api(url,params=params)
        dly = response[0].Daily()
        T = float(dly.Variables(0).ValuesAsNumpy()[-1]).__round__(6)
        W = float(dly.Variables(1).ValuesAsNumpy()[-1]).__round__(6)
        P = float(dly.Variables(2).ValuesAsNumpy()[-1]).__round__(6)
        R = float(dly.Variables(3).ValuesAsNumpy()[-1]).__round__(6)
    if time > 14:
        url = "https://seasonal-api.open-meteo.com/v1/seasonal"
        params = {
            "latitude": lat,
            "longitude": long,
            "daily": ["temperature_2m_mean", "wind_speed_10m_mean", "precipitation_sum", "relative_humidity_2m_mean"],
            "forecast_days": time
            }
        response = openmeteo.weather_api(url,params=params)
        dly = response[0].Daily()
        T = float(dly.Variables(0).ValuesAsNumpy()[-1]).__round__(6)
        W = float(dly.Variables(1).ValuesAsNumpy()[-1]).__round__(6)
        P = float(dly.Variables(2).ValuesAsNumpy()[-1]).__round__(6)
        R = float(dly.Variables(2).ValuesAsNumpy()[-1]).__round__(6)
    try:
        return [T,W,P,R]
    except:
        return []

In [46]:
loc = "IRDUB_1"
with open(f"arima_models/{loc}_arima.pkl", "rb") as f:
    model = pickle.load(f)


d = "2025-12-27"
forecast_days = (parser.parse(d).date() - date.today()).days + 20
w = Weather_Requester(-36.8485,174.7633,parser.parse(d).date())
h = 1 if hl.country_holidays(country='IE').get(d) != None else 0
w.insert(0,h)
print(w)
PredM = 0.0
if len(w) == 5:
    pred_mean = pd.DataFrame(model.get_forecast(steps=forecast_days).predicted_mean)
    PredM = float(pred_mean.iloc[0])
    display(str(pred_mean.index[0]).split(' ')[0])
    print(PredM)

#exog=[w],
# d = (datetime.strptime(trd,'%Y-%m-%d') + timedelta(days=8)).strftime('%Y-%m-%d')
# forecast_days = 18+20
# e = "2025-11-30" 
# d = e
# PredM = 0.0 
# for i in range(0,forecast_days):
#     w = Weather_Requester(-36.8485,174.7633,parser.parse(d).date())
#     h = 1 if hl.country_holidays(country='IE').get(d) != None else 0
#     w.insert(0,h)
#     print(w)
#     if len(w) == 5:
#         pred_mean = pd.DataFrame(model.get_forecast(exog=[w], steps=1).predicted_mean)
#         display(str(pred_mean.index[0]).split(' ')[0])
#         PredM = pred_mean.iloc[0]
#         d = str(pred_mean.index[0]).split(' ')[0]
#     if d == e: break 
# print(PredM)
    

12
44
[0, 17.325001, 20.225, 17.1625, 17.1625]


  PredM = float(pred_mean.iloc[0])


'2025-11-06'

178285.07732348837
