In [1]:
import os
import numpy as np
import pandas as pd
from dateutil import parser
import holidays as hl

# Used with Pandas to help view hidden columns and rows  
pd.set_option('display.max_rows',None)
pd.set_option('display.max_columns',None)
OUTDIR = "data_weather/Final"
os.makedirs(OUTDIR, exist_ok=True)

import openmeteo_requests

import requests_cache
from retry_requests import retry

### Load in Popular POI's

In [148]:
GB = pd.read_csv('data_weather/Global_Tourist_Attractions.csv') #  Use for creating our time series data, POI and with lat and long
GB.head(1)

Unnamed: 0,Location_Name,Type_of_Attraction,Attraction_Category,City,Country,Latitude,Longitude,Location_ID
0,CN Tower,Tower,Urban Landmark,Toronto,Canada,43.6426,-79.3871,CNTOR_1


### Need cache for Open Meto from Api Doc
-  Setup the Open-Meteo API client with cache and retry on error # <--- this is from Open Meteo Api Docs

In [149]:
cache_session = requests_cache.CachedSession('.cache', expire_after = 3600)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

### Helper Functions
- Using Open Meto to get past weather data, code is from their APi doc for Historical Forecast 
- As of Nov 18 2025 Nov 16 2025 is the latest

In [150]:
def Weather_Requester(lat:float,long:float,stDate:str,edDate:str) -> pd.DataFrame:
    url = "https://archive-api.open-meteo.com/v1/archive"
    params = {
        "latitude": lat,
        "longitude": long,
        "start_date": stDate,
        "end_date": edDate,
        "daily": ["precipitation_sum", "temperature_2m_mean", "relative_humidity_2m_mean", "wind_gusts_10m_mean"],
        "timezone": "America/New_York",
    }
    responses = openmeteo.weather_api(url, params=params)
    # Basically getting the data for the beginning of the dataset set to the end of the dataset per location   
    dly = responses[0].Daily() 
   
    dP = dly.Variables(0).ValuesAsNumpy() # Temp NumpArray
    dT = dly.Variables(1).ValuesAsNumpy() # Wind NumpArray
    dH = dly.Variables(2).ValuesAsNumpy() # Prep NumpArray
    dW = dly.Variables(3).ValuesAsNumpy() # Humd NumpArray

    daily_data = {"Date": pd.date_range( # Code from Open Meto sandbox, used for building the date columns in datetime.date(yyyy,mm,dd)<-want this format when working in code
            start = pd.to_datetime(dly.Time(), unit = "s", utc = True),
            end =  pd.to_datetime(dly.TimeEnd(), unit = "s", utc = True),
            freq = pd.Timedelta(seconds = dly.Interval()),
            inclusive = "left" 
    )}

    daily_data['Date'] = daily_data['Date'].date

    daily_data['Weather_Temperature_Avg'] = dT
    daily_data['Weather_Wind_Speed_Avg'] = dW
    daily_data['Weather_Precipitation_Sum'] = dP
    daily_data['Weather_Relative_Humidity_Avg'] = dH

    daily_dataframe = pd.DataFrame(data = daily_data)

    # daily_dataframe['Date'] = daily_dataframe['Date'].apply(lambda x: str(x))
    
    return daily_dataframe 

### Function that handles holiday

In [151]:
def adding_Holiday(df:pd.DataFrame,country:str) -> pd.DataFrame:
    df['Holiday'] = df['Date'].apply(lambda x: 1 if hl.country_holidays(country=country).get(x) != None else 0)
    return df

### Function Handles v stacking data sets, removing unessary columns 

In [152]:
def combiner(DSArray:list[pd.DataFrame],colFlt:list)->pd.DataFrame:
    DSArray = [df.rename(columns={df.columns[0]:'Date'}) for df in DSArray] # Renaming Date column to Date  
    vstk = pd.concat(DSArray)
    vstk['Date'] = vstk['Date'].apply(lambda x : parser.parse(x).date()) # Convert date to datetime.date(YYYY,MM,DD)
    drpcols = []
    for a in vstk.columns.to_list(): 
        for b in colFlt:
            if b in a : 
                drpcols.append(a) 
                break
    vstk = vstk.drop(columns=drpcols).reset_index(drop=True)
    return vstk

### Function that transform the data set to a usable form

In [166]:
def dfTransform(vdf:pd.DataFrame,latlong:dict,tol:float,Country:str)->pd.DataFrame:
    vdf = vdf.replace(np.nan,0.0) # Better to replace NaN pedrestrain data with 0 instead of other methods that can Artifically inflate numbers
    vdf = vdf[[cn for cn in vdf.columns if cn in latlong or cn == 'Date']] # keep columns that can have lat and long as well as date
    vdf = vdf.groupby(['Date'])[vdf.columns[1:]].mean().reset_index() # Group by Date column to average out pedestrian counts in each column
    
    vdf_comb_gbLocs = pd.DataFrame(columns=['Country','City','Location_ID','Location_Name','Type_of_Attraction','Attraction_Category',
                                            'Latitude','Longitude','Date','Avg_Daily_Pedestrian_Count']) # new dataframe to fill
    
    for i,r in GB[GB['Country']==Country].reset_index(drop=True).iterrows():# grab the row with correct country
        Lat,Long = r['Latitude'],r['Longitude']# Finding Intersections which are close to POI Lat and Long (taking a tol into consideration) 
        Ints = [key for key, value in latlong.items() if (value[0] - Lat) <= tol and (value[1] - Long) <= tol]
        if not Ints:continue # Find no intersect that are close than go to next POI
        Davg = vdf[Ints].sum(axis=1).round(0) # sereis 
        d =  vdf['Date']
        for rr in zip(Davg,d):
            vdf_comb_gbLocs.loc[len(vdf_comb_gbLocs)] = {
                'Country':r['Country'],'City':r['City'],'Location_ID':r['Location_ID'],'Location_Name':r['Location_Name'],'Type_of_Attraction':r['Type_of_Attraction'],
                'Attraction_Category':r['Attraction_Category'],'Latitude':Lat,'Longitude':Long,'Date':rr[1],'Avg_Daily_Pedestrian_Count':rr[0]}
    return vdf_comb_gbLocs

### Dublin   

In [164]:
def IR_Dub():
    folder = './data_weather/Dublin'
    dfs = [pd.read_csv(os.path.join(folder,f)) for f in os.listdir(folder) if f.endswith('.csv')] # 
    ColNameExst = ['North','East','South','West','Inbound','inbound','Outbound','outbound',
                   'IN','OUT','Pedestrian','Pedestrians','place/Google','Channel','Peds',
                   '1','2','old','(','PYRO EVO Temporary Counter'] # Used for filtering columns we do not want
    v = combiner(dfs,ColNameExst) # Obtain processed v stacked 
    Dublin_latlong = {
        "O'Connell St/Parnell St/AIB":[53.347861,-6.262075],
        "D'olier st/Burgh Quay":[53.346362,-6.258316],
        "College Green/Church Lane":[53.344356,-6.260970],
        "Grafton Street / Nassau Street / Suffolk Street":[53.343208,-6.259262],
        "Henry Street/Coles Lane/Dunnes":[53.3501148,-6.2641621],
        "Phibsborough Rd/Enniskerry Road":[53.363647,-6.272056],
        "Grand Canal st upp/Clanwilliam place":[53.341236,-6.240578],
        "Baggot st upper/Mespil rd/Bank":[53.334004,-6.245193],
        "Grafton Street/CompuB":[53.340153,-6.260714],
        "Mary st/Jervis st":[53.348774,-6.266618],
        "Capel st/Mary street":[53.348477,-6.268733],
        "College Green/Bank Of Ireland":[53.344397,-6.260329]         
    } # These all exist in all dfs column headers
    v = dfTransform(v,Dublin_latlong,0.007,'Ireland')
    v = adding_Holiday(v,'IE')

    v = v[(v['Date'] <= parser.parse('2025-09-30').date()) & (v['Date'] >= parser.parse('2021-01-01').date())].reset_index(drop=True) 
    # Settign end date for time series               # Settign start date for time series   
     
    gp = v.groupby(['Latitude','Longitude'],as_index=False)['Date'].agg(['min', 'max'])
    gpC = pd.DataFrame() # We dont want to call Meto each day better to do all per each POI 
    for i,r in gp.iterrows():
        Wth = Weather_Requester(r['Latitude'],r['Longitude'],r['min'],r['max'])
        Wth.insert(0, 'Longitude', float(r['Longitude']))
        Wth.insert(0, 'Latitude', float(r['Latitude']))
        gpC = pd.concat((gpC,Wth),axis='index',).reset_index(drop=True)
    
    M = pd.merge(v,gpC,on=['Latitude','Longitude','Date'],how='outer').dropna(how='any').sort_values(by=['Location_ID','Date']).reset_index(drop=True)
    M.to_csv(f"{OUTDIR}/Dublin_Pedestrian_Hourly.csv", index=False)
    display(M.head(5))

### Auckland

In [163]:
def NZ_Auckland():
    folder = './data_weather/Auckland Data'
    dfs = [pd.read_csv(os.path.join(folder,f)) for f in os.listdir(folder) if f.endswith('.csv')] # 
    
    Temp = []
    for df in dfs:
        df = df.drop(columns=['Time']) # Dataset contains a tiem column, we don't care about it
        if df['150 K Road'].dtypes == object: # Theres a '-' in column '150  K Road' of one of the data sets 
            df['150 K Road'] = df['150 K Road'].apply(lambda x: 0.0 if type(x) != int and type(x) != float else x) # coerce invalid parsing --> NaN then --> 0.0  
        Temp.append(df)
    dfs = Temp

    ColNameExst = [] # Used for filtering columns we do not want
    v = combiner(dfs,ColNameExst) # Obtain processed v stacked 
    NewZe_latlong= { # Obtain processed v stacked 
        '107 Quay Street':[-36.84294,174.7657151],
        'Te Ara Tahuhu Walkway':[-36.8445354,174.7689804],
        'Commerce Street West':[-37.7924771,175.2788845],
        '7 Custom Street East':[-36.84518,174.76742],
        '45 Queen Street':[-36.845001,174.766266],
        '30 Queen Street':[-36.8485,174.7633],
        '19 Shortland Street':[-36.84495,174.766575],
        '2 High Street':[-36.8496,174.7644],
        '1 Courthouse Lane':[36.8435,174.7638],
        '61 Federal Street':[-36.8474453,174.7577998],
        '59 High Street':[-36.8487668,174.7612574],
        '210 Queen Street':[-36.848873,174.765435],
        '205 Queen Street':[-36.8492249,174.7643553],
        '8 Darby Street EW':[-36.8496018,174.7640929],
        '8 Darby Street NS':[-36.8496018,174.7640929],
        '261 Queen Street':[-36.8504686,174.7643253],
        '297 Queen Street':[-36.8516857,174.7615011],
        '150 K Road':[-36.857909,174.7600514],
        '183 K Road':[-36.8574364,174.7576088],
        } # These all exist in all dfs column headers
    v = dfTransform(v,NewZe_latlong,0.007,'New Zealand')
    v = adding_Holiday(v,'NZ')

    v = v[(v['Date'] <= parser.parse('2025-09-30').date()) & (v['Date'] >= parser.parse('2021-01-01').date())].reset_index(drop=True) 
    # Settign end date for time series               # Settign start date for time series  

    gp = v.groupby(['Latitude','Longitude'],as_index=False)['Date'].agg(['min', 'max'])
    gpC = pd.DataFrame() # We dont want to call Meto each day better to do all per each POI 
    for i,r in gp.iterrows():
        Wth = Weather_Requester(r['Latitude'],r['Longitude'],r['min'],r['max'])
        Wth.insert(0, 'Longitude', float(r['Longitude']))
        Wth.insert(0, 'Latitude', float(r['Latitude']))
        gpC = pd.concat((gpC,Wth),axis='index',).reset_index(drop=True)
    
    M = pd.merge(v,gpC,on=['Latitude','Longitude','Date'],how='outer').dropna(how='any').sort_values(by=['Location_ID','Date']).reset_index(drop=True)
    M.to_csv(f"{OUTDIR}/Auckland_Pedestrian_Hourly.csv", index=False)
    display(M.head(5))

### Int the fuctions to begin data processing --- Start Point

In [167]:
IR_Dub()
NZ_Auckland()

Unnamed: 0,Country,City,Location_ID,Location_Name,Type_of_Attraction,Attraction_Category,Latitude,Longitude,Date,Avg_Daily_Pedestrian_Count,Holiday,Weather_Temperature_Avg,Weather_Wind_Speed_Avg,Weather_Precipitation_Sum,Weather_Relative_Humidity_Avg
0,Ireland,Dublin,IRDUB_1,Trinity College & Book of Kells,University,Culture & History,53.3438,-6.2546,2021-01-01,1673.0,1.0,2.034,20.099998,0.0,92.18573
1,Ireland,Dublin,IRDUB_1,Trinity College & Book of Kells,University,Culture & History,53.3438,-6.2546,2021-01-02,3014.0,0.0,1.525667,21.119999,0.7,92.406158
2,Ireland,Dublin,IRDUB_1,Trinity College & Book of Kells,University,Culture & History,53.3438,-6.2546,2021-01-03,3050.0,0.0,2.029833,24.945,1.2,91.873329
3,Ireland,Dublin,IRDUB_1,Trinity College & Book of Kells,University,Culture & History,53.3438,-6.2546,2021-01-04,3444.0,0.0,3.5715,33.120007,2.4,86.95858
4,Ireland,Dublin,IRDUB_1,Trinity College & Book of Kells,University,Culture & History,53.3438,-6.2546,2021-01-05,3373.0,0.0,2.9215,28.800003,3.0,91.681313


Unnamed: 0,Country,City,Location_ID,Location_Name,Type_of_Attraction,Attraction_Category,Latitude,Longitude,Date,Avg_Daily_Pedestrian_Count,Holiday,Weather_Temperature_Avg,Weather_Wind_Speed_Avg,Weather_Precipitation_Sum,Weather_Relative_Humidity_Avg
0,New Zealand,Auckland,NZAUK_1,Sky Tower,Tower,Urban Landmark,-36.8485,174.7633,2021-01-01,2686.0,1.0,19.225752,19.874998,14.5,83.947762
1,New Zealand,Auckland,NZAUK_1,Sky Tower,Tower,Urban Landmark,-36.8485,174.7633,2021-01-02,2964.0,1.0,18.509085,21.929998,15.800001,88.193947
2,New Zealand,Auckland,NZAUK_1,Sky Tower,Tower,Urban Landmark,-36.8485,174.7633,2021-01-03,2918.0,0.0,19.51325,21.960001,7.0,80.497612
3,New Zealand,Auckland,NZAUK_1,Sky Tower,Tower,Urban Landmark,-36.8485,174.7633,2021-01-04,3210.0,1.0,20.306999,14.864999,0.5,82.517159
4,New Zealand,Auckland,NZAUK_1,Sky Tower,Tower,Urban Landmark,-36.8485,174.7633,2021-01-05,4083.0,0.0,20.627832,22.559998,0.0,78.300323
