In [None]:
import os
import pandas as pd
from dateutil import parser
import holidays as hl

pd.set_option('display.max_rows',None)
pd.set_option('display.max_columns',None)
OUTDIR = "data_weather/Final"
os.makedirs(OUTDIR, exist_ok=True)

import openmeteo_requests

import requests_cache
from retry_requests import retry

### Need cache for Open Meto from Api Doc

In [2]:
# Setup the Open-Meteo API client with cache and retry on error # <--- this is from Open Meteo Api Docs
cache_session = requests_cache.CachedSession('.cache', expire_after = 3600)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

### Load in Popular POI's

In [3]:
GB = pd.read_csv('data_weather/Global_Tourist_Attractions.csv')
GB.head(1)

Unnamed: 0,Location_Name,Type_of_Attraction,Attraction_Category,City,Country,Latitude,Longitude
0,CN Tower,Tower,Urban Landmark,Toronto,Canada,43.6426,-79.3871


### Helper Functions
- Using Open Meto to get past weather data, code is from their APi doc for Historical Forecast 
- As of Nov 3 2025 Nov 1 2025 is the latest

In [101]:
def Weather_Requester(lat:float,long:float,stDate:str,edDate:str) -> pd.DataFrame:
    url = "https://historical-forecast-api.open-meteo.com/v1/forecast"
    params = {
        "latitude": lat,
        "longitude": long,
        "start_date": stDate,
        "end_date": edDate,
        "daily": ["temperature_2m_mean", "wind_speed_10m_mean", "precipitation_sum", "relative_humidity_2m_mean"],
        "timezone": "America/New_York"
    }
    response = openmeteo.weather_api(url,params=params)
    
    dly = response[0].Daily()
   
    dT = dly.Variables(0).ValuesAsNumpy()
    dW = dly.Variables(1).ValuesAsNumpy()
    dP = dly.Variables(2).ValuesAsNumpy()
    dH = dly.Variables(3).ValuesAsNumpy()

    daily_data = {"Date": pd.date_range(
	start = pd.to_datetime(dly.Time(), unit = "s", utc = True),
	end =  pd.to_datetime(dly.TimeEnd(), unit = "s", utc = True),
	freq = pd.Timedelta(seconds = dly.Interval()),
	inclusive = "left"
    )}

    daily_data['Date'] = daily_data['Date'].date

    daily_data['Weather_Temperature_Avg'] = dT
    daily_data['Weather_Wind_Speed_Avg'] = dW
    daily_data['Weather_Precipitation_Sum'] = dP
    daily_data['Weather_Relative_Humidity_Avg'] = dH

    daily_dataframe = pd.DataFrame(data = daily_data)

    daily_dataframe['Date'] = daily_dataframe['Date'].apply(lambda x: str(x))
    
    return daily_dataframe 

In [5]:
def adding_Holiday(df:pd.DataFrame,country:str) -> pd.DataFrame:
    df['Holiday'] = df['Date'].apply(lambda x: 1 if hl.country_holidays(country=country).get(x) != None else 0)
    return df

In [6]:
def Handle_Date(df:pd.DataFrame) -> pd.DataFrame:    
    for col in df.select_dtypes(include='object').columns:
        df[col] = df[col].apply(lambda x: parser.parse(x).strftime('%Y-%m-%d'))
        df.rename(columns={col:'Date'}, inplace=True)
    return df

In [None]:
def adding_time_features(df:pd.DataFrame) -> pd.DataFrame:
    seasons = {1:'Winter',2:'Spring',3:'Spring',4:'Spring',5:'Summer',6:'Summer',7:'Summer',8:'Fall',9:'Fall',10:'Fall',11:'Winter',12:'Winter'}
    month = {1:'Janary',2:'February',3:'March',4:'Apirl',5:'May',6:'June',7:'July',8:'August',9:'Setember',10:'October',11:'November',12:'December'}
    df['Month'] = df['Date'].apply(lambda x: parser.parse(x).month)
    df['Season'] = df['Month'].apply(lambda x: seasons.get(x))
    df['Month'] = df['Month'].apply(lambda x: month.get(x))
    return df

In [None]:
def Attract_Scre_Tour_Sat_Level(df:pd.DataFrame) -> pd.DataFrame:
    # Base attraction score by city (0-1 scale)
    city_score = {"Toronto": 0.8,"Edmonton": 0.55,"Sydney": 0.85,"Melbourne": 0.7,"Auckland": 0.60,"Dublin": 0.65}

    # Map each city’s base score
    df["Attraction_Score"] = df["City"].map(city_score)

    # Add small random variation (so it's not static)
    df["Attraction_Score"] = (0.4*df['Avg_Daily_Pedestrian_Count'] + 0.3*df['Weather_Temperature_Avg'] + 0.1*df['Weather_Precipitation_Avg'] + 0.2*df['Weather_Wind_Speed_Avg'])/1000
    #(df["Attraction_Score"] + np.random.uniform(-0.05, 0.05, len(df))).clip(0, 1).round(3)

    # Tourist_Saturation_Level = normalized Pedestrian_Avg (0–1)
    # df["Tourist_Saturation_Level"] = df.groupby("City")["Avg_Daily_Pedestrian_Count"].transform(lambda s: (s - s.min()) / (s.max() - s.min())).round(3)
    return df

In [7]:
def dfsComb(dfs:pd.DataFrame,wB:list[str]) -> pd.DataFrame:
    U_cols = []
    for df in dfs: 
        r = Handle_Date(df)
        if type(r) == str: 
            return r
        df = r 
        for cn in df.columns:
            U_cols.append(cn)

    for cn in list(set(U_cols)):
        if U_cols.count(cn) != len(dfs):
            U_cols = [v for v in U_cols if v != cn]
    
    U_cols = list(set(U_cols))

    dfs_comb = pd.DataFrame(columns=U_cols)
    for df in dfs:
        dfs_comb = pd.concat([dfs_comb,df[U_cols]],axis=0)    
    
    CNameChk = wB 
    if len(CNameChk) > 0:
        drpC = []
        for cn in dfs_comb.columns:
            if sum([1 if chk in cn else 0 for chk in CNameChk]) != 0: 
                drpC.append(cn)
        dfs_comb = dfs_comb.drop(columns=drpC)
    dfs_comb.insert(0,'Date',dfs_comb.pop('Date'))
    dfs_comb.dropna(axis=1,how='all')
    NanRation_PerCol = zip(dfs_comb.columns,dfs_comb.isna().sum()/dfs_comb.shape[0])
    for valuepair in NanRation_PerCol:
        if valuepair[1] > 0.15:
            dfs_comb = dfs_comb.drop(columns=[valuepair[0]])
    dfs_comb = dfs_comb.fillna(0.0)
    
    return dfs_comb

In [None]:
def dfsCombo_Final(dfs_comb:pd.DataFrame,U_cols:list[str],Country:str,Tol:float,IntsecLatLongs:dict[str, list[float]],Location_ID:str) -> pd.DataFrame:
    avg_col = U_cols[1:]
    dfs_comb_daily = dfs_comb.groupby(['Date'], as_index=False)[avg_col].sum()
    dfs_comb_daily = dfs_comb_daily[dfs_comb_daily['Date'] >= '2021-01-01'].reset_index(drop=True)

    GB_NZ = GB[GB['Country'] == Country].reset_index(drop=True)
    
    tolerance = Tol
    dfs_comb_gbLocs = pd.DataFrame(columns=['Country','City','Location_ID','Location_Name','Type_of_Attraction','Attraction_Category',
                                            'Latitude','Longitude','Date','Avg_Daily_Pedestrian_Count'])
    for i,L in GB_NZ.iterrows():
        Lat,Long = L['Latitude'],L['Longitude']
        Ints = [key for key, value in IntsecLatLongs.items() if (value[0] - Lat) <= tolerance and (value[1] - Long) <= tolerance]
        if not Ints:continue
        Davg = dfs_comb_daily[Ints].sum(axis=1)
        d =  dfs_comb_daily['Date']
        for r in zip(Davg,d):
            dfs_comb_gbLocs.loc[len(dfs_comb_gbLocs)] = {
                'Country':L['Country'],'City':L['City'],'Location_ID':f'{Location_ID}_{i+1}','Location_Name':L['Location_Name'],'Type_of_Attraction':L['Type_of_Attraction'],
                'Attraction_Category':L['Attraction_Category'],'Latitude':Lat,'Longitude':Long,'Date':r[1],'Avg_Daily_Pedestrian_Count':r[0]}
    
    return dfs_comb_gbLocs

### Getting Dublin Data 

In [89]:
def dublin():
    Dublin_latlong = {
        "O'Connell St/Parnell St/AIB":[53.347861,-6.262075],
        "D'olier st/Burgh Quay":[53.346362,-6.258316],
        "College Green/Church Lane":[53.344356,-6.260970],
        "Grafton Street / Nassau Street / Suffolk Street":[53.343208,-6.259262],
        "Henry Street/Coles Lane/Dunnes":[53.3501148,-6.2641621],
        "Phibsborough Rd/Enniskerry Road":[53.363647,-6.272056],
        "Grand Canal st upp/Clanwilliam place":[53.341236,-6.240578],
        "Baggot st upper/Mespil rd/Bank":[53.334004,-6.245193],
        "Grafton Street/CompuB":[53.340153,-6.260714],
        "Mary st/Jervis st":[53.348774,-6.266618],
        "Capel st/Mary street":[53.348477,-6.268733],
        "College Green/Bank Of Ireland":[53.344397,-6.260329]         
    } # These all exist in all dfs column headers
    folder = './data_weather/Dublin'
    dfs = [pd.read_csv(os.path.join(folder,f)) for f in os.listdir(folder) if f.endswith('.csv')]
    dfs_comb = dfsComb(dfs,['North','East','South','West','Inbound','inbound','Outbound','outbound','IN','OUT',
                'Pedestrian','Pedestrians','place/Google','Channel','Peds','1','2','old',
                '(','PYRO EVO Temporary Counter']) # Obtained from trail and error runs 

    df_F = dfsCombo_Final(dfs_comb,dfs_comb.columns.tolist(),'Ireland',0.0285,Dublin_latlong,'IRDUB')
    df_F = adding_Holiday(df_F,'IE')

    df_F = df_F[df_F['Date'] <= '2025-11-08'].reset_index(drop=True) # Open Meto has a day limit on latest day   
     
    gp = df_F.groupby(['Latitude','Longitude'],as_index=False)['Date'].agg(['min', 'max'])

    gpC = pd.DataFrame()
    for i,r in gp.iterrows():
        Wth = Weather_Requester(r['Latitude'],r['Longitude'],r['min'],r['max'])
        Wth.insert(0, 'Longitude', float(r['Longitude']))
        Wth.insert(0, 'Latitude', float(r['Latitude']))
        gpC = pd.concat((gpC,Wth),axis='index',).reset_index(drop=True)
    
    M = pd.merge(df_F,gpC,on=['Latitude','Longitude','Date'],how='outer').dropna(how='any')

    M.to_csv(f"{OUTDIR}/Dublin_Pedestrian_Hourly.csv", index=False)

### Getting Auckland Data

In [88]:
def auckland():
    NewZe_latlong= {
        '107 Quay Street':[-36.84294,174.7657151],
        'Te Ara Tahuhu Walkway':[-36.8445354,174.7689804],
        'Commerce Street West':[-37.7924771,175.2788845],
        '7 Custom Street East':[-36.84518,174.76742],
        '45 Queen Street':[-36.845001,174.766266],
        '30 Queen Street':[-36.8485,174.7633],
        '19 Shortland Street':[-36.84495,174.766575],
        '2 High Street':[-36.8496,174.7644],
        '1 Courthouse Lane':[36.8435,174.7638],
        '61 Federal Street':[-36.8474453,174.7577998],
        '59 High Street':[-36.8487668,174.7612574],
        '210 Queen Street':[-36.848873,174.765435],
        '205 Queen Street':[-36.8492249,174.7643553],
        '8 Darby Street EW':[-36.8496018,174.7640929],
        '8 Darby Street NS':[-36.8496018,174.7640929],
        '261 Queen Street':[-36.8504686,174.7643253],
        '297 Queen Street':[-36.8516857,174.7615011],
        '150 K Road':[-36.857909,174.7600514],
        '183 K Road':[-36.8574364,174.7576088],
        } # These all exist in all dfs column headers
    folder = './data_weather/Auckland Data'
    dfs = [pd.read_csv(os.path.join(folder,f)) for f in os.listdir(folder) if f.endswith('.csv')]
    Temp = []
    for df in dfs:
        df = df.drop(columns=['Time'])
        if df['150 K Road'].dtypes == object:
            df['150 K Road'] = df['150 K Road'].apply(lambda x: 0.0 if type(x) != int and type(x) != float else x) # coerce invalid parsing --> NaN then --> 0.0  
        Temp.append(df)
    dfs = Temp
    dfs_comb = dfsComb(dfs,[

    ]) #  Left blank as the unessary columns will be dropped with out it
    
    df_F = dfsCombo_Final(dfs_comb,dfs_comb.columns.tolist(),'New Zealand',0.0285,NewZe_latlong,'NZAUK')
    df_F = adding_Holiday(df_F,'NZ')

    df_F = df_F[df_F['Date'] <= '2025-11-08'].reset_index(drop=True) # Open Meto has a day limit on latest day   
     
    gp = df_F.groupby(['Latitude','Longitude'],as_index=False)['Date'].agg(['min', 'max'])

    gpC = pd.DataFrame()
    for i,r in gp.iterrows():
        Wth = Weather_Requester(r['Latitude'],r['Longitude'],r['min'],r['max'])
        Wth.insert(0, 'Longitude', float(r['Longitude']))
        Wth.insert(0, 'Latitude', float(r['Latitude']))
        gpC = pd.concat((gpC,Wth),axis='index',).reset_index(drop=True)
    
    M = pd.merge(df_F,gpC,on=['Latitude','Longitude','Date'],how='outer').dropna(how='any')

    M.to_csv(f"{OUTDIR}/Auckland_Pedestrian_Hourly.csv", index=False)

### Running all

In [102]:
auckland()
dublin()

  dfs_comb = dfs_comb.fillna(0.0)
  dfs_comb = pd.concat([dfs_comb,df[U_cols]],axis=0)
