<a href="https://colab.research.google.com/github/Nuha4/adelaide_metrocard-cpi-fuel-correlation/blob/main/adl_metrocard_cpi_fuel_correlation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import pearsonr, spearmanr
import numpy as np
import glob
import os

In [6]:
# Load, Merge and Clean MetroCard Usage Data for Adelaide (2015_Q4 to 2025_Q2)
# Every CSV in `folder_path` whose name contains "anded" is read and stacked
# into one frame; SOURCE_FILE records which file each row came from.
folder_path = 'data_metro/'
file_pattern = os.path.join(folder_path, "*anded*.csv")

# glob.glob() returns matches in a filesystem-dependent order; sorting keeps
# the row order (and the printed preview) reproducible between runs/machines.
all_files = sorted(glob.glob(file_pattern))

# Fail with an actionable message instead of pd.concat's generic
# "No objects to concatenate" when the data folder is missing or empty.
if not all_files:
    raise FileNotFoundError(
        f"No MetroCard CSV files found matching {file_pattern!r} "
        f"(working directory: {os.getcwd()!r})"
    )

# Read and concatenate
df_list = []
for file in all_files:
    df = pd.read_csv(file)
    df['SOURCE_FILE'] = os.path.basename(file)  # track source file for later diagnostics
    df_list.append(df)
combined_metro_df = pd.concat(df_list, ignore_index=True)

# print() of a large frame is truncated by pandas to the first/last rows.
print(combined_metro_df)

# Convert date and prepare monthly data
# The source files do not share a single date layout, so each value is parsed
# individually (format='mixed') with day-first interpretation.
raw_validation_dates = combined_metro_df['VALIDATION_DATE']
parsed_validation_dates = pd.to_datetime(
    raw_validation_dates,
    format='mixed',
    dayfirst=True,
    errors='coerce'
)

# Check unparseable rows.
# errors='coerce' turns anything unparseable into NaT without raising, so the
# failures are counted here: a value that was present before parsing but is
# NaT afterwards failed to parse. This must run before VALIDATION_DATE is
# overwritten so the original text can still be shown.
unparseable_dates = parsed_validation_dates.isna() & raw_validation_dates.notna()
print("Unparseable rows:", int(unparseable_dates.sum()))
if unparseable_dates.any():
    print(combined_metro_df.loc[unparseable_dates, ['SOURCE_FILE', 'VALIDATION_DATE']].head())

combined_metro_df['VALIDATION_DATE'] = parsed_validation_dates

# Same treatment for the boardings count: coerce to numeric, but report how
# many values were not numeric instead of silently turning them into NaN.
raw_boardings = combined_metro_df['BAND_BOARDINGS_FLOOR']
numeric_boardings = pd.to_numeric(raw_boardings, errors='coerce')
non_numeric_boardings = numeric_boardings.isna() & raw_boardings.notna()
print("Non-numeric BAND_BOARDINGS_FLOOR rows:", int(non_numeric_boardings.sum()))
combined_metro_df['BAND_BOARDINGS_FLOOR'] = numeric_boardings

# Month = first day of the calendar month containing VALIDATION_DATE
# (NaT stays NaT).
combined_metro_df['Month'] = combined_metro_df['VALIDATION_DATE'].dt.to_period('M').dt.to_timestamp()

# Monthly totals: sum BAND_BOARDINGS_FLOOR within each Month and expose the
# result as a two-column frame (Month, MetroCard_Usage).
monthly_metro = (
    combined_metro_df
    .groupby('Month')['BAND_BOARDINGS_FLOOR']
    .sum()
    .rename('MetroCard_Usage')
    .reset_index()
)

# Persist the aggregate so it can be reloaded without re-reading the raw files,
# then preview the first rows.
monthly_metro.to_csv("combined_metrocard_monthly.csv", index=False)
print(monthly_metro.head())


         VALIDATION_DATE  NUM_MODE_TRANSPORT ROUTE_CODE  ROUTE_DIRECTION  \
0             22/05/2023                   0          0                0   
1             19/06/2023                   0          0                0   
2              2/04/2023                   1     OD1MTB                0   
3              4/05/2023                   1     OD1MTB                0   
4             13/05/2023                   1     OD1MTB                0   
...                  ...                 ...        ...              ...   
59247577      11/04/2024                   1         J1                1   
59247578      11/04/2024                   1         J2                1   
59247579      11/04/2024                   1        M44                1   
59247580      11/04/2024                   1       178M                1   
59247581      11/04/2024                   1        252                1   

           GTFS_ID  MEDIUM_TYPE BAND_BOARDINGS  BAND_BOARDINGS_FLOOR  \
0         10184