In [2]:
import pandas as pd
import numpy as np
import os
from tqdm import tqdm
from shapely.geometry import Point
import geopandas as gpd
import matplotlib.pyplot as plt

In [3]:
# Load the curated daily train-demand tables (one CSV per period) in a single
# unpacking step; the pre-COVID file is read first, then the post-COVID file.
daily_trains_demand_pre_covid, daily_trains_demand_post_covid = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/curated/train_demand/daily_trains_demand_pre_covid.csv',
        '../data/curated/train_demand/daily_trains_demand_post_covid.csv',
    )
)

In [6]:
# Number of distinct Business_Date values in the post-COVID table.
# dropna=False keeps a missing date counted as its own value, matching len(unique()).
daily_trains_demand_post_covid['Business_Date'].nunique(dropna=False)

546

In [14]:
def get_daily_demand_in_rows(demand_df: pd.DataFrame, feature: str) -> pd.DataFrame:
    """Reshape a long per-station table into one row per ``Business_Date``.

    The frame is split by ``Station_Name``; each station's ``feature`` column
    is renamed to ``f'{feature}_{station_name}'`` and the per-station frames
    are outer-merged on ``Business_Date`` so every date seen for any station
    appears in the result.

    Parameters
    ----------
    demand_df : pd.DataFrame
        Must contain the columns ``'Station_Name'``, ``'Business_Date'`` and
        ``feature``.
    feature : str
        Name of the value column to spread across stations.

    Returns
    -------
    pd.DataFrame
        A ``'Business_Date'`` column followed by one ``f'{feature}_{station}'``
        column per station. Every missing value in the merged frame is
        replaced with 0. If ``demand_df`` yields no station groups, an empty
        frame with only a ``'Business_Date'`` column is returned.

    Raises
    ------
    KeyError
        If any of the required columns is absent from ``demand_df``.

    Notes
    -----
    Standard ``pd.merge`` semantics apply: if a station has several rows for
    the same ``Business_Date``, the merged result will contain repeated rows
    for that date. NOTE(review): for log-scaled features a filled 0 is
    indistinguishable from a real log value of 0 -- confirm this is intended.
    """
    df_with_feature = demand_df[['Station_Name', 'Business_Date', feature]]

    station_df_list = []
    # `station_name` rather than `id`, which would shadow the builtin.
    for station_name, station_df in tqdm(df_with_feature.groupby('Station_Name')):
        station_df = station_df.rename({feature: f'{feature}_{station_name}'}, axis=1)
        station_df = station_df.drop('Station_Name', axis=1)
        station_df_list.append(station_df)

    # No groups (e.g. an empty input frame): return an empty result instead of
    # failing on an unassigned `merged_df` further down.
    if not station_df_list:
        return pd.DataFrame(columns=['Business_Date'])

    # Seed with the first station, then outer-merge the rest in group order.
    merged_df = station_df_list[0]
    for station_df in station_df_list[1:]:
        merged_df = pd.merge(merged_df, station_df, on='Business_Date', how='outer')

    return merged_df.fillna(0)

In [20]:
# Build the wide (one column per station) frame for each log-scaled feature.
# Each pair is evaluated pre-COVID first, then post-COVID, one feature at a time.
log_demand_precovid, log_demand_postcovid = (
    get_daily_demand_in_rows(period_df, 'log_Total_Demand')
    for period_df in (daily_trains_demand_pre_covid, daily_trains_demand_post_covid)
)
log_alighting_precovid, log_alighting_postcovid = (
    get_daily_demand_in_rows(period_df, 'log_Passenger_Alightings')
    for period_df in (daily_trains_demand_pre_covid, daily_trains_demand_post_covid)
)
log_boarding_precovid, log_boarding_postcovid = (
    get_daily_demand_in_rows(period_df, 'log_Passenger_Boardings')
    for period_df in (daily_trains_demand_pre_covid, daily_trains_demand_post_covid)
)

  0%|          | 0/222 [00:00<?, ?it/s]

100%|██████████| 222/222 [00:00<00:00, 1694.28it/s]
100%|██████████| 223/223 [00:00<00:00, 1628.42it/s]
100%|██████████| 222/222 [00:00<00:00, 2254.79it/s]
100%|██████████| 223/223 [00:00<00:00, 2530.26it/s]
100%|██████████| 222/222 [00:00<00:00, 2005.69it/s]
100%|██████████| 223/223 [00:00<00:00, 1931.47it/s]


In [13]:
# Create the output directory for the ML feature tables.
# exist_ok=True keeps this cell re-runnable: a plain os.mkdir raises
# FileExistsError once the directory has been created by an earlier run.
os.makedirs('../data/curated/ML_features', exist_ok=True)

In [23]:
# Persist each wide feature frame as CSV, without writing the row index.
for _feature_df, _csv_path in (
    (log_demand_precovid, '../data/curated/ML_features/log_demand_precovid.csv'),
    (log_demand_postcovid, '../data/curated/ML_features/log_demand_postcovid.csv'),
    (log_alighting_precovid, '../data/curated/ML_features/log_alighting_precovid.csv'),
    (log_alighting_postcovid, '../data/curated/ML_features/log_alighting_postcovid.csv'),
    (log_boarding_precovid, '../data/curated/ML_features/log_boarding_precovid.csv'),
    (log_boarding_postcovid, '../data/curated/ML_features/log_boarding_postcovid.csv'),
):
    _feature_df.to_csv(_csv_path, index=False)