In [89]:
import pandas as pd

In [90]:
# Load the rail ridership export (by season, time period, route line and stop).
# NOTE(review): this is an absolute path under one user's home directory, so the
# cell only runs on a machine that has the file at exactly this location.
df = pd.read_csv(
    filepath_or_buffer="/Users/jasminehuang/Desktop/datasets to be used/Rail_Ridership_by_Season_Time_Period_RouteLine_and_Stop_5316095790925663588.csv"
)

In [91]:
def calculate_peak_data(df, day_type, day_types):
    """Summarise boardings per stop for one group of day types.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``day_type_name``, ``stop_name``,
        ``time_period_name`` and ``average_ons``.
    day_type : str
        Label used only as the prefix of the output column names.
    day_types : list
        Values of ``day_type_name`` whose rows are kept.

    Returns
    -------
    pandas.DataFrame
        One row per stop with the columns ``stop_name``,
        ``{day_type}_overall_average_ons`` (mean of ``average_ons`` over the
        kept rows), ``{day_type}_peak_time`` (the time period with the largest
        summed ``average_ons``) and ``{day_type}_peak_time_average_ons``
        (that largest sum -- a total over the kept rows, not a mean).
    """
    # Keep only the rows that belong to the requested day types.
    subset = df.loc[df['day_type_name'].isin(day_types)]

    # Mean boardings per stop across every remaining row.
    station_means = (
        subset.groupby('stop_name')['average_ons']
        .mean()
        .reset_index()
        .rename(columns={'average_ons': f'{day_type}_overall_average_ons'})
    )

    # Total boardings per (stop, time period); the busiest period of each stop
    # is the row holding the maximum total within that stop.
    period_totals = (
        subset.groupby(['stop_name', 'time_period_name'])['average_ons']
        .sum()
        .reset_index()
    )
    busiest_row_labels = period_totals.groupby('stop_name')['average_ons'].idxmax()
    station_peaks = period_totals.loc[busiest_row_labels]

    # Join both summaries on the stop and give the peak columns prefixed names.
    peak_column_names = {
        'time_period_name': f'{day_type}_peak_time',
        'average_ons': f'{day_type}_peak_time_average_ons',
    }
    return pd.merge(station_means, station_peaks, on='stop_name').rename(
        columns=peak_column_names
    )

# Step 2: Build the per-station summaries, once for weekdays and once for
# weekends (the weekend summary pools the 'saturday' and 'sunday' day types).
weekday_data = calculate_peak_data(df, day_type='weekday', day_types=['weekday'])
weekend_data = calculate_peak_data(df, day_type='weekend', day_types=['saturday', 'sunday'])

In [92]:
station_mapping_df = pd.read_csv("/Users/jasminehuang/Desktop/datasets to be used/cleaned_mbta_stops.csv")


def _replace_stop_name_with_station_id(summary, station_id_lookup):
    """Return ``summary`` with ``stop_name`` holding the mapped ``station_id``.

    Parameters
    ----------
    summary : pandas.DataFrame
        Per-station summary containing a ``stop_name`` column.
    station_id_lookup : pandas.DataFrame
        Frame with ``stop_name`` and ``station_id`` columns.

    Returns
    -------
    pandas.DataFrame
        A new frame in which ``stop_name`` carries the station id and the
        temporary ``station_id`` column is removed. Rows whose stop name has
        no match in the lookup are dropped.
    """
    merged = pd.merge(summary, station_id_lookup, on='stop_name', how='left')
    # Replace stop_name with station_id, then drop the now redundant column.
    merged['stop_name'] = merged['station_id']
    merged = merged.drop(columns=['station_id'])
    # Unmatched stops come out of the left merge as NaN; discard them.
    return merged.dropna(subset=['stop_name'])


# A left merge emits one output row per matching lookup row, so a repeated
# (stop_name, station_id) pair in the stops file would duplicate summary rows.
# Collapsing exact repeats keeps every distinct mapping and avoids that.
station_id_lookup = station_mapping_df[['stop_name', 'station_id']].drop_duplicates()

weekday_data = _replace_stop_name_with_station_id(weekday_data, station_id_lookup)
weekend_data = _replace_stop_name_with_station_id(weekend_data, station_id_lookup)



In [93]:
# Preview the weekday summary; at this point stop_name holds the station ids
# that replaced the original stop names.
weekday_data

Unnamed: 0,stop_name,weekday_overall_average_ons,weekday_peak_time,weekday_peak_time_average_ons
0,place-aport,443.703704,MIDDAY_BASE,4898
1,place-alfcl,670.888889,AM_PEAK,14424
2,place-alsgr,40.944444,MIDDAY_BASE,722
3,place-andrw,362.851852,AM_PEAK,4752
4,place-aqucl,289.129630,PM_PEAK,5674
...,...,...,...,...
115,place-welln,429.462963,AM_PEAK,8217
116,place-wlsta,141.925926,AM_PEAK,3031
117,place-wondl,390.851852,AM_PEAK,8385
118,place-wimnl,126.925926,AM_PEAK,1786


In [94]:
# Step 5: Map time periods to start times
def _peak_period_to_start_time(summary, peak_column, periods):
    """Replace the period name in ``peak_column`` with that period's start time.

    Parameters
    ----------
    summary : pandas.DataFrame
        Per-station summary whose ``peak_column`` holds time period names.
    peak_column : str
        Name of the column to translate (e.g. ``'weekday_peak_time'``).
    periods : pandas.DataFrame
        Frame with ``time_period_name`` and ``period_start_time`` columns.

    Returns
    -------
    pandas.DataFrame
        A new frame with ``peak_column`` overwritten by ``period_start_time``
        and both lookup columns removed. Period names with no match in
        ``periods`` become NaN (left merge).
    """
    # Collapse exact repeats first: a left merge emits one row per matching
    # lookup row, so a repeated (name, start time) pair would duplicate stations.
    lookup = periods[['time_period_name', 'period_start_time']].drop_duplicates()
    merged = pd.merge(
        summary,
        lookup,
        left_on=peak_column,
        right_on='time_period_name',
        how='left',
    )
    merged[peak_column] = merged['period_start_time']
    return merged.drop(columns=['time_period_name', 'period_start_time'])


# NOTE(review): time_period_df is not defined in any cell visible here; it has to
# exist in the kernel already -- confirm where it is loaded before a clean re-run.
weekday_data = _peak_period_to_start_time(weekday_data, 'weekday_peak_time', time_period_df)
weekend_data = _peak_period_to_start_time(weekend_data, 'weekend_peak_time', time_period_df)

In [102]:
# Step 6: Combine weekday and weekend data into a nested structure
# (starts empty; one entry per station is added further down).
combined = dict()

# Helper function to strip prefixes from dictionary keys
def strip_prefix(data, prefix):
    """Return a copy of ``data`` with a leading ``"<prefix>_"`` removed from keys.

    Only a prefix at the very start of a key is stripped, and only once; keys
    that do not start with ``"<prefix>_"`` are kept unchanged. Values are
    passed through untouched.

    Parameters
    ----------
    data : dict
        Mapping with string keys.
    prefix : str
        Prefix to remove, given without the trailing underscore.

    Returns
    -------
    dict
        New dictionary with the renamed keys.
    """
    marker = f"{prefix}_"
    # str.replace would also delete the marker from the middle of a key and
    # remove every occurrence, so slice off a leading match explicitly.
    return {
        (key[len(marker):] if key.startswith(marker) else key): value
        for key, value in data.items()
    }

def _station_entry(frame, station, prefix):
    """Return the first row of ``frame`` for ``station`` as a prefix-free dict.

    Parameters
    ----------
    frame : pandas.DataFrame
        Summary frame with a ``stop_name`` column.
    station : object
        Value to look up in ``stop_name``.
    prefix : str
        Column-name prefix removed via ``strip_prefix``.

    Returns
    -------
    dict
        The row's values keyed by the stripped column names, without the
        ``stop_name`` key; an empty dict when the station has no row.
    """
    rows = frame[frame['stop_name'] == station]
    if rows.empty:
        return {}
    entry = strip_prefix(rows.iloc[0].to_dict(), prefix)
    # The station is already the outer key, so drop the redundant copy.
    entry.pop('stop_name', None)
    return entry


# Iterate in sorted order: a bare set of strings has no stable iteration order
# between interpreter runs, which would reshuffle the exported JSON each time.
# key=str keeps the sort from failing should the ids ever be of mixed types.
all_stations = set(weekday_data['stop_name']).union(weekend_data['stop_name'])
for station in sorted(all_stations, key=str):
    combined[station] = {
        'weekday': _station_entry(weekday_data, station, 'weekday'),
        'weekend': _station_entry(weekend_data, station, 'weekend'),
    }

# Step 7: Export to JSON
import json

# Write the nested station dictionary to a file in the current working
# directory, indented by four spaces.
with open("peak_time_ridership.json", mode="w") as out_file:
    json.dump(obj=combined, fp=out_file, indent=4)
