In [36]:
import re
import pandas as pd

#file for making the tables for the 2015-2017 data

In [33]:
# Access modes.
# The list gets its own name so it stays reachable after a later cell rebinds
# COLUMNS. COLUMNS is still assigned here because parse_route_data reads that
# global when it labels its DataFrames.
ACCESS_MODE_COLUMNS = [
    "Access: Walked or bicycled",
    "Access: Drove alone",
    "Access: Carpooled",
    "Access: Dropped off by personal vehicle",
    "Access: Dropped off by other vehicle",
    "Previous Mode: Bus or Silver Line",
    "Previous Mode: Rail rapid transit",
    "Previous Mode: Commuter rail or ferry"
]
# Copy so that mutating COLUMNS never alters the named list.
COLUMNS = list(ACCESS_MODE_COLUMNS)

In [2]:
# Alternative modes.
# The list gets its own name so it stays reachable after a later cell rebinds
# COLUMNS. COLUMNS is still assigned here because parse_route_data reads that
# global when it labels its DataFrames.
ALT_MODE_COLUMNS = [
    "Use Alt Mode: Yes",
    "Use Alt Mode: No",
    "Alt Mode: Walk",
    "Alt Mode: Bicycle",
    "Alt Mode: Drive alone",
    "Alt Mode: Drive or ride in a carpool",
    "Alt Mode: Take a taxi or use a rideshare company",
    "Alt Mode: Take a private shuttle or other transit",
    "Alt Mode: Take a different MBTA service",
    "Alt Mode: Other"
]
# Copy so that mutating COLUMNS never alters the named list.
COLUMNS = list(ALT_MODE_COLUMNS)

In [9]:
# Auto availability.
# The list gets its own name so it stays reachable after a later cell rebinds
# COLUMNS. COLUMNS is still assigned here because parse_route_data reads that
# global when it labels its DataFrames.
AUTO_AVAILABILITY_COLUMNS = [
    "Vehicles: 0",
    "Vehicles: 1",
    "Vehicles: 2",
    "Vehicles: 3 or more",

    "Vehicles/Capita: None",
    "Vehicles/Capita: 0.01–0.49",
    "Vehicles/Capita: 0.50–0.99",
    "Vehicles/Capita: 1.00–1.49",
    "Vehicles/Capita: 1.50–1.99",
    "Vehicles/Capita: 2 or more",

    "License: Yes",
    "License: No"
]
# Copy so that mutating COLUMNS never alters the named list.
COLUMNS = list(AUTO_AVAILABILITY_COLUMNS)


In [21]:
# Income.
# The list gets its own name so it stays reachable after a later cell rebinds
# COLUMNS. COLUMNS is still assigned here because parse_route_data reads that
# global when it labels its DataFrames.
INCOME_COLUMNS = [
    "Income: Less than $14,500",
    "Income: $14,500 to $28,999",
    "Income: $29,000 to $43,499",
    "Income: $43,500 to $57,999",
    "Income: $58,000 to $75,999",
    "Income: $76,000 to $108,499",
    "Income: $108,500 to $151,999",
    "Income: $152,000 or more",
    "Income: Prefer not to say",
    "Low-income: Yes",
    "Low-income: No"
]
# Copy so that mutating COLUMNS never alters the named list.
COLUMNS = list(INCOME_COLUMNS)

In [26]:
# Race and ethnicity.
# The list gets its own name so it stays reachable after a later cell rebinds
# COLUMNS. COLUMNS is still assigned here because parse_route_data reads that
# global when it labels its DataFrames.
RACE_ETHNICITY_COLUMNS = [
    "Race: American Indian or Alaska Native",
    "Race: Asian",
    "Race: Black or African American",
    "Race: Native Hawaiian or other Pacific Islander",
    "Race: White",
    "Race: Other",
    "Race: Prefer not to say",
    "Hispanic: Yes",
    "Hispanic: No",
    "Hispanic: Prefer not to say",
    "Minority: Yes",
    "Minority: No"
]
# Copy so that mutating COLUMNS never alters the named list.
COLUMNS = list(RACE_ETHNICITY_COLUMNS)

In [31]:
# Other demographics (age, gender, English ability).
# The list gets its own name so it stays reachable after a later cell rebinds
# COLUMNS. COLUMNS is still assigned here because parse_route_data reads that
# global when it labels its DataFrames.
OTHER_DEMOGRAPHICS_COLUMNS = [
    "Age: Under 18",
    "Age: 18 to 21",
    "Age: 22 to 34",
    "Age: 35 to 44",
    "Age: 45 to 64",
    "Age: 65 or over",

    "Gender: Man",
    "Gender: Woman",
    "Gender: Other",
    "Gender: Prefer not to say",

    "English Ability: Always",
    "English Ability: Often",
    "English Ability: Sometimes",
    "English Ability: Never",
    "English Ability: Prefer not to say"
]
# Copy so that mutating COLUMNS never alters the named list.
COLUMNS = list(OTHER_DEMOGRAPHICS_COLUMNS)

In [32]:
def parse_route_data(file_path, columns=None):
    """Parse per-route percentage lines from a text file into DataFrames.

    Every line that (after stripping) matches ``Route <name>: <int>%`` adds
    one integer to that route's running list. Each route's list is then cut
    into consecutive rows of ``len(columns)`` values and labelled with
    ``columns``. A trailing group with fewer values than columns is dropped.

    Parameters
    ----------
    file_path : str or path-like
        Text file to read.
    columns : sequence of str, optional
        Column labels for the resulting frames; its length sets the row
        width. Defaults to the module-level ``COLUMNS`` so existing
        one-argument calls keep working.

    Returns
    -------
    dict[str, pandas.DataFrame]
        Maps the route name captured from the file to a DataFrame with one
        row per complete group of values. A route with no complete group
        gets an empty frame that still carries the column labels.

    Raises
    ------
    ValueError
        If ``columns`` is empty.
    """
    # Fall back to the notebook-level list for backward compatibility.
    if columns is None:
        columns = COLUMNS
    columns = list(columns)

    # The row width must follow the column list. A fixed width only works for
    # one specific list length and makes the DataFrame constructor fail for
    # every other one.
    width = len(columns)
    if width == 0:
        raise ValueError("columns must contain at least one column name")

    pattern = re.compile(r"Route\s+([^\:]+):\s+(\d+)\s?%")

    route_data = {}
    with open(file_path, 'r') as f:
        for line in f:
            match = pattern.match(line.strip())
            if match:
                route, percent = match.groups()
                route_data.setdefault(route, []).append(int(percent))

    all_dataframes = {}
    for route, percentages in route_data.items():
        # The range stops before any trailing partial group, so only complete
        # rows are produced.
        complete_chunks = [
            percentages[start:start + width]
            for start in range(0, len(percentages) - width + 1, width)
        ]
        all_dataframes[route] = pd.DataFrame(complete_chunks, columns=columns)

    return all_dataframes

In [34]:
# Load the trip-purpose/frequency text file. parse_route_data labels its frames
# with the module-level COLUMNS, so whichever COLUMNS cell ran last decides the
# column names used here.
# NOTE(review): no COLUMNS cell visible in this notebook is labelled for trip
# purpose, and the saved output below shows access-mode columns. Confirm which
# column list this file is meant to use. The path is absolute and
# machine-specific.
route_dfs = parse_route_data("/Users/sarahyuhan/BostonBusEquity/trip_purpose_frequency.txt")

# Example: display Route 1 data
print("Route 1:")
print(route_dfs['1'])

Route 1:
   Access: Walked or bicycled  Access: Drove alone  Access: Carpooled  \
0                          65                    9                  7   

   Access: Dropped off by personal vehicle  \
0                                        9   

   Access: Dropped off by other vehicle  Previous Mode: Bus or Silver Line  \
0                                    10                                 14   

   Previous Mode: Rail rapid transit  Previous Mode: Commuter rail or ferry  
0                                 46                                     16  


In [29]:
# Load the fares file; this rebinds route_dfs. Frames are labelled with the
# current module-level COLUMNS.
# NOTE(review): no COLUMNS cell visible in this notebook is labelled for fares.
# Confirm which column list applies.
route_dfs = parse_route_data("/Users/sarahyuhan/BostonBusEquity/fares_data.txt")

In [35]:
# Load the access-mode file; this rebinds route_dfs. Frames are labelled with
# the current module-level COLUMNS. Presumably this pairs with the
# "#access modes" COLUMNS cell (going by the file name), so run that cell first.
route_dfs = parse_route_data("/Users/sarahyuhan/BostonBusEquity/access_mode_data.txt")

In [4]:
# Load the alt-mode file; this rebinds route_dfs. Frames are labelled with the
# current module-level COLUMNS. Presumably this pairs with the "#alt modes"
# COLUMNS cell (going by the file name), so run that cell first.
route_dfs = parse_route_data("/Users/sarahyuhan/BostonBusEquity/2015-2017-tables/alt_mode_data.txt")

In [None]:
# Load the auto-availability file; this rebinds route_dfs. Frames are labelled
# with the current module-level COLUMNS. Presumably this pairs with the
# "#auto availability" COLUMNS cell (going by the file name), so run that cell
# first.
route_dfs = parse_route_data("/Users/sarahyuhan/BostonBusEquity/2015-2017-tables/auto_availability_data.txt")

In [23]:
# Load the income file; this rebinds route_dfs. Frames are labelled with the
# current module-level COLUMNS. Presumably this pairs with the "#income"
# COLUMNS cell (going by the file name), so run that cell first.
route_dfs = parse_route_data("/Users/sarahyuhan/BostonBusEquity/2015-2017-tables/income_data.txt")

In [28]:
# Load the race-and-ethnicity file; this rebinds route_dfs. Frames are labelled
# with the current module-level COLUMNS. Presumably this pairs with the
# "#race and ethnicity" COLUMNS cell (going by the file name), so run that cell
# first.
route_dfs = parse_route_data("/Users/sarahyuhan/BostonBusEquity/2015-2017-tables/race_and_ethnicity_data.txt")

In [33]:
# Load the other-demographics file; this rebinds route_dfs. Frames are labelled
# with the current module-level COLUMNS. Presumably this pairs with the
# "#other demographics" COLUMNS cell (going by the file name), so run that cell
# first.
route_dfs = parse_route_data("/Users/sarahyuhan/BostonBusEquity/2015-2017-tables/other_demographics_data.txt")

In [34]:
def export_to_excel(route_dataframes, output_file):
    """Write every route's DataFrame into a single Excel sheet.

    Each frame is copied and given a leading "Route" column holding its
    dictionary key. The copies are then stacked with a fresh integer index
    and written to ``output_file`` without that index. A confirmation line
    is printed afterwards.

    Parameters
    ----------
    route_dataframes : dict
        Maps a route label to its pandas DataFrame.
    output_file : str or path-like
        Destination passed to ``DataFrame.to_excel``.
    """
    def _tag_with_route(label, frame):
        # Work on a copy so the caller's frame is left untouched.
        tagged = frame.copy()
        tagged.insert(0, "Route", label)
        return tagged

    tagged_frames = [
        _tag_with_route(label, frame)
        for label, frame in route_dataframes.items()
    ]

    stacked = pd.concat(tagged_frames, ignore_index=True)
    stacked.to_excel(output_file, index=False)
    print(f"✅ Data exported to {output_file}")


In [35]:
# Re-parse the other-demographics file, then write all routes to one workbook.
# Frames are labelled with the current module-level COLUMNS, so the intended
# COLUMNS cell must have been the last one run. Presumably that is the
# "#other demographics" cell; verify.
# The output file name is relative, so the workbook is written to the kernel's
# current working directory.
route_dfs = parse_route_data("/Users/sarahyuhan/BostonBusEquity/2015-2017-tables/other_demographics_data.txt")
export_to_excel(route_dfs, "other_demographics_data_export.xlsx")

✅ Data exported to other_demographics_data_export.xlsx
