In [12]:
import pandas as pd
import geopandas as gpd
import os

# === BASE DIRECTORY ===
# The default is the original author's local folder. Set the LTS_BASE_DIR
# environment variable to point the notebook at another copy of the data
# without editing this cell.
base_dir = os.environ.get(
    "LTS_BASE_DIR",
    r"C:\Users\natda\OneDrive - Northeastern University\Desktop\NatDave\Academics\PhD_NU\RESEARCH\Traffic_Stress\Boston",
)

# === FILE PATHS ===
roads_path = os.path.join(base_dir, "street_network.shp")

# === LOAD SHAPEFILE ===
roads = gpd.read_file(roads_path)

# === CONVERT RELEVANT COLUMNS TO NUMERIC ===
# Every column that calculate_LTS_2025 compares against a number is coerced
# here. qProtected is included because the function tests `qProtected == 1`
# exactly like qNoAccess and qExclude; if the column were read as text that
# test could never be true.
# errors='coerce' turns any value that cannot be parsed into NaN.
numeric_cols = [
    'SPEED', 'qDirLanes', 'qNoAccess', 'qExclude', 'qProtected',
    'BL_WIDTH', 'BL_REACH', 'PARKALONG', 'ILLPARKING', 'StOperNEU',
    'ADT_Infer', 'FEDERALFUN',
]
roads[numeric_cols] = roads[numeric_cols].apply(pd.to_numeric, errors='coerce')

In [13]:
# Row count per FEDERALFUN code after the numeric coercion; dropna=False also counts missing values.
roads['FEDERALFUN'].value_counts(dropna=False)

FEDERALFUN
7    14461
0     9193
3     4977
4     3200
5     2593
2      825
6      797
1      750
Name: count, dtype: int64

In [14]:
# === FUNCTION TO CALCULATE LTS_2025 ===
# bike_type2 codes grouped by the branch of the decision tree that uses them.
_LTS_SEPARATED_TYPES = ("SUP", "SUP_NAT", "SUP_MINOR", "SBL", "SBL_LEFT", "SBL_BL", "SBL_MIX", "CARFREE")
_LTS_BIKE_LANE_TYPES = ("BL", "BL_LEFT", "BL_MIX", "BL_BUF", "BL_BUF_LEFT", "BUS_BL", "BUS_BL_LEFT", "BL_PK_BUS_BL")
_LTS_BIKE_LANE_PARKING_TYPES = ("BL", "BL_LEFT", "BL_BUF", "BL_MIX", "BL_PK_BUS_BL")
_LTS_MIXED_TRAFFIC_TYPES = ("MIX_CONTRA", "MIX_SCONTRA", "SLM", "SLMTC")


def calculate_LTS_2025(row):
    """Calculate the LTS_2025 code for one road segment.

    Parameters
    ----------
    row : mapping-like
        Anything indexable by column name (for example the Series passed by
        ``DataFrame.apply(..., axis=1)``). The keys read are ``qProtected``,
        ``qNoAccess``, ``qExclude``, ``SPEED``, ``qDirLanes``, ``BL_WIDTH``,
        ``BL_REACH``, ``PARKALONG``, ``bike_type2``, ``StOperNEU``,
        ``ADT_Infer`` and ``FEDERALFUN``.

    Returns
    -------
    int
        * 6  -- ``qNoAccess`` is 1, 98 or 99 (freeway / ramps).
        * 5  -- ``qExclude`` is 1 or 5, or ``bike_type2`` is "WALK_YR_BIKE".
        * 1-4 -- stress level from the separated, bike-lane or mixed-traffic
          rules below.
        * 98 -- bike lane not next to parking, but no rule matched.
        * 99 -- mixed traffic, but no rule matched.
        * 96 -- the segment did not fall into any category at all.

    Notes
    -----
    Branches are evaluated top to bottom and the first match wins, so their
    order is part of the rule set. Any comparison against NaN is False, so a
    segment with missing numeric inputs may fall through to 96, 98 or 99.
    """
    protected = row['qProtected']
    no_access = row['qNoAccess']
    exclude = row['qExclude']
    speed = row['SPEED']
    dir_lanes = row['qDirLanes']
    bl_width = row['BL_WIDTH']
    bl_reach = row['BL_REACH']
    parkalong = row['PARKALONG']
    bike_type = row['bike_type2']
    st_oper_neu = row['StOperNEU']
    adt = row['ADT_Infer']
    fed_fun = row['FEDERALFUN']

    if no_access in (1, 98, 99):
        return 6  # freeway (ramps)
    # Plain equality on purpose: `x in ("WALK_YR_BIKE")` is a substring test
    # against a str (the parentheses do not make a tuple), so fragments such
    # as "WALK" or "BIKE" matched and a float NaN raised TypeError.
    elif exclude in (1, 5) or bike_type == "WALK_YR_BIKE":
        return 5  # cemetery, private property, peds only, etc.
    elif fed_fun == 0 and exclude == 0:
        return 1  # Parks, alleys, bike paths, streets with almost no traffic
    elif bike_type in _LTS_SEPARATED_TYPES:
        return 1  # Separated from traffic
    elif protected == 1:
        return 1  # Separated from traffic

    elif (bl_width >= 4 or bike_type in _LTS_BIKE_LANE_TYPES) and parkalong == 0:
        # Conventional bike lanes NOT adjacent to parking
        if (
            (dir_lanes >= 3 and speed > 38.5) or
            (dir_lanes == 2 and speed > 43.5 and bl_width < 6) or
            (dir_lanes <= 1 and speed > 48.5 and bl_width < 6)
        ):
            return 4
        elif dir_lanes >= 3 or speed > 38.5:
            return 3
        elif (
            dir_lanes == 2 or
            speed > 33.5 or  # already covers the one-lane, speed > 33.5 case
            (dir_lanes == 1 and bl_width < 6)
        ):
            return 2
        elif dir_lanes <= 1 and bl_width >= 6:
            return 1
        else:
            return 98  # For other cases that do not meet conditions

    elif (
        (bl_width >= 4 or bike_type in _LTS_BIKE_LANE_PARKING_TYPES)
        and parkalong == 1
        and bl_reach >= 12
    ):
        # Conventional bike lanes adjacent to parking
        if dir_lanes <= 1 and speed <= 28.5 and bl_reach >= 15:
            return 1
        elif (
            (dir_lanes <= 1 and speed <= 38.5 and bl_reach >= 15) or
            (dir_lanes <= 1 and speed <= 33.5 and bl_reach < 15) or
            (dir_lanes > 1 and speed <= 28.5 and st_oper_neu == 1 and bl_reach >= 15) or
            (dir_lanes == 2 and speed <= 28.5 and bl_reach >= 15)
        ):
            return 2
        else:
            return 3

    elif (
        bl_width < 4 or
        (bl_width >= 4 and bl_reach == 1) or
        (bl_width >= 4 and parkalong > 0 and bl_reach < 12) or
        bike_type in _LTS_MIXED_TRAFFIC_TYPES
    ):
        # Mixed traffic conditions
        if (
            (dir_lanes >= 3 and speed > 28.5) or
            (dir_lanes == 2 and adt > 8_000 and speed > 28.5) or
            (dir_lanes == 2 and adt <= 8_000 and speed > 38.5)
        ):
            return 4
        elif (
            dir_lanes >= 2 or
            speed > 38.5 or
            (dir_lanes == 1 and adt > 3_000) or
            (dir_lanes == 1 and adt > 1_500 and speed > 23.5)
        ):
            return 3
        elif (
            (dir_lanes == 1 and adt > 1_000) or
            (dir_lanes == 1 and speed > 28.5) or
            (dir_lanes == 0 and adt > 1_500) or
            (dir_lanes == 0 and speed > 28.5)
        ):
            return 2
        elif (
            (dir_lanes == 1 and speed <= 28.5) or
            (dir_lanes == 0 and speed <= 28.5)
        ):
            return 1
        else:
            return 99  # For other cases that do not meet conditions

    else:
        return 96  # Default return for other cases

# === CALCULATE LTS_2025 FOR ROADS ===
# axis=1 hands each row to calculate_LTS_2025 and collects one code per row.
lts_2025_codes = roads.apply(calculate_LTS_2025, axis=1)
roads['LTS_2025'] = lts_2025_codes

# === SAVE THE UPDATED ROADS SHAPEFILE ===
# Written to roads_path, the same file the layer was read from, so the
# shapefile on disk is replaced by the version carrying LTS_2025.
roads.to_file(roads_path, driver="ESRI Shapefile")

# Output summary: a confirmation line, then the row count per LTS_2025 code
# (missing values included).
print(
    "LTS_2025 calculation complete.",
    roads['LTS_2025'].value_counts(dropna=False),
    sep="\n",
)

LTS_2025 calculation complete.
LTS_2025
1     18533
3      6296
5      6283
2      2576
6      1885
4      1147
98       76
Name: count, dtype: int64


In [15]:
# Report whether the qProtected column is present on the loaded layer.
print(
    "Column 'qProtected' exists in the dataframe."
    if 'qProtected' in roads.columns
    else "Column 'qProtected' does not exist in the dataframe."
)


Column 'qProtected' exists in the dataframe.


In [16]:
# Row count per LTS_2018 code, missing values included (dropna=False).
roads['LTS_2018'].value_counts(dropna=False)

LTS_2018
1    17648
3     7140
5     6055
2     2186
6     1839
4     1682
0      246
Name: count, dtype: int64

In [17]:
# Collect, in column order, every column of `roads` whose name contains "LTS".
lts_columns = list(filter(lambda name: "LTS" in name, roads.columns))
print(lts_columns)

['qLTS', 'qLTS_Retn', 'qLTS_Own', 'LTS2006', 'LTS2014', 'LTSEmerald', 'LTSBForE', 'LTS_DT_Imp', 'LTS_DT_BE', 'LTS_DT_Col', 'LTS_Colum', 'LTS_All_Im', 'LTS_2017', 'LTS_2018', 'LTS_2025', 'LTS_2025b']


In [18]:
# Keep only rows whose code is 1, 2, 3 or 4 in BOTH columns; rows carrying
# any other code in either column (the value counts show 0, 5, 6 and 98)
# are left out of this comparison.
valid_values = {1, 2, 3, 4}
filtered_rows = roads[roads["LTS_2018"].isin(valid_values) & roads["LTS_2025"].isin(valid_values)]

valid_2018 = filtered_rows["LTS_2018"]
valid_2025 = filtered_rows["LTS_2025"]

# Count the number of instances for each direction of change
cases_2025_gt_2018 = (valid_2025 > valid_2018).sum()
cases_2025_eq_2018 = (valid_2025 == valid_2018).sum()
cases_2025_lt_2018 = (valid_2025 < valid_2018).sum()

# Count every (LTS_2025, LTS_2018) pair of differing levels. The pairs are
# derived from valid_values so the two cannot drift apart; iteration order
# is (1, 2), (1, 3), ... (4, 3).
levels = sorted(valid_values)
breakdown = {
    (level_2025, level_2018): ((valid_2025 == level_2025) & (valid_2018 == level_2018)).sum()
    for level_2025 in levels
    for level_2018 in levels
    if level_2025 != level_2018
}

# Print general summary
print(f"Cases where NatDave > Theja: {cases_2025_gt_2018}")
print(f"Cases where NatDave = Theja: {cases_2025_eq_2018}")
print(f"Cases where NatDave < Theja: {cases_2025_lt_2018}")

# Print detailed breakdown
print("\nDetailed Breakdown:")
for (lts_2025, lts_2018), count in breakdown.items():
    print(f"Cases where NatDave says {lts_2025} but Theja says {lts_2018}: {count}")

Cases where NatDave > Theja: 14
Cases where NatDave = Theja: 26524
Cases where NatDave < Theja: 1973

Detailed Breakdown:
Cases where NatDave says 1 but Theja says 2: 391
Cases where NatDave says 1 but Theja says 3: 362
Cases where NatDave says 1 but Theja says 4: 242
Cases where NatDave says 2 but Theja says 1: 12
Cases where NatDave says 2 but Theja says 3: 685
Cases where NatDave says 2 but Theja says 4: 89
Cases where NatDave says 3 but Theja says 1: 2
Cases where NatDave says 3 but Theja says 2: 0
Cases where NatDave says 3 but Theja says 4: 204
Cases where NatDave says 4 but Theja says 1: 0
Cases where NatDave says 4 but Theja says 2: 0
Cases where NatDave says 4 but Theja says 3: 0


In [19]:
# Boolean mask over ALL rows (no filtering on code values): True where the
# 2018 and 2025 columns disagree.
codes_2018 = roads["LTS_2018"]
codes_2025 = roads["LTS_2025"]
diff_mask = codes_2018 != codes_2025

# Number of disagreements, and that number as a share of every row.
num_diff = diff_mask.sum()
proportion_diff = num_diff / len(roads)

# Print results
print(
    f"Number of differing rows: {num_diff} out of {len(roads)}",
    f"Proportion of differing rows: {proportion_diff:.2%}",
    sep="\n",
)

Number of differing rows: 2432 out of 36796
Proportion of differing rows: 6.61%


In [20]:
# Row count per LTS_2025 code, missing values included (dropna=False).
roads['LTS_2025'].value_counts(dropna=False)

LTS_2025
1     18533
3      6296
5      6283
2      2576
6      1885
4      1147
98       76
Name: count, dtype: int64

In [21]:
# Geometry length of every segment, in the units of the layer's coordinate system.
# NOTE(review): the column name and the conversion factor below assume those
# units are metres -- confirm against roads.crs.
segment_lengths = roads.geometry.length
roads["LENGTH_METERS"] = segment_lengths

# Metres to miles.
roads["LENGTH_MILES"] = roads["LENGTH_METERS"] * 0.000621371

# Total mileage within each LTS_2025 code.
miles_by_code = roads.groupby("LTS_2025")["LENGTH_MILES"]
lts_miles = miles_by_code.sum()

# Print the results
print("Total miles for each LTS_2025 value:", lts_miles, sep="\n")

Total miles for each LTS_2025 value:
LTS_2025
1     1031.871608
2      124.587350
3      303.680924
4       64.009505
5      316.147408
6      124.071922
98       1.523742
Name: LENGTH_MILES, dtype: float64


In [22]:
# Sum of LENGTH_MILES over every row, whatever its LTS code.
roads["LENGTH_MILES"].sum()

1965.8924581837405

In [23]:
# Select rows whose LTS_2025 code is numerically lower than the LTS_2018 code.
# NOTE(review): every code takes part in this comparison, including values
# outside 1-4 (0, 5, 6, 98 appear in the value counts), whereas the
# transition breakdown earlier keeps only 1-4 -- confirm that is intended.
lower_in_2025 = roads["LTS_2025"] < roads["LTS_2018"]
improve = roads[lower_in_2025]

# Mileage covered by those rows.
total_length_miles = improve["LENGTH_MILES"].sum()

# Print the result
print(f"Total length in miles where LTS_2025 < LTS_2018: {total_length_miles}")

Total length in miles where LTS_2025 < LTS_2018: 95.50728326207495


In [None]:
# LTS_DIFF = LTS_2018 - LTS_2025, so a positive value means the 2025 code is
# the lower of the two.
# NOTE(review): computed for every row, including codes outside 1-4 --
# confirm downstream users filter those before interpreting the difference.
earlier_codes = roads["LTS_2018"]
later_codes = roads["LTS_2025"]
roads["LTS_DIFF"] = earlier_codes - later_codes

# Print the first few rows to verify
preview_columns = ["LTS_2018", "LTS_2025", "LTS_DIFF"]
print(roads[preview_columns].head())

# Save the updated shapefile back to the path it was loaded from.
# NOTE(review): shapefile attribute names are presumably limited to 10
# characters, so longer column names may be shortened on write -- verify
# the saved field names.
roads.to_file(roads_path, driver="ESRI Shapefile")