In [6]:
import pandas as pd

df = pd.read_csv('/Users/aryangoel/Desktop/Updated_DataSet.csv')

# Display first 5 rows (Jupyter will format it nicely)
df.head()


Unnamed: 0.1,Unnamed: 0,CRASH_DATE,CRASH_HOUR,CRASH_DAY_OF_WEEK,CRASH_MONTH,LATITUDE,LONGITUDE,LOCATION,DAMAGE,CRASH_TYPE,...,INJURIES_TOTAL,INJURIES_FATAL,INJURIES_INCAPACITATING,INJURIES_NON_INCAPACITATING,STREET_NAME,PRIM_CONTRIBUTORY_CAUSE,SEC_CONTRIBUTORY_CAUSE,LIGHTING_CONDITION,CRASH_DATE_ONLY,INJURY_SCORE
0,0,10/24/2025 03:33:00 AM,3,6,10,41.776529,-87.700489,POINT (-87.700488959345 41.776528764215),"OVER $1,500",INJURY AND / OR TOW DUE TO CRASH,...,0.0,0.0,0.0,0.0,ALBANY AVE,UNABLE TO DETERMINE,UNABLE TO DETERMINE,"DARKNESS, LIGHTED ROAD",10/24/2025,0.0
1,1,10/24/2025 01:43:00 AM,1,6,10,41.97435,-87.65481,POINT (-87.654810487257 41.974350496819),"$501 - $1,500",NO INJURY / DRIVE AWAY,...,0.0,0.0,0.0,0.0,SHERIDAN RD,DRIVING SKILLS/KNOWLEDGE/EXPERIENCE,DISTRACTION - FROM OUTSIDE VEHICLE,"DARKNESS, LIGHTED ROAD",10/24/2025,0.0
2,2,10/24/2025 01:05:00 AM,1,6,10,41.951333,-87.898242,POINT (-87.898242084333 41.951332504063),"OVER $1,500",NO INJURY / DRIVE AWAY,...,0.0,0.0,0.0,0.0,IRVING PARK RD,ANIMAL,NOT APPLICABLE,"DARKNESS, LIGHTED ROAD",10/24/2025,0.0
3,3,10/24/2025 12:30:00 AM,0,6,10,41.904712,-87.668365,POINT (-87.668364978394 41.90471221906),"OVER $1,500",NO INJURY / DRIVE AWAY,...,0.0,0.0,0.0,0.0,MILWAUKEE AVE,UNABLE TO DETERMINE,NOT APPLICABLE,"DARKNESS, LIGHTED ROAD",10/24/2025,0.0
4,4,10/24/2025 12:18:00 AM,0,6,10,41.918953,-87.691114,POINT (-87.691113725571 41.918952840995),$500 OR LESS,NO INJURY / DRIVE AWAY,...,0.0,0.0,0.0,0.0,MILWAUKEE AVE,FAILING TO REDUCE SPEED TO AVOID CRASH,UNDER THE INFLUENCE OF ALCOHOL/DRUGS (USE WHEN...,"DARKNESS, LIGHTED ROAD",10/24/2025,0.0


In [7]:
# Display column names
print("\nColumn names:")
print(df.columns.tolist())

# Optional: Display basic info about the dataset
print("\nDataset shape:")
print(df.shape)


Column names:
['Unnamed: 0', 'CRASH_DATE', 'CRASH_HOUR', 'CRASH_DAY_OF_WEEK', 'CRASH_MONTH', 'LATITUDE', 'LONGITUDE', 'LOCATION', 'DAMAGE', 'CRASH_TYPE', 'MOST_SEVERE_INJURY', 'INJURIES_TOTAL', 'INJURIES_FATAL', 'INJURIES_INCAPACITATING', 'INJURIES_NON_INCAPACITATING', 'STREET_NAME', 'PRIM_CONTRIBUTORY_CAUSE', 'SEC_CONTRIBUTORY_CAUSE', 'LIGHTING_CONDITION', 'CRASH_DATE_ONLY', 'INJURY_SCORE']

Dataset shape:
(996094, 21)


In [8]:
import pandas as pd
import numpy as np

# Read the data
df = pd.read_csv('/Users/aryangoel/Desktop/Updated_DataSet.csv')
print(f"Initial dataset size: {df.shape[0]:,} rows, {df.shape[1]} columns")

# 1. Drop the unnecessary 'Unnamed: 0' column (it's just an index)
df = df.drop('Unnamed: 0', axis=1)
print(f"After dropping 'Unnamed: 0' column: {df.shape[0]:,} rows, {df.shape[1]} columns")

# 2. Check for missing values in critical columns
print("\n=== Missing values in key columns ===")
print(df[['LATITUDE', 'LONGITUDE', 'CRASH_DATE']].isnull().sum())

# 3. Remove rows with missing latitude/longitude (can't map without location)
before_dropna = len(df)
df = df.dropna(subset=['LATITUDE', 'LONGITUDE'])
removed = before_dropna - len(df)
print(f"\nAfter removing missing coordinates: {df.shape[0]:,} rows (removed {removed:,} rows)")

# 4. Remove invalid coordinates (outside Chicago bounds)
# Chicago roughly: Latitude 41.6-42.1, Longitude -87.9 to -87.5
before_bounds = len(df)
df = df[(df['LATITUDE'] >= 41.6) & (df['LATITUDE'] <= 42.1)]
df = df[(df['LONGITUDE'] >= -87.95) & (df['LONGITUDE'] <= -87.5)]
removed = before_bounds - len(df)
print(f"After removing invalid coordinates: {df.shape[0]:,} rows (removed {removed:,} rows)")

# 5. Convert CRASH_DATE to proper datetime format
df['CRASH_DATE'] = pd.to_datetime(df['CRASH_DATE'])
df['CRASH_DATE_ONLY'] = pd.to_datetime(df['CRASH_DATE_ONLY'])
print(f"After converting dates: {df.shape[0]:,} rows (no change - just formatting)")

# 6. Handle missing values in categorical columns
# Replace empty strings or 'UNKNOWN' with a standard value
categorical_cols = ['DAMAGE', 'CRASH_TYPE', 'PRIM_CONTRIBUTORY_CAUSE', 
                    'SEC_CONTRIBUTORY_CAUSE', 'LIGHTING_CONDITION']
for col in categorical_cols:
    if col in df.columns:
        df[col] = df[col].fillna('UNKNOWN')
print(f"After filling categorical NaNs: {df.shape[0]:,} rows (no change - just filling values)")

# 7. Fill missing numeric values with 0 (for injury counts)
injury_cols = ['INJURIES_TOTAL', 'INJURIES_FATAL', 'INJURIES_INCAPACITATING', 
               'INJURIES_NON_INCAPACITATING', 'INJURY_SCORE']
for col in injury_cols:
    if col in df.columns:
        df[col] = df[col].fillna(0)
print(f"After filling numeric NaNs: {df.shape[0]:,} rows (no change - just filling values)")

# 8. Remove duplicates
before_dedup = len(df)
df = df.drop_duplicates()
removed = before_dedup - len(df)
print(f"After removing duplicates: {df.shape[0]:,} rows (removed {removed:,} rows)")

# 9. Final summary
print("\n" + "="*60)
print("CLEANING SUMMARY")
print("="*60)
print(f"Original dataset: 988,386 rows")
print(f"Final cleaned dataset: {df.shape[0]:,} rows")
print(f"Total rows removed: {988386 - len(df):,} ({((988386 - len(df))/988386*100):.2f}%)")
print("="*60)

print(f"\nSample of cleaned data:")
print(df.head())

# 10. Save the cleaned dataset
df.to_csv('/Users/aryangoel/Desktop/Cleaned_DataSet.csv', index=False)
print("\n✓ Cleaned data saved to Desktop/Cleaned_DataSet.csv")

Initial dataset size: 996,094 rows, 21 columns
After dropping 'Unnamed: 0' column: 996,094 rows, 20 columns

=== Missing values in key columns ===
LATITUDE      7526
LONGITUDE     7526
CRASH_DATE       0
dtype: int64

After removing missing coordinates: 988,568 rows (removed 7,526 rows)
After removing invalid coordinates: 988,503 rows (removed 65 rows)
After converting dates: 988,503 rows (no change - just formatting)
After filling categorical NaNs: 988,503 rows (no change - just filling values)
After filling numeric NaNs: 988,503 rows (no change - just filling values)
After removing duplicates: 988,386 rows (removed 117 rows)

CLEANING SUMMARY
Original dataset: 988,386 rows
Final cleaned dataset: 988,386 rows
Total rows removed: 0 (0.00%)

Sample of cleaned data:
           CRASH_DATE  CRASH_HOUR  CRASH_DAY_OF_WEEK  CRASH_MONTH   LATITUDE  \
0 2025-10-24 03:33:00           3                  6           10  41.776529   
1 2025-10-24 01:43:00           1                  6           10

In [4]:
import pandas as pd
df = pd.read_csv('/Users/aryangoel/Desktop/Cleaned_DataSet.csv')
df.head(15)

Unnamed: 0,CRASH_DATE,CRASH_HOUR,CRASH_DAY_OF_WEEK,CRASH_MONTH,LATITUDE,LONGITUDE,LOCATION,DAMAGE,CRASH_TYPE,MOST_SEVERE_INJURY,INJURIES_TOTAL,INJURIES_FATAL,INJURIES_INCAPACITATING,INJURIES_NON_INCAPACITATING,STREET_NAME,PRIM_CONTRIBUTORY_CAUSE,SEC_CONTRIBUTORY_CAUSE,LIGHTING_CONDITION,CRASH_DATE_ONLY,INJURY_SCORE
0,2025-10-24 03:33:00,3,6,10,41.776529,-87.700489,POINT (-87.700488959345 41.776528764215),"OVER $1,500",INJURY AND / OR TOW DUE TO CRASH,NO INDICATION OF INJURY,0.0,0.0,0.0,0.0,ALBANY AVE,UNABLE TO DETERMINE,UNABLE TO DETERMINE,"DARKNESS, LIGHTED ROAD",2025-10-24,0.0
1,2025-10-24 01:43:00,1,6,10,41.97435,-87.65481,POINT (-87.654810487257 41.974350496819),"$501 - $1,500",NO INJURY / DRIVE AWAY,NO INDICATION OF INJURY,0.0,0.0,0.0,0.0,SHERIDAN RD,DRIVING SKILLS/KNOWLEDGE/EXPERIENCE,DISTRACTION - FROM OUTSIDE VEHICLE,"DARKNESS, LIGHTED ROAD",2025-10-24,0.0
2,2025-10-24 01:05:00,1,6,10,41.951333,-87.898242,POINT (-87.898242084333 41.951332504063),"OVER $1,500",NO INJURY / DRIVE AWAY,NO INDICATION OF INJURY,0.0,0.0,0.0,0.0,IRVING PARK RD,ANIMAL,NOT APPLICABLE,"DARKNESS, LIGHTED ROAD",2025-10-24,0.0
3,2025-10-24 00:30:00,0,6,10,41.904712,-87.668365,POINT (-87.668364978394 41.90471221906),"OVER $1,500",NO INJURY / DRIVE AWAY,NO INDICATION OF INJURY,0.0,0.0,0.0,0.0,MILWAUKEE AVE,UNABLE TO DETERMINE,NOT APPLICABLE,"DARKNESS, LIGHTED ROAD",2025-10-24,0.0
4,2025-10-24 00:18:00,0,6,10,41.918953,-87.691114,POINT (-87.691113725571 41.918952840995),$500 OR LESS,NO INJURY / DRIVE AWAY,NO INDICATION OF INJURY,0.0,0.0,0.0,0.0,MILWAUKEE AVE,FAILING TO REDUCE SPEED TO AVOID CRASH,UNDER THE INFLUENCE OF ALCOHOL/DRUGS (USE WHEN...,"DARKNESS, LIGHTED ROAD",2025-10-24,0.0
5,2025-10-24 00:00:00,0,6,10,41.7756,-87.576588,POINT (-87.576588053812 41.77560044409),"OVER $1,500",NO INJURY / DRIVE AWAY,NO INDICATION OF INJURY,0.0,0.0,0.0,0.0,MARQUETTE DR,UNABLE TO DETERMINE,UNABLE TO DETERMINE,"DARKNESS, LIGHTED ROAD",2025-10-24,0.0
6,2025-10-23 23:22:00,23,5,10,41.962838,-87.713847,POINT (-87.71384737365 41.962838341849),"$501 - $1,500",NO INJURY / DRIVE AWAY,NO INDICATION OF INJURY,0.0,0.0,0.0,0.0,SUNNYSIDE AVE,UNABLE TO DETERMINE,NOT APPLICABLE,"DARKNESS, LIGHTED ROAD",2025-10-23,0.0
7,2025-10-23 23:00:00,23,5,10,42.011985,-87.708326,POINT (-87.708326043842 42.011984506457),"OVER $1,500",NO INJURY / DRIVE AWAY,NO INDICATION OF INJURY,0.0,0.0,0.0,0.0,TOUHY AVE,UNABLE TO DETERMINE,NOT APPLICABLE,"DARKNESS, LIGHTED ROAD",2025-10-23,0.0
8,2025-10-23 22:52:00,22,5,10,41.759584,-87.661979,POINT (-87.661979218645 41.759584072691),"OVER $1,500",INJURY AND / OR TOW DUE TO CRASH,NO INDICATION OF INJURY,0.0,0.0,0.0,0.0,74TH ST,UNABLE TO DETERMINE,UNABLE TO DETERMINE,"DARKNESS, LIGHTED ROAD",2025-10-23,0.0
9,2025-10-23 22:42:00,22,5,10,41.896411,-87.624133,POINT (-87.624133296552 41.896410675642),"$501 - $1,500",NO INJURY / DRIVE AWAY,NO INDICATION OF INJURY,0.0,0.0,0.0,0.0,MICHIGAN AVE,IMPROPER OVERTAKING/PASSING,NOT APPLICABLE,"DARKNESS, LIGHTED ROAD",2025-10-23,0.0


In [2]:
import streamlit as st
import datetime
import pandas as pd


st.title("Ranking")
#load data
df_pre = pd.read_csv("Newnew_dataset.csv")
df_pre["COUNT"] = 1 #lazy, but keeps track of number of rows when we group categories

# -------------------------------------------- #
# ----------- RANKING AND FILTERS ------------ #
# -------------------------------------------- #

left, right = st.columns(2)
with left:
    # ----------- MAIN RANKING ------------ #
    #Ranks
    st.header("Rank Category")
    rank_options = [
        "Most Frequent Crash Locations",
        "Most Frequent Crash Locations + Weighted",
        "Most Dangerous Crash Locations"
    ]
    ranking_choice = st.selectbox("Rank", rank_options)

with right:
    # ----------- GROUP BY ------------ #
    st.header("Group By")
    group_options = [
        "Street",
        "Location"
    ]
    group_choice = st.selectbox("Group By..", group_options)

# ------------- FILTERS --------------- #
#Filters
st.header("Filters")
st.divider()
date_col, damage_col, crash_col = st.columns(3)
injury_col, cause_col, lighting_col = st.columns(3)
st.divider()

#Date
with date_col:
    date_col.write("**Date:**")
    date_start = date_col.date_input(
        label="Start Date",
        value=pd.to_datetime('2017-10-24'),
        min_value=pd.to_datetime('2017-10-24'),
        max_value=pd.to_datetime('2025-10-24')
    )
    date_end = date_col.date_input(
        label="End Date:",
        value=pd.to_datetime('2025-10-24'),
        min_value=pd.to_datetime('2017-10-24'),
        max_value=pd.to_datetime('2025-10-24')
    )

#Damage
with damage_col:
    damage_col.write("**Damage:**")
    damage_less = damage_col.checkbox(label='$500 OR LESS')
    damage_mid = damage_col.checkbox(label='\$501 - \$1,500')
    damage_high = damage_col.checkbox(label='OVER $1,500')

#Crash Type
with crash_col:
    crash_col.write("**Crash Type:**")
    crash_drive = crash_col.checkbox(label='NO INJURY / DRIVE AWAY')
    crash_injury = crash_col.checkbox(label='INJURY AND / OR TOW DUE TO CRASH')

#Injures
with injury_col:
    injury_col.write("**Injuries:**")
    #['NO INDICATION OF INJURY', 'REPORTED, NOT EVIDENT', 'INCAPACITATING INJURY', 'NONINCAPACITATING INJURY', 'FATAL']
    injury_non = injury_col.checkbox(label='No Injury')
    injury_nonincapacitating = injury_col.checkbox(label='Non-Incapacitating')
    injury_incapacitating = injury_col.checkbox(label='Incapacitating')
    injury_fatal = injury_col.checkbox(label='Fatal')

#Primary/Secondary Cause
with cause_col:
    cause_col.write("**Cause of Crash:**")
    cause_user = cause_col.checkbox(label="User Error")
    cause_nonuser = cause_col.checkbox(label="None-User Error")
    cause_vehicle = cause_col.checkbox(label="Vehicle Error")

    user_error = ['DRIVING SKILLS/KNOWLEDGE/EXPERIENCE',
        'FAILING TO REDUCE SPEED TO AVOID CRASH',
        'IMPROPER OVERTAKING/PASSING', 'FOLLOWING TOO CLOSELY',
        'DISTRACTION - FROM OUTSIDE VEHICLE',
        'FAILING TO YIELD RIGHT-OF-WAY', 'DISREGARDING STOP SIGN',
            'IMPROPER LANE USAGE',
        'IMPROPER TURNING/NO SIGNAL',
        'OPERATING VEHICLE IN ERRATIC, RECKLESS, CARELESS, NEGLIGENT OR AGGRESSIVE MANNER',
        'IMPROPER BACKING', 'DISTRACTION - FROM INSIDE VEHICLE',
        'DRIVING ON WRONG SIDE/WRONG WAY', 'DISREGARDING TRAFFIC SIGNALS',
        'CELL PHONE USE OTHER THAN TEXTING', 'PHYSICAL CONDITION OF DRIVER', 'DISREGARDING OTHER TRAFFIC SIGNS',
        'RELATED TO BUS STOP',  'DISREGARDING ROAD MARKINGS',
        'TURNING RIGHT ON RED',
        'UNDER THE INFLUENCE OF ALCOHOL/DRUGS (USE WHEN ARREST IS EFFECTED)',
        'HAD BEEN DRINKING (USE WHEN ARREST IS NOT MADE)', 'TEXTING',
            'OBSTRUCTED CROSSWALKS',
        'DISTRACTION - OTHER ELECTRONIC DEVICE (NAVIGATION DEVICE, DVD PLAYER, ETC.)',
        'PASSING STOPPED SCHOOL BUS', 'DISREGARDING YIELD SIGN',
        'BICYCLE ADVANCING LEGALLY ON RED LIGHT',
        'MOTORCYCLE ADVANCING LEGALLY ON RED LIGHT',
        'EXCEEDING AUTHORIZED SPEED LIMIT',
        'EXCEEDING SAFE SPEED FOR CONDITIONS']
    non_user_error = [
        'ANIMAL', 'ROAD ENGINEERING/SURFACE/MARKING DEFECTS', 'VISION OBSCURED (SIGNS, TREE LIMBS, BUILDINGS, ETC.)',
        'EVASIVE ACTION DUE TO ANIMAL, OBJECT, NONMOTORIST', 'WEATHER', 'ROAD CONSTRUCTION/MAINTENANCE',
    ]
    vehicle_error = [
        'EQUIPMENT - VEHICLE CONDITION'
    ]

#Lighting
with lighting_col:
    lighting_col.write("**Type of Lighting**")
    lighting_cond = lighting_col.multiselect("Lighting Conditions", ['DARKNESS, LIGHTED ROAD', 'DARKNESS', 'UNKNOWN', 'DAYLIGHT',
        'DAWN', 'DUSK'])

# -------------------------------------------- #
# ----- DATASET CLEANING AND FILTERING  ------ #
# -------------------------------------------- #
if st.button("calculate"):
    df = df_pre.copy()

    # Approximate conversion: 1 degree ≈ 111,000 meters
    # For 50 meters, delta = 50 / 111000 ≈ 0.00045 degrees
    delta = 0.00045

    # Create grid identifiers for 50m squares
    df['LAT_BIN'] = (df['LATITUDE'] / delta).round().astype(int)
    df['LON_BIN'] = (df['LONGITUDE'] / delta).round().astype(int)

    # Combine bins into one label
    df['LOCATION_BIN'] = df['LAT_BIN'].astype(str) + "_" + df['LON_BIN'].astype(str)

    # City wide data only begins post September 2017, so we only track crashes post October 2017,
    # to be safe
    #Specifically, we choose the cutoff date to be October 24, because concidentally that's what
    # our data goes to (25-10-24), so exactly 9 years
    
    #Filter Data

    # ------ Date ------ #
    # ------------------ #
    df["CRASH_DATE_ONLY"] = pd.to_datetime(df["CRASH_DATE_ONLY"])
    date_start = datetime.datetime.combine(date_start, datetime.time.min)
    date_end = datetime.datetime.combine(date_end, datetime.time.max)
    df = df[(df["CRASH_DATE_ONLY"] >= date_start) & (df["CRASH_DATE_ONLY"] <= date_end)]

    # ----- Damage ----- #
    # ------------------ #
    damage_arr = []
    if damage_less:
        damage_arr.append("$500 OR LESS")
    if damage_mid:
        damage_arr.append("$501 - $1,500")
    if damage_high:
        damage_arr.append("OVER $1,500")

    if damage_arr:
        df = df[df["DAMAGE"].isin(damage_arr)]

    # --- Crash Type --- #
    # ------------------ #
    crash_types = []
    if crash_drive:
        crash_types.append("NO INJURY / DRIVE AWAY")
    if crash_injury:
        crash_types.append("INJURY AND / OR TOW DUE TO CRASH")

    if crash_types:  # Only filter if something is selected
        df = df[df["CRASH_TYPE"].isin(crash_types)]
    
    # ---- Injuries ---- #
    # ------------------ #
    injury_conditions = pd.Series([False] * len(df))  # start with all False

    if injury_non:
        injury_conditions |= (df["INJURY_SCORE"] == 0)
    if injury_nonincapacitating:
        injury_conditions |= (df["INJURIES_NON_INCAPACITATING"] > 0)
    if injury_incapacitating:
        injury_conditions |= (df["INJURIES_INCAPACITATING"] > 0)
    if injury_fatal:
        injury_conditions |= (df["INJURIES_FATAL"] > 0)  # replace with your fatal column

    if injury_conditions.any():  # Only filter if any condition is True
        df = df[injury_conditions]

    # ----- Cause ------ #
    # ------------------ #
    cause_arr = []

    if cause_user:
        cause_arr += user_error
    if cause_nonuser:
        cause_arr += non_user_error
    if cause_vehicle:
        cause_arr += vehicle_error

    if cause_arr:
        df = df[df["PRIMARY_CONTRIBUTORY_CAUSE"].isin(cause_arr)]

    # ---- Lighting ---- #
    # ------------------ #
    if lighting_cond:  # Only filter if some lighting conditions selected
        df = df[df["LIGHTING_CONDITION"].isin(lighting_cond)]

    # -------------------------------------------- #
    # ----------- RANKING LOGIC ------------------- #
    # -------------------------------------------- #
    if group_choice == "Street":
        ranking = df.groupby('STREET_NAME').agg("sum", numeric_only=True)
    elif group_choice == "Location":
        ranking = df.groupby('LOCATION_BIN').agg("sum", numeric_only=True)

    if ranking_choice == "Most Frequent Crash Locations":
        ranking = ranking.sort_values(by='COUNT', ascending=False)[["COUNT"]].head(10)
    elif ranking_choice == "Most Frequent Crash Locations + Weighted":
        ranking = ranking.sort_values(by='INJURY_SCORE', ascending=False)[['INJURIES_FATAL',
       'INJURIES_INCAPACITATING', 'INJURIES_NON_INCAPACITATING', "COUNT","INJURY_SCORE"]].head(10)
    elif ranking_choice == "Most Dangerous Crash Locations":
        ranking["Average Injury Score"] = ranking["INJURY_SCORE"] / ranking["COUNT"]
        ranking = ranking.sort_values(by='INJURY_SCORE', ascending=False)[['INJURIES_FATAL',
       'INJURIES_INCAPACITATING', 'INJURIES_NON_INCAPACITATING', "COUNT","INJURY_SCORE", "Average Injury Score"]].head(10)
    ranking["CRASHES PER YEAR"] = ranking["COUNT"]/9
    
    # Create "PER YEAR" columns by dividing by 9
    ranking["CRASHES PER YEAR"] = ranking["COUNT"] / 9
    ranking["FATALS PER YEAR"] = ranking["INJURIES_FATAL"] / 9
    ranking["INCAPACITATING INJURIES PER YEAR"] = ranking["INJURIES_INCAPACITATING"] / 9
    ranking["NON-INCAPACITATING INJURIES PER YEAR"] = ranking["INJURIES_NON_INCAPACITATING"] / 9
    ranking["INJURY SCORE PER YEAR"] = ranking["INJURY_SCORE"] / 9

    # Optional: display only the "PER YEAR" columns
    ranking_per_year = ranking[[
        "CRASHES PER YEAR",
        "FATALS PER YEAR",
        "INCAPACITATING INJURIES PER YEAR",
        "NON-INCAPACITATING INJURIES PER YEAR",
        "INJURY SCORE PER YEAR"
    ]]

    st.dataframe(ranking_per_year)

  damage_mid = damage_col.checkbox(label='\$501 - \$1,500')
  damage_mid = damage_col.checkbox(label='\$501 - \$1,500')


ModuleNotFoundError: No module named 'streamlit'