In [1]:
import pandas as pd

# --- Run configuration -------------------------------------------------
# Both paths are relative; the raw file is expected in the same folder as
# this notebook, and the cleaned file is created alongside it.
input_file = "US_Accidents_March23.csv"
output_file = "US_Accidents_Cleaned.csv"

# True until the first chunk has been written; the cleaning loop uses it to
# pick the write mode and to decide whether to emit the CSV header.
first = True

# Number of rows read from the raw file per chunk.
chunksize = 100_000

In [2]:
# Columns removed from every chunk before it is written to the cleaned file.
columns_to_drop = [
    "Start_Time",
    "End_Time",
    "End_Lat",
    "End_Lng",
    "Weather_Timestamp",
    "Amenity",
    "Bump",
    "Distance(mi)",
    "Start_Lat",
    "Start_Lng",
    "Precipitation(in)",
    "Zipcode",
    "Airport_Code",
]

# A row is discarded when any of these columns is missing a value.
important_cols = [
    "Severity",
    "City",
    "State",
    "Weather_Condition",
]

In [3]:
print("Cleaning started... please wait.")

# Reset the first-chunk flag inside this cell. Without this, re-running the
# cell on its own would find `first` already False from the previous run and
# append a second copy of every cleaned row to the existing output file.
first = True

# Stream the raw CSV in chunks of `chunksize` rows so the whole file never
# has to be held in memory at once.
for chunk in pd.read_csv(input_file, chunksize=chunksize):
    # 1) Drop unneeded columns; errors="ignore" skips any listed name that
    #    is not present in the file instead of raising.
    chunk = chunk.drop(columns=columns_to_drop, errors="ignore")

    # 2) Drop rows missing a value in any of the important columns
    chunk = chunk.dropna(subset=important_cols)

    # 3) Write to the cleaned file: the first chunk overwrites any existing
    #    file and emits the header; later chunks are appended without one.
    mode = "w" if first else "a"
    header = first
    chunk.to_csv(output_file, mode=mode, index=False, header=header)
    first = False

print("Cleaning completed!")
print("Cleaned file saved as:", output_file)

Cleaning started... please wait.
Cleaning completed!
Cleaned file saved as: US_Accidents_Cleaned.csv


In [4]:
# Sanity check: load only the first 10 rows of the cleaned file and display
# the top of that sample.
df_clean = pd.read_csv(filepath_or_buffer=output_file, nrows=10)
df_clean.head(n=5)

Unnamed: 0,ID,Source,Severity,Description,Street,City,County,State,Country,Timezone,...,Roundabout,Station,Stop,Traffic_Calming,Traffic_Signal,Turning_Loop,Sunrise_Sunset,Civil_Twilight,Nautical_Twilight,Astronomical_Twilight
0,A-1,Source2,3,Right lane blocked due to accident on I-70 Eas...,I-70 E,Dayton,Montgomery,OH,US,US/Eastern,...,False,False,False,False,False,False,Night,Night,Night,Night
1,A-2,Source2,2,Accident on Brice Rd at Tussing Rd. Expect del...,Brice Rd,Reynoldsburg,Franklin,OH,US,US/Eastern,...,False,False,False,False,False,False,Night,Night,Night,Day
2,A-3,Source2,2,Accident on OH-32 State Route 32 Westbound at ...,State Route 32,Williamsburg,Clermont,OH,US,US/Eastern,...,False,False,False,False,True,False,Night,Night,Day,Day
3,A-4,Source2,3,Accident on I-75 Southbound at Exits 52 52B US...,I-75 S,Dayton,Montgomery,OH,US,US/Eastern,...,False,False,False,False,False,False,Night,Day,Day,Day
4,A-5,Source2,2,Accident on McEwen Rd at OH-725 Miamisburg Cen...,Miamisburg Centerville Rd,Dayton,Montgomery,OH,US,US/Eastern,...,False,False,False,False,True,False,Day,Day,Day,Day
