In [1]:
import os
import pandas as pd

In [2]:
# Read the raw train and test collision extracts (paths are relative to the
# notebook's working directory), in that order.
train_df, test_df = (
    pd.read_csv(csv_name)
    for csv_name in ('collision_train_data.csv', 'collision_test_data.csv')
)

In [4]:
# Raw 'Vehicle Body Type' label -> coarse vehicle category. combine_categories()
# applies this lookup and sends any label not listed here to 'Other'.
# Labels appear in both upper-case and mixed-case spellings; both are listed.
vehicle_type_mapping = {
    # Passenger cars
    'PASSENGER CAR': 'Passenger Cars',
    'Passenger Car': 'Passenger Cars',
    'STATION WAGON': 'Passenger Cars',
    'Station Wagon': 'Passenger Cars',
    # SUVs
    '(SPORT) UTILITY VEHICLE': 'SUVs',
    'Sport Utility Vehicle': 'SUVs',
    # Pickup trucks
    'PICKUP TRUCK': 'Pickup Trucks',
    'Pickup': 'Pickup Trucks',
    # Vans
    'VAN': 'Vans',
    # This label was stored with an HTML-escaped '<' ('&lt;'). The plain '<'
    # spelling is listed as well so unescaped data maps to 'Vans' instead of
    # silently falling through to 'Other'. The escaped spelling is kept in case
    # the CSV really contains it.
    'Van - Passenger (<9 Seats)': 'Vans',
    'Van - Passenger (&lt;9 Seats)': 'Vans',
    'Van - Cargo': 'Vans',
    # Buses
    'TRANSIT BUS': 'Buses',
    'SCHOOL BUS': 'Buses',
    'Bus - Transit': 'Buses',
    'Bus - School': 'Buses',
    'OTHER BUS': 'Buses',
    'Bus - Other Type': 'Buses',
    'CROSS COUNTRY BUS': 'Buses',
    # Motorcycles and mopeds
    'MOTORCYCLE': 'Motorcycles and Mopeds',
    'Motorcycle - 2 Wheeled': 'Motorcycles and Mopeds',
    'MOPED': 'Motorcycles and Mopeds',
    'Moped Or motorized bicycle': 'Motorcycles and Mopeds',
    # Trucks
    'OTHER LIGHT TRUCKS (10,000LBS (4,536KG) OR LESS)': 'Trucks',
    'CARGO VAN/LIGHT TRUCK 2 AXLES (OVER 10,000LBS (4,536 KG))': 'Trucks',
    'MEDIUM/HEAVY TRUCKS 3 AXLES (OVER 10,000LBS (4,536KG))': 'Trucks',
    'TRUCK TRACTOR': 'Trucks',
    'Truck Tractor': 'Trucks',
    'Single-Unit Truck': 'Trucks',
    'Other Trucks': 'Trucks',
    # Emergency vehicles (emergency and non-emergency use share one category)
    'POLICE VEHICLE/NON EMERGENCY': 'Emergency Vehicles',
    'POLICE VEHICLE/EMERGENCY': 'Emergency Vehicles',
    'AMBULANCE/EMERGENCY': 'Emergency Vehicles',
    'AMBULANCE/NON EMERGENCY': 'Emergency Vehicles',
    'FIRE VEHICLE/EMERGENCY': 'Emergency Vehicles',
    'FIRE VEHICLE/NON EMERGENCY': 'Emergency Vehicles',
    # Recreational vehicles
    'RECREATIONAL VEHICLE': 'Recreational Vehicles',
    'SNOWMOBILE': 'Recreational Vehicles',
    'Snowmobile': 'Recreational Vehicles',
    'Recreational Off-Highway Vehicles (ROV)': 'Recreational Vehicles',
    'ALL TERRAIN VEHICLE (ATV)': 'Recreational Vehicles',
    'All-Terrain Vehicle/All-Terrain Cycle (ATV/ATC)': 'Recreational Vehicles',
    # Specialty vehicles
    'FARM VEHICLE': 'Specialty Vehicles',
    'AUTOCYCLE': 'Specialty Vehicles',
    'LOW SPEED VEHICLE': 'Specialty Vehicles',
    'LIMOUSINE': 'Specialty Vehicles',
    'Construction Equipment (backhoe, bulldozer, etc.)': 'Specialty Vehicles',
    # Catch-alls
    'UNKNOWN': 'Unknown',
    'Unknown': 'Unknown',
    'OTHER': 'Other',
    'Other': 'Other',
}
# Raw 'Route Type' label -> road category. The table is written category-first
# and flattened into the raw-label -> category lookup that combine_categories()
# passes to Series.map.
location_type_mapping = {
    raw_label: category
    for category, raw_labels in {
        'State Roads': [
            'Maryland (State)',
            'US (State)',
            'Interstate (State)',
            'Maryland (State) Route',
        ],
        'County Roads': ['County', 'County Route'],
        'Municipality Roads': ['Municipality', 'Municipality Route'],
        'Other Public Roadways': [
            'Other Public Roadway',
            'Local Route',
            'Ramp',
            'Service Road',
            'Crossover',
        ],
        'Government Roads': ['Government', 'Government Route'],
        'Private Roads': ['Private Route'],
        'Unknown': ['Unknown'],
    }.items()
    for raw_label in raw_labels
}
# Raw 'Weather' label -> weather category, written category-first and flattened
# into the raw-label -> category lookup used by combine_categories().
# Only the exact spellings listed here are recognised.
weather_condition_mapping = {
    raw_label: category
    for category, raw_labels in {
        'Clear': ['CLEAR', 'Clear'],
        'Rain': ['RAINING', 'Rain', 'Freezing Rain Or Freezing Drizzle'],
        'Cloudy': ['CLOUDY', 'Cloudy'],
        'Snow': ['SNOW', 'Snow', 'BLOWING SNOW', 'Blowing Snow'],
        'Foggy': ['FOGGY', 'Fog, Smog, Smoke'],
        'Wintry Mix': ['WINTRY MIX', 'SLEET'],
        'Severe Winds': [
            'SEVERE WINDS',
            'Severe Crosswinds',
            'BLOWING SAND, SOIL, DIRT',
        ],
        'Unknown': ['UNKNOWN', 'Unknown'],
        'Other': ['OTHER'],
    }.items()
    for raw_label in raw_labels
}
# Raw 'Surface Condition' label -> surface category, written category-first and
# flattened into the raw-label -> category lookup used by combine_categories().
surface_condition_mapping = {
    raw_label: category
    for category, raw_labels in {
        'Dry': ['DRY', 'Dry'],
        'Wet': ['WET', 'Wet'],
        'Snow': ['SNOW', 'Snow'],
        'Ice': ['ICE', 'Ice/Frost'],
        'Slush': ['SLUSH', 'Slush'],
        'Loose Material': ['MUD, DIRT, GRAVEL', 'SAND'],
        'Water': ['WATER(STANDING/MOVING)', 'Water (standing, moving)'],
        'Oil': ['OIL'],
        'Unknown': ['UNKNOWN'],
        'Other': ['OTHER', 'Other'],
    }.items()
    for raw_label in raw_labels
}
# Raw 'Light' label -> lighting category. Every category has exactly two raw
# spellings; the category-first table is flattened into the raw-label ->
# category lookup used by combine_categories().
light_mapping = {
    raw_label: category
    for category, raw_labels in {
        'Daylight': ['DAYLIGHT', 'Daylight'],
        'Dark - Lighted': ['DARK LIGHTS ON', 'Dark - Lighted'],
        'Dark - Not Lighted': ['DARK NO LIGHTS', 'Dark - Not Lighted'],
        'Dark - Unknown Lighting': [
            'DARK -- UNKNOWN LIGHTING',
            'Dark - Unknown Lighting',
        ],
        'Dusk': ['DUSK', 'Dusk'],
        'Dawn': ['DAWN', 'Dawn'],
        'Other': ['OTHER', 'Other'],
        'Unknown': ['UNKNOWN', 'Unknown'],
    }.items()
    for raw_label in raw_labels
}
# Raw 'Driver Substance Abuse' label -> substance category, written
# category-first and flattened into the raw-label -> category lookup used by
# combine_categories().
substance_abuse_mapping = {
    raw_label: category
    for category, raw_labels in {
        'None Detected': [
            'NONE DETECTED',
            'Not Suspect of Alcohol Use, Not Suspect of Drug Use',
        ],
        'Alcohol Present': [
            'ALCOHOL PRESENT',
            'Suspect of Alcohol Use, Not Suspect of Drug Use',
            'Suspect of Alcohol Use, Unknown',
        ],
        'Alcohol Contributed': ['ALCOHOL CONTRIBUTED'],
        'Drug Present': ['ILLEGAL DRUG PRESENT'],
        'Drug Contributed': ['ILLEGAL DRUG CONTRIBUTED'],
        'Medication Present': ['MEDICATION PRESENT'],
        'Medication Contributed': ['MEDICATION CONTRIBUTED'],
        'Combined Substances Present': ['COMBINED SUBSTANCE PRESENT'],
        'Combined Substances Contributed': ['COMBINATION CONTRIBUTED'],
        # Any pairing in which alcohol use is not suspected and at least one
        # half is unknown is treated as 'Unknown'.
        'Unknown': [
            'UNKNOWN',
            'Unknown, Unknown',
            'Unknown, Not Suspect of Drug Use',
            'Not Suspect of Alcohol Use, Unknown',
        ],
        'Other': ['OTHER'],
    }.items()
    for raw_label in raw_labels
}
# Raw 'Collision Type' label -> collision category, written category-first and
# flattened into the raw-label -> category lookup used by combine_categories().
collision_mapping = {
    raw_label: category
    for category, raw_labels in {
        'Same Direction Rear-End': [
            'SAME DIR REAR END',
            'SAME DIR REND RIGHT TURN',
            'SAME DIR REND LEFT TURN',
        ],
        'Angle Collisions': [
            'STRAIGHT MOVEMENT ANGLE',
            'ANGLE MEETS LEFT TURN',
            'ANGLE MEETS RIGHT TURN',
            'Angle',
        ],
        'Same Direction Sideswipe': [
            'SAME DIRECTION SIDESWIPE',
            'Sideswipe, Same Direction',
        ],
        'Opposite Direction Collisions': [
            'HEAD ON LEFT TURN',
            'HEAD ON',
            'OPPOSITE DIRECTION SIDESWIPE',
            'Front to Front',
        ],
        'Single Vehicle': ['SINGLE VEHICLE', 'Single Vehicle'],
        'Turn Collisions': [
            'SAME DIRECTION RIGHT TURN',
            'SAME DIRECTION LEFT TURN',
            'SAME DIR BOTH LEFT TURN',
            'OPPOSITE DIR BOTH LEFT TURN',
        ],
        # NOTE(review): 'Rear To Rear' is filed under 'Other' because it has no
        # obvious home among the categories above; the original author flagged
        # this label as unclear. Confirm the intended grouping.
        'Other': ['OTHER', 'Other', 'Rear To Rear'],
        'Unknown': ['UNKNOWN', 'Unknown'],
    }.items()
    for raw_label in raw_labels
}
# Raw 'Traffic Control' label -> control category, written category-first and
# flattened into the raw-label -> category lookup used by combine_categories().
control_mapping = {
    raw_label: category
    for category, raw_labels in {
        'No Controls': ['NO CONTROLS', 'No Controls'],
        'Traffic Signal': [
            'TRAFFIC SIGNAL',
            'Traffic Control Signal',
            'Flashing Traffic Control Signal',
            'FLASHING TRAFFIC SIGNAL',
            'Other Signal',
        ],
        'Stop Sign': ['STOP SIGN', 'Stop Sign'],
        'Yield Sign': ['YIELD SIGN', 'Yield Sign'],
        'Pedestrian/Crossing Control': [
            'PERSON',
            'Pedestrian Crossing Sign',
            'Pedestrian Crossing',
            'Person (including flagger, law enforcement, crossing guard, etc.)',
        ],
        'Railway Crossing': [
            'RAILWAY CROSSING DEVICE',
            'Flashing Railroad Crossing Signal (may include gates)',
        ],
        'Warning Sign': [
            'WARNING SIGN',
            'Intersection Ahead Warning Sign',
            'Other Warning Sign',
            'School Zone Sign',
            'SCHOOL ZONE SIGN DEVICE',
        ],
        'Other': [
            'OTHER',
            'Other',
            'Other Pavement Marking (excluding edgelines, centerlines, or lane lines)',
        ],
        'Unknown': ['UNKNOWN'],
    }.items()
    for raw_label in raw_labels
}
# Raw 'Vehicle Movement' label -> movement category, written category-first and
# flattened into the raw-label -> category lookup used by combine_categories().
movement_mapping = {
    raw_label: category
    for category, raw_labels in {
        'Constant Speed': ['MOVING CONSTANT SPEED', 'Moving Constant Speed'],
        # Vehicles already stopped in traffic share this category.
        'Slowing or Stopping': [
            'SLOWING OR STOPPING',
            'Slowing or Stopping',
            'STOPPED IN TRAFFIC LANE',
            'Stopped in Traffic',
        ],
        'Making Left Turn': ['MAKING LEFT TURN', 'Turning Left'],
        'Making Right Turn': [
            'MAKING RIGHT TURN',
            'Turning Right',
            'RIGHT TURN ON RED',
        ],
        'Making U-Turn': ['MAKING U TURN', 'Making U-Turn'],
        'Accelerating': ['ACCELERATING', 'Accelerating'],
        'Changing Lanes': ['CHANGING LANES', 'Changing Lanes'],
        'Entering/Leaving Traffic Lane': [
            'ENTERING TRAFFIC LANE',
            'Entering Traffic Lane',
            'LEAVING TRAFFIC LANE',
            'Leaving Traffic Lane',
        ],
        'Starting Vehicle': ['STARTING FROM LANE', 'STARTING FROM PARKED'],
        'Parking or Backing': ['PARKED', 'PARKING', 'BACKING', 'Backing'],
        'Passing/Overtaking': ['PASSING', 'Overtaking/Passing'],
        'Skidding/Negotiating Curve': [
            'SKIDDING',
            'NEGOTIATING A CURVE',
            'Negotiating a Curve',
        ],
        'Driverless Vehicle': ['DRIVERLESS MOVING VEH.'],
        # Unlike the other lookups, unknown and other share a single category.
        'Other/Unknown': ['UNKNOWN', 'OTHER'],
    }.items()
    for raw_label in raw_labels
}

In [5]:
def combine_categories(df, column_mappings=None, unmapped_fill=None):
    """Return a cleaned copy of ``df`` with coarse, lower-cased category labels.

    Each configured column is recoded through its lookup table with
    ``Series.map``, so a raw label missing from the table becomes NaN unless a
    fill value is configured for that column. Afterwards every string value in
    every object-dtype column of the frame is lower-cased.

    The input frame is never modified. The previous in-place version returned
    the very same object it was given, so running the call a second time fed
    already-cleaned (lower-cased) labels back through the lookups and turned
    every recoded column into NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw collision records. Must contain every column named in
        ``column_mappings``.
    column_mappings : dict of {str: dict}, optional
        Column name -> {raw label: category}. Defaults to the nine
        module-level lookup tables (movement, control, collision, substance
        abuse, light, surface condition, weather, route type, vehicle body
        type), applied in that order.
    unmapped_fill : dict of {str: str}, optional
        Column name -> replacement for values left missing after recoding.
        Defaults to ``{'Vehicle Body Type': 'Other'}``. Entries for columns
        that are not in ``column_mappings`` are ignored.

    Returns
    -------
    pandas.DataFrame
        A new frame; ``df`` is left untouched.

    Raises
    ------
    KeyError
        If a column named in ``column_mappings`` is absent from ``df``.
    """
    if column_mappings is None:
        column_mappings = {
            'Vehicle Movement': movement_mapping,
            'Traffic Control': control_mapping,
            'Collision Type': collision_mapping,
            'Driver Substance Abuse': substance_abuse_mapping,
            'Light': light_mapping,
            'Surface Condition': surface_condition_mapping,
            'Weather': weather_condition_mapping,
            'Route Type': location_type_mapping,
            'Vehicle Body Type': vehicle_type_mapping,
        }
    if unmapped_fill is None:
        unmapped_fill = {'Vehicle Body Type': 'Other'}

    # Work on a copy so the caller's raw frame survives and re-running the
    # calling cell gives the same result every time.
    cleaned = df.copy()

    for column, mapping in column_mappings.items():
        recoded = cleaned[column].map(mapping)
        if column in unmapped_fill:
            recoded = recoded.fillna(unmapped_fill[column])
        cleaned[column] = recoded

    def _lower_text_values(column):
        # Lower-case real strings only. ``.str.lower()`` would replace the
        # non-string entries of a mixed-type object column with NaN and raises
        # on object columns that hold no strings at all (e.g. booleans + NaN).
        return pd.Series(
            [value.lower() if isinstance(value, str) else value for value in column],
            index=column.index,
            dtype=object,
        )

    text_columns = cleaned.select_dtypes(include='object').columns
    cleaned[text_columns] = cleaned[text_columns].apply(_lower_text_values)
    return cleaned

In [6]:
# Run the category clean-up on the train split first, then the test split.
new_train_df, new_test_df = (
    combine_categories(split_df) for split_df in (train_df, test_df)
)

In [8]:
# DataFrame.to_csv opens its target in write mode by default, which replaces
# any existing file of the same name, so no delete-before-write step is needed.
# index=False keeps the row index out of the saved files.
new_train_df.to_csv('cleaned_collision_train_data.csv', index=False)
new_test_df.to_csv('cleaned_collision_test_data.csv', index=False)