# ICBC 2023 Lower Mainland vehicle dataset - EDA and data cleaning 1

## Source

In [1]:
import os
import pandas as pd

In [2]:
 # Check if file path is valid.
file_path = './icbc/Vehicle Population - 2023 Passenger Vehicles_Full _data.csv'
if not os.path.isfile(file_path):
    fnf_err = f'{file_path} not found.'
    raise FileNotFoundError(fnf_err)

vehicle_df = pd.read_csv(file_path)

# Take a peek at the first couple of rows in the dataset.
print(vehicle_df.head(2))

dataset_columns = vehicle_df.columns.tolist()
print(f"This dataset contains the following columns {dataset_columns}")

num_rows, num_cols = vehicle_df.shape
print(f"Data set shape {num_rows} rows x {num_cols} cols")

  Veh Pop - Criteria Selector Vehicle Use Anti Theft Device Indicator  \
0              Lower Mainland    Business                          No   
1              Lower Mainland    Business                          No   

             Body Style Electric_Vehicle_Indicator Fleet Vehicle Indicator  \
0         Fourdoorsedan                         No                      No   
1  Fourdoorstationwagon                         No                      No   

  Fuel Type Hybrid Vehicle Indicator        Make                Model  \
0    Diesel                       No  VOLKSWAGEN  RABBIT OTHER MODELS   
1    Diesel                       No      TOYOTA   LAND CRUISER WAGON   

   Model Year Municipality             Owner Type          Region  \
0        1978      Langley  External organization  Lower Mainland   
1        1991      Burnaby  External organization  Lower Mainland   

   Vehicle Count  
0              1  
1              1  
This dataset contains the following columns ['Veh Pop - Crit

# Initial comments
This dataset contains redundant columns. We specifically set the criteria to Lower Mainland vehicles on ICBC's data portal. We can get rid of the columns 'Veh Pop - Criteria Selector' and 'Region'.

In [3]:
vehicle_df.drop(['Veh Pop - Criteria Selector', 'Region'], axis=1, inplace=True)

dataset_columns = vehicle_df.columns.tolist()
print(f"This dataset contains the following columns {dataset_columns}")

num_rows, num_cols = vehicle_df.shape
print(f"Data set shape {num_rows} rows x {num_cols} cols")
assert num_cols == 13, "Columns delete failed."

This dataset contains the following columns ['Vehicle Use', 'Anti Theft Device Indicator', 'Body Style', 'Electric_Vehicle_Indicator', 'Fleet Vehicle Indicator', 'Fuel Type', 'Hybrid Vehicle Indicator', 'Make', 'Model', 'Model Year', 'Municipality', 'Owner Type', 'Vehicle Count']
Data set shape 719144 rows x 13 cols


# Exploratory Data Analysis
Here we take a quick look at each column to see if there's any obvious redundancies (if all values for col C are 'xyz', then there's isn't much to be investigated in col C)

In [4]:
for c in dataset_columns:
    print(f"Examining column {c}")
    # Find unique values in this column.
    uniques = vehicle_df[c].unique()
    print(len(uniques))

Examining column Vehicle Use
3
Examining column Anti Theft Device Indicator
2
Examining column Body Style
24
Examining column Electric_Vehicle_Indicator
2
Examining column Fleet Vehicle Indicator
2
Examining column Fuel Type
18
Examining column Hybrid Vehicle Indicator
2
Examining column Make
427
Examining column Model
9496
Examining column Model Year
116
Examining column Municipality
54
Examining column Owner Type
2
Examining column Vehicle Count
284


# Comments
- There are 3 categories for vehicle use. We'll have to dig into that later.
- There are 2 categories for anti theft device indicator. This is likely a Boolean field. A vehicle either has an anti theft device or it does not.
- There are 24 body styles. We'll have to dig into that later.
- There are 2 categories for electric vehicle indicator. This must be a Boolean field for obvious reasons. We'll have to check if a hybrid (Battery Electric Vehicle - BEV) and a PHEV (Plug-in Electric Vehicle) are declared as electric vehicles or not.
- There are 2 categories for fleet vehicle indicator. This is definitely a Boolean field.
- There are 18 categories for fuel type, which is more than we anticipated (diesel, gasoline, electric, LPG).
- There are 427 vehicle makes, which seems too high a number. We'll have to dig into that - are the makes misspelled or sometimes present as acronyms and sometimes in full?
- There are 9496 models. This seems slightly too many. We'll have to take a look later.
- There are 116 model years, which is unexpected. We don't expect the years to span more than 50/60 years.
- There are 54 municipalities, which is expected for the Lower Mainland.
- There are 2 types of owners.
- At most, there are 284 different values for vehicle count.

In [5]:
print(f"Unique values for vehicle use = {vehicle_df['Vehicle Use'].unique()}")
print(f"Anti Theft Device Indicator values ={vehicle_df['Anti Theft Device Indicator'].unique()}")
print(f"Body styles = {vehicle_df['Body Style'].unique()}")
print(f"Electric vehicle indicator ={vehicle_df['Electric_Vehicle_Indicator'].unique()}")
print(f"Fleet vehicle indicator = {vehicle_df['Fleet Vehicle Indicator'].unique()}")
print(f"Fuel types = {vehicle_df['Fuel Type'].unique()}")
print(f" Hybrid vehicle indicator = {vehicle_df['Hybrid Vehicle Indicator'].unique()}")

Unique values for vehicle use = ['Business' 'Other' 'Personal']
Anti Theft Device Indicator values =['No' 'Yes']
Body styles = ['Fourdoorsedan' 'Fourdoorstationwagon' 'Twodoorfastback'
 'Twodoorstationwagon' 'Dualpurpose' 'Fourdoorcoupe' 'Fourdoorfastback'
 'Hatchback' 'Twodoorconvertible' 'Twodoorcoupe' 'Twodoorhardtop'
 'Snowmobile' 'Twodoorsedan' 'Wheeledatv' 'Lowspeedvehicle'
 'Fourdoorhardtop' 'Golfcart' 'Limousinepassenger' 'Sportconvertible'
 'Threewheeled' 'Fourdoorconvertible' 'Workutilitypassengervehicle'
 'Dunebuggy' 'Amphibiousvehicle']
Electric vehicle indicator =['No' 'Yes']
Fleet vehicle indicator = ['No' 'Yes']
Fuel types = ['Diesel' 'Gasoline' 'Electric' 'Gasoline Electric' 'Multifuels' 'Butane'
 'Diesel Natural Gas' 'Gasoline Natural Gas' 'Natural Gas' 'Propane'
 'Diesel Butane' 'Gasoline Propane' 'Other' 'Diesel Propane' 'Alcohol'
 'Gasoline Alcohol' 'Hydrogen' 'Propane Natural Gas']
 Hybrid vehicle indicator = ['No' 'Yes']


### Vehicle use
### Anti theft device indicator
As we suspected, the anti theft device indicator field is a Boolean one.
### Body styles
We initially thought that there will only be a few body styles: sedan, hatchback, station wagon, coupe, convertible, pick up truck, crossover/SUV.
### Electric vehicle indicator
This is indeed a Boolean field.
### Fleet vehicle Indicator
This is also a Boolean field.
### Fuel types
There are values which were not anticipated in this column, such as 'butane' and 'propane'
### Hybrid vehicle indicator
This is indeed a Boolean field. It would be interesting to see how this column correlates with the fuel type column.

In [6]:
model_years = vehicle_df['Model Year'].unique()
oldest, newest = min(model_years), max(model_years)
print(f"oldest and newest are {oldest}, {newest}")
all_years = [_ for _ in range(oldest, newest+1)]
print(f"Model years = {model_years}")
missing_years = [_ for _ in all_years if _ not in all_years]
print(f"Missing years = {missing_years}")

oldest and newest are 1908, 2024
Model years = [1978 1991 1997 1998 2006 2011 2005 1993 1994 2015 1964 1996 1999 2001
 2003 2004 2007 2009 2010 2012 2014 2018 1995 2000 2002 2016 2017 1990
 1992 1966 1972 1981 1930 1965 1968 1986 1963 1969 1976 1980 1988 1970
 1971 1967 2013 2021 2019 2024 2023 2022 2020 2008 1989 1987 1935 1959
 1962 1979 1955 1973 1983 1950 1951 1982 1984 1985 1949 1952 1953 1956
 1957 1958 1960 1928 1954 1974 1923 1947 1977 1929 1936 1933 1932 1937
 1938 1939 1946 1948 1942 1961 1926 1975 1931 1934 1941 1927 1915 1940
 1924 1909 1913 1908 1912 1920 1916 1925 1921 1944 1911 1943 1910 1918
 1917 1922 1945 1914]
Missing years = []


In [7]:
municipalities = vehicle_df['Municipality'].unique()
print(f"municipalities = {municipalities}")

print(f"Owner type {vehicle_df['Owner Type'].unique()}")

municipalities = ['Langley' 'Burnaby' 'Vancouver' 'Port Coquitlam' 'Richmond' 'Surrey'
 'Whistler' 'North Vancouver' 'Coquitlam' 'Chilliwack' 'New Westminster'
 'Abbotsford' 'Mission' 'Halfmoon Bay' 'Bowen Island' 'Roberts Creek'
 'Gibsons' 'Ubc' 'Cultus Lake' 'Maple Ridge' 'Delta' 'Squamish'
 'Port Moody' 'Egmont' 'Pitt Meadows' 'Sechelt' 'Agassiz' 'West Vancouver'
 'Hope' 'White Rock' 'Pemberton' 'Harrison Hot Springs' 'Furry Creek'
 'Britannia Beach' 'Deroche' 'Harrison Lake' 'Chilliwack River Valley'
 'Popkum' 'Lake Errock' 'Madeira Park' 'Lions Bay' 'Dewdney' 'Anmore'
 'Hatzic' 'Belcarra' 'Pemberton Meadows' 'Yale' 'Columbia Valley'
 'Garden Bay' 'Boston Bar' 'Lindell Beach' 'Sunshine Valley' 'Ruby Lake'
 'Port Mellon']
Owner type ['External organization' 'Person']


## Data cleaning
### About Makes
We found out that there are 427 vehicle makes. This seems way to many. So let's check if there are any 

In [8]:
makes = vehicle_df['Make'].unique()
makes_lower = set(map(str.upper, makes)) # use a set to keep unique elements.
print(f"upper case make length comparison {len(makes_lower) == len(makes)}")
# converting to lower case didn't reduce the number of makes.
print(makes_lower)

upper case make length comparison True
{'ASUNA', 'FERRARI', 'JAGUAR TRUCK/VAN', 'GAZELE', 'KAGAWA', 'CADDY', 'GORDON KEEBLE', 'WIDESCAPE', 'ZHEJIANG', 'LIFAN', 'NISSAN', 'TRIHAWK', 'MITSHU', 'WOLSELEY', 'REPLIKIT', 'HUZHOU DAI', 'DAIMLER', 'MERCEDES-BENZ TRUCK', 'TEXTRON', 'PEERLESS', 'CADILLAC', 'DAIHATSU', 'MAXI TAXI', 'KTM', 'REPLICAR', 'VOLKSWAGEN', 'POLARIS', 'KODIAK', 'MERCURY TRUCK/VAN', 'AMERICAN MOTORS', 'WILLY', 'ROLLS ROYCE', 'JINYUN', 'LINCOLN', 'BLAKELY', 'PIERCE', 'HONDA TRUCK/VAN', 'WELCH', 'PUMA', 'MISTUBISHI', 'SUNL', 'CUTLASS', 'ROLLSROYCE', 'HENRY', 'BORGWARD', 'MAYBACH', 'COLEMAN', 'NISSAN TRUCK/VAN', 'JIANGSU LI', 'MAZDA', 'JINYUN COU', 'GLOBAL ELECTRIC MOTORS', 'ARCTIC', 'GL MOTOR', 'TOYOTA TRUCK/VAN', 'KAWASAKE', 'REO', 'FRANKLIN', 'ARIEL', 'MOBI', 'TELSA', 'AURORA', 'CAPRI', 'RIVIAN', 'SKODA', 'HYUNDAI', 'MAGNUM', 'POLESTAR', 'SHELBY', 'UBILT', 'ALFA', 'MONARCH', 'STUTZ', 'CHEVROLET', 'ARTIC CAT', 'AMERICAN', 'SPORTSMAN', 'JAGUAR', 'NSU', 'LENCO', 'PACKARD', 'FO

## Misspelt makes
- There are some obviously misspelt makes, such as Porshe for Porsche and MITSHU for Mitsubishi.
- We use a dictionary to map misspelt makes to the correct ones.

In [9]:
misspelt: dict = {
    'PORSHE': 'PORSCHE',
    'LAMBORGHIN': 'LAMBORGHINI',
    'TOYOYA': 'TOYOTA',
    'TOTOYA' : 'TOYOTA',
    'TOOYTA' : 'TOYOTA',
    'MECEDES': 'MERCEDES-BENZ',
    'MERCEDES' : 'MERCEDES-BENZ',
    'MERDECES' : 'MERCEDES-BENZ',
    'MERCEDSBNZ' : 'MERCEDES-BENZ',
    'YAHAMA': 'YAMAHA',
    'YAHMA' : 'YAMAHA',
    'INTERNATIO': 'INTERNATIONAL',
    'FERARRI' : 'FERRARI',
    'MITSHUBISH' : 'MITSUBISHI',
    'MITSHU' : 'MITSUBISHI',
    'MISTUBISHI' : 'MITSUBISHI',
    'CADDY' : 'CADILLAC',
    'CHEVEROLET' : 'CHEVROLET',
    'CHERVOLET' : 'CHEVROLET',
    'CHEV' : 'CHEVROLET',
    'CHEVY' : 'CHEVROLET',
    'DATSUN': 'NISSAN',
    'DATSUN/NISSAN' : 'NISSAN',
    'ROLLS ROYC' : 'ROLLS ROYCE',
    'ROLLSROYCE' : 'ROLLS ROYCE',
    'JOHNDEERE' : 'JOHN DEERE',
    'ALFA': 'ALFA ROMEO',
    'ASTON MART' : 'ASTON MARTIN',
    'TELSA': 'TESLA',
    'SKI=DOO':'SKI-DOO',
    'SKIDO':'SKI-DOO',
    'SKI-D': 'SKI-DOO',
    'SKIDOO': 'SKI-DOO',
    'SKI-D00':'SKI-DOO',
    'CANAM': 'CAN-AM',
    'BOMBADIER':'BOMBARDIER',
    'PLARIS': 'POLARIS',
    'TAOTAO' : 'TAO TAO',
    'TRACKERATV' : 'TRACKER',
    'TRACKERPTV' : 'TRACKER',
    'TRACKER SX' : 'TRACKER',
    'GAZELE' : 'GAZELLE',
    'CATERPILAR' : 'CATERPILLAR',
    
}

vehicle_df['Make'] = vehicle_df['Make'].map(misspelt).fillna(vehicle_df['Make'])
makes = vehicle_df['Make'].unique()
print(f"Cleaned makes length = {len(makes)} and {makes=}")

Cleaned makes length = 386 and makes=array(['VOLKSWAGEN', 'TOYOTA', 'FORD TRUCK/VAN', 'LAND ROVER',
       'AUDI TRUCK/VAN', 'SMART', 'MITSUBISHI', 'JEEP', 'FIAT', 'HYUNDAI',
       'HONDA', 'FORD', 'ACURA', 'PONTIAC', 'NISSAN', 'ACURA TRUCK/VAN',
       'CHEVROLET', 'KIA', 'LEXUS TRUCK/VAN', 'GMC TRUCK/VAN',
       'CHEVROLET TRUCK/VAN', 'NISSAN TRUCK/VAN', 'TOYOTA TRUCK/VAN',
       'DODGE/RAM TRUCK/VAN', 'HONDA TRUCK/VAN', 'SUBARU',
       'HYUNDAI TRUCK/VAN', 'CHRYSLER TRUCK/VAN', 'KIA TRUCK/VAN',
       'MERCEDES-BENZ TRUCK', 'CHRYSLER', 'DODGE/RAM', 'PLYMOUTH',
       'MERCEDES-BENZ', 'BMW', 'YAMAHA', 'ARCTIC CAT', 'SKI-DOO',
       'MASERATI', 'PORSCHE', 'LEXUS', 'SC CARTS', 'PORSCHE TRUCK/VAN',
       'JAGUAR', 'INFINITI', 'BMW TRUCK/VAN', 'HUMMER',
       'CADILLAC TRUCK/VAN', 'MAZDA TRUCK/VAN', 'INFINITI TRUCK/VAN',
       'LINCOLN TRUCK/VAN', 'VOLVO TRUCK/VAN', 'JAGUAR TRUCK/VAN',
       'VOLKSWAGEN TRUCK/VAN', 'AUDI', 'BUICK', 'MAZDA',
       'SUZUKI TRUCK/VAN', 'BUICK TRUC

In [10]:
print(f"{vehicle_df.loc[vehicle_df['Make'] == 'COROLLA']} =")

print(f"{vehicle_df.loc[vehicle_df['Make'] == 'ODES-13']} =")

print(f"{vehicle_df.loc[vehicle_df['Make'] == 'HENRY J']} =")

print(f"{vehicle_df.loc[vehicle_df['Make'] == 'SHELBY AMERICAN']} =")

print(f"{vehicle_df.loc[vehicle_df['Make'] == 'MAVERICK']} =")

print(f"{vehicle_df.loc[vehicle_df['Make'] == 'ALPHARD']} =")

print(f"{vehicle_df.loc[vehicle_df['Make'] == 'TUNDRA']} =")

print(f"{vehicle_df.loc[vehicle_df['Make'] == 'ISETTA']} =")

print(f"{vehicle_df.loc[vehicle_df['Make'] == 'ANGLIA']} =")

print(f"{vehicle_df.loc[vehicle_df['Make'] == 'EUNOS']} =")

print(f"{vehicle_df.loc[vehicle_df['Make'] == '1965 SHELB']} =")

print(f"{vehicle_df.loc[vehicle_df['Make'] == 'EDSEL']} =")

print(f"{vehicle_df.loc[vehicle_df['Make'] == 'OUTLANDER']} =")

print(f"{vehicle_df.loc[vehicle_df['Make'] == '2024']} =")

print(f"{vehicle_df.loc[vehicle_df['Make'] == 'CUTLASS']} =")


       Vehicle Use Anti Theft Device Indicator     Body Style  \
143975    Personal                         Yes  Fourdoorsedan   

       Electric_Vehicle_Indicator Fleet Vehicle Indicator Fuel Type  \
143975                         No                      No  Gasoline   

       Hybrid Vehicle Indicator     Make  Model  Model Year Municipality  \
143975                       No  COROLLA  CROSS        2024       Surrey   

       Owner Type  Vehicle Count  
143975     Person              1   =
Empty DataFrame
Columns: [Vehicle Use, Anti Theft Device Indicator, Body Style, Electric_Vehicle_Indicator, Fleet Vehicle Indicator, Fuel Type, Hybrid Vehicle Indicator, Make, Model, Model Year, Municipality, Owner Type, Vehicle Count]
Index: [] =
       Vehicle Use Anti Theft Device Indicator    Body Style  \
542439    Personal                          No  Twodoorsedan   

       Electric_Vehicle_Indicator Fleet Vehicle Indicator Fuel Type  \
542439                         No                    

## Rows with wrong make.
- The row with Make = COROLLA and Model = CROSS is erroneous. The make is TOYOTA and the model is COROLLACROSS.
- The row with Make = MAVERICK and Model = MAVERICK is erroneous. The Body Style is Wheeledatv, so the actual make is CAN-AM and the model is MAVERICK.
- The row with Make = ALPHARD and Model = Unknown is erroneous. The make is TOYOTA and the model is ALPHARD.
- The row with Make = TUNDRA and Model = Unknown is erroneous. The Body Style is Snowmobile, so the actual make is SKI-DOO and the model is TUNDRA.
- The row with Make = ISETTA and Model = ISETTA is erroneous. The YEAR is 1957 and 1958. So the actual make is BMW and the model is ISETTA.
- The row with Make = 1965 SHELB and Model = COBRA is erroenous. The Make is SHELBY.
- The row with Make = Anglia and Model = Delux is erroneous. The make is FORD and model is Anglia Delux.
- The row with Make = EDSEL and Model = ALL MODELS is erroneous. It's a 1958 car. The make is FORD and model is EDSEL.
- The row with Make = EUNOS and Model = ROADSTER (IMPORT MAZDA MX5 MIATA) is erroneous. The make is Mazda and model is MX-5 Miata.
- The row with Make = SHELBY AMERICAN and Model = COBRA. We are going to go with the common name for this brand, SHELBY.
- The rows with Make = OUTLANDER and Body Style = Wheeledatv are erroneous. The Make is CAN-AM.
- The row with Make = 2024 and Model RAV4 is erroenous. The Make is TOYOTA and the model is RAV4.
- The row with Make = CUTLASS and Year = 1971 is erroneous. The Make Is Oldsmobile and the model is CUTLASS.

In [11]:
vehicle_df.loc[vehicle_df["Make"]=="COROLLA", 'Make'] = "TOYOTA"
vehicle_df.loc[vehicle_df["Make"]=="COROLLA", 'Model'] = "COROLLA CROSS"

vehicle_df.loc[vehicle_df["Make"]=="MAVERICK", 'Make'] = "CAN-AM"

vehicle_df.loc[vehicle_df["Make"]=="ALPHARD", 'Make'] = "TOYOTA"
vehicle_df.loc[vehicle_df["Make"]=="ALPHARD", 'Model'] = "ALPHARD"

vehicle_df.loc[vehicle_df["Make"]=="TUNDRA", 'Make'] = "SKI-DOO"
vehicle_df.loc[vehicle_df["Make"]=="TUNDRA", 'Model'] = "TUNDRA"

vehicle_df.loc[vehicle_df["Make"]=="ISETTA", 'Make'] = "BMW"
vehicle_df.loc[vehicle_df["Make"]=="ISETTA", 'Model'] = "ISETTA"

vehicle_df.loc[vehicle_df["Make"]=="1965 SHELB", 'Make'] = "SHELBY"

vehicle_df.loc[vehicle_df["Make"]=="ANGLIA", 'Make'] = "FORD"
vehicle_df.loc[vehicle_df["Make"]=="ANGLIA", 'Model'] = "ANGLIA DELUX"

vehicle_df.loc[vehicle_df["Make"]=="EDSEL", 'Make'] = "FORD"
vehicle_df.loc[vehicle_df["Make"]=="EDSEL", 'Model'] = "EDSEL"

vehicle_df.loc[vehicle_df["Make"]=="EUNOS", 'Make'] = "MAZDA"
vehicle_df.loc[vehicle_df["Make"]=="EUNOS", 'Model'] = "MX-5 Miata"

vehicle_df.loc[vehicle_df["Make"]=="SHELBY AMERICAN", 'Make'] = "SHELBY"
vehicle_df.loc[vehicle_df["Make"]=="SHELBY AMERICAN", 'Model'] = "COBRA"

vehicle_df.loc[vehicle_df["Make"]=="OUTLANDER", 'Model'] = "CAN-AM"

vehicle_df.loc[vehicle_df["Make"]=="2024", 'Make'] = "TOYOTA"

vehicle_df.loc[vehicle_df["Make"]=="CUTLASS", 'MAKE'] = "OLDSMOBILE"
vehicle_df.loc[vehicle_df["Make"]=="CUTLASS", 'MODEL'] = "CUTLASS"

makes = vehicle_df['Make'].unique()
print(f"Cleaned makes length = {len(makes)}")

Cleaned makes length = 375


## Make+truck/van
- We saw that some entries have the Make field set as 'MAKE TRUCK/VAN' or 'MAKE VAN'. We just want the make, not the qualifier truck or van.
- We clean the data to reflect that.

In [12]:
for m in makes:
    make_split = m.split()
    if make_split[-1] in ("TRUCK/VAN", "TRUCK") :
        new_make = " ".join(make_split[:-1])
        vehicle_df.loc[vehicle_df["Make"]==m, "Make"] = new_make
        
makes = vehicle_df["Make"].unique()
print(f"Cleaned makes length = {len(makes)} and {makes=}")

Cleaned makes length = 333 and makes=array(['VOLKSWAGEN', 'TOYOTA', 'FORD', 'LAND ROVER', 'AUDI', 'SMART',
       'MITSUBISHI', 'JEEP', 'FIAT', 'HYUNDAI', 'HONDA', 'ACURA',
       'PONTIAC', 'NISSAN', 'CHEVROLET', 'KIA', 'LEXUS', 'GMC',
       'DODGE/RAM', 'SUBARU', 'CHRYSLER', 'MERCEDES-BENZ', 'PLYMOUTH',
       'BMW', 'YAMAHA', 'ARCTIC CAT', 'SKI-DOO', 'MASERATI', 'PORSCHE',
       'SC CARTS', 'JAGUAR', 'INFINITI', 'HUMMER', 'CADILLAC', 'MAZDA',
       'LINCOLN', 'VOLVO', 'BUICK', 'SUZUKI', 'GOLF CART', 'AUSTIN',
       'ROVER', 'TRIUMPH', 'DODGE', 'KAWASAKI', 'POLARIS', 'CAN-AM',
       'ISUZU', 'MINI', 'TESLA', 'POLESTAR', 'RIVIAN', 'SAAB',
       'ASTON MARTIN', 'LAMBORGHINI', 'ALFA ROMEO', 'MAHINDRA', 'BOBCAT',
       'KUBOTA', 'ROLLS ROYCE', 'AMERICAN MOTORS', 'RANGER', 'JOHN DEERE',
       'CFMOTO', 'TEXTRON', 'TRACKER', 'WIDESCAPE', 'GENESIS', 'FERRARI',
       'LOTUS', 'DELOREAN', 'REPLIKIT', 'UBILT', 'MERCURY', 'STUDEBAKER',
       'OLDSMOBILE', 'SATURN', 'MG', 'GEO', 'BENTL

In [13]:
output_file_path = file_path = './icbc/Vehicle Population - 2023 Passenger Vehicles_Makes_cleaned.csv'
vehicle_df.to_csv(output_file_path, encoding='utf-8', index=False)