----------
# **EV Registration**
----------

In [17]:
import pandas as pd
# Read only the ten listed columns from the raw registration export.
df = pd.read_csv(
    "Electric_Vehicle_Title_and_Registration_Activity_20240508.csv",
    usecols=[
        'Clean Alternative Fuel Vehicle Type',
        'VIN (1-10)',
        'Model Year',
        'Make',
        'Model',
        'Electric Range',
        'County',
        'City',
        'State of Residence',
        'Electric Utility',
    ],
)

In [18]:
# Preview the first three rows of the loaded frame.
df.head(n=3)

Unnamed: 0,Clean Alternative Fuel Vehicle Type,VIN (1-10),Model Year,Make,Model,Electric Range,County,City,State of Residence,Electric Utility
0,Battery Electric Vehicle (BEV),1N4AZ1BV0R,2024,NISSAN,Leaf,0,King,SEATTLE,WA,CITY OF SEATTLE - (WA)|CITY OF TACOMA - (WA)
1,Plug-in Hybrid Electric Vehicle (PHEV),JTMCB3FV2P,2023,TOYOTA,RAV4 Prime,42,King,KIRKLAND,WA,PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA)
2,Battery Electric Vehicle (BEV),7SAYGDEE1P,2023,TESLA,Model Y,0,King,BELLEVUE,WA,PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA)


In [23]:
# Count missing values per column.
df.isnull().sum()

Clean Alternative Fuel Vehicle Type     0
VIN (1-10)                              0
Model Year                              0
Make                                    0
Model                                   0
Electric Range                          0
County                                 52
City                                   92
State of Residence                      1
Electric Utility                       52
dtype: int64

In [28]:
# Number of rows that exactly repeat an earlier row (first occurrence not counted).
df.duplicated(keep="first").sum()

823019

In [30]:
# Discard every row that has a missing value in any column, then re-count
# the missing values per column.
df = df.dropna()
df.isnull().sum()

Clean Alternative Fuel Vehicle Type    0
VIN (1-10)                             0
Model Year                             0
Make                                   0
Model                                  0
Electric Range                         0
County                                 0
City                                   0
State of Residence                     0
Electric Utility                       0
dtype: int64

In [32]:
# Keep only the first occurrence of each fully identical row, then re-count
# the remaining duplicates.
df = df.drop_duplicates()
df.duplicated(keep="first").sum()

0

In [35]:
# Write the cleaned registration rows to a new CSV, omitting the index column.
df.to_csv(
    "Electric_Vehicle_Title_and_Registration_Activity_20240508_cleaned.csv",
    index=False,
)

----------
# **Electric Car Data**
----------

In [113]:
import pandas as pd
# NOTE: this rebinds `df`; from here on it holds the electric car data,
# not the EV registration data loaded earlier in the notebook.
df = pd.read_csv(filepath_or_buffer="Electric_Car_Data_Cleaned.csv")

In [114]:
# Preview the first three rows of the car data.
df.head(n=3)

Unnamed: 0,Brand,Model,AccelSec,TopSpeed_KmH,Range_Km,Efficiency_WhKm,FastCharge_KmH,RapidCharge,PowerTrain,PlugType,BodyStyle,Segment,Seats,PriceEuro
0,Tesla,Model 3 Long Range Dual Motor,4.6,233,450,161,940,Yes,AWD,Type 2 CCS,Sedan,D,5,55480
1,Volkswagen,ID.3 Pure,10.0,160,270,167,250,Yes,RWD,Type 2 CCS,Hatchback,C,5,30000
2,Polestar,2,4.7,210,400,181,620,Yes,AWD,Type 2 CCS,Liftback,D,5,56440


In [115]:
# Count missing values per column.
df.isnull().sum()

Brand              0
Model              0
AccelSec           0
TopSpeed_KmH       0
Range_Km           0
Efficiency_WhKm    0
FastCharge_KmH     0
RapidCharge        0
PowerTrain         0
PlugType           0
BodyStyle          0
Segment            0
Seats              0
PriceEuro          0
dtype: int64

In [116]:
# Number of rows that exactly repeat an earlier row (first occurrence not counted).
df.duplicated(keep="first").sum()

0

In [117]:
# Remove the Segment and PriceEuro columns.
# Rebinding `df` (instead of dropping in place) together with errors="ignore"
# keeps this cell re-runnable: executing it a second time is a no-op rather
# than a KeyError for columns that were already removed.
df = df.drop(columns=["Segment", "PriceEuro"], errors="ignore")

In [118]:
# Trim leading/trailing whitespace from the brand names.
df = df.assign(Brand=df["Brand"].str.strip())

In [119]:
# Add a 1-based sequential identifier, one per row, as column `car_id`.
df = df.assign(car_id=lambda frame: range(1, len(frame) + 1))

In [120]:
# Distinct brand names, in order of first appearance.
unique_brands = pd.unique(df["Brand"])

In [105]:
# Display the array of distinct brand names.
unique_brands

array(['Tesla', 'Volkswagen', 'Polestar', 'BMW', 'Honda', 'Lucid',
       'Peugeot', 'Audi', 'Mercedes', 'Nissan', 'Hyundai', 'Porsche',
       'MG', 'Mini', 'Opel', 'Skoda', 'Volvo', 'Kia', 'Renault', 'Mazda',
       'Lexus', 'CUPRA', 'SEAT', 'Lightyear', 'Aiways', 'DS', 'Citroen',
       'Jaguar', 'Ford', 'Byton', 'Sono', 'Smart', 'Fiat'], dtype=object)

In [112]:
from PIL import Image
import os

def resize_images(original_folder, resized_folder, target_width, target_height):
    """Resize every JPEG in ``original_folder`` and save it into ``resized_folder``.

    Each image is resized to exactly ``(target_width, target_height)``; the
    aspect ratio is not preserved. Output files keep their original filenames.
    Only the top level of ``original_folder`` is scanned.

    Args:
        original_folder: Directory containing the source images.
        resized_folder: Directory that receives the resized copies. It is
            created (including missing parents) when it does not exist.
        target_width: Width passed to ``Image.resize``.
        target_height: Height passed to ``Image.resize``.
    """
    # exist_ok=True replaces the exists()-then-makedirs() pair, which could
    # fail if the directory appeared between the check and the creation.
    os.makedirs(resized_folder, exist_ok=True)

    for filename in os.listdir(original_folder):
        # Compare the extension case-insensitively so files such as
        # "Tesla.JPG" or "Tesla.jpeg" are not silently skipped.
        if not filename.lower().endswith((".jpg", ".jpeg")):
            continue
        with Image.open(os.path.join(original_folder, filename)) as img:
            resized_img = img.resize((target_width, target_height))
            resized_img.save(os.path.join(resized_folder, filename))

# Source and destination folders, and the fixed output size, for the resize run.
original_folder, resized_folder = "car_image", "car_image_resize"
target_width, target_height = 320, 180

resize_images(
    original_folder=original_folder,
    resized_folder=resized_folder,
    target_width=target_width,
    target_height=target_height,
)


In [121]:
# Map each distinct brand to "<Brand>.jpg", then store that filename prefixed
# with "sources/dataset/car_image_resize/" in the `car_image` column.
brand_to_image = dict((brand, f'{brand}.jpg') for brand in df['Brand'].unique())
df['car_image'] = "sources/dataset/car_image_resize/" + df['Brand'].map(brand_to_image)

In [126]:
# Show brand and image path for the first 100 rows.
df[['Brand', 'car_image']].head(100)

Unnamed: 0,Brand,car_image
0,Tesla,sources/dataset/car_image_resize/Tesla.jpg
1,Volkswagen,sources/dataset/car_image_resize/Volkswagen.jpg
2,Polestar,sources/dataset/car_image_resize/Polestar.jpg
3,BMW,sources/dataset/car_image_resize/BMW.jpg
4,Honda,sources/dataset/car_image_resize/Honda.jpg
...,...,...
95,Volkswagen,sources/dataset/car_image_resize/Volkswagen.jpg
96,Kia,sources/dataset/car_image_resize/Kia.jpg
97,Byton,sources/dataset/car_image_resize/Byton.jpg
98,Nissan,sources/dataset/car_image_resize/Nissan.jpg


In [127]:
# Write the final frame to CSV without the index column; mode 'w' truncates
# any existing file of the same name.
df.to_csv(
    "electric_car_data.csv",
    mode='w',
    index=False,
)