In [2]:

# Electric Vehicle Type Classification using Machine Learning
# AICTE Internship – Week 2
# Author: Sabaretha

# ------------------------------------------------------------
# Step 1: Import required libraries
# ------------------------------------------------------------
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from google.colab import files

# ------------------------------------------------------------
# Step 2: Upload your dataset
# ------------------------------------------------------------
print("Please upload your dataset CSV file...")
# The returned mapping is keyed by file name (its keys are read below).
uploaded = files.upload()   # choose your downloaded CSV file

# If nothing was uploaded the mapping is empty; stop here with a clear
# message instead of failing on the first-key lookup.
if not uploaded:
    raise ValueError("No file was uploaded. Re-run this cell and choose a CSV file.")

# Automatically read the first uploaded file
file_name = next(iter(uploaded))
df = pd.read_csv(file_name)
print("\nDataset loaded successfully!")
print("Shape:", df.shape)

# ------------------------------------------------------------
# Step 3: Basic understanding of the data
# ------------------------------------------------------------
# Print the column labels first, then the first five rows of the frame.
for heading, preview in (
    ("\nColumns present in dataset:\n", df.columns),
    ("\nFirst 5 rows of data:\n", df.head()),
):
    print(heading, preview)

# ------------------------------------------------------------
# Step 4: Handle missing values
# ------------------------------------------------------------
# Fill gaps per column type instead of writing 0 everywhere: numeric columns
# get 0, every other column gets the placeholder "Unknown". A blanket
# fillna(0) puts the integer 0 into text columns; the later astype(str) +
# label-encoding step would turn that into a "0" category, which sorts ahead
# of alphabetic labels and shifts their integer codes.
numeric_cols = df.select_dtypes(include="number").columns
fill_values = {
    col: 0 if col in numeric_cols else "Unknown"
    for col in df.columns
}
df = df.fillna(value=fill_values)

# ------------------------------------------------------------
# Step 5: Check and rename columns if required
# ------------------------------------------------------------
# Headers are matched ignoring case and surrounding whitespace, then the six
# columns named below are restored to a fixed spelling. Any header that is
# not listed in `renamed` keeps its trimmed, lower-cased form.

# Identity mapping of the expected column names. It is not read anywhere
# else in this script.
col_map = {
    'Make': 'Make',
    'Model': 'Model',
    'Electric Vehicle Type': 'Electric Vehicle Type',
    'Model Year': 'Model Year',
    'Electric Range': 'Electric Range',
    'Base MSRP': 'Base MSRP'
}

# Lookup from the lower-cased header to the spelling used from here on.
renamed = {
    'make': 'Make',
    'model': 'Model',
    'electric vehicle type': 'Electric Vehicle Type',
    'model year': 'Model Year',
    'electric range': 'Electric Range',
    'base msrp': 'Base MSRP'
}

# Trim and lower-case every header, then apply the lookup.
normalized_headers = df.columns.str.strip().str.lower()
df.columns = normalized_headers
df = df.rename(columns=renamed)

# ------------------------------------------------------------
# Step 6: Encode text (categorical) columns
# ------------------------------------------------------------
# Every column gets its own fitted LabelEncoder, stored in `encoders`, so the
# integer codes can be mapped back to the original labels afterwards, e.g.
# encoders['Make'].inverse_transform([10]). Re-fitting a single shared
# encoder would discard the mapping of every column except the last one.
encoders = {}
le = LabelEncoder()  # stays unfitted only if none of the columns below exist
for col in ['Make', 'Model', 'Electric Vehicle Type']:
    if col in df.columns:
        le = LabelEncoder()
        # astype(str) gives the encoder a uniform string input per column.
        df[col] = le.fit_transform(df[col].astype(str))
        encoders[col] = le
    else:
        print(f"Warning: Column '{col}' not found, skipping encoding.")
# `le` ends up bound to the most recently fitted encoder (the one for
# 'Electric Vehicle Type' when that column is present), so code that reads
# `le` after this step keeps working.

# ------------------------------------------------------------
# Step 7: Select useful numeric features
# ------------------------------------------------------------
features = ['Model Year', 'Make', 'Electric Range', 'Base MSRP']
target = 'Electric Vehicle Type'

# The target column is mandatory: fail with a message that lists what is
# actually available instead of a bare KeyError on the lookup.
if target not in df.columns:
    raise KeyError(
        f"Target column '{target}' not found. Available columns: {list(df.columns)}"
    )

# Keep only existing feature columns, and report the ones that are dropped
# (same warning style as the encoding step).
available_features = []
for col in features:
    if col in df.columns:
        available_features.append(col)
    else:
        print(f"Warning: Feature column '{col}' not found, skipping it.")

# Training on zero columns cannot work; say so here rather than inside fit().
if not available_features:
    raise ValueError(f"None of the expected feature columns {features} are present.")

X = df[available_features]
y = df[target]

# ------------------------------------------------------------
# Step 8: Split dataset into train & test sets
# ------------------------------------------------------------
# 20 % of the rows are held out for testing; the fixed seed makes the split
# repeatable between runs.
SEED = 42
TEST_FRACTION = 0.2
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_FRACTION, random_state=SEED
)

# ------------------------------------------------------------
# Step 9: Train Random Forest model
# ------------------------------------------------------------
# fit() returns the estimator itself, so construction and training chain.
model = RandomForestClassifier(random_state=SEED).fit(X_train, y_train)

# ------------------------------------------------------------
# Step 10: Evaluate performance
# ------------------------------------------------------------
# Predict on the held-out rows, then report accuracy (as a percentage rounded
# to two decimals), the per-class report and the confusion matrix.
y_pred = model.predict(X_test)

print("\nModel Evaluation Results")
print("----------------------------")
accuracy_pct = round(accuracy_score(y_test, y_pred) * 100, 2)
print("Accuracy:", accuracy_pct, "%")
for heading, metric in (
    ("\nClassification Report:\n", classification_report),
    ("\nConfusion Matrix:\n", confusion_matrix),
):
    print(heading, metric(y_test, y_pred))

# ------------------------------------------------------------
# Step 11: Example prediction
# ------------------------------------------------------------
# Example data, keyed by feature name. 'Make' is an integer code produced by
# the label-encoding step, not a brand name.
sample_values = {
    'Model Year': 2023,
    'Make': 10,
    'Electric Range': 250,
    'Base MSRP': 42000,
}
# Build the sample as a one-row DataFrame with exactly the columns the model
# was trained on, in the same order. A bare nested list carries no feature
# names and silently assumes all four features exist in a fixed order.
sample_input = pd.DataFrame([sample_values])[list(X.columns)]
sample_pred = model.predict(sample_input)
print("\nPredicted Vehicle Type (0 = BEV, 1 = PHEV):", sample_pred)

# Decode the numeric class through the fitted encoder rather than relying
# only on the hard-coded legend above.
# NOTE(review): assumes `le` was last fitted on 'Electric Vehicle Type', which
# holds while that column is the final entry in the encoding loop — confirm
# if the loop order changes.
print("Predicted Vehicle Type label:", le.inverse_transform(sample_pred))

Please upload your dataset CSV file...


Saving Electric_Vehicle_Population_Data.csv.zip to Electric_Vehicle_Population_Data.csv (1).zip

Dataset loaded successfully!
Shape: (112634, 17)

Columns present in dataset:
 Index(['VIN (1-10)', 'County', 'City', 'State', 'Postal Code', 'Model Year',
       'Make', 'Model', 'Electric Vehicle Type',
       'Clean Alternative Fuel Vehicle (CAFV) Eligibility', 'Electric Range',
       'Base MSRP', 'Legislative District', 'DOL Vehicle ID',
       'Vehicle Location', 'Electric Utility', '2020 Census Tract'],
      dtype='object')

First 5 rows of data:
    VIN (1-10)     County      City State  Postal Code  Model Year       Make  \
0  JTMEB3FV6N     Monroe  Key West    FL        33040        2022     TOYOTA   
1  1G1RD6E45D      Clark  Laughlin    NV        89029        2013  CHEVROLET   
2  JN1AZ0CP8B     Yakima    Yakima    WA        98901        2011     NISSAN   
3  1G1FW6S08H     Skagit  Concrete    WA        98237        2017  CHEVROLET   
4  3FA6P0SU1K  Snohomish   Everett    WA   

