In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Importing required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Electric_Vehicle_Population_Data.csv')

In [4]:
# Check for missing values
df.isnull().sum()

Unnamed: 0,0
VIN (1-10),0
County,4
City,4
State,0
Postal Code,4
Model Year,0
Make,0
Model,0
Electric Vehicle Type,0
Clean Alternative Fuel Vehicle (CAFV) Eligibility,0


In [5]:
# Summary statistics
df.describe()

Unnamed: 0,Postal Code,Model Year,Electric Range,Base MSRP,Legislative District,DOL Vehicle ID,2020 Census Tract
count,232226.0,232230.0,232203.0,232203.0,231749.0,232230.0,232226.0
mean,98180.172044,2021.353727,46.755998,803.808973,28.880979,234367100.0,52981770000.0
std,2489.407943,2.994884,84.373596,7246.597102,14.904503,68314180.0,1507814000.0
min,1731.0,1999.0,0.0,0.0,1.0,4385.0,1001020000.0
25%,98052.0,2020.0,0.0,0.0,17.0,203473700.0,53033010000.0
50%,98126.0,2023.0,0.0,0.0,32.0,251271700.0,53033030000.0
75%,98375.0,2023.0,38.0,0.0,42.0,268694300.0,53053070000.0
max,99577.0,2025.0,337.0,845000.0,49.0,479254800.0,56021000000.0


In [6]:
# Unique values in the target column
df['Clean Alternative Fuel Vehicle (CAFV) Eligibility'].value_counts()

Unnamed: 0_level_0,count
Clean Alternative Fuel Vehicle (CAFV) Eligibility,Unnamed: 1_level_1
Eligibility unknown as battery range has not been researched,136865
Clean Alternative Fuel Vehicle Eligible,72847
Not eligible due to low battery range,22518


In [7]:
# Fill missing values (Best Practice)
df['Postal Code'] = df['Postal Code'].fillna(df['Postal Code'].mode()[0])
df['Electric Range'] = df['Electric Range'].fillna(df['Electric Range'].median())
df['Base MSRP'] = df['Base MSRP'].fillna(df['Base MSRP'].median())

# Fill remaining missing data with 'Unknown' (Ensure type compatibility)
df = df.fillna('Unknown')

In [8]:
# Select Features and Target
selected_features = ['Model Year', 'Make', 'Model', 'Electric Vehicle Type', 'Electric Range', 'Base MSRP']
X = df[selected_features].copy()
y = df['Clean Alternative Fuel Vehicle (CAFV) Eligibility'].copy()

# Identify Numerical and Categorical Columns
numerical_cols = ['Model Year', 'Electric Range', 'Base MSRP']
categorical_cols = [col for col in X.columns if col not in numerical_cols]

print("Features and target selected.")

Features and target selected.


In [9]:
# One-Hot Encode Categorical Columns
X = pd.get_dummies(X, columns=categorical_cols, drop_first=True)

print("Categorical columns one-hot encoded.")

Categorical columns one-hot encoded.


In [10]:
from sklearn.preprocessing import StandardScaler

# Scale Numerical Columns
scaler = StandardScaler()
X[numerical_cols] = scaler.fit_transform(X[numerical_cols])

print("Numerical columns scaled.")

Numerical columns scaled.


In [11]:
from sklearn.model_selection import train_test_split  # Import train_test_split

# Split the Data into Training and Testing Sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

print("Data split into training and testing sets.")

Data split into training and testing sets.


In [12]:
# Verify Shape and Columns of X
print("Shape of X after encoding:", X.shape)
print("\nColumns in X:")
print(X.columns)

# Inspect the Encoding for 'Make' (Optional)
print("\nEncoding for 'Make':")
for col in X.columns:
    if col.startswith('Make_'):
        make_name = col[5:]
        print(f"{col}: Represents Make '{make_name}'")

# Inspect the Encoding for 'Electric Vehicle Type' (Optional)
print("\nEncoding for 'Electric Vehicle Type':")
for col in X.columns:
    if col.startswith('Electric Vehicle Type_'):
        ev_type = col[22:]
        print(f"{col}: Represents Electric Vehicle Type '{ev_type}'")

print("Shape and encoding information displayed.")

Shape of X after encoding: (232230, 218)

Columns in X:
Index(['Model Year', 'Electric Range', 'Base MSRP', 'Make_ALFA ROMEO',
       'Make_AUDI', 'Make_AZURE DYNAMICS', 'Make_BENTLEY', 'Make_BMW',
       'Make_BRIGHTDROP', 'Make_CADILLAC',
       ...
       'Model_WRANGLER', 'Model_X3', 'Model_X5', 'Model_XC40', 'Model_XC60',
       'Model_XC90', 'Model_XM', 'Model_ZDX', 'Model_ZEVO',
       'Electric Vehicle Type_Plug-in Hybrid Electric Vehicle (PHEV)'],
      dtype='object', length=218)

Encoding for 'Make':
Make_ALFA ROMEO: Represents Make 'ALFA ROMEO'
Make_AUDI: Represents Make 'AUDI'
Make_AZURE DYNAMICS: Represents Make 'AZURE DYNAMICS'
Make_BENTLEY: Represents Make 'BENTLEY'
Make_BMW: Represents Make 'BMW'
Make_BRIGHTDROP: Represents Make 'BRIGHTDROP'
Make_CADILLAC: Represents Make 'CADILLAC'
Make_CHEVROLET: Represents Make 'CHEVROLET'
Make_CHRYSLER: Represents Make 'CHRYSLER'
Make_DODGE: Represents Make 'DODGE'
Make_FIAT: Represents Make 'FIAT'
Make_FISKER: Represents Make 'FIS

In [13]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Encode Target Variable (y) using LabelEncoder
le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)

# Convert Target Variable to One-Hot Encoding
y_categorical_train = pd.get_dummies(y_train).values
y_categorical_test = pd.get_dummies(y_test).values

print("Target variable encoded and converted to one-hot.")

Target variable encoded and converted to one-hot.


In [14]:
# Import necessary libraries
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

# Build the Deep Learning Model
model = Sequential([
    Dense(10, activation='relu', input_shape=(X_train.shape[1],)),  # First hidden layer
    Dense(8, activation='relu'),                                    # Second hidden layer
    Dense(y_categorical_train.shape[1], activation='softmax')       # Output layer with softmax
])

In [15]:
# Compile the model with Categorical Cross-Entropy
from tensorflow.keras.losses import CategoricalCrossentropy

model.compile(optimizer=Adam(), loss=CategoricalCrossentropy(), metrics=['accuracy'])
print("Model compiled with Categorical Cross-Entropy loss.")

# Train the model
model.fit(X_train, y_categorical_train, epochs=20, verbose=1)
print("Training completed with Categorical Cross-Entropy loss.")


Model compiled with Categorical Cross-Entropy loss.
Epoch 1/20
[1m5806/5806[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 2ms/step - accuracy: 0.9658 - loss: 0.1371
Epoch 2/20
[1m5806/5806[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 2ms/step - accuracy: 1.0000 - loss: 4.0256e-04
Epoch 3/20
[1m5806/5806[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 2ms/step - accuracy: 1.0000 - loss: 1.1941e-04
Epoch 4/20
[1m5806/5806[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 2ms/step - accuracy: 1.0000 - loss: 4.6064e-05
Epoch 5/20
[1m5806/5806[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 2ms/step - accuracy: 1.0000 - loss: 1.0330e-05
Epoch 6/20
[1m5806/5806[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 2ms/step - accuracy: 1.0000 - loss: 1.7153e-06
Epoch 7/20
[1m5806/5806[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 2ms/step - accuracy: 1.0000 - loss: 8.5535e-06
Epoch 8/20
[1m5806/5806[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

In [16]:
# Evaluate the Model
loss, accuracy = model.evaluate(X_test, y_categorical_test, verbose=0)

# Print the Evaluation Results
print(f"Categorical Crossentropy Loss: {loss}, Accuracy: {accuracy}")

print("Model evaluated and results printed.")

Categorical Crossentropy Loss: 0.0002560533757787198, Accuracy: 0.9999784827232361
Model evaluated and results printed.


In [17]:
# Compile the model with Mean Squared Error (MSE)
from tensorflow.keras.losses import MeanSquaredError

model.compile(optimizer=Adam(learning_rate=0.001), loss=MeanSquaredError(), metrics=['accuracy'])
print("Model compiled with Mean Squared Error (MSE) loss.")

# Train the model
model.fit(X_train, y_categorical_train, epochs=20, batch_size=32, verbose=1)
print("Training completed with Mean Squared Error (MSE) loss.")

Model compiled with Mean Squared Error (MSE) loss.
Epoch 1/20
[1m5806/5806[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 2ms/step - accuracy: 1.0000 - loss: 5.2323e-14
Epoch 2/20
[1m5806/5806[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 2ms/step - accuracy: 1.0000 - loss: 1.6405e-13
Epoch 3/20
[1m5806/5806[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 2ms/step - accuracy: 1.0000 - loss: 1.8506e-14
Epoch 4/20
[1m5806/5806[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 2ms/step - accuracy: 1.0000 - loss: 2.9385e-13
Epoch 5/20
[1m5806/5806[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 2ms/step - accuracy: 1.0000 - loss: 2.2310e-14
Epoch 6/20
[1m5806/5806[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 2ms/step - accuracy: 1.0000 - loss: 8.2260e-15
Epoch 7/20
[1m5806/5806[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 2ms/step - accuracy: 1.0000 - loss: 1.6490e-14
Epoch 8/20
[1m5806/5806[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

In [19]:
# Evaluate the Model
loss, accuracy = model.evaluate(X_test, y_categorical_test, verbose=0)

# Print the Evaluation Results
print(f"Mean Squared Error (MSE) Loss: {loss}, Accuracy: {accuracy}")

print("Model evaluated and results printed.")


Mean Squared Error (MSE) Loss: 1.4353386177390348e-05, Accuracy: 0.9999784827232361
Model evaluated and results printed.


In [20]:
# Compile the model with Mean Absolute Error (MAE)
from tensorflow.keras.losses import MeanAbsoluteError

model.compile(optimizer=Adam(learning_rate=0.001), loss=MeanAbsoluteError(), metrics=['accuracy'])
print("Model compiled with Mean Absolute Error (MAE) loss.")

# Train the model
model.fit(X_train, y_categorical_train, epochs=20, batch_size=32, verbose=1)
print("Training completed with Mean Absolute Error (MAE) loss.")

Model compiled with Mean Absolute Error (MAE) loss.
Epoch 1/20
[1m5806/5806[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 3ms/step - accuracy: 1.0000 - loss: 5.0028e-07
Epoch 2/20
[1m5806/5806[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 3ms/step - accuracy: 1.0000 - loss: 2.7555e-06
Epoch 3/20
[1m5806/5806[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 3ms/step - accuracy: 1.0000 - loss: 3.0524e-06
Epoch 4/20
[1m5806/5806[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 3ms/step - accuracy: 1.0000 - loss: 9.1875e-08
Epoch 5/20
[1m5806/5806[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 3ms/step - accuracy: 1.0000 - loss: 6.7636e-07
Epoch 6/20
[1m5806/5806[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 2ms/step - accuracy: 1.0000 - loss: 1.8847e-05
Epoch 7/20
[1m5806/5806[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 2ms/step - accuracy: 0.9942 - loss: 0.0039
Epoch 8/20
[1m5806/5806[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

In [21]:
# Evaluate the Model
loss, accuracy = model.evaluate(X_test, y_categorical_test, verbose=0)

# Print the Evaluation Results
print(f"Mean Absolute Error (MAE) Loss: {loss}, Accuracy: {accuracy}")

print("Model evaluated and results printed.")

Mean Absolute Error (MAE) Loss: 0.009301098063588142, Accuracy: 0.9860483407974243
Model evaluated and results printed.
