# **CSCN-8040 Case Studies in Artificial Intelligence and Machine Learning**
# **Group 7 – EV Charging Gap in Canada**
- Team Leader:
- Dhruv Bhanupprasad Chaudhary

- Team Members:
- Manpreet Kaur
- Abdul Bari Mohammad
- Vishal Mukeshbhai Shah


# load the dataset

In [71]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

from scipy.stats import f_oneway
import matplotlib.pyplot as plt

In [72]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from scipy.stats import f_oneway

# ----------------------------------------------------------
# 1. LOAD DATASETS
# ----------------------------------------------------------

summary_df = pd.read_csv("DataSets/DataSets/ev_city_station_summary.csv")
cities_df = pd.read_csv("DataSets/DataSets/canadacities.csv")

print("Summary Dataset Sample:")
display(summary_df.head())

print("Cities Dataset Sample:")
display(cities_df.head())




Summary Dataset Sample:


Unnamed: 0,Province,City,EV_Count,Charging_Stations
0,Ontario,toronto,250934.0,428
1,Ontario,hamilton,26036.0,89
2,Quebec,sherbrooke,24964.0,100
3,Quebec,saguenay,16268.0,16
4,Manitoba,winnipeg,12174.0,119


Cities Dataset Sample:


Unnamed: 0,city,city_ascii,province_id,province_name,lat,lng,population,density,timezone,ranking,postal,id
0,Toronto,Toronto,ON,Ontario,43.7417,-79.3733,5647656.0,4427.8,America/Toronto,1,M5T M5V M5P M5S M5R M5E M5G M5A M5C M5B M5M M5...,1124279679
1,Montréal,Montreal,QC,Quebec,45.5089,-73.5617,3675219.0,4833.5,America/Toronto,1,H1X H1Y H1Z H1P H1R H1S H1T H1V H1W H1H H1J H1...,1124586170
2,Vancouver,Vancouver,BC,British Columbia,49.25,-123.1,2426160.0,5749.9,America/Vancouver,1,V6Z V6S V6R V6P V6N V6M V6L V6K V6J V6H V6G V6...,1124825478
3,Calgary,Calgary,AB,Alberta,51.05,-114.0667,1306784.0,1592.4,America/Edmonton,1,T1Y T2H T2K T2J T2M T2L T2N T2A T2C T2B T2E T2...,1124690423
4,Edmonton,Edmonton,AB,Alberta,53.5344,-113.4903,1151635.0,1320.4,America/Edmonton,1,T5X T5Y T5Z T5P T5R T5S T5T T5V T5W T5H T5J T5...,1124290735


# Normalizing 

In [73]:
# ----------------------------------------------------------
# 2. NORMALIZATION FUNCTIONS 
# ----------------------------------------------------------

def clean_city(x):
    if isinstance(x, str):
        return (x.lower()
                  .strip()
                  .replace("-", " ")
                  .replace("é", "e")
                  .replace("è", "e")
                  .replace("ê", "e"))
    return x

def clean_province(x):
    if isinstance(x, str):
        return x.lower().strip()
    return x




In [74]:
# ----------------------------------------------------------
# 3. APPLY NORMALIZATION BEFORE MERGING
# ----------------------------------------------------------

summary_df["City"] = summary_df["City"].apply(clean_city)
summary_df["Province"] = summary_df["Province"].apply(clean_province)

cities_df["City"] = cities_df["city"].apply(clean_city)
cities_df["Province"] = cities_df["province_name"].apply(clean_province)
cities_df["Population"] = cities_df["population"]

print("City values after cleaning:")
display(summary_df[["City", "Province"]].head())

print("Cities clean file:")
display(cities_df[["City", "Province", "Population"]].head())




City values after cleaning:


Unnamed: 0,City,Province
0,toronto,ontario
1,hamilton,ontario
2,sherbrooke,quebec
3,saguenay,quebec
4,winnipeg,manitoba


Cities clean file:


Unnamed: 0,City,Province,Population
0,toronto,ontario,5647656.0
1,montreal,quebec,3675219.0
2,vancouver,british columbia,2426160.0
3,calgary,alberta,1306784.0
4,edmonton,alberta,1151635.0


In [75]:
# ----------------------------------------------------------
# 4. MERGE DATASETS 
# ----------------------------------------------------------

merged = pd.merge(
    summary_df,
    cities_df[["City", "Province", "Population"]],
    on=["City", "Province"],
    how="left"
)

print("Merged dataset sample:")
display(merged.head(20))

print("Population missing count:", merged["Population"].isna().sum())





Merged dataset sample:


Unnamed: 0,Province,City,EV_Count,Charging_Stations,Population
0,ontario,toronto,250934.0,428,5647656.0
1,ontario,hamilton,26036.0,89,729560.0
2,quebec,sherbrooke,24964.0,100,172950.0
3,quebec,saguenay,16268.0,16,144723.0
4,manitoba,winnipeg,12174.0,119,758515.0
5,ontario,oshawa,12066.0,38,335949.0
6,ontario,london,12032.0,113,423369.0
7,quebec,drummondville,11234.0,45,79258.0
8,quebec,granby,10780.0,41,69025.0
9,quebec,saint hyacinthe,8108.0,37,57239.0


Population missing count: 1


# Feature Engineering

In [76]:
# ----------------------------------------------------------
# 5. FEATURE ENGINEERING
# ----------------------------------------------------------

merged["EV_per_capita"] = merged["EV_Count"] / merged["Population"]
merged["Distance_Score"] = merged["Population"] / (merged["Charging_Stations"] + 1)
merged["Accessibility"] = merged["Charging_Stations"] / (merged["EV_Count"] + 1)

print("Feature sample:")
display(merged[[
    "City", "Province", "EV_Count", "Charging_Stations", 
    "Population", "EV_per_capita", "Distance_Score", "Accessibility"
]].head())




Feature sample:


Unnamed: 0,City,Province,EV_Count,Charging_Stations,Population,EV_per_capita,Distance_Score,Accessibility
0,toronto,ontario,250934.0,428,5647656.0,0.044432,13164.699301,0.001706
1,hamilton,ontario,26036.0,89,729560.0,0.035687,8106.222222,0.003418
2,sherbrooke,quebec,24964.0,100,172950.0,0.144342,1712.376238,0.004006
3,saguenay,quebec,16268.0,16,144723.0,0.112408,8513.117647,0.000983
4,winnipeg,manitoba,12174.0,119,758515.0,0.01605,6320.958333,0.009774


In [77]:
# ----------------------------------------------------------
# 6. NORMALIZE FEATURES
# ----------------------------------------------------------

for col in ["EV_per_capita", "Distance_Score", "Accessibility"]:
    merged[f"{col}_norm"] = (
        (merged[col] - merged[col].min()) /
        (merged[col].max() - merged[col].min())
    )

print("Normalized feature sample:")
display(merged.head())




Normalized feature sample:


Unnamed: 0,Province,City,EV_Count,Charging_Stations,Population,EV_per_capita,Distance_Score,Accessibility,EV_per_capita_norm,Distance_Score_norm,Accessibility_norm
0,ontario,toronto,250934.0,428,5647656.0,0.044432,13164.699301,0.001706,0.034057,0.18168,1e-05
1,ontario,hamilton,26036.0,89,729560.0,0.035687,8106.222222,0.003418,0.027354,0.111711,1.9e-05
2,quebec,sherbrooke,24964.0,100,172950.0,0.144342,1712.376238,0.004006,0.110638,0.023272,2.3e-05
3,quebec,saguenay,16268.0,16,144723.0,0.112408,8513.117647,0.000983,0.08616,0.11734,6e-06
4,manitoba,winnipeg,12174.0,119,758515.0,0.01605,6320.958333,0.009774,0.012302,0.087018,5.5e-05


# RDI SCORE

In [78]:
# ----------------------------------------------------------
# 7. COMPUTE RDI SCORE
# ----------------------------------------------------------

merged["RDI"] = (
    0.40 * merged["EV_per_capita_norm"] +
    0.40 * merged["Distance_Score_norm"] +
    0.20 * merged["Accessibility_norm"]
)

print("RDI score sample:")
display(merged[["City", "Province", "RDI"]].head())




RDI score sample:


Unnamed: 0,City,Province,RDI
0,toronto,ontario,0.086297
1,hamilton,ontario,0.05563
2,sherbrooke,quebec,0.053568
3,saguenay,quebec,0.081401
4,winnipeg,manitoba,0.039739


# Model Training

In [79]:
# ----------------------------------------------------------
# 8. MODEL TRAINING 
# ----------------------------------------------------------

features = ["EV_per_capita", "Distance_Score", "Accessibility"]

X = merged[features]
y = merged["RDI"]

X = X.fillna(X.median())
y = y.fillna(y.median())

# Split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

lr = LinearRegression()
lr.fit(X_train, y_train)

y_pred = lr.predict(X_test)

print("Model Performance:")
print("MAE:", mean_absolute_error(y_test, y_pred))
print("RMSE:", mean_squared_error(y_test, y_pred) ** 0.5)
print("R2 Score:", r2_score(y_test, y_pred))




Model Performance:
MAE: 0.00010800338985903206
RMSE: 0.00011253451160573472
R2 Score: 0.9999987693789884


In [80]:
# ----------------------------------------------------------
# 9. ANOVA TEST 
# ----------------------------------------------------------

grouped = merged.groupby("Province")["EV_Count"].apply(list)

f_stat, p_val = f_oneway(*grouped)

print("\nANOVA Results:")
print("F-statistic:", f_stat)
print("P-value:", p_val)

if p_val < 0.05:
    print(" Reject H0: Significant difference between provinces.")
else:
    print(" Fail to Reject H0: No significant difference.")


# ----------------------------------------------------------
# 10. FINAL OUTPUT
# ----------------------------------------------------------

print("\nTop 10 Underserved Cities:")
display(merged.sort_values("RDI", ascending=False).head(10))


ANOVA Results:
F-statistic: 0.24646027332216813
P-value: 0.9865711841659552
 Fail to Reject H0: No significant difference.

Top 10 Underserved Cities:


Unnamed: 0,Province,City,EV_Count,Charging_Stations,Population,EV_per_capita,Distance_Score,Accessibility,EV_per_capita_norm,Distance_Score_norm,Accessibility_norm,RDI
12,quebec,windsor,7002.0,89,5367.0,1.304639,59.633333,0.012709,1.0,0.000411,7.1e-05,0.400179
100,alberta,wood buffalo,0.0,0,72326.0,0.0,72326.0,0.0,0.0,1.0,0.0,0.4
21,new brunswick,kingston,4074.0,106,3202.0,1.27233,29.925234,0.026012,0.975235,0.0,0.000146,0.390123
11,nova scotia,windsor,7002.0,89,5514.0,1.269859,61.266667,0.012709,0.973341,0.000434,7.1e-05,0.389524
104,alberta,calgary,0.0,178,1306784.0,0.0,7300.469274,178.0,0.0,0.100566,1.0,0.240226
39,ontario,kawartha lakes,1174.0,1,79247.0,0.014814,39623.5,0.000851,0.011355,0.547659,5e-06,0.223607
98,alberta,edmonton,0.0,138,1151635.0,0.0,8285.143885,138.0,0.0,0.114186,0.775281,0.200731
46,ontario,centre wellington,912.0,0,31093.0,0.029331,31093.0,0.0,0.022482,0.429665,0.0,0.180859
105,ontario,cambridge,0.0,126,138479.0,0.0,1090.385827,126.0,0.0,0.014668,0.707865,0.14744
0,ontario,toronto,250934.0,428,5647656.0,0.044432,13164.699301,0.001706,0.034057,0.18168,1e-05,0.086297
