In [41]:
import pandas as pd 
import numpy as np
import os


Sources: 
EV data:
https://www.iea.org/data-and-statistics/data-tools/global-ev-data-explorer

Total cars:
https://worldpopulationreview.com/country-rankings/cars-by-country



In [49]:
# Set up workspace path - go up one directory from scripts folder to project root
workspace_root = os.path.dirname(os.getcwd())

# Set the data directory path
data_dir = os.path.join(workspace_root, 'data')

EV_cars_path = os.path.join(data_dir, 'cars_country.csv')
cars_country_path = os.path.join(data_dir, 'cars-by-country-2025.csv')
vars_WDI = os.path.join(data_dir, 'important_variables.csv')


ev_cars = pd.read_csv(EV_cars_path)
cars_country = pd.read_csv(cars_country_path)
vars_WDI = pd.read_csv(vars_WDI)[[
    'REF_AREA_LABEL', 
    'YEAR', 
    'WB_WDI_EN_GHG_CO2_PC_CE_AR5', #Carbon dioxide (CO2) emissions excluding LULUCF per capita (t CO2e/capita)
    'WB_WDI_EG_FEC_RNEW_ZS', # Renewable energy consumption (% of total final energy consumption)
    'WB_WDI_EG_USE_PCAP_KG_OE', # Energy use (kg of oil equivalent per capita)
    'WB_WDI_NY_GDP_MKTP_KD_ZG', #	GDP growth (annual %)
    'WB_WDI_SP_URB_GROW', # Urban population growth (annual %)
    'WB_WDI_EG_USE_ELEC_KH_PC'	#Electric power consumption (kWh per capita)
]]


I chose 2022 as is the most recent year with range available data

In [50]:
ev_cars_2024 =ev_cars.groupby(['country','year']).agg({'value': 'sum'}).sort_values('year', ascending=False).reset_index()

ev_cars_2024 = ev_cars_2024[ev_cars_2024['year'] == 2022]
ev_cars_2024.rename(columns={'value': 'stock_ev'}, inplace=True)

ev_cars_2024['country'] = ev_cars_2024['country'].replace('USA', 'United States')
# Stock of EV
ev_cars_2024


Unnamed: 0,country,year,stock_ev
80,China,2022,61431990.0
81,Costa Rica,2022,8710.0
82,Africa,2022,13608.0
83,Sweden,2022,471798.0
84,Netherlands,2022,686807.0
85,Italy,2022,399959.0
86,Korea,2022,525480.0
87,Iceland,2022,33521.0
88,Germany,2022,1978789.0
89,Switzerland,2022,220521.0


In [51]:
cars_2022 = cars_country[cars_country['CarsByCountryDataYear'] == 2022][['country', 'CarsByCountryTotal', 'CarsByCountryDataYear']]
cars_2022.rename(columns={'CarsByCountryTotal': 'total_cars'}, inplace=True)
cars_2022.rename(columns={'CarsByCountryDataYear': 'year'}, inplace=True)
cars_2022


Unnamed: 0,country,total_cars,year
1,Guernsey,88532,2022.0
2,Gibraltar,48641,2022.0
8,New Zealand,4794156,2022.0
9,Monaco,35500,2022.0
10,United States,283400986,2022.0
17,Italy,44888074,2022.0
18,Estonia,998852,2022.0
19,Portugal,7198871,2022.0
20,Canada,26302526,2022.0
21,France,44444965,2022.0


In [52]:
vars_WDI = vars_WDI[vars_WDI['YEAR'] == 2022]
vars_WDI.rename(columns={'REF_AREA_LABEL': 'country'}, inplace=True)
vars_WDI.rename(columns={'YEAR': 'year'}, inplace=True)
vars_WDI

Unnamed: 0,country,year,WB_WDI_EN_GHG_CO2_PC_CE_AR5,WB_WDI_EG_FEC_RNEW_ZS,WB_WDI_EG_USE_PCAP_KG_OE,WB_WDI_NY_GDP_MKTP_KD_ZG,WB_WDI_SP_URB_GROW,WB_WDI_EG_USE_ELEC_KH_PC
27,Albania,2022,1.659293,41.9,780.738624,4.826696,0.093716,2507.480139
57,Algeria,2022,4.104114,0.1,1500.346113,3.600000,2.273337,1756.587213
87,Angola,2022,0.767587,52.9,423.122518,3.044727,4.059358,392.507047
117,Argentina,2022,4.260967,9.2,1808.998492,5.269880,0.338668,2833.119098
147,Armenia,2022,2.454971,9.1,1347.590557,12.600000,0.456271,2403.677758
...,...,...,...,...,...,...,...,...
4647,"Venezuela, RB",2022,2.759258,33.7,1478.064343,-3.894386,-0.027910,1989.223627
4677,Viet Nam,2022,3.259416,24.2,1023.057136,8.537500,2.609752,2624.400893
4707,"Yemen, Rep.",2022,0.292772,3.7,68.949535,0.752448,4.525171,53.972914
4737,Zambia,2022,0.387388,83.0,800.674874,5.211224,4.014864,703.966836


In [53]:
merged = pd.merge(vars_WDI, cars_2022, on=['country', 'year'], how='inner')

# Fusionar con ev_cars_2024
final_merged = pd.merge(merged, ev_cars_2024, on=['country', 'year'], how='inner')

final_merged['ev_adoption_rate'] = (final_merged['stock_ev'] / final_merged['total_cars']) * 100
final_merged

Unnamed: 0,country,year,WB_WDI_EN_GHG_CO2_PC_CE_AR5,WB_WDI_EG_FEC_RNEW_ZS,WB_WDI_EG_USE_PCAP_KG_OE,WB_WDI_NY_GDP_MKTP_KD_ZG,WB_WDI_SP_URB_GROW,WB_WDI_EG_USE_ELEC_KH_PC,total_cars,stock_ev,ev_adoption_rate
0,Austria,2022,6.771147,36.0,3465.290534,5.277894,1.397726,8260.476754,5633525,169062.0,3.000998
1,Belgium,2022,7.683021,11.7,4333.042675,4.259361,0.844853,7282.403313,6977664,361757.0,5.1845
2,Canada,2022,14.776101,23.8,7632.300562,4.189036,1.925077,14592.484156,26302526,430394.0,1.636322
3,Denmark,2022,4.767292,39.5,2628.918995,1.540173,0.931334,5870.029275,3220740,229673.0,7.131063
4,France,2022,4.561196,16.2,3102.403109,2.57084,0.6551,6677.395135,44444965,1238061.0,2.785605
5,Germany,2022,7.870139,17.6,3236.728452,1.369731,0.854902,6285.246596,52714433,1978789.0,3.75379
6,Greece,2022,5.263286,21.5,1960.675339,5.743649,-0.862125,4827.782857,5777241,21600.0,0.373881
7,Italy,2022,5.638792,17.5,2411.651724,4.821177,0.232654,5215.690121,44888074,399959.0,0.891014
8,Netherlands,2022,7.512052,12.2,3575.788885,5.007235,1.291903,6365.466052,10062194,686807.0,6.825619
9,New Zealand,2022,6.761818,28.9,3729.843959,3.496387,0.222811,8099.314065,4794156,68960.0,1.438418


### Defininig a model

In [55]:
# Data

features = [
    'ev_adoption_rate',
    'total_cars',
    'WB_WDI_EG_FEC_RNEW_ZS',
    'WB_WDI_EG_USE_PCAP_KG_OE',
    'WB_WDI_EG_USE_ELEC_KH_PC',
    'WB_WDI_NY_GDP_MKTP_KD_ZG',
    'WB_WDI_SP_URB_GROW'
]

X = final_merged[features]
y = final_merged['WB_WDI_EN_GHG_CO2_PC_CE_AR5']



# Model 
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.linear_model import LinearRegression
import numpy as np

# Entrenar
model = LinearRegression()
model.fit(X, y)

# Predicciones
y_pred = model.predict(X)

# Métricas
r2 = r2_score(y, y_pred)
rmse = np.sqrt(mean_squared_error(y, y_pred))

print(f"R²: {r2:.4f}")
print(f"RMSE: {rmse:.4f}")


R²: 0.8813
RMSE: 1.0969


In [56]:
# Interpretation
coef_df = pd.DataFrame({
    'Variable': features,
    'Coeficiente': model.coef_
}).sort_values(by='Coeficiente', key=abs, ascending=False)

print(coef_df)

                   Variable   Coeficiente
6        WB_WDI_SP_URB_GROW -6.120181e-01
5  WB_WDI_NY_GDP_MKTP_KD_ZG  3.553804e-01
2     WB_WDI_EG_FEC_RNEW_ZS -6.615015e-02
0          ev_adoption_rate  6.433730e-02
3  WB_WDI_EG_USE_PCAP_KG_OE  1.813944e-03
4  WB_WDI_EG_USE_ELEC_KH_PC  4.294769e-05
1                total_cars  6.947491e-09


### Simulation of adoption

In [58]:
simulated = final_merged.copy()
simulated['ev_adoption_rate'] = 0.50  # 50% adopción EV

# Predicciones
y_simulated = model.predict(simulated[features])

# Comparación
final_merged['pred_base'] = y_pred
final_merged['pred_scenario'] = y_simulated
final_merged['change_abs'] = final_merged['pred_scenario'] - final_merged['pred_base']
final_merged['change_pct'] = 100 * final_merged['change_abs'] / final_merged['pred_base']

final_merged[['country','year','pred_base','pred_scenario','change_abs','change_pct']]

Unnamed: 0,country,year,pred_base,pred_scenario,change_abs,change_pct
0,Austria,2022,5.934498,5.773591,-0.160907,-2.711392
1,Belgium,2022,9.200216,8.898828,-0.301388,-3.275881
2,Canada,2022,13.918291,13.845183,-0.073108,-0.525265
3,Denmark,2022,3.289259,2.862635,-0.426625,-12.970236
4,France,2022,6.266275,6.119225,-0.14705,-2.346683
5,Germany,2022,5.971091,5.761751,-0.20934,-3.505893
6,Greece,2022,5.397528,5.405642,0.008114,0.150331
7,Italy,2022,5.803974,5.778817,-0.025157,-0.433441
8,Netherlands,2022,7.873336,7.466363,-0.406973,-5.169006
9,New Zealand,2022,6.856721,6.796346,-0.060375,-0.880527
