In [1]:
import pandas as pd
import numpy as np
import pyarrow as prw


In [2]:
# Read the EV dataset with the pyarrow parser and Arrow-backed dtypes, then keep
# only rows whose region is not "World" and whose parameter is EV sales or EV stock.
df = (
    pd.read_csv('./data/EV-data.csv', engine='pyarrow', dtype_backend='pyarrow')
    .query('region != "World" and parameter in ["EV sales", "EV stock"]')
)
df

Unnamed: 0,region,category,parameter,mode,powertrain,year,unit,value
2,Australia,Historical,EV sales,Cars,BEV,2011,Vehicles,49.0
3,Australia,Historical,EV stock,Cars,BEV,2011,Vehicles,49.0
4,Australia,Historical,EV stock,Cars,BEV,2012,Vehicles,220.0
5,Australia,Historical,EV sales,Cars,BEV,2012,Vehicles,170.0
8,Australia,Historical,EV stock,Cars,PHEV,2012,Vehicles,80.0
...,...,...,...,...,...,...,...,...
3638,USA,Historical,EV stock,Cars,PHEV,2023,Vehicles,1300000.0
3640,USA,Historical,EV stock,Cars,FCEV,2023,Vehicles,18000.0
3641,USA,Historical,EV sales,Cars,FCEV,2023,Vehicles,3000.0
3642,USA,Historical,EV stock,Cars,BEV,2023,Vehicles,3500000.0


In [3]:
# Total BEV sales for every (region, year) pair.
bev_df = (
    df.query('powertrain == "BEV" and parameter ==  "EV sales"')[['region', 'year', 'value']]
    .groupby(['region', 'year'])
    .sum()
)
bev_df

Unnamed: 0_level_0,Unnamed: 1_level_0,value
region,year,Unnamed: 2_level_1
Australia,2011,49.0
Australia,2012,170.0
Australia,2013,190.0
Australia,2014,370.0
Australia,2015,760.0
...,...,...
United Kingdom,2019,38000.0
United Kingdom,2020,110000.0
United Kingdom,2021,190000.0
United Kingdom,2022,270000.0


In [4]:
# Fractional change in summed EV sales between the 2019 and 2023 rows of each
# region, largest first. The first row of each region has no predecessor, so
# its change is missing and sorts to the bottom.
sales_df = (
    df.query('parameter ==  "EV sales" and year in [2019, 2023]')[['region', 'year', 'value']]
    .groupby(['region', 'year'])
    .sum()
    .groupby(level='region')
    .pct_change()
    .sort_values('value', ascending=False)
)
sales_df

Unnamed: 0_level_0,Unnamed: 1_level_0,value
region,year,Unnamed: 2_level_1
United Arab Emirates,2023,306.446809
Turkiye,2023,211.037037
India,2023,118.752547
Cyprus,2023,55.521739
Greece,2023,34.416667
...,...,...
Switzerland,2019,
Turkiye,2019,
USA,2019,
United Arab Emirates,2019,


In [5]:
# Reshape the BEV rows so each (region, year) has one column per parameter.
bev_df2 = (
    df.query('powertrain == "BEV"')
      .set_index(['region', 'year'])
      .loc[:, ['parameter', 'value']]
      .pivot(columns='parameter')
)
# pivot() without `values` yields two-level columns ('value', <parameter>).
# Take the labels from the parameter level instead of assigning a hard-coded
# list positionally, so a column can never be silently mislabelled.
bev_df2.columns = [parameter for _, parameter in bev_df2.columns]
# NOTE: despite its name, '% sold' is the sales / stock ratio (a fraction, not
# a percentage). The name is kept because later cells reference it.
bev_df2['% sold'] = bev_df2['EV sales'] / bev_df2['EV stock']
bev_df2

Unnamed: 0_level_0,Unnamed: 1_level_0,EV sales,EV stock,% sold
region,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Australia,2011,49.0,49.0,1.0
Australia,2012,170.0,220.0,0.772727
Australia,2013,190.0,410.0,0.463415
Australia,2014,370.0,780.0,0.474359
Australia,2015,760.0,1500.0,0.506667
...,...,...,...,...
United Kingdom,2019,38000.0,91000.0,0.417582
United Kingdom,2020,110000.0,190000.0,0.578947
United Kingdom,2021,190000.0,380000.0,0.5
United Kingdom,2022,270000.0,550000.0,0.490909


In [6]:
# Rows where the year's sales equal the whole stock (ratio exactly 1.0).
bev_df2.loc[bev_df2['% sold'].eq(1.0)]

Unnamed: 0_level_0,Unnamed: 1_level_0,EV sales,EV stock,% sold
region,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Australia,2011,49.0,49.0,1.0
Brazil,2014,61.0,61.0,1.0
Chile,2011,6.0,6.0,1.0
Greece,2013,3.0,3.0,1.0
Korea,2010,61.0,61.0,1.0
Korea,2018,55000.0,55000.0,1.0
Mexico,2011,3.0,3.0,1.0
South Africa,2013,34.0,34.0,1.0
Spain,2010,76.0,76.0,1.0
Sweden,2010,4.0,4.0,1.0


In [7]:
# Rows where the year's sales are under a tenth of the stock.
bev_df2.loc[bev_df2['% sold'].lt(0.1)]

Unnamed: 0_level_0,Unnamed: 1_level_0,EV sales,EV stock,% sold
region,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Costa Rica,2015,4.0,140.0,0.028571
Costa Rica,2016,12.0,160.0,0.075
Denmark,2017,710.0,8800.0,0.080682
India,2012,190.0,2800.0,0.067857
India,2019,680.0,8600.0,0.07907
Israel,2011,3.0,85.0,0.035294
Israel,2014,6.0,1200.0,0.005
Israel,2015,3.0,1200.0,0.0025
Israel,2016,5.0,1200.0,0.004167
Israel,2017,130.0,1400.0,0.092857


In [8]:
# Inspect the row index: a two-level MultiIndex named ('region', 'year').
bev_df2.index

MultiIndex([(     'Australia', 2011),
            (     'Australia', 2012),
            (     'Australia', 2013),
            (     'Australia', 2014),
            (     'Australia', 2015),
            (     'Australia', 2016),
            (     'Australia', 2017),
            (     'Australia', 2018),
            (     'Australia', 2019),
            (     'Australia', 2020),
            ...
            ('United Kingdom', 2014),
            ('United Kingdom', 2015),
            ('United Kingdom', 2016),
            ('United Kingdom', 2017),
            ('United Kingdom', 2018),
            ('United Kingdom', 2019),
            ('United Kingdom', 2020),
            ('United Kingdom', 2021),
            ('United Kingdom', 2022),
            ('United Kingdom', 2023)],
           names=['region', 'year'], length=598)

In [9]:
# Mean sales-to-stock ratio per region, highest first.
(
    bev_df2[['% sold']]
    .groupby(level='region')
    .mean()
    .sort_values('% sold', ascending=False)
)

Unnamed: 0_level_0,% sold
region,Unnamed: 1_level_1
Brazil,0.597556
Sweden,0.547542
Korea,0.540066
China,0.527578
Greece,0.524149
Iceland,0.51891
Germany,0.515133
Belgium,0.511249
Australia,0.509556
New Zealand,0.48383


In [10]:
def style_pct_sold(v, low=0.1, high=0.75):
    """Return the CSS declaration used to highlight one '% sold' cell.

    Parameters
    ----------
    v : scalar
        Sales-to-stock ratio of a single cell. May be missing
        (``pd.NA``, ``NaN`` or ``None``).
    low : float, default 0.1
        Ratios strictly below this value get a light-red background.
    high : float, default 0.75
        Ratios strictly above this value get a light-green background.

    Returns
    -------
    str or None
        A ``background-color`` declaration, or ``None`` when the cell should
        not be highlighted (value within ``[low, high]`` or missing).
    """
    # Missing ratios must be handled first: `pd.NA < x` is itself NA, and
    # using NA as an `if` condition raises TypeError.
    if pd.isna(v):
        return None
    if v < low:
        return 'background-color: #ffe6e6;'
    if v > high:
        return 'background-color: #e6ffe6;'
    return None

# Colour only the ratio column; every other column keeps the default style.
col = ['% sold']
bev_df2.style.map(style_pct_sold, subset=col)



Unnamed: 0_level_0,Unnamed: 1_level_0,EV sales,EV stock,% sold
region,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Australia,2011,49.0,49.0,1.0
Australia,2012,170.0,220.0,0.772727
Australia,2013,190.0,410.0,0.463415
Australia,2014,370.0,780.0,0.474359
Australia,2015,760.0,1500.0,0.506667
Australia,2016,670.0,2200.0,0.304545
Australia,2017,1200.0,3400.0,0.352941
Australia,2018,1800.0,5200.0,0.346154
Australia,2019,6300.0,12000.0,0.525
Australia,2020,5200.0,17000.0,0.305882


In [11]:
# Temporarily lift the row-display limit so the whole frame is printed as plain
# text; the option reverts to its previous value when the block exits.
with pd.option_context('display.max_rows', None):
    print(bev_df2)

                            EV sales    EV stock    % sold
region               year                                 
Australia            2011       49.0        49.0       1.0
                     2012      170.0       220.0  0.772727
                     2013      190.0       410.0  0.463415
                     2014      370.0       780.0  0.474359
                     2015      760.0      1500.0  0.506667
                     2016      670.0      2200.0  0.304545
                     2017     1200.0      3400.0  0.352941
                     2018     1800.0      5200.0  0.346154
                     2019     6300.0     12000.0     0.525
                     2020     5200.0     17000.0  0.305882
                     2021    17000.0     34000.0       0.5
                     2022    33000.0     67000.0  0.492537
                     2023    87000.0    150000.0      0.58
Austria              2010       <NA>       350.0      <NA>
                     2011       <NA>       990.0      <N