In [2]:
import pandas as pd
import sqlite3
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb
import plotly.graph_objects as go
import plotly.express as px

# 2018 Fuel Consumption Ratings

## Curation

In [3]:
fuel_ratings_2018 = pd.read_csv('MY2018 Fuel Consumption Ratings.csv', 
                                encoding='cp863', engine='python', usecols=[1,2,3,4,5,7,8,9,10,12], skiprows=[1], nrows=1083)
fuel_ratings_2018.shape

(1083, 10)

In [4]:
fuel_ratings_2018.columns

Index(['Make', 'Model.1', 'Vehicle Class', 'Engine Size', 'Cylinders', 'Fuel',
       'Fuel Consumption', 'Unnamed: 9', 'Unnamed: 10', 'CO2 Emissions'],
      dtype='object')

In [5]:
fuel_ratings_2018.columns = ['Make', 'Model', 'Vehicle Class', 'Engine Size (L)', 'Cylinders', 'Fuel Type', 
                             'City Fuel Economy (L/100km)', 'Hwy Fuel Economy (L/100km)', 'Comb Fuel Economy (L/100km)', 'CO2 Emissions (g/km)']

In [6]:
fuel_ratings_2018.replace(['X', 'Z', 'D', 'E', 'N'], ['Gasoline', 'Gasoline', 'Diesel', 'E85', 'Natural gas'], inplace=True)

In [7]:
hybrid_mask = fuel_ratings_2018['Model'].str.contains('Hybrid', case=False)

In [8]:
fuel_ratings_2018.loc[hybrid_mask, 'Fuel Type'] = fuel_ratings_2018.loc[hybrid_mask, 'Fuel Type'] + ' Hybrid'

In [9]:
# From the 2023 Fuel Consumption Guide
classes = ['Two-seater',
            'Minicompact',
            'Subcompact',
            'Compact',
            'Mid-size',
            'Full-size',
            'Station wagon: Small',
            'Station wagon: Mid-size',
            'Pickup truck: Small',
            'Pickup truck: Standard',
            'SUV: Small',
            'SUV: Standard',
            'Minivan',
            'Van: Cargo',
            'Van: Passenger',
            'Special purpose vehicle',
            ]

In [10]:
fig = px.histogram(fuel_ratings_2018, x='Comb Fuel Economy (L/100km)', color='Cylinders')
fig.update_layout(height=700)

fig.show()

In [11]:
fig = px.box(fuel_ratings_2018, y='Engine Size (L)', x='Vehicle Class')
fig.update_layout(height=700)

fig.show()

In [12]:
fig = px.box(fuel_ratings_2018, y='Comb Fuel Economy (L/100km)', x='Vehicle Class',
            category_orders={"Vehicle Class": ['Two-seater',
            'Minicompact',
            'Subcompact',
            'Compact',
            'Mid-size',
            'Full-size',
            'Station wagon: Small',
            'Station wagon: Mid-size',
            'Pickup truck: Small',
            'Pickup truck: Standard',
            'SUV: Small',
            'SUV: Standard',
            'Minivan',
            'Van: Cargo',
            'Van: Passenger',
            'Special purpose vehicle',
            ]})
fig.update_layout(height=700)

fig.show()

In [13]:
fuel_ratings_2018[fuel_ratings_2018['Engine Size (L)']>=6.0]

Unnamed: 0,Make,Model,Vehicle Class,Engine Size (L),Cylinders,Fuel Type,City Fuel Economy (L/100km),Hwy Fuel Economy (L/100km),Comb Fuel Economy (L/100km),CO2 Emissions (g/km)
20,Aston Martin,Rapide S,Subcompact,6.0,12,Gasoline,16.7,10.9,14.1,332
21,Aston Martin,Vanquish,Minicompact,6.0,12,Gasoline,17.5,11.4,14.7,346
63,Bentley,Bentayga,SUV: Standard,6.0,12,Gasoline,18.8,12.2,15.9,370
64,Bentley,Continental GT Convertible,Compact,6.0,12,Gasoline,20.4,12.0,16.6,389
65,Bentley,Continental Supersports,Compact,6.0,12,Gasoline,20.4,12.0,16.6,389
67,Bentley,Flying Spur,Mid-size,6.0,12,Gasoline,20.4,12.0,16.6,389
68,Bentley,Mulsanne,Mid-size,6.8,8,Gasoline,20.9,13.0,17.3,405
124,BMW,M760i xDrive,Full-size,6.6,12,Gasoline,17.7,11.9,15.1,355
138,Bugatti,Chiron,Two-seater,8.0,16,Gasoline,26.8,16.6,22.2,522
168,Cadillac,CTS-V,Mid-size,6.2,8,Gasoline,16.5,11.1,14.0,329


In [14]:
fuel_ratings_2018[fuel_ratings_2018['Cylinders']>8]

Unnamed: 0,Make,Model,Vehicle Class,Engine Size (L),Cylinders,Fuel Type,City Fuel Economy (L/100km),Hwy Fuel Economy (L/100km),Comb Fuel Economy (L/100km),CO2 Emissions (g/km)
19,Aston Martin,DB11 V12,Minicompact,5.2,12,Gasoline,15.5,11.4,13.7,322
20,Aston Martin,Rapide S,Subcompact,6.0,12,Gasoline,16.7,10.9,14.1,332
21,Aston Martin,Vanquish,Minicompact,6.0,12,Gasoline,17.5,11.4,14.7,346
44,Audi,R8,Two-seater,5.2,10,Gasoline,16.0,9.5,13.1,304
45,Audi,R8 quattro,Two-seater,5.2,10,Gasoline,17.1,11.3,14.5,338
46,Audi,R8 Spyder,Two-seater,5.2,10,Gasoline,17.1,11.3,14.5,338
63,Bentley,Bentayga,SUV: Standard,6.0,12,Gasoline,18.8,12.2,15.9,370
64,Bentley,Continental GT Convertible,Compact,6.0,12,Gasoline,20.4,12.0,16.6,389
65,Bentley,Continental Supersports,Compact,6.0,12,Gasoline,20.4,12.0,16.6,389
67,Bentley,Flying Spur,Mid-size,6.0,12,Gasoline,20.4,12.0,16.6,389


In [15]:
fuel_ratings_2018 = fuel_ratings_2018[fuel_ratings_2018['Engine Size (L)'] <= 6.0].reset_index(drop=True)
fuel_ratings_2018 = fuel_ratings_2018[fuel_ratings_2018['Cylinders'] <= 8].reset_index(drop=True)

In [16]:
fuel_ratings_2018.shape

(1022, 10)

In [17]:
fuel_ratings_2018.describe()

Unnamed: 0,Engine Size (L),Cylinders,City Fuel Economy (L/100km),Hwy Fuel Economy (L/100km),Comb Fuel Economy (L/100km),CO2 Emissions (g/km)
count,1022.0,1022.0,1022.0,1022.0,1022.0,1022.0
mean,2.929648,5.345401,12.018689,8.833659,10.586106,243.728963
std,1.124168,1.482868,2.963019,1.963841,2.483829,49.739005
min,1.0,3.0,4.2,4.0,4.1,96.0
25%,2.0,4.0,10.0,7.5,8.9,208.0
50%,3.0,6.0,11.8,8.5,10.3,242.0
75%,3.6,6.0,13.8,9.9,12.1,280.0
max,5.7,8.0,22.3,17.2,19.5,419.0


In [18]:
fuel_ratings_2018['Vehicle Class'] = pd.Categorical(fuel_ratings_2018['Vehicle Class'], categories=classes, ordered=True)

## Results

In [19]:
fuel_ratings_2018_grouped = fuel_ratings_2018.groupby(['Vehicle Class', 'Fuel Type']).mean(numeric_only=True)[['Comb Fuel Economy (L/100km)', 'CO2 Emissions (g/km)']].dropna()
fuel_ratings_2018_grouped





Unnamed: 0_level_0,Unnamed: 1_level_0,Comb Fuel Economy (L/100km),CO2 Emissions (g/km)
Vehicle Class,Fuel Type,Unnamed: 2_level_1,Unnamed: 3_level_1
Two-seater,Gasoline,11.113462,260.038462
Minicompact,Gasoline,10.009804,234.019608
Subcompact,Gasoline,10.361538,242.450549
Compact,Diesel,6.675,178.75
Compact,Gasoline,9.079825,212.684211
Compact,Gasoline Hybrid,8.5,200.0
Mid-size,Diesel,6.733333,181.333333
Mid-size,E85,15.0,248.0
Mid-size,Gasoline,9.353731,219.320896
Mid-size,Gasoline Hybrid,5.766667,135.555556


# 2023 Fuel Consumption Ratings

In [20]:
def group_fuel_classes(df, classes):
    df.columns = ['Make', 'Model', 'Vehicle Class', 'Engine Size (L)', 'Cylinders', 'Fuel Type', 
                             'City Fuel Economy (L/100km)', 'Hwy Fuel Economy (L/100km)', 'Comb Fuel Economy (L/100km)', 'CO2 Emissions (g/km)']
    df.replace(['X', 'Z', 'D', 'E', 'N'], ['Gasoline', 'Gasoline', 'Diesel', 'E85', 'Natural gas'], inplace=True)
    hybrid_mask = df['Model'].str.contains('Hybrid', case=False)
    df.loc[hybrid_mask, 'Fuel Type'] = df.loc[hybrid_mask, 'Fuel Type'] + ' Hybrid'
    df2 = df[df['Engine Size (L)'] <= 6.0].reset_index(drop=True)
    df2 = df2[df2['Cylinders'] <= 8].reset_index(drop=True)
    
    df2['Vehicle Class'] = pd.Categorical(df2['Vehicle Class'], categories=classes, ordered=True)
    result = df2.groupby(['Vehicle Class', 'Fuel Type']).mean(numeric_only=True)[['Comb Fuel Economy (L/100km)', 'CO2 Emissions (g/km)']].dropna()
    
    return df, result

fuel_ratings_2023 = pd.read_csv('MY2023 Fuel Consumption Ratings.csv', 
                                encoding='cp863', engine='python', usecols=[1,2,3,4,5,7,8,9,10,12], skiprows=[1], nrows=833)

In [21]:
fuel_ratings_2023, fuel_ratings_2023_grouped = group_fuel_classes(fuel_ratings_2023, classes)





In [22]:
fuel_ratings_2023_grouped

Unnamed: 0_level_0,Unnamed: 1_level_0,Comb Fuel Economy (L/100km),CO2 Emissions (g/km)
Vehicle Class,Fuel Type,Unnamed: 2_level_1,Unnamed: 3_level_1
Two-seater,Gasoline,11.361905,266.238095
Minicompact,Gasoline,10.689474,250.684211
Subcompact,Gasoline,10.42,243.585714
Compact,Gasoline,9.213559,215.79661
Compact,Gasoline Hybrid,4.966667,116.333333
Mid-size,Gasoline,9.476289,222.020619
Mid-size,Gasoline Hybrid,5.0,117.0
Full-size,Gasoline,9.897297,231.72973
Full-size,Gasoline Hybrid,5.0,117.0
Station wagon: Small,Gasoline,8.185714,192.0


# 2020 Fuel Consumption Ratings

In [23]:
fuel_ratings_2020 = pd.read_csv('MY2020 Fuel Consumption Ratings.csv', 
                                encoding='cp863', engine='python', usecols=[1,2,3,4,5,7,8,9,10,12], skiprows=[1], nrows=975)

In [24]:
fuel_ratings_2020.columns

Index(['Make', 'Model.1', 'Vehicle Class', 'Engine Size', 'Cylinders', 'Fuel',
       'Fuel Consumption', 'Unnamed: 9', 'Unnamed: 10', 'CO2 Emissions'],
      dtype='object')

In [25]:
fuel_ratings_2020['Vehicle Class'].unique().size

15

In [26]:
fuel_ratings_2020, fuel_ratings_2020_grouped = group_fuel_classes(fuel_ratings_2020, classes)





In [27]:
fuel_ratings_2020_grouped

Unnamed: 0_level_0,Unnamed: 1_level_0,Comb Fuel Economy (L/100km),CO2 Emissions (g/km)
Vehicle Class,Fuel Type,Unnamed: 2_level_1,Unnamed: 3_level_1
Two-seater,Gasoline,10.931373,256.019608
Minicompact,Gasoline,10.657143,250.095238
Subcompact,Gasoline,10.682222,250.033333
Compact,Gasoline,9.070476,212.114286
Compact,Gasoline Hybrid,4.5,106.0
Mid-size,Gasoline,9.294545,217.790909
Mid-size,Gasoline Hybrid,5.866667,137.333333
Full-size,E85,14.7,246.0
Full-size,Gasoline,10.453846,245.184615
Full-size,Gasoline Hybrid,5.0,117.0


In [28]:
fuel_ratings_2020_grouped.to_clipboard()

# 2010-2014 Fuel Consumption Ratings

In [29]:
fuel_ratings_2014 = pd.read_csv('MY2010-2014 Fuel Consumption Ratings 5-cycle.csv', 
                                encoding='cp863', engine='python', usecols=[1,2,3,4,5,7,8,9,10,12], skiprows=[1], nrows=5359)

In [30]:
fuel_ratings_2014

Unnamed: 0,MAKE,MODEL.1,VEHICLE CLASS,ENGINE SIZE,CYLINDERS,FUEL,FUEL CONSUMPTION*,Unnamed: 9,Unnamed: 10,CO2 EMISSIONS
0,ACURA,CSX,COMPACT,2.0,4,X,10.9,7.8,9.5,219
1,ACURA,CSX,COMPACT,2.0,4,X,10.0,7.6,8.9,205
2,ACURA,CSX,COMPACT,2.0,4,Z,11.6,8.1,10.0,230
3,ACURA,MDX AWD,SUV,3.7,6,Z,14.8,11.3,13.2,304
4,ACURA,RDX AWD TURBO,SUV,2.3,4,Z,13.2,10.3,11.9,274
...,...,...,...,...,...,...,...,...,...,...
5354,VOLVO,XC60 AWD,SUV - SMALL,3.0,6,X,13.4,9.8,11.8,271
5355,VOLVO,XC60 AWD,SUV - SMALL,3.2,6,X,13.2,9.5,11.5,264
5356,VOLVO,XC70 AWD,SUV - SMALL,3.0,6,X,13.4,9.8,11.8,271
5357,VOLVO,XC70 AWD,SUV - SMALL,3.2,6,X,12.9,9.3,11.3,260


In [31]:
fuel_ratings_2014['VEHICLE CLASS'].unique()

array(['COMPACT', 'SUV', 'MID-SIZE', 'MINICOMPACT', 'TWO-SEATER',
       'STATION WAGON - SMALL', 'SUBCOMPACT', 'STATION WAGON - MID-SIZE',
       'FULL-SIZE', 'PICKUP TRUCK - STANDARD', 'PICKUP TRUCK - SMALL',
       'VAN - CARGO', 'VAN - PASSENGER', 'MINIVAN',
       'SPECIAL PURPOSE VEHICLE', 'SUV - SMALL', 'SUV - STANDARD'],
      dtype=object)

In [32]:
# From the older Fuel Consumption Guides
classes_old = ['TWO-SEATER',
            'MINICOMPACT',
            'SUBCOMPACT',
            'COMPACT',
            'MID-SIZE',
            'FULL-SIZE',
            'STATION WAGON - SMALL',
            'STATION WAGON - MID-SIZE',
            'PICKUP TRUCK - SMALL',
            'PICKUP TRUCK - STANDARD',
            'SUV',
            'SUV - SMALL',
            'SUV - STANDARD',
            'MINIVAN',
            'VAN - CARGO',
            'VAN - PASSENGER',
            'SPECIAL PURPOSE VEHICLE'
            ]

In [33]:
fuel_ratings_2014, fuel_ratings_2014_grouped = group_fuel_classes(fuel_ratings_2014, classes_old)





In [34]:
fuel_ratings_2014_grouped

Unnamed: 0_level_0,Unnamed: 1_level_0,Comb Fuel Economy (L/100km),CO2 Emissions (g/km)
Vehicle Class,Fuel Type,Unnamed: 2_level_1,Unnamed: 3_level_1
TWO-SEATER,Gasoline,10.717373,246.580508
MINICOMPACT,Gasoline,10.438583,240.173228
SUBCOMPACT,E85,14.433333,231.0
SUBCOMPACT,Gasoline,10.761036,247.617117
COMPACT,Diesel,7.456,200.28
COMPACT,E85,12.566667,201.055556
COMPACT,Gasoline,9.619106,221.323575
COMPACT,Gasoline Hybrid,6.383333,146.916667
MID-SIZE,Diesel,7.925,211.916667
MID-SIZE,E85,14.502632,231.894737


# 2005-2009 Fuel Consumption Ratings

In [35]:
fuel_ratings_2009 = pd.read_csv('MY2005-2009 Fuel Consumption Ratings 5-cycle.csv', 
                                encoding='cp863', engine='python', usecols=[1,2,3,4,5,7,8,9,10,12], skiprows=[1], nrows=5205)

In [36]:
fuel_ratings_2009, fuel_ratings_2009_grouped = group_fuel_classes(fuel_ratings_2009, classes_old)





In [37]:
fuel_ratings_2009_grouped

Unnamed: 0_level_0,Unnamed: 1_level_0,Comb Fuel Economy (L/100km),CO2 Emissions (g/km)
Vehicle Class,Fuel Type,Unnamed: 2_level_1,Unnamed: 3_level_1
TWO-SEATER,Diesel,5.2,140.0
TWO-SEATER,Gasoline,12.814286,294.798319
MINICOMPACT,Gasoline,11.710879,269.393305
SUBCOMPACT,Diesel,6.85,184.75
SUBCOMPACT,Gasoline,11.16089,256.742389
COMPACT,Diesel,7.254545,195.545455
COMPACT,E85,15.066667,241.0
COMPACT,Gasoline,10.790476,248.21659
COMPACT,Gasoline Hybrid,5.6,129.0
MID-SIZE,Diesel,8.85,239.0


# All ICE Ratings

In [38]:
fuel_ratings_2018 = pd.read_csv('MY2018 Fuel Consumption Ratings.csv', 
                                encoding='cp863', engine='python', usecols=[1,2,3,4,5,7,8,9,10,12], skiprows=[1], nrows=1083)
fuel_ratings_2018.shape

(1083, 10)

In [39]:
fuel_ratings_2018, _ = group_fuel_classes(fuel_ratings_2018, classes)





In [40]:
fuel_ratings = pd.concat([fuel_ratings_2009, fuel_ratings_2014, fuel_ratings_2018, fuel_ratings_2023], axis=0)
fuel_ratings.shape

(12480, 10)

In [41]:
fuel_ratings['Engine Size'] = pd.cut(fuel_ratings['Engine Size (L)'], bins=list(range(10)), labels=["[{},{})".format(i, i + 1) for i in range(9)])

In [42]:
fig = px.histogram(fuel_ratings, x='Comb Fuel Economy (L/100km)', color='Engine Size')
fig.update_layout(height=700)

fig.show()

In [43]:
fig = px.histogram(fuel_ratings, x='Comb Fuel Economy (L/100km)', color='Cylinders')
fig.update_layout(height=700)

fig.show()

In [44]:
fig = px.box(fuel_ratings, x='Engine Size (L)')

fig.show()

# 2012-2023 BEV

In [45]:
BEV_2023 = pd.read_csv('MY2012-2023 Battery Electric Vehicles.csv', 
                       encoding='cp863', engine='python', usecols=[1,2,3,4,7,8,9,13,17], skiprows=[1], nrows=455)

In [46]:
BEV_2023

Unnamed: 0,Make,Model.1,Vehicle Class,Motor,Consumption,Unnamed: 8,Unnamed: 9,Range,Recharge
0,Mitsubishi,i-MiEV,Subcompact,49,16.9,21.4,18.7,100,7.0
1,Nissan,LEAF,Mid-size,80,19.3,23.0,21.1,117,7.0
2,Ford,Focus Electric,Compact,107,19.0,21.1,20.0,122,4.0
3,Mitsubishi,i-MiEV,Subcompact,49,16.9,21.4,18.7,100,7.0
4,Nissan,LEAF,Mid-size,80,19.3,23.0,21.1,117,7.0
...,...,...,...,...,...,...,...,...,...
450,Volkswagen,ID.4,SUV: Small,150,18.2,21.2,19.6,336,6.0
451,Volkswagen,ID.4 Pro,SUV: Small,150,18.2,21.3,19.6,443,7.5
452,Volkswagen,ID.4 AWD Pro,SUV: Small,220,20.1,22.6,21.2,410,7.5
453,Volvo,C40 Recharge Twin,SUV: Small,300,22.2,26.1,23.9,364,8.0


In [47]:
BEV_2023.columns

Index(['Make', 'Model.1', 'Vehicle Class', 'Motor', 'Consumption',
       'Unnamed: 8', 'Unnamed: 9', 'Range', 'Recharge'],
      dtype='object')

In [48]:
BEV_2023.columns = ['Make', 'Model', 'Vehicle Class', 'Motor (kW)', 'City Consumption (kWh/100km)', 'Hwy Consumption (kWh/100km)', 
                    'Combined Consumption (kWh/100km)', 'Range (km)', 'Recharge Time (h)']

In [49]:
BEV_2023_grouped = BEV_2023.groupby(['Vehicle Class']).mean(numeric_only=True)[['Combined Consumption (kWh/100km)', 'Range (km)']].dropna()

In [50]:
BEV_2023_grouped

Unnamed: 0_level_0,Combined Consumption (kWh/100km),Range (km)
Vehicle Class,Unnamed: 1_level_1,Unnamed: 2_level_1
Compact,24.644828,284.655172
Full-size,20.41383,512.723404
Mid-size,20.019767,361.139535
Pickup truck: Standard,29.952941,493.941176
SUV: Small,21.667647,418.588235
SUV: Standard,25.030137,453.726027
Station wagon: Small,19.3,361.02439
Subcompact,19.257143,239.542857
Two-seater,19.733333,103.5


In [51]:
fig = px.histogram(BEV_2023, x='Range (km)')

fig.show()

In [52]:
print(BEV_2023['Range (km)'].mean(), BEV_2023['Range (km)'].std())

399.8241758241758 141.0268660478626


In [53]:
BEV_2023[BEV_2023['Range (km)']>500]

Unnamed: 0,Make,Model,Vehicle Class,Motor (kW),City Consumption (kWh/100km),Hwy Consumption (kWh/100km),Combined Consumption (kWh/100km),Range (km),Recharge Time (h)
55,Tesla,Model S P100D,Full-size,568,22.6,20.0,21.5,507,12.0
74,Tesla,Model S 100D,Full-size,386,20.7,20.5,20.6,539,12.0
76,Tesla,Model S P100D,Full-size,568,22.6,20.0,21.5,507,12.0
99,Tesla,Model S 100D,Full-size,386,20.7,20.5,20.6,539,12.0
100,Tesla,Model S P100D,Full-size,568,22.6,20.0,21.5,507,12.0
...,...,...,...,...,...,...,...,...,...
434,Tesla,"Model S Plaid (19"" Wheels)",Full-size,750,17.6,18.7,18.1,637,15.0
435,Tesla,"Model S Plaid (21"" Wheels)",Full-size,750,20.4,21.2,20.8,560,15.0
436,Tesla,Model X,SUV: Standard,494,19.7,21.7,20.6,560,14.0
437,Tesla,"Model X Plaid (20"" Wheels)",SUV: Standard,750,20.4,22.5,21.4,536,14.0


In [54]:
fig = px.box(BEV_2023, x='Range (km)')
fig.show()

In [55]:
BEV_2023['Range Class'] = pd.cut(BEV_2023['Range (km)'], bins=[0, 250, 500, 900], labels=['<250', '<500', '<800'])

In [56]:
fig = px.histogram(BEV_2023, x='Combined Consumption (kWh/100km)', color='Range Class')
fig.update_layout(height=700)

fig.show()

In [57]:
BEV_2023['Vehicle Class'] = pd.Categorical(BEV_2023['Vehicle Class'], categories=classes, ordered=True)

In [58]:
BEV_2023_grouped = BEV_2023.groupby(['Vehicle Class', 'Range Class']).mean(numeric_only=True)[['Combined Consumption (kWh/100km)', 'Range (km)']].dropna()





In [59]:
BEV_2023_grouped

Unnamed: 0_level_0,Unnamed: 1_level_0,Combined Consumption (kWh/100km),Range (km)
Vehicle Class,Range Class,Unnamed: 2_level_1,Unnamed: 3_level_1
Two-seater,<250,19.733333,103.5
Subcompact,<250,18.34,164.68
Subcompact,<500,21.55,426.7
Compact,<250,19.0,165.9
Compact,<500,27.615789,347.157895
Mid-size,<250,18.37619,176.619048
Mid-size,<500,21.369811,394.660377
Mid-size,<800,16.933333,536.0
Full-size,<250,22.2,224.0
Full-size,<500,21.651064,409.93617


# 2012-2023 PHEV

In [80]:
PHEV_2023 = pd.read_csv('MY2012-2023 Plug-in Hybrid Electric Vehicles.csv', 
                       encoding='cp863', engine='python', usecols=[1,2,3,4,5,6,8,9,10,11,15,17], skiprows=[1], nrows=250)

In [81]:
PHEV_2023.columns

Index(['Make', 'Model.1', 'Vehicle Class', 'Motor', 'Engine Size', 'Cylinders',
       'Fuel', 'Consumption', 'Range 1', 'Recharge', 'Unnamed: 15',
       'CO2 Emissions'],
      dtype='object')

In [82]:
PHEV_2023.columns = ['Make', 'Model', 'Vehicle Class', 'Motor (kW)', 'Engine Size (L)', 'Cylinders', 'Fuel Type', 'Combined Consumption (Le/100km)', 
                     'Range (km)', 'Recharge Time (h)', 'Fuel-only Consumption (L/100km)', 'CO2 Emissions (g/km)']
PHEV_2023.replace(['B/X', 'B/Z', 'B/Z*', 'B/X*'], ['B/G', 'B/G', 'B/G*', 'B/G*'], inplace=True)

In [83]:
PHEV_2023['Elec-only Consumption (Le/100km)'] = PHEV_2023['Combined Consumption (Le/100km)'].str.extract(r'(\d+\.\d+)').astype(float)

In [84]:
PHEV_2023.dtypes

Make                                 object
Model                                object
Vehicle Class                        object
Motor (kW)                            int64
Engine Size (L)                     float64
Cylinders                             int64
Fuel Type                            object
Combined Consumption (Le/100km)      object
Range (km)                            int64
Recharge Time (h)                   float64
Fuel-only Consumption (L/100km)     float64
CO2 Emissions (g/km)                  int64
Elec-only Consumption (Le/100km)    float64
dtype: object

In [85]:
fig = px.histogram(PHEV_2023, x='Fuel-only Consumption (L/100km)', color='Fuel Type')
fig.update_layout(height=700)

fig.show()

In [86]:
fig = px.box(PHEV_2023, x='Range (km)', y='Fuel Type')

fig.show()

In [87]:
fig = px.box(PHEV_2023, x='Range (km)')

fig.show()

In [88]:
fig = px.histogram(PHEV_2023, x='Range (km)', color='Fuel Type', nbins=40)
fig.update_layout(height=700)

fig.show()

In [89]:
PHEV_2023[PHEV_2023['Range (km)']>80].sort_values('Range (km)')

Unnamed: 0,Make,Model,Vehicle Class,Motor (kW),Engine Size (L),Cylinders,Fuel Type,Combined Consumption (Le/100km),Range (km),Recharge Time (h),Fuel-only Consumption (L/100km),CO2 Emissions (g/km),Elec-only Consumption (Le/100km)
164,Polestar,1,Minicompact,170,2.0,4,B/G*,3.9 ([35.4 kWh + 0.0 L]/100 km),84,9.0,9.2,54,3.9
87,Chevrolet,Volt,Compact,111,1.5,4,B,2.2 (19.5 kWh/100 km),85,4.5,5.6,32,2.2
28,Chevrolet,Volt,Compact,111,1.5,4,B,2.2 (19.9 kWh/100 km),85,4.5,5.6,32,2.2
43,Chevrolet,Volt,Compact,111,1.5,4,B,2.2 (19.9 kWh/100 km),85,4.5,5.6,32,2.2
64,Chevrolet,Volt,Compact,111,1.5,4,B,2.2 (19.9 kWh/100 km),85,4.5,5.6,32,2.2
158,Karma,"Revero GT (22"" Wheels)",Subcompact,400,1.5,3,B,3.8 (33.7 kWh/100 km),87,6.25,10.8,60,3.8
156,Karma,"GS-6 (22"" Wheels)",Subcompact,400,1.5,3,B,3.8 (33.7 kWh/100 km),87,6.25,10.8,60,3.8
238,Mercedes-Benz,S 580e 4MATIC Sedan,Full-size,110,3.0,6,B/G,4.6 ([30.4 kWh + 1.2 L]/100 km),90,2.25,10.0,67,4.6
155,Karma,"GS-6 (21"" Wheels)",Subcompact,400,1.5,3,B,3.4 (29.9 kWh/100 km),98,6.25,9.1,44,3.4
157,Karma,"Revero GT (21"" Wheels)",Subcompact,400,1.5,3,B,3.4 (29.9 kWh/100 km),98,6.25,9.1,44,3.4


In [90]:
PHEV_2023['Range Class'] = pd.cut(PHEV_2023['Range (km)'], bins=[0, 55, 210], labels=['<55', '<90'])

In [91]:
fig = px.histogram(PHEV_2023, x='Elec-only Consumption (Le/100km)', color='Range Class', 
                   # hover_data=['Range (km)'], hover_name='Model'
                  )
fig.update_layout(height=700)

fig.show()

In [72]:
PHEV_2023['Vehicle Class'] = pd.Categorical(PHEV_2023['Vehicle Class'], categories=classes, ordered=True)

In [73]:
PHEV_2023_grouped = PHEV_2023.groupby(['Vehicle Class', 'Range Class']).mean(numeric_only=True)[['Consumption (Le/100km)', 'Fuel-only Consumption (L/100km)', 'CO2 Emissions (g/km)', 'Range (km)']].dropna()





In [74]:
PHEV_2023_grouped

Unnamed: 0_level_0,Unnamed: 1_level_0,Consumption (Le/100km),Fuel-only Consumption (L/100km),CO2 Emissions (g/km),Range (km)
Vehicle Class,Range Class,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Two-seater,<55,3.4,9.433333,136.0,26.333333
Minicompact,<90,3.9,9.2,54.0,84.0
Subcompact,<55,3.45,9.133333,119.833333,30.166667
Subcompact,<90,2.773684,8.236842,36.842105,133.157895
Compact,<55,3.233333,8.042857,107.714286,29.238095
Compact,<90,2.490909,6.4,44.363636,69.636364
Mid-size,<55,2.823636,6.676364,83.472727,36.163636
Mid-size,<90,2.3875,6.0,42.75,70.75
Full-size,<55,4.363636,10.272727,148.909091,26.0
Full-size,<90,4.6,10.0,67.0,90.0


In [79]:
# PHEV_2023_grouped.to_clipboard()

# ON vehicle population data

To define a weight class breakdown of MHDVs. Source: https://data.ontario.ca/dataset/vehicle-population-data

In [2]:
import pandas as pd

In [14]:
on_2020_r4 = pd.read_table('ON-vehicle-pop-data_2022/2022_Reg_Veh_Report4_Weight_Class&Status.txt')

In [15]:
on_comm_2020 = on_2020_r4[on_2020_r4.WEIGHT_CLASS == 'COMMERCIAL'][['WEIGHT_CLASS', 'KG_FROM', 'KG_TO',	'FIT-ACTIVE']]
on_comm_2020['LB_FROM'] = on_comm_2020.KG_FROM * 2.20462
on_comm_2020['LB_TO'] = on_comm_2020.KG_TO * 2.20462

on_comm_2020

Unnamed: 0,WEIGHT_CLASS,KG_FROM,KG_TO,FIT-ACTIVE,LB_FROM,LB_TO
0,COMMERCIAL,0,3000,1155476,0.00000,6613.86000
1,COMMERCIAL,3001,3500,40763,6616.06462,7716.17000
2,COMMERCIAL,3501,4000,42473,7718.37462,8818.48000
3,COMMERCIAL,4001,4500,132424,8820.68462,9920.79000
4,COMMERCIAL,4501,5000,5938,9922.99462,11023.10000
...,...,...,...,...,...,...
60,COMMERCIAL,60001,61000,370,132279.40462,134481.82000
61,COMMERCIAL,61001,62000,640,134484.02462,136686.44000
62,COMMERCIAL,62001,63000,1376,136688.64462,138891.06000
63,COMMERCIAL,63001,63500,19732,138893.26462,139993.37000


In [16]:
bins = [-float('inf'), 6000, 8500, 10000, 14000, 16000, 19500, 26000, 33000, float('inf')]
labels = [
    'LDT1-2',
    'LDT3-4',
    'MDV2b',
    'MDV3',
    'MDV4',
    'MDV5',
    'MDV6',
    'MDV7',
    'MDV8'
]

on_comm_2020['EPA_GVWR'] = pd.cut(on_comm_2020['LB_TO'], bins=bins, labels=labels)
on_comm_2020

Unnamed: 0,WEIGHT_CLASS,KG_FROM,KG_TO,FIT-ACTIVE,LB_FROM,LB_TO,EPA_GVWR
0,COMMERCIAL,0,3000,1155476,0.00000,6613.86000,LDT3-4
1,COMMERCIAL,3001,3500,40763,6616.06462,7716.17000,LDT3-4
2,COMMERCIAL,3501,4000,42473,7718.37462,8818.48000,MDV2b
3,COMMERCIAL,4001,4500,132424,8820.68462,9920.79000,MDV2b
4,COMMERCIAL,4501,5000,5938,9922.99462,11023.10000,MDV3
...,...,...,...,...,...,...,...
60,COMMERCIAL,60001,61000,370,132279.40462,134481.82000,MDV8
61,COMMERCIAL,61001,62000,640,134484.02462,136686.44000,MDV8
62,COMMERCIAL,62001,63000,1376,136688.64462,138891.06000,MDV8
63,COMMERCIAL,63001,63500,19732,138893.26462,139993.37000,MDV8


In [21]:
on_comm_2020.groupby('EPA_GVWR').sum()[['FIT-ACTIVE']].to_clipboard()

  on_comm_2020.groupby('EPA_GVWR').sum()[['FIT-ACTIVE']].to_clipboard()


In [22]:
on_school_2020 = on_2020_r4[on_2020_r4.WEIGHT_CLASS == 'SCHOOL BUS'][['WEIGHT_CLASS', 'KG_FROM', 'KG_TO', 'FIT-ACTIVE']]
on_school_2020['LB_FROM'] = on_school_2020.KG_FROM * 2.20462
on_school_2020['LB_TO'] = on_school_2020.KG_TO * 2.20462

bins = [-float('inf'), 19500, float('inf')]
labels = [
    'Class 3 (<19500)',
    'Class 7 (>19500)'
]

on_school_2020['EPA_GVWR'] = pd.cut(on_school_2020['LB_TO'], bins=bins, labels=labels)
on_school_2020

Unnamed: 0,WEIGHT_CLASS,KG_FROM,KG_TO,FIT-ACTIVE,LB_FROM,LB_TO,EPA_GVWR
257,SCHOOL BUS,1,2500,2,2.20462,5511.55,Class 3 (<19500)
258,SCHOOL BUS,2501,3000,291,5513.75462,6613.86,Class 3 (<19500)
259,SCHOOL BUS,3001,3500,619,6616.06462,7716.17,Class 3 (<19500)
260,SCHOOL BUS,3501,4000,672,7718.37462,8818.48,Class 3 (<19500)
261,SCHOOL BUS,4001,4500,1019,8820.68462,9920.79,Class 3 (<19500)
262,SCHOOL BUS,4501,5000,918,9922.99462,11023.1,Class 3 (<19500)
263,SCHOOL BUS,5001,6000,803,11025.30462,13227.72,Class 3 (<19500)
264,SCHOOL BUS,6001,7000,235,13229.92462,15432.34,Class 3 (<19500)
265,SCHOOL BUS,7001,8000,102,15434.54462,17636.96,Class 3 (<19500)
266,SCHOOL BUS,8001,9000,675,17639.16462,19841.58,Class 7 (>19500)


In [23]:
on_school_2020.groupby('EPA_GVWR').sum()[['FIT-ACTIVE']].to_clipboard()

  on_school_2020.groupby('EPA_GVWR').sum()[['FIT-ACTIVE']].to_clipboard()
