# Using VCA data to identify BEVs

We're going to try to use the VCA database to see whether we can identify:
1. vehicles that are BEV/PHEV/PETROL/DIESEL in veh0120, and
2. vehicles that are BEV/PHEV/PETROL/DIESEL in latest vehicle mileage estimates in our database

To test whether this is possible, we'll start by exploring some of the VCA data on emissions & vehicle types

In [1]:
#import packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re
from zipfile import ZipFile


In [2]:
#First we'll import the latest 2020 vehicle emissions data
url_d20 = 'https://carfueldata.vehicle-certification-agency.gov.uk/additional/2020/data%20for%20guide%202020.zip'
df_d20 = pd.read_csv(url_d20, encoding='cp1252')

In [3]:
df_d20.head()

Unnamed: 0,Manufacturer,Model,Description,Transmission,Engine Capacity,Fuel Type,Powertrain,Engine Power (Kw),Engine Power (PS),Testing Scheme,...,Total cost / 10000 miles,Noise Level dB(A),Emissions CO [mg/km],THC Emissions [mg/km],Emissions NOx [mg/km],THC + NOx Emissions [mg/km],Particulates [No.] [mg/km],RDE NOx Urban,RDE NOx Combined,Unnamed: 45
0,ABARTH,595,595 1.4 145 BHP,M5,1368,Petrol,Internal Combustion Engine (ICE),107.0,145.0,WLTP,...,"£1,390",73.5,760.0,52.0,27.0,,,,,
1,ABARTH,595,595 1.4 145 BHP,M5,1368,Petrol,Internal Combustion Engine (ICE),107.0,145.0,WLTP,...,"£1,468",73.5,760.0,52.0,27.0,,,,,
2,ABARTH,595,595 1.4 TJET 145 BHP,M5,1368,Petrol,Internal Combustion Engine (ICE),107.0,145.0,WLTP,...,"£1,390",73.5,760.0,52.0,27.0,,,,,
3,ABARTH,595,595 1.4 TJET 145 BHP,M5,1368,Petrol,Internal Combustion Engine (ICE),107.0,145.0,WLTP,...,"£1,468",73.5,760.0,52.0,27.0,,,,,
4,ABARTH,595,595 1.4 TJET 145 BHP Convertible,M5,1368,Petrol,Internal Combustion Engine (ICE),107.0,145.0,WLTP,...,"£1,390",73.5,760.0,52.0,27.0,,,,,


In [79]:
df_d20.groupby(by=df_d20['Fuel Type']).sum()

Unnamed: 0_level_0,Engine Capacity,Engine Power (Kw),Engine Power (PS),Diesel VED Supplement,Electric energy consumption Miles/kWh,wh/km,Maximum range (Km),Maximum range (Miles),WLTP Imperial Low,WLTP Imperial Medium,...,Electric Range City Km,Noise Level dB(A),Emissions CO [mg/km],THC Emissions [mg/km],Emissions NOx [mg/km],THC + NOx Emissions [mg/km],Particulates [No.] [mg/km],RDE NOx Urban,RDE NOx Combined,Unnamed: 45
Fuel Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Diesel,2625248,167877.0,224726.0,1296,0.0,0.0,0.0,236.0,50207.3,61039.4,...,0.0,88382.8,140748.0,12538.0,60796.0,85214.0,846.22,79674.0,90314.0,0.0
Diesel Electric,322536,23619.0,32117.0,0,0.0,4998.0,1084.0,0.0,6628.1,8064.6,...,0.0,11816.3,40039.0,2034.0,6895.0,11447.0,72.29,8156.0,8116.0,0.0
Electricity,0,2460.0,3342.0,0,149.2,7946.0,14930.0,9269.0,0.0,0.0,...,19261.0,2900.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Electricity / Petrol,175998,19541.0,28401.0,0,399.8,16775.0,4851.0,2997.0,2760.6,2829.2,...,3040.0,6353.0,25666.0,1152.0,1308.0,860.0,5.75,2402.0,3122.0,0.0
Petrol,4662287,369190.0,498517.0,0,0.0,648.0,237.0,0.0,76837.9,103579.2,...,0.0,166115.2,741485.0,54050.0,48800.0,8924.0,836.83,116382.0,123560.0,0.0
Petrol Electric,1399249,99538.0,137199.0,0,0.0,8322.0,1584.0,0.0,28223.5,34799.1,...,0.0,48141.5,155757.0,13438.0,10469.0,14219.0,67.98,4726.0,4726.0,0.0


## Check whether previous VCA databases have same headers

In [80]:
#url_d20 was imported above
url_20 = url_d20
url_19 = 'https://carfueldata.vehicle-certification-agency.gov.uk/additional/2019/data%20for%20guide%202019.zip'
url_18 = 'https://carfueldata.vehicle-certification-agency.gov.uk/additional/sept2018/September%202018%20data%20download.zip'
url_17 = 'https://carfueldata.vehicle-certification-agency.gov.uk/additional/aug2017/download-data-for-Aug-2017-Euro-6.zip'
url_16 = 'https://carfueldata.vehicle-certification-agency.gov.uk/additional/aug2016/download-data-for-Aug-2016-Euro-6.zip'
url_15a = 'https://carfueldata.vehicle-certification-agency.gov.uk/additional/aug2015/download-data-for-Aug-2015-Euro-6.zip'
url_15b = 'https://carfueldata.vehicle-certification-agency.gov.uk/additional/aug2015/download-data-for-Aug-2015-Euro-5.zip'
url_14a = 'https://carfueldata.vehicle-certification-agency.gov.uk/additional/aug2014/download-data-for-Aug-2014-Euro-6.zip'
url_14b = 'https://carfueldata.vehicle-certification-agency.gov.uk/additional/aug2014/download-data-for-Aug-2014-Euro-5.zip'
url_13a = 'https://carfueldata.vehicle-certification-agency.gov.uk/additional/aug2013/download-data-for-Aug-2013-Euro-6.zip'
url_13b = 'https://carfueldata.vehicle-certification-agency.gov.uk/additional/aug2013/download-data-for-Aug-2013-Euro-6.zip'
url_12a = 'https://carfueldata.vehicle-certification-agency.gov.uk/additional/aug2012/download-data-for-Aug-2012-Euro-6.zip'
url_12b = 'https://carfueldata.vehicle-certification-agency.gov.uk/additional/aug2012/download-data-for-Aug-2012-Euro-5.zip'
url_12c = 'https://carfueldata.vehicle-certification-agency.gov.uk/additional/aug2012/download-data-for-Aug-2012-Euro-4.zip'
url_11a = 'https://carfueldata.vehicle-certification-agency.gov.uk/additional/aug2011/download-data-for-Aug-2011-Euro-6.zip'
url_11b = 'https://carfueldata.vehicle-certification-agency.gov.uk/additional/aug2011/download-data-for-Aug-2011-Euro-5.zip'
url_11c = 'https://carfueldata.vehicle-certification-agency.gov.uk/additional/aug2011/download-data-for-Aug-2011-Euro-4.zip'
url_list = [url_20, url_19, url_18, url_17, url_16, url_15a, url_15b, url_14a, url_14b, url_13a, url_13b, url_12a, url_12b, url_12c, url_11a, url_11b, url_11c]

In [81]:
url_list[15]

'https://carfueldata.vehicle-certification-agency.gov.uk/additional/aug2011/download-data-for-Aug-2011-Euro-5.zip'

In [83]:
df_19 = pd.read_csv(url_19, encoding='cp1252')
df_18 = pd.read_csv(url_18, encoding='cp1252')
df_17 = pd.read_csv(url_17, encoding='cp1252')
df_16 = pd.read_csv(url_16, encoding='cp1252')
df_15a = pd.read_csv(url_15a, encoding='cp1252')
df_15b = pd.read_csv(url_15b, encoding='cp1252')
df_14a = pd.read_csv(url_14a, encoding='cp1252')
df_14b = pd.read_csv(url_14b, encoding='cp1252')
df_13a = pd.read_csv(url_13a, encoding='cp1252')
df_13b = pd.read_csv(url_13b, encoding='cp1252')
df_12a = pd.read_csv(url_12a, encoding='cp1252')
df_12b = pd.read_csv(url_12b, encoding='cp1252')
df_12c = pd.read_csv(url_12c, encoding='cp1252')
# df_11a = pd.read_excel(url_11a)
# df_11b = pd.read_csv(url_11b, encoding='cp1252')
# df_11c = pd.read_csv(url_11c, encoding='cp1252')

In [84]:
dfs = [df_19, df_18, df_17, df_16, df_15a, df_15b, df_14a, df_14b, df_13a, df_13b, df_12a, df_12b, df_12c]

In [85]:
# column_header = []
# for df in dfs:
#     print(len(df.columns.tolist()))

In [18]:
# chs = pd.DataFrame(column_header)

In [33]:
# chs.head(40)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,35,36,37,38,39,40,41,42,43,44
0,Manufacturer,Model,Description,Transmission,Engine Capacity,Fuel Type,Engine Power (Kw),Engine Power (PS),Testing Scheme,Euro Standard,...,Electricity cost,Total cost / 12000 miles,Noise Level dB(A),Emissions CO [mg/km],THC Emissions [mg/km],Emissions NOx [mg/km],THC + NOx Emissions [mg/km],Particulates [No.] [mg/km],RDE NOx Urban,RDE NOx Combined
1,Manufacturer,Model,Description,Transmission,Engine Capacity,Fuel Type,Electric energy consumption Miles/kWh,wh/km,Maximum range (Km),Maximum range (Miles),...,,,,,,,,,,
2,Manufacturer,Model,Description,Transmission,Engine Capacity,Fuel Type,Electric energy consumption Miles/kWh,wh/km,Maximum range (Km),Maximum range (Miles),...,,,,,,,,,,
3,Manufacturer,Model,Description,Transmission,Engine Capacity,Fuel Type,Electric energy consumption Miles/kWh,wh/km,Maximum range (Km),Maximum range (Miles),...,,,,,,,,,,
4,Manufacturer,Model,Description,Transmission,Engine Capacity,Fuel Type,Electric energy consumption Miles/kWh,wh/km,Maximum range (Km),Maximum range (Miles),...,,,,,,,,,,
5,Manufacturer,Model,Description,Transmission,Engine Capacity,Fuel Type,Electric energy consumption Miles/kWh,wh/km,Maximum range (Km),Maximum range (Miles),...,Unnamed: 35,Unnamed: 36,,,,,,,,
6,Manufacturer,Model,Description,Transmissions Type,Transmission,Engine Capacity,Fuel Type,Electric energy consumption Miles/kWh,wh/km,Maximum range (Km),...,,,,,,,,,,
7,Manufacturer,Model,Description,Transmission,Transmission type,Engine Capacity,Fuel Type,Electric energy consumption Miles/kWh,wh/km,Maximum range (Km),...,Unnamed: 35,Unnamed: 36,Unnamed: 37,Unnamed: 38,Unnamed: 39,Unnamed: 40,,,,
8,Manufacturer,Model,Description,transmission,Transmission type,Engine Capacity,Fuel Type,Electrical energy consumption miles/kWh,wh/km,Electric range (Km),...,,,,,,,,,,
9,Manufacturer,Model,Description,transmission,Transmission type,Engine Capacity,Fuel Type,Electrical energy consumption miles/kWh,wh/km,Electric range (Km),...,,,,,,,,,,


In [86]:
# df_12b.groupby('Fuel Type').sum()

In [44]:
df_13a[df_13a['Fuel Type'] == 'Electricity'] # This is the earliest df with electric models in

Unnamed: 0,Manufacturer,Model,Description,transmission,Transmission type,Engine Capacity,Fuel Type,Electrical energy consumption miles/kWh,wh/km,Electric range (Km),...,Total cost of Driving 12000 miles (£s),Noise Level dB(A),Emissions CO [mg/km],THC Emissions [mg/km],Emissions NOx [mg/km],THC + NOx Emissions [mg/km],Particulates [No.] [mg/km],Euro Standard,Booklet,VED Band
315,MITSUBISHI,i-MiEV,i-MiEV,,,,Electricity,4.6,135.0,150.0,...,378,66.0,,,,,,6,August 2013,A
316,NISSAN,Leaf,Leaf,,,,Electricity,3.6,173.0,175.0,...,484,68.0,,,,,,6,August 2013,A


In [87]:
# df_19.groupby('Fuel Type').sum()

In [88]:
# df_19[df_19['Fuel Type'] == 'Electricity']

In [92]:
# Creating a master df with all make models & fuel types
columns = ['Manufacturer', 'Model', 'Fuel Type']
df_dict = []
for i in range(0, 12):
    df_dict.append(dfs[i][columns])
master_df = pd.concat(df_dict)
master_df.drop_duplicates(inplace = True)
master_df.shape

(3312, 3)

In [97]:
master_df.head()

Unnamed: 0,Manufacturer,Model,Fuel Type
0,ABARTH,595,Petrol
42,ALFA ROMEO,Giulia,Petrol
48,ALFA ROMEO,Giulia,Diesel
54,ALFA ROMEO,Giulietta,Petrol
58,ALFA ROMEO,Giulietta,Diesel


In [98]:
master_df.groupby('Fuel Type').sum()

Unnamed: 0_level_0,Manufacturer,Model
Fuel Type,Unnamed: 1_level_1,Unnamed: 2_level_1
CNG,VOLKSWAGEN C.V.VOLKSWAGEN C.V.VOLKSWAGEN C.V.V...,Caddy ( Passenger range )Caddy Maxi ( Passenge...
Diesel,ALFA ROMEOALFA ROMEOALFA ROMEOCHRYSLER JEEPCHR...,GiuliaGiuliettaStelvioJeep Compass MY 2019Jeep...
Diesel Electric,KIAMERCEDES-BENZMERCEDES-BENZRENAULTRENAULTMER...,"SportageC-Class Estate, Model Year 2018C-Class..."
Diesel/Electric,CITROENPEUGEOTPEUGEOTPEUGEOTMERCEDES-BENZMERCE...,"DS53008508508 RXHE-Class Saloon, Model Year 20..."
Electricity,BMWHYUNDAIHYUNDAIJAGUARKIAKIANISSANNISSANNISSA...,"i Series From November 2013IONIQKonaI-PACE, 19..."
Electricity / Diesel,AUDIVOLVOVOLVOVOLVOVOLVOVOLVO,"Q7V60 MY18V60 MY17V60, MY16V60, Model Year 201..."
Electricity / Petrol,HYUNDAITOYOTAAUDIBMWBMWBMWBMWBMWBMWBMWKIAKIAKI...,"IONIQPrius Plug-InA3 Sportbacki Series, From N..."
Petrol,ABARTHALFA ROMEOALFA ROMEOALFA ROMEOASTON MART...,595GiuliaGiuliettaStelvioDB11 V12 AMR Coupe 20...
Petrol / E85 (Flex Fuel),BENTLEY MOTORS,Continental
Petrol Electric,KIABMWBMWLEXUSMERCEDES-BENZMERCEDES-BENZMERCED...,"Niro3 Series Saloon F30, From February 20125 S..."


In [96]:
ev_types = master_df[master_df['Fuel Type'] == 'Electricity'].reset_index(drop=True)
ev_types.shape

(40, 3)

In [95]:
ev_types.head()

Unnamed: 0,Manufacturer,Model,Fuel Type
0,BMW,i Series From November 2013,Electricity
1,HYUNDAI,IONIQ,Electricity
2,HYUNDAI,Kona,Electricity
3,JAGUAR,"I-PACE, 19MY",Electricity
4,KIA,Niro EV,Electricity


In [99]:
df_av = pd.read_csv('active_vehs.csv')

In [100]:
df_av.head()

Unnamed: 0,make,model,num
0,TOYOTA,PRIUS,16754
1,NISSAN,LEAF,16643
2,RENAULT,ZOE,4905
3,MITSUBISHI,OUTLANDER,3438
4,TOYOTA,AURIS,2702


In [103]:
master_df = master_df.rename(columns = {'Model': 'model'})
df_try = pd.merge(df_av, master_df, how='left', on='model')

In [104]:
df_try.head()

Unnamed: 0,make,model,num,Manufacturer,Fuel Type
0,TOYOTA,PRIUS,16754,,
1,NISSAN,LEAF,16643,,
2,RENAULT,ZOE,4905,,
3,MITSUBISHI,OUTLANDER,3438,,
4,TOYOTA,AURIS,2702,,


In [105]:
df_try.shape

(626, 5)

In [106]:
df_try.isna().sum()

make              0
model            97
num               0
Manufacturer    580
Fuel Type       580
dtype: int64

In [110]:
df_try_one = df_try.dropna()
df_try_one.head()

Unnamed: 0,make,model,num,Manufacturer,Fuel Type
11,LEXUS,CT,962,LEXUS,Petrol Hybrid
22,HYUNDAI,IONIQ,260,HYUNDAI,Electricity
23,HYUNDAI,IONIQ,260,HYUNDAI,Petrol Hybrid
24,HYUNDAI,IONIQ,260,HYUNDAI,Electricity / Petrol
27,VOLKSWAGEN,UP,178,VOLKSWAGEN,Electricity


# Exploring problems in MOT database

In [112]:
#These are the vehicles listed as 'Electric' in the MOT database
df_av = pd.read_csv('active_vehs.csv')

In [113]:
df_av.head()

Unnamed: 0,make,model,num
0,TOYOTA,PRIUS,16754
1,NISSAN,LEAF,16643
2,RENAULT,ZOE,4905
3,MITSUBISHI,OUTLANDER,3438
4,TOYOTA,AURIS,2702


In [None]:
mot_model = df_av['make']