# Using VCA data to identify BEVs

We're going to try to use the VCA database to see whether we can identify:
1. vehicles that are BEV/PHEV/PETROL/DIESEL in veh0120, and
2. vehicles that are BEV/PHEV/PETROL/DIESEL in latest vehicle mileage estimates in our database

To test whether this is possible, we'll start by exploring some of the VCA data on emissions & vehicle types

In [1]:
#import packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re
from zipfile import ZipFile

In [2]:
#First we'll import the latest 2020 vehicle emissions data
url_d20 = 'https://carfueldata.vehicle-certification-agency.gov.uk/additional/2020/data%20for%20guide%202020.zip'
df_d20 = pd.read_csv(url_d20, encoding='cp1252')

In [3]:
df_d20.head()

Unnamed: 0,Manufacturer,Model,Description,Transmission,Engine Capacity,Fuel Type,Powertrain,Engine Power (Kw),Engine Power (PS),Testing Scheme,...,Total cost / 10000 miles,Noise Level dB(A),Emissions CO [mg/km],THC Emissions [mg/km],Emissions NOx [mg/km],THC + NOx Emissions [mg/km],Particulates [No.] [mg/km],RDE NOx Urban,RDE NOx Combined,Unnamed: 45
0,ABARTH,595,595 1.4 145 BHP,M5,1368,Petrol,Internal Combustion Engine (ICE),107.0,145.0,WLTP,...,"£1,390",73.5,760.0,52.0,27.0,,,,,
1,ABARTH,595,595 1.4 145 BHP,M5,1368,Petrol,Internal Combustion Engine (ICE),107.0,145.0,WLTP,...,"£1,468",73.5,760.0,52.0,27.0,,,,,
2,ABARTH,595,595 1.4 TJET 145 BHP,M5,1368,Petrol,Internal Combustion Engine (ICE),107.0,145.0,WLTP,...,"£1,390",73.5,760.0,52.0,27.0,,,,,
3,ABARTH,595,595 1.4 TJET 145 BHP,M5,1368,Petrol,Internal Combustion Engine (ICE),107.0,145.0,WLTP,...,"£1,468",73.5,760.0,52.0,27.0,,,,,
4,ABARTH,595,595 1.4 TJET 145 BHP Convertible,M5,1368,Petrol,Internal Combustion Engine (ICE),107.0,145.0,WLTP,...,"£1,390",73.5,760.0,52.0,27.0,,,,,


In [4]:
df_d20.groupby(by=df_d20['Powertrain']).sum()

Unnamed: 0_level_0,Engine Capacity,Engine Power (Kw),Engine Power (PS),Diesel VED Supplement,Electric energy consumption Miles/kWh,wh/km,Maximum range (Km),Maximum range (Miles),WLTP Imperial Low,WLTP Imperial Medium,...,Electric Range City Km,Noise Level dB(A),Emissions CO [mg/km],THC Emissions [mg/km],Emissions NOx [mg/km],THC + NOx Emissions [mg/km],Particulates [No.] [mg/km],RDE NOx Urban,RDE NOx Combined,Unnamed: 45
Powertrain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Battery Electric Vehicle (BEV) / Pure Electric Vehicle / Electric Vehicle (EV),0,2460.0,3342.0,0,149.2,7946.0,14930.0,9269.0,0.0,0.0,...,19261.0,2900.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Hybrid Electric Vehicle (HEV),1721785,123157.0,169316.0,0,0.0,13320.0,2668.0,0.0,34851.6,42863.7,...,0.0,59957.8,195796.0,15472.0,17364.0,25666.0,140.27,12882.0,12842.0,0.0
Internal Combustion Engine (ICE),7287535,537067.0,723243.0,1296,0.0,648.0,237.0,236.0,127045.2,164618.6,...,0.0,254498.0,882233.0,66588.0,109596.0,94138.0,1683.05,196056.0,213874.0,0.0
Plug-in Hybrid Electric Vehicle (PHEV),175998,19541.0,28401.0,0,399.8,16775.0,4851.0,2997.0,2760.6,2829.2,...,3040.0,6353.0,25666.0,1152.0,1308.0,860.0,5.75,2402.0,3122.0,0.0


In [5]:
df_d20_evs = df_d20[df_d20['Powertrain'] == 'Battery Electric Vehicle (BEV) / Pure Electric Vehicle / Electric Vehicle (EV)']
df_d20_evs.head()

Unnamed: 0,Manufacturer,Model,Description,Transmission,Engine Capacity,Fuel Type,Powertrain,Engine Power (Kw),Engine Power (PS),Testing Scheme,...,Total cost / 10000 miles,Noise Level dB(A),Emissions CO [mg/km],THC Emissions [mg/km],Emissions NOx [mg/km],THC + NOx Emissions [mg/km],Particulates [No.] [mg/km],RDE NOx Urban,RDE NOx Combined,Unnamed: 45
290,CITROEN,New C4,100kW Electric Vehicle with 50kWh battery,,0,Electricity,Battery Electric Vehicle (BEV) / Pure Electric...,100.0,136.0,WLTP,...,£308,67.0,,,,,,,0.0,
291,CITROEN,New C4,100kW Electric Vehicle with 50kWh battery,,0,Electricity,Battery Electric Vehicle (BEV) / Pure Electric...,100.0,136.0,WLTP,...,£334,67.0,,,,,,,0.0,
304,CITROEN,SpaceTourer,50KWh Electric Vehicle,,0,Electricity,Battery Electric Vehicle (BEV) / Pure Electric...,100.0,136.0,WLTP,...,£463,68.0,,,,,,,0.0,
305,CITROEN,SpaceTourer,50KWh Electric Vehicle,,0,Electricity,Battery Electric Vehicle (BEV) / Pure Electric...,100.0,136.0,WLTP,...,£567,68.0,,,,,,,0.0,
324,DS,DS 3 CROSSBACK,E-TENSE,,0,Electricity,Battery Electric Vehicle (BEV) / Pure Electric...,,,WLTP,...,£342,68.0,,,,,,,0.0,


In [6]:
df_d20_evs.groupby(['Manufacturer']).sum()

Unnamed: 0_level_0,Engine Capacity,Engine Power (Kw),Engine Power (PS),Diesel VED Supplement,Electric energy consumption Miles/kWh,wh/km,Maximum range (Km),Maximum range (Miles),WLTP Imperial Low,WLTP Imperial Medium,...,Electric Range City Km,Noise Level dB(A),Emissions CO [mg/km],THC Emissions [mg/km],Emissions NOx [mg/km],THC + NOx Emissions [mg/km],Particulates [No.] [mg/km],RDE NOx Urban,RDE NOx Combined,Unnamed: 45
Manufacturer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
CITROEN,0,400.0,544.0,0,13.0,831.0,1138.0,707.0,0.0,0.0,...,1663.0,270.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
DS,0,0.0,0.0,0,7.0,353.0,639.0,397.0,0.0,0.0,...,896.0,136.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
HYUNDAI,0,350.0,476.0,0,13.0,428.0,995.0,618.0,0.0,0.0,...,1507.0,202.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
JAGUAR,0,0.0,0.0,0,2.8,220.0,470.0,292.0,0.0,0.0,...,628.0,67.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
KIA,0,550.0,742.0,0,15.8,628.0,1650.0,1023.0,0.0,0.0,...,2282.0,279.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
MERCEDES-BENZ,0,600.0,816.0,0,5.4,456.0,916.0,568.0,0.0,0.0,...,819.0,136.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
MG MOTORS UK,0,0.0,0.0,0,14.2,698.0,1254.0,778.0,0.0,0.0,...,1682.0,264.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
NISSAN,0,0.0,0.0,0,3.0,206.0,270.0,168.0,0.0,0.0,...,0.0,66.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PEUGEOT,0,200.0,272.0,0,20.0,1200.0,1735.0,1078.0,0.0,0.0,...,2469.0,392.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
SMART,0,360.0,492.0,0,21.6,1037.0,737.0,455.0,0.0,0.0,...,1012.0,396.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Check whether previous VCA databases have same headers

In [8]:
#url_d20 was imported above
url_20 = url_d20
url_19 = 'https://carfueldata.vehicle-certification-agency.gov.uk/additional/2019/data%20for%20guide%202019.zip'
url_18 = 'https://carfueldata.vehicle-certification-agency.gov.uk/additional/sept2018/September%202018%20data%20download.zip'
url_17 = 'https://carfueldata.vehicle-certification-agency.gov.uk/additional/aug2017/download-data-for-Aug-2017-Euro-6.zip'
url_16 = 'https://carfueldata.vehicle-certification-agency.gov.uk/additional/aug2016/download-data-for-Aug-2016-Euro-6.zip'
url_15a = 'https://carfueldata.vehicle-certification-agency.gov.uk/additional/aug2015/download-data-for-Aug-2015-Euro-6.zip'
url_15b = 'https://carfueldata.vehicle-certification-agency.gov.uk/additional/aug2015/download-data-for-Aug-2015-Euro-5.zip'
url_14a = 'https://carfueldata.vehicle-certification-agency.gov.uk/additional/aug2014/download-data-for-Aug-2014-Euro-6.zip'
url_14b = 'https://carfueldata.vehicle-certification-agency.gov.uk/additional/aug2014/download-data-for-Aug-2014-Euro-5.zip'
url_13a = 'https://carfueldata.vehicle-certification-agency.gov.uk/additional/aug2013/download-data-for-Aug-2013-Euro-6.zip'
url_13b = 'https://carfueldata.vehicle-certification-agency.gov.uk/additional/aug2013/download-data-for-Aug-2013-Euro-6.zip'
url_12a = 'https://carfueldata.vehicle-certification-agency.gov.uk/additional/aug2012/download-data-for-Aug-2012-Euro-6.zip'
url_12b = 'https://carfueldata.vehicle-certification-agency.gov.uk/additional/aug2012/download-data-for-Aug-2012-Euro-5.zip'
url_12c = 'https://carfueldata.vehicle-certification-agency.gov.uk/additional/aug2012/download-data-for-Aug-2012-Euro-4.zip'
url_11a = 'https://carfueldata.vehicle-certification-agency.gov.uk/additional/aug2011/download-data-for-Aug-2011-Euro-6.zip'
url_11b = 'https://carfueldata.vehicle-certification-agency.gov.uk/additional/aug2011/download-data-for-Aug-2011-Euro-5.zip'
url_11c = 'https://carfueldata.vehicle-certification-agency.gov.uk/additional/aug2011/download-data-for-Aug-2011-Euro-4.zip'
url_list = [url_20, url_19, url_18, url_17, url_16, url_15a, url_15b, url_14a, url_14b, url_13a, url_13b, url_12a, url_12b, url_12c, url_11a, url_11b, url_11c]
url_names_list = ['url_20', 'url_19', 'url_18', 'url_17', 'url_16', 'url_15', 'url_14a', 'url_14b', 'url_13a', 'url_13b', 'url_12a', 'url_12b', 'url_12c', 'url_11a', 'url_11b', 'url_11c']

In [9]:
print(len(url_list))

17


In [10]:
for i in range(0, 16):
    

SyntaxError: unexpected EOF while parsing (<ipython-input-10-07f5c209eddc>, line 2)

In [11]:
url_list[15]

'https://carfueldata.vehicle-certification-agency.gov.uk/additional/aug2011/download-data-for-Aug-2011-Euro-5.zip'

In [12]:
df_19 = pd.read_csv(url_19, encoding='cp1252')
df_18 = pd.read_csv(url_18, encoding='cp1252')
df_17 = pd.read_csv(url_17, encoding='cp1252')
df_16 = pd.read_csv(url_16, encoding='cp1252')
df_15a = pd.read_csv(url_15a, encoding='cp1252')
df_15b = pd.read_csv(url_15b, encoding='cp1252')
df_14a = pd.read_csv(url_14a, encoding='cp1252')
df_14b = pd.read_csv(url_14b, encoding='cp1252')
df_13a = pd.read_csv(url_13a, encoding='cp1252')
df_13b = pd.read_csv(url_13b, encoding='cp1252')
df_12a = pd.read_csv(url_12a, encoding='cp1252')
df_12b = pd.read_csv(url_12b, encoding='cp1252')
df_12c = pd.read_csv(url_12c, encoding='cp1252')
df_11a = pd.read_excel(url_11a)
df_11b = pd.read_csv(url_11b, encoding='cp1252')
df_11c = pd.read_csv(url_11c, encoding='cp1252')

ValueError: Your version of xlrd is 2.0.1. In xlrd >= 2.0, only the xls format is supported. Install openpyxl instead.