In [1]:
from modules.connector import MyBigQuery

In [2]:
import pandas as pd

In [3]:
bq = MyBigQuery(credentials_file='./credentials/New AutoMotive Index-487e031dc242.json')

In [4]:
table = "`rugged-baton-283921.mot_anonymised.mainNew`"
query = """
WITH VehicleData AS (
  SELECT
    vehicleId,
    test_mileage,
    testDate,
    firstUsedDate,
    DATE_DIFF(CURRENT_DATE(), firstUsedDate, YEAR) AS yearsOfService,
    CASE
      WHEN fuelType IN ('ED', 'OT', 'CN', 'GA', 'GB', 'GD', 'LNG', 'LNP', 'ST', 'FC', 'LP', 'LN') THEN 'other'
      WHEN fuelType = 'DI' THEN 'diesel'
      WHEN fuelType = 'EL' THEN 'electric'
      WHEN fuelType = 'HY' THEN 'hybrid'
      WHEN fuelType = 'PE' THEN 'petrol'
      ELSE fuelType
    END AS convertedFuelType
  FROM
    `rugged-baton-283921.mot_anonymised.mainNew`
  WHERE
    testDate >= DATE_SUB(CURRENT_DATE(), INTERVAL 2 YEAR)
        AND testTesult IN ('P', 'PRS')
        AND testClassId = 4
        AND `test_mileage` IS NOT NULL
),

RecentVehicleData AS (
  SELECT
    vehicleId,
    MAX(testDate) AS mostRecentTestDate
  FROM
    VehicleData
  GROUP BY
    vehicleId
)

SELECT
  vd.vehicleId,
  vd.firstUsedDate,
  vd.testDate,
  rvd.mostRecentTestDate,
  vd.yearsOfService,
  vd.test_mileage,
  vd.convertedFuelType AS fuelType,
  CASE
    WHEN vd.yearsOfService > 0 THEN vd.test_mileage / vd.yearsOfService
    ELSE 0
  END AS avgYearlyMileage
FROM
  VehicleData AS vd
JOIN
  RecentVehicleData AS rvd
ON
  vd.vehicleId = rvd.vehicleId
  AND vd.testDate = rvd.mostRecentTestDate
"""

In [5]:
df = bq.from_bq_to_dataframe(
    query=query
)
df

Unnamed: 0,vehicleId,firstUsedDate,testDate,mostRecentTestDate,yearsOfService,test_mileage,fuelType,avgYearlyMileage
0,340701186,2000-03-18,2022-04-02,2022-04-02,23,138163.0,petrol,6007.086957
1,199041160,1998-08-20,2021-11-09,2021-11-09,25,114673.0,petrol,4586.920000
2,1368848052,1991-06-10,2022-07-27,2022-07-27,32,118476.0,petrol,3702.375000
3,1322354670,1992-08-01,2022-11-05,2022-11-05,31,132987.0,diesel,4289.903226
4,1101580392,2000-03-25,2022-08-17,2022-08-17,23,119124.0,petrol,5179.304348
...,...,...,...,...,...,...,...,...
32125664,61480802,2001-02-13,2022-12-08,2022-12-08,22,225723.0,diesel,10260.136364
32125665,583762804,2001-03-30,2022-06-17,2022-06-17,22,77658.0,petrol,3529.909091
32125666,5968870,2001-03-07,2022-10-14,2022-10-14,22,45518.0,petrol,2069.000000
32125667,612698992,2001-06-02,2022-08-03,2022-08-03,22,93540.0,diesel,4251.818182


In [8]:
fuelTypes = ['other', 'diesel', 'electric', 'hybrid', 'petrol']

In [11]:
df_result = df.groupby('fuelType').agg({
    'vehicleId': 'count',
    'avgYearlyMileage': ['sum', 'mean']
})
df_result.columns = ['count', 'sumOfAvgs', 'totalAvgMileage']
df_result

Unnamed: 0_level_0,count,sumOfAvgs,totalAvgMileage
fuelType,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
diesel,14167694,121751300000.0,8593.589315
electric,114851,788030500.0,6861.328852
hybrid,656580,5593192000.0,8518.675118
other,31376,282563000.0,9005.703508
petrol,17155168,92857890000.0,5412.823


In [16]:
litres_per_gallon = 4.54609

# https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/1040512/env0103.ods
diesel_mpg = 56.1
petrol_mpg = 52.6

petrol_mpl = petrol_mpg / litres_per_gallon
diesel_mpl = diesel_mpg / litres_per_gallon

# year to date average (2022-09-20)
avg_litre_cost_petrol = 1.505557692307692
avg_litre_cost_diesel = 1.6493961538461535

diesel_pounds_per_mile = avg_litre_cost_diesel / diesel_mpl
petrol_pounds_per_mile = avg_litre_cost_petrol / petrol_mpl

# EV Efficiency
ev_efficiency = 1 / 3.5 # 3.5 is avg miles per kWh - kWh/miles: https://www.fleetalliance.co.uk/driver-ev/mpg-to-kwh-electric-car-efficiency-explained/#:~:text=Most%20EVs%20will%20cover%20between,it%20will%20cost%20to%20run
ev_avg_kWh_cost = 0.3 # £/kWh: https://energysavingtrust.org.uk/about-us/our-data#jumpto-6
ev_cost_per_mile = ev_efficiency * ev_avg_kWh_cost

perMileCost = {'diesel': diesel_pounds_per_mile, 'petrol': petrol_pounds_per_mile, 'electric': ev_cost_per_mile}

for index, row in df_result.iterrows():
    if index in ['petrol', 'diesel', 'electric']:
        print(index)
        annual_cost = row['totalAvgMileage'] * perMileCost[index]
        print(index, annual_cost)
        print("\n")

diesel
diesel 1148.6156799547155


electric
electric 588.1139016026


petrol
petrol 704.3256636091023




In [18]:
# Electric = 588.1139016026
# Petrol = 704.3256636091023
# Diesel = 1148.6156799547155

EV_vs_Diesel_saving = 1148.6156799547155 - 588.1139016026
EV_vs_Petrol_saving = 704.3256636091023 - 588.1139016026

print(
    "Petrol: " + str(EV_vs_Petrol_saving),
    "\n",
    "Diesel: " + str(EV_vs_Diesel_saving)
)

Petrol: 116.21176200650234 
 Diesel: 560.5017783521155
