## Vehicle Analysis

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px 
from yaml import safe_load
import json
import sys 
import os
from pathlib import Path

# Project root is taken to be the parent of the notebook's working directory.
root_path = Path.cwd().parent

# sys.path entries must be plain strings: the import machinery ignores any
# other type, so appending the Path object itself would have no effect.
# The membership check keeps the cell idempotent when it is re-run.
if str(root_path) not in sys.path:
    sys.path.append(str(root_path))

# Show every column when a DataFrame is displayed (no truncation).
pd.set_option("display.max_columns", None)
print(root_path)


c:\Users\Emmanuel\Desktop\MLOps-vehicle-analysis


## Setting up config file

In [2]:
# Location of the YAML settings file, relative to the notebook's working directory.
CONFIG_FILE = "../include/config/initial_settings.yaml"

# Decode explicitly as UTF-8 so that parsing does not depend on the
# platform's default text encoding (which differs between operating systems).
with open(CONFIG_FILE, "r", encoding="utf-8") as f:
    config = safe_load(f)

## Importing dataset

In [3]:
# Dataset locations come from the "datasets" section of the loaded config.
datasets = config["datasets"]

# Read the vehicle table from the parquet file named in the config,
# then print a structural summary (columns, non-null counts, dtypes).
veh_data_path = str(datasets["veh_data"])
veh_data = pd.read_parquet(veh_data_path)
veh_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 47523 entries, 0 to 47522
Data columns (total 84 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   make             47523 non-null  object 
 1   model            47523 non-null  object 
 2   barrels08        47523 non-null  float64
 3   barrelsa08       47523 non-null  float64
 4   charge120        47523 non-null  float64
 5   charge240        47523 non-null  float64
 6   city08           47523 non-null  int64  
 7   city08u          47523 non-null  float64
 8   citya08          47523 non-null  int64  
 9   citya08u         47523 non-null  float64
 10  citycd           47523 non-null  float64
 11  citye            47523 non-null  float64
 12  cityuf           47523 non-null  float64
 13  co2              47523 non-null  int64  
 14  co2a             47523 non-null  int64  
 15  co2tailpipeagpm  47523 non-null  float64
 16  co2tailpipegpm   47523 non-null  float64
 17  comb08      

In [4]:
# Load the JSON schema that accompanies the vehicle dataset.
# The file is decoded explicitly as UTF-8 instead of relying on the
# platform's default text encoding.
with open(datasets["veh_data_schema"], "r", encoding="utf-8") as f:
    schema_document = json.load(f)

# Keep only the per-field definitions (column name -> type/title/description).
veh_data_schema = schema_document["definitions"]["all-vehicles-model_records"]["properties"]["fields"]["properties"]


In [5]:
# Display the field-definition mapping loaded from the dataset schema.
veh_data_schema

{'make': {'type': 'string',
  'title': 'Make',
  'description': 'manufacturer (division)'},
 'model': {'type': 'string',
  'title': 'Model',
  'description': 'model name (carline)'},
 'barrels08': {'type': 'number',
  'title': 'Annual Petroleum Consumption For Fuel Type1',
  'description': 'annual petroleum consumption in barrels for fuelType1 (1)'},
 'barrelsa08': {'type': 'number',
  'title': 'Annual Petroleum Consumption For Fuel Type2',
  'description': 'annual petroleum consumption in barrels for fuelType2 (1)'},
 'charge120': {'type': 'number',
  'title': 'Time to charge at 120V',
  'description': 'time to charge an electric vehicle in hours at 120 V'},
 'charge240': {'type': 'number',
  'title': 'Time to charge at 240V',
  'description': 'time to charge an electric vehicle in hours at 240 V'},
 'city08': {'type': 'integer',
  'title': 'City Mpg For Fuel Type1',
  'description': 'city MPG for fuelType1'},
 'city08u': {'type': 'number',
  'title': 'Unrounded City Mpg For Fuel Type

## Data Analysis

#### Setting up how the graphs will look

In [6]:
# Plot styling is read from the config: per-chart settings and layout settings.
graphs_config, update_layout = config["graphs_config"], config["update_layout"]

In [7]:
# Count records per manufacturer in ascending order, then keep the last 20
# rows, i.e. the 20 manufacturers with the most records.
make_counts = veh_data.value_counts("make", ascending=True)
makers = make_counts.reset_index(drop=False).tail(20)

# Horizontal bar chart: one bar per manufacturer, labelled with its count.
# Chart-specific keyword arguments are taken from the config.
fig = px.bar(
    data_frame=makers,
    x="count",
    y="make",
    text="count",
    title="Top 20 makers",
    labels={"count": "Number of cars", "make": "Maker"},
    **graphs_config["makers"],
)

# Apply the layout settings shared through the config.
fig.update_layout(update_layout["general"])

# Print the count outside each bar, using the '.2s' number format.
bar_label_font = dict(family="Arial", size=14, color="black")
fig.update_traces(
    texttemplate='%{text:.2s}',
    textposition='outside',
    textfont=bar_label_font,
)


In [8]:
# Number of records for every (make, basemodel) pair, most frequent first,
# returned as a flat table with the pair as regular columns.
model_counts = veh_data.value_counts(subset=["make", "basemodel"])
model_counts.reset_index()

Unnamed: 0,make,basemodel,count
0,GMC,Sierra,733
1,Ford,F150,657
2,BMW,3 Series,571
3,Porsche,911,567
4,Chevrolet,Silverado,452
...,...,...,...
1513,Volvo,Coupe,1
1514,ASC Incorporated,GNX,1
1515,AM General,Post Office DJ8,1
1516,Yugo,Gy/yugo GVX,1


In [9]:
# Select the BMW 3 Series records for 2024.
# Note that `year` is compared against the string '2024', not an integer.
is_bmw_3_series_2024 = (
    (veh_data["make"] == "BMW")
    & (veh_data["basemodel"] == "3 Series")
    & (veh_data["year"] == "2024")
)
veh_data[is_bmw_3_series_2024]

Unnamed: 0,make,model,barrels08,barrelsa08,charge120,charge240,city08,city08u,citya08,citya08u,citycd,citye,cityuf,co2,co2a,co2tailpipeagpm,co2tailpipegpm,comb08,comb08u,comba08,comba08u,combe,combinedcd,combineduf,cylinders,displ,drive,engid,eng_dscr,fescore,fuelcost08,fuelcosta08,fueltype,fueltype1,ghgscore,ghgscorea,highway08,highway08u,highwaya08,highwaya08u,highwaycd,highwaye,highwayuf,hlv,hpv,id,lv2,lv4,mpgdata,phevblended,pv2,pv4,range,rangecity,rangecitya,rangehwy,rangehwya,trany,ucity,ucitya,uhighway,uhighwaya,vclass,year,yousavespend,guzzler,trans_dscr,tcharger,scharger,atvtype,fueltype2,rangea,evmotor,mfrcode,c240dscr,charge240b,c240bdscr,createdon,modifiedon,startstop,phevcity,phevhwy,phevcomb,basemodel
9850,BMW,330i Sedan,10.258966,0.0,0.0,0.0,25,25.3702,0,0.0,0.0,0.0,0.0,308,-1,0.0,308.0,29,28.7104,0,0.0,0.0,0.0,0.0,4.0,2.0,Rear-Wheel Drive,300,SIDI,6.0,2300,0,Premium,Premium Gasoline,6.0,,34,34.2164,0,0.0,0.0,0.0,0.0,0,0,46656,0,13,N,False,0,94,0,0.0,0.0,0.0,0.0,Automatic (S8),32.8405,0.0,49.722,0.0,Compact Cars,2024,-2000,,,T,,,,,,BMX,,0.0,,2023-06-23,2024-01-18,Y,0,0,0,3 Series
14700,BMW,330e Sedan,5.689507,4.075479,0.0,3.0,24,23.7966,69,68.798,0.0,49.0,0.478,158,-1,0.0,158.0,27,26.873,73,72.5419,48.0,0.0,0.49,4.0,2.0,Rear-Wheel Drive,320,SIDI; PHEV,7.0,2500,0,Premium and Electricity,Premium Gasoline,8.0,,32,31.9159,78,77.7106,0.0,45.0,0.504,0,0,47490,0,9,N,True,0,94,0,0.0,20.81,0.0,21.74,Automatic (S8),34.8,98.1,46.0,111.0,Compact Cars,2024,500,,,T,,Plug-in Hybrid,Electricity,22.0,80 kW PMSM,BMX,,0.0,,2023-11-06,2024-01-18,Y,35,45,39,3 Series
24165,BMW,M340i xDrive Sedan,11.442692,0.0,0.0,0.0,23,22.6384,0,0.0,0.0,0.0,0.0,341,-1,0.0,341.0,26,25.8807,0,0.0,0.0,0.0,0.0,6.0,3.0,All-Wheel Drive,341,SIDI; Mild Hybrid,5.0,2600,0,Premium,Premium Gasoline,5.0,,31,31.3724,0,0.0,0.0,0.0,0.0,0,0,46769,0,13,N,False,0,94,0,0.0,0.0,0.0,0.0,Automatic (S8),28.9434,0.0,45.1294,0.0,Compact Cars,2024,-3500,,,T,,Hybrid,,,44V Li-Ion,BMX,,0.0,,2023-07-31,2024-01-18,Y,0,0,0,3 Series
26652,BMW,M340i Sedan,11.442692,0.0,0.0,0.0,23,22.8717,0,0.0,0.0,0.0,0.0,340,-1,0.0,340.0,26,26.0226,0,0.0,0.0,0.0,0.0,6.0,3.0,Rear-Wheel Drive,340,SIDI; Mild Hybrid,5.0,2600,0,Premium,Premium Gasoline,5.0,,31,31.2914,0,0.0,0.0,0.0,0.0,0,0,46768,0,13,N,False,0,94,0,0.0,0.0,0.0,0.0,Automatic (S8),29.2724,0.0,45.0,0.0,Compact Cars,2024,-3500,,,T,,Hybrid,,,44V Li-Ion,BMX,,0.0,,2023-07-31,2024-01-18,Y,0,0,0,3 Series
36474,BMW,330e xDrive Sedan,6.168642,4.375147,0.0,3.0,22,22.1326,64,64.0224,0.0,47.0,0.465,172,-1,0.0,172.0,26,25.6741,68,68.3785,45.0,0.0,0.476,4.0,2.0,All-Wheel Drive,321,SIDI; PHEV,7.0,2600,0,Premium and Electricity,Premium Gasoline,8.0,,32,31.9159,75,74.5807,0.0,43.0,0.489,0,0,47491,0,9,N,True,0,94,0,0.0,20.51,0.0,22.17,Automatic (S8),32.2,91.2,46.0,106.5,Compact Cars,2024,250,,,T,,Plug-in Hybrid,Electricity,20.0,80 kW PMSM,BMX,,0.0,,2023-11-06,2024-01-18,Y,31,44,36,3 Series
46536,BMW,330i xDrive Sedan,11.018889,0.0,0.0,0.0,24,23.9395,0,0.0,0.0,0.0,0.0,322,-1,0.0,322.0,27,27.4489,0,0.0,0.0,0.0,0.0,4.0,2.0,All-Wheel Drive,304,SIDI,5.0,2500,0,Premium,Premium Gasoline,5.0,,33,33.4404,0,0.0,0.0,0.0,0.0,0,0,46657,0,13,N,False,0,94,0,0.0,0.0,0.0,0.0,Automatic (S8),30.7874,0.0,48.4596,0.0,Compact Cars,2024,-3000,,,T,,,,,,BMX,,0.0,,2023-06-23,2024-01-18,Y,0,0,0,3 Series


In [10]:
# Pairwise correlation matrix; numeric_only=True restricts the computation to
# the numeric columns, so text columns such as make/model are left out.
veh_data.corr(numeric_only=True)

Unnamed: 0,barrels08,barrelsa08,charge120,charge240,city08,city08u,citya08,citya08u,citycd,citye,cityuf,co2,co2a,co2tailpipeagpm,co2tailpipegpm,comb08,comb08u,comba08,comba08u,combe,combinedcd,combineduf,cylinders,displ,fescore,fuelcost08,fuelcosta08,ghgscore,ghgscorea,highway08,highway08u,highwaya08,highwaya08u,highwaycd,highwaye,highwayuf,hlv,hpv,lv2,lv4,pv2,pv4,range,rangecity,rangecitya,rangehwy,rangehwya,ucity,ucitya,uhighway,uhighwaya,yousavespend,charge240b,phevcity,phevhwy,phevcomb
barrels08,1.0,0.04463,,-0.435071,-0.721626,-0.580639,-0.15645,-0.169465,-0.022371,-0.416283,-0.183725,-0.092215,0.040065,0.111122,0.990968,-0.764384,-0.576252,-0.151368,-0.166802,-0.423524,-0.019888,-0.183508,0.732937,0.782407,-0.940089,0.900283,0.106604,-0.949723,-0.827368,-0.810296,-0.557749,-0.141887,-0.160839,-0.01253,-0.430606,-0.183028,-0.227603,-0.249955,-0.054841,-0.228086,-0.055106,-0.271476,-0.410366,-0.261595,-0.164306,-0.258745,-0.167261,-0.708967,-0.162313,-0.820389,-0.06004,-0.906587,-0.16165,-0.181164,-0.184153,-0.182964
barrelsa08,0.04463,1.0,,0.03924,-0.047268,0.040627,0.51152,0.427251,0.142766,0.201472,0.285306,0.088435,0.57556,0.915292,0.046839,-0.053924,0.046955,0.556424,0.459875,0.196439,0.136513,0.286743,0.124387,0.140425,-0.056096,0.044999,0.915098,-0.011839,-0.548108,-0.062048,0.054933,0.617574,0.50574,0.118357,0.190244,0.288427,-0.042358,-0.049572,-0.060663,-0.056224,-0.061018,-0.061498,-0.023902,-0.015141,0.208625,-0.014976,0.223188,-0.045812,0.475418,-0.063397,0.679331,-0.027135,-0.009345,0.246636,0.26905,0.256774
charge120,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
charge240,-0.435071,0.03924,,1.0,0.788826,0.682678,0.168086,0.172985,0.028163,0.751278,0.2005,-0.071745,-0.016188,-0.024467,-0.439625,0.774547,0.639203,0.165653,0.171838,0.764389,0.026407,0.201007,-0.050356,-0.061945,0.527478,-0.29743,-0.01818,0.556056,0.298279,0.732025,0.566382,0.15961,0.167689,0.018669,0.776579,0.201583,0.060015,0.039832,-0.050375,0.015977,-0.050992,0.009629,0.913953,0.590444,0.186676,0.592101,0.191705,0.788271,0.170903,0.710341,0.089191,0.308478,0.415312,0.183543,0.189984,0.186839
city08,-0.721626,-0.047268,,0.788826,1.0,0.821887,0.063826,0.071199,0.003051,0.59537,0.070311,-0.022159,-0.034857,-0.068828,-0.731467,0.994336,0.775642,0.058491,0.067293,0.614585,0.001902,0.069296,-0.672473,-0.702966,0.733543,-0.630725,-0.067833,0.729534,0.756949,0.963019,0.698838,0.050316,0.061055,-0.00064,0.635067,0.067846,0.162364,0.166264,-0.034765,0.11023,-0.037439,0.125544,0.804712,0.565862,0.064506,0.553046,0.062628,0.998353,0.067759,0.958455,0.004751,0.630765,0.353331,0.084059,0.079873,0.082497
city08u,-0.580639,0.040627,,0.682678,0.821887,1.0,0.118563,0.130599,0.01134,0.529794,0.107313,0.416683,0.054323,0.009145,-0.588878,0.819203,0.994893,0.117658,0.13196,0.544339,0.009555,0.106623,-0.167888,-0.192076,0.733444,-0.440819,0.011284,0.729426,0.757638,0.797409,0.971716,0.11527,0.133231,0.005258,0.559568,0.1056,0.04264,0.028191,-0.088261,0.018037,-0.071855,0.046273,0.684415,0.477382,0.094153,0.467221,0.094248,0.823609,0.119951,0.804426,0.072025,0.44417,0.298098,0.114648,0.11368,0.114539
citya08,-0.15645,0.51152,,0.168086,0.063826,0.118563,1.0,0.98204,0.072681,0.49758,0.913506,0.036584,0.190062,0.268419,-0.156451,0.056764,0.117991,0.997059,0.979503,0.495975,0.063575,0.910194,-0.027617,-0.02996,0.199952,-0.068807,0.288507,0.254921,0.562024,0.040702,0.113056,0.983609,0.967718,0.036404,0.493603,0.90494,0.015349,0.00933,-0.040577,-0.00686,-0.039322,0.005838,-0.016138,-0.010222,0.818827,-0.010111,0.824924,0.067307,0.997109,0.043115,0.626169,0.11584,-0.00382,0.941283,0.942878,0.944341
citya08u,-0.169465,0.427251,,0.172985,0.071199,0.130599,0.98204,1.0,0.075088,0.508272,0.926488,0.048692,0.192374,0.167996,-0.169555,0.064662,0.130864,0.974446,0.997506,0.50657,0.066045,0.923164,-0.040935,-0.044333,0.200036,-0.075017,0.188392,0.25541,0.544306,0.048897,0.126841,0.9539,0.985571,0.038073,0.504034,0.917959,0.020694,0.014998,-0.034837,-0.003066,-0.03306,0.010586,-0.014015,-0.008878,0.829742,-0.008781,0.836144,0.075106,0.985396,0.052568,0.583082,0.122816,-0.002965,0.953189,0.955024,0.95635
citycd,-0.022371,0.142766,,0.028163,0.003051,0.01134,0.072681,0.075088,1.0,0.078582,0.13161,0.013134,-0.001632,-0.002466,-0.014156,0.002014,0.011847,0.079769,0.082595,0.07846,0.991963,0.130402,0.004012,-0.002709,0.00803,0.002182,-0.002491,0.018767,,0.000447,0.012,0.08942,0.092969,0.947859,0.079129,0.128573,-0.001225,-0.001895,-0.005477,-0.000522,-0.005692,-0.000757,-0.001679,-0.001063,0.089133,-0.001052,0.092504,0.002955,0.072393,0.000717,0.015888,0.002948,-0.000657,0.088335,0.106208,0.096164
citye,-0.416283,0.201472,,0.751278,0.59537,0.529794,0.49758,0.508272,0.078582,1.0,0.571451,-0.039448,-0.017642,-0.026666,-0.42177,0.579711,0.498089,0.50115,0.51448,0.998599,0.067169,0.575354,-0.033101,-0.049281,0.480477,-0.249205,-0.007403,0.551373,0.305334,0.537805,0.443353,0.496535,0.514515,0.040271,0.993446,0.580267,0.051713,0.034404,-0.051876,0.007501,-0.051156,0.014176,0.666248,0.374078,0.449337,0.373723,0.47714,0.600439,0.501418,0.527806,0.287498,0.28562,0.225341,0.497439,0.54017,0.517054
