# Vehicle Ratings

In [94]:
import pandas as pd
import numpy as np
import matplotlib as plt
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

### Path to ratings dataset

In [95]:
ratings_dataset = '../Processed_Data/API/ratings.pkl'

### Load ratings Data

In [96]:
df_ratings = pd.read_pickle(ratings_dataset)
print(df_ratings.shape)
df_ratings.head()

(7314, 21)


Unnamed: 0,ComplaintsCount,FrontCrashDriversideRating,FrontCrashPassengersideRating,InvestigationCount,Make,Model,ModelYear,NHTSAElectronicStabilityControl,NHTSAForwardCollisionWarning,NHTSALaneDepartureWarning,...,OverallRating,OverallSideCrashRating,RecallsCount,RolloverRating,RolloverRating2,SideCrashDriversideRating,SideCrashPassengersideRating,SidePoleCrashRating,VehicleDescription,sideBarrierRating-Overall
0,56,4,4,3,MITSUBISHI,ECLIPSE,1992,No,No,No,...,Not Rated,Not Rated,3,Not Rated,Not Rated,Not Rated,Not Rated,Not Rated,1992 Mitsubishi Eclipse 2-DR.,Not Rated
1,0,Not Rated,Not Rated,1,BMW,6 SERIES,2017,Standard,No,No,...,Not Rated,Not Rated,1,Not Rated,Not Rated,Not Rated,Not Rated,Not Rated,2017 BMW 6 Series C RWD,Not Rated
2,1,Not Rated,Not Rated,0,BMW,X5 HYBRID,2021,Standard,Standard,Standard,...,Not Rated,Not Rated,1,Not Rated,Not Rated,Not Rated,Not Rated,Not Rated,2021 BMW X5 Hybrid SUV AWD,Not Rated
3,0,Not Rated,Not Rated,0,LEXUS,LS 500,2021,Standard,Standard,Standard,...,Not Rated,Not Rated,0,Not Rated,Not Rated,Not Rated,Not Rated,Not Rated,2021 Lexus LS 500 4 DR AWD,Not Rated
4,156,5,5,0,CHEVROLET,TRAVERSE,2015,Standard,Optional,Optional,...,5,5,3,4,Not Rated,5,5,5,2015 Chevrolet Traverse SUV AWD,5


# Calculate the final rating for each model
## Convert text rating to number

Max grade: 14*5 = 70 (14 aspects and 5 start)

Grade range: [0,70]

To calculate grade ratio, just add 14 aspects subgrade together then divide by 70

In [97]:
df_ratings.columns

Index(['ComplaintsCount', 'FrontCrashDriversideRating',
       'FrontCrashPassengersideRating', 'InvestigationCount', 'Make', 'Model',
       'ModelYear', 'NHTSAElectronicStabilityControl',
       'OverallFrontCrashRating', 'OverallRating', 'OverallSideCrashRating',
       'RecallsCount', 'RolloverRating', 'RolloverRating2',
       'SideCrashDriversideRating', 'SideCrashPassengersideRating',
       'SidePoleCrashRating', 'VehicleDescription',
       'sideBarrierRating-Overall'],
      dtype='object')

In [98]:
mapping = {'No':0, 'Not Rated':0,
           'Optional': 2.5, 
           'Standard': 5}
df_ratings.replace( {'FrontCrashDriversideRating': mapping,
                    'FrontCrashPassengersideRating': mapping,
                    'NHTSAElectronicStabilityControl': mapping,
                    'NHTSAForwardCollisionWarning': mapping,
                    'NHTSALaneDepartureWarning': mapping,
                    'OverallFrontCrashRating': mapping,
                    'OverallRating': mapping,
                    'OverallSideCrashRating': mapping,
                    'RolloverRating': mapping,
                    'RolloverRating2': mapping,
                    'SideCrashDriversideRating': mapping,
                    'SideCrashPassengersideRating': mapping,
                    'SidePoleCrashRating': mapping,
                    'sideBarrierRating-Overall': mapping},
                    inplace=True
                  )

In [99]:
df_num_rating = df_ratings[['FrontCrashDriversideRating',
       'FrontCrashPassengersideRating','NHTSAElectronicStabilityControl',
       'NHTSAForwardCollisionWarning', 'NHTSALaneDepartureWarning',
       'OverallFrontCrashRating', 'OverallRating', 'OverallSideCrashRating',
       'RolloverRating', 'RolloverRating2',
       'SideCrashDriversideRating', 'SideCrashPassengersideRating',
       'SidePoleCrashRating','sideBarrierRating-Overall']]
df_num_rating.head()

Unnamed: 0,FrontCrashDriversideRating,FrontCrashPassengersideRating,NHTSAElectronicStabilityControl,NHTSAForwardCollisionWarning,NHTSALaneDepartureWarning,OverallFrontCrashRating,OverallRating,OverallSideCrashRating,RolloverRating,RolloverRating2,SideCrashDriversideRating,SideCrashPassengersideRating,SidePoleCrashRating,sideBarrierRating-Overall
0,4,4,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0
1,0,0,5.0,0.0,0.0,0,0,0,0,0,0,0,0,0
2,0,0,5.0,5.0,5.0,0,0,0,0,0,0,0,0,0
3,0,0,5.0,5.0,5.0,0,0,0,0,0,0,0,0,0
4,5,5,5.0,2.5,2.5,5,5,5,4,0,5,5,5,5


## Calculate grades

In [111]:
df_num_rating = df_num_rating.apply(pd.to_numeric, errors='coerce')
df_num_rating["sum"] = df_num_rating.sum(axis=1)
df_num_rating.groupby('sum').size()

sum
0.000000      117
2.014286        7
4.028571       22
5.035714        6
6.042857       15
             ... 
124.885714     68
125.892857     13
126.900000     86
128.914286    167
130.928571     59
Length: 116, dtype: int64

In [110]:
final_rating = pd.concat([df_ratings[['Make','Model','ModelYear']], (df_num_rating['sum']/70).rename('gradeRatio')], axis=1)
final_rating

Unnamed: 0,Make,Model,ModelYear,gradeRatio
0,MITSUBISHI,ECLIPSE,1992,0.114286
1,BMW,6 SERIES,2017,0.071429
2,BMW,X5 HYBRID,2021,0.214286
3,LEXUS,LS 500,2021,0.214286
4,CHEVROLET,TRAVERSE,2015,0.842857
...,...,...,...,...
7309,CHEVROLET,MALIBU,2022,0.757143
7310,DODGE,CHARGER,2015,0.214286
7311,FORD,EXPLORER,2004,0.392857
7312,RAM,2500,2021,0.764286


# Brand

In [119]:
final_rating.groupby('Make')['gradeRatio'].mean().sort_values(ascending=False).head(20)

Make
TESLA         0.775510
LINCOLN       0.636290
SUBARU        0.611310
TOYOTA        0.560317
ACURA         0.528095
VOLKSWAGEN    0.525415
MAZDA         0.510680
HONDA         0.505661
CADILLAC      0.486040
BUICK         0.478116
LEXUS         0.467310
RAM           0.459325
CHEVROLET     0.440849
KIA           0.435654
HYUNDAI       0.424460
FORD          0.422662
GENESIS       0.406211
GMC           0.401646
INFINITI      0.395681
JEEP          0.385629
Name: gradeRatio, dtype: float64