In [1]:
import pandas as pd
pd.set_option("display.max_columns", 100)
import numpy as np

from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import SimpleImputer, IterativeImputer, KNNImputer

from sklearn.pipeline import Pipeline

from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

In [2]:
cols = ['RecordID', 'ESS_Id', 'EventTimeStamp', 'eventDescription', 'spn', 'fmi', 'active', 'activeTransitionCount',
            'EquipmentID', 'Latitude', 'Longitude', 'LocationTimeStamp']
faults = pd.read_csv("../data/J1939Faults.csv", usecols = cols, dtype = str)
diagnostics = pd.read_csv("../data/VehicleDiagnosticOnboardData.csv")
sfc = pd.read_excel("../data/Service Fault Codes_1_0_0_167.xlsx")

  for idx, row in parser.parse():


In [3]:
diagnostics = pd.pivot(diagnostics, 
                       columns = ['Name'], 
                       values = 'Value', 
                       index = 'FaultId')


In [4]:
diagnostics = diagnostics.reset_index()
diagnostics['FaultId'] = diagnostics['FaultId'].astype(str)

In [5]:
# Convert columns to numeric
diagnostics[['AcceleratorPedal', 'BarometricPressure', 'CruiseControlSetSpeed', 'DistanceLtd', 'EngineCoolantTemperature', 'EngineLoad', 'EngineOilPressure', 'EngineOilTemperature', 'EngineRpm', 'EngineTimeLtd', 'FuelLevel', 'FuelLtd', 'FuelRate', 'FuelTemperature', 'IntakeManifoldTemperature', 'ServiceDistance', 'Speed', 'SwitchedBatteryVoltage', 'Throttle', 'TurboBoostPressure']] = diagnostics[['AcceleratorPedal', 'BarometricPressure', 'CruiseControlSetSpeed', 'DistanceLtd', 'EngineCoolantTemperature', 'EngineLoad', 'EngineOilPressure', 'EngineOilTemperature', 'EngineRpm', 'EngineTimeLtd', 'FuelLevel', 'FuelLtd', 'FuelRate', 'FuelTemperature', 'IntakeManifoldTemperature', 'ServiceDistance', 'Speed', 'SwitchedBatteryVoltage', 'Throttle', 'TurboBoostPressure']].apply(pd.to_numeric, errors = 'coerce')

In [6]:
diagnostics = pd.get_dummies(diagnostics, drop_first=True, columns = ['CruiseControlActive', 'IgnStatus', 'ParkingBrake'])
#diagnostics = diagnostics[['AcceleratorPedal', 'BarometricPressure', 'CruiseControlSetSpeed', 'DistanceLtd', 'EngineCoolantTemperature', 'EngineLoad', 'EngineOilPressure', 'EngineOilTemperature', 'EngineRpm', 'EngineTimeLtd', 'FuelLevel', 'FuelLtd', 'FuelRate', 'FuelTemperature', 'IntakeManifoldTemperature', 'LampStatus','ServiceDistance', 'Speed', 'SwitchedBatteryVoltage', 'Throttle', 'TurboBoostPressure', 'CruiseControlActive_True', 'IgnStatus_True', 'ParkingBrake_True']]
#diagnostics_orig = diagnostics.copy()

In [None]:
diagnostics.info()

In [None]:
#predictors = ['AcceleratorPedal', 'BarometricPressure', 'CruiseControlSetSpeed', 'DistanceLtd', 'EngineCoolantTemperature', 'EngineLoad', 'EngineOilPressure', 'EngineOilTemperature', 'EngineRpm', 'EngineTimeLtd', 'FuelLevel', 'FuelLtd', 'FuelRate', 'FuelTemperature', 'IntakeManifoldTemperature', 'ServiceDistance', 'Speed', 'SwitchedBatteryVoltage', 'Throttle', 'TurboBoostPressure']

In [None]:
#predictors = ['AcceleratorPedal', 'BarometricPressure', 'CruiseControlSetSpeed', 'DistanceLtd', 'EngineCoolantTemperature', 'EngineLoad', 'EngineOilPressure', 'EngineOilTemperature', 'EngineRpm', 'EngineTimeLtd', 'FuelLevel', 'FuelLtd', 'FuelRate', 'FuelTemperature', 'IntakeManifoldTemperature', 'ServiceDistance', 'Speed', 'SwitchedBatteryVoltage', 'Throttle', 'TurboBoostPressure']
#scaler = StandardScaler().fit(diagnostics[predictors])
#diagnostics_KNN = IterativeImputer().fit(scaler.transform(diagnostics[predictors]))

In [None]:
#diagnostics_KNN.transform(scaler.transform(diagnostics[predictors]))

In [None]:
#diagnostics[predictors] = diagnostics_KNN.transform(scaler.transform(diagnostics[predictors]))

In [None]:
diagnostics

**There are service locations at (36.0666667, -86.4347222), (35.5883333, -86.4438888), and (36.1950, -83.174722), so you should remove any records in the vicinity of these locations, as fault codes may be tripped when working on the vehicles.**

In [8]:
station_1 = (faults['Latitude'].str.contains('36.06')) & faults['Longitude'].str.contains('86.43')
station_2 = (faults['Latitude'].str.contains('35.58')) & faults['Longitude'].str.contains('86.44')
station_3 = (faults['Latitude'].str.contains('36.19')) & faults['Longitude'].str.contains('83.17')
faults = faults.drop(faults[station_1 | station_2 | station_3].index)

In [9]:
# creating a column that has the lat long values together.
faults["lat_long"] = list(zip(faults['Latitude'], faults['Longitude']))

In [10]:
# convert dtypes as needed
# some columns have numeric-appearing values but they're not continuous variables, leaving them as strings
faults['EventTimeStamp'] = pd.to_datetime(faults['EventTimeStamp'])
faults['activeTransitionCount'] = faults['activeTransitionCount'].astype(int)

In [11]:
# time of day for faults could be interesting, split event column to date and timestamp columns
faults['event_date'] = faults['EventTimeStamp'].dt.date
faults['event_time'] = faults['EventTimeStamp'].dt.time

In [12]:
faults = faults.copy()

faults['event_year'] = faults['EventTimeStamp'].dt.year
faults['event_month'] = faults['EventTimeStamp'].dt.month
faults['event_day'] = faults['EventTimeStamp'].dt.day
faults['event_dayofweek'] = faults['EventTimeStamp'].dt.dayofweek
faults['event_dayname'] = faults['EventTimeStamp'].dt.day_name()
faults['event_hour'] = faults['EventTimeStamp'].dt.hour
faults['event_time_quadrant'] = faults['EventTimeStamp'].dt.hour // 4

In [None]:
faults['RecordID'] = faults[['RecordID', '']].astype(str)

In [13]:
faults.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1059180 entries, 0 to 1187334
Data columns (total 23 columns):
 #   Column                 Non-Null Count    Dtype         
---  ------                 --------------    -----         
 0   RecordID               1059180 non-null  object        
 1   ESS_Id                 1059180 non-null  object        
 2   EventTimeStamp         1059180 non-null  datetime64[ns]
 3   eventDescription       1008154 non-null  object        
 4   spn                    1059180 non-null  object        
 5   fmi                    1059180 non-null  object        
 6   active                 1059180 non-null  object        
 7   activeTransitionCount  1059180 non-null  int64         
 8   faultValue             0 non-null        object        
 9   EquipmentID            1059180 non-null  object        
 10  Latitude               1059180 non-null  object        
 11  Longitude              1059180 non-null  object        
 12  LocationTimeStamp      10591

In [15]:
faults

Unnamed: 0,RecordID,ESS_Id,EventTimeStamp,eventDescription,spn,fmi,active,activeTransitionCount,faultValue,EquipmentID,Latitude,Longitude,LocationTimeStamp,lat_long,event_date,event_time,event_year,event_month,event_day,event_dayofweek,event_dayname,event_hour,event_time_quadrant
0,1,990349,2015-02-21 10:47:13,Low (Severity Low) Engine Coolant Level,111,17,True,2,,1439,38.857638,-84.626851,2015-02-21 11:34:25.000,"(38.857638, -84.626851)",2015-02-21,10:47:13,2015,2,21,5,Saturday,10,2
1,2,990360,2015-02-21 11:34:34,,629,12,True,127,,1439,38.857638,-84.626851,2015-02-21 11:35:10.000,"(38.857638, -84.626851)",2015-02-21,11:34:34,2015,2,21,5,Saturday,11,2
2,3,990364,2015-02-21 11:35:31,Incorrect Data Steering Wheel Angle,1807,2,False,127,,1369,41.42125,-87.767361,2015-02-21 11:35:26.000,"(41.42125, -87.767361)",2015-02-21,11:35:31,2015,2,21,5,Saturday,11,2
3,4,990370,2015-02-21 11:35:33,Incorrect Data Steering Wheel Angle,1807,2,True,127,,1369,41.421018,-87.767361,2015-02-21 11:36:08.000,"(41.421018, -87.767361)",2015-02-21,11:35:33,2015,2,21,5,Saturday,11,2
4,5,990416,2015-02-21 11:39:41,,4364,17,False,2,,1674,38.416481,-89.442638,2015-02-21 11:39:37.000,"(38.416481, -89.442638)",2015-02-21,11:39:41,2015,2,21,5,Saturday,11,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1187330,1248454,123904424,2020-03-06 14:00:26,Low (Severity Low) Catalyst Tank Level,1761,17,False,3,,2282,37.094768,-85.897407,2020-03-06 14:00:21.000,"(37.094768, -85.897407)",2020-03-06,14:00:26,2020,3,6,4,Friday,14,3
1187331,1248455,123905139,2020-03-06 14:04:23,Condition Exists Engine Protection Torque Derate,1569,31,True,5,,1994,34.39074,-79.461805,2020-03-06 14:04:59.000,"(34.39074, -79.461805)",2020-03-06,14:04:23,2020,3,6,4,Friday,14,3
1187332,1248456,123905996,2020-03-06 14:13:38,Abnormal Rate of Change Aftertreatment 1 Intak...,3216,10,True,1,,1850,34.43037,-84.920509,2020-03-06 14:14:14.000,"(34.43037, -84.920509)",2020-03-06,14:13:38,2020,3,6,4,Friday,14,3
1187333,1248457,123906113,2020-03-06 14:14:13,Low (Severity Medium) Engine Coolant Level,111,18,True,8,,2377,35.030925,-85.321527,2020-03-06 14:14:49.000,"(35.030925, -85.321527)",2020-03-06,14:14:13,2020,3,6,4,Friday,14,3


In [16]:
diagnostics

Unnamed: 0,FaultId,AcceleratorPedal,BarometricPressure,CruiseControlSetSpeed,DistanceLtd,EngineCoolantTemperature,EngineLoad,EngineOilPressure,EngineOilTemperature,EngineRpm,EngineTimeLtd,FuelLevel,FuelLtd,FuelRate,FuelTemperature,IntakeManifoldTemperature,LampStatus,ServiceDistance,Speed,SwitchedBatteryVoltage,Throttle,TurboBoostPressure,CruiseControlActive_True,IgnStatus_True,ParkingBrake_True
0,1,0.0,14.2100,66.48672,423178.70000,100.4,11.0,0.00,96.74375,0.00,1632.20,43.2,12300.907429,0.000000,,78.8,1023,,0.00000,3276.75,,0.00,0,0,1
1,2,,,,,,,,,,,,,,,,1279,,,,,,0,1,0
2,3,,,,,,,,,,,,,,,,1279,,,,,,0,0,0
3,4,,,,,,,,,,,,,,,,1279,,,,,,0,1,0
4,5,,,,,,,,,,,,,,,,16639,,,,,,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1187330,1248454,,,,,,,,,,,,,,,,1023,,,,,,0,0,0
1187331,1248455,100.0,14.5000,64.62260,423937.90000,185.0,51.0,37.12,211.49370,1310.25,10722.70,96.4,58979.184416,7.647805,32.0,98.6,18431,,65.01096,,73.2,7.83,1,1,0
1187332,1248456,0.0,14.3550,66.48672,465925.40000,186.8,62.0,41.18,212.84380,1340.75,9326.75,100.0,65080.105870,8.995086,,91.4,17407,,66.57410,,100.0,6.96,1,1,0
1187333,1248457,1.6,14.4275,67.72946,28606.65625,181.4,0.0,27.26,221.73120,863.25,586.75,23.6,4042.492826,0.000000,,100.4,1023,,11.84489,14.10,100.0,1.74,0,1,0


Merging Dataframes

In [18]:
faults_diagnostics = pd.merge(faults, diagnostics, left_on='RecordID', right_on='FaultId')

In [19]:
diag_features=['FaultId', 'AcceleratorPedal', 'BarometricPressure', 'CruiseControlSetSpeed', 'DistanceLtd',
               'EngineCoolantTemperature', 'EngineLoad', 'EngineOilPressure','EngineOilTemperature', 
               'EngineRpm', 'EngineTimeLtd', 'FuelLevel', 'FuelLtd', 'FuelRate', 'IntakeManifoldTemperature', 
               'LampStatus', 'Speed', 'TurboBoostPressure', 'CruiseControlActive_True', 'IgnStatus_True', 'ParkingBrake_True']

In [27]:
diagnostics_drop = faults_diagnostics.dropna(subset = diag_features)

In [28]:
diagnostics_drop

Unnamed: 0,RecordID,ESS_Id,EventTimeStamp,eventDescription,spn,fmi,active,activeTransitionCount,faultValue,EquipmentID,Latitude,Longitude,LocationTimeStamp,lat_long,event_date,event_time,event_year,event_month,event_day,event_dayofweek,event_dayname,event_hour,event_time_quadrant,FaultId,AcceleratorPedal,BarometricPressure,CruiseControlSetSpeed,DistanceLtd,EngineCoolantTemperature,EngineLoad,EngineOilPressure,EngineOilTemperature,EngineRpm,EngineTimeLtd,FuelLevel,FuelLtd,FuelRate,FuelTemperature,IntakeManifoldTemperature,LampStatus,ServiceDistance,Speed,SwitchedBatteryVoltage,Throttle,TurboBoostPressure,CruiseControlActive_True,IgnStatus_True,ParkingBrake_True
0,1,990349,2015-02-21 10:47:13,Low (Severity Low) Engine Coolant Level,111,17,True,2,,1439,38.857638,-84.626851,2015-02-21 11:34:25.000,"(38.857638, -84.626851)",2015-02-21,10:47:13,2015,2,21,5,Saturday,10,2,1,0.0,14.2100,66.48672,423178.70000,100.4,11.0,0.00,96.74375,0.000,1632.20,43.2,12300.907429,0.000000,,78.8,1023,,0.000000,3276.75,,0.00,0,0,1
5,6,990431,2015-02-21 11:40:22,Low (Severity Low) Engine Coolant Level,111,17,True,1,,1417,33.043564,-96.179722,2015-02-21 11:40:59.000,"(33.043564, -96.179722)",2015-02-21,11:40:22,2015,2,21,5,Saturday,11,2,6,48.0,14.4275,64.62260,470381.40000,181.4,30.0,38.28,196.53130,1514.500,9480.00,44.0,70349.809964,4.583399,,111.2,1023,,13.602200,3276.75,,6.67,0,1,0
6,7,990439,2015-02-21 11:40:52,Low (Severity Low) Engine Coolant Level,111,17,True,2,,1597,36.902916,-86.436481,2015-02-21 11:41:29.000,"(36.902916, -86.436481)",2015-02-21,11:40:52,2015,2,21,5,Saturday,11,2,7,82.8,14.2825,64.62260,278736.70000,188.6,80.0,39.44,210.03130,1711.375,6292.20,64.8,40961.065437,14.291750,,78.8,1023,,41.534780,3276.75,,20.59,0,1,0
14,15,990494,2015-02-21 11:14:38,Incorrect Data Brake Signal Sensor 1,1067,2,True,127,,309,36.181898,-86.69504599999999,2015-02-21 11:44:52.000,"(36.181898, -86.69504599999999)",2015-02-21,11:14:38,2015,2,21,5,Saturday,11,2,15,0.0,14.3550,66.48672,65022.28000,181.4,0.0,41.76,193.83130,1369.875,1308.90,51.6,9487.342990,0.000000,32.0,100.4,1279,,26.311190,3276.75,0.0,0.58,0,1,0
16,17,990504,2015-02-21 11:45:14,Low (Severity Low) Engine Coolant Level,111,17,True,1,,1601,38.279629,-78.935509,2015-02-21 11:45:51.000,"(38.279629, -78.935509)",2015-02-21,11:45:14,2015,2,21,5,Saturday,11,2,17,37.2,14.1375,66.48672,268575.90000,183.2,26.0,33.64,217.28750,1035.500,5004.75,66.0,39450.133385,2.166218,,60.8,17407,,36.389050,3276.75,,1.16,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1059170,1248448,123899434,2020-03-06 13:12:43,High Voltage (Fuel Level),96,3,True,126,,1936,30.376851000000002,-81.74495300000001,2020-03-06 13:29:33.000,"(30.376851000000002, -81.74495300000001)",2020-03-06,13:12:43,2020,3,6,4,Friday,13,3,1248448,0.0,14.6450,66.48672,391932.60000,181.4,11.0,22.62,197.60000,597.375,8016.75,60.0,51466.131257,0.620806,,120.2,1279,,0.941766,,100.0,1.16,0,1,0
1059173,1248452,123901805,2020-03-06 13:42:48,Low (Severity Medium) Engine Coolant Level,111,18,True,93,,1886,39.015694,-77.031157,2020-03-06 13:43:24.000,"(39.015694, -77.031157)",2020-03-06,13:42:48,2020,3,6,4,Friday,13,3,1248452,0.0,14.3550,66.48672,457529.70000,181.4,11.0,19.72,207.21880,600.250,13047.05,62.0,64491.926797,0.515137,,104.0,2047,,5.932153,,100.0,0.58,0,1,0
1059176,1248455,123905139,2020-03-06 14:04:23,Condition Exists Engine Protection Torque Derate,1569,31,True,5,,1994,34.39074,-79.461805,2020-03-06 14:04:59.000,"(34.39074, -79.461805)",2020-03-06,14:04:23,2020,3,6,4,Friday,14,3,1248455,100.0,14.5000,64.62260,423937.90000,185.0,51.0,37.12,211.49370,1310.250,10722.70,96.4,58979.184416,7.647805,32.0,98.6,18431,,65.010960,,73.2,7.83,1,1,0
1059177,1248456,123905996,2020-03-06 14:13:38,Abnormal Rate of Change Aftertreatment 1 Intak...,3216,10,True,1,,1850,34.43037,-84.920509,2020-03-06 14:14:14.000,"(34.43037, -84.920509)",2020-03-06,14:13:38,2020,3,6,4,Friday,14,3,1248456,0.0,14.3550,66.48672,465925.40000,186.8,62.0,41.18,212.84380,1340.750,9326.75,100.0,65080.105870,8.995086,,91.4,17407,,66.574100,,100.0,6.96,1,1,0


In [29]:
diagnostics_drop['active'].value_counts()

True    392843
Name: active, dtype: int64