In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [27]:
pd.options.display.max_columns = None

In [2]:
cols = ['RecordID', 'ESS_Id', 'EventTimeStamp', 'eventDescription', 'ecuSource', 'spn', 'fmi', 'active', 'activeTransitionCount',
           'EquipmentID', 'Latitude', 'Longitude']

faults = pd.read_csv("../data/J1939Faults.csv", usecols = cols, dtype = str)
faults.head()

Unnamed: 0,RecordID,ESS_Id,EventTimeStamp,eventDescription,ecuSource,spn,fmi,active,activeTransitionCount,EquipmentID,Latitude,Longitude
0,1,990349,2015-02-21 10:47:13.000,Low (Severity Low) Engine Coolant Level,0,111,17,True,2,1439,38.857638,-84.626851
1,2,990360,2015-02-21 11:34:34.000,,11,629,12,True,127,1439,38.857638,-84.626851
2,3,990364,2015-02-21 11:35:31.000,Incorrect Data Steering Wheel Angle,11,1807,2,False,127,1369,41.42125,-87.767361
3,4,990370,2015-02-21 11:35:33.000,Incorrect Data Steering Wheel Angle,11,1807,2,True,127,1369,41.421018,-87.767361
4,5,990416,2015-02-21 11:39:41.000,,0,4364,17,False,2,1674,38.416481,-89.442638


In [3]:
# convert dtypes as needed
# some columns have numeric-appearing values but they're not continuous variables, leaving them as strings
faults['EventTimeStamp'] = pd.to_datetime(faults['EventTimeStamp'])
faults['activeTransitionCount'] = faults['activeTransitionCount'].astype(int)

In [4]:
# time of day for faults could be interesting, split event column to date and timestamp columns
faults['event_date'] = faults['EventTimeStamp'].dt.date
faults['event_time'] = faults['EventTimeStamp'].dt.time

In [5]:
# sort df by equipment, spn, timestamp
faults = faults.copy()
faults = faults.sort_values(by = ['EquipmentID', 'spn', 'EventTimeStamp'])

# get index for first row in each group, yields list of integers
first_index = faults.groupby(['EquipmentID', 'spn']).head(1).index

# check first_index for rows where active == False
drop_index = first_index[faults.loc[first_index, 'active'] == 'False']

# drop rows where first index is an active False row
faults = faults.drop(drop_index)

# now this should work as expected
faults['false_eventTimeStamp'] = faults.sort_values(by = ['EventTimeStamp']).groupby(by = ['EquipmentID', 'spn'])['EventTimeStamp'].shift(-1)

In [6]:
# test to reveal if the process worked (this group previously had a False as the first timestamp)
faults.loc[(faults['EquipmentID'] == '1369') & (faults['spn'] == '1807')].sort_values(by = ['EventTimeStamp'])

Unnamed: 0,RecordID,ESS_Id,EventTimeStamp,eventDescription,ecuSource,spn,fmi,active,activeTransitionCount,EquipmentID,Latitude,Longitude,event_date,event_time,false_eventTimeStamp
3,4,990370,2015-02-21 11:35:33,Incorrect Data Steering Wheel Angle,11,1807,2,True,127,1369,41.421018,-87.767361,2015-02-21,11:35:33,2015-02-21 11:57:37
31,32,990702,2015-02-21 11:57:37,Incorrect Data Steering Wheel Angle,11,1807,2,False,127,1369,41.42787,-87.756759,2015-02-21,11:57:37,2015-02-21 12:13:47
49,50,990999,2015-02-21 12:13:47,Incorrect Data Steering Wheel Angle,11,1807,2,True,127,1369,41.431574,-87.758981,2015-02-21,12:13:47,2015-02-21 18:26:34
421,422,995975,2015-02-21 18:26:34,Incorrect Data Steering Wheel Angle,11,1807,2,False,127,1369,38.330833,-85.757037,2015-02-21,18:26:34,2015-02-21 18:26:37
422,423,995979,2015-02-21 18:26:37,Incorrect Data Steering Wheel Angle,11,1807,2,True,127,1369,38.330833,-85.757083,2015-02-21,18:26:37,2015-02-21 18:32:35
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6426,6427,1088496,2015-02-26 12:14:25,Incorrect Data Steering Wheel Angle,11,1807,2,True,127,1369,36.936018,-86.50726800000001,2015-02-26,12:14:25,2015-02-26 13:00:12
6434,6435,1089226,2015-02-26 13:00:12,Incorrect Data Steering Wheel Angle,11,1807,2,False,127,1369,36.717453000000006,-86.525,2015-02-26,13:00:12,2015-02-26 13:11:23
6437,6438,1089547,2015-02-26 13:11:23,Incorrect Data Steering Wheel Angle,11,1807,2,True,127,1369,36.718148,-86.525324,2015-02-26,13:11:23,2015-02-26 16:05:38
6492,6493,1094483,2015-02-26 16:05:38,Incorrect Data Steering Wheel Angle,11,1807,2,False,127,1369,35.679212,-88.745046,2015-02-26,16:05:38,2015-02-26 16:05:41


In [7]:
# mask for each service station, lat and long to 2 decimal places
station_1 = (faults['Latitude'].str.contains('36.06')) & (faults['Longitude'].str.contains('86.43'))
station_2 = (faults['Latitude'].str.contains('35.58')) & (faults['Longitude'].str.contains('86.44'))
station_3 = (faults['Latitude'].str.contains('36.19')) & (faults['Longitude'].str.contains('83.17'))

# dataframe without faults associated with service locations
faults_nonservice = faults[~(station_1 | station_2 | station_3)]

### next steps
- add date and time breakout columns (year, month, date, weekday, hour, day segment (divide 24 hours into quadrants))
- merge diagnostic data with filtered faults data (inner join)
- figure out imputing methods
- for models, imput after train/test split
- do some eda on the data

In [8]:
faults_nonservice = faults_nonservice.copy()

faults_nonservice['event_year'] = faults_nonservice['EventTimeStamp'].dt.year
faults_nonservice['event_month'] = faults_nonservice['EventTimeStamp'].dt.month
faults_nonservice['event_day'] = faults_nonservice['EventTimeStamp'].dt.day
faults_nonservice['event_dayofweek'] = faults_nonservice['EventTimeStamp'].dt.dayofweek
faults_nonservice['event_dayname'] = faults_nonservice['EventTimeStamp'].dt.day_name()
faults_nonservice['event_hour'] = faults_nonservice['EventTimeStamp'].dt.hour
faults_nonservice['event_time_quadrant'] = faults_nonservice['EventTimeStamp'].dt.hour // 4

In [9]:
# drop active == False (Kagon is keeping these rows so it will give us a good way to compare performance)
faults_active = faults_nonservice.loc[faults_nonservice['active'] == 'True']

In [10]:
# for each equipment id that experienced a derate, durration of time before derate for each fault
# complicated by equipment that has more than one derate event
# derates can occur at any time of day (tend to be clustered during working hours bet that's probably because there are more trucks active) so don't want to just group by day
# just to get something working, go on date for now
faults_active = faults_active.copy()
faults_active['derate_full'] = np.where(faults_active.groupby(['EquipmentID', 'event_date'])['spn'].transform(lambda x: (x == '5246').any()), 'True', 'False')
faults_active['derate_partial'] = np.where(faults_active.groupby(['EquipmentID', 'event_date'])['spn'].transform(lambda x: (x == '1569').any()), 'True', 'False')

# come back to this later when my brain works
# faults_active['derates_all'] = np.where(faults_active.groupby(['EquipmentID', 'event_date'])['spn'].transform(lambda x: (x == '5246').any()), 'True', 'False')

# once we get how to calculate time of event before derate this code can be modified slightly to detect

In [11]:
# partial derate occurring on the same day as a full derate - incorporate this as a signal boost?
# come back to this after initial model
faults_active.loc[(faults_active['spn'] == '1569') & (faults_active['derate_full'] == 'True')]

Unnamed: 0,RecordID,ESS_Id,EventTimeStamp,eventDescription,ecuSource,spn,fmi,active,activeTransitionCount,EquipmentID,...,false_eventTimeStamp,event_year,event_month,event_day,event_dayofweek,event_dayname,event_hour,event_time_quadrant,derate_full,derate_partial
996749,1032907,54267109,2018-07-06 04:53:05,Condition Exists Engine Protection Torque Derate,0,1569,31,True,2,105349576,...,2018-07-10 15:44:13,2018,7,6,4,Friday,4,1,True,True
5714,5715,1070647,2015-02-25 13:53:08,Condition Exists Engine Protection Torque Derate,0,1569,31,True,1,1329,...,2015-02-25 14:47:20,2015,2,25,2,Wednesday,13,3,True,True
82873,85259,2919536,2015-06-12 08:24:15,Condition Exists Engine Protection Torque Derate,0,1569,31,True,1,1339,...,2015-06-14 15:56:28,2015,6,12,4,Friday,8,2,True,True
82607,84993,2915364,2015-06-12 03:57:49,Condition Exists Engine Protection Torque Derate,0,1569,31,True,1,1366,...,2015-06-12 07:18:46,2015,6,12,4,Friday,3,0,True,True
83619,86005,2932223,2015-06-12 19:26:10,Condition Exists Engine Protection Torque Derate,0,1569,31,True,1,1366,...,2015-06-12 20:49:02,2015,6,12,4,Friday,19,4,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1125718,1177138,93351982,2019-07-09 00:11:00,Condition Exists Engine Protection Torque Derate,0,1569,31,True,1,1991,...,2019-07-11 13:30:47,2019,7,9,1,Tuesday,0,0,True,True
964660,995168,45003043,2018-04-07 02:07:44,Condition Exists Engine Protection Torque Derate,0,1569,31,True,2,2007,...,2018-04-07 09:45:39,2018,4,7,5,Saturday,2,0,True,True
794693,815232,18884125,2017-06-24 10:42:36,Condition Exists Engine Protection Torque Derate,0,1569,31,True,1,2009,...,2017-06-24 15:14:43,2017,6,24,5,Saturday,10,2,True,True
1139261,1192545,100080700,2019-08-30 18:05:29,Condition Exists Engine Protection Torque Derate,0,1569,31,True,1,2021,...,2019-09-06 23:47:17,2019,8,30,4,Friday,18,4,True,True


In [12]:
diagnostics = pd.read_csv('../data/VehicleDiagnosticOnboardData.csv')

In [13]:
# pivot on distinct values in 'Name'
diag_pivot = diagnostics.pivot(index = ['FaultId'], columns = ['Name'], values = ['Value'])
diag_pivot.columns = diag_pivot.columns.droplevel()

In [14]:
diag_pivot = diag_pivot.reset_index().rename_axis(None, axis = 1)


In [15]:
diag_pivot['FaultId'] = diag_pivot['FaultId'].astype(str)

In [16]:
# replace comma with period in all columns
diag_pivot = diag_pivot.replace(',', '.', regex = True)

In [17]:
# merge faults_active to diag_pivot on RecordID = FaultId
# how = left (keeps active True from faults and drops additional rows from diagnostics)
fault_diag = pd.merge(faults_active, diag_pivot, how = 'left', left_on = 'RecordID', right_on = 'FaultId')

In [18]:
fault_diag.isnull().sum()
# total rows ~550K
# rows that definitely don't have enough values to be useful: ServiceDistance, SwitchedBatteryVoltage
# extremely unlikely to be useful (1/3 to over half missing): FuelTemperature, ParkingBrake, Throttle
# iffy but possible to imput with some degree of meaning: FuelLevel, AcceleratorPedal
# quite a few are around 20-30K missing values

RecordID                          0
ESS_Id                            0
EventTimeStamp                    0
eventDescription              29996
ecuSource                         0
spn                               0
fmi                               0
active                            0
activeTransitionCount             0
EquipmentID                       0
Latitude                          0
Longitude                         0
event_date                        0
event_time                        0
false_eventTimeStamp            934
event_year                        0
event_month                       0
event_day                         0
event_dayofweek                   0
event_dayname                     0
event_hour                        0
event_time_quadrant               0
derate_full                       0
derate_partial                    0
FaultId                           0
AcceleratorPedal              68105
BarometricPressure            17752
CruiseControlActive         

In [19]:
# drop columns that are unlikely to be meaningful
fault_diag = fault_diag.drop(['ServiceDistance', 'SwitchedBatteryVoltage', 'ParkingBrake', 'FaultId'], axis = 1)

In [20]:
# convert data types
# add more as the need arises
convert_dict = {'AcceleratorPedal' : float,
                'BarometricPressure' : float,
                'CruiseControlSetSpeed' : float,
                'DistanceLtd' : float,
                'EngineCoolantTemperature': float,
                'EngineLoad' : float,
                'EngineOilPressure' : float,
                'EngineOilTemperature': float,
                'EngineRpm': float,
                'EngineTimeLtd': float,
                'FuelLevel' : float,
                'FuelLtd' : float,
                'FuelRate' : float,
                'FuelTemperature' : float,
                'IntakeManifoldTemperature' : float,
                #'LampStatus' : float,
                'Speed' : float,
                'Throttle' : float,
                'TurboBoostPressure' : float}

fault_diag = fault_diag.astype(convert_dict)

In [32]:
fault_diag.head(20)

Unnamed: 0,RecordID,ESS_Id,EventTimeStamp,eventDescription,ecuSource,spn,fmi,active,activeTransitionCount,EquipmentID,Latitude,Longitude,event_date,event_time,false_eventTimeStamp,event_year,event_month,event_day,event_dayofweek,event_dayname,event_hour,event_time_quadrant,derate_full,derate_partial,AcceleratorPedal,BarometricPressure,CruiseControlActive,CruiseControlSetSpeed,DistanceLtd,EngineCoolantTemperature,EngineLoad,EngineOilPressure,EngineOilTemperature,EngineRpm,EngineTimeLtd,FuelLevel,FuelLtd,FuelRate,FuelTemperature,IgnStatus,IntakeManifoldTemperature,LampStatus,Speed,Throttle,TurboBoostPressure,spn_fmi,EngineCoolantTemperature_cat,EngineLoad_cat,EngineOilPressure_cat,EngineOilTemperature_cat,EngineRpm_cat,FuelRate_cat,FuelTemperature_cat,IntakeManifoldTemperature_cat,TurboBoostPressure_cat
0,1038243,55748536,2018-07-20 09:31:33,High (Severity Medium) J1939 Network #2,49,1231,16,True,2,105406655,36.139351,-85.629722,2018-07-20,09:31:33,2018-07-20 09:42:30,2018,7,20,4,Friday,9,2,False,False,24.0,14.1375,False,0.0,91442.55,181.4,12.0,39.44,203.7312,1144.375,1703.05,,12311.606397,1.65108,,True,138.2,255.0,5.203984,100.0,5.8,1231_16,low,high,high,high,high,low,,high,low
1,366301,7171498,2016-01-31 07:12:25,,11,629,12,True,127,105301976,41.987175,-87.73199,2016-01-31,07:12:25,NaT,2016,1,31,6,Sunday,7,1,False,False,,,,,,,,,,,,,,,,True,,1279.0,,,,629_12,,,,,,,,,
2,1141606,81618595,2019-03-27 08:10:52,Low (Severity Medium) Engine Coolant Level,0,111,18,True,1,105338729,36.060324,-87.351435,2019-03-27,08:10:52,2019-03-27 08:12:58,2019,3,27,2,Wednesday,8,2,False,False,0.0,14.4275,False,0.0,112671.8,71.6,23.0,44.66,57.70625,601.625,2838.15,44.0,14987.537198,1.083109,,True,59.0,2047.0,0.0,100.0,0.29,111_18,low,high,high,high,high,low,,low,low
3,1142275,81843663,2019-03-29 06:08:56,Low (Severity Medium) Engine Coolant Level,0,111,18,True,3,105338729,34.410509000000005,-84.91824,2019-03-29,06:08:56,2019-03-29 06:12:07,2019,3,29,4,Friday,6,1,False,False,0.0,14.355,False,0.0,113623.2,84.2,25.0,42.34,81.78125,600.125,2859.9,52.8,15098.753632,0.990648,,True,73.4,2047.0,0.0,100.0,0.0,111_18,low,high,high,high,high,low,,normal,low
4,1149647,83749925,2019-04-16 12:27:33,Low (Severity Medium) Engine Coolant Level,0,111,18,True,4,105338729,36.142083,-85.365833,2019-04-16,12:27:33,2019-04-16 12:29:35,2019,4,16,1,Tuesday,12,3,False,False,16.4,13.92,False,0.0,119095.8,179.6,0.0,39.44,226.4,1227.5,2981.55,62.0,15782.959247,0.039626,,True,95.0,2047.0,67.52557,100.0,3.48,111_18,low,low,high,high,high,low,,high,low
5,1149942,83828217,2019-04-17 07:28:55,Low (Severity Medium) Engine Coolant Level,0,111,18,True,5,105338729,37.774351,-87.098148,2019-04-17,07:28:55,2019-04-17 07:30:51,2019,4,17,2,Wednesday,7,1,False,False,0.0,14.4275,False,0.0,119346.5,86.0,25.0,41.76,86.39375,601.5,2988.7,48.4,15809.508538,0.97744,,True,82.4,2047.0,0.0,100.0,0.0,111_18,low,high,high,high,high,low,,normal,low
6,1151355,84239911,2019-04-21 10:20:25,Low (Severity Medium) Engine Coolant Level,0,111,18,True,6,105338729,37.875833,-79.308194,2019-04-21,10:20:25,2019-04-21 10:23:04,2019,4,21,6,Sunday,10,2,False,False,0.0,13.775,False,0.0,120430.7,60.8,28.0,45.82,53.09375,700.5,3013.95,100.0,15969.99306,1.875627,,True,57.2,2047.0,0.0,100.0,0.87,111_18,low,high,high,high,high,low,,low,low
7,1039465,56177839,2018-07-24 22:25:59,Abnormal Frequency J1939 Network #2,49,1231,8,True,4,105338729,41.483796000000005,-88.179259,2018-07-24,22:25:59,2018-07-24 22:35:12,2018,7,24,1,Tuesday,22,5,False,False,0.0,14.355,False,0.0,52169.6,120.2,0.0,0.0,119.75,0.0,1202.1,87.6,7162.232674,0.0,,True,107.6,1279.0,0.0,100.0,0.0,1231_8,low,low,low,high,low,low,,high,low
8,1039485,56179492,2018-07-24 23:06:41,Abnormal Frequency J1939 Network #2,49,1231,8,True,6,105338729,41.483935,-88.179351,2018-07-24,23:06:41,2018-07-24 23:22:13,2018,7,24,1,Tuesday,23,5,False,False,0.0,14.355,False,0.0,52169.6,113.0,0.0,0.0,114.2375,0.0,1202.1,87.6,7162.232674,0.0,,True,102.2,1279.0,0.0,100.0,0.0,1231_8,low,low,low,high,low,low,,high,low
9,1088428,70080023,2018-12-02 23:53:13,Abnormal Frequency J1939 Network #2,49,1231,8,True,7,105338729,32.806759,-96.886157,2018-12-02,23:53:13,2018-12-02 23:57:31,2018,12,2,6,Sunday,23,5,False,False,,,,,,,,,,,2195.35,49.6,12247.016331,,,True,,1279.0,,,,1231_8,,,,,,,,,


In [22]:
# adding a concat column for spn and fmi
fault_diag['spn_fmi'] = fault_diag['spn'] + '_' + fault_diag['fmi']

Rob's idea to categorize diagnostics based on normal operating parameters, so categorical column with low, normal, high.
To implement: dictionary with diagnostic name as key and tuple or list of lower and upper limits of normal range
Loop through dictionary, create new column name based on diagnostic and assign low if below lower limit, normal if below upper limit, else high.

In [28]:
# replace with real operating parameters
diag_cols = {#'AcceleratorPedal' : [5, 10],
                #'BarometricPressure' : [5, 10],
                #'CruiseControlSetSpeed' : [5, 10],
                #'DistanceLtd' : [5, 10],
                'EngineCoolantTemperature': [180, 220],
                #'EngineLoad' : [5, 10], #this is % of laod at that moment of potential engine can make
                'EngineOilPressure' : [35, 50],
                'EngineOilTemperature': [180, 240],
                'EngineRpm': [1400, 1900],
                #'EngineTimeLtd': [5, 10],
                #'FuelLevel' : [5, 10],
                #'FuelLtd' : [5, 10],
                #'FuelRate' : [5, 10],
                'FuelTemperature' : [180, 10],
                'IntakeManifoldTemperature' : [60, 90],
                #'LampStatus' : [5, 10],
                #'Speed' : [5, 10],
                #'Throttle' : [5, 10],
                'TurboBoostPressure' : [36, 44]}

In [33]:
for col, bounds in diag_cols.items():
    low_bound, high_bound = bounds
    new_col = f'{col}_cat'
    fault_diag[new_col] = pd.cut(fault_diag[col], bins = [-float('inf'), low_bound, high_bound, float('inf')], labels = ['low', 'normal', 'high'])

In [34]:
fault_diag.head()

Unnamed: 0,RecordID,ESS_Id,EventTimeStamp,eventDescription,ecuSource,spn,fmi,active,activeTransitionCount,EquipmentID,Latitude,Longitude,event_date,event_time,false_eventTimeStamp,event_year,event_month,event_day,event_dayofweek,event_dayname,event_hour,event_time_quadrant,derate_full,derate_partial,AcceleratorPedal,BarometricPressure,CruiseControlActive,CruiseControlSetSpeed,DistanceLtd,EngineCoolantTemperature,EngineLoad,EngineOilPressure,EngineOilTemperature,EngineRpm,EngineTimeLtd,FuelLevel,FuelLtd,FuelRate,FuelTemperature,IgnStatus,IntakeManifoldTemperature,LampStatus,Speed,Throttle,TurboBoostPressure,spn_fmi,EngineCoolantTemperature_cat,EngineLoad_cat,EngineOilPressure_cat,EngineOilTemperature_cat,EngineRpm_cat,FuelRate_cat,FuelTemperature_cat,IntakeManifoldTemperature_cat,TurboBoostPressure_cat
0,1038243,55748536,2018-07-20 09:31:33,High (Severity Medium) J1939 Network #2,49,1231,16,True,2,105406655,36.139351,-85.629722,2018-07-20,09:31:33,2018-07-20 09:42:30,2018,7,20,4,Friday,9,2,False,False,24.0,14.1375,False,0.0,91442.55,181.4,12.0,39.44,203.7312,1144.375,1703.05,,12311.606397,1.65108,,True,138.2,255.0,5.203984,100.0,5.8,1231_16,low,high,high,high,high,low,,high,low
1,366301,7171498,2016-01-31 07:12:25,,11,629,12,True,127,105301976,41.987175,-87.73199,2016-01-31,07:12:25,NaT,2016,1,31,6,Sunday,7,1,False,False,,,,,,,,,,,,,,,,True,,1279.0,,,,629_12,,,,,,,,,
2,1141606,81618595,2019-03-27 08:10:52,Low (Severity Medium) Engine Coolant Level,0,111,18,True,1,105338729,36.060324,-87.351435,2019-03-27,08:10:52,2019-03-27 08:12:58,2019,3,27,2,Wednesday,8,2,False,False,0.0,14.4275,False,0.0,112671.8,71.6,23.0,44.66,57.70625,601.625,2838.15,44.0,14987.537198,1.083109,,True,59.0,2047.0,0.0,100.0,0.29,111_18,low,high,high,high,high,low,,low,low
3,1142275,81843663,2019-03-29 06:08:56,Low (Severity Medium) Engine Coolant Level,0,111,18,True,3,105338729,34.410509000000005,-84.91824,2019-03-29,06:08:56,2019-03-29 06:12:07,2019,3,29,4,Friday,6,1,False,False,0.0,14.355,False,0.0,113623.2,84.2,25.0,42.34,81.78125,600.125,2859.9,52.8,15098.753632,0.990648,,True,73.4,2047.0,0.0,100.0,0.0,111_18,low,high,high,high,high,low,,normal,low
4,1149647,83749925,2019-04-16 12:27:33,Low (Severity Medium) Engine Coolant Level,0,111,18,True,4,105338729,36.142083,-85.365833,2019-04-16,12:27:33,2019-04-16 12:29:35,2019,4,16,1,Tuesday,12,3,False,False,16.4,13.92,False,0.0,119095.8,179.6,0.0,39.44,226.4,1227.5,2981.55,62.0,15782.959247,0.039626,,True,95.0,2047.0,67.52557,100.0,3.48,111_18,low,low,high,high,high,low,,high,low


In [35]:
fault_diag.to_csv('../data/fault_diag.csv', index = False)

### target:
- group by equipment ID and sort by event datetime
- if equipment had derate (fault id) boolean column true for records within a certain timeframe prior
- looking for a signal with enough time to get the truck to a service location

### predictors:
- fault codes, diagnostics (downside is that a lot of values are missing, but they seem to be more present with active true)
- might not be value in looking at active false (Tomo's group only looked at active true)

### do this as part of data cleaning:
- pull timestamp from active false to get duration of active true (there should be paired rows based on fault code, different lights for different faults)
- do this before dropping service locations (don't want to miss active false which happened as a result of service)

### other thoughts:
- Code P0606 tends to be set when a PCM/ECM has failed. Depending on component condition and the make and model of a vehicle, it may be possible to resolve an internal integrity fault in the PCM/ECM by upgrading or re-flashing the control module.
- since it's a fault with the monitoring device itself, might not have any predictive power
- how many are there?
- if we get rid of this (and other faults not actually related to the engine) might reduce noise in the dataset?
- since there are active true with no diagnostics, could the rows with null diagnostics be faults like this one that don't have anything to do with the engine?