In [1]:
import pandas as pd

In [25]:
# Columns kept from the raw J1939 fault export; anything else in the CSV is skipped.
cols = [
    'RecordID',
    'ESS_Id',
    'EventTimeStamp',
    'eventDescription',
    'ecuModel',
    'ecuMake',
    'spn',
    'fmi',
    'active',
    'activeTransitionCount',
    'EquipmentID',
    'Latitude',
    'Longitude',
]

# Read every selected column as a string; dtypes are converted explicitly afterwards.
faults = pd.read_csv(
    "../data/J1939Faults.csv",
    usecols=cols,
    dtype=str,
)
faults.head()

Unnamed: 0,RecordID,ESS_Id,EventTimeStamp,eventDescription,ecuModel,ecuMake,spn,fmi,active,activeTransitionCount,EquipmentID,Latitude,Longitude
0,1,990349,2015-02-21 10:47:13.000,Low (Severity Low) Engine Coolant Level,unknown,unknown,111,17,True,2,1439,38.857638,-84.626851
1,2,990360,2015-02-21 11:34:34.000,,unknown,unknown,629,12,True,127,1439,38.857638,-84.626851
2,3,990364,2015-02-21 11:35:31.000,Incorrect Data Steering Wheel Angle,unknown,unknown,1807,2,False,127,1369,41.42125,-87.767361
3,4,990370,2015-02-21 11:35:33.000,Incorrect Data Steering Wheel Angle,unknown,unknown,1807,2,True,127,1369,41.421018,-87.767361
4,5,990416,2015-02-21 11:39:41.000,,0USA13_13_0415_2238A,VOLVO,4364,17,False,2,1674,38.416481,-89.442638


In [26]:
# Summarise column dtypes and non-null counts; every column was read with dtype=str.
faults.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1187335 entries, 0 to 1187334
Data columns (total 13 columns):
 #   Column                 Non-Null Count    Dtype 
---  ------                 --------------    ----- 
 0   RecordID               1187335 non-null  object
 1   ESS_Id                 1187335 non-null  object
 2   EventTimeStamp         1187335 non-null  object
 3   eventDescription       1126490 non-null  object
 4   ecuModel               1122577 non-null  object
 5   ecuMake                1122577 non-null  object
 6   spn                    1187335 non-null  object
 7   fmi                    1187335 non-null  object
 8   active                 1187335 non-null  object
 9   activeTransitionCount  1187335 non-null  object
 10  EquipmentID            1187335 non-null  object
 11  Latitude               1187335 non-null  object
 12  Longitude              1187335 non-null  object
dtypes: object(13)
memory usage: 117.8+ MB


In [27]:
# Convert only the columns that need a non-string dtype.
# Other columns hold numeric-looking values that are not continuous variables,
# so they are deliberately left as strings.
converters = {
    'EventTimeStamp': pd.to_datetime,
    'activeTransitionCount': lambda counts: counts.astype(int),
}
for column, convert in converters.items():
    faults[column] = convert(faults[column])

In [28]:
# Time of day may be an interesting feature for faults, so break the event
# timestamp into a separate calendar-date column and a time-of-day column.
event_ts = faults['EventTimeStamp'].dt
faults['event_date'] = event_ts.date
faults['event_time'] = event_ts.time

In [29]:
# Preview the first rows to confirm the new event_date / event_time columns.
faults.head()

Unnamed: 0,RecordID,ESS_Id,EventTimeStamp,eventDescription,ecuModel,ecuMake,spn,fmi,active,activeTransitionCount,EquipmentID,Latitude,Longitude,event_date,event_time
0,1,990349,2015-02-21 10:47:13,Low (Severity Low) Engine Coolant Level,unknown,unknown,111,17,True,2,1439,38.857638,-84.626851,2015-02-21,10:47:13
1,2,990360,2015-02-21 11:34:34,,unknown,unknown,629,12,True,127,1439,38.857638,-84.626851,2015-02-21,11:34:34
2,3,990364,2015-02-21 11:35:31,Incorrect Data Steering Wheel Angle,unknown,unknown,1807,2,False,127,1369,41.42125,-87.767361,2015-02-21,11:35:31
3,4,990370,2015-02-21 11:35:33,Incorrect Data Steering Wheel Angle,unknown,unknown,1807,2,True,127,1369,41.421018,-87.767361,2015-02-21,11:35:33
4,5,990416,2015-02-21 11:39:41,,0USA13_13_0415_2238A,VOLVO,4364,17,False,2,1674,38.416481,-89.442638,2015-02-21,11:39:41


In [37]:
# Masks for each service station, identified by latitude/longitude truncated
# (not rounded) to 2 decimal places. Latitude and Longitude are stored as
# strings, so a station is matched on the leading characters of each value.
def station_mask(frame, lat_prefix, lon_prefix):
    """Return a boolean mask of rows located at one service station.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame with string-typed 'Latitude' and 'Longitude' columns.
    lat_prefix : str
        Leading characters of the station latitude, e.g. '36.06'.
    lon_prefix : str
        Leading characters of the station longitude, including the sign,
        e.g. '-86.43'.

    Returns
    -------
    pandas.Series
        Boolean Series aligned with ``frame``; True where both coordinates
        start with the given prefixes. Missing coordinates count as False.

    Notes
    -----
    ``str.startswith`` is a literal, anchored comparison. ``str.contains``
    treats its pattern as a regular expression by default, so the '.' would
    match any character and the digits could match anywhere in the string
    (e.g. latitude '41.136006' would match the pattern '36.06').
    """
    lat_match = frame['Latitude'].str.startswith(lat_prefix, na=False)
    lon_match = frame['Longitude'].str.startswith(lon_prefix, na=False)
    return lat_match & lon_match


# Longitude prefixes carry the minus sign so that only western-hemisphere
# coordinates match, as in the station rows shown in the data.
station_1 = station_mask(faults, '36.06', '-86.43')
station_2 = station_mask(faults, '35.58', '-86.44')
station_3 = station_mask(faults, '36.19', '-83.17')
faults[station_1 | station_2 | station_3]

Unnamed: 0,RecordID,ESS_Id,EventTimeStamp,eventDescription,ecuModel,ecuMake,spn,fmi,active,activeTransitionCount,EquipmentID,Latitude,Longitude,event_date,event_time
17,18,990541,2015-02-21 11:47:13,,unknown,unknown,5444,1,False,126,302,36.066712,-86.433842,2015-02-21,11:47:13
18,19,990542,2015-02-21 11:47:13,,unknown,unknown,5396,1,False,126,302,36.066712,-86.433842,2015-02-21,11:47:13
21,22,990567,2015-02-21 11:47:15,,unknown,unknown,5396,1,True,126,302,36.066573999999996,-86.432962,2015-02-21,11:47:15
22,23,990568,2015-02-21 11:47:15,,unknown,unknown,5444,1,True,126,302,36.066573999999996,-86.432962,2015-02-21,11:47:15
24,25,990611,2015-02-21 11:50:50,Low (Severity Low) Engine Coolant Level,6X1u10D1500000000,CMMNS,111,17,True,1,1590,35.587731,-86.444351,2015-02-21,11:50:50
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1187290,1248414,123881027,2020-03-06 11:48:52,Low Voltage (Aftertreatment 1 Intake NOx),6X1u13D1500000000,CMMNS,3216,4,False,1,1970,36.195091999999995,-83.174814,2020-03-06,11:48:52
1187291,1248415,123881028,2020-03-06 11:48:52,Abnormal Rate of Change Aftertreatment 1 Intak...,6X1u13D1500000000,CMMNS,3216,10,False,1,1970,36.195091999999995,-83.174814,2020-03-06,11:48:52
1187292,1248416,123881029,2020-03-06 11:48:52,Data Drifted High Aftertreatment 1 Intake NOx,6X1u13D1500000000,CMMNS,3216,20,False,1,1970,36.195091999999995,-83.174814,2020-03-06,11:48:52
1187293,1248417,123881030,2020-03-06 11:48:52,High Voltage (Engine Oil Temperature 1),6X1u13D1500000000,CMMNS,175,3,False,1,1970,36.195091999999995,-83.174814,2020-03-06,11:48:52
