# Identification of Train Related Events

## Documentation

The goal of this script is to:
1. Identify train related events
2. Extract timestamps of train related events
3. Compute timestamp of X duration prior to the train related event to get the target periods
4. Remove overlaps in target time periods


## Initialisation

### Load libraries

In [1]:
# Load libraries
import pandas as pd
import numpy as np
import datetime as dt
import time

### Configure display

In [2]:
# Enable display of all columns for dataframes with many variables
pd.set_option('display.max_columns', None)

### Set Up Directory

In [3]:
# Check current directory location
import os
cwd = os.getcwd()
cwd

'C:\\Users\\cftfda01\\Documents\\SBST Train IAMS Project\\scripts'

In [4]:
# Define root file directory folder where the files are being stored
#os.chdir(cwd + alarmLoc)
os.chdir(os.path.dirname(os.getcwd()) + '\\alarm-event-logs')

# Check current directory location
cwd = os.getcwd()

# Check directory location
cwd

'C:\\Users\\cftfda01\\Documents\\SBST Train IAMS Project\\alarm-event-logs'

In [5]:
# Location of Alarm and Normal Event Files
srcFiles = '\\taggedOutput\\main'

## Event Extraction

### Load AlarmList Files

In [6]:
# Define root file directory folder where the files are being stored
os.chdir(cwd + srcFiles)

# Check directory location
os.getcwd()

'C:\\Users\\cftfda01\\Documents\\SBST Train IAMS Project\\alarm-event-logs\\taggedOutput\\main'

In [7]:
# Gather list of tagged cleaned files (ATS + CMS environments only)
dfs = [pd.read_csv(f)
        for f in os.listdir(os.getcwd()) if (f.startswith('ATS') | f.startswith('CMS'))]

# Compile list of tagged cleaned files into a single dataframe
df = pd.concat(dfs, ignore_index=True).reset_index()

# Delete redundant index col
del df["index"]

# Format time values to time format
df['DATETIME_SENT'] = pd.to_datetime(df['DATETIME_SENT'])
df['DATETIME_RECEIVED'] = pd.to_datetime(df['DATETIME_RECEIVED'])
df['EQUIPMENT_DATE'] = pd.to_datetime(df['EQUIPMENT_DATE'])
df['ACQUISITION_DATE'] = pd.to_datetime(df['ACQUISITION_DATE'])
df['SCS_TIME'] = pd.to_datetime(df['SCS_TIME'])
df['TIME_CODE'] = pd.to_datetime(df['TIME_CODE'])

# Print df summary stats
print(df.shape)
print(df.info())

  if (await self.run_code(code, result,  async_=asy)):


(23618450, 38)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 23618450 entries, 0 to 23618449
Data columns (total 38 columns):
 #   Column                    Dtype         
---  ------                    -----         
 0   ENTRY_CODE_SUFFIX         object        
 1   ENTRY_CODE                int64         
 2   ALARM_ID                  int64         
 3   USER_ID                   int64         
 4   EQUIPMENT_NAME            float64       
 5   VALUE                     object        
 6   VALUE_STATE               int64         
 7   ACKNOWLEDGEMENT_REQUIRED  bool          
 8   SEVERITY                  int64         
 9   HIDDEN                    bool          
 10  THEME                     int64         
 11  EQUIPMENT_DATE            datetime64[ns]
 12  ACQUISITION_DATE          datetime64[ns]
 13  SCS_TIME                  datetime64[ns]
 14  FUNCTIONAL_CATEGORY       int64         
 15  GEOGRAPHICAL_CATEGORY     int64         
 16  ENVIRONMENT               object     

In [8]:
# Inspect df
df.head()

Unnamed: 0,ENTRY_CODE_SUFFIX,ENTRY_CODE,ALARM_ID,USER_ID,EQUIPMENT_NAME,VALUE,VALUE_STATE,ACKNOWLEDGEMENT_REQUIRED,SEVERITY,HIDDEN,THEME,EQUIPMENT_DATE,ACQUISITION_DATE,SCS_TIME,FUNCTIONAL_CATEGORY,GEOGRAPHICAL_CATEGORY,ENVIRONMENT,USER1,ASSET_ID_RAW,ASSET_DESCRIPTION,EVENT_DESCRIPTION,EVENT_STATUS,OPERATOR_INITIALS,ASSET_DESC_CAT,EVENT_DESC_CAT,TrainID,CarID,ServiceID,AssetClass,AssetSubClass,DATETIME_SENT,DATETIME_RECEIVED,TIME_CODE,isAlarm,NuisanceAlarm,RepeatAlarm,AltAlarm2,AltAlarm3
0,+,-283082340,50,0,,3,0,True,2,True,0,2020-12-30 02:09:59.609949952,2020-12-30 02:09:59.609949952,2020-12-30 02:09:59.609949952,51,17,OCCATS,1.0,SCS/BGK/B1/RTU22,BGK ISCS Signalling RTU 2,Communication with ISCS,IN SERVICE,,SUBLOCATION ISCS Signalling RTU,Communication with ISCS,,,,SCS,RTU,2020-12-30 02:10:01.322,2020-12-30 02:10:01.322,2020-12-30 02:10:01.322298880,True,False,False,False,False
1,+,-283082339,49,0,,3,0,True,2,True,0,2020-12-30 02:09:59.609949952,2020-12-30 02:09:59.609949952,2020-12-30 02:09:59.609949952,51,17,OCCATS,1.0,SCS/BGK/B1/RTU21,BGK ISCS Signalling RTU 1,Communication with ISCS,IN SERVICE,,SUBLOCATION ISCS Signalling RTU,Communication with ISCS,,,,SCS,RTU,2020-12-30 02:10:01.322,2020-12-30 02:10:01.322,2020-12-30 02:10:01.322298880,True,False,False,False,False
2,+,-283082338,51,0,,3,0,True,2,True,0,2020-12-30 02:09:59.609949952,2020-12-30 02:09:59.609949952,2020-12-30 02:09:59.609949952,51,17,OCCATS,1.0,SCS/BGK/B1/PMSB02,BGK ISCS Signalling PMS,Communication with Server,IN SERVICE,,SUBLOCATION ISCS Signalling PMS,Communication with Server,,,,SCS,PMSB,2020-12-30 02:10:01.322,2020-12-30 02:10:01.322,2020-12-30 02:10:01.322298880,True,False,False,False,False
3,+,-283084384,4654,0,,1,0,True,2,True,0,2020-12-30 02:09:59.667840000,2020-12-30 02:09:59.667840000,2020-12-30 02:09:59.667840000,10,10,OCCATS,1.0,SIG/FRP/B2/ASCV1011,CBI,DMS-CBI Link 2,OK,,CBI,DMS-CBI Link,,,,SIG,ASCV,2020-12-30 02:10:01.322,2020-12-30 02:10:01.322,2020-12-30 02:10:01.322298880,False,False,False,False,False
4,+,-283084241,4513,0,,0,0,True,4,True,0,2020-12-30 02:09:59.667840000,2020-12-30 02:09:59.667840000,2020-12-30 02:09:59.667840000,11,10,OCCATS,1.0,SIG/FRP/B2/DCU1001,N/B PSD Door 01,Service Status,OK,,N/B PSD Door,Service Status,,,,SIG,DCU,2020-12-30 02:10:01.322,2020-12-30 02:10:01.322,2020-12-30 02:10:01.322298880,False,False,False,False,False


In [9]:
# Create dictionary of target events:
targetEvents = [
                "Train Emergency Brake",
                "Brake Equipment Status",
                "Service Brake Status",
                "DT Emergency Brake From Other",
                "Emergency Brake by ATC",
                "DT All Brakes Applied Relay Status",
                "DT All Brakes Released Relay Status",
                "Service Brake Summary Status",
                "Emergency Brake Reset Request",
                "Train Stalled in Interstation",
                "Train Skip Stop Demand",
                "ATO - S_ATO_DRIV_ - Stopping point overrun",
                "ATO - S_ATO_DRIV_ - Stopping point underrun",
                "Automatic Hold applied due to TrainCar stalled in "
]

In [10]:
targetEvent_df = df.loc[
                        df["EVENT_DESC_CAT"].str.contains(targetEvents[0], na=False, regex=True) | 
                        df["EVENT_DESC_CAT"].str.contains(targetEvents[1], na=False, regex=True) |    
                        df["EVENT_DESC_CAT"].str.contains(targetEvents[2], na=False, regex=True) |    
                        df["EVENT_DESC_CAT"].str.contains(targetEvents[3], na=False, regex=True) |    
                        df["EVENT_DESC_CAT"].str.contains(targetEvents[4], na=False, regex=True) |                                                           
                        df["EVENT_DESC_CAT"].str.contains(targetEvents[5], na=False, regex=True) |    
                        df["EVENT_DESC_CAT"].str.contains(targetEvents[6], na=False, regex=True) |    
                        df["EVENT_DESC_CAT"].str.contains(targetEvents[7], na=False, regex=True) |    
                        df["EVENT_DESC_CAT"].str.contains(targetEvents[8], na=False, regex=True) |                                  
                        df["EVENT_DESC_CAT"].str.contains(targetEvents[9], na=False, regex=True) |    
                        df["EVENT_DESC_CAT"].str.contains(targetEvents[10], na=False, regex=True) |    
                        df["EVENT_DESC_CAT"].str.contains(targetEvents[11], na=False, regex=True) |    
                        df["EVENT_DESC_CAT"].str.contains(targetEvents[12], na=False, regex=True) |                                                           
                        df["EVENT_DESC_CAT"].str.contains(targetEvents[13], na=False, regex=True)                                                           
                       ].reset_index()

# Delete redundant index col
del targetEvent_df["index"]

# Inspect data
targetEvent_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 107806 entries, 0 to 107805
Data columns (total 38 columns):
 #   Column                    Non-Null Count   Dtype         
---  ------                    --------------   -----         
 0   ENTRY_CODE_SUFFIX         107806 non-null  object        
 1   ENTRY_CODE                107806 non-null  int64         
 2   ALARM_ID                  107806 non-null  int64         
 3   USER_ID                   107806 non-null  int64         
 4   EQUIPMENT_NAME            0 non-null       float64       
 5   VALUE                     107806 non-null  object        
 6   VALUE_STATE               107806 non-null  int64         
 7   ACKNOWLEDGEMENT_REQUIRED  107806 non-null  bool          
 8   SEVERITY                  107806 non-null  int64         
 9   HIDDEN                    107806 non-null  bool          
 10  THEME                     107806 non-null  int64         
 11  EQUIPMENT_DATE            107806 non-null  datetime64[ns]
 12  AC

In [11]:
# Inspect data
targetEvent_df.head()

Unnamed: 0,ENTRY_CODE_SUFFIX,ENTRY_CODE,ALARM_ID,USER_ID,EQUIPMENT_NAME,VALUE,VALUE_STATE,ACKNOWLEDGEMENT_REQUIRED,SEVERITY,HIDDEN,THEME,EQUIPMENT_DATE,ACQUISITION_DATE,SCS_TIME,FUNCTIONAL_CATEGORY,GEOGRAPHICAL_CATEGORY,ENVIRONMENT,USER1,ASSET_ID_RAW,ASSET_DESCRIPTION,EVENT_DESCRIPTION,EVENT_STATUS,OPERATOR_INITIALS,ASSET_DESC_CAT,EVENT_DESC_CAT,TrainID,CarID,ServiceID,AssetClass,AssetSubClass,DATETIME_SENT,DATETIME_RECEIVED,TIME_CODE,isAlarm,NuisanceAlarm,RepeatAlarm,AltAlarm2,AltAlarm3
0,+,-283084238,2075,0,,1,1,True,5,True,0,2000-01-01,2000-01-01,2020-12-30 02:09:59.667840000,12,27,OCCATS,1.0,TR___0052,Cons 139,Train Stalled in Interstation,STALLED,,Cons,Train Stalled in Interstation,52.0,,,TR,,2020-12-30 02:10:01.322,2020-12-30 02:10:01.322,2020-12-30 02:10:01.322298880,True,False,False,False,False
1,+,-283082724,2938,0,,0,0,True,3,True,0,2000-01-01,2000-01-01,2020-12-30 02:09:59.667840000,12,27,OCCATS,1.0,EMU/043/TRN/XXXXXXXX,Train 043,Train Stalled in Interstation,NOT STALLED,,Train,Train Stalled in Interstation,,,,EMU,XXXXXXXX,2020-12-30 02:10:01.322,2020-12-30 02:10:01.322,2020-12-30 02:10:01.322298880,True,False,False,False,False
2,+,-283082645,2938,0,,2,1,True,3,True,0,2000-01-01,2000-01-01,2020-12-30 02:09:59.667840000,12,27,OCCATS,1.0,EMU/043/TRN/XXXXXXXX,Train 043,Train Stalled in Interstation,UNKNOWN,,Train,Train Stalled in Interstation,,,,EMU,XXXXXXXX,2020-12-30 02:10:01.322,2020-12-30 02:10:01.322,2020-12-30 02:10:01.322298880,False,False,False,False,False
3,+,-283084832,5097,0,,1,1,True,5,True,0,2000-01-01,2000-01-01,2020-12-30 02:10:01.619056896,12,27,OCCATS,1.0,TR___0058,Cons 187,Train Emergency Brake,APPLIED,,Cons,Train Emergency Brake,58.0,,,TR,,2020-12-30 02:10:01.758,2020-12-30 02:10:01.758,2020-12-30 02:10:01.758992896,True,False,False,False,False
4,+,-283084833,5098,0,,0,0,True,5,True,0,2000-01-01,2000-01-01,2020-12-30 02:10:01.619056896,12,27,OCCATS,1.0,TR___0058,Cons 187,Emergency Brake by ATC,INACTIVE CAB,,Cons,Emergency Brake by ATC,58.0,,,TR,,2020-12-30 02:10:01.758,2020-12-30 02:10:01.758,2020-12-30 02:10:01.758992896,False,False,False,False,False


In [12]:
# Get Event Time
targetEvent_df.drop(targetEvent_df.columns.difference(["ENVIRONMENT","SCS_TIME"]), 1, inplace=True)

# Inspect data
targetEvent_df.head()

Unnamed: 0,SCS_TIME,ENVIRONMENT
0,2020-12-30 02:09:59.667840000,OCCATS
1,2020-12-30 02:09:59.667840000,OCCATS
2,2020-12-30 02:09:59.667840000,OCCATS
3,2020-12-30 02:10:01.619056896,OCCATS
4,2020-12-30 02:10:01.619056896,OCCATS


In [13]:
# Get Target Time Periods
offsetTime_minus = -10 # time in seconds
offsetTime_plus = 0 # time in seconds
targetEvent_df["SCS_TIME_minusT"] = targetEvent_df["SCS_TIME"] + pd.Timedelta(seconds = offsetTime_minus)
targetEvent_df["SCS_TIME_plusT"] = targetEvent_df["SCS_TIME"] + pd.Timedelta(seconds = offsetTime_plus)

# Inspect data
targetEvent_df.head()

Unnamed: 0,SCS_TIME,ENVIRONMENT,SCS_TIME_minusT,SCS_TIME_plusT
0,2020-12-30 02:09:59.667840000,OCCATS,2020-12-30 02:09:49.667840000,2020-12-30 02:09:59.667840000
1,2020-12-30 02:09:59.667840000,OCCATS,2020-12-30 02:09:49.667840000,2020-12-30 02:09:59.667840000
2,2020-12-30 02:09:59.667840000,OCCATS,2020-12-30 02:09:49.667840000,2020-12-30 02:09:59.667840000
3,2020-12-30 02:10:01.619056896,OCCATS,2020-12-30 02:09:51.619056896,2020-12-30 02:10:01.619056896
4,2020-12-30 02:10:01.619056896,OCCATS,2020-12-30 02:09:51.619056896,2020-12-30 02:10:01.619056896


In [14]:
# Inspect data
targetEvent_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 107806 entries, 0 to 107805
Data columns (total 4 columns):
 #   Column           Non-Null Count   Dtype         
---  ------           --------------   -----         
 0   SCS_TIME         107806 non-null  datetime64[ns]
 1   ENVIRONMENT      107806 non-null  object        
 2   SCS_TIME_minusT  107806 non-null  datetime64[ns]
 3   SCS_TIME_plusT   107806 non-null  datetime64[ns]
dtypes: datetime64[ns](3), object(1)
memory usage: 3.3+ MB


### Simplify Time Ranges

In [15]:
# Get Drop Redundant Columns
targetEvent_df.drop(targetEvent_df.columns.difference(["SCS_TIME_minusT", "SCS_TIME_plusT"]), 1, inplace=True)

# Remove duplicates
targetEvent_df.drop_duplicates(subset=None, keep='first', inplace=False, ignore_index=True)

# Sort data by time for ease of export in chronological order
df = df.sort_values(["SCS_TIME"], ignore_index=True) 

# Inspect data
targetEvent_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 107806 entries, 0 to 107805
Data columns (total 2 columns):
 #   Column           Non-Null Count   Dtype         
---  ------           --------------   -----         
 0   SCS_TIME_minusT  107806 non-null  datetime64[ns]
 1   SCS_TIME_plusT   107806 non-null  datetime64[ns]
dtypes: datetime64[ns](2)
memory usage: 1.6 MB


In [16]:
# Inspect data
targetEvent_df.head()

Unnamed: 0,SCS_TIME_minusT,SCS_TIME_plusT
0,2020-12-30 02:09:49.667840000,2020-12-30 02:09:59.667840000
1,2020-12-30 02:09:49.667840000,2020-12-30 02:09:59.667840000
2,2020-12-30 02:09:49.667840000,2020-12-30 02:09:59.667840000
3,2020-12-30 02:09:51.619056896,2020-12-30 02:10:01.619056896
4,2020-12-30 02:09:51.619056896,2020-12-30 02:10:01.619056896


In [17]:
# Convert data to list array
temp_list = targetEvent_df.values.tolist()
temp_list[0:5]

[[1609294189667840000, 1609294199667840000],
 [1609294189667840000, 1609294199667840000],
 [1609294189667840000, 1609294199667840000],
 [1609294191619056896, 1609294201619056896],
 [1609294191619056896, 1609294201619056896]]

In [18]:
# Consolidate overlapping timings 10 folds
for counter in range(10):
    temp_list.sort(key=lambda interval: interval[0])
    merged = [temp_list[0]]
    for current in temp_list:
        previous = merged[-1]
        if current[0] <= previous[1]:
            previous[1] = max(previous[1], current[1])
        else:
            merged.append(current)
   

print(len(merged))
merged[0:5]

30600


[[1609294189667840000, 1609294201896118016],
 [1609294206206928896, 1609294216206928896],
 [1609296875768499968, 1609296888613876992],
 [1609296893748866048, 1609296913849339904],
 [1609296925343454976, 1609296951247840000]]

In [19]:
targetEvent_df = pd.DataFrame.from_records(merged, columns = ["SCS_TIME_minusT", "SCS_TIME_plusT"])

# Remove duplicates
targetEvent_df = targetEvent_df.drop_duplicates(subset=None, keep='first', inplace=False, ignore_index=True)

# Inspect data
targetEvent_df.head()

Unnamed: 0,SCS_TIME_minusT,SCS_TIME_plusT
0,1609294189667840000,1609294201896118016
1,1609294206206928896,1609294216206928896
2,1609296875768499968,1609296888613876992
3,1609296893748866048,1609296913849339904
4,1609296925343454976,1609296951247840000


In [20]:
targetEvent_df['SCS_TIME_minusT']=pd.to_datetime(targetEvent_df['SCS_TIME_minusT'])
targetEvent_df['SCS_TIME_plusT']=pd.to_datetime(targetEvent_df['SCS_TIME_plusT'])

# Inspect data
targetEvent_df.head()

Unnamed: 0,SCS_TIME_minusT,SCS_TIME_plusT
0,2020-12-30 02:09:49.667840000,2020-12-30 02:10:01.896118016
1,2020-12-30 02:10:06.206928896,2020-12-30 02:10:16.206928896
2,2020-12-30 02:54:35.768499968,2020-12-30 02:54:48.613876992
3,2020-12-30 02:54:53.748866048,2020-12-30 02:55:13.849339904
4,2020-12-30 02:55:25.343454976,2020-12-30 02:55:51.247840000


In [21]:
# Inspect data
targetEvent_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30600 entries, 0 to 30599
Data columns (total 2 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   SCS_TIME_minusT  30600 non-null  datetime64[ns]
 1   SCS_TIME_plusT   30600 non-null  datetime64[ns]
dtypes: datetime64[ns](2)
memory usage: 478.2 KB


## Filter Time

In [22]:
# Initialise
eventSplice0 = df.loc[(df["SCS_TIME"] >= targetEvent_df['SCS_TIME_minusT'][0]) & 
                      (df["SCS_TIME"] <= targetEvent_df['SCS_TIME_plusT'][0])
                     ].reset_index()
# Delete redundant index col
del eventSplice0["index"]


maxCounter = len(targetEvent_df)
counter = 1

# Inspect data
eventSplice0.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2568 entries, 0 to 2567
Data columns (total 38 columns):
 #   Column                    Non-Null Count  Dtype         
---  ------                    --------------  -----         
 0   ENTRY_CODE_SUFFIX         2568 non-null   object        
 1   ENTRY_CODE                2568 non-null   int64         
 2   ALARM_ID                  2568 non-null   int64         
 3   USER_ID                   2568 non-null   int64         
 4   EQUIPMENT_NAME            0 non-null      float64       
 5   VALUE                     2568 non-null   object        
 6   VALUE_STATE               2568 non-null   int64         
 7   ACKNOWLEDGEMENT_REQUIRED  2568 non-null   bool          
 8   SEVERITY                  2568 non-null   int64         
 9   HIDDEN                    2568 non-null   bool          
 10  THEME                     2568 non-null   int64         
 11  EQUIPMENT_DATE            2568 non-null   datetime64[ns]
 12  ACQUISITION_DATE    

In [23]:
# Inspect data
eventSplice0.head()

Unnamed: 0,ENTRY_CODE_SUFFIX,ENTRY_CODE,ALARM_ID,USER_ID,EQUIPMENT_NAME,VALUE,VALUE_STATE,ACKNOWLEDGEMENT_REQUIRED,SEVERITY,HIDDEN,THEME,EQUIPMENT_DATE,ACQUISITION_DATE,SCS_TIME,FUNCTIONAL_CATEGORY,GEOGRAPHICAL_CATEGORY,ENVIRONMENT,USER1,ASSET_ID_RAW,ASSET_DESCRIPTION,EVENT_DESCRIPTION,EVENT_STATUS,OPERATOR_INITIALS,ASSET_DESC_CAT,EVENT_DESC_CAT,TrainID,CarID,ServiceID,AssetClass,AssetSubClass,DATETIME_SENT,DATETIME_RECEIVED,TIME_CODE,isAlarm,NuisanceAlarm,RepeatAlarm,AltAlarm2,AltAlarm3
0,+,-1172283365,2574946,0,,0,0,True,3,True,0,2020-12-30 02:09:50.324232192,2020-12-30 02:09:50.324232192,2020-12-30 02:09:50.324232192,40,5,OCCCMS,1.0,COM/OTP/B3/CAM60,OTP:222 AFG 8,Status,NORMAL,,AFG,Status,,,,COM,CAM,2020-12-30 02:09:50.503,2020-12-30 02:09:50.503,2020-12-30 02:09:50.503602944,False,True,False,True,True
1,+,-1172283366,2574947,0,,0,0,True,3,True,0,2020-12-30 02:09:50.577306880,2020-12-30 02:09:50.577306880,2020-12-30 02:09:50.577306880,40,11,OCCCMS,1.0,COM/BNK/B1/CAM22,BNK:210 SUBWY3E3,Status,NORMAL,,SUBWYE,Status,,,,COM,CAM,2020-12-30 02:09:51.504,2020-12-30 02:09:51.504,2020-12-30 02:09:51.504785920,False,True,False,True,True
2,+,-1172283370,2573986,0,,0,1,True,2,True,0,2020-12-30 02:09:51.588258048,2020-12-30 02:09:51.588258048,2020-12-30 02:09:51.588258048,51,17,OCCCMS,1.0,SCS/BGK/B1/PLC01,BGK ISCS PLC 1,Mux Selection 01,FAULT,,SUBLOCATION ISCS PLC,Mux Selection,,,,SCS,PLC,2020-12-30 02:09:52.503,2020-12-30 02:09:52.503,2020-12-30 02:09:52.503451136,True,False,False,False,False
3,+,-1172283369,2573986,0,,1,0,True,2,True,0,2020-12-30 02:09:51.588258048,2020-12-30 02:09:51.588258048,2020-12-30 02:09:51.588258048,51,17,OCCCMS,1.0,SCS/BGK/B1/PLC01,BGK ISCS PLC 1,Mux Selection 01,NORMAL,,SUBLOCATION ISCS PLC,Mux Selection,,,,SCS,PLC,2020-12-30 02:09:52.503,2020-12-30 02:09:52.503,2020-12-30 02:09:52.503451136,False,True,True,False,False
4,+,-1172283368,2574949,0,,1,0,True,3,True,0,2020-12-30 02:09:51.832740096,2020-12-30 02:09:51.832740096,2020-12-30 02:09:51.832740096,40,9,OCCCMS,1.0,COM/LTI/B2/CAM11,LTI:111 PHONE 1,Status,FAILURE,,PHONE,Status,,,,COM,CAM,2020-12-30 02:09:51.504,2020-12-30 02:09:51.504,2020-12-30 02:09:51.504785920,False,True,False,True,True


In [24]:

while (counter < maxCounter):
    eventSplice = df.loc[(df["SCS_TIME"] >= targetEvent_df['SCS_TIME_minusT'][counter]) & 
                         (df["SCS_TIME"] <= targetEvent_df['SCS_TIME_plusT'][counter])
                        ].reset_index()
    # Delete redundant index col
    del eventSplice["index"]
    
    # Merge processed data
    eventSplice0 = pd.concat([eventSplice0, eventSplice])
    
    # Update Counter
    counter = counter + 1

# Inspect data
eventSplice0.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 3719829 entries, 0 to 8
Data columns (total 38 columns):
 #   Column                    Dtype         
---  ------                    -----         
 0   ENTRY_CODE_SUFFIX         object        
 1   ENTRY_CODE                int64         
 2   ALARM_ID                  int64         
 3   USER_ID                   int64         
 4   EQUIPMENT_NAME            float64       
 5   VALUE                     object        
 6   VALUE_STATE               int64         
 7   ACKNOWLEDGEMENT_REQUIRED  bool          
 8   SEVERITY                  int64         
 9   HIDDEN                    bool          
 10  THEME                     int64         
 11  EQUIPMENT_DATE            datetime64[ns]
 12  ACQUISITION_DATE          datetime64[ns]
 13  SCS_TIME                  datetime64[ns]
 14  FUNCTIONAL_CATEGORY       int64         
 15  GEOGRAPHICAL_CATEGORY     int64         
 16  ENVIRONMENT               object        
 17  USER1         

### Sort Data by Time

In [27]:
eventSplice0.head()

Unnamed: 0,ENTRY_CODE_SUFFIX,ENTRY_CODE,ALARM_ID,USER_ID,EQUIPMENT_NAME,VALUE,VALUE_STATE,ACKNOWLEDGEMENT_REQUIRED,SEVERITY,HIDDEN,THEME,EQUIPMENT_DATE,ACQUISITION_DATE,SCS_TIME,FUNCTIONAL_CATEGORY,GEOGRAPHICAL_CATEGORY,ENVIRONMENT,USER1,ASSET_ID_RAW,ASSET_DESCRIPTION,EVENT_DESCRIPTION,EVENT_STATUS,OPERATOR_INITIALS,ASSET_DESC_CAT,EVENT_DESC_CAT,TrainID,CarID,ServiceID,AssetClass,AssetSubClass,DATETIME_SENT,DATETIME_RECEIVED,TIME_CODE,isAlarm,NuisanceAlarm,RepeatAlarm,AltAlarm2,AltAlarm3
0,+,-1172283365,2574946,0,,0,0,True,3,True,0,2020-12-30 02:09:50.324232192,2020-12-30 02:09:50.324232192,2020-12-30 02:09:50.324232192,40,5,OCCCMS,1.0,COM/OTP/B3/CAM60,OTP:222 AFG 8,Status,NORMAL,,AFG,Status,,,,COM,CAM,2020-12-30 02:09:50.503,2020-12-30 02:09:50.503,2020-12-30 02:09:50.503602944,False,True,False,True,True
1,+,-1172283366,2574947,0,,0,0,True,3,True,0,2020-12-30 02:09:50.577306880,2020-12-30 02:09:50.577306880,2020-12-30 02:09:50.577306880,40,11,OCCCMS,1.0,COM/BNK/B1/CAM22,BNK:210 SUBWY3E3,Status,NORMAL,,SUBWYE,Status,,,,COM,CAM,2020-12-30 02:09:51.504,2020-12-30 02:09:51.504,2020-12-30 02:09:51.504785920,False,True,False,True,True
2,+,-1172283370,2573986,0,,0,1,True,2,True,0,2020-12-30 02:09:51.588258048,2020-12-30 02:09:51.588258048,2020-12-30 02:09:51.588258048,51,17,OCCCMS,1.0,SCS/BGK/B1/PLC01,BGK ISCS PLC 1,Mux Selection 01,FAULT,,SUBLOCATION ISCS PLC,Mux Selection,,,,SCS,PLC,2020-12-30 02:09:52.503,2020-12-30 02:09:52.503,2020-12-30 02:09:52.503451136,True,False,False,False,False
3,+,-1172283369,2573986,0,,1,0,True,2,True,0,2020-12-30 02:09:51.588258048,2020-12-30 02:09:51.588258048,2020-12-30 02:09:51.588258048,51,17,OCCCMS,1.0,SCS/BGK/B1/PLC01,BGK ISCS PLC 1,Mux Selection 01,NORMAL,,SUBLOCATION ISCS PLC,Mux Selection,,,,SCS,PLC,2020-12-30 02:09:52.503,2020-12-30 02:09:52.503,2020-12-30 02:09:52.503451136,False,True,True,False,False
4,+,-1172283368,2574949,0,,1,0,True,3,True,0,2020-12-30 02:09:51.832740096,2020-12-30 02:09:51.832740096,2020-12-30 02:09:51.832740096,40,9,OCCCMS,1.0,COM/LTI/B2/CAM11,LTI:111 PHONE 1,Status,FAILURE,,PHONE,Status,,,,COM,CAM,2020-12-30 02:09:51.504,2020-12-30 02:09:51.504,2020-12-30 02:09:51.504785920,False,True,False,True,True


In [28]:
eventSplice0.tail()

Unnamed: 0,ENTRY_CODE_SUFFIX,ENTRY_CODE,ALARM_ID,USER_ID,EQUIPMENT_NAME,VALUE,VALUE_STATE,ACKNOWLEDGEMENT_REQUIRED,SEVERITY,HIDDEN,THEME,EQUIPMENT_DATE,ACQUISITION_DATE,SCS_TIME,FUNCTIONAL_CATEGORY,GEOGRAPHICAL_CATEGORY,ENVIRONMENT,USER1,ASSET_ID_RAW,ASSET_DESCRIPTION,EVENT_DESCRIPTION,EVENT_STATUS,OPERATOR_INITIALS,ASSET_DESC_CAT,EVENT_DESC_CAT,TrainID,CarID,ServiceID,AssetClass,AssetSubClass,DATETIME_SENT,DATETIME_RECEIVED,TIME_CODE,isAlarm,NuisanceAlarm,RepeatAlarm,AltAlarm2,AltAlarm3
4,+,-287126212,1444975,0,,1,1,True,5,True,0,2000-01-01 00:00:00.000000000,2000-01-01 00:00:00.000000000,2021-02-01 01:46:38.912076032,12,27,OCCATS,1.0,TR___0052,Cons 139,Active Cab Auto-Test Status,FAILURE,ARA,Cons,Active Cab Auto-Test Status,52.0,,,TR,,2021-02-01 01:46:39.003,2021-02-01 01:46:39.003,2021-02-01 01:46:39.336800,True,False,False,False,False
5,+,-287126214,1457046,0,,0,1,True,7,True,0,2021-02-01 01:46:35.729468160,2021-02-01 01:46:35.729468160,2021-02-01 01:46:38.912076032,11,3,OCCATS,1.0,SIG/NED/1211/SPKS0001,NED NB SPKS (1),Status,OPERATED,ARA,SUBLOCATION NB SPKS (),Status,,,,SIG,SPKS,2021-02-01 01:46:39.003,2021-02-01 01:46:39.003,2021-02-01 01:46:39.336800,True,True,True,False,False
6,+,-287126215,1457047,0,,0,1,True,7,True,0,2021-02-01 01:46:36.737451008,2021-02-01 01:46:36.737451008,2021-02-01 01:46:38.912076032,11,3,OCCATS,1.0,SIG/NED/1211/SPKS0003,NED SB SPKS (3),Status,OPERATED,ARA,SUBLOCATION SB SPKS (),Status,,,,SIG,SPKS,2021-02-01 01:46:39.003,2021-02-01 01:46:39.003,2021-02-01 01:46:39.336800,True,True,True,False,False
7,+,-287126209,1457014,0,,0,1,True,5,True,0,2000-01-01 00:00:00.000000000,2000-01-01 00:00:00.000000000,2021-02-01 01:46:38.912076032,12,27,OCCATS,1.0,TR___0052,Cons 139,State of Train Localisation,DE-LOCALISED,ARA,Cons,State of Train Localisation,52.0,,,TR,,2021-02-01 01:46:39.003,2021-02-01 01:46:39.003,2021-02-01 01:46:39.336800,True,False,False,False,False
8,+,-287126213,1457045,0,,0,0,True,3,True,0,2000-01-01 00:00:00.000000000,2000-01-01 00:00:00.000000000,2021-02-01 01:46:38.912076032,12,27,OCCATS,1.0,TR___0052,Cons 139,Train Emergency Brake,NOT APPLIED,ARA,Cons,Train Emergency Brake,52.0,,,TR,,2021-02-01 01:46:39.003,2021-02-01 01:46:39.003,2021-02-01 01:46:39.336800,False,True,True,False,False


In [None]:
# Sort data by time for ease of export in chronological order (TEST)
#df.sort_values(["SCS_TIME"], ignore_index=True) 

In [29]:
# Sort data by time for ease of export in chronological order
eventSplice0 = eventSplice0.sort_values(["SCS_TIME"], ignore_index=True) 

In [37]:
eventSplice0.head()

Unnamed: 0,ENTRY_CODE_SUFFIX,ENTRY_CODE,ALARM_ID,USER_ID,EQUIPMENT_NAME,VALUE,VALUE_STATE,ACKNOWLEDGEMENT_REQUIRED,SEVERITY,HIDDEN,THEME,EQUIPMENT_DATE,ACQUISITION_DATE,SCS_TIME,FUNCTIONAL_CATEGORY,GEOGRAPHICAL_CATEGORY,ENVIRONMENT,USER1,ASSET_ID_RAW,ASSET_DESCRIPTION,EVENT_DESCRIPTION,EVENT_STATUS,OPERATOR_INITIALS,ASSET_DESC_CAT,EVENT_DESC_CAT,TrainID,CarID,ServiceID,AssetClass,AssetSubClass,DATETIME_SENT,DATETIME_RECEIVED,TIME_CODE,isAlarm,NuisanceAlarm,RepeatAlarm,AltAlarm2,AltAlarm3
0,+,-1172283365,2574946,0,,0,0,True,3,True,0,2020-12-30 02:09:50.324232192,2020-12-30 02:09:50.324232192,2020-12-30 02:09:50.324232192,40,5,OCCCMS,1.0,COM/OTP/B3/CAM60,OTP:222 AFG 8,Status,NORMAL,,AFG,Status,,,,COM,CAM,2020-12-30 02:09:50.503,2020-12-30 02:09:50.503,2020-12-30 02:09:50.503602944,False,True,False,True,True
1,+,-1172283366,2574947,0,,0,0,True,3,True,0,2020-12-30 02:09:50.577306880,2020-12-30 02:09:50.577306880,2020-12-30 02:09:50.577306880,40,11,OCCCMS,1.0,COM/BNK/B1/CAM22,BNK:210 SUBWY3E3,Status,NORMAL,,SUBWYE,Status,,,,COM,CAM,2020-12-30 02:09:51.504,2020-12-30 02:09:51.504,2020-12-30 02:09:51.504785920,False,True,False,True,True
2,+,-1172283370,2573986,0,,0,1,True,2,True,0,2020-12-30 02:09:51.588258048,2020-12-30 02:09:51.588258048,2020-12-30 02:09:51.588258048,51,17,OCCCMS,1.0,SCS/BGK/B1/PLC01,BGK ISCS PLC 1,Mux Selection 01,FAULT,,SUBLOCATION ISCS PLC,Mux Selection,,,,SCS,PLC,2020-12-30 02:09:52.503,2020-12-30 02:09:52.503,2020-12-30 02:09:52.503451136,True,False,False,False,False
3,+,-1172283369,2573986,0,,1,0,True,2,True,0,2020-12-30 02:09:51.588258048,2020-12-30 02:09:51.588258048,2020-12-30 02:09:51.588258048,51,17,OCCCMS,1.0,SCS/BGK/B1/PLC01,BGK ISCS PLC 1,Mux Selection 01,NORMAL,,SUBLOCATION ISCS PLC,Mux Selection,,,,SCS,PLC,2020-12-30 02:09:52.503,2020-12-30 02:09:52.503,2020-12-30 02:09:52.503451136,False,True,True,False,False
4,+,-1172283368,2574949,0,,1,0,True,3,True,0,2020-12-30 02:09:51.832740096,2020-12-30 02:09:51.832740096,2020-12-30 02:09:51.832740096,40,9,OCCCMS,1.0,COM/LTI/B2/CAM11,LTI:111 PHONE 1,Status,FAILURE,,PHONE,Status,,,,COM,CAM,2020-12-30 02:09:51.504,2020-12-30 02:09:51.504,2020-12-30 02:09:51.504785920,False,True,False,True,True


In [38]:
eventSplice0.tail()

Unnamed: 0,ENTRY_CODE_SUFFIX,ENTRY_CODE,ALARM_ID,USER_ID,EQUIPMENT_NAME,VALUE,VALUE_STATE,ACKNOWLEDGEMENT_REQUIRED,SEVERITY,HIDDEN,THEME,EQUIPMENT_DATE,ACQUISITION_DATE,SCS_TIME,FUNCTIONAL_CATEGORY,GEOGRAPHICAL_CATEGORY,ENVIRONMENT,USER1,ASSET_ID_RAW,ASSET_DESCRIPTION,EVENT_DESCRIPTION,EVENT_STATUS,OPERATOR_INITIALS,ASSET_DESC_CAT,EVENT_DESC_CAT,TrainID,CarID,ServiceID,AssetClass,AssetSubClass,DATETIME_SENT,DATETIME_RECEIVED,TIME_CODE,isAlarm,NuisanceAlarm,RepeatAlarm,AltAlarm2,AltAlarm3
3719824,+,-287126209,1457014,0,,0,1,True,5,True,0,2000-01-01,2000-01-01,2021-02-01 01:46:38.912076032,12,27,OCCATS,1.0,TR___0052,Cons 139,State of Train Localisation,DE-LOCALISED,ARA,Cons,State of Train Localisation,52.0,,,TR,,2021-02-01 01:46:39.003,2021-02-01 01:46:39.003,2021-02-01 01:46:39.336800,True,False,False,False,False
3719825,+,-287126211,1457015,0,,3,1,True,5,True,0,2000-01-01,2000-01-01,2021-02-01 01:46:38.912076032,12,27,OCCATS,1.0,TR___0052,Cons 139,Train Driving Mode Status,RM FORWARD,ARA,Cons,Train Driving Mode Status,52.0,,,TR,,2021-02-01 01:46:39.003,2021-02-01 01:46:39.003,2021-02-01 01:46:39.336800,True,False,False,False,False
3719826,+,-287126210,1444971,0,,1,1,True,5,True,0,2000-01-01,2000-01-01,2021-02-01 01:46:38.912076032,12,27,OCCATS,1.0,TR___0052,Cons 139,Availability,NOT AVAILABLE,ARA,Cons,Availability,52.0,,,TR,,2021-02-01 01:46:39.003,2021-02-01 01:46:39.003,2021-02-01 01:46:39.336800,True,False,False,False,False
3719827,+,-287126212,1444975,0,,1,1,True,5,True,0,2000-01-01,2000-01-01,2021-02-01 01:46:38.912076032,12,27,OCCATS,1.0,TR___0052,Cons 139,Active Cab Auto-Test Status,FAILURE,ARA,Cons,Active Cab Auto-Test Status,52.0,,,TR,,2021-02-01 01:46:39.003,2021-02-01 01:46:39.003,2021-02-01 01:46:39.336800,True,False,False,False,False
3719828,+,-287126213,1457045,0,,0,0,True,3,True,0,2000-01-01,2000-01-01,2021-02-01 01:46:38.912076032,12,27,OCCATS,1.0,TR___0052,Cons 139,Train Emergency Brake,NOT APPLIED,ARA,Cons,Train Emergency Brake,52.0,,,TR,,2021-02-01 01:46:39.003,2021-02-01 01:46:39.003,2021-02-01 01:46:39.336800,False,True,True,False,False


In [39]:
eventSplice0.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3719829 entries, 0 to 3719828
Data columns (total 38 columns):
 #   Column                    Dtype         
---  ------                    -----         
 0   ENTRY_CODE_SUFFIX         object        
 1   ENTRY_CODE                int64         
 2   ALARM_ID                  int64         
 3   USER_ID                   int64         
 4   EQUIPMENT_NAME            float64       
 5   VALUE                     object        
 6   VALUE_STATE               int64         
 7   ACKNOWLEDGEMENT_REQUIRED  bool          
 8   SEVERITY                  int64         
 9   HIDDEN                    bool          
 10  THEME                     int64         
 11  EQUIPMENT_DATE            datetime64[ns]
 12  ACQUISITION_DATE          datetime64[ns]
 13  SCS_TIME                  datetime64[ns]
 14  FUNCTIONAL_CATEGORY       int64         
 15  GEOGRAPHICAL_CATEGORY     int64         
 16  ENVIRONMENT               object        
 17  USER1   

## Export File

In [42]:
# Check current directory
cwd

'C:\\Users\\cftfda01\\Documents\\SBST Train IAMS Project\\alarm-event-logs'

In [43]:
# Define Save Location
saveLoc = '\\testOutput\\'
#os.chdir(cwd + alarmLoc)
os.chdir(cwd + saveLoc)
# Check directory location
print(os.getcwd())

C:\Users\cftfda01\Documents\SBST Train IAMS Project\alarm-event-logs\testOutput


In [46]:
# Define File Save Parameters
FileName = "Potential Train Events Raw"
Run = "-B0001"
# True for single file output; # False for multiple file output; "both" for both Single & Multiple File Output
singleSave = "both"

# Get length of dataframe
df_len = len(eventSplice0)
# Inspect data
print(df_len)

# Define Size of Partitioned Dataframes
partionSize = 500000

# Define Number of Partitions (Always Round Up to Nearest Interger)
if (df_len == partionSize): 
    partitions = 1
else:
    partitions = df_len // partionSize + 1

# Inspect data
print(partitions)


3719829
8


In [47]:
# Export file based on above settings
if singleSave == True:
    fileNameN = FileName + Run + '-alarmsTagged' + '.csv'
    eventSplice0.to_csv("main/" + fileNameN, index=False)
    print(fileNameN + " SAVED")
elif singleSave == "both":
    # Single File Save
    fileNameN = FileName + Run + '-alarmsTagged' + '.csv'
    eventSplice0.to_csv("main/" + fileNameN, index=False)
    print(fileNameN + " SAVED")
    
    # Split Dataframe into batches of 500K rows
    for counter in range(partitions):
        startPoint = counter * partionSize
        df_subset = eventSplice0.iloc[startPoint : (startPoint + partionSize)]
        #print(df_subset.info())

        # Save File
        fileNameN = "Subset File Ver/" + FileName + Run + '-' + str(counter).zfill(3) + '-alarmsTagged' + '.csv'
        df_subset.to_csv(fileNameN, index=False)
        print(fileNameN + " SAVED")
else:
    # Split Dataframe into batches of 500K rows
    for counter in range(partitions):
        startPoint = counter * partionSize
        df_subset = eventSplice0.iloc[startPoint : (startPoint + partionSize)]
        #print(df_subset.info())

        # Save File
        fileNameN = "Subset File Ver/" + FileName + Run + '-' + str(counter).zfill(3) + '-alarmsTagged' + '.csv'
        df_subset.to_csv(fileNameN, index=False)
        print(fileNameN + " SAVED")
        
# Ring Beeper When Complete
import winsound
import time

duration1 = 400  # milliseconds
freq1 = 400  # Hz
duration2 = 600  # milliseconds
freq2 = 300  # Hz
repeatCount = 7
for n in range(repeatCount):
    winsound.Beep(freq1, duration1)
    winsound.Beep(freq2, duration2)
    time.sleep(1)

Potential Train Events Raw-B0001-alarmsTagged.csv SAVED
Subset File Ver/Potential Train Events Raw-B0001-000-alarmsTagged.csv SAVED
Subset File Ver/Potential Train Events Raw-B0001-001-alarmsTagged.csv SAVED
Subset File Ver/Potential Train Events Raw-B0001-002-alarmsTagged.csv SAVED
Subset File Ver/Potential Train Events Raw-B0001-003-alarmsTagged.csv SAVED
Subset File Ver/Potential Train Events Raw-B0001-004-alarmsTagged.csv SAVED
Subset File Ver/Potential Train Events Raw-B0001-005-alarmsTagged.csv SAVED
Subset File Ver/Potential Train Events Raw-B0001-006-alarmsTagged.csv SAVED
Subset File Ver/Potential Train Events Raw-B0001-007-alarmsTagged.csv SAVED
