# Documentation

## Objective(s)
1. Ingest event log data files into a consistent tabular format without error
2. Extract key features/attributes embedded in free text message log
3. Dump processed logs into a SQL Server table for storage
4. Note: because the EventList and AlarmList tables have different structures (AlarmList has redundant fields removed), the preprocessing script cannot be used interchangeably when appending data to the SQL Server tables.


# Initialisation

### Adjust Display Theme of Jupyter Notebook
Optional Step

Key Hotkeys:
1. Run cell: ctrl + enter
2. Delete Cell: DD
3. Undo Delete Cell: Z
4. Cut Cell: X
5. Copy Cell: C
6. Insert Cell Above: A
7. Insert Cell Below: B
8. Convert Cell to Code: Y
9. Convert Cell to Markdown: M

In [1]:
# install jupyterthemes
#!pip install jupyterthemes

# upgrade to latest version
#!pip install --upgrade jupyterthemes

In [2]:
#import jupyterthemes

In [3]:
# Adjust to Dark Theme
#jt -t chesterish

## Load Libraries

In [4]:
#pip install --user --upgrade pandas

In [5]:
# Install a pip package in the current Jupyter kernel
import sys
#!{sys.executable} -m pip install schedule

In [6]:
# Import libraries
#import modin.pandas as pd
import pandas as pd
import numpy as np
import os
import pathlib
import datetime as dt
import time
import os # for manipulating file directories
import pyodbc # for sql operations
import sqlalchemy # for sql operations with pandas
import urllib # for defining sql connection parameters
import shutil # for transferring files between folders
#import re

## Set Options

In [7]:
# Show every column when printing wide dataframes (no truncation of the column list)
pd.options.display.max_columns = None

In [8]:
# Choose whether to initialise db from scratch or not
# When True, the table-initialisation cell issues its CREATE TABLE statements
# (each one guarded by an "if not exists" check, so existing tables are kept)
initDB = True

In [9]:
# Trigger SQL Function Test
# When True, the test cells further down build dummy dataframes and exercise
# the SQL helper functions against the TestValues_* tables
testMode = True
# Use this to reset table data especially when running tests
# NOTE(review): the delDataAll call guarded by this flag is currently commented
# out, so the flag only gates listing the table and deleting the test dataframes
resetTableData = True

## Set Up Core Directories

## Set Up SQL Server Database & Functions
The goal is to dump the pre-processed file into a SQL Server Database where further operations may then be made to perform the alarm tagging and nuisance event tagging. But first we will need to set up the SQL Server Database.

**Key tables**
1. EventList Table
2. AlarmList Table
3. Output_AlarmTagged Table
4. Output_TagComplete Table

In [10]:
# Open a database connection to target database
# All subsequent functions will depend on this connection
# Remember to close connection when done
import urllib.parse  # "import urllib" alone does not guarantee the parse submodule is loaded

# Single source of truth for the ODBC connection string. It is shared by the
# pyodbc connection and by the SQLAlchemy engine parameters so the two can
# never drift apart.
# This uses the Windows authentication method (Trusted_Connection);
# otherwise, one will need to add "UID=user;" (user ID) + "PWD=password;"
connectionString = ('Driver={SQL Server};'
                    'Server=SBSR-RD-0K00200;'
                    'Database=IAMS_DBtest;'
                    'Trusted_Connection=yes;')

conn = pyodbc.connect(connectionString)

# Define Server Parameters to Initiate Connection Engine via SQL Alchemy
# This is the URL-encoded form of the same connection string used above
serverParams = urllib.parse.quote_plus(connectionString)

# Create cursor to work in database
# pyodbc connections do not autocommit by default, so every change made
# through this cursor must be committed explicitly with conn.commit()
cursor = conn.cursor()

In [11]:
# Initialise tables (empty)
# Every statement is guarded by "if not exists", so re-running this cell never
# drops or alters a table that is already present in the database.
# Production tables: eventList, alarmList, Output_AlarmTagged, Output_TagComplete
# Test tables:       TestValues_Master, TestValues_Alarm, Test_AlarmTagged, Test_Final
# NOTE(review): alarmList declares ACKNOWLEDGEMENT_REQUIRED as varchar(255) while
# the other three production tables declare it as int - confirm this is intended.
if initDB:
    # Initialise eventList Table
    cursor.execute('''
                if not exists (select * from sys.tables where name='eventList')
                    CREATE TABLE eventList (
                        ENTRY_CODE_SUFFIX varchar(255) null, 
                        ENTRY_CODE varchar(255) null,
                        ALARM_ID varchar(255) null,
                        USER_ID varchar(255) null,
                        EQUIPMENT_NAME varchar(255) null,
                        VALUE varchar(255) null,
                        VALUE_STATE varchar(255) null,
                        ACKNOWLEDGEMENT_REQUIRED int null,
                        SEVERITY varchar(255) null,
                        HIDDEN int null,
                        THEME varchar(255) null,
                        EQUIPMENT_DATE datetime null,
                        ACQUISITION_DATE datetime null,
                        SCS_TIME datetime null,
                        FUNCTIONAL_CATEGORY varchar(255) null,
                        GEOGRAPHICAL_CATEGORY varchar(255) null,
                        ENVIRONMENT varchar(255) null,
                        USER1 varchar(255) null,
                        ASSET_ID_RAW varchar(255) null,
                        ASSET_DESCRIPTION varchar(255) null,
                        EVENT_DESCRIPTION varchar(255) null,
                        EVENT_STATUS varchar(255) null,
                        OPERATOR_INITIALS varchar(255) null,
                        ASSET_DESC_CAT varchar(255) null,
                        EVENT_DESC_CAT varchar(255) null,
                        TrainID varchar(255) null,
                        CarID varchar(255) null,
                        ServiceID varchar(255) null,
                        AssetClass varchar(255) null,
                        AssetSubClass varchar(255) null,
                        DATETIME_SENT datetime null,
                        DATETIME_RECEIVED datetime null,
                        TIME_CODE datetime null,
                        isAlarm int null,
                        DATETIME_LOADED DATETIME NULL DEFAULT(GETDATE())
                    )
                    ''')
    
    # Initialise alarmList Table
    # Same leading columns as eventList but without the derived fields
    # (ASSET_DESC_CAT ... TIME_CODE)
    cursor.execute('''
                if not exists (select * from sys.tables where name='alarmList')
                    CREATE TABLE alarmList (
                        ENTRY_CODE_SUFFIX varchar(255) null, 
                        ENTRY_CODE varchar(255) null,
                        ALARM_ID varchar(255) null,
                        USER_ID varchar(255) null,
                        EQUIPMENT_NAME varchar(255) null,
                        VALUE varchar(255) null,
                        VALUE_STATE varchar(255) null,
                        ACKNOWLEDGEMENT_REQUIRED varchar(255) null,
                        SEVERITY varchar(255) null,
                        HIDDEN int null,
                        THEME varchar(255) null,
                        EQUIPMENT_DATE datetime null,
                        ACQUISITION_DATE datetime null,
                        SCS_TIME datetime null,
                        FUNCTIONAL_CATEGORY varchar(255) null,
                        GEOGRAPHICAL_CATEGORY varchar(255) null,
                        ENVIRONMENT varchar(255) null,
                        USER1 varchar(255) null,
                        ASSET_ID_RAW varchar(255) null,
                        ASSET_DESCRIPTION varchar(255) null,
                        EVENT_DESCRIPTION varchar(255) null,
                        EVENT_STATUS varchar(255) null,
                        OPERATOR_INITIALS varchar(255) null,
                        isAlarm int null,
                        DATETIME_LOADED DATETIME NULL DEFAULT(GETDATE())
                    )
                    ''')
    
    # Initialise Output_AlarmTagged Table
    # Column list is identical to eventList
    cursor.execute('''
                if not exists (select * from sys.tables where name='Output_AlarmTagged')
                    CREATE TABLE Output_AlarmTagged (
                        ENTRY_CODE_SUFFIX varchar(255) null, 
                        ENTRY_CODE varchar(255) null,
                        ALARM_ID varchar(255) null,
                        USER_ID varchar(255) null,
                        EQUIPMENT_NAME varchar(255) null,
                        VALUE varchar(255) null,
                        VALUE_STATE varchar(255) null,
                        ACKNOWLEDGEMENT_REQUIRED int null,
                        SEVERITY varchar(255) null,
                        HIDDEN int null,
                        THEME varchar(255) null,
                        EQUIPMENT_DATE datetime null,
                        ACQUISITION_DATE datetime null,
                        SCS_TIME datetime null,
                        FUNCTIONAL_CATEGORY varchar(255) null,
                        GEOGRAPHICAL_CATEGORY varchar(255) null,
                        ENVIRONMENT varchar(255) null,
                        USER1 varchar(255) null,
                        ASSET_ID_RAW varchar(255) null,
                        ASSET_DESCRIPTION varchar(255) null,
                        EVENT_DESCRIPTION varchar(255) null,
                        EVENT_STATUS varchar(255) null,
                        OPERATOR_INITIALS varchar(255) null,
                        ASSET_DESC_CAT varchar(255) null,
                        EVENT_DESC_CAT varchar(255) null,
                        TrainID varchar(255) null,
                        CarID varchar(255) null,
                        ServiceID varchar(255) null,
                        AssetClass varchar(255) null,
                        AssetSubClass varchar(255) null,
                        DATETIME_SENT datetime null,
                        DATETIME_RECEIVED datetime null,
                        TIME_CODE datetime null,
                        isAlarm int null,
                        DATETIME_LOADED DATETIME NULL DEFAULT(GETDATE())
                    )
                    ''')
    
    # Initialise Output_TagComplete
    # eventList columns plus the four tagging flags at the end
    cursor.execute('''
                if not exists (select * from sys.tables where name='Output_TagComplete')
                    CREATE TABLE Output_TagComplete (
                        ENTRY_CODE_SUFFIX varchar(255) null, 
                        ENTRY_CODE varchar(255) null,
                        ALARM_ID varchar(255) null,
                        USER_ID varchar(255) null,
                        EQUIPMENT_NAME varchar(255) null,
                        VALUE varchar(255) null,
                        VALUE_STATE varchar(255) null,
                        ACKNOWLEDGEMENT_REQUIRED int null,
                        SEVERITY varchar(255) null,
                        HIDDEN int null,
                        THEME varchar(255) null,
                        EQUIPMENT_DATE datetime null,
                        ACQUISITION_DATE datetime null,
                        SCS_TIME datetime null,
                        FUNCTIONAL_CATEGORY varchar(255) null,
                        GEOGRAPHICAL_CATEGORY varchar(255) null,
                        ENVIRONMENT varchar(255) null,
                        USER1 varchar(255) null,
                        ASSET_ID_RAW varchar(255) null,
                        ASSET_DESCRIPTION varchar(255) null,
                        EVENT_DESCRIPTION varchar(255) null,
                        EVENT_STATUS varchar(255) null,
                        OPERATOR_INITIALS varchar(255) null,
                        ASSET_DESC_CAT varchar(255) null,
                        EVENT_DESC_CAT varchar(255) null,
                        TrainID varchar(255) null,
                        CarID varchar(255) null,
                        ServiceID varchar(255) null,
                        AssetClass varchar(255) null,
                        AssetSubClass varchar(255) null,
                        DATETIME_SENT datetime null,
                        DATETIME_RECEIVED datetime null,
                        TIME_CODE datetime null,
                        isAlarm int null,
                        DATETIME_LOADED DATETIME NULL DEFAULT(GETDATE()),
                        RepeatAlarm int null,
                        AltAlarm2 int null,
                        AltAlarm3 int null,
                        NuisanceAlarm int null
                    )
                    ''')
    
    # Initialise TestValues_Master Table
    cursor.execute('''
                if not exists (select * from sys.tables where name='TestValues_Master')
                    CREATE TABLE TestValues_Master (
                        eventID int null, 
                        DATEANDTIME datetime null,
                        eventDesc text null,
                        TnF int null,
                        blanCol int null,
                        DATETIME_LOADED DATETIME NULL DEFAULT(GETDATE())
                    )
                    ''')
    # Initialise TestValues_Alarm Table
    cursor.execute('''
                if not exists (select * from sys.tables where name='TestValues_Alarm')
                    CREATE TABLE TestValues_Alarm (
                        eventID int null, 
                        DATEANDTIME datetime null,
                        eventDesc text null,
                        TnF int null,
                        blanCol int null,
                        DATETIME_LOADED DATETIME NULL DEFAULT(GETDATE())
                    )
                    ''')
    # Initialise Test_AlarmTagged Table
    cursor.execute('''
                if not exists (select * from sys.tables where name='Test_AlarmTagged')
                    CREATE TABLE Test_AlarmTagged (
                        eventID int null, 
                        DATEANDTIME datetime null,
                        eventDesc text null,
                        TnF int null,
                        blanCol int null,
                        DATETIME_LOADED DATETIME NULL DEFAULT(GETDATE())
                    )
                    ''')
    # Initialise Test_Final Table
    # Test table columns plus four tagging flags at the end
    cursor.execute('''
                if not exists (select * from sys.tables where name='Test_Final')
                    CREATE TABLE Test_Final (
                        eventID int null, 
                        DATEANDTIME datetime null,
                        eventDesc text null,
                        TnF int null,
                        blanCol int null,
                        DATETIME_LOADED DATETIME NULL DEFAULT(GETDATE()),
                        RepeatAlarm int null, 
                        ToggleEventA int null, 
                        ToggleEventB int null, 
                        NuisanceAlarm int null
                    )
                    ''')
    
    # Persist all CREATE TABLE statements in one commit
    conn.commit()

In [None]:
# Function to Inspect All Tables
def list_dbTables():
    """Print the table names reported by INFORMATION_SCHEMA.TABLES."""
    # Run the catalogue query, then dump the fetched rows in a single print
    tableQuery = "SELECT table_name FROM INFORMATION_SCHEMA.TABLES "
    cursor.execute(tableQuery)
    tableRows = cursor.fetchall()
    print(tableRows)

# List tables in database
# Note: the "%memit" prefix is used to log peak memory usage
# (it is not applied to the call below)
list_dbTables()

In [None]:
# Function to list contents in target table
def list_tableContents(targetTable, rowLimit = 10, orderBy = "DATEANDTIME"):
    """Print the first rows of a table, sorted ascending by a column.

    Only the first ``rowLimit`` rows are fetched because the table size can
    be huge.

    Args:
        targetTable: Name of the table to read from. It is interpolated into
            the statement as an identifier, so it must come from trusted code.
        rowLimit: Maximum number of rows to fetch (coerced to ``int``).
        orderBy: Column used for the ascending sort. Defaults to
            ``DATEANDTIME`` so the oldest entry is listed first.

    Raises:
        ValueError / TypeError: If ``rowLimit`` cannot be converted to int.
    """
    # int() guarantees only a number is ever interpolated into the TOP clause
    rowLimit = int(rowLimit)
    # The sort column must be an unquoted identifier: the previous quoted
    # 'DATEANDTIME' was a string constant, not the column, so the rows were
    # never ordered by date
    cursor.execute(f"SELECT top {rowLimit} * FROM {targetTable} ORDER BY {orderBy} ASC")
    results = cursor.fetchall()
    print(targetTable, "Contents")
    print("Table Values")
    print("---START---")
    # Number the rows from 1 for readability
    for counter, row in enumerate(results, start=1):
        print(counter, row)
    print("---END---")

# Define the target table
# This table will be where the cleaned data would be saved to 
# and manipulated for future operations
# The name is also reused by the delete/reset test cells further down
targetTable = "TestValues_Master"

# Inspect target table
list_tableContents(targetTable)

In [None]:
# Function to list the column names and data types of the target table
def list_tableColDtype(targetTable):
    """Print each column of a table together with its SQL data type.

    Args:
        targetTable: Name of the table whose columns are looked up in
            ``information_schema.columns``.
    """
    # The table name is compared as a value here, so it is bound as a query
    # parameter instead of being pasted into the SQL text
    cursor.execute(
        "SELECT COLUMN_NAME, DATA_TYPE FROM information_schema.columns where TABLE_NAME = ?",
        (targetTable,),
    )
    results = cursor.fetchall()
    print(targetTable, "Contents")
    print("Table Column Data Type")
    print("---START---")
    # Number the rows from 1 for readability
    for counter, row in enumerate(results, start=1):
        print(counter, row)
    print("---END---")

# Inspect target table
# Prints the column names and data types of the test master table
targetTable = "TestValues_Master"
list_tableColDtype(targetTable)

In [None]:
# Function to Create a dataframe with dummy data for testing purposes
def createDummyDataDF(inputData = "A"):
    """Build a small dataframe of dummy events for testing the SQL helpers.

    Args:
        inputData: ``"A"`` returns the five-row data set (TnF all 0); any
            other value returns the three-row data set (TnF all 1).

    Returns:
        A dataframe with columns ``eventID``, ``DATEANDTIME``, ``eventDesc``
        and ``TnF``, where ``DATEANDTIME`` has been converted from epoch
        seconds to pandas datetimes.
    """
    if inputData == "A":
        dummyData = {
            'eventID': [1, 2, 3, 4, 5],
            'DATEANDTIME': [1649318400, 1649318400, 1649318400, 1649318460, 1649318470],
            'eventDesc': ['Alpha', 'Bravo', 'Charlie', 'Charlie', 'Charlie'],
            'TnF': [0, 0, 0, 0, 0],
        }
    else:
        dummyData = {
            'eventID': [3, 4, 5],
            'DATEANDTIME': [1649318400, 1649318460, 1649318470],
            'eventDesc': ['Charlie', 'Charlie', 'Charlie'],
            'TnF': [1, 1, 1],
        }

    df = pd.DataFrame(dummyData)
    # The raw values are epoch seconds; convert them to datetimes
    df['DATEANDTIME'] = pd.to_datetime(df['DATEANDTIME'], unit='s')

    # To get time in seconds resolution if it comes in higher resolutions
    # Not required
    #df['TIME_S'] = df['DATEANDTIME'].dt.floor("s").dt.time

    return df

# Generate test dataframe in testing mode
# testDF holds data set "A" and testDF2 holds data set "B"; both are reused
# by the append test further down
if testMode:
    # Create a dummy dataframe for testing purposes
    testDF = createDummyDataDF("A")
    testDF2 = createDummyDataDF("B")
    print("Dataframe columns would have the same data type as SQL table values")
    testDF.info()
else:
    print("Test script skipped")

In [None]:
# Inspect test dataframe
if testMode:
    # Show the first rows of both dummy dataframes
    print(testDF.head())
    print(testDF2.head())
else:
    print("Test script skipped")

In [None]:
# Function to append data to database as an entire dataframe
def appendData(tableName, inputDF, serverParams):
    """Append every row of a dataframe to a SQL Server table.

    Args:
        tableName: Name of the destination table.
        inputDF: Dataframe whose rows are appended; its index is not written.
        serverParams: URL-encoded ODBC connection string, passed to
            SQLAlchemy through the ``odbc_connect`` query parameter.
    """
    # Create connection engine
    # Default connection function only works for SQLite
    engine = sqlalchemy.create_engine("mssql+pyodbc:///?odbc_connect={}".format(serverParams))
    try:
        # Append data
        inputDF.to_sql(tableName, con=engine, if_exists="append", index = False)
    finally:
        # A fresh engine is built on every call, so release its pooled
        # connections instead of leaving them open
        engine.dispose()

In [None]:
# Test data append function
if testMode:
    # Test if append dataframe function works
    # Each append is followed by a listing of the table it wrote to
    appendData("TestValues_Master", testDF, serverParams)
    list_tableContents("TestValues_Master")
    appendData("TestValues_Alarm", testDF2, serverParams)
    list_tableContents("TestValues_Alarm")
else:
    print("Test script skipped")

In [None]:
# Function to delete the latest n rows in table (sorted by datetime, newest first)
def delDataNRow(targetTable, nRow=3):
    """Delete the ``nRow`` most recent rows of a table and commit.

    Rows are ranked by their DATEANDTIME column in descending order, so the
    newest entries are the ones removed.

    Args:
        targetTable: Name of the table to delete from. It is interpolated
            into the statement as an identifier, so it must come from
            trusted code.
        nRow: Number of rows to delete (coerced to ``int``).

    Raises:
        ValueError / TypeError: If ``nRow`` cannot be converted to int.
    """
    # int() guarantees only a number is ever interpolated into the TOP clause
    cursor.execute(f"WITH CTE AS (SELECT TOP {int(nRow)} * FROM {targetTable} ORDER BY DATEANDTIME DESC) DELETE FROM CTE")
    conn.commit()
    
# Function to delete all rows in table
def delDataAll(targetTable):
    """Remove every row from targetTable and commit the deletion."""
    deleteStatement = f"DELETE FROM {targetTable}"
    cursor.execute(deleteStatement)
    conn.commit()

In [None]:
# Test data delete latest nRows function
# The delete call itself is disabled, so this cell only lists the table
if testMode:
    #delDataNRow(targetTable, nRow=2) # Disabled
    list_tableContents(targetTable)
else:
    print("Test script skipped")

In [None]:
# Test data delete all function
# The delete call itself is disabled, so this cell only lists the table
if resetTableData:
    #delDataAll(targetTable) # disabled
    list_tableContents(targetTable)
else:
    print("Test script skipped")

In [None]:
# Delete redundant variables used in test
if resetTableData:
    # testDF / testDF2 only exist when testMode was on, so drop them
    # tolerantly instead of raising NameError when they were never created
    for _testVarName in ("testDF", "testDF2"):
        globals().pop(_testVarName, None)
else:
    print("Test script skipped")

In [12]:
# Close Connection to Database
# The module-level cursor and every helper function above use this
# connection, so none of them can be called after this point without reconnecting
conn.close()