# Clean up data, convert to dataframes, and save to csv

The BAC file is pipe separated with the following fields:
LogLevel [TimeStamp]:[LogVisibility][LogSeverity][entryType][entrySubType][eventType][EventType dependent strings]

The fields at the end of each entry depend on its eventType.  The supported event types, and the additional fields that follow each one, are listed below.

GeneralMessage - [string message]

LevelChangedEvent - [load ID][loadName][roomName][rampTime][rampBaseValue][rampFinalValue]

ButtonChangedEvent - [keypad ID][keypadName][roomName][buttonNum][buttonState]

RemoteSystemEvent - [signalID][signalName][roomName][RemoteSystemEvent string]

TimeClockChangedEvent/OccupancyChangedEvent/SceneChangedEvent - [ID][name][roomName][message]

ConnectionStatus - [device ID][Name][room Name][connection status][Load 1 Room Name:Load 1 Name]|[Load 2 Room Name:Load 2 Name]

    NOTE: DeviceConnectionStatusWithOptions is the same format as ConnectionStatus. 
    
SignalChangedEvent - [device ID][Device Name][room Name][signal event ID][signal Value] - The signal event ID differs by device, and the signal value is either a bool or an int depending on the signal event ID.

SignalChangedEventWithStrings - [device ID][Device Name][Signal Name][Signal Value string][Signal direction][message]


In [1]:
import os 
import pandas as pd
from IPython.display import display, clear_output
from ipywidgets import widgets

import pdb

# Progress bar rendered in the notebook output: integer scale 0-100,
# starting at 0 with the matching "Loading: 0%" caption.
loadingBar = widgets.IntProgress(
    value=0,
    min=0,
    max=100,
    step=1,
    description='Loading: 0%',
    bar_style='info',
    orientation='horizontal',
)
display(loadingBar)

# Counter starting at 1; in the visible code it is only referenced by the
# commented-out progress update further down in this cell.
count = 1

# Column names for one parsed log entry, in the order the fields appear.
labels = [
    'LogLevel',
    'TimeStamp',
    'LogVisibility',
    'LogSeverity',
    'entryType',
    'entrySubType',
    'eventType',
    'EventType dependent strings',
]

# Positions inside `labels` (and therefore inside a parsed entry) of the
# event type and of the first event-type-dependent field.
EVENT_TYPE_INDEX = labels.index('eventType')
EVENT_TYPE_DEPENDENT_STRINGS = labels.index('EventType dependent strings')

# Character offsets into a raw line of the .bac file.
LOG_LEVEL_START, LOG_LEVEL_END = 0, 3
TIMESTAMP_START, TIMESTAMP_END = 6, 14
PIPE_SEPARATED_DATA_START = 17

# Names of the event-type-dependent fields.  Each group below belongs to the
# event type(s) named in the comment above it; the groups are concatenated in
# this fixed order into one flat list.
event_type_labels = (
    # GeneralMessage
    ["string message"]
    # LevelChangedEvent
    + ["load ID", "loadName", "roomName",
       "rampTime", "rampBaseValue", "rampFinalValue"]
    # ButtonChangedEvent
    + ["keypad ID", "keypadName", "roomName", "buttonNum", "buttonState"]
    # RemoteSystemEvent
    + ["signalID", "signalName", "roomName", "RemoteSystemEvent string"]
    # TimeClockChangedEvent/OccupancyChangedEvent/SceneChangedEvent
    + ["ID", "name", "roomName", "message"]
    # ConnectionStatus/DeviceConnectionStatusWithOptions
    + ["device ID", "Name", "room Name", "connection status"]
    # SignalChangedEvent
    + ["device ID", "Device Name", "room Name",
       "signal event ID", "signal Value"]
    # SignalChangedEventWithStrings
    + ["device ID", "Device Name", "Signal Name",
       "Signal Value string", "Signal direction", "message"]
)

# Maps every supported event type to its (START_INDEX, END_INDEX) pair.
# NOTE(review): the pairs look like half-open column ranges into the seven
# fixed columns followed by event_type_labels (7 + 35 = 42) -- confirm once
# the ranges are actually consumed; nothing in this loop reads them yet.
EVENT_TYPE_INDEX_RANGES = {
    'GeneralMessage': (7, 8),
    'LevelChangedEvent': (8, 14),
    'ButtonChangedEvent': (14, 19),
    'RemoteSystemEvent': (19, 23),
    'TimeClockChangedEvent': (23, 27),
    'OccupancyChangedEvent': (23, 27),
    'SceneChangedEvent': (23, 27),
    'ConnectionStatus': (27, 31),
    'DeviceConnectionStatusWithOptions': (27, 31),
    'SignalChangedEvent': (31, 36),
    'SignalChangedEventWithStrings': (36, 42),
}

# Walk every log file in data/, split each entry into its fields and look up
# the index range that belongs to the entry's event type.
# Raises ValueError when an entry carries an unsupported event type.
for log in os.listdir('data'):
    log_path = os.path.join('data', log)
    if not os.path.isfile(log_path):
        # os.listdir also returns sub-directories; open() would fail on them.
        continue

    with open(log_path) as logfile:
        for line in logfile:
            # strip the new line off of the end of the entry
            line = line.rstrip('\n')
            if not line.strip():
                # A blank line (e.g. a trailing empty line) has no fields;
                # without this guard the event-type lookup below fails with
                # an uninformative IndexError.
                continue

            # Fixed-width prefix (log level, timestamp) followed by the
            # pipe-separated remainder of the entry.
            clean_line = ([line[LOG_LEVEL_START:LOG_LEVEL_END],
                           line[TIMESTAMP_START:TIMESTAMP_END]]
                          + line[PIPE_SEPARATED_DATA_START:].split('|'))

            # Everything after the event type varies with the event type.
            event_type_dependent_strings = clean_line[EVENT_TYPE_DEPENDENT_STRINGS:]

            event_type = clean_line[EVENT_TYPE_INDEX]
            if event_type not in EVENT_TYPE_INDEX_RANGES:
                raise ValueError("Wrong event type: {}".format(event_type))
            START_INDEX, END_INDEX = EVENT_TYPE_INDEX_RANGES[event_type]

            # The parsed fields are discarded here; nothing is collected yet.
            clean_line = []
            
            
                          
#             uniform_lines = []
#             for line in info_lines:
#                 VARIABLE_LENGTH_FIELD_START = 8
#                 new_line = line[:VARIABLE_LENGTH_FIELD_START]
#                 if len(line) > VARIABLE_LENGTH_FIELD_START:
#                     pdb.set_trace()
#                     new_line[7] = line[-(len(line)-7):]
#                 uniform_lines.append(new_line)
#             df = pd.DataFrame.from_records(uniform_lines, columns=labels)
#             df.to_csv('processed_data/'+log[-14:-4]+'.csv', sep=',', encoding='utf-8', index=False)
#             loadingBar.value = int((count/len(os.listdir('data'))) * 100)
#             loadingBar.description = 'Loading: ' + str(loadingBar.value) + '%'
#             count+=1

A Jupyter Widget

> <ipython-input-1-ff664bc8c8d4>(38)<module>()
-> new_line[7] = line[-(len(line)-7):]
(Pdb) line
['L:0', '00:00:05', 'UserVisible', 'Information', 'Lights', 'Scene', 'SceneChangedEvent', '74', 'Night', 'Upstairs Landing', 'SceneRecalled']
(Pdb) new_line
['L:0', '00:00:05', 'UserVisible', 'Information', 'Lights', 'Scene', 'SceneChangedEvent', '74']
(Pdb) len(new_line)
8
(Pdb) n
> <ipython-input-1-ff664bc8c8d4>(39)<module>()
-> uniform_lines.append(new_line)
(Pdb) new_line
['L:0', '00:00:05', 'UserVisible', 'Information', 'Lights', 'Scene', 'SceneChangedEvent', ['74', 'Night', 'Upstairs Landing', 'SceneRecalled']]
(Pdb) exit


BdbQuit: 

In [None]:
# Load one processed CSV (the file for 2017-08-04) back into a DataFrame.
loaded_df = pd.read_csv('processed_data/2017-08-04.csv')

In [None]:
# Bare expression: as the last statement of a notebook cell it is rendered
# as the cell's output.
loaded_df

In [None]:
# Convert the DataFrame to nested dicts; with to_dict()'s default orientation
# the result is keyed by column name, then by row index.
df_dict = loaded_df.to_dict()

In [None]:
# Look up the "eventType" value stored under row index 0.
df_dict["eventType"][0]