In [1]:
from typing import List

from pathlib import Path
import re

import pandas as pd
import xlwings as xw

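### Specialized message parsing
Messages that do not follow the simple `action: detail` form are recognized by the fixed phrases shown in bold below; the code-formatted text is the remainder of the message.
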
- ***Wrote DICOM RT-Struct File***: `C:/DICOM/Limbus Export\limbus_1.6.0_1.2.840.113704.1.111.7856.`
- ***Applied Template*** - `Prostate via import rule - Prostate for 0455860`
- ***No suitable global reference structure found for*** `BODY`
- ***Imported*** `1 Series.`
- ***No import rule triggered*** - `No Template could be applied for series`
- ***All pending series contoured.*** `Waiting for additional imports`
- ***Initialized LimbusContourService***
- *Head-Neck* ***Template applied for series***
- 0369892 - 1.2.840.113704.1.111.7064.1679931718.12 ***could not be contoured - no import rule triggered***
- [Errno 2] ***No such file or directory***: `'C:/DICOM/Limbus Imports\\1.2.840.113704.1.111.8136.1680795532.10\\CT.1.2.840.113704.1.111.2688.1680795559.2050.dcm'`


In [2]:
def parse_log_file(raw_text: List[str])->pd.DataFrame:
    """Parse contour-service log lines into a table with one row per line.

    Every line is first matched against the dated header layout
    ``<date>,<ms> - <msg_type>:[AUTOMATIC:] <message>``.  The message part is
    then split into an ``action`` and a ``remainder``, either with the
    generic ``<action>: <remainder>`` form or with one of the known action
    phrases listed in ``alternate_ptrn``.

    A line without a dated header is treated as a continuation line.  It
    gets its own, independent row that carries the ``date``, ``msg_type``
    and ``auto_tag`` of the most recent dated line (missing when no dated
    line has been seen yet) and its own stripped text as the message.

    Args:
        raw_text: The log lines, one string per line, without line endings.

    Returns:
        A DataFrame with the columns ``date``, ``msg_type``, ``action``,
        ``auto_tag`` and ``remainder``, in the same order as the input
        lines.  Values that could not be extracted are missing (NaN/None).
        Empty input yields an empty DataFrame with those five columns.
    """
    date_line_ptrn = re.compile(
        r'^'                                                # Start of line
        r'(?P<date>[0-9: -]+)'                               # Date and time group
        r',[0-9]+'                                          # Milliseconds
        r'[ -]+'                                            # Break indicated by '-'
        r'(?P<msg_type>WARNING|INFO|ERRORAUTOMATIC|ERROR):'  # Message type group
        r'(?P<auto_tag>AUTOMATIC)?:?\s*'                     # Possible 'AUTOMATIC' Tag
        r'(?P<remainder>.*)$'                                # Remainder of line
        )

    action_ptrn = re.compile(
        r'^'                         # Start of line
        r'\s*'                       # Possible initial spaces
        r'(?P<action>[A-Za-z .]+):'  # Text including periods and spaces before a ':'
        r'\s*'                       # Possible spaces
        r'(?P<remainder>.*)$'        # Remainder of line
        )

    alternate_ptrn = re.compile(
        r'^'                     # Start of line
        r'\s*'                   # Possible initial spaces
        r'(?:[0-9]+ - [0-9.]*\s*)?'  # Non-capturing optional string of numbers representing UID
        r'(?:\[Errno[ 0-9]*\])?' # Non-capturing Optional Error number
        r'(?P<action>'           # Beginning of action group
        # The following are possible phrases that qualify as 'actions'
        r'Wrote DICOM RT-Struct File|'
        r'Applied Template|'
        r'No suitable global reference structure found for|'
        r'Imported|'
        r'No import rule triggered|'
        r'All pending series contoured\.|'   # Literal '.', not "any character"
        r'Initialized LimbusContourService|'
        r'.*Template applied for series|'
        r'.*No such file or directory|'
        r'could not be contoured - no import rule triggered'
        r')'                     # End of action group
        r'[ :-]*'                # Possible spaces, ':', '-'
        r'(?P<remainder>.*)$'    # Remainder of line
        )

    selected_columns = ['date', 'msg_type', 'action',
                        'auto_tag', 'remainder']
    # Fields a continuation line inherits from the preceding dated line.
    header_columns = ('date', 'msg_type', 'auto_tag')

    message_rows = []
    last_header = {}
    for line in raw_text:
        header_match = date_line_ptrn.fullmatch(line)
        if header_match is not None:
            # groupdict() returns a fresh dict, so each row is independent.
            row = header_match.groupdict()
            last_header = {name: row[name] for name in header_columns}
        else:
            # Copy (never share) the header values so that updating this row
            # cannot alter a row that was already appended.
            row = dict(last_header)
            row['remainder'] = line.strip()

        message = row['remainder']
        # Prefer the generic 'action: text' form, then the known phrases.
        action_match = (action_ptrn.fullmatch(message)
                        or alternate_ptrn.fullmatch(message))
        if action_match is not None:
            row.update(action_match.groupdict())
        else:
            row['remainder'] = message.strip()
        message_rows.append(row)

    # Passing 'columns' fixes the column order and creates any column that no
    # row supplied (e.g. 'action'), including for empty input.
    message_table = pd.DataFrame(message_rows, columns=selected_columns)
    return message_table

In [3]:
def get_log_lines(log_file: Path)->List[str]:
    """Read a log file and return its lines without the progress messages.

    Args:
        log_file: Path of the log file to read.

    Returns:
        The lines of the file in their original order, without line
        endings, omitting every line that contains the text 'PROGRESS'.
    """
    kept_lines = []
    for entry in log_file.read_text().splitlines():
        # Progress messages carry no error information, so skip them.
        if 'PROGRESS' in entry:
            continue
        kept_lines.append(entry)
    return kept_lines

In [4]:
# Folder holding the log files, one level above the current working directory.
# It is built from path components rather than a backslash-separated literal,
# so it resolves the same way on Windows, macOS and Linux.
base_path = (Path('..') / 'Log files').resolve()

# Log file to analyse.
log_file = base_path / 'LimbusContourService_2023_03_27.log'

In [5]:
# Read the lines of the selected log file, then parse them into a table.
# Both results are kept as notebook-level names for use by later cells.
raw_text = get_log_lines(log_file)
message_table = parse_log_file(raw_text)

In [6]:
# The workbook is stored next to the log file, using the log file's name
# with an '.xlsx' extension.
save_name = log_file.stem + '.xlsx'
save_file = log_file.parent / save_name
# Create a new workbook and save it right away so it is bound to save_file.
wb = xw.Book()
wb.save(save_file)

# Raw log lines; transpose=True is requested so the list of lines is laid out
# down a column starting at A1 (see the xlwings converter options).
sheet = wb.sheets.add('Raw Log')
sheet.range('A1').options(transpose=True).value = raw_text

# Parsed table, sent to its own sheet through xw.view.
sheet = wb.sheets.add('Formatted Log')
xw.view(message_table, sheet=sheet)

# Save again (no path given) now that both sheets have been filled.
wb.save()