# Case 1

Break each line into its fields using regular expressions.

### Explanation:
1. **Regex Pattern Breakdown**:
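   - `\b0E7\b`: Matches "0E7" as a whole word (not included in the pattern used below, so "0E7" does not appear in the output).
   - `\b\d{2}[A-Z]{3}\d{2}\b`: Matches the date format (e.g., "16APR24").
   - `\b\d{2}:\d{2}:\d{2}\.\d{1,}\b`: Matches the time format with one or more fractional digits (e.g., "16:47:29.9").
   - `Track \d`: Matches the track number (e.g., "Track 3").
   - `EZ: \d{1,}`: Matches the EZ value, one or more digits (e.g., "EZ: 101").
   - `EX: \d{1,}`: Matches the EX value, one or more digits (e.g., "EX: 98").
   - `Speed: \d{1,}   mph`: Matches the speed, one or more digits (e.g., "Speed: 82   mph").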
2. **`re.findall`**:
   Extracts all matches of the pattern from the input string.


The `|` characters in the pattern act as alternation (logical OR) operators in regular expressions. They combine several sub-patterns into one, and `re.findall` returns every non-overlapping part of the input string that matches any one of them, in the order in which they are found.

In [8]:
import re

# One alternative per field of interest; adjacent raw-string literals are
# concatenated by Python into a single pattern.
pattern = (
    r'\b\d{2}[A-Z]{3}\d{2}\b'            # date, e.g. "16APR24"
    r'|\b\d{2}:\d{2}:\d{2}\.\d{1,}\b'    # time, e.g. "17:35:16.6"
    r'|Track \d'                         # e.g. "Track 1"
    r'|EZ: \d{1,}'                       # e.g. "EZ: 32"
    r'|EX: \d{1,}'                       # e.g. "EX: 101"
    r'|Speed: \d{1,}   mph'              # e.g. "Speed: 42   mph"
)

# Sample log lines; two of them have no Speed field.
dataList = [
    "0E7 16APR24 17:35:16.6  Track 1   EZ: 32         EX: 101     Speed: 42   mph    ",
    "0E7 16APR24 17:35:20.6  Track 3   EZ: 97         EX: 99    ",
    "0E7 16APR24 17:35:21.8  Track 1   EZ: 29         EX: 102     Speed: 38   mph    ",
    "0E7 16APR24 17:35:27.3  Track 1   EZ: 26         EX: 101     Speed: 34   mph    ",
    "0E7 16APR24 17:35:28.6  Track 3   EZ: 100        EX: 99      ",
    "0E7 16APR24 17:35:33.0  Track 1   EZ: 23         EX: 102     Speed: 9   mph    ",
]

# Print the list of matched fields for every sample line.
for data in dataList:
    elements = re.findall(pattern, data)
    print(elements)

['16APR24', '17:35:16.6', 'Track 1', 'EZ: 32', 'EX: 101', 'Speed: 42   mph']
['16APR24', '17:35:20.6', 'Track 3', 'EZ: 97', 'EX: 99']
['16APR24', '17:35:21.8', 'Track 1', 'EZ: 29', 'EX: 102', 'Speed: 38   mph']
['16APR24', '17:35:27.3', 'Track 1', 'EZ: 26', 'EX: 101', 'Speed: 34   mph']
['16APR24', '17:35:28.6', 'Track 3', 'EZ: 100', 'EX: 99']
['16APR24', '17:35:33.0', 'Track 1', 'EZ: 23', 'EX: 102', 'Speed: 9   mph']


# Case 2

Break the line on whitespace, then regroup the resulting 'word' elements by their position.

In [5]:
# Input string
data = "0E7 16APR24 16:47:29.9  Track 3   EZ: 101        EX: 98      Speed: 82   mph"

# Tokenise on runs of whitespace, then rebuild the fields purely by position.
parts = data.split()
result = [
    parts[0],                    # "0E7"
    parts[1],                    # "16APR24"
    parts[2],                    # "16:47:29.9"
    f"{parts[3]} {parts[4]}",    # "Track 3"
    f"{parts[5]} {parts[6]}",    # "EZ: 101"
    f"{parts[7]} {parts[8]}",    # "EX: 98"
    " ".join(parts[9:]),         # "Speed: 82 mph" (original spacing collapses to single spaces)
]

print(result)


['0E7', '16APR24', '16:47:29.9', 'Track 3', 'EZ: 101', 'EX: 98', 'Speed: 82 mph']


In [24]:
import re

# Sample log lines. The first six carry Track / EZ / EX readings (four of them
# also end with a Speed reading). The remaining six are other message formats
# that produce fewer than three matches of the checkDataLine pattern and are
# therefore skipped by the loop below.
dataList = [
    "0E7 16APR24 17:35:16.6  Track 1   EZ: 32         EX: 101     Speed: 42   mph    ",
    "0E7 16APR24 17:35:20.6  Track 3   EZ: 97         EX: 99    ",
    "0E7 16APR24 17:35:21.8  Track 1   EZ: 29         EX: 102     Speed: 38   mph    ",
    "0E7 16APR24 17:35:27.3  Track 1   EZ: 26         EX: 101     Speed: 34   mph    ",
    "0E7 16APR24 17:35:28.6  Track 3   EZ: 100        EX: 99      ",
    "0E7 16APR24 17:35:33.0  Track 1   EZ: 23         EX: 102     Speed: 9   mph    ",
    "0E7 16APR24 17:35:14.1  Track 3 Train Present      ",
    "0E7 16APR24 17:36:23.0  Track 1 Island Deenergized                              ",
    "0E7 16APR24 17:36:23.0  Trk: 1 Crossing Warning Time: 89 s, Isl Speed: 26    mph",
    "0E7 16APR24 17:36:23.0  Train Move Trk: 1, WTime: 89  s, Speeds: 38  26  26  mph",
    "0E7 16APR24 18:24:56.3  Track 2 Prime    Deenergized   ,EZ: 45  EX: 100 Spd: 105",
    "0E7 16APR24 18:24:56.3  Track 2 Preempt  Deenergized   ,EZ: 45  EX: 100 Spd: 105",
]


def checkDataLine(data: str) -> bool:
    """Report whether a log line contains the Track/EZ/EX markers.

    The markers found are printed (debug aid), and the line is accepted
    when at least three marker matches are present in total.

    Args:
        data: One raw line of log text.

    Returns:
        True when three or more markers were matched, otherwise False.
    """
    markers = re.findall(r'\bTrack | EZ: | EX: ', data)

    print(markers)
    return len(markers) >= 3

def groupTextFromParts(parts) -> list[str]:
    """Regroup whitespace-split tokens of a log line into labelled fields.

    Args:
        parts: Tokens of one line as produced by ``str.split()``; at least
            nine tokens are indexed.

    Returns:
        Seven strings: the first three tokens unchanged, then the
        "Track N", "EZ: N" and "EX: N" pairs, then every remaining token
        joined with single spaces (an empty string when there are none).
    """
    # Fields are rebuilt purely by position in the token list.
    return [
        parts[0],                     # e.g. "0E7"
        parts[1],                     # e.g. "16APR24"
        parts[2],                     # e.g. "16:47:29.9"
        f"{parts[3]} {parts[4]}",     # e.g. "Track 3"
        f"{parts[5]} {parts[6]}",     # e.g. "EZ: 101"
        f"{parts[7]} {parts[8]}",     # e.g. "EX: 98"
        " ".join(parts[9:]),          # e.g. "Speed: 82 mph", or "" if absent
    ]

# Walk the sample lines, printing the index and grouped fields of every
# line that passes the marker check; other lines are skipped.
for i, data in enumerate(dataList):
    parts = data.split()  # tokens separated by runs of whitespace

    if checkDataLine(data):
        result = groupTextFromParts(parts)

        print(i, '\t', result)

['Track ', ' EZ: ', ' EX: ']
0 	 ['0E7', '16APR24', '17:35:16.6', 'Track 1', 'EZ: 32', 'EX: 101', 'Speed: 42 mph']
['Track ', ' EZ: ', ' EX: ']
1 	 ['0E7', '16APR24', '17:35:20.6', 'Track 3', 'EZ: 97', 'EX: 99', '']
['Track ', ' EZ: ', ' EX: ']
2 	 ['0E7', '16APR24', '17:35:21.8', 'Track 1', 'EZ: 29', 'EX: 102', 'Speed: 38 mph']
['Track ', ' EZ: ', ' EX: ']
3 	 ['0E7', '16APR24', '17:35:27.3', 'Track 1', 'EZ: 26', 'EX: 101', 'Speed: 34 mph']
['Track ', ' EZ: ', ' EX: ']
4 	 ['0E7', '16APR24', '17:35:28.6', 'Track 3', 'EZ: 100', 'EX: 99', '']
['Track ', ' EZ: ', ' EX: ']
5 	 ['0E7', '16APR24', '17:35:33.0', 'Track 1', 'EZ: 23', 'EX: 102', 'Speed: 9 mph']
['Track ']
['Track ']
[]
[]
['Track ', ' EX: ']
['Track ', ' EX: ']


# Case 3

Use the method from Case 2 to process text read from a log file and write the extracted fields to a CSV file.

In [48]:
import re

def checkDataLine(data: str) -> bool:
    """Decide whether a log line should be processed.

    A line containing "CHK: " is always rejected. Otherwise the line is
    accepted when the Track/EZ/EX markers are matched at least three times
    in total.

    Args:
        data: One raw line of log text.

    Returns:
        True if the line should be processed, otherwise False.
    """
    # check 1: any "CHK: " marker disqualifies the line
    if re.search(r'CHK: ', data) is not None:
        return False

    # check 2: enough Track / EZ / EX markers must be present
    markers = re.findall(r'\bTrack | EZ: | EX: ', data)
    return len(markers) >= 3

def groupTextFromParts(parts) -> list[str]:
    """Pick the output fields out of a whitespace-split log line.

    Args:
        parts: Tokens of one line as produced by ``str.split()``, e.g.
            ``['0E7', '16APR24', '16:47:29.9', 'Track', '3', 'EZ:', '101',
            'EX:', '98', 'Speed:', '82', 'mph']``.

    Returns:
        Six strings: date, time, "Track N", the EZ value, the EX value and
        the speed value. The speed is ``''`` when the line has no token at
        index 10 (no speed reading, or a line cut off after "Speed:").

    Raises:
        IndexError: If ``parts`` has fewer than nine tokens.
    """
    # The speed value is the token at index 10 (after the "Speed:" label at
    # index 9), so at least 11 tokens are needed before it can be read.
    speed = parts[10] if len(parts) > 10 else ''

    # parts[0] (e.g. "0E7") is not written to the output. Only the values,
    # not the "EZ:" / "EX:" labels, are kept.
    return [
        parts[1],                     # e.g. "16APR24"
        parts[2],                     # e.g. "16:47:29.9"
        f"{parts[3]} {parts[4]}",     # e.g. "Track 3"
        parts[6],                     # EZ value, e.g. "101"
        parts[8],                     # EX value, e.g. "98"
        speed,                        # e.g. "82", or "" when absent
    ]

def processFileData(readFile, writeFile):
    """Copy the useful fields of every qualifying line to the output file.

    Lines are read one at a time from ``readFile`` until ``readline()``
    returns an empty string. Each line accepted by ``checkDataLine`` is
    split on whitespace, reduced by ``groupTextFromParts`` and handed to
    ``writeProcessedData``.

    Args:
        readFile: Object with a ``readline()`` method that yields the log text.
        writeFile: Output object passed through to ``writeProcessedData``.
    """
    # iter(callable, sentinel) keeps calling readline() until it returns '',
    # which is how end of file is signalled.
    for dataLine in iter(readFile.readline, ''):
        if checkDataLine(dataLine):
            # break line of data into components and extract useful components
            fields: list = groupTextFromParts(dataLine.split())
            writeProcessedData(writeFile, fields)


def writeProcessedData(writeFile, dataList):
    """Write one output row: each item followed by ", ", then a newline.

    Args:
        writeFile: Object with a ``write()`` method accepting text.
        dataList: Strings that make up the row.
    """
    # Every item, including the last, is followed by the ", " separator.
    row = "".join(item + ", " for item in dataList)
    writeFile.write(row + "\n")
    

In [49]:
file_path_read = r'Input\StatLogSL1CP-106357A-17-Apr-2024-12-31-05.log'
file_path_write = r'example.csv'

# Open the source log and the destination file together; both are closed
# automatically when the block exits.
with open(file_path_read, "r") as readFile, open(file_path_write, "w") as writeFile:
    # add headings to file
    writeFile.write("DATE, TIME, TRACK NUM, EZ, EX, SPEED (mph), \n")

    processFileData(readFile, writeFile)


print("Done!")


Done!
