# Step 1: Reading the File

In [None]:
# Step 1: Open the log file and preview its first lines.
# NOTE(review): this is an absolute, machine-specific path; point it at your own
# copy of the log before running the notebook.
file_path = r'C:\Users\alkav\Documents\Thesis\5G-edge\Alka-Thesis\data\Effnet Longrun 5151\eff_log.bin'

# Number of lines to echo. Printing every line of a long-run log floods the
# notebook output, so only a short preview is shown.
PREVIEW_LINES = 20

# The file is read as text (presumably a plain-text log despite the .bin
# extension; TODO confirm). The encoding is given explicitly so decoding does not
# depend on the platform's locale, and undecodable bytes are replaced instead of
# raising UnicodeDecodeError part-way through the file.
with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
    for line_number, line in enumerate(file, start=1):
        if line_number > PREVIEW_LINES:
            break
        print(line.strip())

# CSI decode report from L1 to L2:
1740363206110561969 info CSI_DECODE_REPORT{slotAndFrame: sfn=385,slot=19, csiDecodeDescriptors_size: 1, [{ueIdentity: 0, RNTI: 42000, uciChannel: 0, csiBits_part1_size: 11, csiBits_part1: 10010001011, csiBits_part2_size: 0, csiBits_part2: , reliability: 1, snr_dB: 19.2695, rxPower_dB: 63.5117, timingOffset_nsec: 85, crcPass_part1_present: 0, crcPass_part2_present: 0}]}

# CSI Report, MSC and RI (for DL):
1740363206110579249 info UE(42000): CSI Update - MCS 19  RI 4  PMI 8

# UL SCH decode report from L1 to L2:
1740363206112216439 info ULSCH_DECODE_REPORT{slotAndFrame: sfn=385,slot=18, ulschDecodeDescriptors_size: 1, [{ueIdentity: 0, RNTI: 42000, crcPass: 1, transportBlockSize: 19985, transportBlockOffset: 8, timingOffset_nsec: 85, snr_dB: 19.4414, rxPower_dB: 63.4648}], ulschBuffer_size: 19993}

# UL Base MCS:
1740363206112234999 info UE(42000): ulSnrUpdate MCS: 25.9766

# UL HARQ Manager information:
1740363206112397630 info HarqManager-UL(42000): Re

# Step 3: Extracting Data Using Regular Expressions

In [3]:
import re

# Every log entry is expected to have three whitespace-separated parts:
#   1. a series of digits (timestamp)
#   2. a single word (log level)
#   3. the rest of the line (log message)
pattern = r'^(\d+)\s+(\w+)\s+(.*)'

# Number of parsed entries to echo; enough to sanity-check the pattern without
# dumping the whole log into the notebook output.
MAX_ENTRIES_SHOWN = 10

entries_shown = 0
# Explicit encoding keeps decoding independent of the platform locale;
# errors='replace' avoids a UnicodeDecodeError on stray non-text bytes.
with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
    for line in file:
        line = line.strip()
        # Skip comment lines
        if line.startswith('#'):
            continue

        match = re.match(pattern, line)
        if not match:
            # Blank lines and anything not shaped like a log entry are ignored.
            continue

        timestamp = match.group(1)
        log_level = match.group(2)
        message = match.group(3)
        print(f"Timestamp: {timestamp}")
        print(f"Log Level: {log_level}")
        print(f"Message: {message}")
        print('-' * 40)

        entries_shown += 1
        if entries_shown >= MAX_ENTRIES_SHOWN:
            break


Timestamp: 1740363206110561969
Log Level: info
Message: CSI_DECODE_REPORT{slotAndFrame: sfn=385,slot=19, csiDecodeDescriptors_size: 1, [{ueIdentity: 0, RNTI: 42000, uciChannel: 0, csiBits_part1_size: 11, csiBits_part1: 10010001011, csiBits_part2_size: 0, csiBits_part2: , reliability: 1, snr_dB: 19.2695, rxPower_dB: 63.5117, timingOffset_nsec: 85, crcPass_part1_present: 0, crcPass_part2_present: 0}]}
----------------------------------------
Timestamp: 1740363206110579249
Log Level: info
Message: UE(42000): CSI Update - MCS 19  RI 4  PMI 8
----------------------------------------
Timestamp: 1740363206112216439
Log Level: info
Message: ULSCH_DECODE_REPORT{slotAndFrame: sfn=385,slot=18, ulschDecodeDescriptors_size: 1, [{ueIdentity: 0, RNTI: 42000, crcPass: 1, transportBlockSize: 19985, transportBlockOffset: 8, timingOffset_nsec: 85, snr_dB: 19.4414, rxPower_dB: 63.4648}], ulschBuffer_size: 19993}
----------------------------------------
Timestamp: 1740363206112234999
Log Level: info
Messag

In [7]:
import re
import pandas as pd

# Numeric sub-patterns shared by the field extractors. Values may carry a sign
# (dB figures, timing offsets and LAM can be negative) and floats may use an
# exponent; an unsigned-only pattern would silently drop such fields.
_INT = r'[-+]?\d+'
_FLOAT = r'[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?'

# A field spec is (result key, regex with exactly one capture group, converter).
# Specs are listed in the order the keys must appear in the result dict, because
# that order becomes the column order of the DataFrames built from the results.
_CSI_UPDATE_FIELDS = (
    ('MCS', r'MCS\s+(\d+)', int),
    ('RI', r'RI\s+(\d+)', int),
    ('PMI', r'PMI\s+(\d+)', int),
)
_UL_SNR_FIELDS = (
    ('MCS', r'MCS:\s*(' + _FLOAT + r')', float),
)
# Shared by MCS(UL) and MCS(DL) so both directions accept the same value syntax.
_MCS_FIELDS = (
    ('base_MCS', r'base:\s*(' + _FLOAT + r')', float),
    ('LAM', r'LAM:\s*(' + _FLOAT + r')', float),
    ('index', r'index:\s*(\d+)', int),
)
_DL_RETX_FIELDS = (
    ('remaining_RBs', r'remaining RBs\s*(\d+)', int),
)

# (substring that identifies the event, value stored under 'event', field specs).
# Checked in order; the first marker found in the message wins.
_UE_EVENTS = (
    ("CSI Update", "CSI Update", _CSI_UPDATE_FIELDS),
    ("ulSnrUpdate MCS:", "ulSnrUpdate", _UL_SNR_FIELDS),
    ("MCS(UL):", "MCS(UL)", _MCS_FIELDS),
    ("MCS(DL):", "MCS(DL)", _MCS_FIELDS),
    ("DL re-transmission", "DL re-transmission", _DL_RETX_FIELDS),
)


def _decode_report_fields(size_key):
    """Build the field specs for a *_DECODE_REPORT whose descriptor count is `size_key`."""
    return (
        ('sfn', r'sfn=(\d+)', int),
        ('slot', r'slot=(\d+)', int),
        (size_key, size_key + r':\s*(\d+)', int),
        ('snr_dB', r'snr_dB:\s*(' + _FLOAT + r')', float),
        ('rxPower_dB', r'rxPower_dB:\s*(' + _FLOAT + r')', float),
        ('timingOffset_nsec', r'timingOffset_nsec:\s*(' + _INT + r')', int),
    )


# Message prefix -> field specs for the brace-delimited decode reports.
_DECODE_REPORTS = {
    "CSI_DECODE_REPORT": _decode_report_fields('csiDecodeDescriptors_size'),
    "ULSCH_DECODE_REPORT": _decode_report_fields('ulschDecodeDescriptors_size'),
}


def _extract_fields(result, text, fields):
    """Store in `result` every spec in `fields` that matches `text`; skip the rest."""
    for key, regex, convert in fields:
        found = re.search(regex, text)
        if found:
            result[key] = convert(found.group(1))


def parse_message(message):
    """
    Parse the message field of a log entry and return a dictionary of extracted fields.

    Recognised message types:

    * ``CSI_DECODE_REPORT{...}`` / ``ULSCH_DECODE_REPORT{...}``: sets
      ``message_type`` and, from the text between the outermost braces, ``sfn``,
      ``slot``, ``<csi|ulsch>DecodeDescriptors_size``, ``snr_dB``, ``rxPower_dB``
      and ``timingOffset_nsec``. Each field is taken from its first occurrence,
      so with several descriptors only the first one's values are captured.
    * ``UE(<id>): ...``: sets ``message_type`` to ``"UE"``, ``UE`` to the id and,
      for known events, ``event`` plus the event's fields (``CSI Update``,
      ``ulSnrUpdate``, ``MCS(UL)``, ``MCS(DL)``, ``DL re-transmission``).

    Numeric fields accept an optional sign, so negative dB values, timing
    offsets and LAM values are kept rather than silently dropped.

    Args:
        message: The message part of a log line (timestamp and level removed).

    Returns:
        dict: The extracted fields. A field that is absent from the message is
        simply not present as a key; an unrecognised message yields ``{}``.
        Extend the spec tables above to support further message types.
    """
    result = {}

    # Brace-delimited decode reports (CSI and ULSCH share one layout).
    for report_type, fields in _DECODE_REPORTS.items():
        if message.startswith(report_type):
            result['message_type'] = report_type
            inside = re.search(r'\{(.*)\}', message)
            if inside:
                _extract_fields(result, inside.group(1), fields)
            return result

    # UE messages starting with "UE(...)":
    if message.startswith("UE("):
        ue_match = re.match(r'UE\((\d+)\):\s*(.*)', message)
        if ue_match:
            result['message_type'] = "UE"
            result['UE'] = int(ue_match.group(1))
            rest = ue_match.group(2)
            for marker, event, fields in _UE_EVENTS:
                if marker in rest:
                    result['event'] = event
                    _extract_fields(result, rest, fields)
                    break

    # More parsing rules can be added here for other message types...

    return result


In [10]:
pattern = r'^(\d+)\s+(\w+)\s+(.*)'  # Matches: timestamp, log level, and message

# One dict per successfully parsed log line, in file order.
structured_entries = []

# Explicit encoding keeps decoding independent of the platform locale;
# errors='replace' avoids a UnicodeDecodeError on stray non-text bytes.
with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
    for line in file:
        line = line.strip()
        # Skip comment lines (starting with '#')
        if line.startswith('#'):
            continue
        match = re.match(pattern, line)
        if not match:
            # Blank lines and anything not shaped like a log entry are ignored.
            continue
        timestamp = int(match.group(1))
        log_level = match.group(2)
        message = match.group(3)
        # Parse the message for structured data
        parsed = parse_message(message)
        # Add the fields common to every entry
        parsed['timestamp'] = timestamp
        parsed['log_level'] = log_level
        structured_entries.append(parsed)

# Group entries by message type. Messages that parse_message() does not
# recognise carry no 'message_type' key and are collected under 'Unknown'.
dfs = {}
for entry in structured_entries:
    dfs.setdefault(entry.get('message_type', 'Unknown'), []).append(entry)

# Convert each group to a DataFrame and show its first rows. Keys missing from
# an entry become NaN in that row.
for msg_type, entries in dfs.items():
    df = pd.DataFrame(entries)
    print(f"\nDataFrame for message type: {msg_type}")
    print(df.head())


DataFrame for message type: CSI_DECODE_REPORT
        message_type  sfn  slot  csiDecodeDescriptors_size   snr_dB  \
0  CSI_DECODE_REPORT  385    19                          1  19.2695   

   rxPower_dB  timingOffset_nsec            timestamp log_level  
0     63.5117                 85  1740363206110561969      info  

DataFrame for message type: UE
  message_type     UE        event      MCS   RI  PMI            timestamp  \
0           UE  42000   CSI Update  19.0000  4.0  8.0  1740363206110579249   
1           UE  42000  ulSnrUpdate  25.9766  NaN  NaN  1740363206112234999   
2           UE  42000      MCS(UL)      NaN  NaN  NaN  1740363206112398090   
3           UE  42000      MCS(DL)      NaN  NaN  NaN  1740363206205417456   
4           UE  42000      MCS(DL)      NaN  NaN  NaN  1740363206215936696   

  log_level  base_MCS      LAM  index  remaining_RBs  
0      info       NaN      NaN    NaN            NaN  
1      info       NaN      NaN    NaN            NaN  
2      info 