In [None]:
#############################################################

# Copyright 2025 North Carolina State University

# Authored by
# Pranshav Gajjar, Abiodun Ganiyu, and Vijay K. Shah
# NextG Wireless Lab, North Carolina State University

############################################################# 

import json

# Load the log data from the JSON file
def load_log_data(log_file_path):
    """Read a JSON log file and return the parsed content.

    Args:
        log_file_path: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file (the rest of this
        notebook indexes the result by log-window number).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # JSON text is UTF-8 by specification; do not depend on the platform's
    # locale encoding, which may differ (e.g. cp1252 on Windows).
    with open(log_file_path, 'r', encoding='utf-8') as f:
        return json.load(f)

# Absolute path of the captured log (JSON); adjust it if the file lives elsewhere.
log_file_path = '/content/ue_attach_over_e1_f1.json'
# Parsed once here; later cells index log_data by log-window number.
log_data = load_log_data(log_file_path)


In [None]:
# Peek at a single log window. As the displayed output shows, a window is a dict
# keyed by protocol name (here 'user_dlt' and 'e1ap'), each holding a list of
# decoded message lines.
log_data[3]

{'user_dlt': [],
 'e1ap': ['E1AP-PDU: successfulOutcome (1)',
  'successfulOutcome',
  'procedureCode: id-gNB-CU-UP-E1Setup (3)',
  'criticality: reject (0)',
  'value',
  'GNB-CU-UP-E1SetupResponse',
  'protocolIEs: 1 item',
  'Item 0: id-TransactionID',
  'ProtocolIE-Field',
  'id: id-TransactionID (57)',
  'TransactionID: 0',
  'criticality: reject (0)',
  'value']}

In [None]:
from langchain import PromptTemplate
from langchain import LLMChain
import os
import torch
from langchain_nvidia_ai_endpoints import ChatNVIDIA

# Take the API key from the environment rather than embedding it in the notebook:
# a literal placeholder string is not a valid credential, and pasting a real key
# here would leak it whenever the notebook is shared.
nvidia = os.environ.get('NVIDIA_API_KEY')
if not nvidia:
    raise RuntimeError(
        "NVIDIA_API_KEY is not set. Export it (or assign os.environ['NVIDIA_API_KEY']) "
        "before running this cell."
    )

# Chat model used by every validation call in this notebook.
llm2 = ChatNVIDIA(
    model="meta/llama-3.1-70b-instruct",
    api_key=nvidia,
    temperature=0.2,
    top_p=0.7,
    max_tokens=30000,
    stream=True,
)

In [None]:
# Prompt template for one validation call. It has two placeholders: {tc} (the
# test-case step under check) and {log} (one log window). The model is asked to
# answer with "Label:", "Confidence Score:" and "Explanation:" fields, which
# run_test later extracts with regular expressions.
# NOTE(review): the first example calls "RRCSetup" vs "rrcSetup" "exactly the same",
# while the case-insensitive rule is only stated in the last example bullet —
# consider aligning the wording; the text is left untouched here because any change
# alters model behaviour.
_qa_prompt = """
You are an assistant for O-RAN.
You will be provided with a particular step from a test case, log files from an experiment, and your task is to validate if the step has been performed.
Each step explicitly mentions a **message name**. To consider the step as executed, the message name must appear **exactly as written in the test case description** in the log file. Even slight variations, additional words, or different formatting do not qualify as an exact match.

The step in consideration is:
{tc}

The log file is:
{log}

If and only if the exact message name is present in the log file, the label will be 'Yes'. In all other cases, the label will be 'No'. Do not assume or infer equivalence between different names.

For example:
- If the message name in the test case step is "RRCSetup" and the log file contains "rrcSetup," the label is 'Yes' because the names are exactly the same.
- If the message name in the test case step is "RRCSetupRequest" and the log file contains "NGSetupRequest," the label is 'No' because the names are not identical.
- If the message name in the test case step is "RRCSetupRequest" and the log file contains "RRCSetupRequest," the label is 'Yes' because the names are exactly the same.
- If the message name in the test case step is "UL RRC MESSAGE TRANSFER" and the log file contains the name in brackets like "F1 Application Protocol (ULRRCMessageTransfer)," you can ignore brackets and spaces in the step, and this would be considered a match with the label 'Yes'.
- If the message name in the test case step is "INITIAL CONTEXT SETUP REQUEST" and the log file contains "CONTEXT SETUP REQUEST," the label is 'No' because the messages are not identical.
- While matching message names, ignore spaces and compare them in lowercase, as names are not case-sensitive.

Now, is the given step incorporated in this log file? Remember that if and only if the exact message name is present in the log file, the label will be 'Yes'. In all other cases, the label will be 'No'.

Additionally, to confidently label the step as completed, you must validate the sending and receiving entities and the communication platform:
- 'mac-nr' corresponds to communication between a UE and the gNB-DU.
- 'f1ap' corresponds to communication between the gNB-DU and gNB-CU.
- 'ngap' corresponds to communication between the gNB-CU and the AMF.
- 'e1ap' corresponds to communication between the gNB-CU-CP and the gNB-CU-UP
- A gNB-DU has a unique ID denoted by GNB-DU-ID, a UE has a UEId, and a gNB-CU has a GNB-CU-ID.
- If you see a UEId or GNB-DU-ID in 'mac-nr', it indicates communication between a UE and a gNB-DU, which helps further validate if the step was correctly executed.
- Ensure the communication occurs between the correct entities, and the message matches the step.
- For example, if the step specifies that the gNB-CU sends the "UL RRC MESSAGE TRANSFER" message to the gNB-DU, the log should mention the GNB-DU-ID and the corresponding message name in a matching form.

Please generate the label, a confidence score between 0-100, and a brief explanation of the result.

Label:
Confidence Score:
Explanation:
"""
# Wrap the raw text so the chain can substitute {tc} and {log} per call.
qa_prompt = PromptTemplate.from_template(_qa_prompt)


In [None]:
# Ordered test-case steps to validate against the log. run_test addresses them
# 1-based (step N is tc_steps[N - 1]), and each sentence is inserted verbatim
# into the prompt's {tc} placeholder.
# NOTE(review): the order of steps 12-16 here (SecurityModeCommand,
# SecurityModeComplete, UE CONTEXT SETUP RESPONSE, BEARER CONTEXT MODIFICATION
# REQUEST/RESPONSE) differs from the signaling_messages table used by the plotting
# cell — confirm which ordering is intended.
tc_steps = [
"The UE sends an RRCSetupRequest to the gNB-DU.",
"The gNB-DU forwards an INITIAL UL RRC MESSAGE TRANSFER to the gNB-CU-CP.",
"The gNB-CU-CP sends a DL RRC MESSAGE TRANSFER to the gNB-DU.",
"The gNB-DU sends an RRCSetup message to the UE.",
"The UE responds with RRCSetupComplete to the gNB-DU.",
"The gNB-DU sends an UL RRC MESSAGE TRANSFER to the gNB-CU-CP.",
"The gNB-CU-CP forwards an INITIAL UE MESSAGE to the AMF.",
"The AMF sends an INITIAL CONTEXT SETUP REQUEST to the gNB-CU-CP.",
"The gNB-CU-CP sends a BEARER CONTEXT SETUP REQUEST to the gNB-CU-UP.",
"The gNB-CU-UP responds with a BEARER CONTEXT SETUP RESPONSE.",
"The gNB-CU-CP sends a UE CONTEXT SETUP REQUEST to the gNB-DU.",
"The gNB-DU sends a SecurityModeCommand to the UE.",
"The UE responds with a SecurityModeComplete message to the gNB-DU.",
"The gNB-DU sends a UE CONTEXT SETUP RESPONSE to the gNB-CU-CP.",
"The gNB-CU-CP sends a BEARER CONTEXT MODIFICATION REQUEST to the gNB-CU-UP.",
"The gNB-CU-UP responds with a BEARER CONTEXT MODIFICATION RESPONSE.",
"The UE sends an UL RRC MESSAGE TRANSFER to the gNB-DU.",
"The gNB-DU sends a DL RRC MESSAGE TRANSFER to the UE.",
"The UE sends an RRCReconfiguration message to the gNB-DU.",
"The UE responds with an RRCReconfigurationComplete message to the gNB-DU.",
"The gNB-DU sends an UL RRC MESSAGE TRANSFER to the gNB-CU-CP.",
"The gNB-CU-CP sends an INITIAL CONTEXT SETUP RESPONSE to the AMF."
]


In [None]:
from langchain import PromptTemplate, LLMChain

# Chain that renders qa_prompt with the per-call inputs and sends it to llm2;
# verbose=False keeps the rendered prompt out of the cell output.
conversation = LLMChain(prompt=qa_prompt, llm=llm2, verbose=False)

In [None]:
import re
import json

def run_test(step, log_window, window_index):
    """Ask the LLM whether one test-case step is evidenced by one log window.

    Args:
        step: 1-based step number; the step text is ``tc_steps[step - 1]``.
        log_window: The log window passed to the prompt's ``{log}`` slot.
        window_index: Index of ``log_window``; only recorded in the result.

    Returns:
        dict with keys ``step``, ``log_window`` (the index), ``label``
        ("Yes"/"No", another captured word, or "Not Found" when no label
        could be parsed), ``confidence_score`` (int, 0 when not parsed) and
        ``text`` (the raw model answer).

    Raises:
        IndexError: If ``step`` is outside ``1..len(tc_steps)``.
    """
    # Without this guard step 0 (or a negative step) would silently wrap around
    # to the end of the list instead of failing.
    if not 1 <= step <= len(tc_steps):
        raise IndexError(f"step {step} is outside 1..{len(tc_steps)}")

    out = conversation({"tc": tc_steps[step - 1], "log": log_window})
    text = out['text']

    # Chat models frequently wrap the fields in Markdown or quotes
    # ("**Label:** Yes", "Label: 'Yes'"), so skip whitespace, '*', '_',
    # backticks and quotes between the field name and its value.
    decoration = r'[\s*_`"\x27]*'
    label_match = re.search(r'Label\s*:' + decoration + r'(\w+)', text)
    confidence_match = re.search(r'Confidence Score\s*:' + decoration + r'(\d+)', text)

    label = label_match.group(1) if label_match else "Not Found"
    # Callers compare against the exact string "Yes"; normalise case variants
    # such as "YES"/"yes" so they are not treated as a miss.
    if label.lower() in ("yes", "no"):
        label = label.capitalize()
    confidence_score = int(confidence_match.group(1)) if confidence_match else 0

    print(f"Step {step}: Label={label}, Confidence Score={confidence_score}")

    return {
        "step": step,
        "log_window": window_index,
        "label": label,
        "confidence_score": confidence_score,
        "text": text
    }

# First validation pass: walk the test-case steps in order while scanning the log
# windows strictly forward. Every window is checked once against the current step;
# a "Yes" advances to the next step, and the scan continues from the following window.
results = []   # every LLM verdict, in the order the checks were made (saved below)
path = []      # same records plus a final "Not Found" marker if the windows run out
step = 1       # steps are 1-based
window_index = 0
results_path = "/content/results_UE-attach over e1-f1.json"

while step <= len(tc_steps):

    print(f"Processing Step {step}...")

    while window_index < len(log_data):
        log_window = log_data[window_index]

        print(f"Checking log window {window_index}...")

        # Run test
        result = run_test(step, log_window, window_index)
        results.append(result)
        path.append(result)

        # The window is consumed whether or not it matched.
        window_index += 1

        if result["label"] == "Yes":
            print(f"Step {step} condition met, moving to next step.")
            step += 1
            break  # Move to the next step immediately
        print(f"Step {step} condition not met, trying next window.")
    else:
        # The inner loop ended without a match: no window is left for this step.
        print(f"Step {step} exhausted all log windows. Algorithm converged.")
        path.append({
            "step": step,
            "log_window": None,
            "label": "Not Found",
            "confidence_score": 0,
            "text": "The algorithm has converged as we have exhausted the log windows."
        })
        break  # Terminate the loop

# Save results to a file
with open(results_path, "w") as f:
    json.dump(results, f, indent=4)

# Report the file that was actually written (the old message named a different file).
print(f"Processing complete. Results saved to {results_path}")

In [None]:
import re
import json

# Reload the records written by the first validation pass.
with open("/content/results_UE-attach over e1-f1.json", "r") as f:
    old_results = json.load(f)

# Log windows already matched to a step: every record labelled "Yes" that
# carries a "log_window" key contributes its window index.
used_indices = {
    record["log_window"]
    for record in old_results
    if record.get("label") == "Yes" and "log_window" in record
}

# The second pass appends to a shallow copy of the earlier records and
# collects unmatched steps separately.
anomalies = []
results = old_results.copy()

# Display the set as the cell output.
used_indices

In [None]:
# Second pass: for each remaining step, search ALL log windows that have not
# already been matched (not just the ones after the previous match).
resume_step = 14  # first step handled by this pass
for step in range(resume_step, len(tc_steps) + 1):
    print(f"\nProcessing Step {step}...")
    found = False

    # Iterate over log_data, skipping indices that were used before.
    for window_index, log_window in enumerate(log_data):
        if window_index in used_indices:
            continue  # Skip already used log windows

        print(f"Checking log window {window_index} for Step {step}...")
        result = run_test(step, log_window, window_index)
        # Mirror the window index under "log_index" as well (run_test already
        # stores it under "log_window").
        result["log_index"] = window_index

        if result["label"] == "Yes":
            print(f"Step {step} condition met at log window {window_index}.")
            results.append(result)
            used_indices.add(window_index)
            found = True
            break  # Proceed to the next step as soon as a match is found.

    if not found:
        print(f"Step {step} condition not met in any remaining log windows. Marking as anomaly.")
        # Carry both keys so anomaly records share the schema of every other
        # record ("log_window" is what the plotting cell reads).
        anomaly_record = {
            "step": step,
            "log_window": None,
            "log_index": None  # No valid log window was found for this step.
        }
        anomalies.append(anomaly_record)
        # Optionally, if you wish to stop further processing after the first anomaly, you could break here.
        # break

# Save the updated overall results and the anomalies to separate files.
with open("/content/results_updated.json", "w") as f:
    json.dump(results, f, indent=4)

with open("/content/anomalies.json", "w") as f:
    json.dump(anomalies, f, indent=4)

print("Processing complete. Results saved to results_updated.json and anomalies.json")


In [None]:
import json
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from matplotlib.patches import FancyArrowPatch, Rectangle
import matplotlib
matplotlib.use('Agg')  # Use non-interactive backend

# ---------- 1. Main results: one record per LLM check ----------
with open("/content/results_UE-attach over e1-f1.json", "r") as f:
    results = json.load(f)

# A record's x-position in the plot is simply its position in the results list.
log_indices = list(range(len(results)))
steps = [record["step"] for record in results]

# Records labelled "Yes", kept as parallel position / step lists.
_yes_hits = [
    (pos, record["step"])
    for pos, record in enumerate(results)
    if record["label"] == "Yes"
]
success_indices = [pos for pos, _ in _yes_hits]
success_steps = [step_no for _, step_no in _yes_hits]

# ---------- 2. Overlay data plotted as "anomalies" ----------
# NOTE(review): this reads results_updated.json, not anomalies.json — confirm
# that is the intended source.
with open("/content/results_updated.json", "r") as f:
    anomalies = json.load(f)

# Only records beyond step 10 are drawn in the overlay.
filtered_anomalies = [record for record in anomalies if record["step"] > 10]
anomaly_steps = [record["step"] for record in filtered_anomalies]
# A missing window (None) is plotted at the dummy x-position -1.
anomaly_log_indices = [
    -1 if record["log_window"] is None else record["log_window"]
    for record in filtered_anomalies
]

# ---------- 3. Split the main records into two plotted segments ----------
#    Segment A: steps 1 to 10  (solid line)
#    Segment B: steps 11 to 22 (dashed line)
def _collect_segment(records, first_step, last_step):
    """Return parallel (steps, positions, labels) lists for records in a step range."""
    picked = [
        (record["step"], pos, record["label"])
        for pos, record in enumerate(records)
        if first_step <= record["step"] <= last_step
    ]
    return (
        [item[0] for item in picked],
        [item[1] for item in picked],
        [item[2] for item in picked],
    )

segA_steps, segA_indices, segA_labels = _collect_segment(results, 1, 10)
segB_steps, segB_indices, segB_labels = _collect_segment(results, 11, 22)

# Participants (lifelines) of the sequence diagram, left to right.
participants = ["UE", "gNB-DU", "gNB-CU-CP", "gNB-CU-UP", "AMF"]

# Message flows between participants: (source, target, step, message).
# Steps 7-10 follow the endpoints stated in tc_steps: INITIAL UE MESSAGE and
# INITIAL CONTEXT SETUP REQUEST are exchanged between gNB-CU-CP and AMF, and the
# bearer context setup request goes from gNB-CU-CP to gNB-CU-UP with the
# response coming back.
message_flows = [
    ("UE", "gNB-DU", 1, "RRCSetupRequest"),
    ("gNB-DU", "gNB-CU-CP", 2, "INITIAL UL RRC MESSAGE TRANSFER"),
    ("gNB-CU-CP", "gNB-DU", 3, "DL RRC MESSAGE TRANSFER"),
    ("gNB-DU", "UE", 4, "RRCSetup"),
    ("UE", "gNB-DU", 5, "RRCSetupComplete"),
    ("gNB-DU", "gNB-CU-CP", 6, "UL RRC MESSAGE TRANSFER"),
    ("gNB-CU-CP", "AMF", 7, "INITIAL UE MESSAGE"),
    ("AMF", "gNB-CU-CP", 8, "INITIAL CONTEXT SETUP REQUEST"),
    ("gNB-CU-CP", "gNB-CU-UP", 9, "BEARER CONTEXT SETUP REQUEST"),
    ("gNB-CU-UP", "gNB-CU-CP", 10, "BEARER CONTEXT SETUP RESPONSE"),
    ("gNB-CU-CP", "gNB-DU", 11, "UE CONTEXT SETUP REQUEST"),
    ("gNB-DU", "UE", 12, "SecurityModeCommand"),
    ("gNB-DU", "gNB-CU-CP", 13, "UE CONTEXT SETUP RESPONSE"),
    ("gNB-CU-CP", "gNB-CU-UP", 14, "BEARER CONTEXT MODIFICATION REQUEST"),
    ("gNB-CU-UP", "gNB-CU-CP", 15, "BEARER CONTEXT MODIFICATION RESPONSE"),
    ("UE", "gNB-DU", 16, "SecurityModeComplete"),
    ("gNB-DU", "gNB-CU-CP", 17, "UL RRC MESSAGE TRANSFER"),
    ("gNB-CU-CP", "gNB-DU", 18, "DL RRC MESSAGE TRANSFER"),
    ("gNB-DU", "UE", 19, "RRCReconfiguration"),
    ("UE", "gNB-DU", 20, "RRCReconfigurationComplete"),
    ("gNB-DU", "gNB-CU-CP", 21, "UL RRC MESSAGE TRANSFER"),
    ("gNB-CU-CP", "AMF", 22, "INITIAL CONTEXT SETUP RESPONSE")
]

# Step number -> message name, derived from message_flows so the two tables
# cannot drift apart.
signaling_messages = {step_no: message for _, _, step_no, message in message_flows}

# Create a figure with two subplots side by side
fig = plt.figure(figsize=(20, 10))
gs = gridspec.GridSpec(1, 2, width_ratios=[1, 1.5])

# ===== Left subplot: Sequence Diagram =====
ax1 = fig.add_subplot(gs[0])

# Each participant gets an integer x-position, starting at 1.
participant_x = {name: column for column, name in enumerate(participants, start=1)}

for participant, x_pos in participant_x.items():
    # Header box with the participant name at the top
    ax1.add_patch(
        Rectangle((x_pos-0.42, 22.4), 0.85, 0.6, facecolor='white', edgecolor='black')
    )
    ax1.text(x_pos, 22.7, participant, ha='center', va='center', fontsize=12, fontweight='bold')
    # Vertical lifeline. Colour and line style are given as keywords only:
    # combining a 'k-' format string with linestyle= makes matplotlib warn
    # about a redundant definition.
    ax1.plot([x_pos, x_pos], [0, 22.5], color='k', linestyle='-', alpha=0.3)
    # Participant name repeated at the bottom (for clarity)
    ax1.text(x_pos, -0.5, participant, ha='center', va='center', fontsize=11, fontweight='bold')

# Evenly spaced y-positions, one per step 1..22, running downward from the top.
message_y = {i: 22 - i * 0.95 for i in range(1, 23)}

# Draw arrows for message flows and add labels
for source, target, step, message in message_flows:
    source_x = participant_x[source]
    target_x = participant_x[target]
    y = message_y[step]

    # Pull both arrow ends 0.1 in from the lifelines, on the side facing the
    # other participant (sign depends on the arrow direction).
    gap = 0.1 if target_x - source_x > 0 else -0.1
    ax1.annotate('', xy=(target_x - gap, y), xytext=(source_x + gap, y),
                 arrowprops=dict(arrowstyle='->', color='black', linewidth=1))

    # Label centred above the arrow (the same placement for both directions).
    ax1.text((source_x + target_x) / 2, y + 0.2, f"{step}.{message}",
             ha='center', va='bottom', fontsize=9, rotation=0)

ax1.set_xlim(0, len(participants) + 1)
ax1.set_ylim(-1, 24)
#ax1.set_title("Signaling Message Sequence Diagram")
ax1.axis('off')  # Hide axes

# ===== Right subplot: Hierarchical Log Processing =====
ax2 = fig.add_subplot(gs[1])

# Segment A (steps 1 to 10), solid line. x = position in the results list, y = step.
ax2.plot(
    segA_indices,
    segA_steps,
    marker="o",
    linestyle="-",
    color="blue",
    label="Val-LLM",
    zorder=2
)

# Text marker placed a little to the right of the last Segment A point.
if segA_steps:
    ax2.text(
        segA_indices[-1] + 5,  # small horizontal offset
        11.5,                  # just below the step-11 level
        "Log Windows Exhausted at Step 11",
        fontsize=12,
        fontweight="bold",
        ha="left",
        va="center",
        color="red"
    )

# Segment B (steps 11 to 22), dashed line.
ax2.plot(
    segB_indices,
    segB_steps,
    marker="o",
    linestyle="--",
    color="black",
    label="Debug-LLM",
    zorder=2
)

# Overlay records: red squares joined by a dotted line.
ax2.scatter(
    anomaly_log_indices,
    anomaly_steps,
    color="red",
    marker="s",
    s=100,
    label="Anomalous Steps",
    zorder=3
)
ax2.plot(
    anomaly_log_indices,
    anomaly_steps,
    linestyle=":",
    color="black",
    linewidth=2,
    label="Anomaly Connection",
    zorder=3
)

# Label each step at the x-position of its FIRST occurrence: look in the main
# results first, then fall back to the overlay data.
first_result_x = {}
for position, step_no in enumerate(steps):
    first_result_x.setdefault(step_no, position)
first_anomaly_x = {}
for step_no, x_value in zip(anomaly_steps, anomaly_log_indices):
    first_anomaly_x.setdefault(step_no, x_value)

for step, message in signaling_messages.items():
    log_idx = first_result_x.get(step, first_anomaly_x.get(step))
    if log_idx is None:
        continue  # step appears in neither data set; nothing to annotate
    ax2.text(
        log_idx + 0.5,  # Small horizontal offset
        step,           # At the step level
        f"{step}. {message}",
        fontsize=11,
        va="center",
        ha="left",
        color="black",
        bbox=dict(facecolor='white', alpha=0.7, edgecolor='none')
    )

# Final plot adjustments
ax2.set_ylabel("Step Number", fontweight='bold', fontsize=14)
ax2.set_xlabel("Log Index", fontweight='bold', fontsize=14)
#ax2.set_title("Hierarchical Log Processing")
ax2.legend()
ax2.grid(axis="y", linestyle="--", alpha=0.7)
ax2.set_ylim(0, 23)
# default=0 keeps the cell from raising when both data sets are empty.
ax2.set_xlim(-2, max(log_indices + anomaly_log_indices, default=0) + 15)
ax2.invert_yaxis()

plt.tight_layout()

# Save BEFORE showing: on interactive/inline backends the figure may be closed
# by show(), and a later savefig would then write an empty canvas.
fig.savefig("sequence_diagram.svg", bbox_inches='tight', format='svg')
plt.show()
