In [7]:
!pip install langchain_community

Collecting langchain_community
  Downloading langchain_community-0.3.17-py3-none-any.whl.metadata (2.4 kB)
Collecting langchain-core<1.0.0,>=0.3.34 (from langchain_community)
  Downloading langchain_core-0.3.34-py3-none-any.whl.metadata (5.9 kB)
Collecting langchain<1.0.0,>=0.3.18 (from langchain_community)
  Downloading langchain-0.3.18-py3-none-any.whl.metadata (7.8 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain_community)
  Downloading pydantic_settings-2.7.1-py3-none-any.whl.metadata (3.5 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain_community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting langchain-text-splitters<1.0.0,>=0.3.6 (from langchain<1.0.0,>=0.3.18->langchain_community)
  Downloading langchain_text_splitters-0.3.6-py3-none-any.whl.metadata (1.9 kB)
Collecting async-timeout<6.0,>=4.0 (from aiohttp<4.0.0,>=3.8.3->langchain_community)
  Downloading async_timeout-4.0.3-py3-none-any.whl.metadata (4.2 kB)
Collecting python-

In [3]:
import os
import re
from io import StringIO

import pandas as pd
from langchain.chat_models import AzureChatOpenAI
from langchain.schema import SystemMessage, HumanMessage
from tqdm import tqdm

# Azure OpenAI connection settings.
# SECURITY: the API key must never be stored in the notebook. It is read from
# the AZURE_OPENAI_API_KEY environment variable instead. Any key that was ever
# saved in a copy of this notebook should be treated as compromised and rotated.
_azure_openai_api_key = os.environ.get("AZURE_OPENAI_API_KEY")
if not _azure_openai_api_key:
    raise RuntimeError(
        "AZURE_OPENAI_API_KEY is not set. Export it (or load it from your "
        "notebook secrets into os.environ) before running this cell."
    )

Chat_Model = {
    "openai_api_type": "azure",
    "deployment_name": 'ic11-hack-43-gpt-4o-mini',
    "model_name": "gpt-4o-mini",
    "openai_api_base": "https://hac-openai-instance-03.openai.azure.com/",
    "openai_api_version": "2024-05-01-preview",
    "openai_api_key": _azure_openai_api_key,
}

# temperature=0 keeps the generated summaries as repeatable as the service allows.
chat_model = AzureChatOpenAI(
    openai_api_type=Chat_Model['openai_api_type'],
    deployment_name=Chat_Model['deployment_name'],
    model_name=Chat_Model['model_name'],
    azure_endpoint=Chat_Model['openai_api_base'],
    openai_api_version=Chat_Model['openai_api_version'],
    openai_api_key=Chat_Model['openai_api_key'],
    temperature=0,
)


# 1. Load Data
file_path = '/kaggle/input/log-net/network_logs.csv'
try:
    df = pd.read_csv(file_path)
except FileNotFoundError:
    # Re-raise instead of calling exit(): exit() ends the session without a
    # traceback, which hides the failure from anyone re-running the notebook.
    print(f"Error: The file {file_path} was not found.")
    raise
print("Data loaded successfully.")

# 2. Define Functions

def parse_log_message(message):
    """Extract structured fields from a single raw log message.

    Parameters
    ----------
    message : str
        Raw log text. Any non-string value (for example NaN from an empty
        CSV cell) produces a result with every field set to ``None`` instead
        of raising ``TypeError`` inside ``re.search``.

    Returns
    -------
    pandas.Series
        Fields ``interface_name``, ``if_index``, ``oper_status``,
        ``fpc_number`` and ``retry_count``. A field is ``None`` when the
        message does not contain it.
    """
    fields = {
        "interface_name": None,
        "if_index": None,
        "oper_status": None,
        "fpc_number": None,
        "retry_count": None,
    }

    if not isinstance(message, str):
        return pd.Series(fields)

    # First token that starts with "ge-", e.g. "ge-3/0/43" or "ge-3/0/43.0".
    interface_match = re.search(r"(ge-\S+)", message)
    if interface_match:
        fields["interface_name"] = interface_match.group(1)

    # ifIndex / ifOperStatus are only read from SNMP link up/down traps.
    if "SNMP_TRAP_LINK_DOWN" in message or "SNMP_TRAP_LINK_UP" in message:
        if_index_match = re.search(r"ifIndex (\d+)", message)
        if if_index_match:
            fields["if_index"] = int(if_index_match.group(1))

        # Accept both the bare numeric form ("ifOperStatus 2") and the
        # labelled form ("ifOperStatus down(2)"); the number is captured
        # in either case.
        oper_status_match = re.search(r"ifOperStatus (?:[A-Za-z]+\()?(\d+)", message)
        if oper_status_match:
            fields["oper_status"] = int(oper_status_match.group(1))

    # FPC number and retry count are only read from i2c retry messages.
    if "i2c access retry count" in message:
        fpc_match = re.search(r"(fpc\d+)", message)
        if fpc_match:
            fields["fpc_number"] = fpc_match.group(1)

        retry_count_match = re.search(r"retry count (\d+)", message)
        if retry_count_match:
            fields["retry_count"] = int(retry_count_match.group(1))

    # A Series lets the caller assign all fields to DataFrame columns at once.
    return pd.Series(fields)


def generate_problem_description(message, interface_name, fpc_number, retry_count, ip_address):
    """Ask the chat model for a one-sentence summary of a logged network problem.

    Parameters
    ----------
    message : str
        Raw log message, quoted verbatim in the prompt.
    interface_name : str or None
        Interface parsed from the message; when present the prompt names it.
    fpc_number : str or None
        FPC identifier parsed from the message; used only when no interface
        name is available.
    retry_count : int, float or None
        I2C retry count; mentioned only in the FPC prompt and only when a
        value is present.
    ip_address :
        Device address inserted into the prompt.

    Returns
    -------
    str
        The ``content`` of the model's response.
    """

    def _has_text(value):
        # Missing cells come back from pandas as None or NaN. NaN is truthy,
        # so a plain ``if value:`` would send "interface nan" to the model.
        return isinstance(value, str) and value != ""

    system_message = SystemMessage(
        content="You are a network monitoring system summarizing network issues. Provide a concise (one-sentence) description highlighting the key problem."
    )

    if _has_text(interface_name):
        prompt = (
            f"A network issue has been detected on device {ip_address}, interface {interface_name}. "
            f"The log message is: '{message}'. Briefly describe the core problem."
        )
    elif _has_text(fpc_number):
        if pd.isna(retry_count):
            # No count was parsed: leave the clause out rather than print "None"/"nan".
            retry_clause = ""
        else:
            # An integer column containing missing values is stored as float,
            # so 200 arrives as 200.0; render it as the integer it was parsed as.
            if isinstance(retry_count, float) and retry_count.is_integer():
                retry_count = int(retry_count)
            retry_clause = f" with retry count {retry_count}"
        prompt = (
            f"A network issue has been detected on device {ip_address}. "
            f"There are I2C errors on {fpc_number}{retry_clause}. "
            f"The log message is: '{message}'. Briefly describe the core problem."
        )
    else:
        prompt = (
            f"A network issue has been detected on device {ip_address}. "
            f"The log message is: '{message}'. Briefly describe the core problem."
        )

    response = chat_model.invoke([system_message, HumanMessage(content=prompt)])
    return response.content


# 3. Apply Parsing and Generate Descriptions
df[['interface_name', 'if_index', 'oper_status', 'fpc_number', 'retry_count']] = df['Message'].apply(parse_log_message)

# A row is critical when its log level is one of the keywords, or when the
# message text contains one of them (case-insensitive substring match).
critical_keywords = ["ERROR", "CRITICAL", "FAILURE", "DOWN"]
keyword_pattern = '|'.join(re.escape(keyword) for keyword in critical_keywords)
df['is_critical'] = (
    df['Log_Level'].isin(critical_keywords)
    # na=False: a missing message counts as "no keyword" and keeps the column boolean.
    | df['Message'].str.contains(keyword_pattern, case=False, na=False)
)

# 4. Generate Descriptions and save to file
output_buffer = StringIO()

# Only the first `top_n` critical rows are sent to the model (one API call each).
top_n = 100
critical_df = df[df['is_critical']].head(top_n)

try:
    # Wrap the loop with tqdm for progress tracking
    for index, row in tqdm(critical_df.iterrows(), total=len(critical_df), desc="Generating Problem Descriptions"):
        description = generate_problem_description(
            row['Message'],
            row['interface_name'],
            row['fpc_number'],
            row['retry_count'],
            row['IP_Address']
        )
        output_buffer.write(f"Log Entry {index}:\n{description}\n{'-'*50}\n")
finally:
    # Save whatever was generated even if a model call fails part-way through,
    # so one error does not discard the descriptions already produced.
    output_string = output_buffer.getvalue()
    # Explicit UTF-8: model output may contain characters the platform
    # default encoding cannot represent.
    with open("network_problem_descriptions.txt", "w", encoding="utf-8") as file:
        file.write(output_string)

# Optionally, you can print it to console too
print(output_string)

print("Problem descriptions generated and saved to network_problem_descriptions.txt")


Data loaded successfully.


Generating Problem Descriptions: 100%|██████████| 100/100 [01:34<00:00,  1.06it/s]

Log Entry 30:
The core problem is that the interface ge-3/0/43 on device 10.163.160.2 has been down for approximately 4174852599 milliseconds, indicating a prolonged outage.
--------------------------------------------------
Log Entry 31:
The core problem is that the interface ge-3/0/43.0 on device 10.163.160.2 has experienced an UpDown event, indicating a disruption in its broadcast multicast functionality.
--------------------------------------------------
Log Entry 32:
The core problem is that the interface ge-3/0/43 on device 10.163.160.2 has experienced an up/down event, indicating a potential connectivity issue affecting broadcast and multicast traffic.
--------------------------------------------------
Log Entry 33:
The core problem is a network interface (ge-3/0/43.0) on device 10.163.160.2 experiencing intermittent connectivity issues, indicated by an UpDown event related to a broadcast/multicast address.
--------------------------------------------------
Log Entry 34:
The cor


