In [1]:
import pandas as pd
import re

In [2]:
def parse_network_log(file_path, vm_name):
    """Parse a socket-listing text file into one row per socket.

    The first line of the file is treated as a header and skipped. Every
    other line is split on whitespace and read positionally: field 0 is the
    socket state, field 3 is the local ``address:port`` pair and everything
    from field 5 onwards is the owning-process description. Lines with fewer
    than five fields are ignored.

    Note (review): the positional layout looks like ``ss -tlnp`` output;
    confirm against whatever produces these files.

    Args:
        file_path: Path of the log file to read.
        vm_name: Label stored in the ``vm_name`` column of every row.

    Returns:
        pandas.DataFrame with the columns ``vm_name``, ``port``, ``process``
        and ``state``. The columns exist even when no line could be parsed,
        so callers can always index them.
    """
    columns = ['vm_name', 'port', 'process', 'state']
    data = []
    with open(file_path, 'r') as f:
        next(f, None)  # skip the header line (no-op on an empty file)
        for line in f:
            # maxsplit=5 keeps the process description in one piece even if
            # it contains spaces; the first five fields never do.
            parts = line.strip().split(None, 5)
            if len(parts) >= 5:
                # Local port is whatever follows the last ':' of the local
                # address, which also copes with IPv6 forms such as [::]:22.
                local_addr_port = parts[3]
                port = local_addr_port.split(':')[-1]
                process_info = parts[5] if len(parts) > 5 else "unknown"

                data.append({
                    'vm_name': vm_name,
                    'port': port,
                    'process': process_info,
                    'state': parts[0]
                })
    # Passing the column list guarantees the schema for an empty result.
    return pd.DataFrame(data, columns=columns)

In [None]:
def parse_audit_log(file_path, vm_name):
    """Collect the command lines recorded in ``proctitle=`` audit fields.

    Two spellings of the field are understood:

    * ``proctitle="cmd"`` - the quoted text is taken as the command.
    * ``proctitle=<hex digits>`` - the digits are decoded as bytes, NUL
      separators between arguments become spaces, and the result is decoded
      as UTF-8 (undecodable bytes are replaced).

    Lines without a ``proctitle=`` field, or whose value matches neither
    spelling, are skipped.

    Note (review): an unquoted plain-text value that happens to consist only
    of an even number of hex digits would be treated as hex; confirm that the
    logs are raw (not interpreted) audit output.

    Args:
        file_path: Path of the audit log to read.
        vm_name: Label stored in the ``vm_name`` column of every row.

    Returns:
        pandas.DataFrame with the columns ``vm_name`` and ``command``. The
        columns exist even when no command was found.
    """
    # [^"]* instead of a greedy .* so that a later quoted field on the same
    # line is not swallowed into the command.
    quoted_title = re.compile(r'proctitle="([^"]*)"')
    hex_title = re.compile(r'proctitle=((?:[0-9A-Fa-f]{2})+)(?=\s|$)')

    commands = []
    with open(file_path, 'r') as f:
        for line in f:
            if 'proctitle=' not in line:
                continue

            cmd_match = quoted_title.search(line)
            if cmd_match:
                command = cmd_match.group(1)
            else:
                cmd_match = hex_title.search(line)
                if not cmd_match:
                    continue
                raw_title = bytes.fromhex(cmd_match.group(1))
                # Arguments are NUL-separated inside the encoded title.
                command = (
                    raw_title.replace(b'\x00', b' ')
                    .decode('utf-8', errors='replace')
                    .strip()
                )

            commands.append({
                'vm_name': vm_name,
                'command': command
            })
    # Passing the column list guarantees the schema for an empty result.
    return pd.DataFrame(commands, columns=['vm_name', 'command'])

In [13]:
# Parse the network log and the audit log of each VM, one pair per VM.
df_net_front, df_audit_front = (
    parse_network_log('data/front_log/FrontVM_network-log.txt', 'FrontVM'),
    parse_audit_log('data/front_log/FrontVM_audit-log.txt', 'FrontVM'),
)
df_net_back, df_audit_back = (
    parse_network_log('data/back_log/BackVM_network-log.txt', 'BackVM'),
    parse_audit_log('data/back_log/BackVM_audit-log.txt', 'BackVM'),
)
df_net_db, df_audit_db = (
    parse_network_log('data/DB_log/DBVM_network-log.txt', 'DBVM'),
    parse_audit_log('data/DB_log/DBVM_audit-log.txt', 'DBVM'),
)

In [48]:
# Display the parsed FrontVM socket table as a sanity check on the parser.
df_net_front

Unnamed: 0,vm_name,port,process,state
0,FrontVM,6010,"users:((""sshd"",pid=18412,fd=8))",LISTEN
1,FrontVM,80,"users:((""nginx"",pid=15826,fd=5),(""nginx"",pid=1...",LISTEN
2,FrontVM,22,"users:((""sshd"",pid=1130,fd=3),(""systemd"",pid=1...",LISTEN
3,FrontVM,53,"users:((""systemd-resolve"",pid=633,fd=17))",LISTEN
4,FrontVM,53,"users:((""systemd-resolve"",pid=633,fd=15))",LISTEN
5,FrontVM,80,"users:((""nginx"",pid=15826,fd=6),(""nginx"",pid=1...",LISTEN
6,FrontVM,6010,"users:((""sshd"",pid=18412,fd=7))",LISTEN
7,FrontVM,22,"users:((""sshd"",pid=1130,fd=4),(""systemd"",pid=1...",LISTEN


In [15]:
def extract_features(df_net, df_audit):
    """Summarise one VM's parsed logs as a flat dict of numeric features.

    Args:
        df_net: Frame of parsed sockets; its ``port`` column is read. Ports
            are compared by their string form, so both ``'80'`` and ``80``
            count as port 80.
        df_audit: Frame of parsed commands; its ``command`` column is read.

    Returns:
        dict with the keys ``has_port_80``, ``has_port_3306``,
        ``has_port_5000`` (1 if the port occurs, else 0), ``cmd_count``
        (number of rows in ``df_audit``) and ``sudo_usage`` (1 if any command
        contains the text ``sudo``, else 0).

    A frame that lacks the expected column (for example an empty frame with
    no columns at all) contributes zeros instead of raising ``KeyError``.
    """
    # `in` on a DataFrame tests the column labels.
    if 'port' in df_net:
        seen_ports = set(df_net['port'].astype(str))
    else:
        seen_ports = set()

    if 'command' in df_audit:
        # astype(str) keeps the substring test below safe for non-string cells.
        commands = df_audit['command'].astype(str)
    else:
        commands = []

    features = {}

    # Network behaviour: which well-known ports does this VM expose?
    features['has_port_80'] = int('80' in seen_ports)
    features['has_port_3306'] = int('3306' in seen_ports)
    features['has_port_5000'] = int('5000' in seen_ports)

    # System behaviour: how many commands were recorded, and was sudo used?
    features['cmd_count'] = len(df_audit)
    features['sudo_usage'] = int(any('sudo' in cmd for cmd in commands))

    return features

In [16]:
# One feature dict per VM, computed in the order front, back, database.
front_features, back_features, db_features = (
    extract_features(net_frame, audit_frame)
    for net_frame, audit_frame in (
        (df_net_front, df_audit_front),
        (df_net_back, df_audit_back),
        (df_net_db, df_audit_db),
    )
)

In [22]:
import pandas as pd

In [24]:
# One row per VM, one column per feature.
vm_matrix = pd.DataFrame([
    front_features,
    back_features,
    db_features
])

# The rows are labelled through an index named 'vm'. If the feature dicts
# carry a 'vm' entry it is used; otherwise the labels are supplied here in
# the same order as the rows above, because set_index('vm') raises KeyError
# when no such column exists.
if 'vm' in vm_matrix.columns:
    vm_matrix = vm_matrix.set_index('vm')
else:
    vm_matrix.index = pd.Index(['FrontVM', 'BackVM', 'DBVM'], name='vm')

In [None]:
# Display the per-VM feature matrix (rows: VMs, columns: features).
vm_matrix

Unnamed: 0_level_0,has_port_80,has_port_3306,has_port_5000,cmd_count,sudo_usage
vm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
FrontVM,1,0,0,31,0
BackVM,0,0,1,3,0
DBVM,0,1,0,4,0


In [26]:
from sklearn.cluster import KMeans
import numpy as np

In [27]:
# Cluster on a separate, numeric-only frame rather than on an alias of
# vm_matrix: a later cell adds a text 'vm_name' column to vm_matrix, and with
# a plain alias re-running this cell afterwards would pass that text to KMeans.
data_matrix = vm_matrix.select_dtypes(include='number')

# NOTE(review): with three rows and n_clusters=3 every VM is expected to end
# up in a cluster of its own - confirm that this is the intended setup.
kmeans = KMeans(n_clusters=3, random_state=42)
clusters = kmeans.fit_predict(data_matrix)

In [35]:
# Attach the VM names to vm_matrix as a regular column. The list is assigned
# by position, so its order has to match the row order of vm_matrix.
vm_names = ['FrontVM', 'BackVM', 'DBVM']
vm_matrix['vm_name'] = vm_names

In [38]:
def get_raw_details(vm_name):
    """Return the parsed network frame that belongs to ``vm_name``.

    ``'FrontVM'`` maps to ``df_net_front`` and ``'BackVM'`` to
    ``df_net_back``; any other value falls back to ``df_net_db``.
    """
    if vm_name == 'FrontVM':
        raw_frame = df_net_front
    elif vm_name == 'BackVM':
        raw_frame = df_net_back
    else:
        raw_frame = df_net_db
    return raw_frame

In [41]:
# Build one profile per VM. Each profile is also collected in `vm_profiles`,
# because `vm_profile` is overwritten on every pass; once the loop has
# finished, `vm_profile` and `vm_name` refer to the last row of vm_matrix.
vm_profiles = []
for idx, row in vm_matrix.iterrows():
    vm_name = row['vm_name']
    df_raw = get_raw_details(vm_name)

    # Data that will be handed to the LLM
    vm_profile = {
        'VM': vm_name,
        'ActivePorts': df_raw['port'].unique().tolist(),
        'MainProcess': df_raw['process'].unique().tolist()
    }
    vm_profiles.append(vm_profile)

In [None]:
from langchain_ollama import ChatOllama
from langchain_core.prompts import ChatPromptTemplate

# Chat model handle that the policy-generation chain is built on.
llm = ChatOllama(model="thewindmom/llama3-med42-8b", temperature=0.8)

In [43]:
# The profile values are bound with .partial() instead of being pasted into
# the template text with an f-string. Pasted values become part of the text
# that is parsed as a template, so a '{' or '}' coming from the logs would be
# read as a placeholder; bound values are inserted after parsing and are safe.
# The rendered message text is the same as with direct interpolation.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "คุณคือผู้ดูแลระบบ log และให้คำแนะนำทาง cyber policy ที่เหมาะสม"),
        ("human", """Analyze the following network behavior and generate a Microsegmentation policy:
                VM Name: {vm_name}
                Detected Ports: {ports}
                Main Process: {processes}
    
                Instruction: Generate a "Deny-All, Allow-Specific" rule in human-readable format.""")
    ]
).partial(
    vm_name=str(vm_profile['VM']),
    ports=str(vm_profile['ActivePorts']),
    processes=str(vm_profile['MainProcess']),
)

In [51]:
# Pipe the prompt into the model to form a runnable chain, then run it once
# with the profile currently held in `vm_profile`.
chain = prompt | llm
response = chain.invoke({"vm_profile": vm_profile})

In [57]:
# Banner, model answer and a separator rule, each on its own line.
print(
    f"--- Policy for {vm_name} ---",
    response.content,
    "\n" + "="*50 + "\n",
    sep="\n",
)

--- Policy for DBVM ---
Based on the provided network behavior analysis of VM Name: DBVM, I recommend implementing the following Microsegmentation policy as a "Deny-All, Allow-Specific" rule:

1. Deny all incoming and outgoing traffic to/from ports other than 6010, 53, 33060, 22, and 3306.
2. Allow incoming SSH (port 22) traffic from trusted IP addresses only, specifically for authorized users and systems.
3. Enable MySQL communication on port 3306 exclusively between the VM (DBVM) and authenticated database clients, such as application servers or other databases.
4. Allow DNS queries and responses on UDP port 53 to resolve hostnames and support proper system operation without external access.
5. Enable SSH tunneling on port 6010 for secure communications with other systems (e.g., backup services).
6. Implement strict firewall rules and intrusion prevention measures at the network perimeter and virtual switch levels.

In summary, this policy establishes granular control over allowed tr