### 라이브러리 Import

In [1]:
import os
import pandas as pd
import networkx as nx
import plotly.graph_objects as go
import pickle
from datetime import datetime

### 경로 설정

In [2]:
# ───────────────────────────────
# 전처리된 파일들이 저장된 폴더
# ───────────────────────────────
BASE_PREPROCESSED_DIR = r"C:\Users\idle9\Desktop\Naver MYBOX\AISLab\Dataset\AWID3_Dataset_CSV\Preprocessed"

# 예시: 각 공격별 폴더 (필요한 공격의 폴더 경로 사용)
DATA_DIR_1 = os.path.join(BASE_PREPROCESSED_DIR, "1.Deauth")
DATA_DIR_2 = os.path.join(BASE_PREPROCESSED_DIR, "2.Disas")
DATA_DIR_3 = os.path.join(BASE_PREPROCESSED_DIR, "3.(Re)Assoc")
DATA_DIR_4 = os.path.join(BASE_PREPROCESSED_DIR, "4.Rogue_AP")
DATA_DIR_5 = os.path.join(BASE_PREPROCESSED_DIR, "5.Krack")
DATA_DIR_6 = os.path.join(BASE_PREPROCESSED_DIR, "6.Kr00k")
DATA_DIR_7 = os.path.join(BASE_PREPROCESSED_DIR, "7.SSH")
DATA_DIR_8 = os.path.join(BASE_PREPROCESSED_DIR, "8.Botnet")
DATA_DIR_9 = os.path.join(BASE_PREPROCESSED_DIR, "9.Malware")
DATA_DIR_10 = os.path.join(BASE_PREPROCESSED_DIR, "10.SQL_Injection")
DATA_DIR_11 = os.path.join(BASE_PREPROCESSED_DIR, "11.SSDP")
DATA_DIR_12 = os.path.join(BASE_PREPROCESSED_DIR, "12.Evil_Twin")
DATA_DIR_13 = os.path.join(BASE_PREPROCESSED_DIR, "13.Website_spoofing")

# ───────────────────────────────
# 스냅샷 저장 폴더 생성 (현재 작업 디렉토리 내)
# ───────────────────────────────
SNAPSHOT_DIR = os.path.join(os.getcwd(), "Snapshots\\train")
if not os.path.exists(SNAPSHOT_DIR):
    os.makedirs(SNAPSHOT_DIR)

### 데이터 로딩

In [3]:
# ───────────────────────────────
# 데이터 로딩 함수 (각 공격 폴더 내 train_preprocessed.csv 파일 읽기)
# ───────────────────────────────
def load_data_from_directory(data_dir):
    """
    주어진 디렉토리 내의 train_preprocessed.csv 파일을 읽어 DataFrame으로 반환.
    """
    file_path = os.path.join(data_dir, "train_preprocessed.csv")
    if os.path.exists(file_path):
        print(f"Loading {os.path.basename(file_path)} from {data_dir}...")
        df = pd.read_csv(file_path)
        print(f"Loaded {len(df)} records from {data_dir}.")
        return df
    else:
        print(f"File {file_path} does not exist in {data_dir}.")
        return pd.DataFrame()

### 그래프 생성

In [4]:
def create_graph_from_snapshot(snapshot_df, drop_label=False):
    G = nx.Graph()
    for _, row in snapshot_df.iterrows():
        src = row.get('wlan.sa')
        dst = row.get('wlan.da')
        if pd.isna(src) or pd.isna(dst):
            continue

        packet_info = row.to_dict()
        packet_info.pop('wlan.sa', None)
        packet_info.pop('wlan.da', None)
        if drop_label:
            packet_info.pop('label', None)

        if src not in G:
            G.add_node(src, role='src')
        else:
            if G.nodes[src].get('role') != 'src':
                G.nodes[src]['role'] = 'both'

        if dst not in G:
            G.add_node(dst, role='dst')
        else:
            if G.nodes[dst].get('role') != 'dst':
                G.nodes[dst]['role'] = 'both'

        if G.has_edge(src, dst):
            G[src][dst]['count'] += 1
            G[src][dst]['features'].append(packet_info)
        else:
            G.add_edge(src, dst, count=1, features=[packet_info])
    return G

### Event + Session 기반 스냅샷 생성

In [5]:
def generate_event_session_based_snapshots_with_time_fixed(df, attack_name, output_dir, drop_label=True):
    EVENT_CODES = {'0x0C','0x0A','0x0B','0x02'}
    df = df.sort_values('frame.time_epoch')
    attack_dir = os.path.join(output_dir, attack_name)
    os.makedirs(attack_dir, exist_ok=True)

    snapshots = []
    buf = []
    curr_sess = None
    counter = 0

    for _, row in df.iterrows():
        sess = "_".join(map(str, (
            row.get('wlan.sa',''),
            row.get('wlan.da',''),
            row.get('tcp.srcport',0),
            row.get('tcp.dstport',0),
            row.get('ip.proto','')
        )))
        subtype = str(row.get('wlan.fc.subtype',''))
        event_trig = (subtype in EVENT_CODES)

        if buf and (sess != curr_sess or event_trig):
            snapshot_df = pd.DataFrame(buf)
            G = create_graph_from_snapshot(snapshot_df, drop_label=drop_label)
            ts = float(buf[-1]['frame.time_epoch'])
            tstr = datetime.utcfromtimestamp(ts).strftime('%Y%m%d_%H%M%S')
            fname = f"{attack_name}_snapshot_{counter:03d}_{tstr}.gpickle"
            path = os.path.join(attack_dir, fname)
            with open(path, 'wb') as f:
                pickle.dump(G, f)
            snapshots.append((G, tstr))
            counter += 1
            buf.clear()

        buf.append(row.to_dict())
        curr_sess = sess

    if buf:
        snapshot_df = pd.DataFrame(buf)
        G = create_graph_from_snapshot(snapshot_df, drop_label=drop_label)
        ts = float(buf[-1]['frame.time_epoch'])
        tstr = datetime.utcfromtimestamp(ts).strftime('%Y%m%d_%H%M%S')
        fname = f"{attack_name}_snapshot_{counter:03d}_{tstr}.gpickle"
        path = os.path.join(attack_dir, fname)
        with open(path, 'wb') as f:
            pickle.dump(G, f)
        snapshots.append((G, tstr))

    print(f"▶ {attack_name}: 총 {len(snapshots)}개 스냅샷 생성 → {attack_dir}")
    return snapshots

### 스냅샷 비교

In [6]:
def compare_snapshots(G1, G2):
    return {
        'added_nodes': set(G2.nodes()) - set(G1.nodes()),
        'removed_nodes': set(G1.nodes()) - set(G2.nodes()),
        'added_edges': set(G2.edges()) - set(G1.edges()),
        'removed_edges': set(G1.edges()) - set(G2.edges())
    }

### 시각화

In [7]:
def visualize_graph_3d(G):
    pos = nx.spring_layout(G, dim=3, seed=42)
    x_nodes, y_nodes, z_nodes, node_colors = [], [], [], []
    for node in G.nodes():
        x, y, z = pos[node]
        x_nodes.append(x); y_nodes.append(y); z_nodes.append(z)
        role = G.nodes[node].get('role', 'default')
        color = 'gray'
        if role == 'src': color = 'red'
        elif role == 'dst': color = 'blue'
        elif role == 'both': color = 'green'
        node_colors.append(color)

    node_trace = go.Scatter3d(x=x_nodes, y=y_nodes, z=z_nodes, mode='markers',
        marker=dict(size=4, color=node_colors), text=list(G.nodes()))

    edge_x, edge_y, edge_z = [], [], []
    for u, v in G.edges():
        x0, y0, z0 = pos[u]
        x1, y1, z1 = pos[v]
        edge_x += [x0, x1, None]
        edge_y += [y0, y1, None]
        edge_z += [z0, z1, None]

    edge_trace = go.Scatter3d(x=edge_x, y=edge_y, z=edge_z, mode='lines', line=dict(width=2, color='gray'))
    fig = go.Figure(data=[edge_trace, node_trace])
    fig.update_layout(title='3D Snapshot', margin=dict(l=0, r=0, t=30, b=0))
    fig.show()

### 스냅샷 정보 출력

In [8]:
def load_and_print_snapshot_info(attack_name):
    attack_dir = os.path.join(SNAPSHOT_DIR, attack_name)
    if not os.path.exists(attack_dir):
        print(f"No directory found for attack: {attack_name}")
        return
    for fname in sorted(os.listdir(attack_dir)):
        if not fname.endswith('.gpickle'):
            continue
        fpath = os.path.join(attack_dir, fname)
        try:
            with open(fpath, 'rb') as f:
                G = pickle.load(f)
            print(f"\nSnapshot: {fname}")
            print(f"  Nodes: {len(G.nodes())}, Edges: {len(G.edges())}")
            print(f"  Node list: {list(G.nodes())}")
            print(f"  Edge list: {list(G.edges())}")
        except Exception as e:
            print(f"Error loading {fname}: {e}")


### 데이터 로딩

In [9]:
# 데이터 로딩
df1 = load_data_from_directory(DATA_DIR_1)
df2 = load_data_from_directory(DATA_DIR_2)
df3 = load_data_from_directory(DATA_DIR_3)
# df4 = load_data_from_directory(DATA_DIR_4)
# df5 = load_data_from_directory(DATA_DIR_5)
# df6 = load_data_from_directory(DATA_DIR_6)
# df7 = load_data_from_directory(DATA_DIR_7)
# df8 = load_data_from_directory(DATA_DIR_8)
# df9 = load_data_from_directory(DATA_DIR_9)
# df10 = load_data_from_directory(DATA_DIR_10)
# df11 = load_data_from_directory(DATA_DIR_11)
# df12 = load_data_from_directory(DATA_DIR_12)
# df13 = load_data_from_directory(DATA_DIR_13)

# 데이터셋과 공격 이름 매핑
datasets = {
    'df1': (df1, 'Deauth'),
    'df2': (df2, 'Disas'),
    'df3': (df3, '(Re)Assoc'),
    # 'df4': (df4, 'Rogue_AP'),
    # 'df5': (df5, 'Krack'),
    # 'df6': (df6, 'Kr00k'),
    # 'df7': (df7, 'SSH'),
    # 'df8': (df8, 'Botnet'),
    # 'df9': (df9, 'Malware'),
    # 'df10': (df10, 'SQL_Injection'),
    # 'df11': (df11, 'SSDP'),
    # 'df12': (df12, 'Evil_Twin'),
    # 'df13': (df13, 'Website_spoofing')
}

Loading train_preprocessed.csv from C:\Users\idle9\Desktop\Naver MYBOX\AISLab\Dataset\AWID3_Dataset_CSV\Preprocessed\1.Deauth...
Loaded 1138528 records from C:\Users\idle9\Desktop\Naver MYBOX\AISLab\Dataset\AWID3_Dataset_CSV\Preprocessed\1.Deauth.
Loading train_preprocessed.csv from C:\Users\idle9\Desktop\Naver MYBOX\AISLab\Dataset\AWID3_Dataset_CSV\Preprocessed\2.Disas...
Loaded 1409601 records from C:\Users\idle9\Desktop\Naver MYBOX\AISLab\Dataset\AWID3_Dataset_CSV\Preprocessed\2.Disas.
Loading train_preprocessed.csv from C:\Users\idle9\Desktop\Naver MYBOX\AISLab\Dataset\AWID3_Dataset_CSV\Preprocessed\3.(Re)Assoc...
Loaded 1290756 records from C:\Users\idle9\Desktop\Naver MYBOX\AISLab\Dataset\AWID3_Dataset_CSV\Preprocessed\3.(Re)Assoc.


### 스냅샷 생성

In [10]:
# 스냅샷 생성
all_snapshots = {}
for name, (df, attack_name) in datasets.items():
    print(f"\nProcessing dataset: {name} ({attack_name})")
    paths = generate_event_session_based_snapshots_with_time_fixed(
        df, attack_name, SNAPSHOT_DIR, drop_label=True
    )
    all_snapshots[attack_name] = paths



Processing dataset: df1 (Deauth)
▶ Deauth: 총 560961개 스냅샷 생성 → c:\Users\idle9\Desktop\Naver MYBOX\AISLab\실험\SCI\AWID3-MSA_Graph_Detection\Snapshots\train\Deauth

Processing dataset: df2 (Disas)
▶ Disas: 총 712053개 스냅샷 생성 → c:\Users\idle9\Desktop\Naver MYBOX\AISLab\실험\SCI\AWID3-MSA_Graph_Detection\Snapshots\train\Disas

Processing dataset: df3 ((Re)Assoc)


ValueError: Invalid value NaN (not a number)

### 스냅샷 정보 출력

In [None]:
# 예시: 전체 공격에 대해 출력
for attack_name in ['Deauth', 
                    'Disas', 
                    '(Re)Assoc',
                    # 'Rogue_AP',
                    # 'Krack',
                    # 'Kr00k',
                    # 'SSH',
                    # 'Botnet',
                    # 'Malware',
                    # 'SQL_Injection',
                    # 'SSDP',
                    # 'Evil_Twin',
                    # 'Website_spoofing'
                    ]:
    load_and_print_snapshot_info(attack_name)


Snapshot: Deauth_snapshot_000_20201218_154258.gpickle
  Nodes: 2, Edges: 1
  Node list: ['13870904442420', '281474976710655']
  Edge list: [('13870904442420', '281474976710655')]

Snapshot: Deauth_snapshot_001_20201218_154258.gpickle
  Nodes: 2, Edges: 1
  Node list: ['128437431589470', '281474976710655']
  Edge list: [('128437431589470', '281474976710655')]

Snapshot: Deauth_snapshot_002_20201218_154258.gpickle
  Nodes: 2, Edges: 1
  Node list: ['13870904442420', '281474976710655']
  Edge list: [('13870904442420', '281474976710655')]

Snapshot: Deauth_snapshot_003_20201218_154258.gpickle
  Nodes: 2, Edges: 1
  Node list: ['128437431589470', '281474976710655']
  Edge list: [('128437431589470', '281474976710655')]

Snapshot: Deauth_snapshot_004_20201218_154258.gpickle
  Nodes: 2, Edges: 1
  Node list: ['13870904442420', '281474976710655']
  Edge list: [('13870904442420', '281474976710655')]

Snapshot: Deauth_snapshot_005_20201218_154259.gpickle
  Nodes: 2, Edges: 1
  Node list: ['13870

KeyboardInterrupt: 

### 시각화 및 비교

In [None]:
# 시각화 및 비교
for i, (graph, tstr) in enumerate(snapshots):
    print(f"\nSnapshot {i}: {tstr}")
    print(f"  Nodes: {len(graph.nodes())}, Edges: {len(graph.edges())}")
    print(f"  Node list: {list(graph.nodes())}")
    print(f"  Edge list: {list(graph.edges())}")
    if i < 3:
        visualize_graph_3d(graph)
    if i > 0:
        diff = compare_snapshots(snapshots[i-1][0], graph)
        print("  Changes:", diff)

NameError: name 'snapshots' is not defined