In [6]:
# ──────────────────────────────────────────────────────────
# 1) Import & 기본 경로 설정
# ──────────────────────────────────────────────────────────
import os
import pickle
from datetime import datetime, timedelta
import pandas as pd
import networkx as nx
import plotly.graph_objects as go

BASE_PREPROCESSED_DIR = r"C:\Users\idle9\Desktop\Naver MYBOX\AISLab\Dataset\AWID3_Dataset_CSV\Preprocessed"
DATA_DIR_1  = os.path.join(BASE_PREPROCESSED_DIR, "1.Deauth")
DATA_DIR_2 = os.path.join(BASE_PREPROCESSED_DIR, "2.Disas")
DATA_DIR_3 = os.path.join(BASE_PREPROCESSED_DIR, "3.(Re)Assoc")
DATA_DIR_4 = os.path.join(BASE_PREPROCESSED_DIR, "4.Rogue_AP")
DATA_DIR_5 = os.path.join(BASE_PREPROCESSED_DIR, "5.Krack")
DATA_DIR_6 = os.path.join(BASE_PREPROCESSED_DIR, "6.Kr00k")
DATA_DIR_7 = os.path.join(BASE_PREPROCESSED_DIR, "7.SSH")
DATA_DIR_8 = os.path.join(BASE_PREPROCESSED_DIR, "8.Botnet")
DATA_DIR_9 = os.path.join(BASE_PREPROCESSED_DIR, "9.Malware")
DATA_DIR_10 = os.path.join(BASE_PREPROCESSED_DIR, "10.SQL_Injection")
DATA_DIR_11 = os.path.join(BASE_PREPROCESSED_DIR, "11.SSDP")
DATA_DIR_12 = os.path.join(BASE_PREPROCESSED_DIR, "12.Evil_Twin")
DATA_DIR_13 = os.path.join(BASE_PREPROCESSED_DIR, "13.Website_spoofing")
SNAPSHOT_DIR = os.path.join(os.getcwd(), "Snapshots", "train")
os.makedirs(SNAPSHOT_DIR, exist_ok=True)


# ──────────────────────────────────────────────────────────
# 2) 유틸 함수들 (print 추가)
# ──────────────────────────────────────────────────────────
def load_data_from_directory(data_dir):
    fp = os.path.join(data_dir, "train_preprocessed.csv")
    print(f"[LOAD] 데이터 로드 시도: {fp}")
    if not os.path.exists(fp):
        print(f"  ▶ 파일이 존재하지 않습니다: {fp}")
        return pd.DataFrame()
    df = pd.read_csv(fp)
    print(f"  ▶ 로드 완료: {len(df)}개 레코드")
    return df

def extract_session_id(row):
    sa    = str(row.get('wlan.sa',''))
    da    = str(row.get('wlan.da',''))
    sport = str(int(row.get('tcp.srcport',0))) if not pd.isna(row.get('tcp.srcport',None)) else '0'
    dport = str(int(row.get('tcp.dstport',0))) if not pd.isna(row.get('tcp.dstport',None)) else '0'
    proto = str(row.get('ip.proto','UNKNOWN'))
    return "_".join([sa, da, sport, dport, proto])

EVENT_CODES = {'0x0C','0x0A','0x0B','0x02'}
def is_event_trigger(row):
    sub = row.get('wlan.fc.subtype', None)
    return str(sub) in EVENT_CODES if sub is not None and not pd.isna(sub) else False

def create_graph_from_snapshot(df_snap, drop_label=False):
    print(f"[GRAPH] 스냅샷으로 Graph 생성 (rows={len(df_snap)})")
    G = nx.Graph()
    for _, row in df_snap.iterrows():
        src, dst = row['wlan.sa'], row['wlan.da']
        if pd.isna(src) or pd.isna(dst):
            continue
        info = row.to_dict()
        info.pop('wlan.sa', None)
        info.pop('wlan.da', None)
        if drop_label:
            info.pop('label', None)
        if src not in G:
            G.add_node(src, role='src')
        else:
            if G.nodes[src]['role']!='src':
                G.nodes[src]['role']='both'
        if dst not in G:
            G.add_node(dst, role='dst')
        else:
            if G.nodes[dst]['role']!='dst':
                G.nodes[dst]['role']='both'
        if G.has_edge(src,dst):
            G[src][dst]['count'] += 1
            G[src][dst]['features'].append(info)
        else:
            G.add_edge(src,dst, count=1, features=[info])
    print(f"  ▶ Graph 생성 완료: nodes={G.number_of_nodes()}, edges={G.number_of_edges()}")
    return G

def generate_event_session_based_snapshots(df, attack_name, output_dir, drop_label=True):
    print(f"\n[SNAPSHOT] '{attack_name}' 스냅샷 생성 시작")
    df = df.sort_values('frame.time_epoch')
    attack_dir = os.path.join(output_dir, attack_name)
    os.makedirs(attack_dir, exist_ok=True)

    buf, curr_sess, cnt, snapshots = [], None, 0, []
    for idx, row in enumerate(df.itertuples()):
        sess = extract_session_id(row._asdict())
        trig = is_event_trigger(row._asdict())
        if buf and (sess != curr_sess or trig):
            print(f"  ▶ 커밋 조건 충족 (idx={idx}, session_change={sess!=curr_sess}, event_trigger={trig})")
            df_buf = pd.DataFrame(buf)
            G = create_graph_from_snapshot(df_buf, drop_label)
            ts = float(buf[-1]['frame.time_epoch'])
            dt = datetime.utcfromtimestamp(ts)
            fname = f"{attack_name}_snapshot_{cnt:03d}_{dt.strftime('%Y%m%d_%H%M%S')}.gpickle"
            path = os.path.join(attack_dir, fname)
            with open(path,'wb') as f:
                pickle.dump(G,f)
            print(f"    ▶ 스냅샷 저장 #{cnt}: {fname}")
            snapshots.append((G,dt))
            cnt += 1
            buf = []
        buf.append(row._asdict())
        curr_sess = sess

    if buf:
        print("  ▶ 마지막 버퍼 커밋")
        df_buf = pd.DataFrame(buf)
        G = create_graph_from_snapshot(df_buf, drop_label)
        ts = float(buf[-1]['frame.time_epoch'])
        dt = datetime.utcfromtimestamp(ts)
        fname = f"{attack_name}_snapshot_{cnt:03d}_{dt.strftime('%Y%m%d_%H%M%S')}.gpickle"
        path = os.path.join(attack_dir, fname)
        with open(path,'wb') as f:
            pickle.dump(G,f)
        print(f"    ▶ 스냅샷 저장 #{cnt}: {fname}")
        snapshots.append((G,dt))

    print(f"[SNAPSHOT] '{attack_name}' 완료: 총 {len(snapshots)}개 생성 → {attack_dir}")
    return snapshots

def compare_snapshots(G1, G2):
    n1,n2 = set(G1.nodes()), set(G2.nodes())
    e1,e2 = set(G1.edges()), set(G2.edges())
    return {
        'added_nodes':   n2 - n1,
        'removed_nodes': n1 - n2,
        'added_edges':   e2 - e1,
        'removed_edges': e1 - e2
    }

def visualize_graph_3d(G):
    print("  ▶ 3D 그래프 시각화 시작")
    pos = nx.spring_layout(G, dim=3)
    xn,yn,zn,cols = [],[],[],[]
    for n in G.nodes():
        x,y,z = pos[n]
        xn.append(x); yn.append(y); zn.append(z)
        role = G.nodes[n].get('role','def')
        cols.append('red' if role=='src' else 'blue' if role=='dst' else 'green' if role=='both' else 'gray')
    node_trace = go.Scatter3d(x=xn,y=yn,z=zn,mode='markers',marker=dict(size=2,color=cols),text=list(G.nodes()))
    ex,ey,ez = [],[],[]
    for u,v in G.edges():
        x0,y0,z0 = pos[u]; x1,y1,z1 = pos[v]
        ex += [x0,x1,None]; ey += [y0,y1,None]; ez += [z0,z1,None]
    edge_trace = go.Scatter3d(x=ex,y=ey,z=ez,mode='lines',line=dict(width=2,color='gray'))
    fig = go.Figure(data=[edge_trace,node_trace])
    fig.update_layout(scene=dict(aspectmode='data'),margin=dict(l=0,r=0,t=0,b=0))
    fig.show()
    print("  ▶ 시각화 완료")


# ──────────────────────────────────────────────────────────
# 3) 실행: 로드 → 생성 → 출력 → 시각화 → 비교
# ──────────────────────────────────────────────────────────
print("\n=== 1) 데이터 로드 ===")
df1 = load_data_from_directory(DATA_DIR_1)

datasets = {'Deauth': df1}

all_snapshots = {}
for name, df in datasets.items():
    snaps = generate_event_session_based_snapshots(df, name, SNAPSHOT_DIR, drop_label=False)
    all_snapshots[name] = snaps

print("\n=== 2) 스냅샷 정보 출력 ===")
for name, snaps in all_snapshots.items():
    print(f"\n[{name}] 총 {len(snaps)}개")
    for i,(G,dt) in enumerate(snaps):
        print(f"  [{i:03d}] {dt.strftime('%Y-%m-%d %H:%M:%S')} → nodes={G.number_of_nodes()}, edges={G.number_of_edges()}")

# print("\n=== 3) 1분 단위 누적 시각화 ===")
# for name, snaps in all_snapshots.items():
#     if not snaps: continue
#     start = snaps[0][1].replace(second=0)
#     end   = snaps[-1][1].replace(second=0)
#     curr = start
#     print(f"\n-- [{name}] {start} → {end}")
#     while curr <= end:
#         print(f">> 시각화 시점: {curr.strftime('%Y-%m-%d %H:%M')}")
#         # curr까지 마지막 스냅샷
#         cands = [(G,dt) for G,dt in snaps if dt <= curr]
#         if cands:
#             visualize_graph_3d(cands[-1][0])
#         curr += timedelta(minutes=1)

print("\n=== 3) 저장된 스냅샷을 누적하며 10000개 단위로 시각화 ===")

BATCH_SIZE = 10000  # 누적 단위

for attack_name in datasets.keys():
    attack_dir = os.path.join(SNAPSHOT_DIR, attack_name)
    if not os.path.exists(attack_dir):
        print(f"  ▶ 스냅샷 폴더 없음: {attack_dir}")
        continue

    print(f"\n-- [{attack_name}] 누적 기반 시각화 시작 (경로: {attack_dir})")

    snapshot_files = sorted([
        f for f in os.listdir(attack_dir)
        if f.endswith('.gpickle')
    ])

    print(f"  ▶ 총 {len(snapshot_files)}개 스냅샷 파일 감지")

    # 누적 그래프 초기화
    G_accum = nx.Graph()
    for i, filename in enumerate(snapshot_files):
        file_path = os.path.join(attack_dir, filename)
        with open(file_path, 'rb') as f:
            G_snap = pickle.load(f)
        
        # 누적 병합
        G_accum.add_nodes_from(G_snap.nodes(data=True))
        G_accum.add_edges_from(G_snap.edges(data=True))

        # 누적 기준 시각화
        if (i + 1) % BATCH_SIZE == 0:
            print(f">> 누적 시각화: {i+1}번째 스냅샷까지 포함 (노드: {G_accum.number_of_nodes()}, 엣지: {G_accum.number_of_edges()})")
            visualize_graph_3d(G_accum)

    # 마지막 잔여 스냅샷 누적 시각화
    if (len(snapshot_files) % BATCH_SIZE) != 0:
        print(f">> 마지막 누적 시각화 (전체 {len(snapshot_files)}개 포함)")
        visualize_graph_3d(G_accum)



print("\n=== 4) 인접 스냅샷 변화 비교 ===")
for name, snaps in all_snapshots.items():
    print(f"\n[{name}] 변화 비교")
    for i in range(1, len(snaps)):
        Gp,dtp = snaps[i-1]
        Gc,dtc = snaps[i]
        diff = compare_snapshots(Gp,Gc)
        print(f"  {dtp.strftime('%H:%M:%S')}→{dtc.strftime('%H:%M:%S')}: "
              f"+nodes={len(diff['added_nodes'])}, -nodes={len(diff['removed_nodes'])}, "
              f"+edges={len(diff['added_edges'])}, -edges={len(diff['removed_edges'])}")



=== 1) 데이터 로드 ===
[LOAD] 데이터 로드 시도: C:\Users\idle9\Desktop\Naver MYBOX\AISLab\Dataset\AWID3_Dataset_CSV\Preprocessed\1.Deauth\train_preprocessed.csv
  ▶ 로드 완료: 1138528개 레코드

[SNAPSHOT] 'Deauth' 스냅샷 생성 시작
  ▶ 마지막 버퍼 커밋
[GRAPH] 스냅샷으로 Graph 생성 (rows=1138528)


KeyError: 'wlan.sa'

In [4]:
df

Unnamed: 0,frame.len,frame.number,frame.time,frame.time_delta,frame.time_delta_displayed,frame.time_epoch,frame.time_relative,radiotap.datarate,radiotap.dbm_antsignal,radiotap.length,...,tls.compress_certificate.compressed_certificate_message.length,tls.connection_id,tls.handshake.extension.type,tls.handshake.extensions_key_share_group,tls.handshake.session_ticket_length,tls.handshake.version,tls.record.content_type,tls.record.version,wlan.da,wlan.sa
0,1610,924233,1.608281e+09,0.000009,0.000009,1.608307e+09,391.150429,,-113,64,...,,,,,,,,,828033739094,13870904442416
1,88,39646,1.608281e+09,0.000004,0.000004,1.608306e+09,202.179431,24.0,-167,56,...,,,,,,,,,,
2,70,418297,1.608281e+09,0.000004,0.000004,1.608306e+09,283.057663,24.0,-213,56,...,,,,,,,,,,
3,170,993489,1.608281e+09,0.000152,0.000152,1.608307e+09,406.614351,,-211,64,...,,,,,,,,,13870904442416,163730486904183
4,158,796813,1.608281e+09,0.000000,0.000000,1.608307e+09,361.745497,,-176,64,...,,,,,,,,,13870904442416,828033738302
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1138523,88,259179,1.608281e+09,0.000123,0.000123,1.608306e+09,256.615603,24.0,-114,56,...,,,,,,,,,,
1138524,76,1414418,1.608281e+09,0.001411,0.001411,1.608307e+09,517.771990,24.0,-115,56,...,,,,,,,,,,
1138525,88,131933,1.608281e+09,0.000005,0.000005,1.608306e+09,219.036183,24.0,-194,56,...,,,,,,,,,,
1138526,70,671159,1.608281e+09,0.000063,0.000063,1.608307e+09,331.713167,24.0,-160,56,...,,,,,,,,,,
