In [1]:
import pandas as pd
import os 
from sklearn.model_selection import train_test_split 
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset 
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [2]:
dir_data_unlabeled = os.path.join('.', 'traffictracer', 'evaluation', 'model_evaluation', 'data_no_label') 
dir_data_labeled = os.path.join('.', 'traffictracer', 'evaluation', 'model_evaluation', 'data_label') 

In [3]:
def label_and_merge(): 
    merge_df = pd.DataFrame()
    for file in os.listdir(dir_data_unlabeled): 
        activity_name = file[:-4] # .csv 
        temp_df = pd.read_csv(os.path.join(dir_data_unlabeled, file), index_col=0) 
        temp_df['label'] = activity_name 
        merge_df = pd.concat([merge_df, temp_df], ignore_index=True) 
        merge_df.to_csv(os.path.join(dir_data_labeled, 'merged_df.csv')) 

In [4]:
df = pd.read_csv(os.path.join(dir_data_labeled, 'merged_df.csv')) 

# 按列名提取代理前、代理后特征
# M_features = df.filter(M_features_list).columns
# W_features = df.filter(W_features_list).columns
# labels = 'label'
features = ['Packets', 'Bytes', 'Rel Start', 'Duration', 'Flows'] 
M_features = ['M ' + feature for feature in features] 
W_features = ['W ' + feature for feature in features] 
labels = 'label' 

# 划分数据集
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

# 提取不同特征和标签
X_M_train = train_df[M_features]
X_MW_train = train_df[M_features + W_features]
X_W_train = train_df[W_features]
y_train = train_df[labels]

X_M_test = test_df[M_features]
X_MW_test = test_df[M_features + W_features]
X_W_test = test_df[W_features]
y_test = test_df[labels] 

if 'W Flows' in W_features: 
    W_features.remove('W Flows')

X_F_train = train_df[W_features] 
X_F_test = test_df[W_features]

In [5]:
# 创建映射字典
label_mapping = {
    'video': 0,
    'audio': 1,
    'upload': 2,
    'download': 3,
    'streaming': 4,
    'email': 5
} 
# 转换标签为整数
y_train = y_train.map(label_mapping)
y_test = y_test.map(label_mapping)

In [6]:
# 定义一个简单的神经网络
class SimpleModel(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(SimpleModel, self).__init__()
        self.fc = nn.Sequential(
            # nn.Linear(input_dim, 128),
            # nn.ReLU(),
            # nn.Linear(128, output_dim) 
            nn.Linear(input_dim, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, output_dim)
        )

    def forward(self, x):
        return self.fc(x)

# 训练函数
def train_model(model, train_loader, criterion, optimizer, num_epochs=10):
    model.train()
    for epoch in range(num_epochs):
        for inputs, labels in train_loader:
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}")   
    return model

In [7]:
# 确保特征和标签都是数值型，且维度一致
X_M_train = X_M_train.fillna(0)  # 填充缺失值
y_train = y_train.astype(int)    # 转换标签为整数
X_M_train = X_M_train.loc[y_train.index]  # 确保特征和标签行数一致

# 转换为 Tensor
def to_tensor(data, labels):
    return TensorDataset(
        torch.tensor(data.values, dtype=torch.float32),
        torch.tensor(labels.values, dtype=torch.long)
    )

train_loader_M = DataLoader(to_tensor(X_M_train, y_train), batch_size=32, shuffle=True)
train_loader_MW = DataLoader(to_tensor(X_MW_train, y_train), batch_size=32, shuffle=True)
train_loader_W = DataLoader(to_tensor(X_W_train, y_train), batch_size=32, shuffle=True)

In [8]:
# 初始化模型
f1 = SimpleModel(input_dim=X_M_train.shape[1], output_dim=6)
f2 = SimpleModel(input_dim=X_MW_train.shape[1], output_dim=6)

# 定义损失函数和优化器
criterion = nn.CrossEntropyLoss()
optimizer_f1 = torch.optim.Adam(f1.parameters(), lr=0.001)
optimizer_f2 = torch.optim.Adam(f2.parameters(), lr=0.001)

# 训练模型
f1 = train_model(f1, train_loader_M, criterion, optimizer_f1, num_epochs=100)
f2 = train_model(f2, train_loader_MW, criterion, optimizer_f2, num_epochs=100)


Epoch [1/100], Loss: 1326.4584
Epoch [2/100], Loss: 55389.8828
Epoch [3/100], Loss: 6184.8135
Epoch [4/100], Loss: 856.3326
Epoch [5/100], Loss: 1013.0439
Epoch [6/100], Loss: 1177.2610
Epoch [7/100], Loss: 694.5280
Epoch [8/100], Loss: 990.5077
Epoch [9/100], Loss: 1620.8641
Epoch [10/100], Loss: 32.6281
Epoch [11/100], Loss: 723.2231
Epoch [12/100], Loss: 15.3720
Epoch [13/100], Loss: 4745.9399
Epoch [14/100], Loss: 642.5472
Epoch [15/100], Loss: 50.5853
Epoch [16/100], Loss: 85.8863
Epoch [17/100], Loss: 413.8706
Epoch [18/100], Loss: 601.6241
Epoch [19/100], Loss: 237.2246
Epoch [20/100], Loss: 1912.5670
Epoch [21/100], Loss: 1117.3599
Epoch [22/100], Loss: 937.8638
Epoch [23/100], Loss: 1583.5690
Epoch [24/100], Loss: 362.7700
Epoch [25/100], Loss: 1363.6591
Epoch [26/100], Loss: 146.0341
Epoch [27/100], Loss: 1034.9556
Epoch [28/100], Loss: 82.1122
Epoch [29/100], Loss: 13.8560
Epoch [30/100], Loss: 1467.1624
Epoch [31/100], Loss: 12.4896
Epoch [32/100], Loss: 1.7744
Epoch [33/10

In [9]:
# 使用 f1 和 f2 的输出作为指导特征
class GuidedModel(nn.Module):
    def __init__(self, input_dim, output_dim, guide_dim):
        super(GuidedModel, self).__init__()
        self.fc = nn.Sequential(
            # nn.Linear(input_dim + guide_dim, 128),
            # nn.ReLU(),
            # nn.Linear(128, output_dim) 
            nn.Linear(input_dim + guide_dim, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, output_dim)
        )

    def forward(self, x, guide_features):
        x = torch.cat((x, guide_features), dim=1)
        return self.fc(x)

# 初始化 f3
guide_dim = 128  # 指导特征维度
f3 = GuidedModel(input_dim=X_W_train.shape[1], output_dim=6, guide_dim=guide_dim)
optimizer_f3 = torch.optim.Adam(f3.parameters(), lr=0.001)

# 提取 f1 和 f2 的指导特征
def get_guided_features(model, loader):
    model.eval()
    features = []
    with torch.no_grad():
        for inputs, _ in loader:
            features.append(model(inputs))
    return torch.cat(features)

guide_features_f1 = get_guided_features(f1, train_loader_M)
guide_features_f2 = get_guided_features(f2, train_loader_MW)


In [10]:
# 定制训练函数
def train_model_f3(model, train_loader, criterion, optimizer, num_epochs=10):
    model.train()
    for epoch in range(num_epochs):
        for inputs, labels in train_loader:
            # 拆分 inputs
            x = inputs[:, :-256]
            guide_features = inputs[:, -256:]

            # 前向传播
            outputs = model(x, guide_features)

            # 计算损失
            loss = criterion(outputs, labels)

            # 反向传播和优化
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}")
    return model

# 合并特征
X_combined_train = torch.cat(
    (torch.tensor(X_W_train.values, dtype=torch.float32),
     guide_features_f1,
     guide_features_f2),
    dim=1
)

# 构建 DataLoader
train_loader_f3 = DataLoader(
    TensorDataset(X_combined_train, torch.tensor(y_train.values, dtype=torch.long)),
    batch_size=32,
    shuffle=True
)

# 确定输入维度
guide_dim = guide_features_f1.shape[1] + guide_features_f2.shape[1]
f3 = GuidedModel(input_dim=X_W_train.shape[1], output_dim=6, guide_dim=guide_dim)
optimizer_f3 = torch.optim.Adam(f3.parameters(), lr=0.001)

# 训练 f3
f3 = train_model_f3(f3, train_loader_f3, criterion, optimizer_f3, num_epochs=100)

Epoch [1/100], Loss: 16440.4727
Epoch [2/100], Loss: 10928.4854
Epoch [3/100], Loss: 198.5665
Epoch [4/100], Loss: 318.4887
Epoch [5/100], Loss: 2994.1863
Epoch [6/100], Loss: 3425.3850
Epoch [7/100], Loss: 338.5929
Epoch [8/100], Loss: 788.0220
Epoch [9/100], Loss: 438.5712
Epoch [10/100], Loss: 35.1962
Epoch [11/100], Loss: 146.5782
Epoch [12/100], Loss: 90.9776
Epoch [13/100], Loss: 777.3290
Epoch [14/100], Loss: 726.3698
Epoch [15/100], Loss: 1290.9681
Epoch [16/100], Loss: 16.6754
Epoch [17/100], Loss: 82.4041
Epoch [18/100], Loss: 24.7839
Epoch [19/100], Loss: 21.7528
Epoch [20/100], Loss: 22.9509
Epoch [21/100], Loss: 379.2130
Epoch [22/100], Loss: 53.2386
Epoch [23/100], Loss: 714.7524
Epoch [24/100], Loss: 5.0750
Epoch [25/100], Loss: 1.7357
Epoch [26/100], Loss: 1.7273
Epoch [27/100], Loss: 1.6934
Epoch [28/100], Loss: 1.6747
Epoch [29/100], Loss: 1.7104
Epoch [30/100], Loss: 1.6852
Epoch [31/100], Loss: 1.6956
Epoch [32/100], Loss: 1.7366
Epoch [33/100], Loss: 1.7336
Epoch [

In [11]:
# 填充缺失值并转换为 Tensor
X_W_test = X_W_test.fillna(0)
y_test = y_test.astype(int)

# 提取 f1 和 f2 的指导特征
guide_features_f1_test = get_guided_features(f1, DataLoader(to_tensor(X_M_test, y_test), batch_size=32))
guide_features_f2_test = get_guided_features(f2, DataLoader(to_tensor(X_MW_test, y_test), batch_size=32))

# 合并特征
X_combined_test = torch.cat(
    (torch.tensor(X_W_test.values, dtype=torch.float32),
     guide_features_f1_test,
     guide_features_f2_test),
    dim=1
)

# 构建 DataLoader
test_loader_f3 = DataLoader(
    TensorDataset(X_combined_test, torch.tensor(y_test.values, dtype=torch.long)),
    batch_size=32,
    shuffle=False
)

In [12]:
def evaluate_model(model, test_loader):
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            x = inputs[:, :-256]
            guide_features = inputs[:, -256:]
            outputs = model(x, guide_features)
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds, average='weighted')
    recall = recall_score(all_labels, all_preds, average='weighted')
    f1 = f1_score(all_labels, all_preds, average='weighted')

    print(f'Accuracy: {accuracy:.4f}')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'F1 Score: {f1:.4f}')

In [13]:
evaluate_model(f3, test_loader_f3) # f3 performance 

Accuracy: 0.2013
Precision: 0.0405
Recall: 0.2013
F1 Score: 0.0674


  _warn_prf(average, modifier, msg_start, len(result))


In [22]:
train_loader_F = DataLoader(
    TensorDataset(
        torch.tensor(X_F_train.values, dtype=torch.float32),
        torch.tensor(y_train.values, dtype=torch.long)
    ),
    batch_size=32,
    shuffle=True
)

In [32]:
f_others = SimpleModel(input_dim=X_F_train.shape[1], output_dim=6) 

criterion = nn.CrossEntropyLoss()
optimizer_fothers = torch.optim.Adam(f_others.parameters(), lr=0.1)

In [33]:
f_others = train_model(f_others, train_loader_F, criterion, optimizer_fothers, num_epochs=100) 

Epoch [1/100], Loss: 1.7498
Epoch [2/100], Loss: 1.8705
Epoch [3/100], Loss: 1.6282
Epoch [4/100], Loss: 1.4993
Epoch [5/100], Loss: 1.6285
Epoch [6/100], Loss: 1.5636
Epoch [7/100], Loss: 1.6385
Epoch [8/100], Loss: 1.9085
Epoch [9/100], Loss: 1.7574
Epoch [10/100], Loss: 1.6107
Epoch [11/100], Loss: 1.5396
Epoch [12/100], Loss: 1.7539
Epoch [13/100], Loss: 1.7089
Epoch [14/100], Loss: 1.7653
Epoch [15/100], Loss: 1.8231
Epoch [16/100], Loss: 1.6653
Epoch [17/100], Loss: 1.7335
Epoch [18/100], Loss: 1.7033
Epoch [19/100], Loss: 1.7481
Epoch [20/100], Loss: 1.8552
Epoch [21/100], Loss: 1.6742
Epoch [22/100], Loss: 1.6799
Epoch [23/100], Loss: 1.6591
Epoch [24/100], Loss: 1.6345
Epoch [25/100], Loss: 1.7524
Epoch [26/100], Loss: 1.8255
Epoch [27/100], Loss: 1.6829
Epoch [28/100], Loss: 1.6259
Epoch [29/100], Loss: 1.5831
Epoch [30/100], Loss: 1.7149
Epoch [31/100], Loss: 1.6128
Epoch [32/100], Loss: 1.7078
Epoch [33/100], Loss: 1.7236
Epoch [34/100], Loss: 1.6650
Epoch [35/100], Loss: 1

In [34]:
# 填充缺失值并转换为 Tensor
X_F_test = X_F_test.fillna(0)
y_test = y_test.astype(int)

# 构建 DataLoader
test_loader_F = DataLoader(
    TensorDataset(
        torch.tensor(X_F_test.values, dtype=torch.float32),
        torch.tensor(y_test.values, dtype=torch.long)
    ),
    batch_size=32,
    shuffle=False
)

In [35]:
def evaluate_model_o(model, test_loader):
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds, average='weighted')
    recall = recall_score(all_labels, all_preds, average='weighted')
    f1 = f1_score(all_labels, all_preds, average='weighted')

    print(f'Accuracy: {accuracy:.4f}')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'F1 Score: {f1:.4f}')

In [36]:
print("Evaluating SimpleModel:")
evaluate_model_o(f_others, test_loader_F)

Evaluating SimpleModel:
Accuracy: 0.2013
Precision: 0.0405
Recall: 0.2013
F1 Score: 0.0674


  _warn_prf(average, modifier, msg_start, len(result))
