In [1]:
import pandas as pd
import os 
from sklearn.model_selection import train_test_split 
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset 
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [2]:
dir_data_unlabeled = os.path.join('.', 'traffictracer', 'evaluation', 'model_evaluation', 'data_no_label') 
dir_data_labeled = os.path.join('.', 'traffictracer', 'evaluation', 'model_evaluation', 'data_label') 

In [3]:
df = pd.read_csv(os.path.join(dir_data_labeled, 'merged_df.csv')) 

# 按列名提取代理前、代理后特征
# M_features = df.filter(M_features_list).columns
# W_features = df.filter(W_features_list).columns
# labels = 'label'
features = ['Packets', 'Bytes', 'Rel Start', 'Duration', 'Flows'] 
M_features = ['M ' + feature for feature in features] 
W_features = ['W ' + feature for feature in features] 
labels = 'label' 

# 划分数据集
train_df, test_df = train_test_split(df, test_size=0.2)

# 提取不同特征和标签
X_M_train = train_df[M_features]
X_MW_train = train_df[M_features + W_features]
X_W_train = train_df[W_features]
y_train = train_df[labels]

X_M_test = test_df[M_features]
X_MW_test = test_df[M_features + W_features]
X_W_test = test_df[W_features]
y_test = test_df[labels] 

if 'W Flows' in W_features: 
    W_features.remove('W Flows')

X_F_train = train_df[W_features] 
X_F_test = test_df[W_features]

In [4]:
# 创建映射字典
label_mapping = {
    'video': 0,
    'audio': 1,
    'upload': 2,
    'download': 3,
    'streaming': 4,
    'email': 5
} 
# 转换标签为整数
y_train = y_train.map(label_mapping)
y_test = y_test.map(label_mapping)

In [5]:
# 定义一个简单的神经网络
class SimpleModel(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(SimpleModel, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 64), 
            nn.ReLU(), 
            nn.Linear(64, output_dim)
        )

    def forward(self, x):
        return self.fc(x)

# 训练函数
def train_model(model, train_loader, criterion, optimizer, num_epochs=10):
    model.train()
    for epoch in range(num_epochs):
        for inputs, labels in train_loader:
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}")   
    return model

In [6]:
train_loader_F = DataLoader(
    TensorDataset(
        torch.tensor(X_F_train.values, dtype=torch.float32),
        torch.tensor(y_train.values, dtype=torch.long)
    ),
    batch_size=32,
    shuffle=True
)

f_others = SimpleModel(input_dim=X_F_train.shape[1], output_dim=6) 

criterion = nn.CrossEntropyLoss()
optimizer_fothers = torch.optim.Adam(f_others.parameters(), lr=0.001)

In [7]:
f_others = train_model(f_others, train_loader_F, criterion, optimizer_fothers, num_epochs=100) 

Epoch [1/100], Loss: 6561.0474
Epoch [2/100], Loss: 4736.9590
Epoch [3/100], Loss: 10009.3369
Epoch [4/100], Loss: 7696.9565
Epoch [5/100], Loss: 7751.5659
Epoch [6/100], Loss: 10109.8848
Epoch [7/100], Loss: 8854.7168
Epoch [8/100], Loss: 632.1324
Epoch [9/100], Loss: 588.2939
Epoch [10/100], Loss: 5232.3931
Epoch [11/100], Loss: 1806.5450
Epoch [12/100], Loss: 1945.4081
Epoch [13/100], Loss: 1213.5394
Epoch [14/100], Loss: 769.7413
Epoch [15/100], Loss: 512.3705
Epoch [16/100], Loss: 2402.3572
Epoch [17/100], Loss: 2701.2266
Epoch [18/100], Loss: 916.2110
Epoch [19/100], Loss: 143.6232
Epoch [20/100], Loss: 1489.8593
Epoch [21/100], Loss: 1495.0469
Epoch [22/100], Loss: 4653.1758
Epoch [23/100], Loss: 2361.0894
Epoch [24/100], Loss: 212.9585
Epoch [25/100], Loss: 1951.4279
Epoch [26/100], Loss: 569.6225
Epoch [27/100], Loss: 720.7976
Epoch [28/100], Loss: 2385.4150
Epoch [29/100], Loss: 874.2375
Epoch [30/100], Loss: 496.9261
Epoch [31/100], Loss: 9141.7939
Epoch [32/100], Loss: 2005

In [8]:
# 填充缺失值并转换为 Tensor
X_F_test = X_F_test.fillna(0)
y_test = y_test.astype(int)

# 构建 DataLoader
test_loader_F = DataLoader(
    TensorDataset(
        torch.tensor(X_F_test.values, dtype=torch.float32),
        torch.tensor(y_test.values, dtype=torch.long)
    ),
    batch_size=32,
    shuffle=False
)

In [9]:
def evaluate_model_o(model, test_loader):
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds, average='weighted')
    recall = recall_score(all_labels, all_preds, average='weighted')
    f1 = f1_score(all_labels, all_preds, average='weighted')

    print(f'Accuracy: {accuracy:.4f}')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'F1 Score: {f1:.4f}')

In [10]:
print("Evaluating SimpleModel:")
evaluate_model_o(f_others, test_loader_F)

Evaluating SimpleModel:
Accuracy: 0.1761
Precision: 0.0310
Recall: 0.1761
F1 Score: 0.0527


  _warn_prf(average, modifier, msg_start, len(result))
