# CNN

## load data

In [5]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler

def load_and_preprocess(train_path, test_path):
    train_df = pd.read_csv(train_path)
    test_df = pd.read_csv(test_path)

    # 删除包含 NaN 的行
    train_df = train_df.dropna()
    test_df = test_df.dropna()

    # 将 'time' 列转换为 datetime 格式并设置为索引
    train_df['time'] = pd.to_datetime(train_df['time'], format='%Y-%m-%d %H:%M:%S.%f')
    train_df.set_index('time', inplace=True)

    test_df['time'] = pd.to_datetime(test_df['time'], format='%Y-%m-%d %H:%M:%S.%f')
    test_df.set_index('time', inplace=True)

    # 提取特征和标签
    X_train = train_df.drop(columns=['attack'])
    y_train = train_df['attack']

    X_test = test_df.drop(columns=['attack'])
    y_test = test_df['attack']

    # 标准化特征
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    return X_train, y_train, X_test, y_test

X_train, y_train, X_test, y_test = load_and_preprocess('../data/attack_2_short_test.csv', '../data/attack_1_short_test.csv')

## difine model

In [6]:
import torch
import torch.nn as nn
import torch.optim as optim

X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train.values, dtype=torch.long)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test.values, dtype=torch.long)

class CNNClassifier(nn.Module):
    def __init__(self, num_classes):
        super(CNNClassifier, self).__init__()
        self.cnn = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=(3, 1), padding=(1, 0)),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 1)),
            nn.Conv2d(32, 64, kernel_size=(3, 1), padding=(1, 0)),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 1))
        )
        self.fc = nn.Linear(64 * (X_train.shape[2] // 4), num_classes)
    
    def forward(self, x):
        x = self.cnn(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

num_classes = 2
model = CNNClassifier(num_classes)

IndexError: tuple index out of range

## train

In [None]:
device = torch.device('mps' if torch.cuda.is_available() else 'cpu')
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 50
batch_size = 64

def train_model(model, X_train, y_train, num_epochs, batch_size):
    model.train()
    for epoch in range(num_epochs):
        for i in range(0, len(X_train), batch_size):
            X_batch = X_train[i:i+batch_size].to(device)
            y_batch = y_train[i:i+batch_size].to(device)
            
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        
        if (epoch+1) % 10 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

train_model(model, X_train, y_train, num_epochs, batch_size)

: 

## evaluate

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc, precision_recall_curve, average_precision_score, accuracy_score, classification_report

def evaluate_model(model, X_test, y_test):
    model.eval()
    with torch.no_grad():
        X_test = X_test.to(device)
        y_test = y_test.to(device)

        outputs = model(X_test)
        _, predicted = torch.max(outputs.data, 1)

        accuracy = accuracy_score(y_test.cpu(), predicted.cpu())
        report = classification_report(y_test.cpu(), predicted.cpu())

        y_prob = torch.softmax(outputs, dim=1)[:, 1].cpu().numpy()
        fpr, tpr, _ = roc_curve(y_test.cpu(), y_prob)
        roc_auc = auc(fpr, tpr)

        plt.figure()
        plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
        plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('Receiver Operating Characteristic (ROC)')
        plt.legend(loc="lower right")
        plt.show()

        precision, recall, _ = precision_recall_curve(y_test.cpu(), y_prob)
        average_precision = average_precision_score(y_test.cpu(), y_prob)

        plt.figure()
        plt.plot(recall, precision, color='b', lw=2, label=f'PR curve (AP = {average_precision:.2f})')
        plt.xlabel('Recall')
        plt.ylabel('Precision')
        plt.title('Precision-Recall Curve')
        plt.legend(loc="lower left")
        plt.show()

        print(f"CNN Accuracy: {accuracy}")
        print("CNN Classification Report:")
        print(report)

evaluate_model(model, X_test, y_test)

: 