In [None]:
import pandas as pd
import numpy as np

# Load the two labelled CSV exports that are merged into one frame further down.
# low_memory=False makes pandas parse the first file in one pass rather than in
# chunks, so column dtypes are inferred from the whole column.
df_t = pd.read_csv('data_tcpudp_with_labels.csv', low_memory=False)
df_p = pd.read_csv('data_pfcp_with_labels.csv')

In [None]:
# Remove the `label` column from the first frame and rename the second frame's
# `Label_val` column to `label_val`, the name used as the target later on.
# Both statements rebind a new frame instead of mutating in place, and the drop
# tolerates an already-removed column, so re-running this cell on a live kernel
# no longer raises KeyError.
df_t = df_t.drop(columns='label', errors='ignore')
df_p = df_p.rename(columns={'Label_val': 'label_val'})

In [None]:
# Stack the rows of both frames; ignore_index=True gives the result a fresh
# 0..n-1 index. With concat's default outer join, a column present in only one
# frame is NaN for the rows coming from the other frame.
df = pd.concat([df_t, df_p], ignore_index=True)

In [None]:
# Replace every NaN in the merged frame with 0, in place.
# NOTE(review): this also applies to `label_val`; any row without a label would
# silently become label 0 - confirm that is intended.
df.fillna(0, inplace=True)
# Cell output: the distinct target values present after filling.
df['label_val'].unique()

In [None]:
# Cell output: (rows, columns) of the merged frame.
df.shape

In [None]:
# Feature matrix: every column of the merged frame except the target.
X = df.drop(columns='label_val')
# Target vector.
y = df['label_val']

In [None]:
# Cell output: (rows, feature columns) once the target column is removed.
X.shape

In [74]:
# Persist the merged, NaN-filled frame; index=False keeps the row index out of the file.
df.to_csv('merge_pfcp_tcp_udp.csv', index=False)

## Models

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.base import BaseEstimator, ClassifierMixin

# ConditionalClassifier definition
class ConditionalClassifier(BaseEstimator, ClassifierMixin):
    """Two-stage classifier: a binary gate followed by a multiclass model.

    Stage one (``binary_clf``) decides whether a sample is class 0 or not.
    Samples it flags as non-zero are passed to stage two (``multiclass_clf``),
    whose prediction becomes the final label; all other samples get label 0.

    Parameters
    ----------
    binary_clf : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``. It is trained on the
        binarised target ``y != 0``.
    multiclass_clf : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``. It is trained on the
        original labels of the training rows the gate flags as non-zero.
    """

    def __init__(self, binary_clf, multiclass_clf):
        self.binary_clf = binary_clf
        self.multiclass_clf = multiclass_clf

    def fit(self, X, y):
        """Fit the gate on ``y != 0`` and the second stage on the flagged rows.

        Raises
        ------
        ValueError
            If the fitted gate flags no training row, which would leave the
            second stage with nothing to learn from.
        """
        y_arr = np.asarray(y)
        self.classes_ = np.unique(y_arr)

        # The gate must see a two-valued target; fitting it on the raw labels
        # would turn it into a second multiclass model.
        self.binary_clf.fit(X, (y_arr != 0).astype(int))

        # The second stage is trained on what the gate lets through (including
        # its false positives), i.e. the inputs it will receive in predict().
        flagged = np.asarray(self.binary_clf.predict(X)) != 0
        if not flagged.any():
            raise ValueError(
                "binary_clf flagged no training sample as non-zero; "
                "cannot fit multiclass_clf"
            )
        self.multiclass_clf.fit(X[flagged], y_arr[flagged])

        return self

    def predict(self, X):
        """Return label 0 where the gate says so, else the second-stage label."""
        flagged = np.asarray(self.binary_clf.predict(X)) != 0
        final_preds = np.zeros(flagged.shape[0], dtype=int)

        # Skip the second stage entirely when nothing is flagged: estimators
        # reject an empty input array.
        if flagged.any():
            multiclass_preds = np.asarray(self.multiclass_clf.predict(X[flagged]))
            # Widen the buffer if needed so non-integer labels are not truncated.
            final_preds = final_preds.astype(
                np.result_type(final_preds, multiclass_preds)
            )
            final_preds[flagged] = multiclass_preds

        return final_preds

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# First stage of the conditional classifier.
binary_clf = RandomForestClassifier(
    max_depth=20,
    max_features='sqrt',
    min_samples_leaf=1,
    min_samples_split=5,
    n_estimators=500,
    random_state=42
)

# Second stage; seeded so that repeated runs produce the same model.
multiclass_clf = GradientBoostingClassifier(n_estimators=100, random_state=42)

conditional_clf = ConditionalClassifier(binary_clf, multiclass_clf)

# The scaler is fitted on the training split only, inside the pipeline.
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('clf', conditional_clf)
])

pipeline.fit(X_train, y_train)
y_pred = pipeline.predict(X_test)
conditional_accuracy = accuracy_score(y_test, y_pred)
print("Conditional Classifier Accuracy:", conditional_accuracy)

# Pin the label order so the matrix always has one row and one column per class
# found in `y`, even if a class is missing from both y_test and y_pred. Without
# this the matrix can be smaller than the index/columns given to the DataFrame.
class_labels = np.unique(y)
cf_matrix = confusion_matrix(y_test, y_pred, labels=class_labels)

# Row-normalise so each row sums to 1 (the diagonal is per-class recall).
# A class with no true samples has a zero row total; dividing by 1 instead
# keeps that row at 0 rather than producing NaN.
row_totals = cf_matrix.sum(axis=1, keepdims=True)
cfm = cf_matrix / np.where(row_totals == 0, 1, row_totals)
df_cm = pd.DataFrame(cfm, index=class_labels, columns=class_labels)

with np.printoptions(precision=3, suppress=True):
    print('cm_diag', np.diag(df_cm))

fig, ax = plt.subplots(figsize=(10, 7))
sns.heatmap(df_cm, annot=True, fmt='.2f', cbar=True,
            xticklabels=class_labels, yticklabels=class_labels, ax=ax)
ax.set_xlabel('Predicted Labels')
ax.set_ylabel('True Labels')
ax.set_title('Confusion Matrix Heatmap for Conditional Classifier')
plt.show()


In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# Discriminator definition
class Discriminator(nn.Module):
    """Fully connected network that maps a feature vector to one value in (0, 1).

    Layer widths are input_dim -> 256 -> 128 -> 64 -> 1, with a ReLU after each
    of the three hidden layers and a sigmoid on the single output unit.
    """

    def __init__(self, input_dim):
        super().__init__()
        # Layers are created and registered in order fc1..fc4.
        self.fc1, self.fc2, self.fc3, self.fc4 = (
            nn.Linear(n_in, n_out)
            for n_in, n_out in ((input_dim, 256), (256, 128), (128, 64), (64, 1))
        )
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Run ``x`` through the hidden stack and squash the output with a sigmoid."""
        hidden = x
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = self.relu(layer(hidden))
        return self.sigmoid(self.fc4(hidden))

# ConditionalClassifier definition
class ConditionalClassifier(BaseEstimator, ClassifierMixin):
    """Two-stage classifier: a torch network as binary gate, then a multiclass model.

    The network scores every (scaled) sample; samples scoring above 0.5 are
    passed to ``multiclass_clf``, whose prediction becomes the final label.
    All other samples get label 0.

    Parameters
    ----------
    binary_clf : torch.nn.Module
        Network returning one probability per row; it is trained in ``fit``
        with binary cross-entropy against ``y > 0``.
    multiclass_clf : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``.
    scaler : transformer
        Already-fitted object exposing ``transform(X)``; it is applied in both
        ``fit`` and ``predict`` and is not re-fitted here.
    epochs : int, default=2000
        Number of full-batch optimisation steps for the network.
    lr : float, default=0.0001
        Learning rate of the Adam optimiser.
    """

    def __init__(self, binary_clf, multiclass_clf, scaler, epochs=2000, lr=0.0001):
        self.binary_clf = binary_clf
        self.multiclass_clf = multiclass_clf
        self.scaler = scaler
        self.epochs = epochs
        self.lr = lr

    def _gate(self, X_scaled):
        """Return a boolean mask of the rows the network scores above 0.5."""
        X_tensor = torch.tensor(X_scaled, dtype=torch.float32)
        # Inference mode: matters as soon as the network has dropout/batch-norm.
        self.binary_clf.eval()
        with torch.no_grad():
            scores = self.binary_clf(X_tensor).numpy()
        return (scores > 0.5).flatten()

    def fit(self, X, y):
        """Train the network on ``y > 0``, then the second stage on flagged rows.

        Raises
        ------
        ValueError
            If the trained network flags no training row, which would leave
            the second stage with nothing to learn from.
        """
        X = self.scaler.transform(X)
        # Plain array: positional masking below, and y need not be a Series.
        y_arr = np.asarray(y)
        self.classes_ = np.unique(y_arr)

        X_tensor = torch.tensor(X, dtype=torch.float32)
        y_tensor = torch.tensor(
            (y_arr > 0).astype(int), dtype=torch.float32
        ).view(-1, 1)

        self.binary_clf.train()
        criterion = nn.BCELoss()
        optimizer = optim.Adam(self.binary_clf.parameters(), lr=self.lr)

        # Full-batch training: every step uses the whole training set.
        for epoch in range(self.epochs):
            outputs = self.binary_clf(X_tensor)
            loss = criterion(outputs, y_tensor)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if (epoch + 1) % 1000 == 0:
                print(f'Epoch [{epoch + 1}/{self.epochs}], Loss: {loss.item():.4f}')

        # The second stage is trained on what the gate lets through, i.e. the
        # same kind of input it will receive in predict().
        flagged = self._gate(X)
        if not flagged.any():
            raise ValueError(
                "binary_clf flagged no training sample as non-zero; "
                "cannot fit multiclass_clf"
            )
        self.multiclass_clf.fit(X[flagged], y_arr[flagged])

        return self

    def predict(self, X):
        """Return label 0 where the gate says so, else the second-stage label."""
        X = self.scaler.transform(X)
        flagged = self._gate(X)
        final_preds = np.zeros(flagged.shape[0], dtype=int)

        # Skip the second stage when nothing is flagged: estimators reject an
        # empty input array.
        if flagged.any():
            multiclass_preds = np.asarray(self.multiclass_clf.predict(X[flagged]))
            # Widen the buffer if needed so non-integer labels are not truncated.
            final_preds = final_preds.astype(
                np.result_type(final_preds, multiclass_preds)
            )
            final_preds[flagged] = multiclass_preds

        return final_preds


# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Fit the scaler on the training split only; the classifier applies it in
# both fit() and predict().
scaler = MinMaxScaler()
scaler.fit(X_train)

# Seed torch before the network is built so its initial weights, and therefore
# the trained gate, are the same on every run.
torch.manual_seed(42)
input_dim = X_train.shape[1]
discriminator = Discriminator(input_dim)
# Seeded so that repeated runs produce the same second-stage model.
multiclass_clf = GradientBoostingClassifier(n_estimators=100, random_state=42)

conditional_clf = ConditionalClassifier(discriminator, multiclass_clf, scaler)

conditional_clf.fit(X_train, y_train)
y_pred = conditional_clf.predict(X_test)

conditional_accuracy = accuracy_score(y_test, y_pred)
print("Conditional Classifier Accuracy:", conditional_accuracy)

# Pin the label order so the matrix always has one row and one column per class
# found in `y`, even if a class is missing from both y_test and y_pred. Without
# this the matrix can be smaller than the index/columns given to the DataFrame.
class_labels = np.unique(y)
cf_matrix = confusion_matrix(y_test, y_pred, labels=class_labels)

# Row-normalise so each row sums to 1 (the diagonal is per-class recall).
# A class with no true samples has a zero row total; dividing by 1 instead
# keeps that row at 0 rather than producing NaN.
row_totals = cf_matrix.sum(axis=1, keepdims=True)
cfm = cf_matrix / np.where(row_totals == 0, 1, row_totals)
df_cm = pd.DataFrame(cfm, index=class_labels, columns=class_labels)

with np.printoptions(precision=3, suppress=True):
    print('cm_diag', np.diag(df_cm))

fig, ax = plt.subplots(figsize=(10, 7))
sns.heatmap(df_cm, annot=True, fmt='.2f', cbar=True,
            xticklabels=class_labels, yticklabels=class_labels, ax=ax)
ax.set_xlabel('Predicted Labels')
ax.set_ylabel('True Labels')
ax.set_title('Confusion Matrix Heatmap for Conditional Classifier')
plt.show()