# Install Dependencies

In [1]:
# !pip install adapt pyswarm tensorflow==2.15.1 --quiet
!pip install adapt pyswarm --quiet

  Preparing metadata (setup.py) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m620.6/620.6 MB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m50.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.9/4.9 MB[0m [31m89.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m322.0/322.0 kB[0m [31m17.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.5/5.5 MB[0m [31m102.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for adapt (setup.py) ... [?25l[?25hdone
  Building wheel for pyswarm (setup.py) ... [?25l[?25hdone
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency confli

In [4]:
import pandas as pd
import numpy as np
import random
import os
import time
import matplotlib.pyplot as plt
from scipy.stats import wasserstein_distance
from scipy.special import expit, logit

from pyswarm import pso
from adapt.feature_based import DANN, ADDA
from adapt.utils import make_classification_da

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier, DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.utils import check_random_state
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report, confusion_matrix

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

In [4]:
RANDOM_STATE_FOR_SKLEARN = check_random_state(42)
SEED = 42
DEVICE = "cuda" if tf.config.list_physical_devices("GPU") else "cpu"

In [5]:
def set_seed(seed=42):
  random.seed(seed)
  np.random.seed(seed)
  torch.manual_seed(seed)
  torch.cuda.manual_seed_all(seed)
  torch.backends.cudnn.deterministic = True
  torch.backends.cudnn.benchmark = False
  tf.random.set_seed(seed)
  os.environ['PYTHONHASHSEED'] = str(seed)
  os.environ['TF_DETERMINISTIC_OPS'] = '1'
  os.environ['TF_CUDNN_DETERMINISTIC'] = '1'
  gpus = tf.config.experimental.list_physical_devices('GPU')
  if gpus:
    try:
      tf.config.experimental.set_deterministic(True)
      for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
      print(e)

In [6]:
set_seed()

# Get Datasets

In [5]:
source_base_link = "http://cicresearch.ca/IOTDataset/CIC-BCCC-NRC-TabularIoTAttacks-2024/Dataset/CIC-BCCC-NRC-ACI-IOT-2023/"
target_base_link = "http://cicresearch.ca/IOTDataset/CIC-BCCC-NRC-TabularIoTAttacks-2024/Dataset/CIC-BCCC-NRC-IoMT-2024/"
data_files = ["DoS%20ICMP%20Flood.csv", "DoS%20UDP%20Flood.csv", "Recon%20OS%20Scan.csv", "Benign%20Traffic.csv", "MITM%20ARP%20Spoofing.csv", "Recon%20Ping%20Sweep.csv", "Recon%20Vulnerability%20Scan.csv"]

In [6]:
source_dfs = [pd.read_csv(source_base_link + file) for file in data_files]
target_dfs = [pd.read_csv(target_base_link + file) for file in data_files]

In [7]:
source = pd.concat(source_dfs, ignore_index=True) # ACI-IOT-2023
target = pd.concat(target_dfs, ignore_index=True) # IoMT-2024
print(source.shape)
print(target.shape)
# print(source.columns)
# print(target.columns)

(233331, 85)
(132604, 85)


In [8]:
print(source["Attack Name"].value_counts())
print(target["Attack Name"].value_counts())
print(source["Label"].value_counts())
print(target["Label"].value_counts())

Attack Name
Benign Traffic              86525
Recon Ping Sweep            47123
Recon OS Scan               42173
Recon Vulnerability Scan    39489
MITM ARP Spoofing           14768
DoS UDP Flood                1848
DoS ICMP Flood               1405
Name: count, dtype: int64
Attack Name
Recon OS Scan               85317
Benign Traffic              32620
Recon Vulnerability Scan     8321
DoS UDP Flood                3115
DoS ICMP Flood               2107
MITM ARP Spoofing            1053
Recon Ping Sweep               71
Name: count, dtype: int64
Label
1    146806
0     86525
Name: count, dtype: int64
Label
1    99984
0    32620
Name: count, dtype: int64


In [9]:
useful_columns = [
    'Flow Duration', 'Total Fwd Packet', 'Total Bwd packets',
    'Total Length of Fwd Packet', 'Total Length of Bwd Packet',
    'Fwd Packet Length Max', 'Fwd Packet Length Min',
    'Fwd Packet Length Mean', 'Fwd Packet Length Std',
    'Bwd Packet Length Max', 'Bwd Packet Length Min',
    'Bwd Packet Length Mean', 'Bwd Packet Length Std',
    'Flow Bytes/s', 'Flow Packets/s', 'Flow IAT Mean',
    'Flow IAT Std', 'Flow IAT Max', 'Flow IAT Min',
    'Fwd IAT Total', 'Fwd IAT Mean', 'Fwd IAT Std',
    'Fwd IAT Max', 'Fwd IAT Min', 'Bwd IAT Total',
    'Bwd IAT Mean', 'Bwd IAT Std', 'Bwd IAT Max',
    'Bwd IAT Min', 'Fwd PSH Flags', 'Bwd PSH Flags',
    'Fwd URG Flags', 'Bwd URG Flags', 'Fwd Header Length',
    'Bwd Header Length', 'Fwd Packets/s', 'Bwd Packets/s',
    'Packet Length Min', 'Packet Length Max',
    'Packet Length Mean', 'Packet Length Std',
    'Packet Length Variance', 'FIN Flag Count',
    'SYN Flag Count', 'RST Flag Count', 'PSH Flag Count',
    'ACK Flag Count', 'URG Flag Count', 'CWR Flag Count',
    'ECE Flag Count', 'Down/Up Ratio', 'Average Packet Size',
    'Fwd Segment Size Avg', 'Bwd Segment Size Avg',
    'Fwd Bytes/Bulk Avg', 'Fwd Packet/Bulk Avg',
    'Fwd Bulk Rate Avg', 'Bwd Bytes/Bulk Avg',
    'Bwd Packet/Bulk Avg', 'Bwd Bulk Rate Avg',
    'Subflow Fwd Packets', 'Subflow Fwd Bytes',
    'Subflow Bwd Packets', 'Subflow Bwd Bytes',
    'FWD Init Win Bytes', 'Bwd Init Win Bytes',
    'Fwd Act Data Pkts', 'Fwd Seg Size Min', 'Active Mean',
    'Active Std', 'Active Max', 'Active Min', 'Idle Mean',
    'Idle Std', 'Idle Max', 'Idle Min', 'Protocol', 'Label'
]

In [10]:
source = source[useful_columns]
target = target[useful_columns]
X_source, y_source, X_target, y_target = source.drop(columns=['Label']), source['Label'], target.drop(columns=['Label']), target['Label']

In [11]:
# Identify feature types
categorical_features = ['Protocol']
numerical_features = [f for f in useful_columns[:-1] if f != 'Protocol']

# Create preprocessing pipeline
# The sparse argument has been changed to sparse_output
preprocessor = ColumnTransformer([
    ('num', StandardScaler(), numerical_features),
    ('cat', OneHotEncoder(handle_unknown='ignore', sparse_output=False), categorical_features)
])

X_source = preprocessor.fit_transform(X_source)
X_target = preprocessor.transform(X_target)

# Transformation Functions

## Wasserstein

In [None]:
def wasserstein_transform(Xs, Xt):
    """
    Perform Wasserstein Distance-based alignment on source features.

    Parameters:
    Xs: np.array, shape (ns, d)
        Source feature matrix (samples x features)
    Xt: np.array, shape (nt, d)
        Target feature matrix (samples x features)

    Returns:
    Xs_aligned: np.array, shape (ns, d)
        Wasserstein-aligned source features
    """
    Xs_mean, Xt_mean = np.mean(Xs, axis=0), np.mean(Xt, axis=0)
    Xs_std, Xt_std = np.std(Xs, axis=0), np.std(Xt, axis=0)

    # Compute original Wasserstein distance before transformation
    original_distances = [wasserstein_distance(Xs[:, i], Xt[:, i]) for i in range(Xs.shape[1])]
    original_avg_distance = np.mean(original_distances)

    # Align mean and standard deviation
    Xs_aligned = (Xs - Xs_mean) * (Xt_std / (Xs_std + 1e-6)) + Xt_mean

    # Compute new Wasserstein distance after transformation
    new_distances = [wasserstein_distance(Xs_aligned[:, i], Xt[:, i]) for i in range(Xs.shape[1])]
    new_avg_distance = np.mean(new_distances)

    print(f"Original Average Wasserstein Distance: {original_avg_distance:.6f}")
    print(f"New Average Wasserstein Distance After Transformation: {new_avg_distance:.6f}")

    return Xs_aligned

In [None]:
X_source_aligned_wasserstein = wasserstein_transform(X_source, X_target)
# higher_order_statisctics(X_source_aligned_wasserstein, X_target)

Original Average Wasserstein Distance: 0.255301
New Average Wasserstein Distance After Transformation: 0.230939


## Wasserstein Classwise

In [None]:
def wasserstein_transform_classwise(Xs, Xt, ys, yt):
    """
    Perform class-wise Wasserstein Distance-based alignment on source features.

    Parameters:
    Xs: np.array, shape (ns, d)
        Source feature matrix (samples x features)
    Xt: np.array, shape (nt, d)
        Target feature matrix (samples x features)
    ys: np.array, shape (ns,)
        Source labels
    yt: np.array, shape (nt,)
        Target labels

    Returns:
    Xs_aligned: np.array, shape (ns, d)
        Wasserstein-aligned source features
    """
    Xs_aligned = np.zeros_like(Xs)

    # Unique class labels
    unique_classes_s = np.unique(ys)
    unique_classes_t = np.unique(yt)

    original_distances = []
    new_distances = []

    for cls in unique_classes_s:
        # Get the samples belonging to the current class in source and target
        Xs_cls = Xs[ys == cls]
        Xt_cls = Xt[yt == cls]

        if len(Xt_cls) == 0:
            continue

        # Compute original Wasserstein distance for this class
        original_class_distances = [wasserstein_distance(Xs_cls[:, i], Xt_cls[:, i]) for i in range(Xs.shape[1])]
        original_avg_class_distance = np.mean(original_class_distances)
        original_distances.append(original_avg_class_distance)

        # Compute the mean and std for each class in source and target
        Xs_mean, Xt_mean = np.mean(Xs_cls, axis=0), np.mean(Xt_cls, axis=0)
        Xs_std, Xt_std = np.std(Xs_cls, axis=0), np.std(Xt_cls, axis=0)

        # Apply Wasserstein transformation per class
        Xs_aligned[ys == cls] = (Xs_cls - Xs_mean) * (Xt_std / (Xs_std + 1e-6)) + Xt_mean

        # Compute new Wasserstein distance for this class
        new_class_distances = [wasserstein_distance(Xs_aligned[ys == cls][:, i], Xt_cls[:, i]) for i in range(Xs.shape[1])]
        new_avg_class_distance = np.mean(new_class_distances)
        new_distances.append(new_avg_class_distance)

        print(f"Class {cls}: Original Avg Wasserstein Distance = {original_avg_class_distance:.6f}, New Avg Wasserstein Distance = {new_avg_class_distance:.6f}")

    # Compute overall Wasserstein distances
    overall_original_distance = np.mean(original_distances) if original_distances else 0.0
    overall_new_distance = np.mean(new_distances) if new_distances else 0.0

    print(f"\nOverall Original Avg Wasserstein Distance: {overall_original_distance:.6f}")
    print(f"Overall New Avg Wasserstein Distance After Transformation: {overall_new_distance:.6f}")

    return Xs_aligned


In [None]:
X_source_aligned_wasserstein_classwise = wasserstein_transform_classwise(X_source, X_target, y_source, y_target)
# higher_order_statisctics(X_source_aligned_wasserstein_classwise, X_target)

Class 0: Original Avg Wasserstein Distance = 0.805725, New Avg Wasserstein Distance = 0.654817
Class 1: Original Avg Wasserstein Distance = 0.095517, New Avg Wasserstein Distance = 0.080339

Overall Original Avg Wasserstein Distance: 0.450621
Overall New Avg Wasserstein Distance After Transformation: 0.367578


## Domain-Adversarial Neural Networks (DANN)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torch.autograd import Function
import numpy as np

# Gradient Reversal Layer
class GradReverse(Function):
    @staticmethod
    def forward(ctx, x, lambd):
        ctx.lambd = lambd
        return x.view_as(x)
    @staticmethod
    def backward(ctx, grad_output):
        return grad_output.neg() * ctx.lambd, None

def grad_reverse(x, lambd=1.0):
    return GradReverse.apply(x, lambd)

# Modified DANN (outputs same dimension as input)
class DANN_SameDim(nn.Module):
    def __init__(self, input_dim, class_num=2):
        super(DANN_SameDim, self).__init__()

        # Feature extractor with same input/output dim
        self.feature = nn.Sequential(
            nn.Linear(input_dim, input_dim),
            nn.ReLU(),
            nn.Linear(input_dim, input_dim),
        )

        # Label predictor
        self.class_classifier = nn.Sequential(
            nn.ReLU(),
            nn.Linear(input_dim, 100),
            nn.ReLU(),
            nn.Linear(100, class_num),
        )

        # Domain classifier
        self.domain_classifier = nn.Sequential(
            nn.ReLU(),
            nn.Linear(input_dim, 100),
            nn.ReLU(),
            nn.Linear(100, 2),
        )

    def forward(self, x, lambd=1.0):
        feat = self.feature(x)
        class_out = self.class_classifier(feat)
        reverse_feat = grad_reverse(feat, lambd)
        domain_out = self.domain_classifier(reverse_feat)
        return class_out, domain_out

# Final transformation function
def dann_transform_same_dim(Xs, ys, Xt, epochs=20, batch_size=64, lr=1e-3, device='cpu'):
    """
    Perform DANN-based alignment while preserving original feature dimensions.

    Parameters:
    - Xs: np.array (ns, d)
    - ys: np.array (ns,)
    - Xt: np.array (nt, d)

    Returns:
    - Xs_aligned: np.array (ns, d) - same shape as Xs
    """
    # Convert to tensors
    Xs_tensor = torch.tensor(Xs, dtype=torch.float32)
    ys_tensor = torch.tensor(ys, dtype=torch.long)
    Xt_tensor = torch.tensor(Xt, dtype=torch.float32)

    source_dataset = TensorDataset(Xs_tensor, ys_tensor)
    target_dataset = TensorDataset(Xt_tensor, torch.zeros(len(Xt_tensor)))  # dummy labels

    source_loader = DataLoader(source_dataset, batch_size=batch_size, shuffle=True)
    target_loader = DataLoader(target_dataset, batch_size=batch_size, shuffle=True)

    input_dim = Xs.shape[1]
    class_num = len(np.unique(ys))

    model = DANN_SameDim(input_dim=input_dim, class_num=class_num).to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion_class = nn.CrossEntropyLoss()
    criterion_domain = nn.CrossEntropyLoss()

    # === Training ===
    model.train()
    for epoch in range(epochs):
        len_dataloader = min(len(source_loader), len(target_loader))
        data_zip = zip(source_loader, target_loader)

        for i, ((source_data, source_label), (target_data, _)) in enumerate(data_zip):
            if i >= len_dataloader:
                break

            source_data, source_label = source_data.to(device), source_label.to(device)
            target_data = target_data.to(device)

            combined_data = torch.cat([source_data, target_data], dim=0)
            domain_label = torch.cat([
                torch.zeros(source_data.size(0)).long(),
                torch.ones(target_data.size(0)).long()
            ]).to(device)

            # Grad reversal schedule
            p = float(i + epoch * len_dataloader) / (epochs * len_dataloader)
            lambd = 2. / (1. + np.exp(-10 * p)) - 1

            optimizer.zero_grad()
            class_out, domain_out = model(combined_data, lambd=lambd)

            class_loss = criterion_class(class_out[:source_data.size(0)], source_label)
            domain_loss = criterion_domain(domain_out, domain_label)
            loss = class_loss + domain_loss
            loss.backward()
            optimizer.step()

    # === Feature Extraction ===
    model.eval()
    with torch.no_grad():
        Xs_tensor = Xs_tensor.to(device)
        aligned_features = model.feature(Xs_tensor).cpu().numpy()

    return aligned_features


In [None]:
X_source_aligned_dann = dann_transform_same_dim(
    Xs=X_source,
    ys=y_source,
    Xt=X_target,
    epochs=30,
    device='cuda' if torch.cuda.is_available() else 'cpu'
)

print("Shape match check:", X_source.shape, X_source_aligned_dann.shape, X_target.shape)


Shape match check: (233331, 77) (233331, 77) (132604, 77)


## Adapt DANN

In [12]:
!apt-get update -y --quiet
!apt-get install python3.9 python3.9-distutils --quiet # Replace 3.10 with your desired version

Hit:1 http://security.ubuntu.com/ubuntu jammy-security InRelease
Hit:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Hit:3 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
Hit:4 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Hit:5 http://archive.ubuntu.com/ubuntu jammy InRelease
Hit:6 http://archive.ubuntu.com/ubuntu jammy-updates InRelease
Hit:7 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Hit:8 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:9 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:10 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Reading package lists...
W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)
Reading package lists...
Building dependency tree...
Reading state informat

In [13]:
!pip install tensorflow==2.15.0

Collecting tensorflow==2.15.0
  Using cached tensorflow-2.15.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.4 kB)
Collecting ml-dtypes~=0.2.0 (from tensorflow==2.15.0)
  Using cached ml_dtypes-0.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)
Collecting protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3 (from tensorflow==2.15.0)
  Using cached protobuf-4.25.8-cp37-abi3-manylinux2014_x86_64.whl.metadata (541 bytes)
Collecting tensorboard<2.16,>=2.15 (from tensorflow==2.15.0)
  Using cached tensorboard-2.15.2-py3-none-any.whl.metadata (1.7 kB)
Collecting keras<2.16,>=2.15.0 (from tensorflow==2.15.0)
  Using cached keras-2.15.0-py3-none-any.whl.metadata (2.4 kB)
Using cached tensorflow-2.15.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (475.3 MB)
Using cached keras-2.15.0-py3-none-any.whl (1.7 MB)
Using cached ml_dtypes-0.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.

In [15]:
from adapt.utils import make_classification_da
from adapt.feature_based import DANN
import numpy as np

y_source_np = np.array(y_source)
adaptive_dann = DANN() # Initialize CORAL with regularization (lambda_=0.5)
adaptive_dann.fit(X_source, y_source_np, Xt=X_target) # Fit CORAL to source and target data, explicitly providing y_source
# adaptive_dann.fit(X=X_source, Xt=X_target) # Fit CORAL to source and target data, explicitly providing y_source
X_source_aligned_dann = adaptive_dann.transform(X_source)
X_target_aligned_dann = adaptive_dann.transform(X_target)



## Adversarial Discriminative Domain Adaptation (ADDA)

In [None]:
# Simple discriminator
class Discriminator(nn.Module):
    def __init__(self, input_dim):
        super(Discriminator, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 100),
            nn.ReLU(),
            nn.Linear(100, 2)  # domain: 0 = source, 1 = target
        )

    def forward(self, x):
        return self.net(x)

# Feature extractor that preserves input dimensions (same as DANN_SameDim)
class FeatureExtractorSameDim(nn.Module):
    def __init__(self, input_dim):
        super(FeatureExtractorSameDim, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, input_dim),
            nn.ReLU(),
            nn.Linear(input_dim, input_dim),
        )

    def forward(self, x):
        return self.net(x)

# Label classifier (used only in source training phase)
class LabelClassifier(nn.Module):
    def __init__(self, input_dim, class_num):
        super(LabelClassifier, self).__init__()
        self.net = nn.Sequential(
            nn.ReLU(),
            nn.Linear(input_dim, 100),
            nn.ReLU(),
            nn.Linear(100, class_num)
        )

    def forward(self, x):
        return self.net(x)


In [None]:
def adda_transform_same_dim(Xs, ys, Xt, epochs_src=20, epochs_adv=30, batch_size=64, lr=1e-3, device='cpu'):
    """
    Perform ADDA-based alignment of source features to target distribution, keeping original feature dimensions.

    Returns:
    - Xs_aligned_adda: source features aligned to target domain
    """

    input_dim = Xs.shape[1]
    class_num = len(np.unique(ys))

    # Convert to torch tensors
    Xs_tensor = torch.tensor(Xs, dtype=torch.float32)
    ys_tensor = torch.tensor(ys, dtype=torch.long)
    Xt_tensor = torch.tensor(Xt, dtype=torch.float32)

    source_dataset = TensorDataset(Xs_tensor, ys_tensor)
    target_dataset = TensorDataset(Xt_tensor, torch.zeros(len(Xt_tensor)))  # dummy

    source_loader = DataLoader(source_dataset, batch_size=batch_size, shuffle=True)
    target_loader = DataLoader(target_dataset, batch_size=batch_size, shuffle=True)

    # === Stage 1: Train source encoder + classifier ===
    source_encoder = FeatureExtractorSameDim(input_dim).to(device)
    label_classifier = LabelClassifier(input_dim, class_num).to(device)

    optimizer_src = optim.Adam(list(source_encoder.parameters()) + list(label_classifier.parameters()), lr=lr)
    criterion_class = nn.CrossEntropyLoss()

    source_encoder.train()
    label_classifier.train()

    for epoch in range(epochs_src):
        for xb, yb in source_loader:
            xb, yb = xb.to(device), yb.to(device)

            optimizer_src.zero_grad()
            features = source_encoder(xb)
            preds = label_classifier(features)
            loss = criterion_class(preds, yb)
            loss.backward()
            optimizer_src.step()

    # === Stage 2: Adversarial training ===
    target_encoder = FeatureExtractorSameDim(input_dim).to(device)
    discriminator = Discriminator(input_dim).to(device)

    optimizer_tgt = optim.Adam(target_encoder.parameters(), lr=lr)
    optimizer_d = optim.Adam(discriminator.parameters(), lr=lr)
    criterion_domain = nn.CrossEntropyLoss()

    target_encoder.train()
    discriminator.train()

    for epoch in range(epochs_adv):
        len_loader = min(len(source_loader), len(target_loader))
        zip_loader = zip(source_loader, target_loader)

        for (src_batch, _), (tgt_batch, _) in zip_loader:
            src_data = src_batch.to(device)
            tgt_data = tgt_batch.to(device)

            # === 1. Train discriminator ===
            source_feat = source_encoder(src_data).detach()
            target_feat = target_encoder(tgt_data).detach()

            domain_input = torch.cat([source_feat, target_feat], dim=0)
            domain_labels = torch.cat([
                torch.zeros(source_feat.size(0)).long(),
                torch.ones(target_feat.size(0)).long()
            ]).to(device)

            optimizer_d.zero_grad()
            domain_preds = discriminator(domain_input)
            loss_d = criterion_domain(domain_preds, domain_labels)
            loss_d.backward()
            optimizer_d.step()

            # === 2. Train target encoder to fool discriminator ===
            target_feat = target_encoder(tgt_data)
            domain_preds_tgt = discriminator(target_feat)
            fool_labels = torch.zeros(target_feat.size(0)).long().to(device)

            optimizer_tgt.zero_grad()
            loss_tgt = criterion_domain(domain_preds_tgt, fool_labels)
            loss_tgt.backward()
            optimizer_tgt.step()

    # === Output transformed source features ===
    source_encoder.eval()
    with torch.no_grad():
        Xs_tensor = Xs_tensor.to(device)
        Xs_aligned = source_encoder(Xs_tensor).cpu().numpy()

    return Xs_aligned


In [None]:
X_source_aligned_adda = adda_transform_same_dim(
    Xs=X_source,
    ys=y_source,
    Xt=X_target,
    epochs_src=20,
    epochs_adv=30,
    device='cuda' if torch.cuda.is_available() else 'cpu'
)

print("Shape check:", X_source.shape, X_source_aligned_adda.shape, X_target.shape)


Shape check: (233331, 77) (233331, 77) (132604, 77)


## Adapt ADDA

In [22]:
y_source_np = np.array(y_source)
adaptive_adda = ADDA(random_state=42) # Initialize CORAL with regularization (lambda_=0.5)
adaptive_adda.fit(X_source, y_source_np, Xt=X_target) # Fit CORAL to source and target data, explicitly providing y_source
X_source_aligned_adda = adaptive_adda.transform(X_source)
X_target_aligned_adda = adaptive_adda.transform(X_target)



# Run Models

## Logistic Regression

### Logistic Regression Without Domain Adaptation

In [23]:
# #############################################
# Logistic Regression without domain adaptation
# #############################################


# Initialize Logistic Regression
lr = LogisticRegression(max_iter=5000, random_state=42)

# Before CORAL
train_start_time = time.time()
lr.fit(X_source, y_source)
test_start_time = time.time()
pred_before = lr.predict(X_target)
score_start_time = time.time()
acc_before = accuracy_score(y_target, pred_before)
f1_before = f1_score(y_target, pred_before, average='weighted')  # Weighted average for multi-class
precision_before = precision_score(y_target, pred_before, average='weighted')  # Overall precision
recall_before = recall_score(y_target, pred_before, average='weighted')  # Overall recall
report_before = classification_report(y_target, pred_before, output_dict=True)
cm_before = confusion_matrix(y_target, pred_before)
train_time_before = time.time() - train_start_time
test_time_before = time.time() - test_start_time
score_time_before = time.time() - score_start_time


print("Logistic Regression:")
print(f"Normal Approach: Accuracy = {acc_before:.4f}, F1 = {f1_before:.4f}, Precision = {precision_before:.4f}, Recall = {recall_before:.4f}")
print(f"Training Time: {train_time_before:.4f} seconds")
print(f"Testing Time: {test_time_before:.4f} seconds")
print(f"Scoring Time: {score_time_before:.4f} seconds")
print(f"Training Time Without Testing And Score Calulations: {(train_time_before - test_time_before - score_time_before):.4f} seconds")
print(f"Testing Time Without Score Calulations: {(test_time_before - score_time_before):.4f} seconds")
print("Class-wise Metrics (Normal Approach):")
for label in report_before:
    if label.isdigit():  # Check if it's a class label
        print(f"Label {label}: Precision = {report_before[label]['precision']:.4f}, Recall = {report_before[label]['recall']:.4f}, F1-score = {report_before[label]['f1-score']:.4f}")

print("\nConfusion Matrix (Normal Approach):")
print(cm_before)

Logistic Regression:
Normal Approach: Accuracy = 0.8192, F1 = 0.7903, Precision = 0.8185, Recall = 0.8192
Training Time: 17.7447 seconds
Testing Time: 0.1876 seconds
Scoring Time: 0.1690 seconds
Training Time Without Testing And Score Calulations: 17.3881 seconds
Testing Time Without Score Calulations: 0.0186 seconds
Class-wise Metrics (Normal Approach):
Label 0: Precision = 0.8144, Recall = 0.3434, F1-score = 0.4831
Label 1: Precision = 0.8198, Recall = 0.9745, F1-score = 0.8905

Confusion Matrix (Normal Approach):
[[11202 21418]
 [ 2553 97431]]


### Logistic Regression Wasserstein

In [None]:
# #############################################
# Logistic Regression Wasserstein
# #############################################



lr = LogisticRegression(max_iter=5000, random_state=42)

train_start_time = time.time()
lr.fit(X_source_aligned_wasserstein, y_source)
test_start_time = time.time()
pred_after = lr.predict(X_target)
score_start_time = time.time()
acc_after = accuracy_score(y_target, pred_after)
f1_after = f1_score(y_target, pred_after, average='weighted')  # Weighted average for multi-class
precision_after = precision_score(y_target, pred_after, average='weighted')  # Overall precision
recall_after = recall_score(y_target, pred_after, average='weighted')  # Overall recall
report_after = classification_report(y_target, pred_after, output_dict=True)
cm_after = confusion_matrix(y_target, pred_after)
train_time_after = time.time() - train_start_time
test_time_after = time.time() - test_start_time
score_time_after = time.time() - score_start_time


print("Logistic Regression Wasserstein:")
print(f" Wasserstein:  Accuracy = {acc_after:.4f}, F1 = {f1_after:.4f}, Precision = {precision_after:.4f}, Recall = {recall_after:.4f}")
print(f"Training Time: {train_time_after:.4f} seconds")
print(f"Testing Time: {test_time_after:.4f} seconds")
print(f"Scoring Time: {score_time_after:.4f} seconds")
print(f"Training Time Without Testing And Score Calulations: {(train_time_after - test_time_after - score_time_after):.4f} seconds")
print(f"Testing Time Without Score Calulations: {(test_time_after - score_time_after):.4f} seconds")
print("Class-wise Metrics (Wasserstein):")
for label in report_after:
    if label.isdigit():
        print(f"Label {label}: Precision = {report_after[label]['precision']:.4f}, Recall = {report_after[label]['recall']:.4f}, F1-score = {report_after[label]['f1-score']:.4f}")

print("\nConfusion Matrix (Wasserstein):")
print(cm_after)

Logistic Regression Wasserstein:
 Wasserstein:  Accuracy = 0.9138, F1 = 0.9134, Precision = 0.9131, Recall = 0.9138
Training Time: 71.5103 seconds
Testing Time: 0.1022 seconds
Scoring Time: 0.0834 seconds
Training Time Without Testing And Score Calulations: 71.3248 seconds
Testing Time Without Score Calulations: 0.0189 seconds
Class-wise Metrics (Wasserstein):
Label 0: Precision = 0.8349, Recall = 0.8099, F1-score = 0.8222
Label 1: Precision = 0.9386, Recall = 0.9477, F1-score = 0.9431

Confusion Matrix (Wasserstein):
[[26418  6202]
 [ 5225 94759]]


In [None]:
print(f"Improvement:  Accuracy Δ = {acc_after - acc_before:+.4f}, F1 Δ = {f1_after - f1_before:+.4f}, Precision Δ = {precision_after - precision_before:+.4f}, Recall Δ = {recall_after - recall_before:+.4f}\n")

print("Class-wise Improvement:")
for label in report_before:
    if label.isdigit() and label in report_after:
        print(f"Label {label}:")
        print(f"  Precision Δ = {report_after[label]['precision'] - report_before[label]['precision']:+.4f}")
        print(f"  Recall Δ = {report_after[label]['recall'] - report_before[label]['recall']:+.4f}")
        print(f"  F1-score Δ = {report_after[label]['f1-score'] - report_before[label]['f1-score']:+.4f}")

Improvement:  Accuracy Δ = +0.0946, F1 Δ = +0.1231, Precision Δ = +0.0946, Recall Δ = +0.0946

Class-wise Improvement:
Label 0:
  Precision Δ = +0.0205
  Recall Δ = +0.4665
  F1-score Δ = +0.3391
Label 1:
  Precision Δ = +0.1188
  Recall Δ = -0.0267
  F1-score Δ = +0.0527


### Logistic Regression Classwise Wasserstein

In [None]:
# #############################################
# Logistic Regression Wasserstein Classwise
# #############################################


lr = LogisticRegression(max_iter=5000, random_state=42)
train_start_time = time.time()
lr.fit(X_source_aligned_wasserstein_classwise, y_source)
test_start_time = time.time()
pred_after = lr.predict(X_target)
score_start_time = time.time()
acc_after = accuracy_score(y_target, pred_after)
f1_after = f1_score(y_target, pred_after, average='weighted')  # Weighted average for multi-class
precision_after = precision_score(y_target, pred_after, average='weighted')  # Overall precision
recall_after = recall_score(y_target, pred_after, average='weighted')  # Overall recall
report_after = classification_report(y_target, pred_after, output_dict=True)
cm_after = confusion_matrix(y_target, pred_after)
train_time_after = time.time() - train_start_time
test_time_after = time.time() - test_start_time
score_time_after = time.time() - score_start_time


print("Logistic Regression Classwise Wasserstein:")
print(f"Classwise Wasserstein:  Accuracy = {acc_after:.4f}, F1 = {f1_after:.4f}, Precision = {precision_after:.4f}, Recall = {recall_after:.4f}")
print(f"Training Time: {train_time_after:.4f} seconds")
print(f"Testing Time: {test_time_after:.4f} seconds")
print(f"Scoring Time: {score_time_after:.4f} seconds")
print(f"Training Time Without Testing And Score Calulations: {(train_time_after - test_time_after - score_time_after):.4f} seconds")
print(f"Testing Time Without Score Calulations: {(test_time_after - score_time_after):.4f} seconds")
print("Class-wise Metrics (Classwise Wasserstein):")
for label in report_after:
    if label.isdigit():
        print(f"Label {label}: Precision = {report_after[label]['precision']:.4f}, Recall = {report_after[label]['recall']:.4f}, F1-score = {report_after[label]['f1-score']:.4f}")

print("\nConfusion Matrix (Classwise Wasserstein):")
print(cm_after)

Logistic Regression Classwise Wasserstein:
Classwise Wasserstein:  Accuracy = 0.9404, F1 = 0.9417, Precision = 0.9459, Recall = 0.9404
Training Time: 6.7376 seconds
Testing Time: 0.0852 seconds
Scoring Time: 0.0721 seconds
Training Time Without Testing And Score Calulations: 6.5803 seconds
Testing Time Without Score Calulations: 0.0132 seconds
Class-wise Metrics (Classwise Wasserstein):
Label 0: Precision = 0.8308, Recall = 0.9517, F1-score = 0.8872
Label 1: Precision = 0.9835, Recall = 0.9368, F1-score = 0.9595

Confusion Matrix (Classwise Wasserstein):
[[31045  1575]
 [ 6322 93662]]


In [None]:
print(f"Improvement:  Accuracy Δ = {acc_after - acc_before:+.4f}, F1 Δ = {f1_after - f1_before:+.4f}, Precision Δ = {precision_after - precision_before:+.4f}, Recall Δ = {recall_after - recall_before:+.4f}\n")

print("Class-wise Improvement:")
for label in report_before:
    if label.isdigit() and label in report_after:
        print(f"Label {label}:")
        print(f"  Precision Δ = {report_after[label]['precision'] - report_before[label]['precision']:+.4f}")
        print(f"  Recall Δ = {report_after[label]['recall'] - report_before[label]['recall']:+.4f}")
        print(f"  F1-score Δ = {report_after[label]['f1-score'] - report_before[label]['f1-score']:+.4f}")

Improvement:  Accuracy Δ = +0.1212, F1 Δ = +0.1515, Precision Δ = +0.1274, Recall Δ = +0.1212

Class-wise Improvement:
Label 0:
  Precision Δ = +0.0164
  Recall Δ = +0.6083
  F1-score Δ = +0.4041
Label 1:
  Precision Δ = +0.1637
  Recall Δ = -0.0377
  F1-score Δ = +0.0691


### Logistic Regression DANN

In [15]:
#############################################
# Logistic Regression DANN
#############################################


lr = LogisticRegression(max_iter=5000, random_state=42)
train_start_time = time.time()
lr.fit(X_source_aligned_dann, y_source)
test_start_time = time.time()
pred_after = lr.predict(X_target_aligned_dann)
score_start_time = time.time()
acc_after = accuracy_score(y_target, pred_after)
f1_after = f1_score(y_target, pred_after, average='weighted')  # Weighted average for multi-class
precision_after = precision_score(y_target, pred_after, average='weighted')  # Overall precision
recall_after = recall_score(y_target, pred_after, average='weighted')  # Overall recall
report_after = classification_report(y_target, pred_after, output_dict=True)
cm_after = confusion_matrix(y_target, pred_after)
train_time_after = time.time() - train_start_time
test_time_after = time.time() - test_start_time
score_time_after = time.time() - score_start_time


print("Logistic Regression DANN:")
print(f"DANN:  Accuracy = {acc_after:.4f}, F1 = {f1_after:.4f}, Precision = {precision_after:.4f}, Recall = {recall_after:.4f}")
print(f"Training Time: {train_time_after:.4f} seconds")
print(f"Testing Time: {test_time_after:.4f} seconds")
print(f"Scoring Time: {score_time_after:.4f} seconds")
print(f"Training Time Without Testing And Score Calulations: {(train_time_after - test_time_after - score_time_after):.4f} seconds")
print(f"Testing Time Without Score Calulations: {(test_time_after - score_time_after):.4f} seconds")
print("Class-wise Metrics (DANN):")
for label in report_after:
    if label.isdigit():
        print(f"Label {label}: Precision = {report_after[label]['precision']:.4f}, Recall = {report_after[label]['recall']:.4f}, F1-score = {report_after[label]['f1-score']:.4f}")

print("\nConfusion Matrix (DANN):")
print(cm_after)

Logistic Regression DANN:
DANN:  Accuracy = 0.9116, F1 = 0.9114, Precision = 0.9113, Recall = 0.9116
Training Time: 1.3412 seconds
Testing Time: 0.2234 seconds
Scoring Time: 0.2161 seconds
Training Time Without Testing And Score Calulations: 0.9016 seconds
Testing Time Without Score Calulations: 0.0073 seconds
Class-wise Metrics (DANN):
Label 0: Precision = 0.8231, Recall = 0.8158, F1-score = 0.8195
Label 1: Precision = 0.9401, Recall = 0.9428, F1-score = 0.9415

Confusion Matrix (DANN):
[[26613  6007]
 [ 5718 94266]]


In [18]:
print(f"Improvement:  Accuracy Δ = {acc_after - acc_before:+.4f}, F1 Δ = {f1_after - f1_before:+.4f}, Precision Δ = {precision_after - precision_before:+.4f}, Recall Δ = {recall_after - recall_before:+.4f}\n")

print("Class-wise Improvement:")
for label in report_before:
    if label.isdigit() and label in report_after:
        print(f"Label {label}:")
        print(f"  Precision Δ = {report_after[label]['precision'] - report_before[label]['precision']:+.4f}")
        print(f"  Recall Δ = {report_after[label]['recall'] - report_before[label]['recall']:+.4f}")
        print(f"  F1-score Δ = {report_after[label]['f1-score'] - report_before[label]['f1-score']:+.4f}")

Improvement:  Accuracy Δ = +0.0924, F1 Δ = +0.1212, Precision Δ = +0.0929, Recall Δ = +0.0924

Class-wise Improvement:
Label 0:
  Precision Δ = +0.0087
  Recall Δ = +0.4724
  F1-score Δ = +0.3364
Label 1:
  Precision Δ = +0.1203
  Recall Δ = -0.0317
  F1-score Δ = +0.0510


### Logistic Regression ADDA

In [24]:
# #############################################
# Logistic Regression ADDA
# #############################################


lr = LogisticRegression(max_iter=5000, random_state=42)
train_start_time = time.time()
lr.fit(X_source_aligned_adda, y_source)
test_start_time = time.time()
pred_after = lr.predict(X_target_aligned_adda)
score_start_time = time.time()
acc_after = accuracy_score(y_target, pred_after)
f1_after = f1_score(y_target, pred_after, average='weighted')  # Weighted average for multi-class
precision_after = precision_score(y_target, pred_after, average='weighted')  # Overall precision
recall_after = recall_score(y_target, pred_after, average='weighted')  # Overall recall
report_after = classification_report(y_target, pred_after, output_dict=True)
cm_after = confusion_matrix(y_target, pred_after)
train_time_after = time.time() - train_start_time
test_time_after = time.time() - test_start_time
score_time_after = time.time() - score_start_time


print("Logistic Regression ADDA:")
print(f"ADDA:  Accuracy = {acc_after:.4f}, F1 = {f1_after:.4f}, Precision = {precision_after:.4f}, Recall = {recall_after:.4f}")
print(f"Training Time: {train_time_after:.4f} seconds")
print(f"Testing Time: {test_time_after:.4f} seconds")
print(f"Scoring Time: {score_time_after:.4f} seconds")
print(f"Training Time Without Testing And Score Calulations: {(train_time_after - test_time_after - score_time_after):.4f} seconds")
print(f"Testing Time Without Score Calulations: {(test_time_after - score_time_after):.4f} seconds")
print("Class-wise Metrics (ADDA):")
for label in report_after:
    if label.isdigit():
        print(f"Label {label}: Precision = {report_after[label]['precision']:.4f}, Recall = {report_after[label]['recall']:.4f}, F1-score = {report_after[label]['f1-score']:.4f}")

print("\nConfusion Matrix (ADDA):")
print(cm_after)

Logistic Regression ADDA:
ADDA:  Accuracy = 0.9259, F1 = 0.9268, Precision = 0.9286, Recall = 0.9259
Training Time: 1.9820 seconds
Testing Time: 0.3004 seconds
Scoring Time: 0.2908 seconds
Training Time Without Testing And Score Calulations: 1.3908 seconds
Testing Time Without Score Calulations: 0.0096 seconds
Class-wise Metrics (ADDA):
Label 0: Precision = 0.8236, Recall = 0.8892, F1-score = 0.8551
Label 1: Precision = 0.9629, Recall = 0.9379, F1-score = 0.9502

Confusion Matrix (ADDA):
[[29005  3615]
 [ 6214 93770]]


In [20]:
print(f"Improvement:  Accuracy Δ = {acc_after - acc_before:+.4f}, F1 Δ = {f1_after - f1_before:+.4f}, Precision Δ = {precision_after - precision_before:+.4f}, Recall Δ = {recall_after - recall_before:+.4f}\n")

print("Class-wise Improvement:")
for label in report_before:
    if label.isdigit() and label in report_after:
        print(f"Label {label}:")
        print(f"  Precision Δ = {report_after[label]['precision'] - report_before[label]['precision']:+.4f}")
        print(f"  Recall Δ = {report_after[label]['recall'] - report_before[label]['recall']:+.4f}")
        print(f"  F1-score Δ = {report_after[label]['f1-score'] - report_before[label]['f1-score']:+.4f}")

Improvement:  Accuracy Δ = +0.1185, F1 Δ = +0.1482, Precision Δ = +0.1217, Recall Δ = +0.1185

Class-wise Improvement:
Label 0:
  Precision Δ = +0.0315
  Recall Δ = +0.5697
  F1-score Δ = +0.3951
Label 1:
  Precision Δ = +0.1511
  Recall Δ = -0.0287
  F1-score Δ = +0.0677


## Random Forest

### Random Forest Without Domain Adaptation

In [21]:
# #############################################
# Random Forest without domain adaptation
# #############################################



rf = RandomForestClassifier(n_estimators=100, random_state=42)
train_start_time = time.time()
rf.fit(X_source, y_source)
test_start_time = time.time()
pred_before = rf.predict(X_target)
score_start_time = time.time()
acc_before = accuracy_score(y_target, pred_before)
f1_before = f1_score(y_target, pred_before, average='weighted')  # Weighted average for multi-class
precision_before = precision_score(y_target, pred_before, average='weighted')  # Overall precision
recall_before = recall_score(y_target, pred_before, average='weighted')  # Overall recall
report_before = classification_report(y_target, pred_before, output_dict=True)
cm_before = confusion_matrix(y_target, pred_before)
train_time_before = time.time() - train_start_time
test_time_before = time.time() - test_start_time
score_time_before = time.time() - score_start_time


print("Random Forest Classifier:")
print(f"Normal Approach: Accuracy = {acc_before:.4f}, F1 = {f1_before:.4f}, Precision = {precision_before:.4f}, Recall = {recall_before:.4f}")
print(f"Training Time: {train_time_before:.4f} seconds")
print(f"Testing Time: {test_time_before:.4f} seconds")
print(f"Scoring Time: {score_time_before:.4f} seconds")
print(f"Training Time Without Testing And Score Calulations: {(train_time_before - test_time_before - score_time_before):.4f} seconds")
print(f"Testing Time Without Score Calulations: {(test_time_before - score_time_before):.4f} seconds")
print("Class-wise Metrics (Normal Approach):")
for label in report_before:
    if label.isdigit():  # Check if it's a class label
        print(f"Label {label}: Precision = {report_before[label]['precision']:.4f}, Recall = {report_before[label]['recall']:.4f}, F1-score = {report_before[label]['f1-score']:.4f}")

print("\nConfusion Matrix (Normal Approach):")
print(cm_before)

Random Forest Classifier:
Normal Approach: Accuracy = 0.8798, F1 = 0.8745, Precision = 0.8763, Recall = 0.8798
Training Time: 114.0900 seconds
Testing Time: 1.2125 seconds
Scoring Time: 0.1751 seconds
Training Time Without Testing And Score Calulations: 112.7025 seconds
Testing Time Without Score Calulations: 1.0374 seconds
Class-wise Metrics (Normal Approach):
Label 0: Precision = 0.8265, Recall = 0.6471, F1-score = 0.7259
Label 1: Precision = 0.8925, Recall = 0.9557, F1-score = 0.9230

Confusion Matrix (Normal Approach):
[[21109 11511]
 [ 4431 95553]]


### Random Forest Wasserstein

In [None]:
# #############################################
# Random Forest Wasserstein
# #############################################



rf = RandomForestClassifier(n_estimators=100, random_state=42)
train_start_time = time.time()
rf.fit(X_source_aligned_wasserstein, y_source)
test_start_time = time.time()
pred_after = rf.predict(X_target)
score_start_time = time.time()
acc_after = accuracy_score(y_target, pred_after)
f1_after = f1_score(y_target, pred_after, average='weighted')  # Weighted average for multi-class
precision_after = precision_score(y_target, pred_after, average='weighted')  # Overall precision
recall_after = recall_score(y_target, pred_after, average='weighted')  # Overall recall
report_after = classification_report(y_target, pred_after, output_dict=True)
cm_after = confusion_matrix(y_target, pred_after)
train_time_after = time.time() - train_start_time
test_time_after = time.time() - test_start_time
score_time_after = time.time() - score_start_time


print("Random Forest Classifier:")
print(f"Wasserstein:  Accuracy = {acc_after:.4f}, F1 = {f1_after:.4f}, Precision = {precision_after:.4f}, Recall = {recall_after:.4f}")
print(f"Training Time: {train_time_after:.4f} seconds")
print(f"Testing Time: {test_time_after:.4f} seconds")
print(f"Scoring Time: {score_time_after:.4f} seconds")
print(f"Training Time Without Testing And Score Calulations: {(train_time_after - test_time_after - score_time_after):.4f} seconds")
print(f"Testing Time Without Score Calulations: {(test_time_after - score_time_after):.4f} seconds")
print("Class-wise Metrics (Wasserstein):")
for label in report_after:
    if label.isdigit():
        print(f"Label {label}: Precision = {report_after[label]['precision']:.4f}, Recall = {report_after[label]['recall']:.4f}, F1-score = {report_after[label]['f1-score']:.4f}")

print("\nConfusion Matrix (Wasserstein):")
print(cm_after)

Random Forest Classifier:
Wasserstein:  Accuracy = 0.9034, F1 = 0.9044, Precision = 0.9059, Recall = 0.9034
Training Time: 67.4229 seconds
Testing Time: 0.3820 seconds
Scoring Time: 0.0370 seconds
Training Time Without Testing And Score Calulations: 67.0038 seconds
Testing Time Without Score Calulations: 0.3450 seconds
Class-wise Metrics (Wasserstein):
Label 0: Precision = 0.7854, Recall = 0.8355, F1-score = 0.8097
Label 1: Precision = 0.9452, Recall = 0.9255, F1-score = 0.9353

Confusion Matrix (Wasserstein):
[[27255  5365]
 [ 7445 92539]]


In [None]:
print(f"Improvement:  Accuracy Δ = {acc_after - acc_before:+.4f}, F1 Δ = {f1_after - f1_before:+.4f}, Precision Δ = {precision_after - precision_before:+.4f}, Recall Δ = {recall_after - recall_before:+.4f}\n")

print("Class-wise Improvement:")
for label in report_before:
    if label.isdigit() and label in report_after:
        print(f"Label {label}:")
        print(f"  Precision Δ = {report_after[label]['precision'] - report_before[label]['precision']:+.4f}")
        print(f"  Recall Δ = {report_after[label]['recall'] - report_before[label]['recall']:+.4f}")
        print(f"  F1-score Δ = {report_after[label]['f1-score'] - report_before[label]['f1-score']:+.4f}")

Improvement:  Accuracy Δ = +0.0236, F1 Δ = +0.0299, Precision Δ = +0.0296, Recall Δ = +0.0236

Class-wise Improvement:
Label 0:
  Precision Δ = -0.0411
  Recall Δ = +0.1884
  F1-score Δ = +0.0838
Label 1:
  Precision Δ = +0.0527
  Recall Δ = -0.0301
  F1-score Δ = +0.0123


### Random Forest classwise Wasserstein

In [None]:
# #############################################
# Random Forest Wasserstein Classwise
# #############################################



rf = RandomForestClassifier(n_estimators=100, random_state=42)
train_start_time = time.time()
rf.fit(X_source_aligned_wasserstein_classwise, y_source)
test_start_time = time.time()
pred_after = rf.predict(X_target)
score_start_time = time.time()
acc_after = accuracy_score(y_target, pred_after)
f1_after = f1_score(y_target, pred_after, average='weighted')  # Weighted average for multi-class
precision_after = precision_score(y_target, pred_after, average='weighted')  # Overall precision
recall_after = recall_score(y_target, pred_after, average='weighted')  # Overall recall
report_after = classification_report(y_target, pred_after, output_dict=True)
cm_after = confusion_matrix(y_target, pred_after)
train_time_after = time.time() - train_start_time
test_time_after = time.time() - test_start_time
score_time_after = time.time() - score_start_time


print("Random Forest Classifier:")
print(f"Classwise Wasserstein:  Accuracy = {acc_after:.4f}, F1 = {f1_after:.4f}, Precision = {precision_after:.4f}, Recall = {recall_after:.4f}")
print(f"Training Time: {train_time_after:.4f} seconds")
print(f"Testing Time: {test_time_after:.4f} seconds")
print(f"Scoring Time: {score_time_after:.4f} seconds")
print(f"Training Time Without Testing And Score Calulations: {(train_time_after - test_time_after - score_time_after):.4f} seconds")
print(f"Testing Time Without Score Calulations: {(test_time_after - score_time_after):.4f} seconds")
print("Class-wise Metrics (Classwise Wasserstein):")
for label in report_after:
    if label.isdigit():
        print(f"Label {label}: Precision = {report_after[label]['precision']:.4f}, Recall = {report_after[label]['recall']:.4f}, F1-score = {report_after[label]['f1-score']:.4f}")

print("\nConfusion Matrix (Classwise Wasserstein):")
print(cm_after)

Random Forest Classifier:
Classwise Wasserstein:  Accuracy = 0.9250, F1 = 0.9248, Precision = 0.9247, Recall = 0.9250
Training Time: 13.6459 seconds
Testing Time: 0.3524 seconds
Scoring Time: 0.0758 seconds
Training Time Without Testing And Score Calulations: 13.2177 seconds
Testing Time Without Score Calulations: 0.2766 seconds
Class-wise Metrics (Classwise Wasserstein):
Label 0: Precision = 0.8512, Recall = 0.8423, F1-score = 0.8467
Label 1: Precision = 0.9487, Recall = 0.9520, F1-score = 0.9503

Confusion Matrix (Classwise Wasserstein):
[[27476  5144]
 [ 4804 95180]]


In [None]:
print(f"Improvement:  Accuracy Δ = {acc_after - acc_before:+.4f}, F1 Δ = {f1_after - f1_before:+.4f}, Precision Δ = {precision_after - precision_before:+.4f}, Recall Δ = {recall_after - recall_before:+.4f}\n")

print("Class-wise Improvement:")
for label in report_before:
    if label.isdigit() and label in report_after:
        print(f"Label {label}:")
        print(f"  Precision Δ = {report_after[label]['precision'] - report_before[label]['precision']:+.4f}")
        print(f"  Recall Δ = {report_after[label]['recall'] - report_before[label]['recall']:+.4f}")
        print(f"  F1-score Δ = {report_after[label]['f1-score'] - report_before[label]['f1-score']:+.4f}")

Improvement:  Accuracy Δ = +0.0452, F1 Δ = +0.0503, Precision Δ = +0.0485, Recall Δ = +0.0452

Class-wise Improvement:
Label 0:
  Precision Δ = +0.0247
  Recall Δ = +0.1952
  F1-score Δ = +0.1208
Label 1:
  Precision Δ = +0.0562
  Recall Δ = -0.0037
  F1-score Δ = +0.0273


### Random Forest DANN

In [22]:
# #############################################
# Random Forest DANN
# #############################################



rf = RandomForestClassifier(n_estimators=100, random_state=42)
train_start_time = time.time()
rf.fit(X_source_aligned_dann, y_source)
test_start_time = time.time()
pred_after = rf.predict(X_target_aligned_dann)
score_start_time = time.time()
acc_after = accuracy_score(y_target, pred_after)
f1_after = f1_score(y_target, pred_after, average='weighted')  # Weighted average for multi-class
precision_after = precision_score(y_target, pred_after, average='weighted')  # Overall precision
recall_after = recall_score(y_target, pred_after, average='weighted')  # Overall recall
report_after = classification_report(y_target, pred_after, output_dict=True)
cm_after = confusion_matrix(y_target, pred_after)
train_time_after = time.time() - train_start_time
test_time_after = time.time() - test_start_time
score_time_after = time.time() - score_start_time


print("Random Forest Classifier:")
print(f"DANN:  Accuracy = {acc_after:.4f}, F1 = {f1_after:.4f}, Precision = {precision_after:.4f}, Recall = {recall_after:.4f}")
print(f"Training Time: {train_time_after:.4f} seconds")
print(f"Testing Time: {test_time_after:.4f} seconds")
print(f"Scoring Time: {score_time_after:.4f} seconds")
print(f"Training Time Without Testing And Score Calulations: {(train_time_after - test_time_after - score_time_after):.4f} seconds")
print(f"Testing Time Without Score Calulations: {(test_time_after - score_time_after):.4f} seconds")
print("Class-wise Metrics (DANN):")
for label in report_after:
    if label.isdigit():
        print(f"Label {label}: Precision = {report_after[label]['precision']:.4f}, Recall = {report_after[label]['recall']:.4f}, F1-score = {report_after[label]['f1-score']:.4f}")

print("\nConfusion Matrix (DANN):")
print(cm_after)

Random Forest Classifier:
DANN:  Accuracy = 0.9254, F1 = 0.9267, Precision = 0.9294, Recall = 0.9254
Training Time: 68.0802 seconds
Testing Time: 1.1191 seconds
Scoring Time: 0.1775 seconds
Training Time Without Testing And Score Calulations: 66.7837 seconds
Testing Time Without Score Calulations: 0.9416 seconds
Class-wise Metrics (DANN):
Label 0: Precision = 0.8146, Recall = 0.9021, F1-score = 0.8561
Label 1: Precision = 0.9669, Recall = 0.9330, F1-score = 0.9497

Confusion Matrix (DANN):
[[29425  3195]
 [ 6695 93289]]


In [23]:
print(f"Improvement:  Accuracy Δ = {acc_after - acc_before:+.4f}, F1 Δ = {f1_after - f1_before:+.4f}, Precision Δ = {precision_after - precision_before:+.4f}, Recall Δ = {recall_after - recall_before:+.4f}\n")

print("Class-wise Improvement:")
for label in report_before:
    if label.isdigit() and label in report_after:
        print(f"Label {label}:")
        print(f"  Precision Δ = {report_after[label]['precision'] - report_before[label]['precision']:+.4f}")
        print(f"  Recall Δ = {report_after[label]['recall'] - report_before[label]['recall']:+.4f}")
        print(f"  F1-score Δ = {report_after[label]['f1-score'] - report_before[label]['f1-score']:+.4f}")

Improvement:  Accuracy Δ = +0.0456, F1 Δ = +0.0521, Precision Δ = +0.0532, Recall Δ = +0.0456

Class-wise Improvement:
Label 0:
  Precision Δ = -0.0119
  Recall Δ = +0.2549
  F1-score Δ = +0.1302
Label 1:
  Precision Δ = +0.0744
  Recall Δ = -0.0226
  F1-score Δ = +0.0267


### Random Forest ADDA

In [24]:
# #############################################
# Random Forest ADDA
# #############################################



rf = RandomForestClassifier(n_estimators=100, random_state=42)
train_start_time = time.time()
rf.fit(X_source_aligned_adda, y_source)
test_start_time = time.time()
pred_after = rf.predict(X_target_aligned_adda)
score_start_time = time.time()
acc_after = accuracy_score(y_target, pred_after)
f1_after = f1_score(y_target, pred_after, average='weighted')  # Weighted average for multi-class
precision_after = precision_score(y_target, pred_after, average='weighted')  # Overall precision
recall_after = recall_score(y_target, pred_after, average='weighted')  # Overall recall
report_after = classification_report(y_target, pred_after, output_dict=True)
cm_after = confusion_matrix(y_target, pred_after)
train_time_after = time.time() - train_start_time
test_time_after = time.time() - test_start_time
score_time_after = time.time() - score_start_time


print("Random Forest Classifier:")
print(f"ADDA:  Accuracy = {acc_after:.4f}, F1 = {f1_after:.4f}, Precision = {precision_after:.4f}, Recall = {recall_after:.4f}")
print(f"Training Time: {train_time_after:.4f} seconds")
print(f"Testing Time: {test_time_after:.4f} seconds")
print(f"Scoring Time: {score_time_after:.4f} seconds")
print(f"Training Time Without Testing And Score Calulations: {(train_time_after - test_time_after - score_time_after):.4f} seconds")
print(f"Testing Time Without Score Calulations: {(test_time_after - score_time_after):.4f} seconds")
print("Class-wise Metrics (ADDA):")
for label in report_after:
    if label.isdigit():
        print(f"Label {label}: Precision = {report_after[label]['precision']:.4f}, Recall = {report_after[label]['recall']:.4f}, F1-score = {report_after[label]['f1-score']:.4f}")

print("\nConfusion Matrix (ADDA):")
print(cm_after)

Random Forest Classifier:
ADDA:  Accuracy = 0.9291, F1 = 0.9302, Precision = 0.9326, Recall = 0.9291
Training Time: 103.2255 seconds
Testing Time: 1.0360 seconds
Scoring Time: 0.1749 seconds
Training Time Without Testing And Score Calulations: 102.0146 seconds
Testing Time Without Score Calulations: 0.8612 seconds
Class-wise Metrics (ADDA):
Label 0: Precision = 0.8236, Recall = 0.9058, F1-score = 0.8627
Label 1: Precision = 0.9682, Recall = 0.9367, F1-score = 0.9522

Confusion Matrix (ADDA):
[[29546  3074]
 [ 6328 93656]]


In [25]:
print(f"Improvement:  Accuracy Δ = {acc_after - acc_before:+.4f}, F1 Δ = {f1_after - f1_before:+.4f}, Precision Δ = {precision_after - precision_before:+.4f}, Recall Δ = {recall_after - recall_before:+.4f}\n")

print("Class-wise Improvement:")
for label in report_before:
    if label.isdigit() and label in report_after:
        print(f"Label {label}:")
        print(f"  Precision Δ = {report_after[label]['precision'] - report_before[label]['precision']:+.4f}")
        print(f"  Recall Δ = {report_after[label]['recall'] - report_before[label]['recall']:+.4f}")
        print(f"  F1-score Δ = {report_after[label]['f1-score'] - report_before[label]['f1-score']:+.4f}")

Improvement:  Accuracy Δ = +0.0493, F1 Δ = +0.0557, Precision Δ = +0.0564, Recall Δ = +0.0493

Class-wise Improvement:
Label 0:
  Precision Δ = -0.0029
  Recall Δ = +0.2586
  F1-score Δ = +0.1368
Label 1:
  Precision Δ = +0.0757
  Recall Δ = -0.0190
  F1-score Δ = +0.0292


## Multi-Layer Perceptron

### MLPClassifier Without Domain Adaptation

In [26]:
# #############################################
# Neural Network without domain adaptation
# #############################################


mlp = MLPClassifier(random_state=42)
train_start_time = time.time()
mlp.fit(X_source, y_source)
test_start_time = time.time()
pred_before = mlp.predict(X_target)
score_start_time = time.time()
acc_before = accuracy_score(y_target, pred_before)
f1_before = f1_score(y_target, pred_before, average='weighted')  # Weighted average for multi-class
precision_before = precision_score(y_target, pred_before, average='weighted')  # Overall precision
recall_before = recall_score(y_target, pred_before, average='weighted')  # Overall recall
report_before = classification_report(y_target, pred_before, output_dict=True)
cm_before = confusion_matrix(y_target, pred_before)
train_time_before = time.time() - train_start_time
test_time_before = time.time() - test_start_time
score_time_before = time.time() - score_start_time


print("Neural Network:")
print(f"Normal:  Accuracy = {acc_before:.4f}, F1 = {f1_before:.4f}, Precision = {precision_before:.4f}, Recall = {recall_before:.4f}")
print(f"Training Time: {train_time_before:.4f} seconds")
print(f"Testing Time: {test_time_before:.4f} seconds")
print(f"Scoring Time: {score_time_before:.4f} seconds")
print(f"Training Time Without Testing And Score Calulations: {(train_time_before - test_time_before - score_time_before):.4f} seconds")
print(f"Testing Time Without Score Calulations: {(test_time_before - score_time_before):.4f} seconds")
print("Class-wise Metrics (Normal):")
for label in report_before:
    if label.isdigit():
        print(f"Label {label}: Precision = {report_before[label]['precision']:.4f}, Recall = {report_before[label]['recall']:.4f}, F1-score = {report_before[label]['f1-score']:.4f}")

print("\nConfusion Matrix (Normal Approach):")
print(cm_before)

Neural Network:
Normal:  Accuracy = 0.8089, F1 = 0.7792, Precision = 0.8009, Recall = 0.8089
Training Time: 509.4440 seconds
Testing Time: 1.0048 seconds
Scoring Time: 0.3047 seconds
Training Time Without Testing And Score Calulations: 508.1344 seconds
Testing Time Without Score Calulations: 0.7001 seconds
Class-wise Metrics (Normal):
Label 0: Precision = 0.7579, Recall = 0.3281, F1-score = 0.4579
Label 1: Precision = 0.8150, Recall = 0.9658, F1-score = 0.8840

Confusion Matrix (Normal Approach):
[[10701 21919]
 [ 3419 96565]]


### MLPClassifier Wasserstein

In [None]:
# #############################################
# Neural Network wasserstein
# #############################################


mlp = MLPClassifier(random_state=42)
train_start_time = time.time()
mlp.fit(X_source_aligned_wasserstein, y_source)
test_start_time = time.time()
pred_after = mlp.predict(X_target)
score_start_time = time.time()
acc_after = accuracy_score(y_target, pred_after)
f1_after = f1_score(y_target, pred_after, average='weighted')  # Weighted average for multi-class
precision_after = precision_score(y_target, pred_after, average='weighted')  # Overall precision
recall_after = recall_score(y_target, pred_after, average='weighted')  # Overall recall
report_after = classification_report(y_target, pred_after, output_dict=True)
cm_after = confusion_matrix(y_target, pred_after)
train_time_after = time.time() - train_start_time
test_time_after = time.time() - test_start_time
score_time_after = time.time() - score_start_time


print("Neural Network:")
print(f"Wasserstein:  Accuracy = {acc_after:.4f}, F1 = {f1_after:.4f}, Precision = {precision_after:.4f}, Recall = {recall_after:.4f}")
print(f"Training Time: {train_time_after:.4f} seconds")
print(f"Testing Time: {test_time_after:.4f} seconds")
print(f"Scoring Time: {score_time_after:.4f} seconds")
print(f"Training Time Without Testing And Score Calulations: {(train_time_after - test_time_after - score_time_after):.4f} seconds")
print(f"Testing Time Without Score Calulations: {(test_time_after - score_time_after):.4f} seconds")
print("Class-wise Metrics (Wasserstein):")
for label in report_after:
    if label.isdigit():
        print(f"Label {label}: Precision = {report_after[label]['precision']:.4f}, Recall = {report_after[label]['recall']:.4f}, F1-score = {report_after[label]['f1-score']:.4f}")

print("\nConfusion Matrix (Wasserstein):")
print(cm_after)

Neural Network:
Wasserstein:  Accuracy = 0.8771, F1 = 0.8708, Precision = 0.8737, Recall = 0.8771
Training Time: 527.3795 seconds
Testing Time: 1.9282 seconds
Scoring Time: 0.0964 seconds
Training Time Without Testing And Score Calulations: 525.3549 seconds
Testing Time Without Score Calulations: 1.8318 seconds
Class-wise Metrics (Wasserstein):
Label 0: Precision = 0.8323, Recall = 0.6266, F1-score = 0.7149
Label 1: Precision = 0.8873, Recall = 0.9588, F1-score = 0.9217

Confusion Matrix (Wasserstein):
[[20440 12180]
 [ 4119 95865]]


In [None]:
print(f"Improvement:  Accuracy Δ = {acc_after - acc_before:+.4f}, F1 Δ = {f1_after - f1_before:+.4f}, Precision Δ = {precision_after - precision_before:+.4f}, Recall Δ = {recall_after - recall_before:+.4f}\n")

print("Class-wise Improvement:")
for label in report_before:
    if label.isdigit() and label in report_after:
        print(f"Label {label}:")
        print(f"  Precision Δ = {report_after[label]['precision'] - report_before[label]['precision']:+.4f}")
        print(f"  Recall Δ = {report_after[label]['recall'] - report_before[label]['recall']:+.4f}")
        print(f"  F1-score Δ = {report_after[label]['f1-score'] - report_before[label]['f1-score']:+.4f}")

Improvement:  Accuracy Δ = +0.0641, F1 Δ = +0.0871, Precision Δ = +0.0663, Recall Δ = +0.0641

Class-wise Improvement:
Label 0:
  Precision Δ = +0.0544
  Recall Δ = +0.2913
  F1-score Δ = +0.2463
Label 1:
  Precision Δ = +0.0702
  Recall Δ = -0.0100
  F1-score Δ = +0.0352


### MLPClassifier Calsswise Wasserstein

In [None]:
# #############################################
# Neural Network Wasserstein Classwise
# #############################################


mlp = MLPClassifier(random_state=42)
train_start_time = time.time()
mlp.fit(X_source_aligned_wasserstein_classwise, y_source)
test_start_time = time.time()
pred_after = mlp.predict(X_target)
score_start_time = time.time()
acc_after = accuracy_score(y_target, pred_after)
f1_after = f1_score(y_target, pred_after, average='weighted')  # Weighted average for multi-class
precision_after = precision_score(y_target, pred_after, average='weighted')  # Overall precision
recall_after = recall_score(y_target, pred_after, average='weighted')  # Overall recall
report_after = classification_report(y_target, pred_after, output_dict=True)
cm_after = confusion_matrix(y_target, pred_after)
train_time_after = time.time() - train_start_time
test_time_after = time.time() - test_start_time
score_time_after = time.time() - score_start_time


print("Neural Network:")
print(f"Calsswise Wasserstein:  Accuracy = {acc_after:.4f}, F1 = {f1_after:.4f}, Precision = {precision_after:.4f}, Recall = {recall_after:.4f}")
print(f"Training Time: {train_time_after:.4f} seconds")
print(f"Testing Time: {test_time_after:.4f} seconds")
print(f"Scoring Time: {score_time_after:.4f} seconds")
print(f"Training Time Without Testing And Score Calulations: {(train_time_after - test_time_after - score_time_after):.4f} seconds")
print(f"Testing Time Without Score Calulations: {(test_time_after - score_time_after):.4f} seconds")
print("Class-wise Metrics (Calsswise Wasserstein):")
for label in report_after:
    if label.isdigit():
        print(f"Label {label}: Precision = {report_after[label]['precision']:.4f}, Recall = {report_after[label]['recall']:.4f}, F1-score = {report_after[label]['f1-score']:.4f}")

print("\nConfusion Matrix (Calsswise Wasserstein):") # Commenting out since the original code didn't print confusion matrix
print(cm_after)

Neural Network:
Calsswise Wasserstein:  Accuracy = 0.9173, F1 = 0.9165, Precision = 0.9161, Recall = 0.9173
Training Time: 36.9762 seconds
Testing Time: 0.2773 seconds
Scoring Time: 0.0922 seconds
Training Time Without Testing And Score Calulations: 36.6066 seconds
Testing Time Without Score Calulations: 0.1851 seconds
Class-wise Metrics (Calsswise Wasserstein):
Label 0: Precision = 0.8528, Recall = 0.8025, F1-score = 0.8269
Label 1: Precision = 0.9368, Recall = 0.9548, F1-score = 0.9457

Confusion Matrix (Calsswise Wasserstein):
[[26179  6441]
 [ 4520 95464]]


In [None]:
print(f"Improvement:  Accuracy Δ = {acc_after - acc_before:+.4f}, F1 Δ = {f1_after - f1_before:+.4f}, Precision Δ = {precision_after - precision_before:+.4f}, Recall Δ = {recall_after - recall_before:+.4f}\n")

print("Class-wise Improvement:")
for label in report_before:
    if label.isdigit() and label in report_after:
        print(f"Label {label}:")
        print(f"  Precision Δ = {report_after[label]['precision'] - report_before[label]['precision']:+.4f}")
        print(f"  Recall Δ = {report_after[label]['recall'] - report_before[label]['recall']:+.4f}")
        print(f"  F1-score Δ = {report_after[label]['f1-score'] - report_before[label]['f1-score']:+.4f}")

Improvement:  Accuracy Δ = +0.1044, F1 Δ = +0.1328, Precision Δ = +0.1087, Recall Δ = +0.1044

Class-wise Improvement:
Label 0:
  Precision Δ = +0.0749
  Recall Δ = +0.4672
  F1-score Δ = +0.3582
Label 1:
  Precision Δ = +0.1197
  Recall Δ = -0.0140
  F1-score Δ = +0.0592


### MLPClassifier DANN

In [16]:
# #############################################
# Neural Network Wasserstein Classwise
# #############################################


mlp = MLPClassifier(random_state=42)
train_start_time = time.time()
mlp.fit(X_source_aligned_dann, y_source)
test_start_time = time.time()
pred_after = mlp.predict(X_target_aligned_dann)
score_start_time = time.time()
acc_after = accuracy_score(y_target, pred_after)
f1_after = f1_score(y_target, pred_after, average='weighted')  # Weighted average for multi-class
precision_after = precision_score(y_target, pred_after, average='weighted')  # Overall precision
recall_after = recall_score(y_target, pred_after, average='weighted')  # Overall recall
report_after = classification_report(y_target, pred_after, output_dict=True)
cm_after = confusion_matrix(y_target, pred_after)
train_time_after = time.time() - train_start_time
test_time_after = time.time() - test_start_time
score_time_after = time.time() - score_start_time


print("Neural Network:")
print(f"DANN:  Accuracy = {acc_after:.4f}, F1 = {f1_after:.4f}, Precision = {precision_after:.4f}, Recall = {recall_after:.4f}")
print(f"Training Time: {train_time_after:.4f} seconds")
print(f"Testing Time: {test_time_after:.4f} seconds")
print(f"Scoring Time: {score_time_after:.4f} seconds")
print(f"Training Time Without Testing And Score Calulations: {(train_time_after - test_time_after - score_time_after):.4f} seconds")
print(f"Testing Time Without Score Calulations: {(test_time_after - score_time_after):.4f} seconds")
print("Class-wise Metrics (DANN):")
for label in report_after:
    if label.isdigit():
        print(f"Label {label}: Precision = {report_after[label]['precision']:.4f}, Recall = {report_after[label]['recall']:.4f}, F1-score = {report_after[label]['f1-score']:.4f}")

print("\nConfusion Matrix (DANN):") # Commenting out since the original code didn't print confusion matrix
print(cm_after)

Neural Network:
DANN:  Accuracy = 0.9204, F1 = 0.9210, Precision = 0.9217, Recall = 0.9204
Training Time: 92.7779 seconds
Testing Time: 0.2108 seconds
Scoring Time: 0.1573 seconds
Training Time Without Testing And Score Calulations: 92.4098 seconds
Testing Time Without Score Calulations: 0.0536 seconds
Class-wise Metrics (DANN):
Label 0: Precision = 0.8248, Recall = 0.8590, F1-score = 0.8416
Label 1: Precision = 0.9534, Recall = 0.9405, F1-score = 0.9469

Confusion Matrix (DANN):
[[28022  4598]
 [ 5954 94030]]


In [28]:
print(f"Improvement:  Accuracy Δ = {acc_after - acc_before:+.4f}, F1 Δ = {f1_after - f1_before:+.4f}, Precision Δ = {precision_after - precision_before:+.4f}, Recall Δ = {recall_after - recall_before:+.4f}\n")

print("Class-wise Improvement:")
for label in report_before:
    if label.isdigit() and label in report_after:
        print(f"Label {label}:")
        print(f"  Precision Δ = {report_after[label]['precision'] - report_before[label]['precision']:+.4f}")
        print(f"  Recall Δ = {report_after[label]['recall'] - report_before[label]['recall']:+.4f}")
        print(f"  F1-score Δ = {report_after[label]['f1-score'] - report_before[label]['f1-score']:+.4f}")

Improvement:  Accuracy Δ = +0.1346, F1 Δ = +0.1657, Precision Δ = +0.1492, Recall Δ = +0.1346

Class-wise Improvement:
Label 0:
  Precision Δ = +0.0719
  Recall Δ = +0.6414
  F1-score Δ = +0.4363
Label 1:
  Precision Δ = +0.1744
  Recall Δ = -0.0307
  F1-score Δ = +0.0775


### MLPCLassifier ADDA

In [29]:
mlp = MLPClassifier(random_state=42)
train_start_time = time.time()
mlp.fit(X_source_aligned_adda, y_source)
test_start_time = time.time()
pred_after = mlp.predict(X_target_aligned_adda)
score_start_time = time.time()
acc_after = accuracy_score(y_target, pred_after)
f1_after = f1_score(y_target, pred_after, average='weighted')  # Weighted average for multi-class
precision_after = precision_score(y_target, pred_after, average='weighted')  # Overall precision
recall_after = recall_score(y_target, pred_after, average='weighted')  # Overall recall
report_after = classification_report(y_target, pred_after, output_dict=True)
cm_after = confusion_matrix(y_target, pred_after)
train_time_after = time.time() - train_start_time
test_time_after = time.time() - test_start_time
score_time_after = time.time() - score_start_time


print("Neural Network:")
print(f"ADDA:  Accuracy = {acc_after:.4f}, F1 = {f1_after:.4f}, Precision = {precision_after:.4f}, Recall = {recall_after:.4f}")
print(f"Training Time: {train_time_after:.4f} seconds")
print(f"Testing Time: {test_time_after:.4f} seconds")
print(f"Scoring Time: {score_time_after:.4f} seconds")
print(f"Training Time Without Testing And Score Calulations: {(train_time_after - test_time_after - score_time_after):.4f} seconds")
print(f"Testing Time Without Score Calulations: {(test_time_after - score_time_after):.4f} seconds")
print("Class-wise Metrics (ADDA):")
for label in report_after:
    if label.isdigit():
        print(f"Label {label}: Precision = {report_after[label]['precision']:.4f}, Recall = {report_after[label]['recall']:.4f}, F1-score = {report_after[label]['f1-score']:.4f}")

print("\nConfusion Matrix (ADDA):") # Commenting out since the original code didn't print confusion matrix
print(cm_after)

Neural Network:
ADDA:  Accuracy = 0.9216, F1 = 0.9222, Precision = 0.9231, Recall = 0.9216
Training Time: 184.7702 seconds
Testing Time: 0.3167 seconds
Scoring Time: 0.2275 seconds
Training Time Without Testing And Score Calulations: 184.2260 seconds
Testing Time Without Score Calulations: 0.0892 seconds
Class-wise Metrics (ADDA):
Label 0: Precision = 0.8254, Recall = 0.8642, F1-score = 0.8444
Label 1: Precision = 0.9550, Recall = 0.9404, F1-score = 0.9476

Confusion Matrix (ADDA):
[[28191  4429]
 [ 5963 94021]]


In [30]:
print(f"Improvement:  Accuracy Δ = {acc_after - acc_before:+.4f}, F1 Δ = {f1_after - f1_before:+.4f}, Precision Δ = {precision_after - precision_before:+.4f}, Recall Δ = {recall_after - recall_before:+.4f}\n")

print("Class-wise Improvement:")
for label in report_before:
    if label.isdigit() and label in report_after:
        print(f"Label {label}:")
        print(f"  Precision Δ = {report_after[label]['precision'] - report_before[label]['precision']:+.4f}")
        print(f"  Recall Δ = {report_after[label]['recall'] - report_before[label]['recall']:+.4f}")
        print(f"  F1-score Δ = {report_after[label]['f1-score'] - report_before[label]['f1-score']:+.4f}")

Improvement:  Accuracy Δ = +0.1127, F1 Δ = +0.1430, Precision Δ = +0.1222, Recall Δ = +0.1127

Class-wise Improvement:
Label 0:
  Precision Δ = +0.0675
  Recall Δ = +0.5362
  F1-score Δ = +0.3865
Label 1:
  Precision Δ = +0.1400
  Recall Δ = -0.0254
  F1-score Δ = +0.0636


## Decision Tree

### Decision Tree Without Domain Adaptation

In [31]:
# #############################################
# Decision Tree without domain adaptation
# #############################################


dt = DecisionTreeClassifier(random_state=42)
train_start_time = time.time()
dt.fit(X_source, y_source)
test_start_time = time.time()
pred_before = dt.predict(X_target)
score_start_time = time.time()
acc_before = accuracy_score(y_target, pred_before)
f1_before = f1_score(y_target, pred_before, average='weighted')  # Weighted average for multi-class
precision_before = precision_score(y_target, pred_before, average='weighted')  # Overall precision
recall_before = recall_score(y_target, pred_before, average='weighted')  # Overall recall
report_before = classification_report(y_target, pred_before, output_dict=True)
cm_before = confusion_matrix(y_target, pred_before)
train_time_before = time.time() - train_start_time
test_time_before = time.time() - test_start_time
score_time_before = time.time() - score_start_time


print("Decision Tree:")
print(f"Normal:  Accuracy = {acc_before:.4f}, F1 = {f1_before:.4f}, Precision = {precision_before:.4f}, Recall = {recall_before:.4f}")
print(f"Training Time: {train_time_before:.4f} seconds")
print(f"Testing Time: {test_time_before:.4f} seconds")
print(f"Scoring Time: {score_time_before:.4f} seconds")
print(f"Training Time Without Testing And Score Calulations: {(train_time_before - test_time_before - score_time_before):.4f} seconds")
print(f"Testing Time Without Score Calulations: {(test_time_before - score_time_before):.4f} seconds")
print("Class-wise Metrics (Normal):")
for label in report_before:
    if label.isdigit():
        print(f"Label {label}: Precision = {report_before[label]['precision']:.4f}, Recall = {report_before[label]['recall']:.4f}, F1-score = {report_before[label]['f1-score']:.4f}")

print("\nConfusion Matrix (Normal Approach):")
print(cm_before)

Decision Tree:
Normal:  Accuracy = 0.9174, F1 = 0.9174, Precision = 0.9174, Recall = 0.9174
Training Time: 12.0704 seconds
Testing Time: 0.1931 seconds
Scoring Time: 0.1592 seconds
Training Time Without Testing And Score Calulations: 11.7181 seconds
Testing Time Without Score Calulations: 0.0339 seconds
Class-wise Metrics (Normal):
Label 0: Precision = 0.8314, Recall = 0.8330, F1-score = 0.8322
Label 1: Precision = 0.9455, Recall = 0.9449, F1-score = 0.9452

Confusion Matrix (Normal Approach):
[[27173  5447]
 [ 5511 94473]]


### Decision Tree Wasserstein

In [None]:
# #############################################
# Decision Tree Wasserstein
# #############################################


dt = DecisionTreeClassifier(random_state=42)
train_start_time = time.time()
dt.fit(X_source_aligned_wasserstein, y_source)
test_start_time = time.time()
pred_after = dt.predict(X_target)
score_start_time = time.time()
acc_after = accuracy_score(y_target, pred_after)
f1_after = f1_score(y_target, pred_after, average='weighted')  # Weighted average for multi-class
precision_after = precision_score(y_target, pred_after, average='weighted')  # Overall precision
recall_after = recall_score(y_target, pred_after, average='weighted')  # Overall recall
report_after = classification_report(y_target, pred_after, output_dict=True)
cm_after = confusion_matrix(y_target, pred_after)
train_time_after = time.time() - train_start_time
test_time_after = time.time() - test_start_time
score_time_after = time.time() - score_start_time


print("Decision Tree:")
print(f"Wasserstein:  Accuracy = {acc_after:.4f}, F1 = {f1_after:.4f}, Precision = {precision_after:.4f}, Recall = {recall_after:.4f}")
print(f"Training Time: {train_time_after:.4f} seconds")
print(f"Testing Time: {test_time_after:.4f} seconds")
print(f"Scoring Time: {score_time_after:.4f} seconds")
print(f"Training Time Without Testing And Score Calulations: {(train_time_after - test_time_after - score_time_after):.4f} seconds")
print(f"Testing Time Without Score Calulations: {(test_time_after - score_time_after):.4f} seconds")
print("Class-wise Metrics (Wasserstein):")
for label in report_after:
    if label.isdigit():
        print(f"Label {label}: Precision = {report_after[label]['precision']:.4f}, Recall = {report_after[label]['recall']:.4f}, F1-score = {report_after[label]['f1-score']:.4f}")

print("\nConfusion Matrix (Wasserstein):")
print(cm_after)

Decision Tree:
Wasserstein:  Accuracy = 0.1269, F1 = 0.0880, Precision = 0.1432, Recall = 0.1269
Training Time: 12.7743 seconds
Testing Time: 0.0945 seconds
Scoring Time: 0.0650 seconds
Training Time Without Testing And Score Calulations: 12.6149 seconds
Testing Time Without Score Calulations: 0.0295 seconds
Class-wise Metrics (Wasserstein):
Label 0: Precision = 0.1221, Recall = 0.4118, F1-score = 0.1883
Label 1: Precision = 0.1501, Recall = 0.0339, F1-score = 0.0553

Confusion Matrix (Wasserstein):
[[13434 19186]
 [96596  3388]]


In [None]:
print(f"Improvement:  Accuracy Δ = {acc_after - acc_before:+.4f}, F1 Δ = {f1_after - f1_before:+.4f}, Precision Δ = {precision_after - precision_before:+.4f}, Recall Δ = {recall_after - recall_before:+.4f}\n")

print("Class-wise Improvement:")
for label in report_before:
    if label.isdigit() and label in report_after:
        print(f"Label {label}:")
        print(f"  Precision Δ = {report_after[label]['precision'] - report_before[label]['precision']:+.4f}")
        print(f"  Recall Δ = {report_after[label]['recall'] - report_before[label]['recall']:+.4f}")
        print(f"  F1-score Δ = {report_after[label]['f1-score'] - report_before[label]['f1-score']:+.4f}")

Improvement:  Accuracy Δ = -0.7905, F1 Δ = -0.8294, Precision Δ = -0.7742, Recall Δ = -0.7905

Class-wise Improvement:
Label 0:
  Precision Δ = -0.7093
  Recall Δ = -0.4212
  F1-score Δ = -0.6439
Label 1:
  Precision Δ = -0.7954
  Recall Δ = -0.9110
  F1-score Δ = -0.8899


### Decision Tree Classwise Wasserstein

In [None]:
# #############################################
# Decision Tree Wasserstein Classwise
# #############################################


dt = DecisionTreeClassifier(random_state=42)
train_start_time = time.time()
dt.fit(X_source_aligned_wasserstein_classwise, y_source)
test_start_time = time.time()
pred_after = dt.predict(X_target)
score_start_time = time.time()
acc_after = accuracy_score(y_target, pred_after)
f1_after = f1_score(y_target, pred_after, average='weighted')  # Weighted average for multi-class
precision_after = precision_score(y_target, pred_after, average='weighted')  # Overall precision
recall_after = recall_score(y_target, pred_after, average='weighted')  # Overall recall
report_after = classification_report(y_target, pred_after, output_dict=True)
cm_after = confusion_matrix(y_target, pred_after)
train_time_after = time.time() - train_start_time
test_time_after = time.time() - test_start_time
score_time_after = time.time() - score_start_time


print("Decision Tree:")
print(f"Wasserstein Classwise:  Accuracy = {acc_after:.4f}, F1 = {f1_after:.4f}, Precision = {precision_after:.4f}, Recall = {recall_after:.4f}")
print(f"Training Time: {train_time_after:.4f} seconds")
print(f"Testing Time: {test_time_after:.4f} seconds")
print(f"Scoring Time: {score_time_after:.4f} seconds")
print(f"Training Time Without Testing And Score Calulations: {(train_time_after - test_time_after - score_time_after):.4f} seconds")
print(f"Testing Time Without Score Calulations: {(test_time_after - score_time_after):.4f} seconds")
print("Class-wise Metrics (Wasserstein Classwise):")
for label in report_after:
    if label.isdigit():
        print(f"Label {label}: Precision = {report_after[label]['precision']:.4f}, Recall = {report_after[label]['recall']:.4f}, F1-score = {report_after[label]['f1-score']:.4f}")

print("\nConfusion Matrix (Wasserstein Classwise):")
print(cm_after)

Decision Tree:
Wasserstein Classwise:  Accuracy = 0.2473, F1 = 0.0999, Precision = 0.7672, Recall = 0.2473
Training Time: 0.9155 seconds
Testing Time: 0.0811 seconds
Scoring Time: 0.0628 seconds
Training Time Without Testing And Score Calulations: 0.7715 seconds
Testing Time Without Score Calulations: 0.0183 seconds
Class-wise Metrics (Wasserstein Classwise):
Label 0: Precision = 0.2463, Recall = 0.9996, F1-score = 0.3952
Label 1: Precision = 0.9372, Recall = 0.0018, F1-score = 0.0036

Confusion Matrix (Wasserstein Classwise):
[[32608    12]
 [99805   179]]


In [None]:
print(f"Improvement:  Accuracy Δ = {acc_after - acc_before:+.4f}, F1 Δ = {f1_after - f1_before:+.4f}, Precision Δ = {precision_after - precision_before:+.4f}, Recall Δ = {recall_after - recall_before:+.4f}\n")

print("Class-wise Improvement:")
for label in report_before:
    if label.isdigit() and label in report_after:
        print(f"Label {label}:")
        print(f"  Precision Δ = {report_after[label]['precision'] - report_before[label]['precision']:+.4f}")
        print(f"  Recall Δ = {report_after[label]['recall'] - report_before[label]['recall']:+.4f}")
        print(f"  F1-score Δ = {report_after[label]['f1-score'] - report_before[label]['f1-score']:+.4f}")

Improvement:  Accuracy Δ = -0.6701, F1 Δ = -0.8175, Precision Δ = -0.1502, Recall Δ = -0.6701

Class-wise Improvement:
Label 0:
  Precision Δ = -0.5851
  Recall Δ = +0.1666
  F1-score Δ = -0.4370
Label 1:
  Precision Δ = -0.0083
  Recall Δ = -0.9431
  F1-score Δ = -0.9416


### Decision Tree DANN

In [32]:
dt = DecisionTreeClassifier(random_state=42)
train_start_time = time.time()
dt.fit(X_source_aligned_dann, y_source)
test_start_time = time.time()
pred_after = dt.predict(X_target_aligned_dann)
score_start_time = time.time()
acc_after = accuracy_score(y_target, pred_after)
f1_after = f1_score(y_target, pred_after, average='weighted')  # Weighted average for multi-class
precision_after = precision_score(y_target, pred_after, average='weighted')  # Overall precision
recall_after = recall_score(y_target, pred_after, average='weighted')  # Overall recall
report_after = classification_report(y_target, pred_after, output_dict=True)
cm_after = confusion_matrix(y_target, pred_after)
train_time_after = time.time() - train_start_time
test_time_after = time.time() - test_start_time
score_time_after = time.time() - score_start_time


print("Decision Tree:")
print(f"DANN:  Accuracy = {acc_after:.4f}, F1 = {f1_after:.4f}, Precision = {precision_after:.4f}, Recall = {recall_after:.4f}")
print(f"Training Time: {train_time_after:.4f} seconds")
print(f"Testing Time: {test_time_after:.4f} seconds")
print(f"Scoring Time: {score_time_after:.4f} seconds")
print(f"Training Time Without Testing And Score Calulations: {(train_time_after - test_time_after - score_time_after):.4f} seconds")
print(f"Testing Time Without Score Calulations: {(test_time_after - score_time_after):.4f} seconds")
print("Class-wise Metrics (DANN):")
for label in report_after:
    if label.isdigit():
        print(f"Label {label}: Precision = {report_after[label]['precision']:.4f}, Recall = {report_after[label]['recall']:.4f}, F1-score = {report_after[label]['f1-score']:.4f}")

print("\nConfusion Matrix (DANN):") # Commenting out since the original code didn't print confusion matrix
print(cm_after)

Decision Tree:
DANN:  Accuracy = 0.8877, F1 = 0.8848, Precision = 0.8846, Recall = 0.8877
Training Time: 3.0473 seconds
Testing Time: 0.1783 seconds
Scoring Time: 0.1663 seconds
Training Time Without Testing And Score Calulations: 2.7027 seconds
Testing Time Without Score Calulations: 0.0120 seconds
Class-wise Metrics (DANN):
Label 0: Precision = 0.8145, Recall = 0.7038, F1-score = 0.7551
Label 1: Precision = 0.9075, Recall = 0.9477, F1-score = 0.9271

Confusion Matrix (DANN):
[[22957  9663]
 [ 5230 94754]]


In [33]:
print(f"Improvement:  Accuracy Δ = {acc_after - acc_before:+.4f}, F1 Δ = {f1_after - f1_before:+.4f}, Precision Δ = {precision_after - precision_before:+.4f}, Recall Δ = {recall_after - recall_before:+.4f}\n")

print("Class-wise Improvement:")
for label in report_before:
    if label.isdigit() and label in report_after:
        print(f"Label {label}:")
        print(f"  Precision Δ = {report_after[label]['precision'] - report_before[label]['precision']:+.4f}")
        print(f"  Recall Δ = {report_after[label]['recall'] - report_before[label]['recall']:+.4f}")
        print(f"  F1-score Δ = {report_after[label]['f1-score'] - report_before[label]['f1-score']:+.4f}")

Improvement:  Accuracy Δ = -0.0297, F1 Δ = -0.0326, Precision Δ = -0.0328, Recall Δ = -0.0297

Class-wise Improvement:
Label 0:
  Precision Δ = -0.0169
  Recall Δ = -0.1292
  F1-score Δ = -0.0771
Label 1:
  Precision Δ = -0.0380
  Recall Δ = +0.0028
  F1-score Δ = -0.0180


### Decision Tree ADDA

In [34]:
dt = DecisionTreeClassifier(random_state=42)
train_start_time = time.time()
dt.fit(X_source_aligned_adda, y_source)
test_start_time = time.time()
pred_after = dt.predict(X_target_aligned_adda)
score_start_time = time.time()
acc_after = accuracy_score(y_target, pred_after)
f1_after = f1_score(y_target, pred_after, average='weighted')  # Weighted average for multi-class
precision_after = precision_score(y_target, pred_after, average='weighted')  # Overall precision
recall_after = recall_score(y_target, pred_after, average='weighted')  # Overall recall
report_after = classification_report(y_target, pred_after, output_dict=True)
cm_after = confusion_matrix(y_target, pred_after)
train_time_after = time.time() - train_start_time
test_time_after = time.time() - test_start_time
score_time_after = time.time() - score_start_time


print("Decision Tree:")
print(f"ADDA:  Accuracy = {acc_after:.4f}, F1 = {f1_after:.4f}, Precision = {precision_after:.4f}, Recall = {recall_after:.4f}")
print(f"Training Time: {train_time_after:.4f} seconds")
print(f"Testing Time: {test_time_after:.4f} seconds")
print(f"Scoring Time: {score_time_after:.4f} seconds")
print(f"Training Time Without Testing And Score Calulations: {(train_time_after - test_time_after - score_time_after):.4f} seconds")
print(f"Testing Time Without Score Calulations: {(test_time_after - score_time_after):.4f} seconds")
print("Class-wise Metrics (ADDA):")
for label in report_after:
    if label.isdigit():
        print(f"Label {label}: Precision = {report_after[label]['precision']:.4f}, Recall = {report_after[label]['recall']:.4f}, F1-score = {report_after[label]['f1-score']:.4f}")

print("\nConfusion Matrix (ADDA):") # Commenting out since the original code didn't print confusion matrix
print(cm_after)

Decision Tree:
ADDA:  Accuracy = 0.2301, F1 = 0.2446, Precision = 0.3888, Recall = 0.2301
Training Time: 5.3323 seconds
Testing Time: 0.1841 seconds
Scoring Time: 0.1726 seconds
Training Time Without Testing And Score Calulations: 4.9756 seconds
Testing Time Without Score Calulations: 0.0115 seconds
Class-wise Metrics (ADDA):
Label 0: Precision = 0.1331, Recall = 0.3863, F1-score = 0.1980
Label 1: Precision = 0.4723, Recall = 0.1792, F1-score = 0.2598

Confusion Matrix (ADDA):
[[12600 20020]
 [82069 17915]]


In [35]:
print(f"Improvement:  Accuracy Δ = {acc_after - acc_before:+.4f}, F1 Δ = {f1_after - f1_before:+.4f}, Precision Δ = {precision_after - precision_before:+.4f}, Recall Δ = {recall_after - recall_before:+.4f}\n")

print("Class-wise Improvement:")
for label in report_before:
    if label.isdigit() and label in report_after:
        print(f"Label {label}:")
        print(f"  Precision Δ = {report_after[label]['precision'] - report_before[label]['precision']:+.4f}")
        print(f"  Recall Δ = {report_after[label]['recall'] - report_before[label]['recall']:+.4f}")
        print(f"  F1-score Δ = {report_after[label]['f1-score'] - report_before[label]['f1-score']:+.4f}")

Improvement:  Accuracy Δ = -0.6872, F1 Δ = -0.6728, Precision Δ = -0.5286, Recall Δ = -0.6872

Class-wise Improvement:
Label 0:
  Precision Δ = -0.6983
  Recall Δ = -0.4468
  F1-score Δ = -0.6342
Label 1:
  Precision Δ = -0.4732
  Recall Δ = -0.7657
  F1-score Δ = -0.6854


## K-Nearest Neighbors

### KNN Without Domain Adaptation

In [36]:
# #############################################
# K-Nearest Neighbors without domain adaptation
# #############################################


knn = KNeighborsClassifier()
train_start_time = time.time()
knn.fit(X_source, y_source)
test_start_time = time.time()
pred_before = knn.predict(X_target)
score_start_time = time.time()
acc_before = accuracy_score(y_target, pred_before)
f1_before = f1_score(y_target, pred_before, average='weighted')  # Weighted average for multi-class
precision_before = precision_score(y_target, pred_before, average='weighted')  # Overall precision
recall_before = recall_score(y_target, pred_before, average='weighted')  # Overall recall
report_before = classification_report(y_target, pred_before, output_dict=True)
cm_before = confusion_matrix(y_target, pred_before)
train_time_before = time.time() - train_start_time
test_time_before = time.time() - test_start_time
score_time_before = time.time() - score_start_time


print("K-Nearest Neighbors:")
print(f"Normal:  Accuracy = {acc_before:.4f}, F1 = {f1_before:.4f}, Precision = {precision_before:.4f}, Recall = {recall_before:.4f}")
print(f"Training Time: {train_time_before:.4f} seconds")
print(f"Testing Time: {test_time_before:.4f} seconds")
print(f"Scoring Time: {score_time_before:.4f} seconds")
print(f"Training Time Without Testing And Score Calulations: {(train_time_before - test_time_before - score_time_before):.4f} seconds")
print(f"Testing Time Without Score Calulations: {(test_time_before - score_time_before):.4f} seconds")
print("Class-wise Metrics (Normal):")
for label in report_before:
    if label.isdigit():
        print(f"Label {label}: Precision = {report_before[label]['precision']:.4f}, Recall = {report_before[label]['recall']:.4f}, F1-score = {report_before[label]['f1-score']:.4f}")

print("\nConfusion Matrix (Normal Approach):")
print(cm_before)

K-Nearest Neighbors:
Normal:  Accuracy = 0.8458, F1 = 0.8325, Precision = 0.8407, Recall = 0.8458
Training Time: 347.4489 seconds
Testing Time: 347.2962 seconds
Scoring Time: 0.1646 seconds
Training Time Without Testing And Score Calulations: -0.0119 seconds
Testing Time Without Score Calulations: 347.1317 seconds
Class-wise Metrics (Normal):
Label 0: Precision = 0.7985, Recall = 0.4993, F1-score = 0.6144
Label 1: Precision = 0.8544, Recall = 0.9589, F1-score = 0.9037

Confusion Matrix (Normal Approach):
[[16286 16334]
 [ 4110 95874]]


### KNN Wasserstein

In [None]:
# #############################################
# K-Nearest Neighbors Wasserstein
# #############################################


knn = KNeighborsClassifier()
train_start_time = time.time()
knn.fit(X_source_aligned_wasserstein, y_source)
test_start_time = time.time()
pred_after = knn.predict(X_target)
score_start_time = time.time()
acc_after = accuracy_score(y_target, pred_after)
f1_after = f1_score(y_target, pred_after, average='weighted')  # Weighted average for multi-class
precision_after = precision_score(y_target, pred_after, average='weighted')  # Overall precision
recall_after = recall_score(y_target, pred_after, average='weighted')  # Overall recall
report_after = classification_report(y_target, pred_after, output_dict=True)
cm_after = confusion_matrix(y_target, pred_after)
train_time_after = time.time() - train_start_time
test_time_after = time.time() - test_start_time
score_time_after = time.time() - score_start_time


print("K-Nearest Neighbors:")
print(f"Wasserstein:  Accuracy = {acc_after:.4f}, F1 = {f1_after:.4f}, Precision = {precision_after:.4f}, Recall = {recall_after:.4f}")
print(f"Training Time: {train_time_after:.4f} seconds")
print(f"Testing Time: {test_time_after:.4f} seconds")
print(f"Scoring Time: {score_time_after:.4f} seconds")
print(f"Training Time Without Testing And Score Calulations: {(train_time_after - test_time_after - score_time_after):.4f} seconds")
print(f"Testing Time Without Score Calulations: {(test_time_after - score_time_after):.4f} seconds")
print("Class-wise Metrics (Wasserstein):")
for label in report_after:
    if label.isdigit():
        print(f"Label {label}: Precision = {report_after[label]['precision']:.4f}, Recall = {report_after[label]['recall']:.4f}, F1-score = {report_after[label]['f1-score']:.4f}")

print("\nConfusion Matrix (Wasserstein):")
print(cm_after)

K-Nearest Neighbors:
Wasserstein:  Accuracy = 0.9142, F1 = 0.9144, Precision = 0.9147, Recall = 0.9142
Training Time: 345.7367 seconds
Testing Time: 345.6330 seconds
Scoring Time: 0.0726 seconds
Training Time Without Testing And Score Calulations: 0.0312 seconds
Testing Time Without Score Calulations: 345.5604 seconds
Class-wise Metrics (Wasserstein):
Label 0: Precision = 0.8198, Recall = 0.8346, F1-score = 0.8271
Label 1: Precision = 0.9457, Recall = 0.9401, F1-score = 0.9429

Confusion Matrix (Wasserstein):
[[27224  5396]
 [ 5985 93999]]


In [None]:
print(f"Improvement:  Accuracy Δ = {acc_after - acc_before:+.4f}, F1 Δ = {f1_after - f1_before:+.4f}, Precision Δ = {precision_after - precision_before:+.4f}, Recall Δ = {recall_after - recall_before:+.4f}\n")

print("Class-wise Improvement:")
for label in report_before:
    if label.isdigit() and label in report_after:
        print(f"Label {label}:")
        print(f"  Precision Δ = {report_after[label]['precision'] - report_before[label]['precision']:+.4f}")
        print(f"  Recall Δ = {report_after[label]['recall'] - report_before[label]['recall']:+.4f}")
        print(f"  F1-score Δ = {report_after[label]['f1-score'] - report_before[label]['f1-score']:+.4f}")

Improvement:  Accuracy Δ = +0.0683, F1 Δ = +0.0819, Precision Δ = +0.0741, Recall Δ = +0.0683

Class-wise Improvement:
Label 0:
  Precision Δ = +0.0213
  Recall Δ = +0.3353
  F1-score Δ = +0.2127
Label 1:
  Precision Δ = +0.0913
  Recall Δ = -0.0188
  F1-score Δ = +0.0393


### KNN Classwise Wasserstein

In [None]:
# #############################################
# K-Nearest Neighbors Wasserstein Classwise
# #############################################


knn = KNeighborsClassifier()
train_start_time = time.time()
knn.fit(X_source_aligned_wasserstein_classwise, y_source)
test_start_time = time.time()
pred_after = knn.predict(X_target)
score_start_time = time.time()
acc_after = accuracy_score(y_target, pred_after)
f1_after = f1_score(y_target, pred_after, average='weighted')  # Weighted average for multi-class
precision_after = precision_score(y_target, pred_after, average='weighted')  # Overall precision
recall_after = recall_score(y_target, pred_after, average='weighted')  # Overall recall
report_after = classification_report(y_target, pred_after, output_dict=True)
cm_after = confusion_matrix(y_target, pred_after)
train_time_after = time.time() - train_start_time
test_time_after = time.time() - test_start_time
score_time_after = time.time() - score_start_time


print("K-Nearest Neighbors:")
print(f"Wasserstein Classwise:  Accuracy = {acc_after:.4f}, F1 = {f1_after:.4f}, Precision = {precision_after:.4f}, Recall = {recall_after:.4f}")
print(f"Training Time: {train_time_after:.4f} seconds")
print(f"Testing Time: {test_time_after:.4f} seconds")
print(f"Scoring Time: {score_time_after:.4f} seconds")
print(f"Training Time Without Testing And Score Calulations: {(train_time_after - test_time_after - score_time_after):.4f} seconds")
print(f"Testing Time Without Score Calulations: {(test_time_after - score_time_after):.4f} seconds")
print("Class-wise Metrics (Wasserstein Classwise):")
for label in report_after:
    if label.isdigit():
        print(f"Label {label}: Precision = {report_after[label]['precision']:.4f}, Recall = {report_after[label]['recall']:.4f}, F1-score = {report_after[label]['f1-score']:.4f}")

print("\nConfusion Matrix (Wasserstein Classwise):")
print(cm_after)

K-Nearest Neighbors:
Wasserstein Classwise:  Accuracy = 0.8150, F1 = 0.7757, Precision = 0.8303, Recall = 0.8150
Training Time: 348.1721 seconds
Testing Time: 348.0683 seconds
Scoring Time: 0.0807 seconds
Training Time Without Testing And Score Calulations: 0.0231 seconds
Testing Time Without Score Calulations: 347.9876 seconds
Class-wise Metrics (Wasserstein Classwise):
Label 0: Precision = 0.8983, Recall = 0.2795, F1-score = 0.4263
Label 1: Precision = 0.8081, Recall = 0.9897, F1-score = 0.8897

Confusion Matrix (Wasserstein Classwise):
[[ 9117 23503]
 [ 1032 98952]]


In [None]:
print(f"Improvement:  Accuracy Δ = {acc_after - acc_before:+.4f}, F1 Δ = {f1_after - f1_before:+.4f}, Precision Δ = {precision_after - precision_before:+.4f}, Recall Δ = {recall_after - recall_before:+.4f}\n")

print("Class-wise Improvement:")
for label in report_before:
    if label.isdigit() and label in report_after:
        print(f"Label {label}:")
        print(f"  Precision Δ = {report_after[label]['precision'] - report_before[label]['precision']:+.4f}")
        print(f"  Recall Δ = {report_after[label]['recall'] - report_before[label]['recall']:+.4f}")
        print(f"  F1-score Δ = {report_after[label]['f1-score'] - report_before[label]['f1-score']:+.4f}")

Improvement:  Accuracy Δ = -0.0309, F1 Δ = -0.0568, Precision Δ = -0.0104, Recall Δ = -0.0309

Class-wise Improvement:
Label 0:
  Precision Δ = +0.0998
  Recall Δ = -0.2198
  F1-score Δ = -0.1880
Label 1:
  Precision Δ = -0.0464
  Recall Δ = +0.0308
  F1-score Δ = -0.0140


### KNN DANN

In [37]:
knn = KNeighborsClassifier()
train_start_time = time.time()
knn.fit(X_source_aligned_dann, y_source)
test_start_time = time.time()
pred_after = knn.predict(X_target_aligned_dann)
score_start_time = time.time()
acc_after = accuracy_score(y_target, pred_after)
f1_after = f1_score(y_target, pred_after, average='weighted')  # Weighted average for multi-class
precision_after = precision_score(y_target, pred_after, average='weighted')  # Overall precision
recall_after = recall_score(y_target, pred_after, average='weighted')  # Overall recall
report_after = classification_report(y_target, pred_after, output_dict=True)
cm_after = confusion_matrix(y_target, pred_after)
train_time_after = time.time() - train_start_time
test_time_after = time.time() - test_start_time
score_time_after = time.time() - score_start_time


print("KNN:")
print(f"DANN:  Accuracy = {acc_after:.4f}, F1 = {f1_after:.4f}, Precision = {precision_after:.4f}, Recall = {recall_after:.4f}")
print(f"Training Time: {train_time_after:.4f} seconds")
print(f"Testing Time: {test_time_after:.4f} seconds")
print(f"Scoring Time: {score_time_after:.4f} seconds")
print(f"Training Time Without Testing And Score Calulations: {(train_time_after - test_time_after - score_time_after):.4f} seconds")
print(f"Testing Time Without Score Calulations: {(test_time_after - score_time_after):.4f} seconds")
print("Class-wise Metrics (DANN):")
for label in report_after:
    if label.isdigit():
        print(f"Label {label}: Precision = {report_after[label]['precision']:.4f}, Recall = {report_after[label]['recall']:.4f}, F1-score = {report_after[label]['f1-score']:.4f}")

print("\nConfusion Matrix (DANN):") # Commenting out since the original code didn't print confusion matrix
print(cm_after)

KNN:
DANN:  Accuracy = 0.9253, F1 = 0.9260, Precision = 0.9271, Recall = 0.9253
Training Time: 14.4303 seconds
Testing Time: 13.8131 seconds
Scoring Time: 0.1658 seconds
Training Time Without Testing And Score Calulations: 0.4514 seconds
Testing Time Without Score Calulations: 13.6473 seconds
Class-wise Metrics (DANN):
Label 0: Precision = 0.8289, Recall = 0.8773, F1-score = 0.8524
Label 1: Precision = 0.9592, Recall = 0.9409, F1-score = 0.9500

Confusion Matrix (DANN):
[[28618  4002]
 [ 5909 94075]]


In [38]:
print(f"Improvement:  Accuracy Δ = {acc_after - acc_before:+.4f}, F1 Δ = {f1_after - f1_before:+.4f}, Precision Δ = {precision_after - precision_before:+.4f}, Recall Δ = {recall_after - recall_before:+.4f}\n")

print("Class-wise Improvement:")
for label in report_before:
    if label.isdigit() and label in report_after:
        print(f"Label {label}:")
        print(f"  Precision Δ = {report_after[label]['precision'] - report_before[label]['precision']:+.4f}")
        print(f"  Recall Δ = {report_after[label]['recall'] - report_before[label]['recall']:+.4f}")
        print(f"  F1-score Δ = {report_after[label]['f1-score'] - report_before[label]['f1-score']:+.4f}")

Improvement:  Accuracy Δ = +0.0794, F1 Δ = +0.0935, Precision Δ = +0.0865, Recall Δ = +0.0794

Class-wise Improvement:
Label 0:
  Precision Δ = +0.0304
  Recall Δ = +0.3781
  F1-score Δ = +0.2380
Label 1:
  Precision Δ = +0.1048
  Recall Δ = -0.0180
  F1-score Δ = +0.0463


### KNN ADDA

In [39]:
knn = KNeighborsClassifier()
train_start_time = time.time()
knn.fit(X_source_aligned_adda, y_source)
test_start_time = time.time()
pred_after = knn.predict(X_target_aligned_adda)
score_start_time = time.time()
acc_after = accuracy_score(y_target, pred_after)
f1_after = f1_score(y_target, pred_after, average='weighted')  # Weighted average for multi-class
precision_after = precision_score(y_target, pred_after, average='weighted')  # Overall precision
recall_after = recall_score(y_target, pred_after, average='weighted')  # Overall recall
report_after = classification_report(y_target, pred_after, output_dict=True)
cm_after = confusion_matrix(y_target, pred_after)
train_time_after = time.time() - train_start_time
test_time_after = time.time() - test_start_time
score_time_after = time.time() - score_start_time


print("KNN:")
print(f"ADDA:  Accuracy = {acc_after:.4f}, F1 = {f1_after:.4f}, Precision = {precision_after:.4f}, Recall = {recall_after:.4f}")
print(f"Training Time: {train_time_after:.4f} seconds")
print(f"Testing Time: {test_time_after:.4f} seconds")
print(f"Scoring Time: {score_time_after:.4f} seconds")
print(f"Training Time Without Testing And Score Calulations: {(train_time_after - test_time_after - score_time_after):.4f} seconds")
print(f"Testing Time Without Score Calulations: {(test_time_after - score_time_after):.4f} seconds")
print("Class-wise Metrics (ADDA):")
for label in report_after:
    if label.isdigit():
        print(f"Label {label}: Precision = {report_after[label]['precision']:.4f}, Recall = {report_after[label]['recall']:.4f}, F1-score = {report_after[label]['f1-score']:.4f}")

print("\nConfusion Matrix (ADDA):") # Commenting out since the original code didn't print confusion matrix
print(cm_after)

KNN:
ADDA:  Accuracy = 0.9157, F1 = 0.9157, Precision = 0.9157, Recall = 0.9157
Training Time: 21.0980 seconds
Testing Time: 20.4766 seconds
Scoring Time: 0.1640 seconds
Training Time Without Testing And Score Calulations: 0.4574 seconds
Testing Time Without Score Calulations: 20.3126 seconds
Class-wise Metrics (ADDA):
Label 0: Precision = 0.8296, Recall = 0.8273, F1-score = 0.8285
Label 1: Precision = 0.9437, Recall = 0.9446, F1-score = 0.9441

Confusion Matrix (ADDA):
[[26988  5632]
 [ 5542 94442]]


In [40]:
print(f"Improvement:  Accuracy Δ = {acc_after - acc_before:+.4f}, F1 Δ = {f1_after - f1_before:+.4f}, Precision Δ = {precision_after - precision_before:+.4f}, Recall Δ = {recall_after - recall_before:+.4f}\n")

print("Class-wise Improvement:")
for label in report_before:
    if label.isdigit() and label in report_after:
        print(f"Label {label}:")
        print(f"  Precision Δ = {report_after[label]['precision'] - report_before[label]['precision']:+.4f}")
        print(f"  Recall Δ = {report_after[label]['recall'] - report_before[label]['recall']:+.4f}")
        print(f"  F1-score Δ = {report_after[label]['f1-score'] - report_before[label]['f1-score']:+.4f}")

Improvement:  Accuracy Δ = +0.0699, F1 Δ = +0.0832, Precision Δ = +0.0750, Recall Δ = +0.0699

Class-wise Improvement:
Label 0:
  Precision Δ = +0.0311
  Recall Δ = +0.3281
  F1-score Δ = +0.2141
Label 1:
  Precision Δ = +0.0893
  Recall Δ = -0.0143
  F1-score Δ = +0.0405


## Logistic Regression With Particle Swarm Optimization

### LR + PSO Without Domain Adaptation

In [41]:
# Define sigmoid function
def sigmoid(z):
    # z = np.clip(z, -500, 500)  # Prevent overflow
    return 1 / (1 + expit(-z))

# Define objective function to minimize: 1 - accuracy
def objective_function(weights):
    z = np.dot(X_source, weights)
    predictions = sigmoid(z) > 0.5
    acc = accuracy_score(y_source, predictions)
    return 1 - acc  # We want to maximize accuracy, so we minimize (1 - accuracy)

train_start_time = time.time()
dim = X_source.shape[1]
lb = [-20] * dim  # Lower bounds
ub = [20] * dim   # Upper bounds
best_weights, fopt = pso(objective_function, lb, ub, swarmsize=30, maxiter=500)


test_start_time = time.time()
z_test = np.dot(X_target, best_weights)
pred_before = sigmoid(z_test) > 0.5


score_start_time = time.time()
acc_before = accuracy_score(y_target, pred_before)
f1_before = f1_score(y_target, pred_before, average='weighted')  # Weighted average for multi-class
precision_before = precision_score(y_target, pred_before, average='weighted')  # Overall precision
recall_before = recall_score(y_target, pred_before, average='weighted')  # Overall recall
report_before = classification_report(y_target, pred_before, output_dict=True)
cm_before = confusion_matrix(y_target, pred_before)
score_time_before = time.time() - score_start_time
test_time_before = time.time() - test_start_time
train_time_before = time.time() - train_start_time


print("LR + PSO:")
print(f"Normal Approach: Accuracy = {acc_before:.4f}, F1 = {f1_before:.4f}, Precision = {precision_before:.4f}, Recall = {recall_before:.4f}")
print(f"Training Time: {train_time_before:.4f} seconds")
print(f"Testing Time: {test_time_before:.4f} seconds")
print(f"Scoring Time: {score_time_before:.4f} seconds")
print(f"Training Time Without Testing And Score Calulations: {(train_time_before - test_time_before - score_time_before):.4f} seconds")
print(f"Testing Time Without Score Calulations: {(test_time_before - score_time_before):.4f} seconds")
print("Class-wise Metrics (Normal Approach):")
for label in report_before:
    if label.isdigit():  # Check if it's a class label
        print(f"Label {label}: Precision = {report_before[label]['precision']:.4f}, Recall = {report_before[label]['recall']:.4f}, F1-score = {report_before[label]['f1-score']:.4f}")

print("\nConfusion Matrix (Normal Approach):")
print(cm_before)

Stopping search: maximum iterations reached --> 500
LR + PSO:
Normal Approach: Accuracy = 0.8529, F1 = 0.8395, Precision = 0.8503, Recall = 0.8529
Training Time: 694.6435 seconds
Testing Time: 0.2305 seconds
Scoring Time: 0.2185 seconds
Training Time Without Testing And Score Calulations: 694.1945 seconds
Testing Time Without Score Calulations: 0.0119 seconds
Class-wise Metrics (Normal Approach):
Label 0: Precision = 0.8296, Recall = 0.5061, F1-score = 0.6287
Label 1: Precision = 0.8571, Recall = 0.9661, F1-score = 0.9083

Confusion Matrix (Normal Approach):
[[16510 16110]
 [ 3392 96592]]


### LR + PSO Wasserstein

In [None]:
# #############################################
# PSO Wasserstein
# #############################################


def sigmoid(z):
    # z = np.clip(z, -500, 500)  # Prevent overflow
    return 1 / (1 + expit(-z))

# Define objective function to minimize: 1 - accuracy
def objective_function(weights):
    z = np.dot(X_source_aligned_wasserstein, weights)
    predictions = sigmoid(z) > 0.5
    acc = accuracy_score(y_source, predictions)
    return 1 - acc  # We want to maximize accuracy, so we minimize (1 - accuracy)


train_start_time = time.time()
dim = X_source_aligned_wasserstein.shape[1]
lb = [-20] * dim  # Lower bounds
ub = [20] * dim   # Upper bounds
best_weights, fopt = pso(objective_function, lb, ub, swarmsize=30, maxiter=500)


test_start_time = time.time()
z_test = np.dot(X_target, best_weights)
pred_after = sigmoid(z_test) > 0.5


score_start_time = time.time()
acc_after = accuracy_score(y_target, pred_after)
f1_after = f1_score(y_target, pred_after, average='weighted')  # Weighted average for multi-class
precision_after = precision_score(y_target, pred_after, average='weighted')  # Overall precision
recall_after = recall_score(y_target, pred_after, average='weighted')  # Overall recall
report_after = classification_report(y_target, pred_after, output_dict=True)
cm_after = confusion_matrix(y_target, pred_after)
score_time_after = time.time() - score_start_time
test_time_after = time.time() - test_start_time
train_time_after = time.time() - train_start_time


print("LR + PSO Wasserstein:")
print(f" Wasserstein:  Accuracy = {acc_after:.4f}, F1 = {f1_after:.4f}, Precision = {precision_after:.4f}, Recall = {recall_after:.4f}")
print(f"Training Time: {train_time_after:.4f} seconds")
print(f"Testing Time: {test_time_after:.4f} seconds")
print(f"Scoring Time: {score_time_after:.4f} seconds")
print(f"Training Time Without Testing And Score Calulations: {(train_time_after - test_time_after - score_time_after):.4f} seconds")
print(f"Testing Time Without Score Calulations: {(test_time_after - score_time_after):.4f} seconds")
print("Class-wise Metrics (Wasserstein):")
for label in report_after:
    if label.isdigit():
        print(f"Label {label}: Precision = {report_after[label]['precision']:.4f}, Recall = {report_after[label]['recall']:.4f}, F1-score = {report_after[label]['f1-score']:.4f}")

print("\nConfusion Matrix (Wasserstein):")
print(cm_after)

Stopping search: maximum iterations reached --> 500
LR + PSO Wasserstein:
 Wasserstein:  Accuracy = 0.9266, F1 = 0.9274, Precision = 0.9289, Recall = 0.9266
Training Time: 468.5440 seconds
Testing Time: 0.2031 seconds
Scoring Time: 0.1826 seconds
Training Time Without Testing And Score Calulations: 468.1582 seconds
Testing Time Without Score Calulations: 0.0205 seconds
Class-wise Metrics (Wasserstein):
Label 0: Precision = 0.8279, Recall = 0.8856, F1-score = 0.8558
Label 1: Precision = 0.9618, Recall = 0.9399, F1-score = 0.9507

Confusion Matrix (Wasserstein):
[[28889  3731]
 [ 6006 93978]]


In [None]:
print(f"Improvement:  Accuracy Δ = {acc_after - acc_before:+.4f}, F1 Δ = {f1_after - f1_before:+.4f}, Precision Δ = {precision_after - precision_before:+.4f}, Recall Δ = {recall_after - recall_before:+.4f}\n")

print("Class-wise Improvement:")
for label in report_before:
    if label.isdigit() and label in report_after:
        print(f"Label {label}:")
        print(f"  Precision Δ = {report_after[label]['precision'] - report_before[label]['precision']:+.4f}")
        print(f"  Recall Δ = {report_after[label]['recall'] - report_before[label]['recall']:+.4f}")
        print(f"  F1-score Δ = {report_after[label]['f1-score'] - report_before[label]['f1-score']:+.4f}")

Improvement:  Accuracy Δ = +0.0096, F1 Δ = +0.0103, Precision Δ = +0.0116, Recall Δ = +0.0096

Class-wise Improvement:
Label 0:
  Precision Δ = +0.0006
  Recall Δ = +0.0485
  F1-score Δ = +0.0236
Label 1:
  Precision Δ = +0.0152
  Recall Δ = -0.0030
  F1-score Δ = +0.0059


### LR + PSO Classwise Wasserstein

In [None]:
# #############################################
# PSO Wasserstein Classwise
# #############################################

def sigmoid(z):
    # z = np.clip(z, -500, 500)  # Prevent overflow
    return 1 / (1 + expit(-z))

# Define objective function to minimize: 1 - accuracy
def objective_function(weights):
    z = np.dot(X_source_aligned_wasserstein_classwise, weights)
    predictions = sigmoid(z) > 0.5
    acc = accuracy_score(y_source, predictions)
    return 1 - acc  # We want to maximize accuracy, so we minimize (1 - accuracy)


train_start_time = time.time()
dim = X_source_aligned_wasserstein_classwise.shape[1]
lb = [-20] * dim  # Lower bounds
ub = [20] * dim   # Upper bounds
best_weights, fopt = pso(objective_function, lb, ub, swarmsize=30, maxiter=500)

test_start_time = time.time()
z_test = np.dot(X_target, best_weights)
pred_after = sigmoid(z_test) > 0.5

score_start_time = time.time()
acc_after = accuracy_score(y_target, pred_after)
f1_after = f1_score(y_target, pred_after, average='weighted')  # Weighted average for multi-class
precision_after = precision_score(y_target, pred_after, average='weighted')  # Overall precision
recall_after = recall_score(y_target, pred_after, average='weighted')  # Overall recall
report_after = classification_report(y_target, pred_after, output_dict=True)
cm_after = confusion_matrix(y_target, pred_after)
score_time_after = time.time() - score_start_time
train_time_after = time.time() - train_start_time
test_time_after = time.time() - test_start_time
# time_after = time.time() - train_start_time # Calculate overall time

print("LR + PSO Classwise Wasserstein:")
print(f" Wasserstein Classwise:  Accuracy = {acc_after:.4f}, F1 = {f1_after:.4f}, Precision = {precision_after:.4f}, Recall = {recall_after:.4f}")
print(f"Training Time: {train_time_after:.4f} seconds")
print(f"Testing Time: {test_time_after:.4f} seconds")
print(f"Scoring Time: {score_time_after:.4f} seconds")
print(f"Training Time Without Testing And Score Calulations: {(train_time_after - test_time_after - score_time_after):.4f} seconds")
print(f"Testing Time Without Score Calulations: {(test_time_after - score_time_after):.4f} seconds")
print("Class-wise Metrics (Wasserstein):")
for label in report_after:
    if label.isdigit():
        print(f"Label {label}: Precision = {report_after[label]['precision']:.4f}, Recall = {report_after[label]['recall']:.4f}, F1-score = {report_after[label]['f1-score']:.4f}")

print("\nConfusion Matrix (Wasserstein Classwise):")
print(cm_after)

Stopping search: maximum iterations reached --> 500
LR + PSO Classwise Wasserstein:
 Wasserstein Classwise:  Accuracy = 0.8302, F1 = 0.8061, Precision = 0.8305, Recall = 0.8302
Training Time: 459.8356 seconds
Testing Time: 0.1390 seconds
Scoring Time: 0.1291 seconds
Training Time Without Testing And Score Calulations: 459.5675 seconds
Testing Time Without Score Calulations: 0.0099 seconds
Class-wise Metrics (Wasserstein):
Label 0: Precision = 0.8324, Recall = 0.3879, F1-score = 0.5292
Label 1: Precision = 0.8299, Recall = 0.9745, F1-score = 0.8964

Confusion Matrix (Wasserstein Classwise):
[[12653 19967]
 [ 2548 97436]]


In [None]:
print(f"Improvement:  Accuracy Δ = {acc_after - acc_before:+.4f}, F1 Δ = {f1_after - f1_before:+.4f}, Precision Δ = {precision_after - precision_before:+.4f}, Recall Δ = {recall_after - recall_before:+.4f}\n")

print("Class-wise Improvement:")
for label in report_before:
    if label.isdigit() and label in report_after:
        print(f"Label {label}:")
        print(f"  Precision Δ = {report_after[label]['precision'] - report_before[label]['precision']:+.4f}")
        print(f"  Recall Δ = {report_after[label]['recall'] - report_before[label]['recall']:+.4f}")
        print(f"  F1-score Δ = {report_after[label]['f1-score'] - report_before[label]['f1-score']:+.4f}")

Improvement:  Accuracy Δ = -0.0867, F1 Δ = -0.1110, Precision Δ = -0.0867, Recall Δ = -0.0867

Class-wise Improvement:
Label 0:
  Precision Δ = +0.0051
  Recall Δ = -0.4492
  F1-score Δ = -0.3030
Label 1:
  Precision Δ = -0.1167
  Recall Δ = +0.0315
  F1-score Δ = -0.0484


### LR + PSO DANN

In [42]:
# #############################################
# PSO DANN
# #############################################

def sigmoid(z):
    # z = np.clip(z, -500, 500)  # Prevent overflow
    return 1 / (1 + expit(-z))

# Define objective function to minimize: 1 - accuracy
def objective_function(weights):
    z = np.dot(X_source_aligned_dann, weights)
    predictions = sigmoid(z) > 0.5
    acc = accuracy_score(y_source, predictions)
    return 1 - acc  # We want to maximize accuracy, so we minimize (1 - accuracy)


train_start_time = time.time()
dim = X_source_aligned_dann.shape[1]
lb = [-20] * dim  # Lower bounds
ub = [20] * dim   # Upper bounds
best_weights, fopt = pso(objective_function, lb, ub, swarmsize=30, maxiter=500)

test_start_time = time.time()
z_test = np.dot(X_target_aligned_dann, best_weights)
pred_after = sigmoid(z_test) > 0.5

score_start_time = time.time()
acc_after = accuracy_score(y_target, pred_after)
f1_after = f1_score(y_target, pred_after, average='weighted')  # Weighted average for multi-class
precision_after = precision_score(y_target, pred_after, average='weighted')  # Overall precision
recall_after = recall_score(y_target, pred_after, average='weighted')  # Overall recall
report_after = classification_report(y_target, pred_after, output_dict=True)
cm_after = confusion_matrix(y_target, pred_after)
score_time_after = time.time() - score_start_time
train_time_after = time.time() - train_start_time
test_time_after = time.time() - test_start_time
# time_after = time.time() - train_start_time # Calculate overall time

print("LR + PSO DANN:")
print(f"DANN:  Accuracy = {acc_after:.4f}, F1 = {f1_after:.4f}, Precision = {precision_after:.4f}, Recall = {recall_after:.4f}")
print(f"Training Time: {train_time_after:.4f} seconds")
print(f"Testing Time: {test_time_after:.4f} seconds")
print(f"Scoring Time: {score_time_after:.4f} seconds")
print(f"Training Time Without Testing And Score Calulations: {(train_time_after - test_time_after - score_time_after):.4f} seconds")
print(f"Testing Time Without Score Calulations: {(test_time_after - score_time_after):.4f} seconds")
print("Class-wise Metrics (Wasserstein):")
for label in report_after:
    if label.isdigit():
        print(f"Label {label}: Precision = {report_after[label]['precision']:.4f}, Recall = {report_after[label]['recall']:.4f}, F1-score = {report_after[label]['f1-score']:.4f}")

print("\nConfusion Matrix (DANN):")
print(cm_after)

Stopping search: maximum iterations reached --> 500
LR + PSO DANN:
DANN:  Accuracy = 0.9259, F1 = 0.9271, Precision = 0.9296, Recall = 0.9259
Training Time: 602.3002 seconds
Testing Time: 0.2215 seconds
Scoring Time: 0.2124 seconds
Training Time Without Testing And Score Calulations: 601.8662 seconds
Testing Time Without Score Calulations: 0.0091 seconds
Class-wise Metrics (Wasserstein):
Label 0: Precision = 0.8174, Recall = 0.8998, F1-score = 0.8567
Label 1: Precision = 0.9662, Recall = 0.9344, F1-score = 0.9501

Confusion Matrix (DANN):
[[29353  3267]
 [ 6556 93428]]


In [43]:
print(f"Improvement:  Accuracy Δ = {acc_after - acc_before:+.4f}, F1 Δ = {f1_after - f1_before:+.4f}, Precision Δ = {precision_after - precision_before:+.4f}, Recall Δ = {recall_after - recall_before:+.4f}\n")

print("Class-wise Improvement:")
for label in report_before:
    if label.isdigit() and label in report_after:
        print(f"Label {label}:")
        print(f"  Precision Δ = {report_after[label]['precision'] - report_before[label]['precision']:+.4f}")
        print(f"  Recall Δ = {report_after[label]['recall'] - report_before[label]['recall']:+.4f}")
        print(f"  F1-score Δ = {report_after[label]['f1-score'] - report_before[label]['f1-score']:+.4f}")

Improvement:  Accuracy Δ = +0.0730, F1 Δ = +0.0876, Precision Δ = +0.0793, Recall Δ = +0.0730

Class-wise Improvement:
Label 0:
  Precision Δ = -0.0121
  Recall Δ = +0.3937
  F1-score Δ = +0.2280
Label 1:
  Precision Δ = +0.1092
  Recall Δ = -0.0316
  F1-score Δ = +0.0417


### LR + PSO ADDA

In [44]:
# #############################################
# PSO ADDA
# #############################################

def sigmoid(z):
    # z = np.clip(z, -500, 500)  # Prevent overflow
    return 1 / (1 + expit(-z))

# Define objective function to minimize: 1 - accuracy
def objective_function(weights):
    z = np.dot(X_source_aligned_adda, weights)
    predictions = sigmoid(z) > 0.5
    acc = accuracy_score(y_source, predictions)
    return 1 - acc  # We want to maximize accuracy, so we minimize (1 - accuracy)


train_start_time = time.time()
dim = X_source_aligned_adda.shape[1]
lb = [-20] * dim  # Lower bounds
ub = [20] * dim   # Upper bounds
best_weights, fopt = pso(objective_function, lb, ub, swarmsize=30, maxiter=500)

test_start_time = time.time()
z_test = np.dot(X_target_aligned_adda, best_weights)
pred_after = sigmoid(z_test) > 0.5

score_start_time = time.time()
acc_after = accuracy_score(y_target, pred_after)
f1_after = f1_score(y_target, pred_after, average='weighted')  # Weighted average for multi-class
precision_after = precision_score(y_target, pred_after, average='weighted')  # Overall precision
recall_after = recall_score(y_target, pred_after, average='weighted')  # Overall recall
report_after = classification_report(y_target, pred_after, output_dict=True)
cm_after = confusion_matrix(y_target, pred_after)
score_time_after = time.time() - score_start_time
train_time_after = time.time() - train_start_time
test_time_after = time.time() - test_start_time
# time_after = time.time() - train_start_time # Calculate overall time

print("LR + PSO ADDA:")
print(f"ADDA:  Accuracy = {acc_after:.4f}, F1 = {f1_after:.4f}, Precision = {precision_after:.4f}, Recall = {recall_after:.4f}")
print(f"Training Time: {train_time_after:.4f} seconds")
print(f"Testing Time: {test_time_after:.4f} seconds")
print(f"Scoring Time: {score_time_after:.4f} seconds")
print(f"Training Time Without Testing And Score Calulations: {(train_time_after - test_time_after - score_time_after):.4f} seconds")
print(f"Testing Time Without Score Calulations: {(test_time_after - score_time_after):.4f} seconds")
print("Class-wise Metrics (Wasserstein):")
for label in report_after:
    if label.isdigit():
        print(f"Label {label}: Precision = {report_after[label]['precision']:.4f}, Recall = {report_after[label]['recall']:.4f}, F1-score = {report_after[label]['f1-score']:.4f}")

print("\nConfusion Matrix (ADDA):")
print(cm_after)

Stopping search: maximum iterations reached --> 500
LR + PSO ADDA:
ADDA:  Accuracy = 0.9424, F1 = 0.9440, Precision = 0.9503, Recall = 0.9424
Training Time: 586.3792 seconds
Testing Time: 0.2217 seconds
Scoring Time: 0.2135 seconds
Training Time Without Testing And Score Calulations: 585.9440 seconds
Testing Time Without Score Calulations: 0.0081 seconds
Class-wise Metrics (Wasserstein):
Label 0: Precision = 0.8221, Recall = 0.9775, F1-score = 0.8931
Label 1: Precision = 0.9922, Recall = 0.9310, F1-score = 0.9606

Confusion Matrix (ADDA):
[[31885   735]
 [ 6900 93084]]


In [45]:
print(f"Improvement:  Accuracy Δ = {acc_after - acc_before:+.4f}, F1 Δ = {f1_after - f1_before:+.4f}, Precision Δ = {precision_after - precision_before:+.4f}, Recall Δ = {recall_after - recall_before:+.4f}\n")

print("Class-wise Improvement:")
for label in report_before:
    if label.isdigit() and label in report_after:
        print(f"Label {label}:")
        print(f"  Precision Δ = {report_after[label]['precision'] - report_before[label]['precision']:+.4f}")
        print(f"  Recall Δ = {report_after[label]['recall'] - report_before[label]['recall']:+.4f}")
        print(f"  F1-score Δ = {report_after[label]['f1-score'] - report_before[label]['f1-score']:+.4f}")

Improvement:  Accuracy Δ = +0.0895, F1 Δ = +0.1045, Precision Δ = +0.1000, Recall Δ = +0.0895

Class-wise Improvement:
Label 0:
  Precision Δ = -0.0075
  Recall Δ = +0.4713
  F1-score Δ = +0.2644
Label 1:
  Precision Δ = +0.1351
  Recall Δ = -0.0351
  F1-score Δ = +0.0523
