In [32]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

Deep LDA -

https://github.com/VahidooX/DeepLDA/tree/master

https://github.com/tchaton/DeepLDA/tree/master




In [33]:
import torch
import torch.nn as nn
import torch.optim as optim

class LDA_Loss(nn.Module):
    def __init__(self, n_components, margin):
        super(LDA_Loss, self).__init__()
        self.n_components = n_components
        self.margin = margin

    def forward(self, y_true, y_pred):
        r = 1e-4

        # Initialize groups
        groups = torch.unique(y_true)

        def compute_cov(group, Xt, yt):
            Xgt = Xt[yt == group]
            Xgt_bar = Xgt - torch.mean(Xgt, axis=0)
            m = float(Xgt_bar.shape[0])
            return (1.0 / (m - 1)) * torch.matmul(Xgt_bar.T, Xgt_bar)

        # Scan over groups
        covs_t = torch.stack([compute_cov(group, y_pred, y_true) for group in groups])

        # Compute average covariance matrix (within scatter)
        Sw_t = torch.mean(covs_t, axis=0)

        # Compute total scatter
        Xt_bar = y_pred - torch.mean(y_pred, axis=0)
        m = float(Xt_bar.shape[0])
        St_t = (1.0 / (m - 1)) * torch.matmul(Xt_bar.T, Xt_bar)

        # Compute between scatter
        Sb_t = St_t - Sw_t

        # Cope for numerical instability (regularize)
        Sw_t += torch.eye(Sw_t.shape[0]) * r

        # Compute eigenvalues
        evals_t = torch.linalg.eigvalsh(Sb_t, UPLO='U')  # Use UPLO='U' for upper triangular portion

        # Get top eigenvalues
        top_k_evals = evals_t[-self.n_components:]

        # Maximize variance between classes
        thresh = torch.min(top_k_evals) + self.margin
        top_k_evals = top_k_evals[top_k_evals <= thresh]
        costs = torch.mean(top_k_evals)

        return -costs


In [41]:
def lda_prune_layers(model, x_train, y_train, x_test, y_test, n_components, margin):
    model.eval()
    with torch.no_grad():
        # Extract features
        x_train_features = model(x_train)
        x_test_features = model(x_test)
    print(x_train.shape,x_train_features.shape)

    # Training and testing of SVM with linear kernel on the new features
    [train_acc, test_acc] = svm_classify(x_train_features.numpy(), y_train.numpy(), x_test_features.numpy(), y_test.numpy(), C=0.1)

    print("Accuracy on train data is:", train_acc * 100.0)
    print("Accuracy on test data is:", test_acc * 100.0)
    return test_acc

In [35]:
def svm_classify(x_train, y_train, x_test, y_test, C):
    print('Training SVM...')
    clf = svm.LinearSVC(C=C, dual=False)
    clf.fit(x_train, y_train.ravel())

    train_predictions = clf.predict(x_train)
    train_acc = accuracy_score(y_train, train_predictions)

    test_predictions = clf.predict(x_test)
    test_acc = accuracy_score(y_test, test_predictions)

    return [train_acc, test_acc]

In [36]:
class MLP(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        x = torch.sigmoid(self.fc1(x))
        x = self.fc2(x)
        return x

In [37]:
outdim_size = 10
n_components = 9
margin = 1.0


In [38]:
# Load data
mnist = fetch_openml("mnist_784")
X = mnist.data.astype('float32') / 255
y = mnist.target.astype('int64')

x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
x_train = x_train.values
x_test = x_test.values
y_train = y_train.values
y_test = y_test.values

  warn(


In [39]:
from sklearn.ensemble import AdaBoostClassifier

def ada_boost_classify(x_train, y_train, x_test, y_test, n_estimators):
    print('Training AdaBoost classifier...')
    clf = AdaBoostClassifier(n_estimators=n_estimators)
    clf.fit(x_train, y_train.ravel())

    train_predictions = clf.predict(x_train)
    train_acc = accuracy_score(y_train, train_predictions)

    test_predictions = clf.predict(x_test)
    test_acc = accuracy_score(y_test, test_predictions)

    return [train_acc, test_acc]

def lda_prune_layers_with_adaboost(model, x_train, y_train, x_test, y_test, n_components, margin, n_estimators):
    model.eval()
    with torch.no_grad():
        # Extract features
        x_train_features = model(x_train)
        x_test_features = model(x_test)
    print(x_train.shape,x_train_features.shape)
    # Training and testing of AdaBoost on the new features
    [train_acc, test_acc] = ada_boost_classify(x_train_features.numpy(), y_train.numpy(), x_test_features.numpy(), y_test.numpy(), n_estimators)

    print("Accuracy on train data is:", train_acc * 100.0)
    print("Accuracy on test data is:", test_acc * 100.0)

# Usage


WITHOUT LDA

In [14]:

# Model without LDA
model_no_lda = MLP(input_dim=x_train.shape[1], hidden_dim=1024, output_dim=outdim_size)

# Training without LDA
optimizer_no_lda = optim.Adam(model_no_lda.parameters())
criterion_no_lda = nn.CrossEntropyLoss()

for epoch in range(100):
    model_no_lda.train()
    optimizer_no_lda.zero_grad()
    outputs_no_lda = model_no_lda(torch.tensor(x_train))
    loss_no_lda = criterion_no_lda(outputs_no_lda, torch.tensor(y_train))
    loss_no_lda.backward()
    optimizer_no_lda.step()

    print(f'Epoch [{epoch+1}/100], Loss (No LDA): {loss_no_lda.item():.4f}')

# Evaluate MLP without LDA
model_no_lda.eval()
with torch.no_grad():
    outputs_train_no_lda = model_no_lda(torch.tensor(x_train))
    outputs_test_no_lda = model_no_lda(torch.tensor(x_test))
acc_train_no_lda = accuracy_score(y_train, torch.argmax(outputs_train_no_lda, axis=1))
acc_test_no_lda = accuracy_score(y_test, torch.argmax(outputs_test_no_lda, axis=1))
print("Accuracy on train data (No LDA):", acc_train_no_lda * 100.0)
print("Accuracy on test data (No LDA):", acc_test_no_lda * 100.0)

Epoch [1/100], Loss (No LDA): 2.3506
Epoch [2/100], Loss (No LDA): 2.2475
Epoch [3/100], Loss (No LDA): 2.1987
Epoch [4/100], Loss (No LDA): 2.1308
Epoch [5/100], Loss (No LDA): 2.0538
Epoch [6/100], Loss (No LDA): 1.9749
Epoch [7/100], Loss (No LDA): 1.8986
Epoch [8/100], Loss (No LDA): 1.8347
Epoch [9/100], Loss (No LDA): 1.7757
Epoch [10/100], Loss (No LDA): 1.7094
Epoch [11/100], Loss (No LDA): 1.6379
Epoch [12/100], Loss (No LDA): 1.5680
Epoch [13/100], Loss (No LDA): 1.5023
Epoch [14/100], Loss (No LDA): 1.4405
Epoch [15/100], Loss (No LDA): 1.3814
Epoch [16/100], Loss (No LDA): 1.3237
Epoch [17/100], Loss (No LDA): 1.2662
Epoch [18/100], Loss (No LDA): 1.2094
Epoch [19/100], Loss (No LDA): 1.1550
Epoch [20/100], Loss (No LDA): 1.1043
Epoch [21/100], Loss (No LDA): 1.0575
Epoch [22/100], Loss (No LDA): 1.0140
Epoch [23/100], Loss (No LDA): 0.9728
Epoch [24/100], Loss (No LDA): 0.9334
Epoch [25/100], Loss (No LDA): 0.8960
Epoch [26/100], Loss (No LDA): 0.8609
Epoch [27/100], Loss 

WITH LDA

In [15]:
model_LDA = MLP(input_dim=x_train.shape[1], hidden_dim=1024, output_dim=outdim_size)

# Training
optimizer_LDA = optim.Adam(model_LDA.parameters())
criterion_LDA = LDA_Loss(n_components, margin)
for epoch in range(100):
    model_LDA.train()
    optimizer_LDA.zero_grad()
    outputs = model_LDA(torch.tensor(x_train))
    loss = criterion_LDA(torch.tensor(y_train), outputs)
    loss.backward()
    optimizer_LDA.step()

    print(f'Epoch [{epoch+1}/100], Loss: {loss.item():.4f}')

lda_prune_layers(model_LDA, torch.tensor(x_train), torch.tensor(y_train), torch.tensor(x_test), torch.tensor(y_test), n_components, margin)


Epoch [1/100], Loss: -0.0001
Epoch [2/100], Loss: -0.0028
Epoch [3/100], Loss: -0.0086
Epoch [4/100], Loss: -0.0188
Epoch [5/100], Loss: -0.0351
Epoch [6/100], Loss: -0.0594
Epoch [7/100], Loss: -0.0944
Epoch [8/100], Loss: -0.1430
Epoch [9/100], Loss: -0.0678
Epoch [10/100], Loss: -0.0933
Epoch [11/100], Loss: -0.1290
Epoch [12/100], Loss: -0.0285
Epoch [13/100], Loss: -0.0362
Epoch [14/100], Loss: -0.0470
Epoch [15/100], Loss: -0.0619
Epoch [16/100], Loss: -0.0823
Epoch [17/100], Loss: -0.1101
Epoch [18/100], Loss: -0.0047
Epoch [19/100], Loss: -0.0055
Epoch [20/100], Loss: -0.0065
Epoch [21/100], Loss: -0.0078
Epoch [22/100], Loss: -0.0095
Epoch [23/100], Loss: -0.0116
Epoch [24/100], Loss: -0.0143
Epoch [25/100], Loss: -0.0178
Epoch [26/100], Loss: -0.0223
Epoch [27/100], Loss: -0.0281
Epoch [28/100], Loss: -0.0357
Epoch [29/100], Loss: -0.0456
Epoch [30/100], Loss: -0.0586
Epoch [31/100], Loss: -0.0758
Epoch [32/100], Loss: -0.0985
Epoch [33/100], Loss: -0.1287
Epoch [34/100], Los

In [16]:
lda_prune_layers_with_adaboost(model_LDA, torch.tensor(x_train), torch.tensor(y_train), torch.tensor(x_test), torch.tensor(y_test), n_components, margin, n_estimators=100)


torch.Size([56000, 784]) torch.Size([56000, 10])
Training AdaBoost classifier...
Accuracy on train data is: 74.37142857142857
Accuracy on test data is: 74.12857142857143


WHAT IS IDEAL N_COMPONENTS?

In [42]:
best_test_acc = 0
best_model = None
best_n_components = None

for n_components in range(1, 10):
    model_LDA = MLP(input_dim=x_train.shape[1], hidden_dim=1024, output_dim=outdim_size)

    # Training
    optimizer_LDA = optim.Adam(model_LDA.parameters())
    criterion_LDA = LDA_Loss(n_components, margin)
    for epoch in range(100):
        model_LDA.train()
        optimizer_LDA.zero_grad()
        outputs = model_LDA(torch.tensor(x_train))
        loss = criterion_LDA(torch.tensor(y_train), outputs)
        loss.backward()
        optimizer_LDA.step()

     #   print(f'Epoch [{epoch+1}/100], Loss: {loss.item():.4f}')

    print(f"Training for {n_components} components complete.")

    # Evaluate test accuracy
    test_acc = lda_prune_layers(model_LDA, torch.tensor(x_train), torch.tensor(y_train), torch.tensor(x_test), torch.tensor(y_test), n_components, margin)

    if test_acc > best_test_acc:
        best_test_acc = test_acc
        best_model = model_LDA
        best_n_components = n_components

print(f"Best test accuracy: {best_test_acc} with {best_n_components} components.")
# Save the best model if needed
# torch.save(best_model.state_dict(), "best_lda_model.pth")


Training for 1 components complete.
torch.Size([56000, 784]) torch.Size([56000, 10])
Training SVM...
Accuracy on train data is: 35.949999999999996
Accuracy on test data is: 36.22142857142857
Training for 2 components complete.
torch.Size([56000, 784]) torch.Size([56000, 10])
Training SVM...
Accuracy on train data is: 57.6125
Accuracy on test data is: 58.76428571428571
Training for 3 components complete.
torch.Size([56000, 784]) torch.Size([56000, 10])
Training SVM...
Accuracy on train data is: 70.65535714285714
Accuracy on test data is: 71.02142857142857
Training for 4 components complete.
torch.Size([56000, 784]) torch.Size([56000, 10])
Training SVM...
Accuracy on train data is: 72.85357142857143
Accuracy on test data is: 73.65714285714286
Training for 5 components complete.
torch.Size([56000, 784]) torch.Size([56000, 10])
Training SVM...
Accuracy on train data is: 79.00178571428572
Accuracy on test data is: 79.03571428571429
Training for 6 components complete.
torch.Size([56000, 784]

BINARY CLASSIFICATION

In [17]:
from sklearn.datasets import load_breast_cancer

# Load Breast Cancer dataset
data = load_breast_cancer()
X = data.data
y = data.target

# Split the data into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [18]:

# Parameters
outdim_size = 2  # 2 classes
n_components = 1  # We're using LDA for binary classification
margin = 1.0
n_estimators = 50  # Number of estimators for AdaBoost

In [19]:
# Convert input data to torch.float64
x_train_tensor = torch.tensor(x_train, dtype=torch.float64)
x_test_tensor = torch.tensor(x_test, dtype=torch.float64)

# Model without LDA
model_no_lda = MLP(input_dim=x_train_tensor.shape[1], hidden_dim=1024, output_dim=outdim_size)
model_no_lda.double()  # Set model parameters to double precision

# Training without LDA
optimizer_no_lda = optim.Adam(model_no_lda.parameters())
criterion_no_lda = nn.CrossEntropyLoss()

for epoch in range(100):
    model_no_lda.train()
    optimizer_no_lda.zero_grad()
    outputs_no_lda = model_no_lda(x_train_tensor)  # Use converted tensor
    loss_no_lda = criterion_no_lda(outputs_no_lda, torch.tensor(y_train))
    loss_no_lda.backward()
    optimizer_no_lda.step()

    print(f'Epoch [{epoch+1}/100], Loss (No LDA): {loss_no_lda.item():.4f}')

# Evaluate MLP without LDA
model_no_lda.eval()
with torch.no_grad():
    outputs_train_no_lda = model_no_lda(x_train_tensor)  # Use converted tensor
    outputs_test_no_lda = model_no_lda(x_test_tensor)  # Use converted tensor
acc_train_no_lda = accuracy_score(y_train, torch.argmax(outputs_train_no_lda, axis=1))
acc_test_no_lda = accuracy_score(y_test, torch.argmax(outputs_test_no_lda, axis=1))
print("Accuracy on train data (No LDA):", acc_train_no_lda * 100.0)
print("Accuracy on test data (No LDA):", acc_test_no_lda * 100.0)



Epoch [1/100], Loss (No LDA): 0.6873
Epoch [2/100], Loss (No LDA): 0.6770
Epoch [3/100], Loss (No LDA): 0.6160
Epoch [4/100], Loss (No LDA): 0.5698
Epoch [5/100], Loss (No LDA): 0.5574
Epoch [6/100], Loss (No LDA): 0.5279
Epoch [7/100], Loss (No LDA): 0.4888
Epoch [8/100], Loss (No LDA): 0.4646
Epoch [9/100], Loss (No LDA): 0.4509
Epoch [10/100], Loss (No LDA): 0.4329
Epoch [11/100], Loss (No LDA): 0.4097
Epoch [12/100], Loss (No LDA): 0.3892
Epoch [13/100], Loss (No LDA): 0.3752
Epoch [14/100], Loss (No LDA): 0.3626
Epoch [15/100], Loss (No LDA): 0.3477
Epoch [16/100], Loss (No LDA): 0.3322
Epoch [17/100], Loss (No LDA): 0.3199
Epoch [18/100], Loss (No LDA): 0.3111
Epoch [19/100], Loss (No LDA): 0.3027
Epoch [20/100], Loss (No LDA): 0.2932
Epoch [21/100], Loss (No LDA): 0.2834
Epoch [22/100], Loss (No LDA): 0.2757
Epoch [23/100], Loss (No LDA): 0.2695
Epoch [24/100], Loss (No LDA): 0.2627
Epoch [25/100], Loss (No LDA): 0.2552
Epoch [26/100], Loss (No LDA): 0.2483
Epoch [27/100], Loss 

In [20]:
# Model with LDA
model_lda = MLP(input_dim=x_train_tensor.shape[1], hidden_dim=1024, output_dim=outdim_size)
model_lda.double()  # Set model parameters to double precision

# Training with LDA
optimizer_lda = optim.Adam(model_lda.parameters())
criterion_lda = LDA_Loss(n_components, margin)

for epoch in range(100):
    model_lda.train()
    optimizer_lda.zero_grad()
    outputs_lda = model_lda(torch.tensor(x_train, dtype=torch.float64))  # Use converted tensor
    loss_lda = criterion_lda(torch.tensor(y_train), outputs_lda)
    loss_lda.backward()
    optimizer_lda.step()

    print(f'Epoch [{epoch+1}/100], Loss (With LDA): {loss_lda.item():.4f}')

# Prune Layers and SVM Classification with LDA
lda_prune_layers(model_lda, torch.tensor(x_train, dtype=torch.float64), torch.tensor(y_train), torch.tensor(x_test, dtype=torch.float64), torch.tensor(y_test), n_components, margin)


Epoch [1/100], Loss (With LDA): -0.0014
Epoch [2/100], Loss (With LDA): -0.0114
Epoch [3/100], Loss (With LDA): -0.0287
Epoch [4/100], Loss (With LDA): -0.0528
Epoch [5/100], Loss (With LDA): -0.0841
Epoch [6/100], Loss (With LDA): -0.1239
Epoch [7/100], Loss (With LDA): -0.1727
Epoch [8/100], Loss (With LDA): -0.2315
Epoch [9/100], Loss (With LDA): -0.2970
Epoch [10/100], Loss (With LDA): -0.3695
Epoch [11/100], Loss (With LDA): -0.4528
Epoch [12/100], Loss (With LDA): -0.5463
Epoch [13/100], Loss (With LDA): -0.6522
Epoch [14/100], Loss (With LDA): -0.7742
Epoch [15/100], Loss (With LDA): -0.9133
Epoch [16/100], Loss (With LDA): -1.0720
Epoch [17/100], Loss (With LDA): -1.2482
Epoch [18/100], Loss (With LDA): -1.4384
Epoch [19/100], Loss (With LDA): -1.6439
Epoch [20/100], Loss (With LDA): -1.8672
Epoch [21/100], Loss (With LDA): -2.1154
Epoch [22/100], Loss (With LDA): -2.4016
Epoch [23/100], Loss (With LDA): -2.7270
Epoch [24/100], Loss (With LDA): -3.1016
Epoch [25/100], Loss (Wit

In [22]:
lda_prune_layers_with_adaboost(model_lda, torch.tensor(x_train, dtype=torch.float64), torch.tensor(y_train), torch.tensor(x_test, dtype=torch.float64), torch.tensor(y_test),n_components, margin, n_estimators=100)



torch.Size([455, 30]) torch.Size([455, 2])
Training AdaBoost classifier...
Accuracy on train data is: 95.16483516483515
Accuracy on test data is: 92.10526315789474
