In [1]:
# Uninstall the current version of scikit-learn
!pip uninstall -y scikit-learn

# Install scikit-learn version 1.3.2
!pip install scikit-learn==1.3.2

# Verify the installation
import sklearn
print(sklearn.__version__)

Found existing installation: scikit-learn 1.6.1
Uninstalling scikit-learn-1.6.1:
  Successfully uninstalled scikit-learn-1.6.1
Collecting scikit-learn==1.3.2
  Downloading scikit_learn-1.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Downloading scikit_learn-1.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (10.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.9/10.9 MB[0m [31m69.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: scikit-learn
Successfully installed scikit-learn-1.3.2
1.3.2


In [2]:
from sklearn.decomposition import PCA
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import RandomizedSearchCV
import math
import torch.nn as nn

# Pick the torch device used for tensors in this notebook: CUDA when it is
# both available and switched on via `enable_cuda`, otherwise the CPU.
enable_cuda = True
if torch.cuda.is_available() and enable_cuda:
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

class CTDataset_pca(Dataset):
    """Training dataset of PCA-projected, flattened 28x28 images.

    On construction the file at ``filepath`` is loaded and unpacked into
    ``(images, labels)``. Images are divided by 255 (presumably 0-255 pixel
    intensities -- confirm against the data file), flattened to 784 features
    per row, and passed to ``PCA_model.fit_transform``. ``PCA_model`` is
    therefore *fitted* as a side effect of building this dataset.

    Attributes:
        x: float32 tensor on the module-level ``device`` holding the projected
            features, one row per sample.
        y: float64 one-hot tensor with 10 columns built from the labels.

    Args:
        filepath: Path handed to ``torch.load``; the loaded object must unpack
            into exactly two items ``(images, labels)``.
        PCA_model: Object providing ``fit_transform(array) -> numpy.ndarray``
            (e.g. an unfitted ``sklearn.decomposition.PCA``).
    """

    def __init__(self, filepath, PCA_model):
        # NOTE: weights_only=False unpickles arbitrary Python objects, so only
        # load files that come from a trusted source.
        images, labels = torch.load(filepath, weights_only=False)
        # The PCA step runs on host memory, so hand it a CPU NumPy array
        # directly. No `.cuda()` hop is involved, which keeps this working on
        # machines without a GPU (where `device` falls back to 'cpu').
        flat = (images / 255.).reshape(-1, 28*28).detach().cpu().numpy()
        projected = PCA_model.fit_transform(flat)
        self.x = torch.from_numpy(projected).to(device).float()
        # `float` is Python's float, i.e. torch.float64.
        self.y = F.one_hot(labels, num_classes=10).to(float)

    def __len__(self):
        """Return the number of samples (rows of ``x``)."""
        return self.x.shape[0]

    def __getitem__(self, ix):
        """Return the ``(features, one_hot_label)`` pair(s) selected by ``ix``."""
        return self.x[ix], self.y[ix]

class CTDataset_pca_test(Dataset):
    """Evaluation dataset of PCA-projected, flattened 28x28 images.

    On construction the file at ``filepath`` is loaded and unpacked into
    ``(images, labels)``. Images are divided by 255 (presumably 0-255 pixel
    intensities -- confirm against the data file), flattened to 784 features
    per row, and projected with ``PCA_model.transform``. The model is only
    applied here, never refitted, so it must already be fitted by the caller.

    Attributes:
        x: float32 tensor on the module-level ``device`` holding the projected
            features, one row per sample.
        y: float64 one-hot tensor with 10 columns built from the labels.

    Args:
        filepath: Path handed to ``torch.load``; the loaded object must unpack
            into exactly two items ``(images, labels)``.
        PCA_model: Already-fitted object providing
            ``transform(array) -> numpy.ndarray``.
    """

    def __init__(self, filepath, PCA_model):
        # NOTE: weights_only=False unpickles arbitrary Python objects, so only
        # load files that come from a trusted source.
        images, labels = torch.load(filepath, weights_only=False)
        # The PCA step runs on host memory, so hand it a CPU NumPy array
        # directly. No `.cuda()` hop is involved, which keeps this working on
        # machines without a GPU (where `device` falls back to 'cpu').
        flat = (images / 255.).reshape(-1, 28*28).detach().cpu().numpy()
        projected = PCA_model.transform(flat)
        self.x = torch.from_numpy(projected).to(device).float()
        # `float` is Python's float, i.e. torch.float64.
        self.y = F.one_hot(labels, num_classes=10).to(float)

    def __len__(self):
        """Return the number of samples (rows of ``x``)."""
        return self.x.shape[0]

    def __getitem__(self, ix):
        """Return the ``(features, one_hot_label)`` pair(s) selected by ``ix``."""
        return self.x[ix], self.y[ix]

# Sweep over PCA latent dimensions: for each size, project the data with PCA,
# tune a k-NN classifier on one half of the training set, and report its
# accuracy on the test set.
torch.manual_seed(42)

for latent_space_dim in [3, 4, 5, 6, 7, 8, 10, 12, 15, 16, 17, 20, 30, 40, 100]:

    # With 60000x784 inputs and this few components, PCA's default 'auto'
    # solver uses randomized SVD, which draws from NumPy's RNG -- not torch's.
    # Pin random_state so the projection (and the accuracies printed below)
    # are reproducible from run to run.
    PCA_model = PCA(n_components=latent_space_dim, random_state=42)
    # Building the training dataset fits PCA_model; the test dataset then
    # reuses the fitted projection.
    train_ds = CTDataset_pca('./training.pt', PCA_model)
    test_ds = CTDataset_pca_test('./test.pt', PCA_model)

    # Re-seed so every latent dimension sees the same 30000/30000 split.
    torch.manual_seed(42)
    # Only the second half is used in this cell; the first half is
    # presumably reserved for autoencoder training elsewhere (TODO confirm).
    train_AE_set, train_cond_gen_set = torch.utils.data.random_split(train_ds, [30000, 30000])

    x_train, y_train = train_cond_gen_set[:]
    x_test, y_test = test_ds[:]

    # scikit-learn works on host NumPy arrays.
    x_train, y_train = x_train.cpu().detach().numpy(), y_train.cpu().detach().numpy()
    x_test, y_test = x_test.cpu().detach().numpy(), y_test.cpu().detach().numpy()

    # Candidate neighbour counts: 1 .. floor(sqrt(n_train)) - 1.
    param_dist = {
        "n_neighbors": list(range(1, int(math.sqrt(x_train.shape[0]))))
    }
    random_search = RandomizedSearchCV(
        KNeighborsClassifier(),
        param_distributions=param_dist,
        n_iter=60,
        cv=5,
        n_jobs=-1,
        random_state=42,
    )
    random_search.fit(x_train, y_train)

    # Labels are one-hot rows, so the reported score should be subset
    # accuracy (a sample counts only if its whole row is predicted exactly).
    test_accuracy = random_search.score(x_test, y_test)
    print(f" latent_space_dim: {latent_space_dim}; Test Accuracy: {test_accuracy}")

    # Release the per-iteration data before the next, larger projection.
    del train_ds, test_ds, x_train, y_train, x_test, y_test, PCA_model, random_search
    torch.cuda.empty_cache()

 latent_space_dim: 3; Test Accuracy: 0.4057
 latent_space_dim: 4; Test Accuracy: 0.5735
 latent_space_dim: 5; Test Accuracy: 0.7042
 latent_space_dim: 6; Test Accuracy: 0.7954
 latent_space_dim: 7; Test Accuracy: 0.8459
 latent_space_dim: 8; Test Accuracy: 0.8805
 latent_space_dim: 10; Test Accuracy: 0.9122
 latent_space_dim: 12; Test Accuracy: 0.9322
 latent_space_dim: 15; Test Accuracy: 0.9496
 latent_space_dim: 16; Test Accuracy: 0.9546
 latent_space_dim: 17; Test Accuracy: 0.9557
 latent_space_dim: 20; Test Accuracy: 0.9631
 latent_space_dim: 30; Test Accuracy: 0.9686
 latent_space_dim: 40; Test Accuracy: 0.9691
 latent_space_dim: 100; Test Accuracy: 0.9671


In [None]:
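# Test accuracy for latent_space_dim = 3, 4, 5, 6, 7, 8, 10, 12, 15, 16, 17, 20, 30, 40, 100
# (copied from the output of the sweep above, in the same order):
# 0.4057, 0.5735, 0.7042, 0.7954, 0.8459, 0.8805, 0.9122, 0.9322, 0.9496, 0.9546, 0.9557, 0.9631, 0.9686, 0.9691, 0.9671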