<a href="https://colab.research.google.com/github/RajeswariKumaran/SSLMethodsAnalysis/blob/main/EqualHyperParameterTuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.5.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.16.5-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.5.0-py3-none-any.whl (400 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m400.9/400.9 kB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.16.5-py3-none-any.whl (247 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m247.4/247.4 kB[0m [31m22.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, alembic, optuna
Successfully installed alembic-1.16.5 colorlog-6.9.0 optuna-4.5.0


In [None]:
# starting here

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Subset
import torch.nn.functional as F
import optuna

In [None]:
# Load CIFAR-10 and build the labeled-train / validation / unlabeled splits that
# every later cell (supervised baseline and VAT) shares.
from torch.utils.data import random_split

# Convert images to tensors and normalize each RGB channel with mean 0.5 / std 0.5.
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# Download CIFAR-10 dataset
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# A dedicated generator makes the labeled/unlabeled/validation membership
# reproducible across kernel restarts without touching the global RNG state.
SPLIT_SEED = 42
split_generator = torch.Generator().manual_seed(SPLIT_SEED)

# Split into labeled and unlabeled sets
labeled_data_size = 6000  # number of training images whose labels we are allowed to use
indices = torch.randperm(len(trainset), generator=split_generator).tolist()
labeled_indices = indices[:labeled_data_size]
unlabeled_indices = indices[labeled_data_size:]

labeled_subset = Subset(trainset, labeled_indices)  # full labeled pool, before the validation hold-out
unlabeled_trainset = Subset(trainset, unlabeled_indices)

# Hold out a fraction of the labeled pool for validation (used for hyperparameter tuning).
val_split_ratio = 0.2
total_size = len(labeled_subset)
val_size = int(total_size * val_split_ratio)
train_size = total_size - val_size
labeled_trainset, validset = random_split(labeled_subset, [train_size, val_size], generator=split_generator)

# Data loaders
batch_size = 64
labeled_trainloader = DataLoader(labeled_trainset, batch_size=batch_size, shuffle=True)
labeled_validloader = DataLoader(validset, batch_size=batch_size, shuffle=False)
unlabeled_trainloader = DataLoader(unlabeled_trainset, batch_size=batch_size, shuffle=True)
testloader = DataLoader(testset, batch_size=batch_size, shuffle=False)


In [None]:
# optimize hyper parameters for supervised training with only labeled data
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torch.nn.functional as F
import optuna  # Hyperparameter optimization library

# Define the same model as used for SSL (for a fair comparison)
class SupervisedModel(nn.Module):
    """Minimal CNN classifier: 3x3 conv (3->32) -> ReLU -> 2x2 max-pool -> linear (10 outputs).

    The linear layer expects 32 * 16 * 16 features, i.e. the network is sized
    for 32x32 RGB inputs such as CIFAR-10.
    """

    def __init__(self):
        super().__init__()
        # Layers are registered in the same order as before, so seeded
        # initialization and state_dict keys are unchanged.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(in_features=32 * 16 * 16, out_features=10)

    def forward(self, x):
        """Return raw class logits for a batch of images."""
        activated = torch.relu(self.conv1(x))
        pooled = self.pool(activated)
        flattened = pooled.view(-1, self.fc1.in_features)
        return self.fc1(flattened)

# Define the objective function for Optuna optimization
def objective_supervised(trial):
    """Optuna objective: train the supervised baseline once and return validation accuracy.

    Samples a learning rate (log scale), SGD momentum and batch size from the
    trial, trains a fresh ``SupervisedModel`` for 10 epochs on the module-level
    ``labeled_trainset``, and scores it on the module-level ``validloader``.

    Args:
        trial: Optuna trial used to sample hyperparameters.

    Returns:
        Validation accuracy in percent (the study is expected to maximize it).

    NOTE(review): the VAT objective in this notebook searches lr in
    [1e-5, 1e-1] and momentum in [0.8, 0.95], which differs from the ranges
    used here; confirm whether the two search spaces are meant to match.
    """
    # Hyperparameter search space. suggest_float replaces the deprecated
    # suggest_loguniform / suggest_uniform; parameter names are unchanged.
    learning_rate = trial.suggest_float('lr', 1e-5, 1e-2, log=True)
    momentum = trial.suggest_float('momentum', 0.5, 0.9)
    batch_size = trial.suggest_categorical('batch_size', [32, 64, 128])

    # Rebuild the loader so the sampled batch size takes effect.
    labeled_trainloader = DataLoader(labeled_trainset, batch_size=batch_size, shuffle=True)

    # Fresh model / optimizer / loss for every trial.
    model = SupervisedModel()
    optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)
    criterion = nn.CrossEntropyLoss()

    model.train()
    for epoch in range(10):  # Training for 10 epochs
        running_loss = 0.0
        for inputs, labels in labeled_trainloader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        # Report once per epoch, after all batches, so the value is the true
        # mean batch loss rather than a partial running sum printed per batch.
        print(f"Epoch {epoch+1}, Loss: {running_loss/len(labeled_trainloader)}")

    # Score on the held-out validation split; the study maximizes this value.
    accuracy = evaluate_supervised_model(model, validloader)
    return accuracy

# Evaluation function
def evaluate_supervised_model(model, testloader):
    """Return top-1 accuracy (in percent) of ``model`` over the batches in ``testloader``.

    Switches the model to eval mode and runs inference without gradient
    tracking. ``testloader`` must yield ``(inputs, labels)`` pairs.
    """
    model.eval()
    n_correct, n_seen = 0, 0
    with torch.no_grad():
        for batch_inputs, batch_labels in testloader:
            predictions = model(batch_inputs).max(dim=1).indices
            n_seen += batch_labels.size(0)
            n_correct += (predictions == batch_labels).sum().item()
    return 100 * n_correct / n_seen

# Validation loader over `validset`, the hold-out split created in the data-loading cell.
validloader = DataLoader(
    validset,
    batch_size=64,
    shuffle=False,
)

# Run the supervised hyperparameter search: 20 trials, maximizing validation accuracy.
study = optuna.create_study(direction='maximize')
study.optimize(objective_supervised, n_trials=20)

# Report the winning configuration and its validation score.
best_supervised_trial = study.best_trial
print(f"Best trial: {best_supervised_trial.params}")
print(f"Best validation accuracy: {best_supervised_trial.value}%")



[I 2025-08-31 06:38:17,877] A new study created in memory with name: no-name-da8cf57e-c808-40d0-8344-bea00303c0c5
  learning_rate = trial.suggest_loguniform('lr', 1e-5, 1e-2)  # Log scale for learning rate
  momentum = trial.suggest_uniform('momentum', 0.5, 0.9)  # Momentum for SGD
[I 2025-08-31 06:38:46,982] Trial 0 finished with value: 29.75 and parameters: {'lr': 5.9321778995265514e-05, 'momentum': 0.6861471040931886, 'batch_size': 32}. Best is trial 0 with value: 29.75.
[I 2025-08-31 06:39:16,324] Trial 1 finished with value: 40.916666666666664 and parameters: {'lr': 0.0036918306459303722, 'momentum': 0.6125154194375058, 'batch_size': 128}. Best is trial 1 with value: 40.916666666666664.
[I 2025-08-31 06:39:46,510] Trial 2 finished with value: 31.583333333333332 and parameters: {'lr': 3.6584529346780894e-05, 'momentum': 0.8682900784531764, 'batch_size': 32}. Best is trial 1 with value: 40.916666666666664.
[I 2025-08-31 06:40:15,112] Trial 3 finished with value: 35.166666666666664 a

Best trial: {'lr': 0.007368715747923294, 'momentum': 0.8974213852548178, 'batch_size': 32}
Best validation accuracy: 51.333333333333336%


In [None]:
# Keep the supervised study's best hyperparameters (lr, momentum, batch_size)
# under a dedicated name for the final supervised training run.
supervised_best_params = study.best_trial.params

In [None]:
# get best parameters for semi supervised learning using VAT
import optuna
import torch
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.nn.functional as F

# Define your VAT model (as in your original code)
class VATModel(nn.Module):
    """Classifier trained with the VAT objective.

    Architecture: one 3x3 convolution (3 -> 32 channels), ReLU, 2x2 max-pool,
    then a linear layer mapping 32 * 16 * 16 features to 10 logits.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(32 * 16 * 16, 10)

    def forward(self, x):
        """Compute class logits for a batch of images."""
        features = self.pool(F.relu(self.conv1(x)))
        features = features.view(-1, 32 * 16 * 16)
        logits = self.fc1(features)
        return logits

# Define VAT loss function
def virtual_adversarial_loss(model, x, epsilon=1e-6):
    """Cross-entropy of the model on a sign-gradient perturbation of ``x``.

    The model's own arg-max predictions on the clean input act as targets. The
    input is moved by ``epsilon * sign(d loss / d x)`` and the loss of the
    perturbed input against those same targets is returned.

    NOTE(review): this is a single sign-of-gradient step with hard pseudo-labels,
    not the KL-divergence / power-iteration formulation usually called VAT.

    Args:
        model: Module mapping an input batch to class logits.
        x: Input batch. It is not modified; the gradient is taken on a detached copy.
        epsilon: Magnitude of the perturbation applied to every input element.

    Returns:
        Scalar loss tensor attached to the model's graph, so calling
        ``backward()`` on it (or on a sum containing it) trains the model.
    """
    # Differentiate w.r.t. a private copy so the caller's tensor keeps its
    # original requires_grad flag and never accumulates a .grad.
    x_probe = x.detach().clone().requires_grad_(True)
    logits = model(x_probe)
    pseudo_labels = torch.max(logits, 1)[1]

    # torch.autograd.grad returns only the input gradient. The previous
    # loss.backward() also added this auxiliary loss's gradient to every model
    # parameter, which then leaked into the optimizer step.
    probe_loss = F.cross_entropy(logits, pseudo_labels)
    (grad,) = torch.autograd.grad(probe_loss, x_probe)

    # Perturbation
    perturbation = epsilon * torch.sign(grad)

    # Perturbed data, scored against the clean-input predictions.
    x_perturbed = x + perturbation
    logits_perturbed = model(x_perturbed)
    loss_perturbed = F.cross_entropy(logits_perturbed, pseudo_labels)

    return loss_perturbed

# Combined loss for VAT model
def vat_loss(model, x, labels, criterion, epsilon=1e-6, alpha=1.0):
    # Cross-entropy loss for labeled data
    ce_loss = criterion(model(x), labels)

    # Virtual adversarial loss (VAT)
    va_loss = virtual_adversarial_loss(model, x, epsilon)

    return ce_loss + alpha * va_loss

# Training function for VAT model
def train_vat_model(model, labeled_trainloader, unlabeled_trainloader, criterion, optimizer, num_epochs=10, epsilon=1e-6, alpha=1.0):
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        for (inputs, labels), (inputs_unlabeled, _) in zip(labeled_trainloader, unlabeled_trainloader):
            optimizer.zero_grad()
            # Train on labeled data with VAT applied to unlabeled data
            loss = vat_loss(model, inputs, labels, criterion, epsilon, alpha)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f"Epoch {epoch+1}, Loss: {running_loss/len(labeled_trainloader)}")

# Optuna objective function for hyperparameter search
def optimize_vat_hyperparameters(labeled_trainloader, unlabeled_trainloader, testloader):
    """Run a 20-trial Optuna search for the VAT model and return the best parameters.

    Each trial samples lr, momentum, epsilon, alpha and batch size, trains a
    fresh ``VATModel`` for 10 epochs and is scored by accuracy on the
    module-level ``validloader``.

    Args:
        labeled_trainloader: Accepted for backward compatibility; not read.
        unlabeled_trainloader: Accepted for backward compatibility; not read.
        testloader: Accepted for backward compatibility; not read. Tuning never
            touches the test set.

    Loaders are rebuilt per trial from the module-level ``labeled_trainset`` and
    ``unlabeled_trainset`` so the sampled batch size takes effect.

    Returns:
        dict of the best trial's hyperparameters (keys: ``lr``, ``momentum``,
        ``epsilon``, ``alpha``, ``batch_size``).
    """
    def objective(trial):
        # suggest_float replaces the deprecated suggest_loguniform /
        # suggest_uniform; parameter names and ranges are unchanged.
        lr = trial.suggest_float('lr', 1e-5, 1e-1, log=True)
        momentum = trial.suggest_float('momentum', 0.8, 0.95)
        epsilon = trial.suggest_float('epsilon', 1e-6, 1e-1, log=True)
        alpha = trial.suggest_float('alpha', 0.5, 2.0)
        batch_size = trial.suggest_categorical('batch_size', [32, 64, 128])

        # Create model, optimizer, and criterion
        model = VATModel()
        optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
        criterion = nn.CrossEntropyLoss()

        # Distinct local names avoid shadowing the outer function's parameters.
        trial_labeled_loader = DataLoader(labeled_trainset, batch_size=batch_size, shuffle=True)
        trial_unlabeled_loader = DataLoader(unlabeled_trainset, batch_size=batch_size, shuffle=True)

        # Train the VAT model
        train_vat_model(model, trial_labeled_loader, trial_unlabeled_loader, criterion, optimizer, num_epochs=10, epsilon=epsilon, alpha=alpha)

        # Score on the validation split with the shared accuracy helper.
        return evaluate_supervised_model(model, validloader)

    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=20)

    print(f"Best trial: {study.best_trial.value}")
    print(f"Best hyperparameters: {study.best_trial.params}")
    return study.best_trial.params

# Run the VAT hyperparameter search and keep the best parameter dict.
# NOTE(review): as written, optimize_vat_hyperparameters does not read any of the
# three loader arguments (it rebuilds loaders per trial and scores on
# `validloader`), so passing `testloader` here does not expose the test set to tuning.
vat_best_params = optimize_vat_hyperparameters(labeled_trainloader, unlabeled_trainloader, testloader)

# `vat_best_params` holds lr, momentum, epsilon, alpha and batch_size for the final VAT training run.

[I 2025-08-31 07:41:36,044] A new study created in memory with name: no-name-e9ff834d-e65e-4743-a307-4802fc04eff0
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-1)
  momentum = trial.suggest_uniform('momentum', 0.8, 0.95)
  epsilon = trial.suggest_loguniform('epsilon', 1e-6, 1e-1)
  alpha = trial.suggest_uniform('alpha', 0.5, 2.0)


Epoch 1, Loss: 3.503390174163015
Epoch 2, Loss: 3.182346594961066
Epoch 3, Loss: 3.1231669187545776
Epoch 4, Loss: 3.065893813183433
Epoch 5, Loss: 3.0262865392785323
Epoch 6, Loss: 2.9909278781790483
Epoch 7, Loss: 2.964064880421287
Epoch 8, Loss: 2.940559048401682
Epoch 9, Loss: 2.916605886660124
Epoch 10, Loss: 2.903702735900879


[I 2025-08-31 07:43:07,514] Trial 0 finished with value: 11.833333333333334 and parameters: {'lr': 0.0002933545621957897, 'momentum': 0.8726100152878178, 'epsilon': 2.2241271650750257e-06, 'alpha': 0.6242247997400614, 'batch_size': 128}. Best is trial 0 with value: 11.833333333333334.


Epoch 1, Loss: 3.9939275328318278
Epoch 2, Loss: 3.320546964009603
Epoch 3, Loss: 3.1655237038930255
Epoch 4, Loss: 3.0780448627471926
Epoch 5, Loss: 2.9674384212493896
Epoch 6, Loss: 2.819689718882243
Epoch 7, Loss: 2.3940446472167967
Epoch 8, Loss: 2.1679722690582275
Epoch 9, Loss: 2.064407498041789
Epoch 10, Loss: 1.9316555070877075


[I 2025-08-31 07:44:38,457] Trial 1 finished with value: 48.75 and parameters: {'lr': 0.0055408037667521, 'momentum': 0.9106519230578937, 'epsilon': 0.0002303768254359234, 'alpha': 1.8719397650367253, 'batch_size': 64}. Best is trial 1 with value: 48.75.


Epoch 1, Loss: 3.2542110430566886
Epoch 2, Loss: 3.237873522858871
Epoch 3, Loss: 3.1323172418694747
Epoch 4, Loss: 3.0956427674544487
Epoch 5, Loss: 3.073359169458088
Epoch 6, Loss: 3.0486476986031783
Epoch 7, Loss: 3.021965353112472
Epoch 8, Loss: 3.009729780648884
Epoch 9, Loss: 2.993721183977629
Epoch 10, Loss: 2.9797720156217875


[I 2025-08-31 07:46:11,849] Trial 2 finished with value: 9.916666666666666 and parameters: {'lr': 9.43220258284373e-05, 'momentum': 0.8659118689218435, 'epsilon': 1.746439925258192e-06, 'alpha': 0.5400523056802289, 'batch_size': 128}. Best is trial 1 with value: 48.75.


Epoch 1, Loss: 3.1680461883544924
Epoch 2, Loss: 2.9140812428792318
Epoch 3, Loss: 2.831033765474955
Epoch 4, Loss: 2.76817040125529
Epoch 5, Loss: 2.7190186484654744
Epoch 6, Loss: 2.677310883204142
Epoch 7, Loss: 2.641738576889038
Epoch 8, Loss: 2.6025713284810386
Epoch 9, Loss: 2.5645540793736776
Epoch 10, Loss: 2.5073161856333415


[I 2025-08-31 07:47:45,769] Trial 3 finished with value: 16.75 and parameters: {'lr': 0.0003354520278501304, 'momentum': 0.924309375146222, 'epsilon': 6.503816544194226e-06, 'alpha': 0.6659601437557676, 'batch_size': 32}. Best is trial 1 with value: 48.75.


Epoch 1, Loss: 3.7331960805257163
Epoch 2, Loss: 3.4729818312327065
Epoch 3, Loss: 3.3995107714335124
Epoch 4, Loss: 3.3461879444122316
Epoch 5, Loss: 3.3080249627431235
Epoch 6, Loss: 3.27389222462972
Epoch 7, Loss: 3.2491898345947265
Epoch 8, Loss: 3.2243780453999835
Epoch 9, Loss: 3.205532162984212
Epoch 10, Loss: 3.1858344173431394


[I 2025-08-31 07:49:20,346] Trial 4 finished with value: 11.25 and parameters: {'lr': 0.00030723953083488316, 'momentum': 0.8540882294190101, 'epsilon': 0.0004216536471012794, 'alpha': 1.3314548775094712, 'batch_size': 64}. Best is trial 1 with value: 48.75.


Epoch 1, Loss: 4.609454374564321
Epoch 2, Loss: 3.7104692710073373
Epoch 3, Loss: 3.554553690709566
Epoch 4, Loss: 3.450389084063078
Epoch 5, Loss: 3.364758560532018
Epoch 6, Loss: 3.2872485173375985
Epoch 7, Loss: 3.205483122875816
Epoch 8, Loss: 3.116397782375938
Epoch 9, Loss: 3.023719906806946
Epoch 10, Loss: 2.940094508622822


[I 2025-08-31 07:50:52,481] Trial 5 finished with value: 8.666666666666666 and parameters: {'lr': 0.0005889932913617048, 'momentum': 0.9021824322033115, 'epsilon': 0.07601371843631557, 'alpha': 1.6809140950568353, 'batch_size': 128}. Best is trial 1 with value: 48.75.


Epoch 1, Loss: 3.921592469215393
Epoch 2, Loss: 3.693658216794332
Epoch 3, Loss: 3.657158850034078
Epoch 4, Loss: 3.6280328305562337
Epoch 5, Loss: 3.6024501498540245
Epoch 6, Loss: 3.579818008740743
Epoch 7, Loss: 3.5594992955525715
Epoch 8, Loss: 3.543055313428243
Epoch 9, Loss: 3.525134859085083
Epoch 10, Loss: 3.5104642216364543


[I 2025-08-31 07:52:28,310] Trial 6 finished with value: 11.833333333333334 and parameters: {'lr': 2.362737749699348e-05, 'momentum': 0.83130733258469, 'epsilon': 0.004017988604013305, 'alpha': 1.4112009962459717, 'batch_size': 32}. Best is trial 1 with value: 48.75.


Epoch 1, Loss: 3.9748768186569214
Epoch 2, Loss: 3.4743271112442016
Epoch 3, Loss: 3.364459335009257
Epoch 4, Loss: 3.28841548760732
Epoch 5, Loss: 3.227428631782532
Epoch 6, Loss: 3.1700661977132163
Epoch 7, Loss: 3.1228035847345987
Epoch 8, Loss: 3.079485586484273
Epoch 9, Loss: 3.03784996509552
Epoch 10, Loss: 2.9975254662831623


[I 2025-08-31 07:54:05,025] Trial 7 finished with value: 12.333333333333334 and parameters: {'lr': 0.0004992026563587909, 'momentum': 0.9323393471471606, 'epsilon': 0.00043438340462051054, 'alpha': 1.8851383509439765, 'batch_size': 32}. Best is trial 1 with value: 48.75.


Epoch 1, Loss: 3.5712318054835
Epoch 2, Loss: 3.3763566970825196
Epoch 3, Loss: 3.3091321245829266
Epoch 4, Loss: 3.2612923431396483
Epoch 5, Loss: 3.2254502693812053
Epoch 6, Loss: 3.1953337399164834
Epoch 7, Loss: 3.1671636708577475
Epoch 8, Loss: 3.1479291613896687
Epoch 9, Loss: 3.126456408500671
Epoch 10, Loss: 3.108420893351237


[I 2025-08-31 07:55:40,674] Trial 8 finished with value: 9.416666666666666 and parameters: {'lr': 0.0002579358176957561, 'momentum': 0.8141048194135886, 'epsilon': 1.2736444707960457e-05, 'alpha': 1.2271725036673269, 'batch_size': 32}. Best is trial 1 with value: 48.75.


Epoch 1, Loss: 3.363462282816569
Epoch 2, Loss: 3.0513845984141033
Epoch 3, Loss: 2.918692766825358
Epoch 4, Loss: 2.785106976826986
Epoch 5, Loss: 2.677261637846629
Epoch 6, Loss: 2.7888553206125897
Epoch 7, Loss: 2.8338710792859394
Epoch 8, Loss: 2.6334113478660583
Epoch 9, Loss: 2.713365747133891
Epoch 10, Loss: 2.5568069624900818


[I 2025-08-31 07:57:07,499] Trial 9 finished with value: 18.583333333333332 and parameters: {'lr': 0.026096991764774945, 'momentum': 0.9394170424257748, 'epsilon': 1.121213674983256e-05, 'alpha': 0.6935880215844469, 'batch_size': 32}. Best is trial 1 with value: 48.75.


Epoch 1, Loss: 4.222530946731568
Epoch 2, Loss: 3.2792523161570233
Epoch 3, Loss: 3.0922235584259035
Epoch 4, Loss: 2.8033577537536623
Epoch 5, Loss: 2.5493736362457273
Epoch 6, Loss: 2.447369016011556
Epoch 7, Loss: 2.204736895561218
Epoch 8, Loss: 2.191259420712789
Epoch 9, Loss: 2.044482192993164
Epoch 10, Loss: 1.825566504796346


[I 2025-08-31 07:58:36,337] Trial 10 finished with value: 45.75 and parameters: {'lr': 0.012535001592627755, 'momentum': 0.906860967250878, 'epsilon': 7.458241898462628e-05, 'alpha': 1.9645990885495477, 'batch_size': 64}. Best is trial 1 with value: 48.75.


Epoch 1, Loss: 3.968388833999634
Epoch 2, Loss: 3.300257008870443
Epoch 3, Loss: 3.0173553244272866
Epoch 4, Loss: 2.6825317351023354
Epoch 5, Loss: 2.4764795446395875
Epoch 6, Loss: 2.3096696106592813
Epoch 7, Loss: 2.1818543418248493
Epoch 8, Loss: 2.037479732831319
Epoch 9, Loss: 1.9471484772364298
Epoch 10, Loss: 1.7742943207422892


[I 2025-08-31 08:00:04,494] Trial 11 finished with value: 47.0 and parameters: {'lr': 0.010845855109372325, 'momentum': 0.9016137211898128, 'epsilon': 7.359596355837667e-05, 'alpha': 1.9784424339354418, 'batch_size': 64}. Best is trial 1 with value: 48.75.


Epoch 1, Loss: 3.782855895360311
Epoch 2, Loss: 3.268344472249349
Epoch 3, Loss: 3.1434936714172363
Epoch 4, Loss: 3.0304377110799154
Epoch 5, Loss: 2.964245500564575
Epoch 6, Loss: 2.8584439309438068
Epoch 7, Loss: 2.6124488353729247
Epoch 8, Loss: 2.4158937803904217
Epoch 9, Loss: 2.176226708094279
Epoch 10, Loss: 2.076652863820394


[I 2025-08-31 08:01:37,910] Trial 12 finished with value: 49.75 and parameters: {'lr': 0.004683444236568706, 'momentum': 0.8915845187796497, 'epsilon': 7.633183703505233e-05, 'alpha': 1.6675574886555689, 'batch_size': 64}. Best is trial 12 with value: 49.75.


Epoch 1, Loss: 3.808923956553141
Epoch 2, Loss: 3.365183130900065
Epoch 3, Loss: 3.2429240385691327
Epoch 4, Loss: 3.167512413660685
Epoch 5, Loss: 3.1065325133005777
Epoch 6, Loss: 3.0493707847595215
Epoch 7, Loss: 2.9952682081858315
Epoch 8, Loss: 2.9475677490234373
Epoch 9, Loss: 2.9077593580881755
Epoch 10, Loss: 2.861718772252401


[I 2025-08-31 08:03:10,575] Trial 13 finished with value: 17.333333333333332 and parameters: {'lr': 0.0034804064404314644, 'momentum': 0.8897746638506009, 'epsilon': 0.002046914060295387, 'alpha': 1.6824098444121782, 'batch_size': 64}. Best is trial 12 with value: 49.75.


Epoch 1, Loss: 3.8537770398457845
Epoch 2, Loss: 3.2900181516011555
Epoch 3, Loss: 3.174587526321411
Epoch 4, Loss: 3.0904971917470294
Epoch 5, Loss: 3.0026851431528727
Epoch 6, Loss: 2.9335293134053546
Epoch 7, Loss: 2.8631920210520425
Epoch 8, Loss: 2.715961227416992
Epoch 9, Loss: 2.5019987042744956
Epoch 10, Loss: 2.370544106165568


[I 2025-08-31 08:04:44,021] Trial 14 finished with value: 30.833333333333332 and parameters: {'lr': 0.002584554397761117, 'momentum': 0.9152923827083421, 'epsilon': 9.322360920772776e-05, 'alpha': 1.607861335162541, 'batch_size': 64}. Best is trial 12 with value: 49.75.


Epoch 1, Loss: 5.506511516571045
Epoch 2, Loss: 3.4921219730377198
Epoch 3, Loss: 3.4540666929880777
Epoch 4, Loss: 3.408139279683431
Epoch 5, Loss: 3.36717635790507
Epoch 6, Loss: 3.325817330678304
Epoch 7, Loss: 3.28755446434021
Epoch 8, Loss: 3.243302904764811
Epoch 9, Loss: 3.1727186012268067
Epoch 10, Loss: 3.180309368769328


[I 2025-08-31 08:06:04,460] Trial 15 finished with value: 14.25 and parameters: {'lr': 0.09962500191148407, 'momentum': 0.9490698900898161, 'epsilon': 0.002075467998735307, 'alpha': 1.0711633244063599, 'batch_size': 64}. Best is trial 12 with value: 49.75.


Epoch 1, Loss: 3.7548627185821535
Epoch 2, Loss: 3.317526038487752
Epoch 3, Loss: 3.207225348154704
Epoch 4, Loss: 3.1455707995096844
Epoch 5, Loss: 3.0829796600341797
Epoch 6, Loss: 3.0330856291453046
Epoch 7, Loss: 2.9898232968648273
Epoch 8, Loss: 2.94745033899943
Epoch 9, Loss: 2.917167631785075
Epoch 10, Loss: 2.8884943548838296


[I 2025-08-31 08:07:37,662] Trial 16 finished with value: 13.083333333333334 and parameters: {'lr': 0.002565513331896745, 'momentum': 0.8804068921404457, 'epsilon': 0.013141111271268434, 'alpha': 1.5140783821577934, 'batch_size': 64}. Best is trial 12 with value: 49.75.


Epoch 1, Loss: 3.9703250058492023
Epoch 2, Loss: 3.216384932200114
Epoch 3, Loss: 3.030789581934611
Epoch 4, Loss: 2.7381956990559897
Epoch 5, Loss: 2.6109069045384725
Epoch 6, Loss: 2.4731252066294354
Epoch 7, Loss: 2.252580580711365
Epoch 8, Loss: 2.18709902604421
Epoch 9, Loss: 2.0614030996958417
Epoch 10, Loss: 1.989538623491923


[I 2025-08-31 08:09:02,858] Trial 17 finished with value: 39.916666666666664 and parameters: {'lr': 0.04392108587174598, 'momentum': 0.8446960130935948, 'epsilon': 0.00020802899322891046, 'alpha': 1.7955947870691342, 'batch_size': 64}. Best is trial 12 with value: 49.75.


Epoch 1, Loss: 3.4485055764516193
Epoch 2, Loss: 2.984088484446208
Epoch 3, Loss: 2.8282949924468994
Epoch 4, Loss: 2.4647935485839843
Epoch 5, Loss: 2.180817084312439
Epoch 6, Loss: 2.03221075852712
Epoch 7, Loss: 1.8462496916453044
Epoch 8, Loss: 1.778871579170227
Epoch 9, Loss: 1.6450654109319052
Epoch 10, Loss: 1.5741627661387125


[I 2025-08-31 08:10:32,782] Trial 18 finished with value: 48.0 and parameters: {'lr': 0.006093240840507661, 'momentum': 0.8888334970311471, 'epsilon': 2.396935059500899e-05, 'alpha': 1.077791455233288, 'batch_size': 64}. Best is trial 12 with value: 49.75.


Epoch 1, Loss: 3.949950205485026
Epoch 2, Loss: 3.4121944363911947
Epoch 3, Loss: 3.3067706489562987
Epoch 4, Loss: 3.2310439205169676
Epoch 5, Loss: 3.176257158915202
Epoch 6, Loss: 3.1254377714792887
Epoch 7, Loss: 3.0729797649383546
Epoch 8, Loss: 3.034290132522583
Epoch 9, Loss: 2.9915740426381427
Epoch 10, Loss: 2.9537227598826092


[I 2025-08-31 08:12:03,079] Trial 19 finished with value: 10.666666666666666 and parameters: {'lr': 0.0013267130301124423, 'momentum': 0.9189905399254423, 'epsilon': 0.0006977728075051315, 'alpha': 1.799684167772557, 'batch_size': 64}. Best is trial 12 with value: 49.75.


Best trial: 49.75
Best hyperparameters: {'lr': 0.004683444236568706, 'momentum': 0.8915845187796497, 'epsilon': 7.633183703505233e-05, 'alpha': 1.6675574886555689, 'batch_size': 64}


In [None]:
# Now compare the performance of VAT and fully supervised using respective best parameters

# Initialize and train the fully-supervised model using best parameters
# Define Fully-Supervised Model (Simple CNN)
class FullySupervisedCNN(nn.Module):
    """Fully-supervised baseline network.

    conv 3x3 (3 -> 32, padding 1) -> ReLU -> max-pool 2x2 -> flatten -> linear
    to 10 logits. The flatten size (32 * 16 * 16) fixes the expected input at
    32x32 pixels.
    """

    _FLAT_FEATURES = 32 * 16 * 16  # channels * height * width after pooling

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(self._FLAT_FEATURES, 10)

    def forward(self, x):
        """Map a batch of images to class logits."""
        hidden = self.conv1(x)
        hidden = torch.relu(hidden)
        hidden = self.pool(hidden)
        hidden = hidden.view(-1, self._FLAT_FEATURES)
        return self.fc1(hidden)

# Final fully-supervised baseline: fresh network, SGD configured with the tuned
# learning rate and momentum from `supervised_best_params`, cross-entropy loss.
# NOTE(review): the tuned 'batch_size' is not applied in these lines.
model = FullySupervisedCNN()
optimizer = optim.SGD(model.parameters(), lr=supervised_best_params['lr'], momentum=supervised_best_params['momentum'])
criterion = nn.CrossEntropyLoss()

# Training function for models
def train_model(model, trainloader, criterion, optimizer, num_epochs=10):
    """Run a plain supervised training loop and print the mean batch loss per epoch.

    Args:
        model: Module to train; it is put in training mode.
        trainloader: Iterable of ``(inputs, labels)`` batches that supports ``len()``.
        criterion: Loss called as ``criterion(model(inputs), labels)``.
        optimizer: Optimizer over the model's parameters.
        num_epochs: Number of passes over ``trainloader``.
    """
    model.train()
    for epoch_number in range(1, num_epochs + 1):
        loss_total = 0.0
        for batch_inputs, batch_labels in trainloader:
            optimizer.zero_grad()
            batch_loss = criterion(model(batch_inputs), batch_labels)
            batch_loss.backward()
            optimizer.step()
            loss_total += batch_loss.item()
        mean_loss = loss_total / len(trainloader)
        print(f"Epoch {epoch_number}, Loss: {mean_loss}")

# `trainloader` is not defined anywhere in this notebook, so the original call only
# worked off leftover kernel state. Train the baseline on the same labeled split
# used for tuning, with the tuned batch size, so the comparison with VAT uses
# the same labeled data budget.
supervised_trainloader = DataLoader(labeled_trainset, batch_size=supervised_best_params['batch_size'], shuffle=True)
train_model(model, supervised_trainloader, criterion, optimizer, num_epochs=10)

Epoch 1, Loss: 1.4652755618705164
Epoch 2, Loss: 1.1664301307152605
Epoch 3, Loss: 1.047432781332899
Epoch 4, Loss: 0.9822487318912125
Epoch 5, Loss: 0.9283737188105083
Epoch 6, Loss: 0.8880558254392555
Epoch 7, Loss: 0.8585296645951088
Epoch 8, Loss: 0.827574706710208
Epoch 9, Loss: 0.806894422301551
Epoch 10, Loss: 0.7838642739731333


In [None]:

# Final VAT model, configured entirely from the tuned hyperparameters.
ssl_model = VATModel()
optimizer_ssl = optim.SGD(ssl_model.parameters(), lr=vat_best_params['lr'], momentum=vat_best_params['momentum'])

# Define criterion for VAT model
criterion_ssl = nn.CrossEntropyLoss()

# Rebuild the loaders with the tuned batch size, as the tuning objective did.
ssl_labeled_loader = DataLoader(labeled_trainset, batch_size=vat_best_params['batch_size'], shuffle=True)
ssl_unlabeled_loader = DataLoader(unlabeled_trainset, batch_size=vat_best_params['batch_size'], shuffle=True)

# Train SSL (VAT) model.
# This cell used to re-define train_vat_model with alpha=1.0 and epsilon=1e-6
# hard-coded, which shadowed the definition used during tuning and discarded
# the tuned values. Reuse that earlier definition and pass the tuned epsilon /
# alpha through explicitly.
train_vat_model(
    ssl_model,
    ssl_labeled_loader,
    ssl_unlabeled_loader,
    criterion_ssl,
    optimizer_ssl,
    num_epochs=10,
    epsilon=vat_best_params['epsilon'],
    alpha=vat_best_params['alpha'],
)

Epoch 1, Loss: 3.3468048350016275
Epoch 2, Loss: 2.9587280813852948
Epoch 3, Loss: 2.8115629291534425
Epoch 4, Loss: 2.688424596786499
Epoch 5, Loss: 2.3976821835835773
Epoch 6, Loss: 2.1479301420847574
Epoch 7, Loss: 1.9474782896041871
Epoch 8, Loss: 1.797435245513916
Epoch 9, Loss: 1.7121193997065227
Epoch 10, Loss: 1.6093256950378418


In [None]:
# Evaluate function
def evaluate(model):
    """Return the top-1 accuracy (percent) of ``model`` on the module-level ``testloader``.

    The model is switched to eval mode and gradients are disabled during inference.
    """
    model.eval()
    hits = 0
    seen = 0
    with torch.no_grad():
        for batch_inputs, batch_labels in testloader:
            predicted = model(batch_inputs).max(dim=1).indices
            seen += batch_labels.size(0)
            hits += (predicted == batch_labels).sum().item()
    return 100 * hits / seen

# ssl_model_accuracy = evaluate(ssl_model)
# print(f"SSL (VAT) model accuracy: {ssl_model_accuracy}%")

In [None]:
# Compare performance of SSL (VAT) vs Fully-supervised on the held-out test set
fully_supervised_accuracy = evaluate(model)
ssl_model_accuracy = evaluate(ssl_model)

print(f"Fully-supervised model accuracy: {fully_supervised_accuracy}%")
print(f"SSL (VAT) model accuracy: {ssl_model_accuracy}%")

# Report the measured gap. The earlier branch printed "the gap is narrower"
# whenever the supervised model scored at least as high, a conclusion that a
# single comparison with no untuned reference point cannot support.
accuracy_gap = ssl_model_accuracy - fully_supervised_accuracy
print(f"Accuracy gap (SSL - fully-supervised): {accuracy_gap:+.2f} percentage points")
if accuracy_gap > 0:
    print("SSL (VAT) outperforms the fully-supervised baseline under equal hyperparameter tuning.")
elif accuracy_gap < 0:
    print("The fully-supervised baseline outperforms SSL (VAT) under equal hyperparameter tuning.")
else:
    print("SSL (VAT) and the fully-supervised baseline reach the same test accuracy.")

Fully-supervised model accuracy: 63.22%
SSL (VAT) model accuracy: 49.46%
The performance gap between SSL (VAT) and fully-supervised methods is narrower when hyperparameters are optimally tuned.
