<a href="https://colab.research.google.com/github/AvtnshM/SSL/blob/main/Self_Supervised_Learning%20-%20v4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

Using device: cuda


In [2]:

# Used ONLY to load dataset
base_transform = transforms.ToTensor()

# Used to create SSL views
ssl_transform = transforms.Compose([
    transforms.RandomResizedCrop(32),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(0.4, 0.4, 0.4, 0.1),
    transforms.ToTensor()
])

to_pil = transforms.ToPILImage()

In [3]:

dataset = datasets.CIFAR10(
    root="./data",
    train=True,
    download=True,
    transform=base_transform   # ← important
)

100%|██████████| 170M/170M [01:51<00:00, 1.53MB/s]


In [4]:

loader = DataLoader(
    dataset,
    batch_size=256,
    shuffle=True,
    num_workers=2,
    drop_last=True
)

print("DataLoader created. Number of batches:", len(loader))

DataLoader created. Number of batches: 195


In [5]:

class Encoder(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv2d(3, 64, 3, stride=2),
            nn.ReLU(),
            nn.Conv2d(64, 128, 3, stride=2),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d(1)
        )

    def forward(self, x):
        x = self.net(x)
        return x.view(x.size(0), -1)

In [6]:

class Predictor(nn.Module):
    def __init__(self, dim=128):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(dim, dim),
            nn.ReLU(),
            nn.Linear(dim, dim)
        )

    def forward(self, x):
        return self.net(x)

In [7]:

class Projector(nn.Module):
    def __init__(self, in_dim=128, hidden_dim=256, out_dim=128):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(in_dim, hidden_dim),
            nn.BatchNorm1d(hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, out_dim)
        )

    def forward(self, x):
        return self.net(x)


In [8]:

def ssl_loss(p, z):
    p = F.normalize(p, dim=1)
    z = F.normalize(z, dim=1)
    return F.mse_loss(p, z)

In [9]:
# =========================
# : Models, Optimizer, EMA
# =========================

encoder = Encoder().to(device)
projector = Projector().to(device)
predictor = Predictor(dim=128).to(device)

target_encoder = Encoder().to(device)
target_encoder.load_state_dict(encoder.state_dict())

for param in target_encoder.parameters():
    param.requires_grad = False

optimizer = torch.optim.Adam(
    list(encoder.parameters()) +
    list(projector.parameters()) +
    list(predictor.parameters()),
    lr=1e-3
)

# EMA tuned for small model
ema_tau = 0.99


In [10]:
# =========================
# Collapse Diagnostics
# =========================

@torch.no_grad()
def feature_variance(z):
    """
    Average variance across embedding dimensions.
    Collapse => variance ~ 0
    """
    return z.var(dim=0).mean().item()


@torch.no_grad()
def cosine_similarity_mean(z1, z2):
    """
    Mean cosine similarity between embeddings of two views.
    Collapse => similarity ~ 1.0
    """
    z1 = F.normalize(z1, dim=1)
    z2 = F.normalize(z2, dim=1)
    return (z1 * z2).sum(dim=1).mean().item()


In [11]:

@torch.no_grad()
def update_target_encoder(online_encoder, target_encoder, tau):
    for online_param, target_param in zip(
        online_encoder.parameters(),
        target_encoder.parameters()
    ):
        target_param.data = (
            tau * target_param.data +
            (1.0 - tau) * online_param.data
        )

In [12]:
# =========================
# Training Loop (with diagnostics)
# =========================

epochs = 10

for epoch in range(epochs):
    total_loss = 0.0
    total_var = 0.0
    total_cos = 0.0

    for images, _ in loader:
        images = images.to(device)

        # Two stochastic views
        view1 = torch.stack([
            ssl_transform(to_pil(img.cpu())) for img in images
        ]).to(device)

        view2 = torch.stack([
            ssl_transform(to_pil(img.cpu())) for img in images
        ]).to(device)

        # -------------------------
        # Online branch
        # -------------------------
        z1 = encoder(view1)
        z2 = encoder(view2)

        h1 = projector(z1)
        h2 = projector(z2)

        p1 = predictor(h1)
        p2 = predictor(h2)

        # -------------------------
        # Target branch (EMA)
        # -------------------------
        with torch.no_grad():
            t1 = projector(target_encoder(view1))
            t2 = projector(target_encoder(view2))

        # BYOL loss (cross-view)
        loss = ssl_loss(p1, t2.detach()) + ssl_loss(p2, t1.detach())

        # Optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # EMA update
        update_target_encoder(encoder, target_encoder, ema_tau)

        # -------------------------
        # Diagnostics (encoder space)
        # -------------------------
        with torch.no_grad():
            batch_var = feature_variance(z1)
            batch_cos = cosine_similarity_mean(z1, z2)

        total_loss += loss.item()
        total_var += batch_var
        total_cos += batch_cos

    avg_loss = total_loss / len(loader)
    avg_var = total_var / len(loader)
    avg_cos = total_cos / len(loader)

    print(
        f"Epoch [{epoch+1}/{epochs}] | "
        f"Loss: {avg_loss:.4f} | "
        f"Var: {avg_var:.4f} | "
        f"CosSim: {avg_cos:.4f}"
    )


Epoch [1/10] | Loss: 0.0037 | Var: 0.0018 | CosSim: 0.9760
Epoch [2/10] | Loss: 0.0022 | Var: 0.0014 | CosSim: 0.9657
Epoch [3/10] | Loss: 0.0021 | Var: 0.0011 | CosSim: 0.9570
Epoch [4/10] | Loss: 0.0022 | Var: 0.0009 | CosSim: 0.9497
Epoch [5/10] | Loss: 0.0023 | Var: 0.0008 | CosSim: 0.9445
Epoch [6/10] | Loss: 0.0024 | Var: 0.0007 | CosSim: 0.9394
Epoch [7/10] | Loss: 0.0024 | Var: 0.0007 | CosSim: 0.9334
Epoch [8/10] | Loss: 0.0024 | Var: 0.0006 | CosSim: 0.9261
Epoch [9/10] | Loss: 0.0026 | Var: 0.0006 | CosSim: 0.9199
Epoch [10/10] | Loss: 0.0027 | Var: 0.0006 | CosSim: 0.9144
