In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Section 1: Import Libraries
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from torchvision.models import resnet18, ResNet18_Weights
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
from PIL import Image
import matplotlib.pyplot as plt

# Section 2: Load and Preprocess Dataset
train_images_path = "/content/drive/MyDrive/Brain_images/brain_train_image_final.npy"
train_labels_path = "/content/drive/MyDrive/Brain_images/brain_train_label.npy"
test_images_path = "/content/drive/MyDrive/Brain_images/brain_test_image_final.npy"
test_labels_path = "/content/drive/MyDrive/Brain_images/brain_test_label.npy"

# Load the data
final_X_train_modified = np.load(train_images_path)[:, 1, :, :]
final_X_test_modified = np.load(test_images_path)[:, 1, :, :]
train_labels = np.load(train_labels_path)
test_labels = np.load(test_labels_path)

# Normalize and Resize Images using Pillow
def normalize_and_resize(images, target_size=(224, 224)):
    resized_images = []
    for img in images:
        img = Image.fromarray((img * 255).astype(np.uint8))
        img_resized = img.resize(target_size, Image.Resampling.LANCZOS)
        resized_images.append(np.array(img_resized) / 255.0)
    return np.array(resized_images)

final_X_train_resized = normalize_and_resize(final_X_train_modified)
final_X_test_resized = normalize_and_resize(final_X_test_modified)


In [4]:
# Section 3: Define SimCLR Augmentation and Dataset
transform_simclr = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomResizedCrop(size=224, scale=(0.08, 1.0), ratio=(3/4, 4/3)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ColorJitter(brightness=0.8, contrast=0.8, saturation=0.8, hue=0.2),
    transforms.RandomApply([transforms.GaussianBlur(kernel_size=23, sigma=(0.1, 2.0))], p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])

class SimCLRDataset(Dataset):
    def __init__(self, images, transform):
        self.images = images
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img = self.images[idx]
        img_1 = self.transform(img)
        img_2 = self.transform(img)
        return img_1, img_2

train_dataset = SimCLRDataset(final_X_train_resized, transform_simclr)
train_loader = DataLoader(train_dataset, batch_size=512, shuffle=True)


In [5]:
import os
from PIL import Image

# Section 4: Save Augmented Images
def save_augmented_images(dataset, save_dir="augmented_images", num_images=5):
    """
    Save a set of augmented images from the dataset to a directory.
    """
    # Create directory if it doesn't exist
    os.makedirs(save_dir, exist_ok=True)

    for i in range(num_images):
        img_original = dataset.images[i]  # Original image
        img_aug1, img_aug2 = dataset[i]  # Augmented pair

        # Unnormalize images
        img_aug1 = transforms.ToPILImage()(img_aug1 * 0.5 + 0.5)
        img_aug2 = transforms.ToPILImage()(img_aug2 * 0.5 + 0.5)

        # Save images
        img_aug1.save(os.path.join(save_dir, f"image_{i}_aug1.png"))
        img_aug2.save(os.path.join(save_dir, f"image_{i}_aug2.png"))

        print(f"Saved image_{i}_aug1.png and image_{i}_aug2.png to {save_dir}")

# Save augmented images from the training dataset
save_augmented_images(train_dataset, save_dir="augmented_images", num_images=5)


Saved image_0_aug1.png and image_0_aug2.png to augmented_images
Saved image_1_aug1.png and image_1_aug2.png to augmented_images
Saved image_2_aug1.png and image_2_aug2.png to augmented_images
Saved image_3_aug1.png and image_3_aug2.png to augmented_images
Saved image_4_aug1.png and image_4_aug2.png to augmented_images


In [6]:
# Section 4: Define SimCLR Model and NT-Xent Loss
class SimCLR(nn.Module):
    def __init__(self, base_encoder, projection_dim):
        super(SimCLR, self).__init__()
        self.encoder = base_encoder
        self.projector = nn.Sequential(
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, projection_dim)
        )

    def forward(self, x):
        h = self.encoder(x)
        z = self.projector(h)
        return z

class NTXentLoss(nn.Module):
    def __init__(self, batch_size, temperature):
        super(NTXentLoss, self).__init__()
        self.batch_size = batch_size
        self.temperature = temperature
        self.criterion = nn.CrossEntropyLoss(reduction="sum")

    def forward(self, z_i, z_j):
        N = z_i.size(0) + z_j.size(0)
        z = torch.cat((z_i, z_j), dim=0)
        sim = torch.matmul(z, z.T) / self.temperature
        mask = ~torch.eye(N, dtype=torch.bool, device=z.device)

        positives = torch.cat([
            torch.diag(sim, z_i.size(0)),
            torch.diag(sim, -z_i.size(0))
        ])

        negatives = sim[mask].view(N, -1)
        logits = torch.cat((positives.unsqueeze(1), negatives), dim=1)
        labels = torch.zeros(N, dtype=torch.long, device=z.device)
        loss = self.criterion(logits, labels) / N
        return loss

# Section 5: Initialize and Train SimCLR Model
base_encoder = resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)
base_encoder.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
base_encoder.fc = nn.Identity()

model = SimCLR(base_encoder, projection_dim=128).to("cuda")
optimizer = optim.Adam(model.parameters(), lr=1e-4)
criterion = NTXentLoss(batch_size=128, temperature=0.5)

for epoch in range(100):
    total_loss = 0
    model.train()
    for img_1, img_2 in train_loader:
        img_1, img_2 = img_1.to("cuda"), img_2.to("cuda")
        z_i = model(img_1)
        z_j = model(img_2)

        loss = criterion(z_i, z_j)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch [{epoch+1}/100], Loss: {total_loss/len(train_loader):.4f}")

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 189MB/s]


Epoch [1/100], Loss: 7.8979
Epoch [2/100], Loss: 6.7874
Epoch [3/100], Loss: 6.6326
Epoch [4/100], Loss: 6.5602
Epoch [5/100], Loss: 6.5342
Epoch [6/100], Loss: 6.4495
Epoch [7/100], Loss: 6.3722
Epoch [8/100], Loss: 6.3122
Epoch [9/100], Loss: 6.2408
Epoch [10/100], Loss: 6.2075
Epoch [11/100], Loss: 6.1662
Epoch [12/100], Loss: 6.0650
Epoch [13/100], Loss: 5.9732
Epoch [14/100], Loss: 5.8865
Epoch [15/100], Loss: 5.7693
Epoch [16/100], Loss: 5.6096
Epoch [17/100], Loss: 5.5101
Epoch [18/100], Loss: 5.3856
Epoch [19/100], Loss: 5.2289
Epoch [20/100], Loss: 5.1713
Epoch [21/100], Loss: 5.0446
Epoch [22/100], Loss: 4.8334
Epoch [23/100], Loss: 4.7151
Epoch [24/100], Loss: 4.6161
Epoch [25/100], Loss: 4.4583
Epoch [26/100], Loss: 4.4169
Epoch [27/100], Loss: 4.2647
Epoch [28/100], Loss: 4.1795
Epoch [29/100], Loss: 4.1129
Epoch [30/100], Loss: 4.0633
Epoch [31/100], Loss: 3.8735
Epoch [32/100], Loss: 3.8945
Epoch [33/100], Loss: 3.7710
Epoch [34/100], Loss: 3.6720
Epoch [35/100], Loss: 3

In [7]:
# Section 6: Save the Pretrained SimCLR Model
import os

# Define the path to save the model
save_path = "simclr_pretrained_18_128.pth"

# Save the model's state dictionary
torch.save({
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'epoch': epoch,
    'loss': total_loss / len(train_loader)
}, save_path)

print(f"Pretrained SimCLR model saved to {save_path}")


Pretrained SimCLR model saved to simclr_pretrained_18_128.pth


In [8]:
from google.colab import files

# Download the saved model
files.download("simclr_pretrained_18_128.pth")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [9]:
# Section 7: Define Classification Dataset and Head
class TestDataset(Dataset):
    def __init__(self, images, labels, transform=None):
        self.images = images
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img = self.images[idx]
        label = self.labels[idx]
        if self.transform:
            img = self.transform(img)
        return img, label

train_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])

test_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])

train_dataset = TestDataset(final_X_train_resized, train_labels, transform=train_transform)
test_dataset = TestDataset(final_X_test_resized, test_labels, transform=test_transform)

class ClassificationHead(nn.Module):
    def __init__(self, input_dim, num_classes):
        super(ClassificationHead, self).__init__()
        self.fc = nn.Linear(input_dim, num_classes)

    def forward(self, x):
        return self.fc(x)

classification_head = ClassificationHead(input_dim=512, num_classes=len(np.unique(train_labels))).to("cuda")

# Section 8: Fine-tune and Evaluate Classification Model
optimizer_cls = optim.Adam([
    {"params": model.encoder.parameters(), "lr": 1e-5},
    {"params": classification_head.parameters(), "lr": 1e-3},
])
scheduler_cls = StepLR(optimizer_cls, step_size=10, gamma=0.5)
criterion_cls = nn.CrossEntropyLoss()

# Fine-tune
for epoch in range(100):
    model.encoder.train()
    classification_head.train()
    total_loss = 0
    correct = 0
    for img, label in DataLoader(train_dataset, batch_size=128, shuffle=True):
        img, label = img.to("cuda"), label.to("cuda")
        features = model.encoder(img)
        logits = classification_head(features)
        loss = criterion_cls(logits, label)

        optimizer_cls.zero_grad()
        loss.backward()
        optimizer_cls.step()

        total_loss += loss.item()
        correct += (logits.argmax(dim=1) == label).sum().item()

    accuracy = correct / len(train_labels)
    scheduler_cls.step()
    print(f"Epoch [{epoch+1}/100], Loss: {total_loss/len(train_loader):.4f}, Accuracy: {accuracy:.4f}")

# Evaluate
classification_head.eval()
correct = 0
with torch.no_grad():
    for img, label in DataLoader(test_dataset, batch_size=128, shuffle=False):
        img, label = img.to("cuda"), label.to("cuda")
        features = model.encoder(img)
        logits = classification_head(features)
        correct += (logits.argmax(dim=1) == label).sum().item()

test_accuracy = correct / len(test_labels)
print(f"Test Accuracy%: {test_accuracy*100:.4f}")

Epoch [1/100], Loss: 3.4910, Accuracy: 0.4400
Epoch [2/100], Loss: 3.1022, Accuracy: 0.5341
Epoch [3/100], Loss: 2.8545, Accuracy: 0.6041
Epoch [4/100], Loss: 2.6496, Accuracy: 0.6415
Epoch [5/100], Loss: 2.4624, Accuracy: 0.6807
Epoch [6/100], Loss: 2.2080, Accuracy: 0.7399
Epoch [7/100], Loss: 1.9389, Accuracy: 0.7761
Epoch [8/100], Loss: 1.6699, Accuracy: 0.8346
Epoch [9/100], Loss: 1.3650, Accuracy: 0.8835
Epoch [10/100], Loss: 1.0929, Accuracy: 0.9203
Epoch [11/100], Loss: 0.8647, Accuracy: 0.9499
Epoch [12/100], Loss: 0.7448, Accuracy: 0.9620
Epoch [13/100], Loss: 0.6528, Accuracy: 0.9716
Epoch [14/100], Loss: 0.5611, Accuracy: 0.9825
Epoch [15/100], Loss: 0.4798, Accuracy: 0.9873
Epoch [16/100], Loss: 0.4163, Accuracy: 0.9891
Epoch [17/100], Loss: 0.3533, Accuracy: 0.9909
Epoch [18/100], Loss: 0.2988, Accuracy: 0.9958
Epoch [19/100], Loss: 0.2520, Accuracy: 0.9976
Epoch [20/100], Loss: 0.2245, Accuracy: 0.9976
Epoch [21/100], Loss: 0.1862, Accuracy: 0.9982
Epoch [22/100], Loss: 