In [None]:
import torch
import pandas as pd
from PIL import Image
from tqdm import tqdm
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from efficientnet_pytorch import EfficientNet
from torch.cuda.amp import GradScaler, autocast

In [None]:
# Stochastic augmentation pipeline: each call produces a different random view
# of the input image (crop, flip, colour distortion, grayscale, blur), then
# converts it to a tensor normalized with the commonly used ImageNet channel
# statistics.
transform = transforms.Compose(
    [
        transforms.RandomResizedCrop(size=456, scale=(0.2, 1.0)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomApply(
            transforms=[
                transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.4),
            ],
            p=0.8,
        ),
        transforms.RandomGrayscale(p=0.2),
        transforms.RandomApply(
            transforms=[transforms.GaussianBlur(kernel_size=23)],
            p=0.5,
        ),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Deterministic preprocessing at the same 456-pixel resolution: resize the
# shorter side, take the central square, then apply the same tensor conversion
# and normalization as the training pipeline.
eval_transform = transforms.Compose(
    [
        transforms.Resize(size=456),
        transforms.CenterCrop(size=456),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [None]:
class SimCLRDataset(Dataset):
    """Dataset that yields two views of the same image for contrastive training.

    Args:
        image_paths: Indexable, sized collection of image file paths.
        transform: Optional callable applied to the decoded RGB image. It is
            invoked twice per item, so a stochastic transform produces two
            different views. When ``None``, the untransformed image is
            returned for both views.
    """

    def __init__(self, image_paths, transform=None):
        self.image_paths = image_paths
        self.transform = transform

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load image ``idx`` and return a ``(view_1, view_2)`` pair.

        Without a transform both entries are the same PIL image object; note
        that PyTorch's default collate function cannot batch PIL images, so a
        tensor-producing transform is needed for use with a ``DataLoader``.
        """
        img_path = self.image_paths[idx]

        # convert() returns a fully loaded image, so the underlying file
        # handle can be released as soon as the conversion is done.
        with Image.open(img_path) as raw:
            img = raw.convert('RGB')

        # Compare against None explicitly: a falsy-but-valid callable (for
        # example an empty nn.Sequential) must still be applied, and the
        # no-transform case must not fall through with unbound view variables.
        if self.transform is None:
            return img, img

        img1 = self.transform(img)
        img2 = self.transform(img)
        return img1, img2

In [None]:
class EfficientNetB5Embedding(nn.Module):
    """EfficientNet-B5 backbone followed by a two-layer projection head.

    The pretrained backbone's convolutional feature map is globally
    average-pooled, passed through ``Linear -> ReLU -> Linear`` and
    L2-normalized, so every output row has unit norm.

    Args:
        embedding_dim: Width of the hidden layer of the projection head.
        projection_dim: Size of the returned, normalized projection vector.
    """

    def __init__(self, embedding_dim=256, projection_dim=128):
        super().__init__()
        self.base_model = EfficientNet.from_pretrained('efficientnet-b5')
        # Collapse the spatial dimensions to 1x1 regardless of input size.
        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        # 2048 is hard-coded as the number of channels coming out of the
        # backbone's feature extractor -- assumed to match EfficientNet-B5;
        # must be changed if a different backbone variant is used.
        self.fc1 = nn.Linear(2048, embedding_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(embedding_dim, projection_dim)

    def forward(self, x):
        """Map a batch of images to L2-normalized projection vectors."""
        feature_map = self.base_model.extract_features(x)
        pooled = self.pool(feature_map).flatten(start_dim=1)
        hidden = self.relu(self.fc1(pooled))
        projection = self.fc2(hidden)
        # Unit-length rows.
        return nn.functional.normalize(projection, dim=1)

In [None]:
class NTXentLoss(nn.Module):
    """Normalized temperature-scaled cross-entropy (NT-Xent) loss for paired views.

    For a batch of ``N`` pairs the ``2N`` embeddings are compared with cosine
    similarity; each embedding must pick out its partner view among the other
    ``2N - 1`` embeddings.

    Args:
        temperature: Divisor applied to the cosine similarities before the
            softmax.
    """

    def __init__(self, temperature=0.5):
        super(NTXentLoss, self).__init__()
        self.temperature = temperature
        self.cosine_similarity = nn.CosineSimilarity(dim=-1)

    def forward(self, z_i, z_j):
        """Compute the mean NT-Xent loss.

        Args:
            z_i: ``(N, D)`` embeddings of the first view.
            z_j: ``(N, D)`` embeddings of the second view; row ``k`` must be
                the partner of ``z_i[k]``.

        Returns:
            Scalar tensor: cross-entropy averaged over all ``2N`` anchors.

        Raises:
            ValueError: If ``z_i`` and ``z_j`` do not have the same shape.
        """
        if z_i.shape != z_j.shape:
            raise ValueError(
                f'z_i and z_j must have the same shape, got {tuple(z_i.shape)} and {tuple(z_j.shape)}'
            )

        batch_size = z_i.size(0)
        z = torch.cat([z_i, z_j], dim=0)
        sim_matrix = self.cosine_similarity(z.unsqueeze(1), z.unsqueeze(0)) / self.temperature

        # Drop the diagonal (self-similarity) so each row holds the 2N - 1
        # candidates for that anchor.
        self_mask = torch.eye(2 * batch_size, dtype=torch.bool, device=z.device)
        sim_matrix = sim_matrix[~self_mask].view(2 * batch_size, -1)

        # Removing the diagonal shifts every column to the right of it one
        # step left. For anchor k in the first half the positive sits at
        # original column k + N (right of the diagonal), so it lands at
        # k + N - 1. For anchor k + N in the second half the positive sits at
        # original column k (left of the diagonal), so it stays at k.
        index = torch.arange(batch_size, device=z.device)
        targets = torch.cat([index + batch_size - 1, index], dim=0)

        loss = F.cross_entropy(sim_matrix, targets)
        return loss

In [None]:
def train_simclr_model(image_paths, batch_size=128, epochs=30, temperature=0.5, learning_rate=1e-3, weight_decay=1e-6, num_workers=0):
    """Train an ``EfficientNetB5Embedding`` with the SimCLR contrastive objective.

    Uses the module-level ``transform`` pipeline to create two augmented views
    per image, optimizes ``NTXentLoss`` with Adam and a per-epoch cosine
    learning-rate schedule, and runs under mixed precision when CUDA is
    available.

    Args:
        image_paths: Sized, indexable collection of image file paths.
        batch_size: Number of image pairs per optimization step.
        epochs: Number of passes over the dataset (also the cosine schedule
            period).
        temperature: Temperature passed to ``NTXentLoss``.
        learning_rate: Initial Adam learning rate.
        weight_decay: Adam weight decay.
        num_workers: Worker processes for the ``DataLoader``; the default of 0
            loads data in the main process.

    Returns:
        The trained model, left in training mode on the device it was trained on.

    Raises:
        ValueError: If ``image_paths`` is empty.
    """
    # Fail fast, before the pretrained backbone is loaded, instead of hitting a
    # sampler error or a division by zero later on.
    if len(image_paths) == 0:
        raise ValueError('image_paths is empty; there is nothing to train on')

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # The torch.cuda.amp utilities only apply to CUDA; disable them explicitly
    # on CPU rather than relying on their warn-and-disable fallback.
    use_amp = device.type == 'cuda'

    model = EfficientNetB5Embedding().to(device)
    criterion = NTXentLoss(temperature)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)
    scaler = GradScaler(enabled=use_amp)

    dataset = SimCLRDataset(image_paths, transform=transform)
    dataloader = DataLoader(dataset, shuffle=True, batch_size=batch_size, num_workers=num_workers)
    num_batches = len(dataloader)

    for epoch in range(epochs):
        model.train()
        train_loss = 0.0
        prog_bar = tqdm(dataloader)

        # Count batches ourselves: tqdm only synchronizes its `n` attribute
        # when the display refreshes, so it is not a reliable step counter.
        for step, (img1, img2) in enumerate(prog_bar, start=1):
            img1, img2 = img1.to(device), img2.to(device)

            optimizer.zero_grad()

            with autocast(enabled=use_amp):
                z_i = model(img1)
                z_j = model(img2)
                loss = criterion(z_i, z_j)

            # With AMP disabled the scaler is a pass-through around
            # backward() / optimizer.step().
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            train_loss += loss.item()
            prog_bar.set_description(f'Epoch {epoch+1}/{epochs} Train Loss: {train_loss/step:.4f}')

        avg_train_loss = train_loss / num_batches
        print(f'Epoch {epoch+1} Train Loss: {avg_train_loss:.4f}')

        # Cosine schedule advances once per epoch.
        scheduler.step()

    return model

In [None]:
# Read the image manifest; training only needs its 'file_path' column.
df = pd.read_csv('datasets/cropped_all_one_hot.csv')
image_paths = df['file_path'].tolist()

# Run SimCLR pre-training over every listed image.
trained_model = train_simclr_model(
    image_paths,
    batch_size=128,
    epochs=30,
    temperature=0.5,
    learning_rate=1e-3,
    weight_decay=1e-6,
)