In [None]:
!pip install transformers



# Import Packages

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms, models
from transformers import SwinModel

In [None]:
#import torchvision.transforms as transforms

# Data Preprocessing

In [None]:
# Preprocessing pipeline applied to every image before it is fed to the model.
transform = transforms.Compose(
    [
        # Convert to grayscale while keeping three channels for the RGB-input backbone.
        transforms.Grayscale(num_output_channels=3),
        # Resize images to 224x224 (or any size you prefer).
        transforms.Resize(size=(224, 224)),
        # Convert images to PyTorch tensors.
        transforms.ToTensor(),
        # Normalize each channel with mean 0.5 and std 0.5.
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# Import Dataset

In [None]:
# CIFAR-100 stored under the current directory and downloaded on demand. No `train`
# argument is passed, so torchvision's default split is used. Every image goes
# through `transform`.
dataset = datasets.CIFAR100(root='.', download=True, transform=transform)

# Triplet Dataset Class
class TripletCIFAR100(Dataset):
    """Serve ``(anchor, positive, negative, label)`` tuples from a labelled dataset.

    The positive is a *different* sample with the same label as the anchor and the
    negative is a sample with a different label. A triplet margin loss needs this
    relationship to learn class-discriminative embeddings; drawing both at random
    without looking at labels yields mostly mislabelled triplets.

    Args:
        cifar_dataset: Map-style dataset whose items are ``(image, label)`` pairs.
            If it exposes a ``targets`` sequence (as torchvision's CIFAR datasets
            do) labels are read from it; otherwise every item is loaded once to
            collect its label.

    Raises:
        ValueError: If fewer than two distinct labels are present, because no
            negative sample could ever be drawn.
    """

    def __init__(self, cifar_dataset):
        self.cifar_dataset = cifar_dataset

        # Prefer the cheap `targets` attribute; fall back to reading each item.
        targets = getattr(cifar_dataset, 'targets', None)
        if targets is None:
            targets = [cifar_dataset[i][1] for i in range(len(cifar_dataset))]
        self.labels = [int(t) for t in targets]

        # label -> list of dataset indices carrying that label.
        self.indices_by_label = {}
        for idx, lab in enumerate(self.labels):
            self.indices_by_label.setdefault(lab, []).append(idx)

        if len(self.indices_by_label) < 2:
            raise ValueError(
                'TripletCIFAR100 needs at least two distinct labels to sample negatives'
            )

    def __getitem__(self, index):
        """Return ``(anchor, positive, negative, label)`` for the anchor at ``index``."""
        # Normalise negative indices so the "different from the anchor" check works.
        if index < 0:
            index += len(self.cifar_dataset)

        anchor, label = self.cifar_dataset[index]
        anchor_label = self.labels[index]

        # Positive: another sample of the same class. If the class has a single
        # member there is no alternative, so the anchor itself is reused.
        same_class = self.indices_by_label[anchor_label]
        positive_index = index
        if len(same_class) > 1:
            while positive_index == index:
                positive_index = same_class[torch.randint(len(same_class), (1,)).item()]

        # Negative: rejection-sample until the label differs. This terminates
        # because __init__ guarantees at least two classes.
        negative_index = index
        while self.labels[negative_index] == anchor_label:
            negative_index = torch.randint(len(self.cifar_dataset), (1,)).item()

        positive, _ = self.cifar_dataset[positive_index]
        negative, _ = self.cifar_dataset[negative_index]
        return anchor, positive, negative, label

    def __len__(self):
        """Number of anchors, i.e. the length of the wrapped dataset."""
        return len(self.cifar_dataset)

# Wrap the base dataset so each item becomes an (anchor, positive, negative, label) tuple.
triplet_dataset = TripletCIFAR100(cifar_dataset=dataset)
# Shuffled loader with a batch size of 4.
# Alternative kept for reference: DataLoader(triplet_dataset, batch_size=32, shuffle=True)
train_loader = DataLoader(dataset=triplet_dataset, batch_size=4, shuffle=True)

Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ./cifar-100-python.tar.gz


100%|██████████| 169M/169M [00:03<00:00, 45.7MB/s]


Extracting ./cifar-100-python.tar.gz to .


# Configure Model

In [None]:
# Swin V2 Small backbone from torchvision with its ImageNet-1K (V1) weights.
# Older spelling kept for reference: models.swin_v2_s(pretrained=True)
swinmodel = models.swin_v2_s(weights=models.Swin_V2_S_Weights.IMAGENET1K_V1)
# Replace the classification layer with a pass-through so the model emits features.
swinmodel.head = nn.Identity()

Downloading: "https://download.pytorch.org/models/swin_v2_s-637d8ceb.pth" to /root/.cache/torch/hub/checkpoints/swin_v2_s-637d8ceb.pth
100%|██████████| 191M/191M [00:02<00:00, 87.3MB/s]


In [None]:
# Switch the backbone to evaluation mode. As the last expression of the cell, the
# returned module is also displayed, which prints the architecture.
swinmodel.eval()

SwinTransformer(
  (features): Sequential(
    (0): Sequential(
      (0): Conv2d(3, 96, kernel_size=(4, 4), stride=(4, 4))
      (1): Permute()
      (2): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
    )
    (1): Sequential(
      (0): SwinTransformerBlockV2(
        (norm1): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
        (attn): ShiftedWindowAttentionV2(
          (qkv): Linear(in_features=96, out_features=288, bias=True)
          (proj): Linear(in_features=96, out_features=96, bias=True)
          (cpb_mlp): Sequential(
            (0): Linear(in_features=2, out_features=512, bias=True)
            (1): ReLU(inplace=True)
            (2): Linear(in_features=512, out_features=3, bias=False)
          )
        )
        (stochastic_depth): StochasticDepth(p=0.0, mode=row)
        (norm2): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
        (mlp): MLP(
          (0): Linear(in_features=96, out_features=384, bias=True)
          (1): GELU(approximate='

In [None]:
# print(dir(swinmodel))

In [None]:
# help(swinmodel)

In [None]:
# def extract_features(image):
#   with torch.no_grad():
#     features = swinmodel(image)

#   return features

In [None]:
# image = dataset[0][0]
# image = image.unsqueeze(0)

# feature_list = []

# fs = extract_features(image)
# feature_list.append(fs.cpu().numpy().flatten())

# print(fs.shape)

In [None]:
# Define Triplet Loss
class TripletLoss(nn.Module):
    """Module wrapper around ``nn.TripletMarginLoss`` using the p=2 distance.

    Args:
        margin: Margin forwarded to ``nn.TripletMarginLoss``; also stored on the
            instance as ``margin``.
    """

    def __init__(self, margin=1.0):
        super().__init__()
        self.margin = margin
        self.loss_fn = nn.TripletMarginLoss(p=2, margin=margin)

    def forward(self, anchor, positive, negative):
        """Return the triplet margin loss of the three embedding batches."""
        triplet_loss = self.loss_fn(anchor, positive, negative)
        return triplet_loss

Model Training

In [None]:
# Run on the GPU when one is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
swinmodel = swinmodel.to(device)
criterion = TripletLoss(margin=1.0)
optimizer = optim.Adam(swinmodel.parameters(), lr=0.001)

# Mixed Precision Training
# `torch.cuda.amp.GradScaler()` is deprecated and emits a FutureWarning. Use the
# device-agnostic constructor and enable loss scaling only when running on CUDA.
scaler = torch.amp.GradScaler('cuda', enabled=(device.type == 'cuda'))

def train(model, criterion, optimizer, dataloader, device, num_epochs=10):
    model.train()
    for epoch in range(num_epochs):
        for i, data in enumerate(dataloader, 0):
            anchor, positive, negative, _ = data
            anchor, positive, negative = anchor.to(device), positive.to(device), negative.to(device)

            optimizer.zero_grad()
            with torch.cuda.amp.autocast():
                anchor_features = model(anchor)
                positive_features = model(positive)
                negative_features = model(negative)
                loss = criterion(anchor_features, positive_features, negative_features)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            if i % 10 == 0:
                print(f'Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}], Loss: {loss.item():.4f}')

  scaler = torch.cuda.amp.GradScaler()


In [None]:
# Train the backbone on the triplet loader; `num_epochs` is left at its default of 10.
train(swinmodel, criterion, optimizer, train_loader, device)

  with torch.cuda.amp.autocast():


Epoch [1/10], Step [1], Loss: 0.6979
Epoch [1/10], Step [11], Loss: 1.3924
Epoch [1/10], Step [21], Loss: 1.6971
Epoch [1/10], Step [31], Loss: 1.2754
Epoch [1/10], Step [41], Loss: 2.8108
Epoch [1/10], Step [51], Loss: 1.9211
Epoch [1/10], Step [61], Loss: 1.8558
Epoch [1/10], Step [71], Loss: 1.1963
Epoch [1/10], Step [81], Loss: 0.8509
Epoch [1/10], Step [91], Loss: 1.5867
Epoch [1/10], Step [101], Loss: 1.4812
Epoch [1/10], Step [111], Loss: 0.4172
Epoch [1/10], Step [121], Loss: 0.8415
Epoch [1/10], Step [131], Loss: 1.3251
Epoch [1/10], Step [141], Loss: 3.8120
Epoch [1/10], Step [151], Loss: 0.3704
Epoch [1/10], Step [161], Loss: 1.8113
Epoch [1/10], Step [171], Loss: 1.9475
Epoch [1/10], Step [181], Loss: 0.8325
Epoch [1/10], Step [191], Loss: 0.4921
Epoch [1/10], Step [201], Loss: 1.5520
Epoch [1/10], Step [211], Loss: 2.5546
Epoch [1/10], Step [221], Loss: 0.9981
Epoch [1/10], Step [231], Loss: 1.8867
Epoch [1/10], Step [241], Loss: 0.9597
Epoch [1/10], Step [251], Loss: 1.01

KeyboardInterrupt: 

In [None]:
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# swinmodel = swinmodel.to(device)
# criterion = TripletLoss(margin=1.0)
# optimizer = optim.Adam(swinmodel.parameters(), lr=0.001)

# def train(model, criterion, optimizer, dataloader, device, num_epochs=10):
#   model.train()
#   for epoch in range(num_epochs):
#     for i, data in enumerate(dataloader, 0):
#       # dataloader returns 4 values: anchor, positive, negative, label
#       anchor, positive, negative, _ = data  # Unpack and ignore the label

#       anchor = anchor.to(device)
#       positive = positive.to(device)
#       negative = negative.to(device)

#       optimizer.zero_grad()

#       # Get embeddings
#       anchor_embedding = model(anchor)
#       positive_embedding = model(positive)
#       negative_embedding = model(negative)

#       # Calculate triplet loss
#       loss = criterion(anchor_embedding, positive_embedding, negative_embedding)
#       loss.backward()
#       optimizer.step()

#       if i % 10 == 0:
#         print(f'Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}], Loss: {loss.item():.4f}')

In [None]:
def inference(model, dataloader, device):
    """Run ``model`` over ``dataloader`` in eval mode and print each batch's output.

    Args:
        model: Module applied to every image batch.
        dataloader: Iterable yielding ``(images, label)`` pairs; the label is ignored.
        device: Device the images are moved to before the forward pass.
    """
    model.eval()
    with torch.no_grad():
        for inputs, _ in dataloader:
            outputs = model(inputs.to(device))
            print(outputs)

# Create a DataLoader for inference (fixed order, batch size 4).
# Alternative kept for reference: DataLoader(dataset, batch_size=32, shuffle=False)
inference_loader = DataLoader(dataset=dataset, batch_size=4, shuffle=False)
inference(swinmodel, inference_loader, device)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
tensor([[ 0.0662,  0.0865,  0.0781,  ..., -0.0475,  0.0118, -0.1853],
        [ 0.2346,  0.0123,  0.1461,  ..., -0.1397,  0.0623, -0.0503],
        [ 0.1539,  0.0554,  0.1310,  ..., -0.0390,  0.0455, -0.1172],
        [ 0.0621, -0.0358,  0.0772,  ..., -0.0309,  0.0214, -0.1601]],
       device='cuda:0')
tensor([[ 0.2635,  0.0063,  0.1154,  ..., -0.0450,  0.0900, -0.1934],
        [ 0.1726,  0.0557,  0.0808,  ..., -0.0689,  0.0016, -0.1405],
        [ 0.0734,  0.0634,  0.0972,  ..., -0.0556,  0.0739, -0.0562],
        [ 0.0639, -0.0032,  0.0885,  ..., -0.0987, -0.0068, -0.1611]],
       device='cuda:0')
tensor([[-0.0375,  0.0900,  0.1337,  ..., -0.0951,  0.0387, -0.1583],
        [ 0.1766,  0.0556,  0.1299,  ..., -0.0540,  0.0633, -0.1823],
        [ 0.2227,  0.0074,  0.1243,  ..., -0.0470, -0.0303, -0.2094],
        [ 0.1866,  0.0430,  0.1630,  ..., -0.0817, -0.0437, -0.0227]],
       device='cuda:0')
tensor([[ 0.2124, -0

Evaluation

In [None]:
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

def visualize_embeddings(model, dataloader, device):
    """Embed every batch of ``dataloader`` and show a 2-D t-SNE scatter plot.

    Args:
        model: Module mapping an image batch to embeddings; switched to eval mode.
        dataloader: Iterable yielding ``(images, labels)`` batches.
        device: Device the images are moved to before the forward pass.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    model.eval()
    all_features = []
    all_labels = []
    with torch.no_grad():
        for data in dataloader:
            images, labels = data
            images = images.to(device)
            features = model(images)
            all_features.append(features.cpu())
            all_labels.append(labels.cpu())

    # Concatenate with torch rather than NumPy: `np` is not imported at this point
    # in the notebook, and the earlier `np.concatenate` call failed with NameError.
    all_features = torch.cat(all_features, dim=0).numpy()
    all_labels = torch.cat(all_labels, dim=0).numpy()

    # Fixed random_state keeps the projection reproducible between runs.
    tsne = TSNE(n_components=2, random_state=42)
    embeddings = tsne.fit_transform(all_features)

    plt.figure(figsize=(10, 10))
    scatter = plt.scatter(embeddings[:, 0], embeddings[:, 1], c=all_labels, cmap='viridis', s=2)
    legend = plt.legend(*scatter.legend_elements(), loc="best", title="Classes")
    plt.gca().add_artist(legend)
    plt.show()

# Embed every image served by `inference_loader` and plot the t-SNE projection.
visualize_embeddings(swinmodel, inference_loader, device)

NameError: name 'np' is not defined

Find Similarity

In [None]:
# Extract features for the test set
def extract_features(model, dataloader, device):
    """Run ``model`` over ``dataloader`` and collect its outputs and the labels.

    Args:
        model: Module mapping an image batch to embeddings; switched to eval mode.
        dataloader: Iterable yielding ``(images, label)`` batches.
        device: Device the images are moved to before the forward pass.

    Returns:
        tuple: ``(features, labels)`` as NumPy arrays, each concatenated along the
        first axis in dataloader order.
    """
    model.eval()
    features = []
    labels = []
    with torch.no_grad():
        for data in dataloader:
            images, label = data
            images = images.to(device)
            output = model(images)
            features.append(output.cpu())
            labels.append(label.cpu())
    # Concatenate with torch and convert once at the end. The previous
    # `np.concatenate` call relied on `np`, which the notebook never imports.
    return torch.cat(features).numpy(), torch.cat(labels).numpy()

# Create a DataLoader for the test set.
# The base `dataset` is built without `train=False`, i.e. it is the training split.
# Load the held-out split with the same preprocessing so the gallery searched below
# is a real test set and does not contain the query image itself.
test_dataset = datasets.CIFAR100('.', train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
test_features, test_labels = extract_features(swinmodel, test_loader, device)

Find the Most Similar Image in the Test Set

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

def find_most_similar_image(input_image, model, test_features, test_labels, device):
    """Find the gallery entry whose embedding is most cosine-similar to ``input_image``.

    Args:
        input_image: Single image tensor without a batch dimension (one is added here).
        model: Embedding model; switched to eval mode.
        test_features: 2-D array of gallery embeddings, one row per gallery item.
        test_labels: Labels aligned with the rows of ``test_features``.
        device: Device on which the query embedding is computed.

    Returns:
        tuple: ``(most_similar_image, most_similar_label)``. Despite its name, the
        first element is the matching row of ``test_features`` (an embedding
        vector), not pixel data.
    """
    model.eval()
    with torch.no_grad():
        input_image = input_image.to(device).unsqueeze(0)
        input_feature = model(input_image).cpu().numpy()

    # `similarities` has one row (the query) and one column per gallery item, so
    # the flat argmax is the gallery index. The array's own method is used because
    # `np` is never imported in this notebook and `np.argmax` raised NameError.
    similarities = cosine_similarity(input_feature, test_features)
    most_similar_index = similarities.argmax()
    most_similar_image = test_features[most_similar_index]
    most_similar_label = test_labels[most_similar_index]

    return most_similar_image, most_similar_label

# Example usage
input_image, _ = dataset[0]  # Replace with your input image
most_similar_image, most_similar_label = find_most_similar_image(
    input_image=input_image,
    model=swinmodel,
    test_features=test_features,
    test_labels=test_labels,
    device=device,
)
print(f'Most similar image label: {most_similar_label}')


Calculate Similarity Between Two Input Images

In [None]:
def calculate_similarity(image1, image2, model, device):
    """Return the cosine similarity between the model embeddings of two images.

    Args:
        image1: First image tensor without a batch dimension.
        image2: Second image tensor without a batch dimension.
        model: Embedding model; switched to eval mode.
        device: Device on which both embeddings are computed.

    Returns:
        The single entry of the 1x1 cosine-similarity matrix.
    """
    model.eval()
    embeddings = []
    with torch.no_grad():
        for image in (image1, image2):
            # Add the batch dimension the model expects, then bring the result
            # back to the host as a NumPy array for scikit-learn.
            batch = image.to(device).unsqueeze(0)
            embeddings.append(model(batch).cpu().numpy())

    first, second = embeddings
    return cosine_similarity(first, second)[0][0]

# Example usage
image1, _ = dataset[0]  # Replace with your first input image
image2, _ = dataset[1]  # Replace with your second input image
similarity = calculate_similarity(
    image1=image1,
    image2=image2,
    model=swinmodel,
    device=device,
)
print(f'Similarity between the two images: {similarity}')