In [13]:
'''
1. Embed all the train images using CLIP embedding and store in the FAISS VectorDB
2. Now train all the images while image encoded using VGG19 and cross product with corresponding top 1 CLIP embedding
3. Forward to classifier layer for classification 
4. During test, for each test image, get the top 1 CLIP embedding from the FAISS VectorDB and classify it the same way
'''

'\n1. Embed all the train images using CLIP embedding and store in the FAISS VectorDB\n2. Now train all the images while image encoded using VGG19 and cross product with corresponding top 1 CLIP embedding\n3. Forward to classifier layer for classification \n4. During test, for each test image gectc\n'

In [1]:
!pip install efficientnet_pytorch

Collecting efficientnet_pytorch
  Downloading efficientnet_pytorch-0.7.1.tar.gz (21 kB)
  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: efficientnet_pytorch
  Building wheel for efficientnet_pytorch (setup.py) ... [?25ldone
[?25h  Created wheel for efficientnet_pytorch: filename=efficientnet_pytorch-0.7.1-py3-none-any.whl size=16428 sha256=f2f532b1ed81c9454c45c7ecd638a55ac4180b100193e6141be8ac1861712de9
  Stored in directory: /root/.cache/pip/wheels/03/3f/e9/911b1bc46869644912bda90a56bcf7b960f20b5187feea3baf
Successfully built efficientnet_pytorch
Installing collected packages: efficientnet_pytorch
Successfully installed efficientnet_pytorch-0.7.1


In [1]:
!pip install faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)
Downloading faiss_cpu-1.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.0/27.0 MB[0m [31m53.5 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.8.0


<hr>
<hr>
<h2>Fine tuning of EfficientNet Model</h2>
<hr>
<hr>

In [2]:
import os
import torch
import faiss
import torch.nn as nn
import torch.optim as optim
from transformers import CLIPProcessor, CLIPModel
from torchvision import datasets, transforms
#from efficientnet_pytorch import EfficientNet

# Count the entries in the train split directory. The recorded run shows 196,
# the same value passed as num_classes to the classifiers in this notebook.
len(
    os.listdir(
        "/kaggle/input/stanford-car-dataset-by-classes-folder/car_data/car_data/train/"
    )
)

196

In [None]:
# Preprocessing applied to every image: fixed 224x224 resize, conversion to a
# tensor, then per-channel normalization with the standard ImageNet mean/std.
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Both splits are read through ImageFolder and share the same preprocessing.
# Note that `val_dataset` is read from the dataset's test/ directory.
train_dataset = datasets.ImageFolder(
    root='/kaggle/input/stanford-car-dataset-by-classes-folder/car_data/car_data/train/',
    transform=transform,
)
val_dataset = datasets.ImageFolder(
    root='/kaggle/input/stanford-car-dataset-by-classes-folder/car_data/car_data/test/',
    transform=transform,
)

# Mini-batch loaders: only the training split is shuffled.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=64,
    shuffle=True,
)
val_loader = torch.utils.data.DataLoader(
    dataset=val_dataset,
    batch_size=64,
    shuffle=False,
)

In [None]:
# The EfficientNet import in the notebook's import cell is commented out, so the
# name is brought into scope here; without it this cell raises NameError on a
# fresh kernel.
from efficientnet_pytorch import EfficientNet

# Load the pre-trained EfficientNet-B3 backbone with a 196-way classification head
model = EfficientNet.from_pretrained('efficientnet-b3', num_classes=196)  # Change num_classes according to your dataset

# Cross-entropy over the class logits, optimized with Adam on all model parameters
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Training configuration: epoch budget and compute device (GPU when available)
num_epochs = 10
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Move the model onto the device. The bare print() is kept as the final
# statement so the module returned by .to() is not echoed as the cell output.
model.to(device)
print()

In [None]:
import numpy as np

# Early-stopping state: training stops once the validation loss has failed to
# improve for `patience` consecutive epochs.
patience = 3  # Number of epochs to wait for improvement
early_stopping_counter = 0
# `np.Inf` was removed in NumPy 2.0; `np.inf` is the supported spelling.
best_val_loss = np.inf  # Initialize with positive infinity

for epoch in range(num_epochs):
    # ---- Training pass ----
    model.train()
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(train_loader, 1):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # criterion returns the batch mean, so weight it by the batch size to
        # obtain a per-sample average over the epoch below.
        running_loss += loss.item() * inputs.size(0)

        if i % 10 == 0:  # Print every 10 mini-batches
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i}/{len(train_loader)}], Loss: {loss.item():.4f}')

    epoch_loss = running_loss / len(train_dataset)

    # ---- Validation pass ----
    # NOTE(review): val_loader reads the dataset's test/ directory, so early
    # stopping and checkpoint selection here are driven by the test split.
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    val_loss /= len(val_dataset)
    val_accuracy = 100 * correct / total

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%')

    # Check for improvement in validation loss
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        early_stopping_counter = 0
        # Save the best model
        torch.save(model.state_dict(), 'efficientnet_finetuned_best.pth')
    else:
        early_stopping_counter += 1
        if early_stopping_counter >= patience:
            print(f'Validation loss did not improve for {patience} epochs. Early stopping...')
            break

# Reload the best checkpoint. map_location keeps the reload working when the
# weights were saved from a different device than the current one.
model.load_state_dict(torch.load('efficientnet_finetuned_best.pth', map_location=device))

<hr>
<hr>
<h1>RAFGIC</h1>
<hr>
<hr>

In [3]:
from torch.utils.data import Dataset, DataLoader

ttransform = transforms.ToTensor()

class CustomDataset(Dataset):
    """Wrap an image dataset and attach a retrieved embedding to every sample.

    Each item is encoded with CLIP, its nearest neighbour is looked up in the
    FAISS index, and the stored vector of that neighbour is returned next to
    the image tensor and the label.

    Relies on notebook-level globals that must exist before an item is
    fetched: ``clip_processor``, ``clip_model``, ``index_flat``, ``device``
    and ``ttransform``.

    NOTE(review): if the index was built from the same training images, the
    top-1 hit for a training sample is presumably its own embedding - confirm
    this is intended.
    NOTE(review): ``ttransform`` is a plain ``ToTensor``; no mean/std
    normalization is applied to the image passed on to the classifier -
    confirm this is intended.

    Args:
        dataset: Indexable dataset yielding ``(image, label)`` pairs, where
            ``image`` is accepted by both ``clip_processor`` and ``ttransform``.
    """

    def __init__(self, dataset):
        self.dataset = dataset

    def __len__(self):
        """Return the number of samples in the wrapped dataset."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return ``(image_tensor, retrieved_embedding, label)`` for sample ``idx``."""
        image, label = self.dataset[idx]

        # Preprocess for CLIP and place the pixels on the same device as the
        # CLIP model (previously a hard-coded .cuda(), which failed on CPU).
        pixel_values = clip_processor(text=None, images=image, return_tensors="pt")["pixel_values"].to(device)

        # Inference only: no autograd graph is needed for the query embedding.
        with torch.no_grad():
            query = clip_model.get_image_features(pixel_values=pixel_values)

        # Top-1 nearest-neighbour lookup; the distances are not used.
        _, neighbour_ids = index_flat.search(query.cpu().numpy(), 1)
        retrieved = index_flat.reconstruct(int(neighbour_ids[0][0]))

        return ttransform(image), torch.from_numpy(retrieved), label


In [4]:
import numpy as np
import requests
from io import BytesIO

# URL of the raw numpy file on GitHub
url = 'https://github.com/Lordvarun23/RAFGIC/raw/main/train_clip_embeddings.npy'

# Download the file. The timeout prevents the cell from hanging indefinitely,
# and raise_for_status() fails here with the HTTP error instead of leaving
# `embeddings` undefined and surfacing as a NameError further down.
response = requests.get(url, timeout=60)
response.raise_for_status()

# Load the numpy array straight from the in-memory response body
content = BytesIO(response.content)
embeddings = np.load(content)

# FAISS expects a C-contiguous float32 matrix of shape (n_vectors, dim)
embeddings = np.ascontiguousarray(embeddings, dtype=np.float32)

## Using a flat index

# Build a flat (CPU, exact L2) index sized from the data rather than a
# hard-coded dimension, then add every embedding to it.
index_flat = faiss.IndexFlatL2(embeddings.shape[1])

index_flat.add(embeddings)         # add vectors to the index
print(index_flat.ntotal)

8144


In [5]:
'''k = 1                          # we want to see 4 nearest neighbors
D, I = index_flat.search(np.array([embeddings[1]]), k)  # actual search
I[0][0]'''

'k = 1                          # we want to see 4 nearest neighbors\nD, I = index_flat.search(np.array([embeddings[1]]), k)  # actual search\nI[0][0]'

In [None]:
#index_flat.reconstruct(1)

In [6]:
# The model weights and the matching preprocessor are loaded from the same
# Hugging Face checkpoint identifier.
clip_checkpoint = "openai/clip-vit-large-patch14"
clip_model = CLIPModel.from_pretrained(clip_checkpoint)
clip_processor = CLIPProcessor.from_pretrained(clip_checkpoint)

config.json:   0%|          | 0.00/4.52k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.71G [00:00<?, ?B/s]

2024-05-02 05:12:53.759055: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-02 05:12:53.759213: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-02 05:12:53.888750: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


preprocessor_config.json:   0%|          | 0.00/316 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/905 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/961k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.22M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/389 [00:00<?, ?B/s]

In [7]:
# Select the compute device: GPU when one is available, CPU otherwise
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Move CLIP onto that device. The bare print() is kept as the final statement
# so the module returned by .to() is not echoed as the cell output.
clip_model.to(device)
print()




In [13]:
'''import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models

class CustomModel(nn.Module):
    def __init__(self, num_classes):
        super(CustomModel, self).__init__()
        # Load pre-trained VGG19 model
        self.vgg19 = models.vgg19(pretrained=True)
        
        # Define the linear layer
        self.linear1 = nn.Linear(512 * 7 * 7, 768)
        
        # Define the linear layer
        self.linear2 = nn.Linear(768, 512)
        
        # Define the classification layer
        self.classification_layer = nn.Linear(512, num_classes)
    
    def forward(self, x,embedding):
        # Pass input through VGG19 model
        image = x
        x = self.vgg19.features(x)
        x = self.vgg19.avgpool(x)
        x = torch.flatten(x, 1)
        
        # Pass through linear layer
        x = self.linear1(x)
        
        # Apply custom function
        # Reshape tensors for batch-wise matrix multiplication
        x = x * embedding
        
        x = self.linear2(x)
        
        # Forward to classification layer
        x = self.classification_layer(x)
        
        return x

rafgic_model = CustomModel(num_classes=196)  # Set num_classes according to your dataset
# Train the model
num_epochs = 10
rafgic_model.to(device)
print()'''




In [8]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models

class CustomModel(nn.Module):
    """VGG19 image features fused with a 768-d embedding, then classified.

    The convolutional features and average pool of a pre-trained VGG19 are
    flattened, projected to 768 dimensions (Linear -> BatchNorm -> ReLU ->
    Dropout), multiplied element-wise with the supplied embedding, projected
    to 512 dimensions and mapped to class logits.

    Args:
        num_classes: Number of output classes (size of the logit vector).
    """

    def __init__(self, num_classes):
        super().__init__()
        # Pre-trained VGG19. `pretrained=True` is deprecated in torchvision;
        # IMAGENET1K_V1 is the weight set that flag resolved to.
        self.vgg19 = models.vgg19(weights=models.VGG19_Weights.IMAGENET1K_V1)

        # Projection of the flattened 512x7x7 feature map to 768 dimensions
        self.linear1 = nn.Linear(512 * 7 * 7, 768)

        # Normalization and regularization applied after the first projection
        self.batchnorm1 = nn.BatchNorm1d(768)
        self.dropout1 = nn.Dropout(0.2)

        # Projection of the fused representation to 512 dimensions
        self.linear2 = nn.Linear(768, 512)

        # Final classification layer producing one logit per class
        self.classification_layer = nn.Linear(512, num_classes)

    def forward(self, x, embedding):
        """Compute class logits for a batch of images and their embeddings.

        Args:
            x: Image batch fed to the VGG19 feature extractor.
            embedding: Tensor broadcastable against the ``(batch, 768)``
                projected image features.

        Returns:
            Logits produced by the ``num_classes``-wide classification layer.
        """
        # Image branch: VGG19 conv features -> avg pool -> flatten
        x = self.vgg19.features(x)
        x = self.vgg19.avgpool(x)
        x = torch.flatten(x, 1)

        # Project to the embedding width
        x = self.linear1(x)
        x = self.batchnorm1(x)
        x = F.relu(x)
        x = self.dropout1(x)

        # Fuse with the supplied embedding via an element-wise product
        # (not a matrix multiplication)
        x = x * embedding

        x = self.linear2(x)

        # Forward to classification layer
        return self.classification_layer(x)

# Build the classifier for the 196 classes (set num_classes according to your
# dataset) and place it on the active device; .to() returns the module itself.
rafgic_model = CustomModel(num_classes=196).to(device)

# Epoch budget for training
num_epochs = 10

# Final expression of the cell: echoes the module structure as the cell output
rafgic_model

Downloading: "https://download.pytorch.org/models/vgg19-dcbb9e9d.pth" to /root/.cache/torch/hub/checkpoints/vgg19-dcbb9e9d.pth
100%|██████████| 548M/548M [00:03<00:00, 162MB/s]  


CustomModel(
  (vgg19): VGG(
    (features): Sequential(
      (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU(inplace=True)
      (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (3): ReLU(inplace=True)
      (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (6): ReLU(inplace=True)
      (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (8): ReLU(inplace=True)
      (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (11): ReLU(inplace=True)
      (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (13): ReLU(inplace=True)
      (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (15): ReLU(inplace=True)
      (1

In [9]:
# Only a resize is applied at this stage: the images stay in PIL form so that
# CustomDataset can pass them to the CLIP processor and convert them to
# tensors itself.
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
    ]
)

# Folder-per-class datasets; `val_dataset` is read from the test/ directory.
train_dataset = datasets.ImageFolder(
    root='/kaggle/input/stanford-car-dataset-by-classes-folder/car_data/car_data/train/',
    transform=transform,
)
val_dataset = datasets.ImageFolder(
    root='/kaggle/input/stanford-car-dataset-by-classes-folder/car_data/car_data/test/',
    transform=transform,
)

# Wrap both splits so every sample also carries its retrieved embedding
custom_train_dataset = CustomDataset(train_dataset)
custom_test_dataset = CustomDataset(val_dataset)

# Mini-batch loaders: only the training split is shuffled
train_loader = DataLoader(
    dataset=custom_train_dataset,
    batch_size=32,
    shuffle=True,
)
test_loader = DataLoader(
    dataset=custom_test_dataset,
    batch_size=32,
    shuffle=False,
)

# Cross-entropy over the class logits, optimized with Adam on all model parameters
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(rafgic_model.parameters(), lr=0.001)

In [None]:
'''# Define DataLoader
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=32, shuffle=False)'''

In [None]:
import numpy as np

# Early-stopping state: training stops once the validation loss has failed to
# improve for `patience` consecutive epochs.
patience = 8  # Number of epochs to wait for improvement
early_stopping_counter = 0
# `np.Inf` was removed in NumPy 2.0; `np.inf` is the supported spelling.
best_val_loss = np.inf  # Initialize with positive infinity

for epoch in range(num_epochs):
    # ---- Training pass ----
    rafgic_model.train()
    running_loss = 0.0
    # The per-batch embeddings get their own name so the loop does not
    # overwrite the notebook-level `embeddings` matrix used to build the index.
    for i, (inputs, retrieved_embeddings, labels) in enumerate(train_loader, 1):
        inputs = inputs.to(device)
        retrieved_embeddings = retrieved_embeddings.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = rafgic_model(inputs, retrieved_embeddings)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # criterion returns the batch mean, so weight it by the batch size to
        # obtain a per-sample average over the epoch below.
        running_loss += loss.item() * inputs.size(0)

        if i % 10 == 0:  # Print every 10 mini-batches
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i}/{len(train_loader)}], Loss: {loss.item():.4f}')

    epoch_loss = running_loss / len(train_dataset)

    # ---- Validation pass ----
    # NOTE(review): test_loader wraps the dataset's test/ directory, so early
    # stopping and checkpoint selection here are driven by the test split.
    rafgic_model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, retrieved_embeddings, labels in test_loader:
            inputs = inputs.to(device)
            retrieved_embeddings = retrieved_embeddings.to(device)
            labels = labels.to(device)
            outputs = rafgic_model(inputs, retrieved_embeddings)
            loss = criterion(outputs, labels)
            val_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    val_loss /= len(val_dataset)
    val_accuracy = 100 * correct / total

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%')

    # Check for improvement in validation loss
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        early_stopping_counter = 0
        # Save the best model
        torch.save(rafgic_model.state_dict(), 'rafgic_finetuned_best.pth')
    else:
        early_stopping_counter += 1
        if early_stopping_counter >= patience:
            print(f'Validation loss did not improve for {patience} epochs. Early stopping...')
            break

# Reload the best checkpoint. map_location keeps the reload working when the
# weights were saved from a different device than the current one.
rafgic_model.load_state_dict(torch.load('rafgic_finetuned_best.pth', map_location=device))

Epoch [1/10], Step [10/255], Loss: 5.2941
Epoch [1/10], Step [20/255], Loss: 5.1338
Epoch [1/10], Step [30/255], Loss: 5.0220
Epoch [1/10], Step [40/255], Loss: 4.8500
Epoch [1/10], Step [50/255], Loss: 4.7737
Epoch [1/10], Step [60/255], Loss: 4.5636
Epoch [1/10], Step [70/255], Loss: 4.0922
Epoch [1/10], Step [80/255], Loss: 3.7287
Epoch [1/10], Step [90/255], Loss: 3.2014
Epoch [1/10], Step [100/255], Loss: 3.5809
Epoch [1/10], Step [110/255], Loss: 3.0550
Epoch [1/10], Step [120/255], Loss: 2.5898
Epoch [1/10], Step [130/255], Loss: 2.3502
Epoch [1/10], Step [140/255], Loss: 2.0957
Epoch [1/10], Step [150/255], Loss: 2.0625
Epoch [1/10], Step [160/255], Loss: 1.7281
Epoch [1/10], Step [170/255], Loss: 1.5734
Epoch [1/10], Step [180/255], Loss: 1.4023
Epoch [1/10], Step [190/255], Loss: 1.2878
Epoch [1/10], Step [200/255], Loss: 1.0260
Epoch [1/10], Step [210/255], Loss: 1.0121
Epoch [1/10], Step [220/255], Loss: 1.2540
Epoch [1/10], Step [230/255], Loss: 1.2241
Epoch [1/10], Step [

Epoch [1/10], Loss: 1.4896, Validation Loss: 1.3841, Validation Accuracy: 74.62%
