In [1]:
import os
import clip
import torch
from PIL import Image
from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as transforms
from sklearn.preprocessing import LabelEncoder, MultiLabelBinarizer, OneHotEncoder
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.linear_model import LogisticRegression 
from sklearn.multioutput import MultiOutputClassifier
from sklearn.metrics import accuracy_score
from itertools import permutations
from scipy.special import kl_div
import itertools
import numpy as np
import copy
import shutil

import os
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import numpy as np

In [2]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the CLIP ViT-B/32 checkpoint together with its image preprocessing
# transform, then cast the model to float32.
# NOTE(review): the cached feature files loaded further down are named
# "*_vit14" while this loads 'ViT-B/32' -- confirm which backbone actually
# produced those features.
clip_model, preprocess = clip.load('ViT-B/32', device)
clip_model = clip_model.float()

In [3]:
# Locations of the cached CLIP features and labels (train / val splits,
# judging by the file names).
train_features_path = "/home/samyakr2/multilabel/ARK/pascal_train_clip_features_vit14.pt"
train_labels_path = '/home/samyakr2/multilabel/ARK/pascal_train_clip_labels_vit14.pt'
val_features_path = "/home/samyakr2/multilabel/ARK/pascal_val_clip_features_vit14.pt"
val_labels_path = '/home/samyakr2/multilabel/ARK/pascal_val_clip_labels_vit14.pt'

# Deserialize the four files in a fixed order: train features, train labels,
# val features, val labels.
train_features, train_labels, val_features, val_labels = [
    torch.load(cached_path)
    for cached_path in (
        train_features_path,
        train_labels_path,
        val_features_path,
        val_labels_path,
    )
]

In [37]:
class clip_2fc(nn.Module):
    """Two-layer fully connected head that emits per-class sigmoid scores.

    Args:
        input_dim: Width of each input feature vector.
        hidden_dim: Number of units in the hidden layer.
        output_dim: Number of output scores produced per sample.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()

        # The bias-free first layer lives inside a Sequential, so its weight
        # is stored under the state-dict key "fc1.0.weight".
        self.fc1 = nn.Sequential(nn.Linear(input_dim, hidden_dim, bias=False))
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)

        # Registered on the module but not applied anywhere in forward().
        self.softmax = nn.Softmax(dim=1)
        self.dropout = nn.Dropout(0.3)

        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return sigmoid(fc2(relu(fc1(x)))) for the input batch ``x``."""
        hidden = self.relu(self.fc1(x))
        return self.sigmoid(self.fc2(hidden))

In [41]:
# Model dimensions are read from the cached data instead of being hard-coded.
input_size = train_features[0].size(1)  # second dimension of the first feature batch
hidden_size = 100  # Define the size of the hidden layer
num_classes = len(train_labels[0][0])  # Assuming train_labels is a sequence of batches of per-sample label vectors


# Seed NumPy and PyTorch (CPU + CUDA) before the model is built so that the
# weight initialisation is repeatable.
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)


print(input_size)
# Initialize the model
model = clip_2fc(input_size, hidden_size, num_classes).to(device)

# Define loss function and optimizer
criterion = nn.BCELoss()  # Binary Cross-Entropy Loss for multilabel classification
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # Adam optimizer with learning rate 0.001

checkpoint_path = "/home/samyakr2/multilabel/ARK/clip+2fc/best_clip_2fc.pth"

# Training loop
best_model_state_dict = None
best_loss = float('inf')
num_epochs = 10
model.train()  # make the training mode explicit in case the model was left in eval mode
for epoch in range(num_epochs):
    epoch_loss = 0.0
    for features_batch, labels_batch in zip(train_features, train_labels):
        # Flatten each sample to a vector and move the batch to the model's device
        features_batch = features_batch.view(features_batch.size(0), -1).to(device)

        # as_tensor accepts both tensors and nested lists; unlike torch.tensor it
        # does not warn (or copy needlessly) when the labels are already a tensor.
        labels_tensor = torch.as_tensor(labels_batch, dtype=torch.float32).to(device)

        # Forward pass
        outputs = model(features_batch)

        # Compute loss
        loss = criterion(outputs, labels_tensor)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    # Print the summed per-batch loss for the epoch
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss}")
    if epoch_loss < best_loss:
        best_loss = epoch_loss
        # state_dict() returns references to the live parameters, which keep
        # changing as training continues; deepcopy freezes a real snapshot of
        # the best epoch instead of silently ending up with the last one.
        # NOTE(review): "best" is judged on the training loss, not on a
        # held-out split -- confirm this is intended.
        best_model_state_dict = copy.deepcopy(model.state_dict())

# Save the best model (create the target directory first if it is missing)
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
torch.save(best_model_state_dict, checkpoint_path)

512


  labels_tensor = torch.tensor(labels_batch, dtype=torch.float32)


Epoch [1/10], Loss: 50.16105733811855
Epoch [2/10], Loss: 21.40542382746935
Epoch [3/10], Loss: 18.183908856473863
Epoch [4/10], Loss: 16.408542413264513
Epoch [5/10], Loss: 15.083921588025987
Epoch [6/10], Loss: 13.954736817860976
Epoch [7/10], Loss: 12.96553359576501
Epoch [8/10], Loss: 12.03550035203807
Epoch [9/10], Loss: 11.158271201653406
Epoch [10/10], Loss: 10.342951761791483


In [42]:
import numpy as np
from sklearn.metrics import average_precision_score

# Define a function for testing the model
def test_model(model, criterion, features_batches, labels_batches, device):
    model.eval()  # Set the model to evaluation mode
    test_loss = 0.0
    all_labels = []
    all_outputs = []
    with torch.no_grad():  # Disable gradient computation
        for features_batch, labels_batch in zip(features_batches, labels_batches):
            # Move batch to device
            features_batch = features_batch.to(device)
            labels_tensor = torch.tensor(labels_batch, dtype=torch.float32).to(device)

            # Flatten features batch
            features_batch = features_batch.view(features_batch.size(0), -1)

            # Forward pass
            outputs = model(features_batch)

            # Compute loss
            loss = criterion(outputs, labels_tensor)

            test_loss += loss.item()

            # Convert outputs and labels to numpy arrays
            outputs_np = outputs.cpu().detach().numpy()
            labels_np = labels_tensor.cpu().detach().numpy()

            all_outputs.append(outputs_np)
            all_labels.append(labels_np)

    # Concatenate outputs and labels
    all_outputs = np.concatenate(all_outputs)
    all_labels = np.concatenate(all_labels)

    # Compute average precision score
    avg_precision = average_precision_score(all_labels, all_outputs, average='micro')

    # Average test loss
    avg_test_loss = test_loss / len(features_batches)
    print(f"Test Loss: {avg_test_loss}")
    print(f"Average Precision Score: {avg_precision}")


# Reload the checkpoint written by the training cell and score it on the
# validation batches. map_location remaps the stored tensors onto the current
# device, so a checkpoint saved from a GPU session still loads in a CPU-only
# kernel (and the other way round).
best_model_state_dict = torch.load(
    "/home/samyakr2/multilabel/ARK/clip+2fc/best_clip_2fc.pth",
    map_location=device,
)
model.load_state_dict(best_model_state_dict)
test_model(model, criterion, val_features, val_labels, device)


Test Loss: 0.056872614582611634
Average Precision Score: 0.9326465316566379


  labels_tensor = torch.tensor(labels_batch, dtype=torch.float32).to(device)
