In [1]:
import torch
import numpy as np
from torchvision import datasets
import torchvision.transforms as transforms

import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import csv
from sklearn.model_selection import train_test_split


In [2]:
#Creating a preciate matrix-2d list
predicate_file = "predicate-matrix-binary.csv"
with open(predicate_file, "r") as csvfile:
    reader = csv.reader(csvfile)
    predicate_matrix = []
    for row in reader:
        binary_digits = row[0].split(" ")
        predicate_matrix.append([int(value) for value in binary_digits])

In [3]:


class CustomDataset(Dataset):
    def __init__(self, features_file, labels_file,slice, transform=None):
        self.slice = slice
        self.features_frame = pd.read_csv(features_file)
        self.features_frame = self.features_frame.sample(frac=self.slice,random_state=42)
        self.labels_frame = pd.read_csv(labels_file,header=None)
        self.labels_frame = self.labels_frame.sample(frac=self.slice,random_state=42)
        self.transform = transform

    def __len__(self):
        return len(self.features_frame)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        features = self.features_frame.iloc[idx, :].values.astype('float32')
        label = self.labels_frame.iloc[idx, :].values.astype('float32')

        if self.transform:
            features = self.transform(features)

        return features, label


features_file = 'AwA2-features-float.csv'
labels_file = 'AwA2-labels.csv'
custom_dataset = CustomDataset(features_file, labels_file,0.5)
# Split the dataset into train and test sets
train_size = 0.8
test_size = 1 - train_size
train_dataset, test_dataset = train_test_split(custom_dataset, test_size=test_size, random_state=42)

# Create data loaders
awa_train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
awa_test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [4]:
import torch.nn as nn
# define the NN architecture
class AWA_Autoencoder(nn.Module):
    def __init__(self, encoding_dim):
        super(AWA_Autoencoder, self).__init__()
        ## encoder ##
        self.encoder = nn.Sequential(
            nn.Linear(2048,512),nn.ReLU(),nn.Linear(512,128),nn.ReLU(),nn.Linear(128, encoding_dim),nn.Sigmoid())
        ## decoder ##
        self.decoder = nn.Sequential(nn.Linear(encoding_dim, 128),nn.ReLU(),nn.Linear(128, 512),nn.ReLU(),nn.Linear(512,2048),nn.ReLU())

    def forward(self, x):
        # define feedforward behavior 
        # and scale the *output* layer with a sigmoid activation function
        
        # pass x into encoder
        out = (self.encoder(x))
        # pass out into decoder
        out = (self.decoder(out))
        
        return out
    def embed(self,x):
        return (self.encoder(x))

In [14]:
# AWA model
encoding_dim = 85
awa_model = AWA_Autoencoder(encoding_dim)
print(awa_model)

# specify loss function
criterionmse = nn.MSELoss()
criterionce = nn.CrossEntropyLoss()
criterionmce = nn.BCEWithLogitsLoss()
# Loss function - ELBO (Evidence Lower Bound) with MSE loss
def loss_function(recon_x, x, mu, log_var):
    BCE = nn.functional.binary_cross_entropy(recon_x, x.view(-1, 784), reduction='sum')#alculate MSE loss
    KLD = -0.5 * torch.sum(1 + log_var - mu.pow(2) - log_var.exp())
    return BCE + KLD

# specify loss function
optimizer = torch.optim.Adam(awa_model.parameters(), lr=0.001)

AWA_Autoencoder(
  (encoder): Sequential(
    (0): Linear(in_features=2048, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=128, bias=True)
    (3): ReLU()
    (4): Linear(in_features=128, out_features=85, bias=True)
    (5): Sigmoid()
  )
  (decoder): Sequential(
    (0): Linear(in_features=85, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=2048, bias=True)
    (5): ReLU()
  )
)


In [16]:
#AWA
# number of epochs to train the model
n_epochs = 20

for epoch in range(1, n_epochs+1):
    train_loss = 0.0
    for data in awa_train_loader:
        # _ stands in for labels, here
        images, labels = data
        
        # flatten images
        images = images.view(images.size(0), -1)

        attr = [predicate_matrix[int(x[0])-1] for x in labels]
        attr = torch.tensor(attr)
        attr = attr.float()
        optimizer.zero_grad()

        # forward pass: compute predicted outputs by passing inputs to the model
        outputs = awa_model(images)
        latent = awa_model.embed(images)
        
        # calculate the loss
        loss = criterionmse(outputs, images)
        loss+= 0.1*criterionmce(latent,attr)
        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()
        # perform a single optimization step (parameter update)
        optimizer.step()
        # update running training loss
        train_loss += loss.item()*images.size(0)
            
    # print avg training statistics 
    train_loss = train_loss/len(awa_train_loader)
    print('Epoch: {} \tTraining Loss: {:.6f}'.format(
        epoch, 
        train_loss
        ))

TypeError: loss_function() missing 4 required positional arguments: 'recon_x', 'x', 'mu', and 'log_var'

In [13]:
total_loss = 0
num_samples = 0
for data in awa_test_loader:
    img, _ = data
    img = img.view(img.size(0), -1)
    output = awa_model(img)
    loss = criterionmse(output, img)
    total_loss += loss.item() * img.size(0)
    num_samples += img.size(0)

average_loss = total_loss / num_samples
print(f'Average Reconstruction Error: {average_loss:.4f}')

Average Reconstruction Error: 0.1827


Performing nearest neighbour search for the predicted attribute vector to find the Zero Shot image label

In [24]:
#function to pick the label given the attribute vector
def zsl_label_prediction(predicted_vector):
    predicted_vector_np = predicted_vector.detach().numpy()  # Convert from PyTorch tensor to NumPy array
    #predicted_vector_np = np.array(predicted_vector)  # Convert from PyTorch tensor to NumPy array
    # Calculate the cosine similarity between the predicted vector and each binary vector
    similarities = []
    for binary_vector in predicate_matrix:
        binary_vector_np = np.array(binary_vector) # Convert from PyTorch tensor to NumPy array
        similarity = np.dot(predicted_vector_np, binary_vector_np) / (np.linalg.norm(predicted_vector_np) * np.linalg.norm(binary_vector_np))
        similarities.append(similarity)

    # Find the index of the binary vector with the highest similarity
    closest_index = np.argmax(similarities)

    # Retrieve the closest binary vector
    return closest_index+1

In [32]:
image, label = test_dataset[35]  # Sample run on an image
image = torch.tensor(image)
with torch.no_grad():  # No need to compute gradients during inference
    output = awa_model.embed(image.unsqueeze(0))  # Adding batch dimension since model expects batch input

#output = awa_model.forward(image)
predicted_attr = torch.softmax(output, dim=1)

pred = zsl_label_prediction(predicted_attr)
print(pred)
print(label)

13
[13.]


In [38]:
#Evaluating ZSL on the test data set
awa_model.eval()  # Set the model to evaluation mode
correct = 0
total = 0

for i in range(len(test_dataset)):
    # Get the i-th sample from the test dataset
    sample = test_dataset[i]

    # Extract input and label from the sample
    input_data, label = sample

    input_data = torch.tensor(input_data)  # Convert to tensor if necessary
    # input_data = input_data.to(device)  # Move to device if necessary

    # Perform a forward pass through the model to obtain predictions
    with torch.no_grad():  # Disable gradient computation for inference
        output = awa_model.embed(input_data.unsqueeze(0))  # Assuming model expects a batch dimension
    predicted_attr = torch.softmax(output, dim=1)
    pred = zsl_label_prediction(predicted_attr)
    correct+= pred==label
    total+=1

accuracy = correct / total
print(accuracy)

[0.31422448]
