In [1]:
import os,sys
import torch
import torchvision
import torch.nn as nn
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader
import torchvision.models.resnet as models
import h5py
import numpy as np
import scipy.io as sio
import pickle
import pdb
import cv2
from numpy import expand_dims
from numpy import asarray
from PIL import Image
import time
from sklearn import preprocessing
import pandas as pd


In [2]:
data_path = 'C:/Sushree/Jio_Institute/Dataset/'

img_dir = os.path.join(data_path,'Animals_with_Attributes2/JPEGImages/')
print(img_dir)

file_paths = os.path.join(data_path,'data/xlsa17/data/AWA2/res101.mat')
print(file_paths)

C:/Sushree/Jio_Institute/Dataset/Animals_with_Attributes2/JPEGImages/
C:/Sushree/Jio_Institute/Dataset/data/xlsa17/data/AWA2/res101.mat


In [3]:
class CustomedDataset(Dataset):
    """Face Landmarks dataset."""

    def __init__(self, img_dir , file_paths, transform=None):
        self.matcontent = sio.loadmat(file_paths)
        self.image_files = np.squeeze(self.matcontent['image_files'])
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        image_file = self.image_files[idx][0]
        image_file = os.path.join(self.img_dir, '/'.join(image_file.split('/')[5:]))
        image = Image.open(image_file)
        
        if image.mode == 'L':
            image=image.convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image 

input_size = 224
data_transforms = transforms.Compose([
        transforms.Resize(input_size),
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
    
AWA2Dataset = CustomedDataset(data_path , file_paths, data_transforms)

batch_size = 32
dataset_loader = torch.utils.data.DataLoader(AWA2Dataset, batch_size=batch_size, shuffle=False, num_workers=0)  

In [4]:
model_name = "resnet"

device = torch.device("cuda:{}".format(idx_GPU) if torch.cuda.is_available() else "cpu")

model_ref = models.resnet101(pretrained = True)
model_ref.eval()

model_f = nn.Sequential(*list(model_ref.children())[:-2])
model_f.to(device)
model_f.eval()

for param in model_f.parameters():
    param.requires_grad = True
    
print(model_f)
        
from torchsummary import summary
summary(model_f, (3, 224, 224))    



Sequential(
  (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (4): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,408
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
            Conv2d-5           [-1, 64, 56, 56]           4,096
       BatchNorm2d-6           [-1, 64, 56, 56]             128
              ReLU-7           [-1, 64, 56, 56]               0
            Conv2d-8           [-1, 64, 56, 56]          36,864
       BatchNorm2d-9           [-1, 64, 56, 56]             128
             ReLU-10           [-1, 64, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]          16,384
      BatchNorm2d-12          [-1, 256, 56, 56]             512
           Conv2d-13          [-1, 256, 56, 56]          16,384
      BatchNorm2d-14          [-1, 256,

In [5]:
matcontent = AWA2Dataset.matcontent
labels = matcontent['labels'].astype(int).squeeze() - 1

split_path = os.path.join(data_path,'data/xlsa17/data/AWA2/att_splits.mat')
matcontent = sio.loadmat(split_path)
trainval_loc = matcontent['trainval_loc'].squeeze() - 1

print(matcontent)

print(labels, len(labels))
print(trainval_loc, len(trainval_loc))

{'__header__': b'MATLAB 5.0 MAT-file, Platform: GLNXA64, Created on: Fri Aug 21 10:36:20 2020', '__version__': '1.0', '__globals__': [], 'allclasses_names': array([[array(['antelope'], dtype='<U8')],
       [array(['grizzly+bear'], dtype='<U12')],
       [array(['killer+whale'], dtype='<U12')],
       [array(['beaver'], dtype='<U6')],
       [array(['dalmatian'], dtype='<U9')],
       [array(['persian+cat'], dtype='<U11')],
       [array(['horse'], dtype='<U5')],
       [array(['german+shepherd'], dtype='<U15')],
       [array(['blue+whale'], dtype='<U10')],
       [array(['siamese+cat'], dtype='<U11')],
       [array(['skunk'], dtype='<U5')],
       [array(['mole'], dtype='<U4')],
       [array(['tiger'], dtype='<U5')],
       [array(['hippopotamus'], dtype='<U12')],
       [array(['leopard'], dtype='<U7')],
       [array(['moose'], dtype='<U5')],
       [array(['spider+monkey'], dtype='<U13')],
       [array(['humpback+whale'], dtype='<U14')],
       [array(['elephant'], dtype='<U8')

In [6]:
#img_path = 'C:/Sushree/Jio_Institute/Dataset/Animals_with_Attributes2/'
img_path = 'C:/Sushree/Jio_Institute/Dataset/'
mat_path = os.path.join(data_path,'data/xlsa17/data/AWA2/res101.mat')

matcontent2 = sio.loadmat(mat_path)
print(matcontent2)

import torchvision.transforms as transforms
scaler = transforms.Resize((224, 224))
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

to_tensor = transforms.ToTensor()


{'__header__': b'MATLAB 5.0 MAT-file, Platform: GLNXA64, Created on: Mon May  8 16:33:27 2017', '__version__': '1.0', '__globals__': [], 'features': array([[1.27028406e-01, 0.00000000e+00, 3.83341342e-01, ...,
        1.67552959e-02, 1.21401340e-01, 2.43460596e-01],
       [3.23610830e+00, 2.46691060e+00, 1.01190424e+00, ...,
        1.10568988e+00, 1.05009258e+00, 1.25547945e+00],
       [9.35147524e-01, 2.64539570e-02, 0.00000000e+00, ...,
        1.03398517e-01, 6.92099705e-03, 4.84714359e-02],
       ...,
       [6.72237158e-01, 1.12520206e+00, 2.25265384e+00, ...,
        1.88486859e-01, 5.05729504e-02, 1.23365402e-01],
       [3.22649516e-02, 6.84306072e-03, 2.95226891e-02, ...,
        8.19242734e-04, 0.00000000e+00, 1.57233737e-02],
       [4.07616198e-01, 2.56229609e-01, 3.31381522e-02, ...,
        3.53541344e-01, 2.25771200e-02, 6.21905439e-02]]), 'image_files': array([[array(['/BS/xian/work/data/Animals_with_Attributes2//JPEGImages/antelope/antelope_10001.jpg'],
           

In [61]:
from torch.autograd import Variable

class AWA2DataLoader():
    def __init__(self, data_path, mat_path, img_path, labels, trainval_loc, device = None, is_scale = False, is_balance =True):

        print(data_path)
        sys.path.append(data_path)
        self.data_path = data_path # 'C:/Sushree/Jio_Institute/Dataset/'
        self.img_path = img_path
        self.datadir = self.data_path + 'data/AWA2/' # 'C:/Sushree/Jio_Institute/Dataset/data/AWA2/'
        self.index_in_epoch = 0
        self.epochs_completed = 0
        self.is_scale = is_scale
        self.is_balance = is_balance
        if self.is_balance:
            print('Balance dataloader')
        
        self.augment_img_path(mat_path, img_path, trainval_loc)
        self.read_matdataset(labels, trainval_loc)    
        self.get_idx_classes()

    def augment_img_path(self, mat_path = mat_path, img_path = img_path, trainval_loc = trainval_loc, transform = True):
        self.matcontent = sio.loadmat(mat_path)
        self.image_files = np.squeeze(self.matcontent['image_files'])
        print(self.image_files)
        self.transform = transform
        
        def read_images(image_files, img_path):
            new_images = []
            for idx in range(len(image_files)):
            #for idx in range(100):    
                print(idx)
                image_file = image_files[idx][0]
                image_file = os.path.join(img_path,'/'.join(image_file.split('/')[5:]))
                image = Image.open(image_file)
                if image.mode == 'L':
                    image = image.convert('RGB')
                if self.transform:
                    image = Variable(normalize(to_tensor(scaler(image))).unsqueeze(0))
                    #image = np.array(image)
                    print('image', image.size())
                    
                new_images.append(image)
            return new_images
        
        self.images = read_images(self.image_files, self.img_path)
        print(len(self.images))
        self.images = np.array(self.images)
        #print(self.images.shape)
        #self.data = {}
        #self.data['train_seen'] = {}
        #self.data['train_seen']['img_path'] = self.image_files[trainval_loc]
                

    def next_batch(self, batch_size):
        if self.is_balance:
            idx = []
            n_samples_class = max(batch_size //self.ntrain_class, 1)
            sampled_idx_c = np.random.choice(np.arange(self.ntrain_class), min(self.ntrain_class, batch_size), replace = False).tolist()
            for i_c in sampled_idx_c:
                idxs = self.idxs_list[i_c]
                idx.append(np.random.choice(idxs,n_samples_class))
            idx = np.concatenate(idx)
            idx = torch.from_numpy(idx)
        else:
            idx = torch.randperm(self.ntrain)[0:batch_size]
            
        batch_image = self.data['train_seen']['img_path'][idx]
        batch_label =  self.data['train_seen']['labels'][idx]
        return batch_label, batch_image
    
    def get_idx_classes(self):
        n_classes = self.seenclasses.size(0)
        self.idxs_list = []
        train_label = self.data['train_seen']['labels']
        for i in range(n_classes):
            idx_c = torch.nonzero(train_label == self.seenclasses[i].cpu()).cpu().numpy()
            idx_c = np.squeeze(idx_c)
            self.idxs_list.append(idx_c)
        return self.idxs_list

    def read_matdataset(self, labels, trainval_loc):
                        
        train_label = torch.from_numpy(labels[trainval_loc]).long()
        train_image = self.images[trainval_loc]
        train_image = torch.from_numpy(train_image)
        
        self.seenclasses = torch.from_numpy(np.unique(train_label.numpy()))     
                
        self.ntrain = train_label.size()[0]
        self.ntrain_class = self.seenclasses.size(0)
        self.train_class = self.seenclasses.clone()

        self.data = {}
        self.data['train_seen'] = {}
        self.data['train_seen']['labels'] = train_label
        self.data['train_seen']['img_path'] = train_image


In [62]:
dataloader = AWA2DataLoader(data_path, mat_path, img_path, labels, trainval_loc, device = None)

C:/Sushree/Jio_Institute/Dataset/
Balance dataloader
[array(['/BS/xian/work/data/Animals_with_Attributes2//JPEGImages/antelope/antelope_10001.jpg'],
       dtype='<U83')
 array(['/BS/xian/work/data/Animals_with_Attributes2//JPEGImages/antelope/antelope_10002.jpg'],
       dtype='<U83')
 array(['/BS/xian/work/data/Animals_with_Attributes2//JPEGImages/antelope/antelope_10003.jpg'],
       dtype='<U83')
 ...
 array(['/BS/xian/work/data/Animals_with_Attributes2//JPEGImages/zebra/zebra_11168.jpg'],
       dtype='<U77')
 array(['/BS/xian/work/data/Animals_with_Attributes2//JPEGImages/zebra/zebra_11169.jpg'],
       dtype='<U77')
 array(['/BS/xian/work/data/Animals_with_Attributes2//JPEGImages/zebra/zebra_11170.jpg'],
       dtype='<U77')                                                                     ]
0
image torch.Size([1, 3, 224, 224])
1
image torch.Size([1, 3, 224, 224])
2
image torch.Size([1, 3, 224, 224])
3
image torch.Size([1, 3, 224, 224])
4
image torch.Size([1, 3, 224, 224])
5
i

TypeError: only integer scalar arrays can be converted to a scalar index

In [12]:
images

NameError: name 'images' is not defined

In [None]:
params_to_update = []
params_names = []
for name,param in model_f.named_parameters():
    if param.requires_grad == True:
        params_to_update.append(param)
        params_names.append(name)
        #print("\t",name)
        
import torch.optim as optim
lr = 0.0001
weight_decay = 0.0001
momentum = 0.
optimizer  = optim.RMSprop(params_to_update, lr = lr, weight_decay = weight_decay, momentum=momentum)

for i in range(0, 100):
    model_f.train()
    optimizer.zero_grad()
    
    cal_epoc = np.ceil((batch_size * i)/dataloader.ntrain)
    
    batch_label, batch_image = dataloader.next_batch(batch_size)
    out_package = model_f(batch_image)
    
    in_package = out_package
    in_package['batch_label'] = batch_label
    
    out_package=model_f.compute_loss(in_package)
    loss, loss_CE, loss_cal = out_package['loss'], out_package['loss_CE'], out_package['loss_cal']

In [None]:
from torch.optim import SGD
from torch.nn import BCELoss

def train_model(train_dl, model):
    # define the optimization
    criterion = BCELoss()
    optimizer = SGD(model.parameters(), lr=0.01, momentum=0.9)
    # enumerate epochs
    for epoch in range(100):
        # enumerate mini batches
        for i, (inputs, targets) in enumerate(train_dl):
            # clear the gradients
            optimizer.zero_grad()
            # compute the model output
            yhat = model(inputs)
            # calculate loss
            loss = criterion(yhat, targets)
            # credit assignment
            loss.backward()
            # update model weights
            optimizer.step()

In [None]:
# train the model
train_model(dataloader, model_f)

In [None]:
best_performance = [0,0,0,0]
for i in range(0,niters):
    model_f.train()
    optimizer.zero_grad()
    
    cal_epoc = np.ceil((batch_size * i)/dataloader.ntrain)
    
    batch_label, batch_feature, batch_att = dataloader.next_batch(batch_size)
    out_package = model(batch_feature)
    
    in_package = out_package
    in_package['batch_label'] = batch_label
    
    out_package = model.compute_loss(in_package)
    loss, loss_CE, loss_cal = out_package['loss'], out_package['loss_CE'], out_package['loss_cal']
    
    loss.backward()
    optimizer.step()
    if i%report_interval==0:
        print('-'*30)
        # evaluate the model for every report interval
        acc_seen, acc_novel, H, acc_zs = eval_zs_gzsl(dataloader, model, device, bias_seen=-bias, bias_unseen=bias)
        
        if H > best_performance[2]:
            best_performance = [acc_seen, acc_novel, H, acc_zs]
        stats_package = {'epoch': cal_epoc, 'iter':i, 'loss':loss.item(), 'loss_CE':loss_CE.item(),
                         'loss_cal': loss_cal.item(),
                         'acc_seen':best_performance[0], 'acc_novel':best_performance[1], 'H':best_performance[2], 'acc_zs':best_performance[3]}
        
        print(stats_package)