In [1]:
import torch
import numpy as np
from torchvision import transforms
import torchvision.models as  models
from torch.utils.data import DataLoader
from src.dataset import PascalVOC_Dataset
import torch.optim as optim
from src.utils import encode_labels, plot_history
import os
import torch.utils.model_zoo as model_zoo
import src.utils as utils 
import torch.nn as nn
from torch.nn import functional as F
import torch.optim as optim
from tqdm import tqdm
import gc
from PIL import Image
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
import cv2

In [3]:
mean=[0.457342265910642, 0.4387686270106377, 0.4073427106250871]
std=[0.26753769276329037, 0.2638145880487105, 0.2776826934044154]
    
mean=[0.485, 0.456, 0.406]
std=[0.229, 0.224, 0.225]
    
transformations = transforms.Compose([transforms.Resize((224, 224)),
#                                      transforms.RandomChoice([
#                                              transforms.CenterCrop(300),
#                                              transforms.RandomResizedCrop(300, scale=(0.80, 1.0)),
#                                              ]),                                      
                                      transforms.RandomChoice([
                                          transforms.ColorJitter(brightness=(0.80, 1.20)),
                                          transforms.RandomGrayscale(p = 0.25)
                                          ]),
                                      transforms.RandomHorizontalFlip(p = 0.25),
                                      transforms.RandomRotation(25),
                                      transforms.ToTensor(), 
                                      transforms.Normalize(mean = mean, std = std),
                                      ])
        
transformations_valid = transforms.Compose([transforms.Resize((224,224)), 
                                          transforms.ToTensor(), 
                                           transforms.Normalize(mean = mean, std = std),
                                          ])
data_dir='./VOCdevkit-1'
# Create train dataloader
dataset_train = PascalVOC_Dataset(data_dir,
                                      year='2007', 
                                      image_set='train', 
                                      download=False, 
                                      transform=transformations, 
                                      target_transform=encode_labels)
    
train_loader = DataLoader(dataset_train, batch_size=5, num_workers=0, shuffle=True)
dataset_valid = PascalVOC_Dataset(data_dir,
                                      year='2007', 
                                      image_set='val', 
                                      download=False, 
                                      transform=transformations_valid, 
                                      target_transform=encode_labels)
    
valid_loader = DataLoader(dataset_valid, batch_size=5, num_workers=0)

In [5]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device                            


device(type='cpu')

In [5]:
resnet50 = models.resnet50(pretrained=True).to(device)
    
for param in resnet50.parameters():
    param.requires_grad = False   

resnet50.fc = nn.Sequential(nn.Linear(2048, 128),
               nn.ReLU(inplace=True),
               nn.Linear(128, 20)).to(device)
resnet50.load_state_dict(torch.load('models/Resnet50.h5'))


<All keys matched successfully>

In [6]:
alexnet = models.alexnet(pretrained=True).to(device)
    
for param in alexnet.parameters():
    param.requires_grad = False   



for param in alexnet.parameters():
  param.requires_grad = False
alexnet.classifier[4]=nn.Linear(4096,128)
alexnet.classifier[6]=nn.Linear(128,20).to(device)
alexnet.load_state_dict(torch.load('models/AlexNet.h5'))



<All keys matched successfully>

In [32]:
inceptionv3 = models.inception_v3(pretrained=True).to(device)
    
for param in inceptionv3.parameters():
    param.requires_grad = False   

inceptionv3.aux_logits = False

num_ftrs = inceptionv3.fc.in_features
inceptionv3.fc = torch.nn.Sequential(
    nn.Linear(num_ftrs,128),
    nn.ReLU(inplace=True),
    nn.Linear(128,20)
)
inceptionv3.load_state_dict(torch.load('models/Inception_V3.h5'))




<All keys matched successfully>

In [6]:
effecientnet=models.efficientnet_b0(pretrained=True).to(device)
for params in effecientnet.classifier.parameters():
    params.requires_grad=False
effecientnet.classifier=nn.Sequential(
    nn.Linear(1280,128),
    nn.ReLU(inplace=True),
    nn.Linear(128,20)

)
effecientnet.load_state_dict(torch.load('models/EffecientNet.h5'))



<All keys matched successfully>

In [7]:
mobilenet=models.mobilenet_v2(pretrained=True).to(device)
for param in mobilenet.classifier.parameters():
    param.requires_grad=False
mobilenet.classifier=nn.Sequential(
    nn.Linear(1280,128),
    nn.ReLU(inplace=True),
    nn.Linear(128,20)

)
mobilenet.load_state_dict(torch.load('models/MobileNet.h5'))


<All keys matched successfully>

In [12]:
resnet50_pred_prob=[]
resnet50.train(False)
for idx,(data,target) in enumerate(valid_loader):
    output=resnet50(data)
    probs=torch.nn.functional.softmax(output,1).cpu()
    for prob in probs:
        resnet50_pred_prob.append(prob)

In [13]:
for index in range(0,len(resnet50_pred_prob)):
    resnet50_pred_prob[index]=resnet50_pred_prob[index].cpu().detach().numpy()
    

In [14]:
alexnet_pred_prob=[]
alexnet.train(False)
for idx,(data,target) in enumerate(valid_loader):
    output=alexnet(data)
    probs=torch.nn.functional.softmax(output,1).cpu()
    for prob in probs:
        alexnet_pred_prob.append(prob)
        

In [16]:
for index in range(0,len(alexnet_pred_prob)):
    alexnet_pred_prob[index]=alexnet_pred_prob[index].cpu().detach().numpy()
    

In [17]:
alexnet_pred_prob=np.array(alexnet_pred_prob)
resnet50_pred_prob=np.array(resnet50_pred_prob)
np.savez("resnet50_pred_prob.npz",resnet50_pred_prob)
np.savez("alexnet_pred_prob.npz",alexnet_pred_prob)

In [8]:
effecientnet_pred_prob=[]
effecientnet.train(False)
with torch.no_grad():
  for idx,(data,target) in enumerate(valid_loader):
     
        output=effecientnet(data)
        probs=torch.nn.functional.softmax(output,1).cpu()
        for prob in probs:
          effecientnet_pred_prob.append(prob)
        del data, target, output
        gc.collect()
        torch.cuda.empty_cache()

In [9]:
for index  in range(0,len(effecientnet_pred_prob)):
    effecientnet_pred_prob[index]=effecientnet_pred_prob[index].cpu().detach().numpy()

effecientnet_pred_prob=np.array(effecientnet_pred_prob)
np.savez("effecientnet_pred_prob.npz",effecientnet_pred_prob)

In [10]:
mobilenet_pred_prob=[]
mobilenet.train(False)
with torch.no_grad():
  for idx,(data,target) in enumerate(valid_loader):
     
        output=mobilenet(data)
        probs=torch.nn.functional.softmax(output,1).cpu()
        for prob in probs:
          mobilenet_pred_prob.append(prob)
        del data, target, output
        gc.collect()
        torch.cuda.empty_cache()

In [11]:
for index  in range(0,len(mobilenet_pred_prob)):
    mobilenet_pred_prob[index]=mobilenet_pred_prob[index].cpu().detach().numpy()

mobilenet_pred_prob=np.array(mobilenet_pred_prob)
np.savez("mobilenet_pred_prob.npz",mobilenet_pred_prob)

In [23]:
mean=[0.457342265910642, 0.4387686270106377, 0.4073427106250871]
std=[0.26753769276329037, 0.2638145880487105, 0.2776826934044154]
    
mean=[0.485, 0.456, 0.406]
std=[0.229, 0.224, 0.225]
    
transformations = transforms.Compose([transforms.Resize((299, 299)),
#                                      transforms.RandomChoice([
#                                              transforms.CenterCrop(300),
#                                              transforms.RandomResizedCrop(300, scale=(0.80, 1.0)),
#                                              ]),                                      
                                      transforms.RandomChoice([
                                          transforms.ColorJitter(brightness=(0.80, 1.20)),
                                          transforms.RandomGrayscale(p = 0.25)
                                          ]),
                                      transforms.RandomHorizontalFlip(p = 0.25),
                                      transforms.RandomRotation(25),
                                      transforms.ToTensor(), 
                                      transforms.Normalize(mean = mean, std = std),
                                      ])
        
transformations_valid = transforms.Compose([transforms.Resize((299,299)), 
                                          transforms.ToTensor(), 
                                           transforms.Normalize(mean = mean, std = std),
                                          ])
data_dir='./VOCdevkit-1'
# Create train dataloader
dataset_train = PascalVOC_Dataset(data_dir,
                                      year='2007', 
                                      image_set='train', 
                                      download=False, 
                                      transform=transformations, 
                                      target_transform=encode_labels)
    
train_loader = DataLoader(dataset_train, batch_size=5, num_workers=0, shuffle=True)
dataset_valid = PascalVOC_Dataset(data_dir,
                                      year='2007', 
                                      image_set='val', 
                                      download=False, 
                                      transform=transformations_valid, 
                                      target_transform=encode_labels)
    
valid_loader = DataLoader(dataset_valid, batch_size=5, num_workers=0)

In [33]:
inceptionv3_pred_prob=[]
inceptionv3.train(False)
with torch.no_grad():
  for idx,(data,target) in enumerate(valid_loader):
     
        output=inceptionv3(data)
        probs=torch.nn.functional.softmax(output,1).cpu()
        for prob in probs:
          inceptionv3_pred_prob.append(prob)
        del data, target, output
        gc.collect()
        torch.cuda.empty_cache()

In [34]:
for index  in range(0,len(inceptionv3_pred_prob)):
    inceptionv3_pred_prob[index]=inceptionv3_pred_prob[index].cpu().detach().numpy()

inceptionv3_pred_prob=np.array(inceptionv3_pred_prob)
np.savez("inceptionv3_pred_prob.npz",inceptionv3_pred_prob)

In [57]:
resnet50_pred_prob=np.load("resnet50_pred_prob.npz")
inceptionv3_pred_prob=np.load("inceptionv3_pred_prob.npz")
mobilenet_pred_prob=np.load("mobilenet_pred_prob.npz")
alexnet_pred_prob=np.load("alexnet_pred_prob.npz")
effecient_pred_prob=np.load("effecientnet_pred_prob.npz")

In [58]:
resnet50_pred_prob=resnet50_pred_prob['arr_0']
inceptionv3_pred_prob=inceptionv3_pred_prob['arr_0']
mobilenet_pred_prob=mobilenet_pred_prob['arr_0']
alexnet_pred_prob=alexnet_pred_prob['arr_0']
effecient_pred_prob=effecient_pred_prob['arr_0']

array([0.00698527, 0.01357401, 0.00483095, 0.00258372, 0.00213092,
       0.00712667, 0.91883   , 0.00237778, 0.00369421, 0.00221537,
       0.00104432, 0.00347559, 0.00116759, 0.00353224, 0.01100953,
       0.00209423, 0.00166113, 0.00226779, 0.00512919, 0.00426955],
      dtype=float32)

In [216]:
concat_pred_probs=[0]*5

for index in range(0,5):
        concat_pred_probs[index]=[]

for index in range(0,2510):
        concat_pred_probs[0].append([resnet50_pred_prob[index]])
        concat_pred_probs[1].append([effecient_pred_prob[index]])
        concat_pred_probs[2].append([alexnet_pred_prob[index]])
        concat_pred_probs[3].append([inceptionv3_pred_prob[index]])
        concat_pred_probs[4].append([mobilenet_pred_prob[index]])

       


In [220]:
print(concat_pred_probs[0][15])
print(concat_pred_probs[0][0])

[array([1.6111665e-03, 8.8136960e-03, 7.3103065e-04, 4.4348679e-04,
       4.4520240e-04, 1.9558598e-03, 9.7083718e-01, 1.4450429e-04,
       6.9646246e-04, 2.0685351e-04, 1.5568979e-04, 2.8960247e-04,
       8.4889754e-05, 4.1533695e-03, 6.4152996e-03, 4.6456611e-04,
       1.5660979e-04, 2.0637883e-04, 8.3264115e-04, 1.3555238e-03],
      dtype=float32)]
[array([1.7615862e-03, 6.9713555e-03, 2.6498262e-03, 2.1270358e-03,
       1.3870042e-02, 2.2450639e-03, 2.3323186e-03, 6.8960763e-03,
       6.4623046e-01, 5.9025333e-04, 1.2016954e-01, 3.7123377e-03,
       3.8586726e-04, 2.9466937e-03, 1.6492698e-02, 4.5313604e-02,
       3.5319896e-04, 5.8152977e-02, 2.2345546e-03, 6.4564548e-02],
      dtype=float32)]


In [223]:
print(concat_pred_probs[0][15][0][0])
print(concat_pred_probs[0][0][0][0])

0.0016111665
0.0017615862


In [199]:
concat_pred_probs[1][2500].shape
concat_pred_probs.squeeze(2)

array([[[1.76158617e-03, 6.97135553e-03, 2.64982623e-03, ...,
         5.81529774e-02, 2.23455462e-03, 6.45645484e-02],
        [6.98527135e-03, 1.35740135e-02, 4.83095320e-03, ...,
         2.26778560e-03, 5.12919063e-03, 4.26955475e-03],
        [2.50270932e-05, 3.33617878e-04, 1.49602165e-05, ...,
         5.72031013e-06, 4.30688888e-05, 2.42965871e-05],
        ...,
        [5.09803696e-03, 1.53692532e-02, 1.99591159e-03, ...,
         1.26474025e-02, 8.70989356e-03, 6.68602958e-02],
        [9.92615297e-02, 2.90360302e-02, 1.76657643e-02, ...,
         4.95923962e-03, 1.07968241e-01, 6.36317860e-03],
        [2.58844258e-04, 9.64893475e-02, 4.91358223e-04, ...,
         1.90171448e-03, 7.39267038e-04, 2.10552919e-03]],

       [[1.15851872e-04, 2.79524713e-04, 5.65738650e-03, ...,
         2.30725780e-02, 2.07096920e-04, 4.13043797e-02],
        [1.13103772e-04, 1.28607394e-03, 5.71312942e-03, ...,
         3.18116217e-04, 1.11396203e-03, 2.14167478e-04],
        [2.31431404e-04, 

ValueError: cannot select an axis to squeeze out which has size not equal to one

In [239]:
print(concat_pred_probs[0][15][0])
print(concat_pred_probs[0][0][0])

0.0016111665
0.0017615862


In [241]:
max_concat_feats=[0]*2510
for i in range(0,2510):
     max_concat_feats[i]=[]
index=0
while(index < 2510):
     for i in range(0,20):

          max_concat_feats[index].append(np.max([concat_pred_probs[0][index][i],concat_pred_probs[1][index][i],concat_pred_probs[2][index][i],concat_pred_probs[3][index][i],concat_pred_probs[4][index][i]]))
     index=index+1     
          
        

0.0069852713

0.0016111665

In [251]:
min_concat_feats=[0]*2510
for i in range(0,2510):
     min_concat_feats[i]=[]
index=0
while(index < 2510):
     for i in range(0,20):

          min_concat_feats[index].append(np.min([concat_pred_probs[0][index][i],concat_pred_probs[1][index][i],concat_pred_probs[2][index][i],concat_pred_probs[3][index][i],concat_pred_probs[4][index][i]]))
     index=index+1  

In [252]:
avg_concat_feats=[0]*2510
for i in  range(0,2510):
     avg_concat_feats[i]=[]
index=0
while(index < 2510):
     for i in range(0,20):

          avg_concat_feats[index].append(np.mean([concat_pred_probs[0][index][i],concat_pred_probs[1][index][i],concat_pred_probs[2][index][i],concat_pred_probs[3][index][i],concat_pred_probs[4][index][i]]))
     index=index+1  

In [134]:
val_labels=[]
for idx,(data,targets) in enumerate(valid_loader):
    for t in targets:
     
     val_labels.append(t.cpu().detach().numpy())



In [99]:
object_categories = ['aeroplane', 'bicycle', 'bird', 'boat',
                     'bottle', 'bus', 'car', 'cat', 'chair',
                     'cow', 'diningtable', 'dog', 'horse',
                     'motorbike', 'person', 'pottedplant',
                     'sheep', 'sofa', 'train', 'tvmonitor']

label="VOCdevkit-1/VOCdevkit/VOC2007/ImageSets/Main"

import os
img_Label_Dict=dict() 
temp=[]
for object in object_categories:
    for file in os.listdir(label):
        temp=[]
        name=str(object)+'_val.txt'
        if(file==name):
            file_obj=open(os.path.join(label,f"{file}"),"r")
            file_names=file_obj.read()
            lines=file_names.splitlines()
            for line in lines:
                content=line.split()
                if(('-1' or '0') not in content):
                    temp.append((content[0],content[1]))
        if(temp):
            img_Label_Dict[object]=temp




  

In [100]:
from collections import defaultdict
correct_label_per_class=defaultdict(list)
for c in img_Label_Dict:
    for label in img_Label_Dict[c]:
        correct_label_per_class[c].append(int(label[1]))



In [101]:

file_names_list=0
dir='VOCdevkit-1/VOCdevkit/VOC2007/ImageSets/Main/'
image_dir='VOCdevkit-1/VOCdevkit/VOC2007/JPEGImages/'
for file in os.listdir(dir):
    if(file == f"val.txt"):
        #file_path=os.path.join(dir,"")
        file_obj=open(os.path.join(dir,f"{file}"),"r")
        file_names=file_obj.read()
        file_name_list=file_names.splitlines()        

        

Max

In [257]:
counter=0
predicted_probs_per_class=defaultdict(list)
for c in object_categories:
    for label in img_Label_Dict[c]:
    
        for i,s in enumerate(file_name_list):
            if (str(label[0]) in s):
                predicted_probs_per_class[c].append(max_concat_feats[i][counter])
    counter=counter+1
            
              
            
           
                        

                    
       

In [258]:
from sklearn.metrics import average_precision_score
average_percision_per_class=[]
for c in object_categories:
    con_pred=[1 if x > 0.5 else 0 for x in predicted_probs_per_class[c]]
    average_percision_per_class.append(average_precision_score(correct_label_per_class[c],con_pred))
print(np.mean(average_percision_per_class))
                                

0.9599923492762568


avg

In [255]:
counter=0
predicted_probs_per_class=defaultdict(list)
for c in object_categories:
    for label in img_Label_Dict[c]:
    
        for i,s in enumerate(file_name_list):
            if (str(label[0]) in s):
                predicted_probs_per_class[c].append(avg_concat_feats[i][counter])
    counter=counter+1
            
              
            
           
                        

                    
       


In [256]:
from sklearn.metrics import average_precision_score
average_percision_per_class=[]
for c in object_categories:
    con_pred=[1 if x > 0.5 else 0 for x in predicted_probs_per_class[c]]
    average_percision_per_class.append(average_precision_score(correct_label_per_class[c],con_pred))
print(np.mean(average_percision_per_class))
                                

0.9465619850268864


Min

In [253]:
counter=0
predicted_probs_per_class=defaultdict(list)
for c in object_categories:
    for label in img_Label_Dict[c]:
    
        for i,s in enumerate(file_name_list):
            if (str(label[0]) in s):
                predicted_probs_per_class[c].append(min_concat_feats[i][counter])
    counter=counter+1
            
              
            
           
                        

                    
       

In [254]:
from sklearn.metrics import average_precision_score
average_percision_per_class=[]
for c in object_categories:
    con_pred=[1 if x > 0.5 else 0 for x in predicted_probs_per_class[c]]
    average_percision_per_class.append(average_precision_score(correct_label_per_class[c],con_pred))
print(np.mean(average_percision_per_class))
                                

0.9370693386047739


In [246]:
from sklearn.metrics import average_precision_score
average_percision_per_class=[]
for c in object_categories:
    con_pred=[1 if x > 0.5 else 0 for x in predicted_probs_per_class[c]]
    average_percision_per_class.append(average_precision_score(correct_label_per_class[c],con_pred))
print(np.mean(average_percision_per_class))
                                

0.9599923492762568
