In [2]:
'''
We create a dataset that loads the images of the NightOwls dataset, then run a
detection network on them and write the results to a JSON file in the expected format.
'''

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils
from torch.nn import utils
import torchvision

from PIL import Image
import os
import json

from coco import COCO


'''
Custom Dataset in order to load the images with the bbox and the labels
'''
class MyCustomDataset(torch.utils.data.Dataset):
    """Inference dataset over a COCO-style annotation file.

    Each item is ``(image_tensor, image_id)``. Annotations are NOT part of the
    item; use :meth:`load_annotations` to fetch them for a given image id.
    """

    def __init__(self, file_path, images_path):
        """Index the annotation file.

        Args:
            file_path: path to the COCO-style JSON annotation file.
            images_path: directory containing the image files named by the
                ``file_name`` entries of the annotation file.
        """
        # Only convert the image to a tensor; no resize/crop/normalisation is applied.
        self.transform_to_apply = transforms.Compose([transforms.ToTensor()])

        self.images_path = images_path

        # Load the annotation index and fix a deterministic (sorted) image order.
        self.cocovar = COCO(file_path)
        self.imgIds = sorted(self.cocovar.getImgIds())

        self.imgs_count = len(self.imgIds)

    def __getitem__(self, index):
        """Return ``(img, img_id)`` for the image at position ``index``.

        ``img`` is the result of ``transform_to_apply`` on the RGB image and
        ``img_id`` is the image id taken from the annotation file.
        """
        img_id = self.imgIds[index]
        cocoimg = self.cocovar.loadImgs(ids=img_id)[0]
        img_path = os.path.join(self.images_path, cocoimg['file_name'])

        # The context manager closes the file handle as soon as the pixels are
        # read; convert('RGB') guarantees a 3-channel input whatever the file mode.
        with Image.open(img_path) as img_file:
            img = self.transform_to_apply(img_file.convert('RGB'))

        # Annotations are intentionally not loaded here: they were never part
        # of the returned item, so looking them up per sample was wasted work.
        return (img, img_id)

    def load_annotations(self, img_id):
        """Return the annotations of ``img_id`` as ``[x, y, width, height, category_id]`` lists."""
        anns_to_ret = []
        for ann_id in self.cocovar.getAnnIds(img_id):
            ann = self.cocovar.loadAnns(ids=ann_id)[0]
            anns_to_ret.append(list(ann['bbox']) + [ann['category_id']])
        return anns_to_ret

    def __len__(self):
        """Number of images listed in the annotation file."""
        return self.imgs_count

# Function that will create the dataset and the dataloader and return the dataloader
def getDataLoader(file_path, images_path, batch_size, num_workers, shuffle=True, drop_last=False):
    """Build the dataset and return a DataLoader over it.

    Args:
        file_path: path to the COCO-style JSON annotation file.
        images_path: directory containing the image files.
        batch_size: number of images per batch.
        num_workers: number of DataLoader worker processes.
        shuffle: whether to visit the images in random order. Pass ``False``
            for a reproducible order.
        drop_last: whether to discard a final batch smaller than
            ``batch_size``. Defaults to ``False`` so that every image gets
            evaluated; dropping the tail silently skips images at inference.

    Returns:
        A ``torch.utils.data.DataLoader`` yielding ``(images, image_ids)`` batches.
    """
    dataset = MyCustomDataset(file_path, images_path)

    return torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=num_workers,
        drop_last=drop_last,
    )

# Class names indexed by integer label id: get_prediction looks each predicted
# label up in this list, and index 1 is 'person'.
# NOTE(review): the 'N/A' entries look like placeholders for label ids that have
# no class name — confirm against the label map of the detection model in use.
COCO_INSTANCE_CATEGORY_NAMES = [
    '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
    'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
    'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
    'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
    'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
    'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
    'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
    'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
    'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]

def get_prediction(img_path, threshold, model):
    """Run ``model`` on a single image file and keep the confident detections.

    Args:
        img_path: path of the image to load.
        threshold: detections are kept up to and including the last one whose
            score is strictly greater than this value.
        model: callable that takes a list of image tensors and returns a list
            of dicts with ``'boxes'``, ``'labels'`` and ``'scores'`` tensors.
            The image is passed as a CPU tensor.

    Returns:
        ``(pred_boxes, pred_class, pred_scores)``: boxes as
        ``[(x1, y1), (x2, y2)]`` pairs, class names looked up in
        ``COCO_INSTANCE_CATEGORY_NAMES``, and the matching scores. All three
        lists are empty when no score exceeds ``threshold``.
    """
    # Close the file once decoded and force three channels.
    with Image.open(img_path) as img_file:
        img = transforms.ToTensor()(img_file.convert('RGB'))

    # Pure inference: gradients are not needed.
    with torch.no_grad():
        pred = model([img])[0]

    # .cpu() makes the conversion work when the model's outputs live on a GPU.
    labels = pred['labels'].detach().cpu().numpy()
    boxes = pred['boxes'].detach().cpu().numpy()
    pred_score = list(pred['scores'].detach().cpu().numpy())

    # Position of the last detection above the threshold. enumerate() gives
    # the true position even when several detections share the same score
    # (list.index would return the first match and cut the result short).
    above = [pos for pos, score in enumerate(pred_score) if score > threshold]
    if not above:
        return [], [], []
    keep = above[-1] + 1

    pred_class = [COCO_INSTANCE_CATEGORY_NAMES[i] for i in list(labels)][:keep]
    pred_boxes = [[(b[0], b[1]), (b[2], b[3])] for b in list(boxes)][:keep]
    pred_scores = pred_score[:keep]
    return pred_boxes, pred_class, pred_scores
    

def format_predictions(boxes, classes, scores, im_id, score_threshold=0.3):
    """Convert the detections of one image into COCO-style result dicts.

    Only detections labelled as person (label 1) with a score of at least
    ``score_threshold`` are kept.

    Args:
        boxes: per-detection ``[x1, y1, x2, y2]`` corner coordinates.
        classes: per-detection integer labels.
        scores: per-detection confidence scores.
        im_id: id of the image the detections belong to.
        score_threshold: detections scoring below this value are dropped.

    Returns:
        A list of dicts with keys ``image_id``, ``category_id``, ``bbox``
        (``[x, y, width, height]``) and ``score``, with floats rounded to
        five decimals.
    """
    person_label = 1  # 1 = person in the COCO label map

    predictions = []
    for box, label, score in zip(boxes, classes, scores):
        if float(score) < score_threshold:
            continue
        # take only the pedestrians
        if label != person_label:
            continue

        predictions.append({
            "image_id": int(im_id),
            "category_id": int(person_label),
            # corner format (x1, y1, x2, y2) -> (x, y, width, height)
            "bbox": [
                round(float(box[0]), 5),
                round(float(box[1]), 5),
                round(float(box[2] - box[0]), 5),
                round(float(box[3] - box[1]), 5),
            ],
            "score": round(float(score), 5),
        })

    return predictions
        
def format_output(output, ids):
    """Flatten the per-image model outputs of one batch into a single result list.

    ``output[k]`` is the detection dict (``'boxes'``, ``'labels'``,
    ``'scores'``) of the k-th image of the batch and ``ids[k]`` is that
    image's id. Every image is formatted with ``format_predictions`` and the
    results are concatenated in batch order.
    """
    formatted = []

    for position, detection in enumerate(output):
        labels = detection['labels']
        confidences = detection['scores']
        corners = detection['boxes']

        formatted += format_predictions(corners, labels, confidences, ids[position])

    return formatted
            
        
def main():
    """Run the pretrained detector over the validation images and dump the results.

    Builds the data loader, runs a pretrained Faster R-CNN on every batch and
    writes the accumulated COCO-style detections to ``output_file`` as JSON.
    """
    # Decide which device we want to run on
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("Use device: " + str(device))

    batch_size = 4
    num_workers = 1

    # NOTE(review): machine-specific absolute paths; adjust for your setup.
    file_path = '/home/test/data/nightowls/nightowls_validation.json'
    images_path = '/home/test/data/nightowls/nightowls_validation/'
    output_file = './test.json'

    # Load data
    data_loader = getDataLoader(file_path, images_path, batch_size, num_workers)

    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    model.eval()
    model.to(device)

    json_output = []

    # Inference only: disabling autograd avoids building graphs and holding
    # activations in memory for every batch.
    with torch.no_grad():
        for batch_index, (data, ids) in enumerate(data_loader, 0):
            # torchvision detection models take a list of 3-D image tensors.
            images = list(data.to(device))
            print('batch: ' + str(batch_index))
            outputs = model(images)

            json_output.extend(format_output(outputs, ids))

            # Rewritten after every batch so an interrupted run still leaves
            # the detections gathered so far on disk.
            with open(output_file, 'w') as f:
                json.dump(json_output, f)
        
    
# Run the inference pipeline when this cell executes.
main()

Use device: cuda:0
loading annotations into memory...
Done (t=0.53s)
creating index...
index created!
here1
batch: 0
batch: 1
batch: 2
batch: 3
batch: 4


KeyboardInterrupt: 

In [3]:
'''
The evaluation only works on the whole dataset, which is inconvenient.
Here I work around that by creating a JSON file that represents a smaller part of the dataset,
containing only the images processed by the model.
'''

import json

annFile = '/home/test/data/nightowls/nightowls_validation.json'
resFile = './test.json'
output_file = './new_dataset.json'

with open(annFile, 'r') as f:
    annData = json.load(f)

with open(resFile, 'r') as f:
    resData = json.load(f)

# Show the top-level sections of the annotation file.
for key in annData:
    print(key)

# Image ids that appear in the detections file, sorted. Duplicates are kept:
# there is one entry per detection.
# NOTE: an image that was processed but produced no detection does not appear
# in the detections file and is therefore left out of the subset.
imgs_id_res = sorted(det['image_id'] for det in resData)
print(imgs_id_res)

# A set gives constant-time membership tests; scanning the list once per
# annotation and per image is quadratic on the full annotation file.
processed_ids = set(imgs_id_res)

# Same layout as the source file, restricted to the processed images.
newFile = {
    'categories': annData['categories'],
    'poses': annData['poses'],
    'annotations': [ann for ann in annData['annotations'] if ann['image_id'] in processed_ids],
    'images': [img for img in annData['images'] if img['id'] in processed_ids],
}

with open(output_file, 'w') as f:
    json.dump(newFile, f)

print("FINISH")



images
annotations
categories
poses
[7002809, 7002809, 7012224, 7013179, 7027638, 7027638, 7027638, 7027638, 7027638, 7027638, 7027638, 7029050, 7029516, 7029516, 7029516, 7056350]
FINISH


In [4]:
'''
Evaluate the accuracy of a detections file with respect to another file that holds the ground-truth dataset
'''

from coco import COCO
from eval_MR_multisetup import COCOeval

# Ground truth
annFile = './new_dataset.json' #'/home/test/RMSW/data/nightowls_validation.json'

# Detections
resFile = './test.json'

## running evaluation
# The context manager closes results.txt even if one of the setups raises.
with open("results.txt", "w") as res_file:
    # One pass per evaluation setup id (0..3).
    for id_setup in range(0, 4):
        # NOTE(review): ground truth and detections are reloaded for every
        # setup on purpose — the evaluator may modify the loaded annotations,
        # so sharing one COCO object across setups is not assumed to be safe.
        cocoGt = COCO(annFile)
        cocoDt = cocoGt.loadRes(resFile)
        imgIds = sorted(cocoGt.getImgIds())
        cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
        cocoEval.params.imgIds = imgIds
        cocoEval.evaluate(id_setup)
        cocoEval.accumulate()
        cocoEval.summarize(id_setup, res_file)

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.02s).
Accumulating evaluation results...
DONE (t=0.00s).
 Average Miss Rate  (MR) @ Reasonable         [ IoU=0.50      | height=[50:10000000000] | visibility=[0.65:10000000000.00] ] = 0.00%
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.00s).
Accumulating evaluation results...
DONE (t=0.00s).
 Average Miss Rate  (MR) @ Reasonable_small   [ IoU=0.50      | height=[50:75] | visibility=[0.65:10000000000.00] ] = -100.00%
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
i

  mean_s = np.log(mrs[mrs<2])
