In [1]:
from collections import defaultdict
import json
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from sklearn.metrics import average_precision_score
from scipy import stats
import sys

sys.path.insert(0, '../Common/')
from COCOWrapper import COCOWrapper
from Dataset import ImageDataset, my_dataloader
from ModelWrapper import ModelWrapper
from ResNet import get_model


In [2]:
# Folder that holds the SpatialSense annotations.json and images
data_dir = '/home/gregory/Datasets/SpatialSense'

# Model directory name -> name shown when the results are printed
modes = {
    'initial-tune': 'Baseline',
    'spire': 'SPIRE',
    'fs-3': 'FS',
}
# Trial indices; every (mode, trial) pair has its own saved model
trials = list(range(8))

# Each corrected model is compared against the baseline
baseline = 'initial-tune'
corrected_list = ['spire', 'fs-3']

# Load the SpatialSense annotations
with open(data_dir + '/annotations.json', 'r') as f:
    anns = json.load(f)

coco = COCOWrapper(mode='val')

In [3]:
# Containers for the SpatialSense image locations and labels
label_dict = defaultdict(list)  # object name -> locations of the images containing it
files = []  # location of every image, in annotation order

def get_info(x, root=None):
    """Return the image location and the object names for one annotation record.

    Parameters
    ----------
    x : dict
        A record with a 'url' entry and an 'annotations' list.  Each annotation
        has 'object' and 'subject' entries that carry a 'name'.
    root : str, optional
        Dataset folder that contains the 'images' directory.  When omitted,
        the notebook-level `data_dir` is used.

    Returns
    -------
    location : str
        '<root>/images/<source>/<filename>', where source is 'flickr' if the
        url contains 'flickr' and 'nyu' otherwise, and filename is the last
        '/'-separated piece of the url.
    labels : list of str
        The distinct object and subject names of the record, sorted.
    """
    if root is None:
        root = data_dir
    # Get the file location
    url = x['url']
    source = 'flickr' if 'flickr' in url else 'nyu'
    filename = url.split('/')[-1]
    location = '{}/images/{}/{}'.format(root, source, filename)
    # Get the objects in the image
    names = set()
    for relation in x['annotations']:
        names.add(relation['object']['name'])
        names.add(relation['subject']['name'])
    # Sorting gives a reproducible order; iterating a set of strings does not
    return location, sorted(names)

# Record every image location and index it under each object it contains
for record in anns:
    location, names = get_info(record)
    files.append(location)
    for name in names:
        label_dict[name].append(location)

In [4]:
# Find the common objects that SPIRE identified as 'main' for some SP
with open('./2-Models/HPS/spire/spire.json', 'r') as f:
    mains = json.load(f)

# An object is only evaluated if at least this many images contain it
min_images = 50

# The names in spire.json use '+' where the label_dict keys are looked up with a space
mains = [main.replace('+', ' ') for main in mains]

# Keep the objects that appear in enough images.  .get() is used on purpose:
# indexing a defaultdict with a name it does not hold would insert an empty
# list for that name, so a read-only filter would silently grow label_dict.
mains = [main for main in mains if len(label_dict.get(main, [])) >= min_images]
print(mains)

['bench', 'truck', 'dog', 'couch', 'knife', 'fork', 'spoon', 'bird', 'bowl']


In [5]:
# Setup the labels
# Look up each object's class index once instead of once per image
# (assumes coco.get_class_id is a plain lookup without side effects)
main_index = {main: coco.get_class_id(main) for main in mains}
# Sets make the per-image membership test constant time; scanning the
# original lists for every (image, object) pair is quadratic in the
# number of images
main_files = {main: set(label_dict[main]) for main in mains}

y = []
for file in files:
    # One entry per model output (the models are built with out_features = 91)
    y_tmp = np.zeros((91))
    for main in mains:
        if file in main_files[main]:
            y_tmp[main_index[main]] = 1
    y.append(y_tmp)

# Get the model's predictions
# out[mode][main] ends up with one average precision value per trial
out = {}
for mode in modes:
    ap_by_object = defaultdict(list)
    for trial in trials:
        checkpoint = './2-Models/Models/{}/trial{}/model.pt'.format(mode, trial)

        # Load the saved model for this trial and move it to the GPU
        model, _ = get_model(mode = 'tune', parent = checkpoint, out_features = 91)
        model.eval()
        model.cuda()

        wrapper = ModelWrapper(model)

        # Predict on every SpatialSense image
        loader = my_dataloader(ImageDataset(files, y))
        y_hat, y_true = wrapper.predict_dataset(loader)

        # Score each object using its own column of the labels and predictions
        for main in mains:
            class_id = coco.get_class_id(main)
            ap = average_precision_score(y_true[:, class_id], y_hat[:, class_id])
            ap_by_object[main].append(ap)
    out[mode] = ap_by_object


In [6]:
# Get results
print('Results per object')
for main in mains:
    print()
    print('Object: ', main)
    # One row per model: display name, mean and std of the scores over the trials
    for mode, name in modes.items():
        scores = out[mode][main]
        print(name, np.round(np.mean(scores), 3), np.round(np.std(scores), 4))

print()
print()
print('Aggregated results')
print()

# Average the per-object means to get a single number per model
for mode, name in modes.items():
    object_means = [np.mean(out[mode][main]) for main in mains]
    print(name, np.round(np.mean(object_means), 3))
 

print()
print()
print('Stat testing on the differences')
print()

# agg[mode][i] is the score of trial i averaged over the objects
agg = {}
for mode in modes:
    total = sum((np.array(out[mode][main]) for main in mains), np.zeros((len(trials))))
    agg[mode] = total / len(mains)

# Paired t-test of each corrected model against the baseline, paired by trial.
# Each row prints: mean difference, std of the differences, p-value.
# NOTE: the p-value is rounded to 4 decimals, so anything below 5e-5 prints as 0.0
for corrected in corrected_list:
    print(modes[corrected])
    corrected_scores = agg[corrected]
    baseline_scores = agg[baseline]
    diff = corrected_scores - baseline_scores
    test = stats.ttest_rel(corrected_scores, baseline_scores)
    print(np.round(np.mean(diff), 4), np.round(np.std(diff), 4), np.round(test.pvalue, 4))

Results per object

Object:  bench
Baseline 0.091 0.0174
SPIRE 0.093 0.0154
FS 0.086 0.022

Object:  truck
Baseline 0.262 0.0222
SPIRE 0.28 0.0137
FS 0.254 0.0176

Object:  dog
Baseline 0.471 0.0218
SPIRE 0.462 0.0195
FS 0.441 0.0193

Object:  couch
Baseline 0.192 0.0067
SPIRE 0.198 0.0118
FS 0.175 0.0101

Object:  knife
Baseline 0.129 0.0215
SPIRE 0.139 0.0126
FS 0.074 0.0184

Object:  fork
Baseline 0.127 0.0099
SPIRE 0.134 0.008
FS 0.12 0.0156

Object:  spoon
Baseline 0.106 0.0113
SPIRE 0.109 0.0148
FS 0.108 0.0162

Object:  bird
Baseline 0.286 0.0253
SPIRE 0.278 0.0213
FS 0.273 0.0262

Object:  bowl
Baseline 0.166 0.0145
SPIRE 0.173 0.0098
FS 0.147 0.0135


Aggregated results

Baseline 0.203
SPIRE 0.207
FS 0.186


Stat testing on the differences

SPIRE
0.004 0.0049 0.0722
FS
-0.0169 0.0026 0.0
