# Evaluation Code for the paper

In [1]:
%load_ext autoreload
%autoreload 2

In [19]:
from utils import *
from measurements import ProblemDomain
import datasets.Clevr as clevr
import datasets.Blockworld as block
import numpy as np

# RQ1

## Block

In [29]:
constraints =  [block.SmallObjectConstraint('c1'), block.LargeCubeConstraint('c2'), 
                block.BottomConstraint('c3'), block.YellowObjectConstraint('c4')]
keys = ['SGGen', 'SGGen+', 'SA']

constrained_path = '../scenes/rq1/block/constraint_1234'
original_path = '../scenes/rq1/block/original'
test_paths = ['../scenes/rq1/block/gt/cons', '../scenes/rq1/block/gt/c2', 
              '../scenes/rq1/block/gt/c4']
setup_paths = ['cons', 'c2', 'c4']
schema_path = '../scenes/datasets_schema/block.json'


constrained_results_block = {}
original_results_block = {}

for i, setup in enumerate(setup_paths):
    constrained_result = ProblemDomain(constraints, keys, test_paths[i], f'{constrained_path}/{setup}', schema_path)
    original_result = ProblemDomain(constraints, keys, test_paths[i], f'{original_path}/{setup}', schema_path)
    constrained_results_block[setup] = constrained_result
    original_results_block[setup] = original_result

process_results(constrained_results_block, process_coord_scene)
process_results(original_results_block, process_coord_scene)

../scenes/rq1/block/constraint_1234/cons\rel_scenes.json


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [00:36<00:00, 55.53it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [00:36<00:00, 54.99it/s]


../scenes/rq1/block/constraint_1234/c2\rel_scenes.json


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [00:45<00:00, 43.69it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [00:46<00:00, 43.25it/s]


../scenes/rq1/block/constraint_1234/c4\rel_scenes.json


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [00:39<00:00, 50.13it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [00:39<00:00, 50.02it/s]


../scenes/rq1/block/original/cons\rel_scenes.json


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [00:36<00:00, 55.50it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [00:36<00:00, 55.13it/s]


../scenes/rq1/block/original/c2\rel_scenes.json


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [00:45<00:00, 43.98it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [00:45<00:00, 43.76it/s]


../scenes/rq1/block/original/c4\rel_scenes.json


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [00:39<00:00, 50.39it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [00:39<00:00, 50.04it/s]


## RM

Results for RM+CON

In [32]:
print_metrics(constrained_results_block, ['SGGen', 'SGGen+', 'SA', 'Con'], setup_paths, lambda a: a.scene_stats)

'[0.9919 0.9967 0.898  0.925  0.9901 0.9958 0.854  0.9285 0.992  0.9969\n 0.895  0.9585]'

Results for RM-CON

In [34]:
print_metrics(original_results_block, ['SGGen', 'SGGen+', 'SA', 'Con'], setup_paths, lambda a: a.scene_stats)

'[0.9866 0.9935 0.848  0.924  0.9817 0.991  0.7835 0.9475 0.9855 0.9934\n 0.8295 0.969 ]'

Cohen-d effect size

In [36]:
np.array2string(print_stats(constrained_results_block, original_results_block, ['SGGen', 'SGGen+', 'SA', 'Con'], setup_paths, lambda a: a.scene_stats), precision=4)

'[ 1.6387  4.7712  2.4381  0.0976  2.7002  5.1212  3.0781 -0.949   2.5792\n  2.6622  2.0758 -0.843 ]'

Results  for PM+CON

In [37]:
print_metrics(constrained_results_block, ['SGGen', 'SGGen+', 'SA', 'Con'], setup_paths, lambda a: a.coord_scene_stats)

'[0.9963 0.9997 0.9875 0.9995 0.9963 0.9998 0.9865 1.     0.9943 0.9997\n 0.984  1.    ]'

Results for PM-CON

In [38]:
print_metrics(original_results_block, ['SGGen', 'SGGen+', 'SA', 'Con'], setup_paths, lambda a: a.coord_scene_stats)

'[0.9963 0.9995 0.9885 1.     0.9963 0.9998 0.988  1.     0.9939 0.9995\n 0.9835 1.    ]'

Cohen-d effect size (NAN means 0 here as the two number distributions are identical)

In [39]:
np.array2string(print_stats(constrained_results_block, original_results_block, ['SGGen', 'SGGen+', 'SA', 'Con'], setup_paths, lambda a: a.coord_scene_stats), precision=4)

  return (np.mean(x) - np.mean(y)) / np.sqrt((np.std(x, ddof=1) ** 2 + np.std(y, ddof=1) ** 2) / 2.0)


'[-0.0013  0.3456 -0.13   -0.4472 -0.0235  0.1642 -0.2167     nan  0.1123\n  0.4249  0.0501     nan]'

## Clevr

In [20]:
def print_stats(results1, results2, metrics, setups, mapping):
    values = []
    for setup in setups:
        stats1 = mapping(results1[setup])
        stats2 = mapping(results2[setup])
        for metric in metrics:
            values.append(cohen_d(stats1[metric], stats2[metric]))
    return np.array(values)

def process_results(results, process_coord):
    for _, value in results.items():
        value.load_predicted_scenes(process_coord, coord_fixed_fname=None, fixed_scenes_fname=None)
        value.cal_matrics_statistics()
        
def print_metrics(results, metrics, setups, mapping, precision=4):
    values = []
    for setup in setups:
        stats = mapping(results[setup])
        for metric in metrics:
            values.append(np.mean(stats[metric]))
    return np.array2string(np.array(values), precision=precision)

def cohen_d(x, y):
    return (np.mean(x) - np.mean(y)) / np.sqrt((np.std(x, ddof=1) ** 2 + np.std(y, ddof=1) ** 2) / 2.0)

In [43]:
constraints =  [clevr.SameColorConstraint('c1'), clevr.MaterialConstraint('c2'), 
                clevr.LargeCubeConstraint('c3'), clevr.PairBehindConstraint('c4')]
keys = ['SGGen', 'SGGen+', 'SA']

constrained_path = '../scenes/rq1/clevr/constraint_1234'
original_path = '../scenes/rq1/clevr/original'
test_paths = ['../scenes/rq1/clevr/gt/cons', '../scenes/rq1/clevr/gt/c3', 
              '../scenes/rq1/clevr/gt/c4']
setup_paths = ['cons', 'c3', 'c4']
schema_path = '../scenes/datasets_schema/clevr.json'

constrained_results_clevr = {}
original_results_clevr = {}

for i, setup in enumerate(setup_paths):
    constrained_result = ProblemDomain(constraints, keys, test_paths[i], f'{constrained_path}/{setup}', schema_path)
    original_result = ProblemDomain(constraints, keys, test_paths[i], f'{original_path}/{setup}', schema_path)
    constrained_results_clevr[setup] = constrained_result
    original_results_clevr[setup] = original_result

In [10]:
process_results(constrained_results_clevr, process_coord_scene_clevr)
process_results(original_results_clevr, process_coord_scene_clevr)

../scenes/rq1/clevr/constraint_1234/cons\rel_scenes.json


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [00:21<00:00, 91.79it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [00:21<00:00, 93.56it/s]


../scenes/rq1/clevr/constraint_1234/c3\rel_scenes.json


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [00:31<00:00, 63.45it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [00:31<00:00, 63.78it/s]


../scenes/rq1/clevr/constraint_1234/c4\rel_scenes.json


100%|█████████████████████████████████████████████████████████████████████████████| 2000/2000 [00:18<00:00, 110.76it/s]
100%|█████████████████████████████████████████████████████████████████████████████| 2000/2000 [00:18<00:00, 110.98it/s]


../scenes/rq1/clevr/original/cons\rel_scenes.json


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [00:21<00:00, 93.01it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [00:21<00:00, 92.87it/s]


../scenes/rq1/clevr/original/c3\rel_scenes.json


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [00:31<00:00, 63.20it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [00:31<00:00, 63.81it/s]


../scenes/rq1/clevr/original/c4\rel_scenes.json


100%|█████████████████████████████████████████████████████████████████████████████| 2000/2000 [00:18<00:00, 111.01it/s]
100%|█████████████████████████████████████████████████████████████████████████████| 2000/2000 [00:18<00:00, 110.43it/s]


## RM

Result for RM+CONS

In [13]:
print_metrics(constrained_results_clevr, ['SGGen', 'SGGen+', 'SA', 'Con'], setup_paths, lambda a: a.scene_stats)

'[0.9955 0.9994 0.9805 0.996  0.988  0.9979 0.939  0.991  0.997  0.9996\n 0.9875 0.998 ]'

Result for RM-CONS

In [14]:
print_metrics(original_results_clevr, ['SGGen', 'SGGen+', 'SA', 'Con'], setup_paths, lambda a: a.scene_stats)

'[0.9992 0.9998 0.995  0.9975 0.9961 0.9993 0.983  0.993  0.9997 0.9999\n 0.997  0.9995]'

Cohen-d effect size for each case

In [21]:
np.array2string(print_stats(constrained_results_clevr, original_results_clevr, ['SGGen', 'SGGen+', 'SA', 'Con'], setup_paths, lambda a: a.scene_stats), precision=4)

'[-1.6961 -1.6726 -2.0254 -0.339  -2.522  -1.4659 -3.4665 -0.3795 -2.5191\n -1.4704 -2.0883 -0.5529]'

## PM

Result for PM+CONS

In [22]:
print_metrics(constrained_results_clevr, ['SGGen', 'SGGen+', 'SA', 'Con'], setup_paths, lambda a: a.coord_scene_stats)

'[0.992  0.9996 0.9805 0.999  0.9768 0.9981 0.9385 0.985  0.9912 0.9994\n 0.9805 0.9955]'

Result for PM-CONS

In [23]:
print_metrics(original_results_clevr, ['SGGen', 'SGGen+', 'SA', 'Con'], setup_paths, lambda a: a.coord_scene_stats)

'[0.996  0.9998 0.991  0.999  0.9856 0.999  0.961  0.9825 0.9971 0.9999\n 0.9935 0.999 ]'

Cohen-d effect size for each case

In [25]:
np.array2string(print_stats(constrained_results_clevr, original_results_clevr, ['SGGen', 'SGGen+', 'SA', 'Con'], setup_paths, lambda a: a.coord_scene_stats), precision=4)

'[-1.0097 -0.8144 -1.1455  0.     -1.8223 -0.9488 -1.7029  0.3721 -1.6253\n -1.8284 -1.7227 -1.1649]'

# RQ 2-3

In [49]:
def print_metric_values(metric_names, setups):
    for metric_name in metric_names:
        print(f'"{metric_name}": {np.array2string(np.array([np.mean(setup[metric_name]) for setup in setups]), precision=4)}')

## BLOCK
From left to right: 4/4, 3/4, 2/4, 1/4

In [51]:
constraints =  [block.SmallObjectConstraint('c1'), block.LargeCubeConstraint('c2'), 
                block.BottomConstraint('c3'), block.YellowObjectConstraint('c4')]
schema_path = '../scenes/datasets_schema/block.json'
keys = ['SGGen', 'SGGen+', 'SA']
gt_path = '../scenes/rq1/block/gt/cons'
scenes_path = '../scenes/rq2-3/block/'
setup_names = ['data_44', 'data_34', 'data_24', 'data_14']
setups = []

for name in setup_names:
    setups.append(ProblemDomain(constraints, keys, gt_path, f'{scenes_path}/{name}', schema_path))

for domain in setups:
    domain.load_predicted_scenes(process_coord_scene)
    domain.cal_matrics_statistics()

### RM

In [53]:
print_metric_values(['SGGen', 'SGGen+', 'SA', 'Con'], [setup.scene_stats for setup in setups])

"SGGen": [0.9841 0.9727 0.9717 0.9595]
"SGGen+": [0.9919 0.9859 0.9854 0.9783]
"SA": [0.825  0.783  0.7595 0.743 ]
"Con": [0.909  0.8715 0.865  0.8315]


### RM + OP

In [54]:
print_metric_values(['SGGen', 'SGGen+', 'SA', 'Con'], [setup.scene_fixed_stats for setup in setups])

"SGGen": [0.9965 0.995  0.9948 0.9917]
"SGGen+": [0.9997 0.9994 0.9994 0.9985]
"SA": [0.9885 0.986  0.982  0.959 ]
"Con": [1. 1. 1. 1.]


### PM

In [55]:
print_metric_values(['SGGen', 'SGGen+', 'SA', 'Con'], [setup.coord_scene_stats for setup in setups])

"SGGen": [0.9942 0.9913 0.9927 0.9811]
"SGGen+": [0.9996 0.9994 0.9994 0.9984]
"SA": [0.982  0.976  0.9775 0.948 ]
"Con": [1.     0.9995 1.     0.998 ]


### PM + OP

In [56]:
print_metric_values(['SGGen', 'SGGen+', 'SA', 'Con'], [setup.coord_scene_fixed_stats for setup in setups])

"SGGen": [0.9942 0.9913 0.9927 0.981 ]
"SGGen+": [0.9996 0.9994 0.9994 0.9984]
"SA": [0.982  0.976  0.9775 0.949 ]
"Con": [1. 1. 1. 1.]


## CLEVR
From left to right: 4/4, 3/4, 2/4, 1/4

In [59]:
constraints =  [clevr.SameColorConstraint('c1'), clevr.MaterialConstraint('c2'), 
                clevr.LargeCubeConstraint('c3'), clevr.PairBehindConstraint('c4')]
schema_path = '../scenes/datasets_schema/clevr.json'
keys = ['SGGen', 'SGGen+', 'SA']
gt_path = '../scenes/rq1/clevr/gt/cons'
scenes_path = '../scenes/rq2-3/clevr/'
setup_names = ['data_44', 'data_34', 'data_24', 'data_14']
setups = []

for name in setup_names:
    setups.append(ProblemDomain(constraints, keys, gt_path, f'{scenes_path}/{name}', schema_path))

for domain in setups:
    domain.load_predicted_scenes(process_coord_scene_clevr)
    domain.cal_matrics_statistics()

../scenes/rq2-3/clevr//data_44\rel_scenes.json


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [00:21<00:00, 92.99it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [00:21<00:00, 92.63it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [00:21<00:00, 93.14it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [00:21<00:00, 92.97it/s]


../scenes/rq2-3/clevr//data_34\rel_scenes.json


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [00:21<00:00, 92.87it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [00:21<00:00, 92.89it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [00:21<00:00, 92.97it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [00:21<00:00, 93.08it/s]


../scenes/rq2-3/clevr//data_24\rel_scenes.json


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [00:21<00:00, 92.84it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [00:21<00:00, 92.95it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [00:21<00:00, 92.94it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [00:21<00:00, 93.01it/s]


../scenes/rq2-3/clevr//data_14\rel_scenes.json


100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [00:21<00:00, 93.24it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [00:21<00:00, 93.10it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [00:21<00:00, 92.35it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [00:21<00:00, 92.42it/s]


### RM

In [60]:
print_metric_values(['SGGen', 'SGGen+', 'SA', 'Con'], [setup.scene_stats for setup in setups])

"SGGen": [0.9991 0.9978 0.9981 0.9937]
"SGGen+": [0.9998 0.9998 0.9996 0.999 ]
"SA": [0.993  0.99   0.9845 0.9705]
"Con": [0.999  0.998  0.997  0.9945]


### RM + OP

In [61]:
print_metric_values(['SGGen', 'SGGen+', 'SA', 'Con'], [setup.scene_fixed_stats for setup in setups])

"SGGen": [0.9997 0.9981 0.9992 0.996 ]
"SGGen+": [1.     0.9998 0.9999 0.9997]
"SA": [0.999  0.9945 0.9945 0.988 ]
"Con": [1. 1. 1. 1.]


### PM

In [62]:
print_metric_values(['SGGen', 'SGGen+', 'SA', 'Con'], [setup.coord_scene_stats for setup in setups])

"SGGen": [0.9949 0.9943 0.9898 0.1455]
"SGGen+": [0.9997 0.9997 0.9995 0.9336]
"SA": [0.99   0.9875 0.978  0.031 ]
"Con": [0.9985 0.9975 0.9975 0.654 ]


### PM + OP

In [63]:
print_metric_values(['SGGen', 'SGGen+', 'SA', 'Con'], [setup.coord_scene_fixed_stats for setup in setups])

"SGGen": [0.9958 0.9945 0.9908 0.1596]
"SGGen+": [0.9998 0.9997 0.9995 0.9345]
"SA": [0.992  0.9885 0.98   0.0425]
"Con": [1. 1. 1. 1.]
