In [1]:
%load_ext autoreload
%autoreload 2

In [39]:
from main import get_normalized_probability, cal_most_prob_scene, reverse_relation
import json
import pulp
import numpy as np

In [7]:
# Predicted scenes: only the 'scenes' entry of the results file is kept.
with open('../results/clevr_block_val/rel_scenes.json', 'r') as f:
    rel_scenes_file = json.load(f)
scenes = rel_scenes_file['scenes']

# Attribute / relation vocabulary shared by every scene.
with open('../data/clevr_block/clevr_attr_map.json', 'r') as f:
    attr_map = json.load(f)

# Each listed relation mapped to its opposite direction.
REL_MAP = dict(left='right', above='below')

# Attribute names of interest.
ATTRIBUTES = ['color']


In [140]:
# Scene that the rest of the notebook works on; 72 is a hard-coded index into `scenes`.
scene = scenes[72]

In [180]:
def get_attribute_variables(objects, attributes_map):
    """Enumerate one candidate variable per (attribute, value index, object).

    Args:
        objects: Sequence of per-object mappings; ``objects[i][name][v]`` is
            read as the probability attached to value index ``v`` of
            attribute ``name`` for object ``i``.
        attributes_map: Mapping from attribute name to its iterable of values.
            Only the position of each value is used, not the value itself.

    Returns:
        A ``(variables, probabilities)`` pair: the list of variable names,
        formatted ``'{name}_{i}_{v}'`` and ordered by attribute, then value
        index, then object index, and a dict from each name to the
        probability looked up in ``objects``.
    """
    object_count = len(objects)

    # Index triples in the same nesting order as the generated names.
    triples = [
        (attr_name, obj_idx, value_idx)
        for attr_name, attr_values in attributes_map.items()
        for value_idx, _ in enumerate(attr_values)
        for obj_idx in range(object_count)
    ]

    variables = [
        f'{attr_name}_{obj_idx}_{value_idx}'
        for attr_name, obj_idx, value_idx in triples
    ]
    probabilities = {
        variable_name: objects[obj_idx][attr_name][value_idx]
        for variable_name, (attr_name, obj_idx, value_idx) in zip(variables, triples)
    }
    return variables, probabilities

def get_relationship_variables(scene, relationships):
    """Enumerate one candidate variable per scored (relation, source, target).

    Args:
        scene: Mapping whose ``scene['relationships'][rel]`` is a sequence
            indexed by source object; each entry is an iterable of
            ``(target, probability)`` pairs.
        relationships: Iterable of relation names to read from ``scene``.

    Returns:
        A ``(variables, probabilities)`` pair: the list of variable names,
        formatted ``'{rel}_{source}_{target}'`` in traversal order, and a
        dict from each name to its probability. If a name occurs more than
        once, the list keeps every occurrence and the dict keeps the last
        probability seen.
    """
    named_probabilities = [
        (f'{rel}_{source}_{target}', probability)
        for rel in relationships
        for source, targets in enumerate(scene['relationships'][rel])
        for target, probability in targets
    ]
    variables = [variable_name for variable_name, _ in named_probabilities]
    return variables, dict(named_probabilities)

In [196]:
# Floor applied to probabilities before they are passed to log, so log(0) is never evaluated.
eps = 1e-50
# Big-M constant for the indicator-style constraints that tie relation variables to dummy binaries.
M = 100

# Candidate variable names and their predicted probabilities, for attributes and for relations.
attr_variables, attr_probabilities = get_attribute_variables(scene['objects'], attr_map['attributes'])
rel_variables, rel_probabilities = get_relationship_variables(scene, attr_map['relations'])

# Rebind the name lists to dicts of binary PuLP variables, looked up by those same names
# (the probability dicts keep using the names as keys).
attr_variables = pulp.LpVariable.dict('attr', attr_variables, 0, 1, pulp.LpBinary)
rel_variables = pulp.LpVariable.dict('rel', rel_variables, 0, 1, pulp.LpBinary)

In [197]:
# Maximisation problem whose objective is a sum of log-probability terms.
prob = pulp.LpProblem("sceneGraphProblem", pulp.LpMaximize)

# A selected attribute value contributes log(p), with p floored at eps.
attr_obj = [
    variable * np.log(max(attr_probabilities[name], eps))
    for name, variable in attr_variables.items()
]

# A relation contributes log(p) when selected and log(1 - p) when not,
# with both arguments floored at eps.
rel_obj = [
    variable * np.log(max(rel_probabilities[name], eps))
    + (1 - variable) * np.log(max(1 - rel_probabilities[name], eps))
    for name, variable in rel_variables.items()
]

prob += pulp.lpSum(attr_obj + rel_obj)

In [198]:
# Each object takes exactly one value for every attribute.
num_objects = len(scene['objects'])
objects = scene['objects']

for attr in attr_map['attributes']:
    for i in range(num_objects):
        prob += pulp.lpSum([attr_variables[f'{attr}_{i}_{v}'] for v, _ in enumerate(objects[i][attr])]) == 1

# For every ordered pair (i, j): a vertical relation (above or below) holds
# iff no horizontal relation (left or right) holds.
# NOTE(review): nothing here stops 'above' and 'below' (or 'left' and 'right')
# from both being 1 for the same pair -- confirm that is intended.
for i in range(num_objects):
    for j in range(num_objects):
        if i != j:
            # Indicator binaries: dummy1 <-> some vertical relation is on,
            # dummy2 <-> some horizontal relation is on.
            dummy1 = pulp.LpVariable(f'd_c1_{i}_{j}_0', cat=pulp.LpBinary)
            dummy2 = pulp.LpVariable(f'd_c1_{i}_{j}_1', cat=pulp.LpBinary)

            # Big-M linking: dummy1 == 1 forces above + below >= 1,
            # dummy1 == 0 forces above + below <= 0.
            prob += rel_variables[f'above_{i}_{j}'] + rel_variables[f'below_{i}_{j}'] >= 1 - M * (1 - dummy1)
            prob += rel_variables[f'above_{i}_{j}'] + rel_variables[f'below_{i}_{j}'] <= M * dummy1

            # Same linking between dummy2 and left + right.
            prob += rel_variables[f'left_{i}_{j}'] + rel_variables[f'right_{i}_{j}'] >= 1 - M * (1 - dummy2)
            prob += rel_variables[f'left_{i}_{j}'] + rel_variables[f'right_{i}_{j}'] <= M * dummy2
            # Exactly one of the two indicators is on.
            prob += dummy1 == 1 - dummy2
            # Disabled alternative (exactly one of the four relations per ordered pair):
            # rel_variables[f'above_{i}_{j}'] + rel_variables[f'below_{i}_{j}'] + rel_variables[f'left_{i}_{j}'] + rel_variables[f'right_{i}_{j}'] == 1

In [199]:
# Solve the model; the value returned by solve() is displayed as the cell output.
prob.solve()

1

In [200]:
# Empty scaffold for the decoded scene: one attribute dict per object and,
# for every relation, one list of target indices per source object.
predicted_scene = {
    'objects': [{} for _ in range(len(scene['objects']))],
    'relationships': { rel: [[] for _ in range(len(scene['objects']))] for rel in attr_map['relations']}
}

for v in prob.variables():
    # Binary variables come back from the solver as floats that are only
    # integral up to a tolerance, and varValue is None for a variable that
    # was never assigned. Thresholding at 0.5 avoids treating round-off noise
    # (e.g. 1e-9) as "selected", which a plain truthiness test would do.
    if v.varValue is None or v.varValue < 0.5:
        continue

    # Variable names look like '<kind>_<name>_<index>_<index>'; the helper
    # dummy variables start with a different prefix and are ignored.
    tokens = v.name.split('_')
    if tokens[0] == 'attr':
        # attr_<attribute>_<object index>_<value index>
        predicted_scene['objects'][int(tokens[2])][tokens[1]] = attr_map['attributes'][tokens[1]][int(tokens[3])]
    elif tokens[0] == 'rel':
        # rel_<relation>_<source index>_<target index>
        predicted_scene['relationships'][tokens[1]][int(tokens[2])].append(int(tokens[3]))

In [201]:
# Display the decoded scene (attributes per object and relation targets per source object).
predicted_scene

{'objects': [{'color': 'yellow',
   'material': 'metal',
   'shape': 'cube',
   'size': 'small'},
  {'color': 'blue', 'material': 'metal', 'shape': 'cube', 'size': 'large'},
  {'color': 'yellow', 'material': 'rubber', 'shape': 'cube', 'size': 'large'},
  {'color': 'green', 'material': 'rubber', 'shape': 'sphere', 'size': 'small'},
  {'color': 'red', 'material': 'metal', 'shape': 'sphere', 'size': 'small'},
  {'color': 'brown',
   'material': 'metal',
   'shape': 'cylinder',
   'size': 'small'}],
 'relationships': {'left': [[1, 2, 3, 4, 5],
   [],
   [1, 4, 5],
   [1, 4, 5],
   [1],
   [1]],
  'right': [[], [0, 2, 3, 4, 5], [0], [0], [0, 2, 3], [0, 2, 3]],
  'above': [[], [], [], [2], [5], []],
  'below': [[], [], [3], [], [], [4]]}}

In [202]:
# Number of variables in the model.
print(len(prob.variables()))

270


In [203]:
# Number of constraints registered on the model.
print(len(prob.constraints))

174


In [169]:
# Spot check: solved value of the 'left' relation variable from object 3 to object 5.
rel_variables[f'left_{3}_{5}'].varValue

1.0

In [170]:
# Display the full problem (objective and constraints) as the cell output.
prob

sceneGraphProblem:
MAXIMIZE
-36.82955164694234*attr_color_0_0 + -31.138238917745266*attr_color_0_1 + -28.947620307674452*attr_color_0_2 + -34.76335144653448*attr_color_0_3 + -29.493898394840578*attr_color_0_4 + -26.815490750199857*attr_color_0_5 + -40.44316865436368*attr_color_0_6 + -30.338214876780636*attr_color_1_1 + -27.539512588878246*attr_color_1_2 + -31.50299828704129*attr_color_1_3 + -40.9568824584713*attr_color_1_4 + -27.606079045020635*attr_color_1_5 + -50.00104520960452*attr_color_1_6 + -35.719505299867045*attr_color_1_7 + -28.72638318951039*attr_color_2_0 + -21.32378390094922*attr_color_2_1 + -25.575771306472497*attr_color_2_2 + -26.773422304275723*attr_color_2_3 + -25.304447145160623*attr_color_2_4 + -19.722301433720943*attr_color_2_5 + -33.098697652013605*attr_color_2_6 + -34.20243062770304*attr_color_3_0 + -33.951995931120024*attr_color_3_1 + -32.44853588880306*attr_color_3_2 + -36.432872861762995*attr_color_3_3 + -32.09336468104429*attr_color_3_5 + -37.15052032645392*att