In [1]:
import h5py
import numpy as np
import json
import ltn
import torch
from torch.utils.data import Dataset, DataLoader
from collections import defaultdict
from torchvision.models import resnet50, ResNet50_Weights

In [2]:
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

device(type='cpu')

In [3]:
BATCH_SIZE = 128
LEARNING_RATE = 0.00001
EPOCHS = 32

In [4]:
def get_tensor(idx, embedding_size=151):
    t = [0] * embedding_size
    t[idx] = 1
    t = torch.tensor(t)
    return t.to(torch.float32)

In [5]:
class VGImageDataset(Dataset):
    def __init__(self, imdb_h5, sgg_h5, sgg_dict):
        self.imdb = h5py.File(imdb_h5)
        self.sgg = h5py.File(sgg_h5)
        with open(sgg_dict) as f:
            self.dicts = json.load(f)
            self.idx_to_labels = self.dicts["idx_to_label"]
            self.label_to_idx = self.dicts["label_to_idx"]
            self.idx_to_predicates = self.dicts["idx_to_predicate"]
            self.predicates_to_idx = self.dicts["predicate_to_idx"]

        def return_set():
            return set()

        self.logic_rules = defaultdict(return_set)

        for i in range(len(self.sgg["relationships"])):
            sub, obj, rel = (
                self.sgg["relationships"][i][0],
                self.sgg["relationships"][i][1],
                self.sgg["predicates"][i],
            )
            self.logic_rules[
                (
                    self.idx_to_labels[str(self.sgg["labels"][sub][0])].upper(),
                    self.idx_to_predicates[str(rel[0])].upper().replace(" ", "_"),
                )
            ].add(self.idx_to_labels[str(self.sgg["labels"][obj][0])].upper())

        self.g = {label: get_tensor(i) for i, label in enumerate(self.label_to_idx)}

    def __len__(self):
        return len(self.imdb["images"])

    def __getitem__(self, idx):
        image = torch.tensor(self.imdb["images"][idx])

        start = self.sgg["img_to_first_box"][idx]
        end = self.sgg["img_to_last_box"][idx]
        object_names = []
        if start > 0:
            object_names = [self.sgg["labels"][i][0] for i in range(start, end + 1)]
        for _ in range(len(object_names), 150):
            object_names += [150]

        return image, object_names

    def get_relations_grounding(self):
        dic = {
            predicate.upper().replace(" ", "_"): ltn.Constant(get_tensor(idx))
            for idx, predicate in enumerate(self.predicates_to_idx)
        }
        return dic

    def get_logic_rules(self):
        return self.logic_rules

    def get_object_grounding(self):
        dic = {
            obj.upper(): ltn.Constant(get_tensor(idx))
            for idx, obj in enumerate(self.label_to_idx)
        }
        return dic

    def colate_fn(self, data):
        batch = [d for d in data if d != None]
        return torch.utils.data.dataloader.default_collate(batch)

In [6]:
def extract_objects(image):
    yolo = torch.hub.load("ultralytics/yolov5", "yolov5s", pretrained=True)
    yolo.eval()
    with torch.no_grad():
        output = yolo(image)

    objects = []
    for *_, confidence, classification in output.pred[0]:
        objects.append((output.names[int(classification.item())], confidence.item()))

    return objects

In [7]:
sgg_path = "../data/mini_VG-SGG.h5"
sgg_dict_path = "../data/mini_VG-SGG-dicts.json"
imdb_path = "../data/mini_imdb_1024.h5"
weights = ResNet50_Weights.DEFAULT
train_data = VGImageDataset(imdb_path, sgg_path, sgg_dict_path)
train_dataloader = DataLoader(
    train_data, BATCH_SIZE, shuffle=True, collate_fn=train_data.colate_fn
)

In [8]:
# we define connectives, quantifiers, and SatAgg
And = ltn.Connective(ltn.fuzzy_ops.AndProd())
Not = ltn.Connective(ltn.fuzzy_ops.NotStandard())
Implies = ltn.Connective(ltn.fuzzy_ops.ImpliesReichenbach())
Exists = ltn.Quantifier(ltn.fuzzy_ops.AggregPMean(p=2), quantifier="e")
Forall = ltn.Quantifier(ltn.fuzzy_ops.AggregPMeanError(p=2), quantifier="f")
SatAgg = ltn.fuzzy_ops.SatAgg()
Or = ltn.Connective(ltn.fuzzy_ops.OrProbSum())

In [9]:
class MLP(torch.nn.Module):
    def __init__(self, layer_sizes=(302, 250, 200, 151)):
        super(MLP, self).__init__()
        self.elu = torch.nn.ELU()
        self.sigmoid = torch.nn.Sigmoid()
        self.linear_layers = torch.nn.ModuleList(
            [
                torch.nn.Linear(layer_sizes[i - 1], layer_sizes[i])
                for i in range(1, len(layer_sizes))
            ]
        )
        self.softmax = torch.nn.Softmax(1)

    def forward(self, l, *x):
        x = list(x)
        if len(x) == 1:
            x = x[0]
        else:
            x = torch.cat(x, dim=1)
        for layer in self.linear_layers[:-1]:
            x = self.elu(layer(x))
        logits = self.linear_layers[-1](x)
        probs = self.softmax(logits)
        out = torch.sum(probs * l, dim=1)
        return out


relation_predicate = ltn.Predicate(MLP())
relation_predicate.to(device)

Predicate(model=MLP(
  (elu): ELU(alpha=1.0)
  (sigmoid): Sigmoid()
  (linear_layers): ModuleList(
    (0): Linear(in_features=302, out_features=250, bias=True)
    (1): Linear(in_features=250, out_features=200, bias=True)
    (2): Linear(in_features=200, out_features=151, bias=True)
  )
  (softmax): Softmax(dim=1)
))

In [10]:
relation_grounding = train_data.get_relations_grounding()
obj_grounding = train_data.get_object_grounding()

In [11]:
params = list(relation_predicate.parameters())
optimizer = torch.optim.Adam(params, lr=0.00001)

In [14]:
# TODO: Implement object predicate.
def return_zero():
    return 0


class Saved_Model:
    def __init__(self) -> None:
        self.yolo = torch.hub.load("ultralytics/yolov5", "yolov5s", pretrained=True)
        self.yolo.eval()
        self.objects = None

    def extract_objects(self, image):
        image = image.permute(1, 2, 0)
        image = image.numpy()
        with torch.no_grad():
            output = self.yolo([image])

        objects = defaultdict(return_zero)
        for *_, confidence, classification in output.pred[0]:
            if (
                obj_grounding.get(output.names[int(classification.item())].upper(), -1)
                == -1
            ):
                continue
            else:
                objects[
                    obj_grounding[
                        output.names[int(classification.item())].upper()
                    ].value
                ] = max(
                    objects[
                        obj_grounding[
                            output.names[int(classification.item())].upper()
                        ].value
                    ],
                    confidence.item(),
                )
            # objects.append((output.names[int(classification.item())], confidence.item()))

        self.objects = objects
        return objects

    def get_prob(self, x, labels):
        res = []
        for i in range(len(x)):
            objects = self.extract_objects(x[i])
            probs = []
            for l in range(labels.shape[0] // x.shape[0]):
                probs.append(objects[labels[l]])
            res.append(probs)
        res = torch.tensor(res)
        return res
        # return torch.tensor(
        #     [[1.0 for _ in range(labels.shape[0] // BATCH_SIZE)] for _ in range(128)]
        # )


model = Saved_Model()
object_predicate = ltn.Predicate(func=lambda a, b: model.get_prob(a, b))
object_predicate.to(device)

Using cache found in /Users/abhiram/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2023-9-27 Python-3.9.18 torch-2.0.1 CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients
Adding AutoShape... 


Predicate(model=LambdaModel())

In [16]:
for epoch in range(1):
    for i, data in enumerate(train_dataloader, 0):
        images = data[0]
        logic_rules = train_data.get_logic_rules()
        axioms = []
        i = 0
        for k in logic_rules:
            sub = obj_grounding[k[0]]
            rel = relation_grounding[k[1]]
            objs = logic_rules[k]
            y = ltn.Variable(
                "y", torch.stack([obj_grounding[idx].value for idx in objs])
            )
            x = ltn.Variable("x", images)
            z = ltn.Variable("z", torch.stack([sub.value]))
            # print(Forall([x, z], object_predicate(x, z)).value)
            # x = []
            # for img in images:
            #     x.append([img, sub.value])
            #     # print(img.shape, sub.value.shape)
            # x = ltn.Variable('x', torch.tensor(x))
            # y = []
            # for obj in objs:
            #     for img in images:
            #         y.append((img, obj_grounding[idx].value))
            # y = ltn.Variable("y", torch.tensor(y))
            axioms += [
                Forall(
                    [z, y],
                    Forall(
                        x,
                        Implies(
                            And(
                                object_predicate(x, z),
                                relation_predicate(rel, y, z),
                            ),
                            object_predicate(x, y),
                        ),
                    ),
                )
            ]
            print("logic rule: ", i)
            i += 1
        print(sat_agg)
        sat_agg = SatAgg(*axioms)
        loss = 1.0 - sat_agg
        loss.backward()
        optimizer.step()
        if epoch % 20 == 0:
            print(" epoch %d | loss %.4f | Train Sat %.3f " % (epoch, loss, sat_agg))

logic rule:  0
logic rule:  1
logic rule:  2
logic rule:  3
logic rule:  4
logic rule:  5
logic rule:  6
logic rule:  7
logic rule:  8
logic rule:  9
logic rule:  10
logic rule:  11
logic rule:  12
logic rule:  13
logic rule:  14
logic rule:  15
logic rule:  16
logic rule:  17
logic rule:  18
logic rule:  19
logic rule:  20
logic rule:  21
logic rule:  22
logic rule:  23
logic rule:  24
logic rule:  25
logic rule:  26
logic rule:  27
logic rule:  28
logic rule:  29
logic rule:  30
logic rule:  31
logic rule:  32
logic rule:  33
logic rule:  34
logic rule:  35
logic rule:  36
logic rule:  37
logic rule:  38
logic rule:  39
logic rule:  40
logic rule:  41
logic rule:  42
logic rule:  43
logic rule:  44
logic rule:  45
logic rule:  46
logic rule:  47


[E thread_pool.cpp:109] Exception in thread pool task: mutex lock failed: Invalid argument
[E thread_pool.cpp:109] Exception in thread pool task: mutex lock failed: Invalid argument
[E thread_pool.cpp:109] Exception in thread pool task: mutex lock failed: Invalid argument
[E thread_pool.cpp:109] Exception in thread pool task: mutex lock failed: Invalid argument


KeyboardInterrupt: 

In [None]:
torch.save(model, "./output/relation_predicate.pt")

In [20]:
from matplotlib import image

img = image.imread("images.jpeg")

In [24]:
objects = extract_objects(img)

Using cache found in /Users/abhiram/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2023-9-27 Python-3.9.18 torch-2.0.1 CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients
Adding AutoShape... 


In [25]:
objects

[('person', 0.7376260161399841),
 ('bicycle', 0.7286328077316284),
 ('person', 0.6313160061836243),
 ('person', 0.6134570240974426),
 ('bicycle', 0.5618255734443665),
 ('bus', 0.4590524435043335)]

In [31]:
img_tensor = ltn.Constant(torch.tensor(img))

In [28]:
sub = obj_grounding["PERSON"]
rel = relation_grounding["made_of".upper()]
obj = obj_grounding["BUS"]

In [None]:
Implies(And(object_predicate(img, sub), relation_predicate(rel, sub, obj)), object_predicate(img, obj))

In [33]:
SatAgg(Implies(And(object_predicate(img_tensor, sub), relation_predicate(rel, sub, obj)), object_predicate(img_tensor, obj)))

torch.Size([1, 255, 197, 3])
torch.Size([1, 151])
torch.Size([1, 1])
2
torch.Size([1, 255, 197, 3])
torch.Size([1, 151])
torch.Size([1, 1])


tensor(0.99980, grad_fn=<RsubBackward1>)