In [5]:
import h5py
import numpy as np
import json
import ltn
import torch
from torch.utils.data import Dataset, DataLoader
from collections import defaultdict
from torchvision.models import resnet50, ResNet50_Weights

2023-09-27 21:57:40.414602: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-09-27 21:57:40.788619: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-09-27 21:57:40.925917: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-09-27 21:57:41.939527: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; 

In [2]:
# Use the GPU when PyTorch can see one, otherwise stay on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cpu'

In [3]:
# Training hyper-parameters (tune here rather than inside later cells).
EPOCHS = 32           # number of passes over the training data
BATCH_SIZE = 128      # images per DataLoader batch
LEARNING_RATE = 1e-5  # optimizer step size

In [4]:
def get_tensor(idx, embedding_size=151):
    """Return a one-hot ``float32`` vector with a single 1 at position ``idx``.

    Args:
        idx: Position of the hot entry. Negative values index from the end.
        embedding_size: Length of the returned vector.

    Returns:
        A 1-D ``torch.float32`` tensor of length ``embedding_size``.

    Raises:
        IndexError: If ``idx`` is outside ``[-embedding_size, embedding_size)``.
    """
    # Allocate the float32 vector directly instead of building a Python list,
    # converting it to an int64 tensor and casting afterwards.
    one_hot = torch.zeros(embedding_size, dtype=torch.float32)
    one_hot[idx] = 1.0
    return one_hot

In [5]:
class VGImageDataset(Dataset):
    """Image dataset backed by an image HDF5 file and a scene-graph HDF5 file.

    Each sample is ``(image, object_names)`` where ``object_names`` is a list
    of integer object labels right-padded with ``PAD_LABEL`` up to
    ``MAX_OBJECTS`` entries. On construction the dataset also collects
    "logic rules": for every ``(SUBJECT, PREDICATE)`` pair found in the
    relationships table, the set of object names seen with it.

    Args:
        imdb_h5: Path to the HDF5 file holding the ``images`` dataset.
        sgg_h5: Path to the HDF5 file holding ``relationships``,
            ``predicates``, ``labels``, ``img_to_first_box`` and
            ``img_to_last_box``.
        sgg_dict: Path to the JSON file with ``idx_to_label``,
            ``label_to_idx``, ``idx_to_predicate`` and ``predicate_to_idx``.
        transform: Optional callable applied to the image tensor.
        target_transform: Stored but not used by this class.
        device: Device (or device string) each image is moved to. When
            ``None`` the notebook-level ``device`` variable is used.
    """

    # Label lists are padded with PAD_LABEL until they hold MAX_OBJECTS entries.
    # NOTE(review): samples with more than MAX_OBJECTS boxes are not truncated,
    # so they produce longer lists -- confirm the data never exceeds this.
    MAX_OBJECTS = 150
    PAD_LABEL = 150

    def __init__(
        self,
        imdb_h5,
        sgg_h5,
        sgg_dict,
        transform=None,
        target_transform=None,
        device=None,
    ):
        # Open read-only: the dataset never writes, and older h5py versions
        # default to a mode that can create or modify the file.
        self.imdb = h5py.File(imdb_h5, "r")
        self.sgg = h5py.File(sgg_h5, "r")
        with open(sgg_dict) as f:
            self.dicts = json.load(f)
        self.idx_to_labels = self.dicts["idx_to_label"]
        self.label_to_idx = self.dicts["label_to_idx"]
        self.idx_to_predicates = self.dicts["idx_to_predicate"]
        self.predicates_to_idx = self.dicts["predicate_to_idx"]
        self.transform = transform
        self.target_transform = target_transform
        self.device = device

        self.logic_rules = defaultdict(set)

        # Pull the three tables into memory once; indexing the HDF5 datasets
        # row by row inside the loop costs several file reads per relationship.
        relationships = self.sgg["relationships"][:]
        predicates = self.sgg["predicates"][:]
        labels = self.sgg["labels"][:]
        for i in range(len(relationships)):
            sub, obj = relationships[i][0], relationships[i][1]
            rel = predicates[i]
            subject_name = self.idx_to_labels[str(labels[sub][0])].upper()
            predicate_name = (
                self.idx_to_predicates[str(rel[0])].upper().replace(" ", "_")
            )
            object_name = self.idx_to_labels[str(labels[obj][0])].upper()
            self.logic_rules[(subject_name, predicate_name)].add(object_name)

        self.g = {label: get_tensor(i) for i, label in enumerate(self.label_to_idx)}

    def __len__(self):
        """Number of images in the image HDF5 file."""
        return len(self.imdb["images"])

    def __getitem__(self, idx):
        """Return ``(image, object_names)`` for image ``idx``.

        ``image`` is a tensor (transformed when a transform was given) on the
        configured device; ``object_names`` is a list of Python ints.
        """
        # Always convert to a tensor so the device move below also works
        # when no transform is configured.
        image = torch.as_tensor(self.imdb["images"][idx])
        if self.transform is not None:
            image = self.transform(image)
        target_device = self.device if self.device is not None else device
        # Tensor.to() returns the moved tensor; it does not act in place.
        image = image.to(torch.device(target_device))

        start = int(self.sgg["img_to_first_box"][idx])
        end = int(self.sgg["img_to_last_box"][idx])
        object_names = []
        # Box index 0 is a valid first box; only a negative index is treated
        # as "no boxes" (assumes the file uses -1 for empty images -- confirm).
        if start >= 0:
            box_labels = self.sgg["labels"][start : end + 1]
            object_names = [int(row[0]) for row in box_labels]
        # A negative repeat count yields an empty list, so over-long lists are
        # left as they are.
        object_names.extend(
            [self.PAD_LABEL] * (self.MAX_OBJECTS - len(object_names))
        )

        return image, object_names

    def get_relations_grounding(self):
        """Map each upper-cased, underscore-joined predicate name to a one-hot
        ``ltn.Constant``."""
        dic = {
            predicate.upper().replace(" ", "_"): ltn.Constant(get_tensor(idx))
            for idx, predicate in enumerate(self.predicates_to_idx)
        }
        return dic

    def get_logic_rules(self):
        """Return the ``(SUBJECT, PREDICATE) -> {OBJECT, ...}`` mapping built
        in ``__init__``."""
        return self.logic_rules

    def get_object_grounding(self):
        """Map each upper-cased object label to a one-hot ``ltn.Constant``."""
        dic = {
            obj.upper(): ltn.Constant(get_tensor(idx))
            for idx, obj in enumerate(self.label_to_idx)
        }
        return dic

    def colate_fn(self, data):
        """Collate a batch with PyTorch's default collate, skipping ``None``
        samples."""
        batch = [d for d in data if d is not None]
        return torch.utils.data.dataloader.default_collate(batch)

    def close(self):
        """Close both HDF5 files; the dataset cannot be indexed afterwards."""
        self.imdb.close()
        self.sgg.close()

In [6]:
# Fetch the pretrained 'yolov5s' entry point from the hub repo and switch it
# to evaluation mode.
yolo = torch.hub.load(
    repo_or_dir='ultralytics/yolov5',
    model='yolov5s',
    pretrained=True,
)
yolo.eval()

def extract_objects(image):
    """Run the module-level ``yolo`` model on ``image`` and list its detections.

    Only ``pred[0]`` of the model output is read (presumably the detections
    for the first image -- confirm against the YOLOv5 results API). In each
    row the last column is used as the class index and the second-to-last
    as the confidence.

    Returns:
        A list of ``(class_name, confidence)`` tuples, one per row.
    """
    with torch.no_grad():
        detections = yolo(image)

    rows = detections.pred[0]
    return [
        (detections.names[int(row[-1].item())], row[-2].item())
        for row in rows
    ]

Using cache found in /home/grav/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2023-9-25 Python-3.10.12 torch-2.0.1+cu118 CUDA:0 (NVIDIA GeForce RTX 3060 Laptop GPU, 6144MiB)

Downloading https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5s.pt to yolov5s.pt...
100%|██████████| 14.1M/14.1M [00:09<00:00, 1.62MB/s]

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients
Adding AutoShape... 


In [6]:
# TODO: Implement object predicate.


class Saved_Model:
    """Placeholder object classifier used as the LTN object predicate.

    A ResNet-50 with a 40-way final layer is built, but no checkpoint is
    loaded yet and ``get_prob`` does not run the network: it returns a
    truth value of 1.0 for every grounded row.
    """

    def __init__(self, path) -> None:
        """Build the network. ``path`` is recorded but not loaded yet."""
        weights = ResNet50_Weights.DEFAULT
        resnet = resnet50(weights=weights)
        resnet.fc = torch.nn.Linear(resnet.fc.in_features, 40)
        self.path = path
        # Keep the network so get_prob can use it once it is implemented.
        # TODO: restore the checkpoint with
        #   resnet.load_state_dict(torch.load(path, map_location=device))
        # load_state_dict updates the module in place and returns a report of
        # missing/unexpected keys, so its result must not be assigned to
        # self.model.
        self.model = resnet.eval()

    def get_prob(self, x, label):
        """Return one truth value per row of ``label`` (currently always 1.0).

        Args:
            x: Image inputs; unused until the real prediction is wired in.
            label: Tensor whose first dimension is the number of grounded
                rows to score.

        Returns:
            A 1-D ``float32`` tensor of ``label.shape[0]`` ones on
            ``label``'s device.
        """
        # prediction = self.model(x)

        # TODO: Get probability of a class
        # Size the output from the input instead of a fixed 128 rows, so any
        # batch size (including a short final batch) yields the right number
        # of values. ltn.Predicate reshapes the flat output itself.
        return torch.ones(label.shape[0], device=label.device)


# Wrap the classifier's get_prob in an LTN predicate. The lambda looks up
# `model` at call time.
saved_model_path = ""
model = Saved_Model(saved_model_path)
object_predicate = ltn.Predicate(
    func=lambda images, labels: model.get_prob(images, labels)
)

In [7]:
# Input locations.
model_path = "<saved_model_directory>"
imdb_path = "../data/mini_imdb_1024.h5"
sgg_path = "../data/mini_VG-SGG.h5"
sgg_dict_path = "../data/mini_VG-SGG-dicts.json"

# The dataset applies the preprocessing bundled with the default ResNet-50
# weights to every image.
weights = ResNet50_Weights.DEFAULT
train_data = VGImageDataset(
    imdb_h5=imdb_path,
    sgg_h5=sgg_path,
    sgg_dict=sgg_dict_path,
    transform=weights.transforms(),
)
train_dataloader = DataLoader(
    dataset=train_data,
    batch_size=BATCH_SIZE,
    shuffle=True,
    collate_fn=train_data.colate_fn,
)

In [8]:
# Fuzzy-logic building blocks for the axioms.

# Connectives
Not = ltn.Connective(ltn.fuzzy_ops.NotStandard())
And = ltn.Connective(ltn.fuzzy_ops.AndProd())
Or = ltn.Connective(ltn.fuzzy_ops.OrProbSum())
Implies = ltn.Connective(ltn.fuzzy_ops.ImpliesReichenbach())

# Quantifiers ("e" = existential, "f" = universal)
Exists = ltn.Quantifier(ltn.fuzzy_ops.AggregPMean(p=2), quantifier="e")
Forall = ltn.Quantifier(ltn.fuzzy_ops.AggregPMeanError(p=2), quantifier="f")

# Aggregator that combines the satisfaction of all axioms
SatAgg = ltn.fuzzy_ops.SatAgg()

In [9]:
class MLP(torch.nn.Module):
    """Stack of linear layers with a softmax head, read out through a weight vector.

    ``layer_sizes`` gives the width of the input followed by the width of
    every linear layer's output. All layers but the last are followed by
    ELU; the last one feeds a softmax over dimension 1.
    """

    def __init__(self, layer_sizes=(302, 250, 200, 151)):
        super().__init__()
        self.elu = torch.nn.ELU()
        self.sigmoid = torch.nn.Sigmoid()
        widths = list(layer_sizes)
        stack = torch.nn.ModuleList()
        # Pair each width with its successor: (in_features, out_features).
        for fan_in, fan_out in zip(widths, widths[1:]):
            stack.append(torch.nn.Linear(fan_in, fan_out))
        self.linear_layers = stack
        self.softmax = torch.nn.Softmax(1)

    def forward(self, l, *x):
        """Score the inputs ``x`` against the weights ``l``.

        Args:
            l: Tensor multiplied elementwise with the softmax output.
            *x: One tensor, or several tensors that are concatenated along
                dimension 1 before entering the network.

        Returns:
            The sum over dimension 1 of ``softmax(network(x)) * l``.
        """
        features = x[0] if len(x) == 1 else torch.cat(x, dim=1)
        output_layer = self.linear_layers[-1]
        for hidden_layer in self.linear_layers[:-1]:
            features = self.elu(hidden_layer(features))
        class_probs = self.softmax(output_layer(features))
        return (class_probs * l).sum(dim=1)


# Trainable LTN predicate backed by a default-sized MLP.
relation_predicate = ltn.Predicate(model=MLP())

In [10]:
# Name -> LTN constant lookups for predicates and for object labels.
relation_grounding, obj_grounding = (
    train_data.get_relations_grounding(),
    train_data.get_object_grounding(),
)

In [11]:
# Only the relation predicate's parameters are optimized.
params = list(relation_predicate.parameters())
# Use the configured LEARNING_RATE instead of repeating the literal here.
optimizer = torch.optim.Adam(params, lr=LEARNING_RATE)

In [12]:
# The rules and their groundings do not depend on the batch, so the LTN terms
# for each (subject, predicate) rule are built once rather than per batch.
rule_terms = []
for (subject_name, predicate_name), object_names in train_data.get_logic_rules().items():
    object_values = torch.stack([obj_grounding[name].value for name in object_names])
    rule_terms.append(
        (
            obj_grounding[subject_name],
            relation_grounding[predicate_name],
            ltn.Variable('y', object_values),
        )
    )

# Train for the configured number of epochs.
for epoch in range(EPOCHS):
    for data in train_dataloader:
        images = data[0]
        # One image variable per batch, shared by every axiom.
        x = ltn.Variable('x', images)
        # For each rule: if the image shows the subject and the relation
        # predicate holds for (rel, sub, y), then the image should show y.
        axioms = [
            Forall(
                [x, y],
                Implies(
                    And(
                        object_predicate(x, sub),
                        relation_predicate(rel, sub, y),
                    ),
                    object_predicate(x, y),
                ),
            )
            for sub, rel, y in rule_terms
        ]
        sat_agg = SatAgg(*axioms)
        loss = 1.0 - sat_agg
        # Clear the gradients left by the previous step; without this they
        # accumulate across iterations.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if epoch % 20 == 0:
            print(
                " epoch %d | loss %.4f | Train Sat %.3f "
                % (epoch, loss.item(), sat_agg.item())
            )



KeyboardInterrupt: 