In [1]:
import io, os
import numpy as np
from google.cloud import vision
from PIL import ImageTk, Image, ImageDraw
import pandas as pd
import sqlite3
import json

from transformers import DetrFeatureExtractor, DetrForObjectDetection
import torch

In [2]:
img_path = "static/images/paintings/"
# os.listdir returns entries in arbitrary order and also lists hidden files
# (e.g. ".DS_Store") and sub-directories, which would break Image.open later.
# Keep only regular, non-hidden files and sort them so every run processes
# the paintings in the same order.
paintings = sorted(
    name
    for name in os.listdir(img_path)
    if not name.startswith(".") and os.path.isfile(os.path.join(img_path, name))
)

In [3]:
# Open a connection to the local SQLite database file and create a cursor.
# Both are module-level globals: add_object_data() executes its INSERT on
# `cur` and commits through `con`.
con = sqlite3.connect('rijksstudio.db')
cur = con.cursor()

In [4]:
# Builds an identifier string from a painting file name and an object label.
def create_obj_id(painting, obj_name):
    """Return ``<painting stem><encoded label>H``.

    painting: file name; everything from the first "." onwards is dropped.
    obj_name: object label; every character is replaced by ``ord(char) - 96``
        (so 'a' -> 1 ... 'z' -> 26; any other character, e.g. a space, maps
        outside that range) and the numbers are concatenated without separator.

    NOTE(review): the trailing "H" presumably marks the Hugging Face detector;
    an earlier comment here mentioned "G for Google Vision" -- confirm.
    """
    stem = painting.split(".")[0]
    encoded = ""
    for char in obj_name:
        encoded += str(ord(char) - 96)
    return stem + encoded + "H"

In [5]:
def add_object_data(painting, obj, confidence, min_x, min_y, max_x, max_y):
    """Insert (or replace) one detected object in the ``Objects`` table.

    Uses the module-level cursor ``cur`` and connection ``con`` and commits
    immediately after the statement.

    painting:   painting identifier stored in the first column.
    obj:        detected object label.
    confidence: detection confidence.
    min_x, min_y, max_x, max_y: bounding-box corners.

    The row is tagged with the fixed source string "HUGGING_FACE". The last
    two columns are written as 0.
    NOTE(review): the meaning of those two trailing columns is not visible
    here -- confirm against the table schema.
    """
    # The previous version also computed an object id and kept the cursor
    # returned by execute(); neither value was ever used, so both are gone.
    row = (painting,
           obj,
           confidence,
           min_x,
           min_y,
           max_x,
           max_y,
           "HUGGING_FACE",
           0,
           0)

    cur.execute("""INSERT OR REPLACE INTO Objects 
               VALUES (?,?,?,?,?,?,?,?,?,?);""", row)

    con.commit()

In [6]:
# Initialise the Hugging Face DETR model from the "facebook/detr-resnet-50"
# checkpoint: `feature_extractor` prepares images and post-processes the raw
# outputs, `model` is the object-detection network itself.
feature_extractor = DetrFeatureExtractor.from_pretrained("facebook/detr-resnet-50")
model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")

In [7]:
# torch.cuda.is_available is a function and has to be *called*: the bare
# function object is always truthy, which selected "cuda:0" even on machines
# without a GPU and made model.to(device) fail there.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
model = model.to(device)

In [8]:
def get_hugging_objects(painting, threshold=0.9):
    """Run the DETR detector on one painting and return its detected objects.

    painting:  file name of the image inside ``img_path``.
    threshold: minimum class probability (excluding the no-object class) a
        query must reach to be kept; defaults to the original 0.9.

    Returns the list of dicts built by ``get_hugging_obj_info`` (one per kept
    detection). Relies on the module-level ``feature_extractor``, ``model``,
    ``device`` and ``img_path``. Prints progress to stdout.
    """
    print("Reading "+painting+" ...")
    # Image.open is lazy and keeps the file handle open; the context manager
    # closes it, and convert() loads the pixels into a new in-memory image.
    # Converting to RGB also normalises grayscale / palette / RGBA files to
    # three channels before they reach the feature extractor.
    with Image.open(os.path.join(img_path, painting)) as raw:
        im = raw.convert("RGB")

    encoding = feature_extractor(im, return_tensors="pt")
    # Pure inference: without no_grad() autograd would record the whole
    # forward pass and hold on to its activations for nothing.
    with torch.no_grad():
        outputs = model(**encoding.to(device))

    # keep only predictions of queries above the threshold (excluding no-object class)
    probas = outputs.logits.softmax(-1)[0, :, :-1]
    keep = probas.max(-1).values > threshold

    # rescale bounding boxes; im.size is (width, height), reversed here to (height, width)
    target_sizes = torch.tensor(im.size[::-1]).unsqueeze(0)
    postprocessed_outputs = feature_extractor.post_process(outputs, target_sizes.to(device))
    bboxes_scaled = postprocessed_outputs[0]['boxes'][keep]

    objects = get_hugging_obj_info(probas[keep], bboxes_scaled, painting)
    print("Objects ",[obj['object_name'] for obj in objects], " found")

    return objects

In [9]:
def get_hugging_obj_info(prob, boxes, painting):
    """Turn per-detection class scores and boxes into a list of plain dicts.

    prob:     iterable of per-detection score tensors (one score per class).
    boxes:    tensor whose ``tolist()`` yields ``[xmin, ymin, xmax, ymax]``
              rows, paired one-to-one with ``prob``.
    painting: identifier stored under ``painting_id`` in every dict.

    The label of the highest-scoring class is looked up in the module-level
    ``model.config.id2label``.
    """
    detections = []

    for scores, coords in zip(prob, boxes.tolist()):
        left, top, right, bottom = coords
        best = scores.argmax()    # index of the most probable class
        label = model.config.id2label[best.item()]

        detections.append(dict([
            ('painting_id', painting),
            ('object_name', label),
            ('confidence', scores[best].item()),
            ('min_x', left),
            ('min_y', top),
            ('max_x', right),
            ('max_y', bottom),
        ]))

    return detections
    

In [10]:
# Run the detector on every painting. Each entry of `hugging` is the list of
# detections for one painting; paintings without any detection are left out.
hugging = []

for painting in paintings:
    objects = get_hugging_objects(painting)
    if not objects:
        continue
    hugging.append(objects)

Reading RP-F-2004-71.png ...


  dim_t = self.temperature ** (2 * (dim_t // 2) / self.embedding_dim)


Objects  ['bowl', 'vase', 'bowl', 'vase']  found
Reading RP-F-2014-7-1-3.png ...
Objects  ['vase', 'bowl']  found
Reading RP-P-H-1086.png ...
Objects  ['dog']  found
Reading RP-T-1881-A-107.png ...
Objects  []  found
Reading RP-T-1951-200.png ...
Objects  ['apple', 'apple']  found
Reading RP-T-1951-203.png ...
Objects  ['vase']  found
Reading SK-A-1107.png ...
Objects  ['vase', 'orange', 'bowl']  found
Reading SK-A-113.png ...
Objects  ['chair', 'book', 'person', 'person', 'person', 'book']  found
Reading SK-A-1130.png ...
Objects  ['book', 'chair', 'person', 'person']  found
Reading SK-A-129.png ...
Objects  ['cat', 'person', 'person', 'person', 'dog', 'person']  found
Reading SK-A-137.png ...
Objects  ['cup', 'knife', 'wine glass']  found
Reading SK-A-1451.png ...
Objects  ['bird', 'person', 'person', 'bowl', 'person', 'person', 'bird', 'bird']  found
Reading SK-A-1595.png ...
Objects  ['person', 'person']  found
Reading SK-A-1627.png ...
Objects  ['scissors', 'person', 'person']  fo

Objects  ['bird', 'bird', 'cow', 'cow', 'cow']  found
Reading SK-C-229.png ...
Objects  ['chair', 'person', 'person', 'person', 'vase', 'person', 'person', 'person', 'person', 'dog', 'person', 'person', 'wine glass', 'person', 'person', 'bottle', 'spoon', 'person', 'person', 'vase']  found
Reading SK-C-291.png ...
Objects  ['handbag', 'cat', 'chair', 'vase', 'vase', 'person', 'person']  found
Reading SK-C-301.png ...
Objects  ['bird', 'dog', 'apple']  found
Reading SK-C-535.png ...
Objects  ['knife', 'person', 'cat', 'bowl']  found
Reading SK-C-610.png ...
Objects  []  found


In [11]:
# Flatten the per-painting lists into one flat list of detection dicts.
# The isinstance guard makes this cell safe to re-run: once `hugging` is
# already flat its entries are dicts, and iterating a dict would otherwise
# replace every detection with its key strings.
hugging = [
    obj
    for entry in hugging
    for obj in (entry if isinstance(entry, list) else [entry])
]

hugging[:5]

[{'painting_id': 'RP-F-2004-71.png',
  'object_name': 'bowl',
  'confidence': 0.9883295297622681,
  'min_x': 1583.452880859375,
  'min_y': 1669.08740234375,
  'max_x': 2487.787353515625,
  'max_y': 2012.01806640625},
 {'painting_id': 'RP-F-2004-71.png',
  'object_name': 'vase',
  'confidence': 0.9868892431259155,
  'min_x': 784.371337890625,
  'min_y': 984.2096557617188,
  'max_x': 983.9075317382812,
  'max_y': 1825.298095703125},
 {'painting_id': 'RP-F-2004-71.png',
  'object_name': 'bowl',
  'confidence': 0.9101848006248474,
  'min_x': 57.06950759887695,
  'min_y': 1744.7220458984375,
  'max_x': 428.50567626953125,
  'max_y': 1987.722900390625},
 {'painting_id': 'RP-F-2004-71.png',
  'object_name': 'vase',
  'confidence': 0.9992390871047974,
  'min_x': 1103.9815673828125,
  'min_y': 1082.783203125,
  'max_x': 1687.67236328125,
  'max_y': 1800.97412109375},
 {'painting_id': 'RP-F-2014-7-1-3.png',
  'object_name': 'vase',
  'confidence': 0.9976595640182495,
  'min_x': 633.093994140625,

In [12]:
# Persist every detection; add_object_data commits after each row.
for detection in hugging:
    add_object_data(
        detection["painting_id"],
        detection["object_name"],
        detection["confidence"],
        detection["min_x"],
        detection["min_y"],
        detection["max_x"],
        detection["max_y"],
    )

In [13]:
# Sanity check: preview the first rows instead of dumping the whole table
# into the notebook output.
cur.execute("SELECT * FROM Objects LIMIT 10").fetchall()

[('RP-F-2004-71.png',
  'bowl',
  0.9883295297622681,
  1583.452880859375,
  1669.08740234375,
  2487.787353515625,
  2012.01806640625,
  'HUGGING_FACE',
  0,
  0),
 ('RP-F-2004-71.png',
  'vase',
  0.9868892431259155,
  784.371337890625,
  984.2096557617188,
  983.9075317382812,
  1825.298095703125,
  'HUGGING_FACE',
  0,
  0),
 ('RP-F-2004-71.png',
  'bowl',
  0.9101848006248474,
  57.06950759887695,
  1744.7220458984375,
  428.50567626953125,
  1987.722900390625,
  'HUGGING_FACE',
  0,
  0),
 ('RP-F-2004-71.png',
  'vase',
  0.9992390871047974,
  1103.9815673828125,
  1082.783203125,
  1687.67236328125,
  1800.97412109375,
  'HUGGING_FACE',
  0,
  0),
 ('RP-F-2014-7-1-3.png',
  'vase',
  0.9976595640182495,
  633.093994140625,
  482.28790283203125,
  1108.4140625,
  1163.3642578125,
  'HUGGING_FACE',
  0,
  0),
 ('RP-F-2014-7-1-3.png',
  'bowl',
  0.9570128321647644,
  1139.7452392578125,
  592.3972778320312,
  1717.46435546875,
  1105.8341064453125,
  'HUGGING_FACE',
  0,
  0),
 ('

In [14]:
# Save objects into json; the context manager makes sure the file is flushed
# and closed instead of leaving the handle to the garbage collector.
with open("objects_hugging.json", 'w') as json_file:
    json.dump(hugging, json_file)

In [15]:
# con.close()  # left disabled; uncomment to close the database connection once all inserts are done

Source of the Hugging Face object-detection model used above (DETR, `facebook/detr-resnet-50`):

@article{DBLP:journals/corr/abs-2005-12872,
  author    = {Nicolas Carion and
               Francisco Massa and
               Gabriel Synnaeve and
               Nicolas Usunier and
               Alexander Kirillov and
               Sergey Zagoruyko},
  title     = {End-to-End Object Detection with Transformers},
  journal   = {CoRR},
  volume    = {abs/2005.12872},
  year      = {2020},
  url       = {https://arxiv.org/abs/2005.12872},
  archivePrefix = {arXiv},
  eprint    = {2005.12872},
  timestamp = {Thu, 28 May 2020 17:38:09 +0200},
  biburl    = {https://dblp.org/rec/journals/corr/abs-2005-12872.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}