In [359]:
!pip install -U -q PyDrive
import json
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

from google.colab import drive

# Expose the user's Google Drive inside the Colab filesystem.
drive.mount("/content/gdrive/")

# Drive folder from which the JSON input files are read further down.
base_path = "/content/gdrive/My Drive/"

Drive already mounted at /content/gdrive/; to attempt to forcibly remount, call drive.mount("/content/gdrive/", force_remount=True).


In [59]:
!pip install -U anytree

Collecting anytree
[?25l  Downloading https://files.pythonhosted.org/packages/a8/65/be23d8c3ecd68d40541d49812cd94ed0f3ee37eb88669ca15df0e43daed1/anytree-2.8.0-py2.py3-none-any.whl (41kB)
[K     |███████▉                        | 10kB 15.9MB/s eta 0:00:01[K     |███████████████▊                | 20kB 1.6MB/s eta 0:00:01[K     |███████████████████████▋        | 30kB 1.8MB/s eta 0:00:01[K     |███████████████████████████████▍| 40kB 2.1MB/s eta 0:00:01[K     |████████████████████████████████| 51kB 1.6MB/s 
Installing collected packages: anytree
Successfully installed anytree-2.8.0


In [413]:
#!/usr/bin/python
import warnings
# -*- coding: utf-8 -*-
import numpy as np
import anytree
from anytree import Node, RenderTree


class Database(object):
  """Label tree plus an image extract whose labels are checked against it.

  The tree is rooted at ``self.root``. Nodes attached by the first
  ``add_nodes`` call form the initial graph; for every later call the names
  of the parents that received a new child are recorded in ``self.update``.
  ``get_extract_status`` uses that record to grade each stored image.

  Attributes:
    db: dict mapping an image key to its list of labels.
    root: root ``Node`` of the label tree.
    update: dict whose keys are the names of nodes that gained children
      since the initial fill (or since the last status computation).
    init: True until the first ``add_nodes`` call has completed.
    status: mapping from numeric severity code to status name.
  """

  def __init__(self,node):
    """Create an empty database whose tree consists of the root ``node``.

    Args:
      node: name of the root node; must be a string.

    Raises:
      TypeError: if ``node`` is not a string.
    """
    # isinstance (rather than an exact type comparison) also admits str subclasses.
    if not isinstance(node, str):
      raise TypeError("Node of a wrong type, please convert your node to string")
    self.db = dict()
    self.root = Node(node)
    # names of the nodes that were extended with new children
    self.update={}
    # True while the database is being filled for the first time, so that the
    # initial nodes are not recorded in the update dict
    self.init = True
    # codes are ordered by severity: the highest code among an image's labels wins
    self.status={0:'valid', 1:'granularity_staged', 2:'coverage_staged', 3:'invalid'}

  @staticmethod
  def _is_pair(entry):
    """Return True when ``entry`` looks like a flat ``(name, parent_name)`` pair."""
    if isinstance(entry, (list, tuple)):
      # Nested containers are rejected so that a ragged entry is reported as a
      # TypeError instead of being attached as an oddly named node.
      return len(entry) == 2 and not any(
          isinstance(item, (list, tuple, dict, set)) for item in entry)
    # Array-like objects exposing a one-dimensional shape of length 2 are accepted too.
    return getattr(entry, "shape", None) == (2,)

  def _find(self, name):
    """Return the first node of the tree called ``name``, or None if absent."""
    matches = anytree.search.findall(self.root, filter_=lambda node: node.name == name)
    return matches[0] if matches else None

  def add_nodes(self,nodes):
    """Attach every ``(name, parent_name)`` pair of ``nodes`` to the tree.

    A pair whose name already exists only triggers a warning and is skipped.
    On every call after the first one, the parent of each attached node is
    recorded in ``self.update``. The tree is printed once all pairs have been
    processed.

    Args:
      nodes: iterable of ``(name, parent_name)`` pairs.

    Raises:
      TypeError: if an entry is not a flat pair, or if its parent is not in
        the tree. Pairs processed before the failing one stay attached.
    """
    for n in nodes:
      # verify the dimension of the node
      if not self._is_pair(n):
        raise TypeError("Shape mismatch, the node shoud be a tuple of dimension (2,)")
      name, parent_name = n[0], n[1]

      # the parent must already be part of the tree
      parent = self._find(parent_name)
      if parent is None:
        raise TypeError("The parent of node " + str(name) + " does not exist")

      # a node that is already stored is reported and left untouched
      if self._find(name) is not None:
        warnings.warn("The node " + str(name) + " already exists")
        continue

      Node(name, parent=parent)
      # remember the extended parent so get_extract_status can detect the edit
      if not self.init:
        self.update[parent.name] = 1

    self.init=False
    self.show()

  def add_extract(self,d):
    """Store (or overwrite) the image -> labels entries of ``d`` in ``self.db``."""
    self.db.update(d)

  def _label_code(self, label):
    """Return the numeric status code of a single label."""
    node = self._find(label)
    # the label does not exist in the tree => invalid label
    if node is None:
      return 3
    # the label's parent received new children
    if node.name != self.root.name and node.parent.name in self.update:
      return 2
    # the label itself received new children
    if node.name in self.update:
      return 1
    return 0

  def get_extract_status(self):
    """Grade every stored image against the edits recorded so far.

    Each label gets a code (3 unknown label, 2 its parent gained children,
    1 it gained children itself, 0 otherwise); an image takes the status of
    its highest code, and an image without labels is considered valid. The
    result is printed, and the recorded edits are cleared afterwards.

    Returns:
      dict mapping each image key to its status name.
    """
    result = {}
    for key, values in self.db.items():
      codes = [self._label_code(label) for label in values]
      result[key] = self.status[max(codes, default=0)]
    print("Result",result)
    self.update={}
    return result

  def show(self):
    """Print the tree, one node name per line, with its branch prefix."""
    for pre, fill, node in RenderTree(self.root):
      print("%s%s" % (pre, node.name))

In [417]:
# Initial graph: the first entry is the root, the others are (node, parent) pairs
build = [("core", None), ("A", "core"), ("B", "core"), ("C", "core"), ("C1", "C")]
# Extract: image -> labels
extract = {"img001": ["A"], "img002": ["C1"]}
# Graph edits applied after the extract was stored
edits = [("A1", "A"), ("A2", "A")]
# Defined up front so that `status` exists even when `build` is empty
status = {}
if build:
    # Build graph
    print("Initial graph")
    db = Database(build[0][0])
    # Always run the initial fill, even when the build holds only the root:
    # the first add_nodes call is the one treated as the initial graph, so
    # skipping it would make the edits below go untracked.
    db.add_nodes(build[1:])
    # Add extract
    print("Edited graph")
    db.add_extract(extract)
    # Graph edits
    db.add_nodes(edits)
    # Update status
    status = db.get_extract_status()

Initial graph
core
├── A
├── B
└── C
    └── C1
Edited graph
core
├── A
│   ├── A1
│   └── A2
├── B
└── C
    └── C1
Result {'img001': 'granularity_staged', 'img002': 'valid'}


In [416]:
# Initial graph: the first entry is the root, the others are (node, parent) pairs
build = [("core", None), ("A", "core"), ("B", "core"), ("C", "core"), ("C1", "C")]
# Extract: image -> labels ("E" is not part of the graph)
extract = {"img001": ["A", "B"], "img002": ["A", "C1"], "img003": ["B", "E"]}
# Graph edits applied after the extract was stored
edits = [("A1", "A"), ("A2", "A"), ("C2", "C")]
# Status codes used by Database:
# {0:'valid', 1:'granularity_staged', 2:'coverage_staged', 3:'invalid'}
# Get status (this is only an example, test your code as you please as long as it works)
status = {}
if build:
    # Build graph
    print("Initial graph")
    db = Database(build[0][0])
    # Always run the initial fill, even when the build holds only the root:
    # the first add_nodes call is the one treated as the initial graph, so
    # skipping it would make the edits below go untracked.
    db.add_nodes(build[1:])
    # Add extract
    print("Edited graph")
    db.add_extract(extract)
    # Graph edits
    db.add_nodes(edits)
    # Update status
    status = db.get_extract_status()

Initial graph
core
├── A
├── B
└── C
    └── C1
Edited graph
core
├── A
│   ├── A1
│   └── A2
├── B
└── C
    ├── C1
    └── C2
Result {'img001': 'granularity_staged', 'img002': 'coverage_staged', 'img003': 'invalid'}


# **Experiments**

In [408]:
# Read the JSON inputs from the Drive folder and decode them into Python objects
def _read_json(file_name):
    """Decode the JSON file `file_name` located under `base_path`."""
    # The context manager closes the file handle as soon as it has been parsed.
    with open(base_path + file_name, "r") as handle:
        return json.load(handle)

expected_status = _read_json("expected_status.json")
graph_build = _read_json("graph_build.json")
graph_edits = _read_json("graph_edits.json")
img_extract = _read_json("img_extract.json")

In [410]:
# Initial graph
build = graph_build
# Extract
extract = img_extract
# Graph edits
edits = graph_edits
# Status codes used by Database:
# {0:'valid', 1:'granularity_staged', 2:'coverage_staged', 3:'invalid'}
# Get status (this is only an example, test your code as you please as long as it works)
status = {}
if build:
    # Build graph
    print("Initial graph")
    db = Database(build[0][0])
    # Always run the initial fill, even when the build holds only the root:
    # the first add_nodes call is the one treated as the initial graph, so
    # skipping it would make the edits below go untracked.
    db.add_nodes(build[1:])
    # Add extract
    print("Edited graph")
    db.add_extract(extract)
    # Graph edits
    db.add_nodes(edits)
    # Update status
    status = db.get_extract_status()

Initial graph
3e3e23bb16cc421eb5d245f5940281bc
└── 59bea0d582874187b145899e0a511259
    ├── 346430ad7b17455d8337b2ce1ab52fb4
    │   ├── 0bb7e038a2f54e94972a67e45140128a
    │   ├── 09e1b6fcd35240ceb80f8351f7864fdb
    │   └── cb5e86852e7c41cfb8a23facc60cc450
    ├── 3c97ca3bc62b4b3f909df9020bd6b977
    │   ├── 1b0d3b51d37b490c9340e22cb4379822
    │   ├── a7705367784a4023ae19639437c6fd0c
    │   ├── babb9bbe7a014a66874baf56a5ea7c92
    │   ├── e3c1f6e40a084c4586c8fec446a99fd8
    │   └── cd86eaa970bd40c1aa16355e16e52fa5
    ├── dade931dc992467c9ee406027a5888ea
    ├── 189847a3f5e44a1aabf0b6f3a1878eef
    └── 020cf926b12840b8aa553d8d5c8770d4
        ├── 00c3d7df0fee4e139243afbca431ac58
        ├── b5d78ac1e4304771af8f250a41b1d210
        ├── 62983ac6deed46fbb4097c1e584fcc10
        ├── 974cf0c5fee44c958c4154dfb6dacefc
        └── 9d91ec7f8de745c1bb46ee7cd818b76f
Edited graph
3e3e23bb16cc421eb5d245f5940281bc
├── 59bea0d582874187b145899e0a511259
│   ├── 346430ad7b17455d8337b2ce1ab52fb4
│ 

In [400]:
 # Compare the computed statuses with the expected ones; the cell displays True when they match
 status == expected_status

True