<a href="https://colab.research.google.com/github/MehranDHN/ArchResources/blob/main/CollectionParser.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install requests
!pip install rdflib
!pip install --upgrade certifi

Collecting rdflib
  Downloading rdflib-7.5.0-py3-none-any.whl.metadata (12 kB)
Downloading rdflib-7.5.0-py3-none-any.whl (587 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m587.2/587.2 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: rdflib
Successfully installed rdflib-7.5.0


Start from the Root Collection:

In [2]:
import requests

class RDFGenerator:
    def __init__(self, root_collection, root_manifests):
        from rdflib import Graph, URIRef, Literal, Namespace
        from rdflib.namespace import XSD

        self.root_collection = root_collection
        self.root_manifests = root_manifests
        self.manifest2_counter = 0
        self.manifest3_counter = 0
        self.manifest_counter = 0
        self.canvas2_counter = 0
        self.cancas3_counter = 0
        self.canvas_counter = 0
        self.glam_sources = []
        self.iiif_manifests = []
        self.author_collection = {}
        # The constractor should receive target data structure
        self.g = Graph()
        self.IIIF = Namespace("http://iiif.io/api/presentation#")
        self.RDF = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
        self.RDFS = Namespace("http://www.w3.org/2000/01/rdf-schema#")
        self.MDHN = Namespace("http://example.com/mdhn/")
        self.WD = Namespace("https://www.wikidata.org/wiki/")
        self.XSD = Namespace("http://www.w3.org/2001/XMLSchema#")
        # Binding namespaces
        self.g.bind("mdhn", self.MDHN)
        self.g.bind("rdfs", self.RDFS)
        self.g.bind("rdf", self.RDF)
        self.g.bind("iiif", self.IIIF)
        self.g.bind("wd", self.WD)
        self.g.bind("xsd", self.XSD)
        self.root_collection_uri = URIRef(self.MDHN.RootCollection)
    def manifest_list(self):
        return self.iiif_manifests
    def manifest2_Counter(self):
        """Return the total number of manifest2 counted."""
        return self.manifest2_counter
    def manifest3_Counter(self):
        """Return the total number of manifest3 counted."""
        return self.manifest3_counter
    def manifest_Counter(self):
        """Return the total number of manifest counted."""
        return self.manifest_counter
    def author_Collection(self):
        """Return the total number of manifest counted."""
        return self.author_collection
    def canvas_Counter(self):
        """Return the total number of canvases counted."""
        return self.canvas_counter
    def graph_for_glams(self) :
        from rdflib import Graph, URIRef, Literal
        for glam in self.glam_sources:
            _stripped = glam.replace(' ', '')
            glam_graph_uri = URIRef(f'http://example.com/mdhn/{_stripped}')
            self.g.add((glam_graph_uri, self.RDF.type, self.MDHN.Publisher))
            self.g.add((glam_graph_uri, self.RDFS.label, Literal(glam.strip(), lang='en') ))
        return self.g
    def graph_for_authors(self) :
        from rdflib import Graph, URIRef, Literal
        for author in self.author_collection:
            authorname = self.author_collection[author]
            _stripped = author.replace(' ', '')
            author_graph_uri = URIRef(f'http://example.com/mdhn/{_stripped}')
            self.g.add((author_graph_uri, self.RDF.type, self.MDHN.Creator))
            self.g.add((author_graph_uri, self.RDFS.label, Literal(authorname.strip(), lang='en') ))
            self.g.add((author_graph_uri, self.RDFS.comment, Literal(author.strip(), lang='en') ))
        return self.g
    def parse_manifest_content(self, id):
        manifest_content = fetch_manifest(id)
        if manifest_content:
          context = manifest_content.get('@context')
          if context.find("/2/")>-1:
            self.manifest2_counter += 1
          if context.find("/3/")>-1:
            self.manifest3_counter += 1
    def graph_for_manifest(self, manifest, parneturi):
        self.manifest_counter += 1
        from rdflib import Graph, URIRef, Literal
        manifest_id = manifest.get('id')
        english_label = ""
        persian_label = ""
        french_label = ""
        try:
          english_label = manifest.get('label', {}).get('en')[0]
        except:
          french_label = manifest.get('label', {}).get('fr')[0]
        #self.parse_manifest_content(manifest_id)
        sequences = manifest.get('sequences')
        if manifest_id not in self.iiif_manifests:
          self.iiif_manifests.append(f'{manifest_id}\n')
        else:
          print(f'Duplicate {manifest_id}')
        if sequences != None :
          canvases = sequences[0].get('canvases', [])
          #print(f'Len={len(canvases)}')
          self.canvas_counter += len(canvases)
        else:
          items = manifest.get('items', [])
          if items:
            items2 = items[0].get('items', [])
            if items2:
              items3 = items2[0].get('items', [])
              #print(len(items))
          #print("Should be IIIF 3.0")
        if "fa" in manifest.get('label', {}):
          persian_label = manifest.get('label', {}).get('fa')[0]
        if "fr" in manifest.get('label', {}):
          french_label = manifest.get('label', {}).get('fr')[0]
        metadata = manifest.get('metadata')
        manifest_graph_uri = ""
        manifest_graph_title = ""
        strippedid = ""
        #content_type = ""
        contentTypeList = []
        year_list = []
        glam_source = ""
        _canvasCounter = 0
        _creationYear = ""
        script_style_list = []
        author_list = []
        virtual_collection_list = []
        tgm_list = []
        subject_list = []
        tgn_list = []
        fhkb_list = []
        involver_uri_list = []
        agents_val = {}
        depict_list = []
        folioHasDrawing = False
        folioHasTable = False
        folioIsCover = False
        folioIsOpening = False
        folioIsColophon = False
        folioHasDiagram = False
        folioIsFlyLeaf = False
        for data in metadata:
            key = data.get('label', {}).get('en')
            english_val = data.get('value',{}).get('en')
            if isinstance(key, list):
              idx = 0
              for mkey in key:
                if mkey == "HasDrawing":
                  if english_val[idx].lower().strip()=="true":
                     folioHasDrawing = True
                if mkey == "HasTable":
                  if english_val[idx].lower().strip()=="true":
                     folioHasTable = True
                if mkey == "IsCover":
                  if english_val[idx].lower().strip()=="true":
                     folioIsCover = True
                if mkey == "IsColophon":
                  if english_val[idx].lower().strip()=="true":
                     folioIsColophon = True
                if mkey == "IsOpeningPage":
                  if english_val[idx].lower().strip()=="true":
                     folioIsOpening = True
                if mkey == "HasDiagram":
                  if english_val[idx].lower().strip()=="true":
                     folioHasDiagram = True
                if mkey == "IsFlyLeaf":
                  if english_val[idx].lower().strip()=="true":
                     folioIsFlyLeaf = True
                idx += 1
              match key[0]:
                case "Date":
                    english_val = data.get('value',{}).get('en', [])
                    _creationYear = english_val[0].strip()
                    if _creationYear not in year_list:
                      year_list.append(_creationYear)
                case "Author":
                    #print("Key is Author")
                    j1 = 0
                    author_list = english_val
                case "HasImages":
                    #print("Key is HasImages")
                    j1 =0
                case "HasDrawing":
                    #print("Key is HasImages")
                    j1 =0
            else:
              match key:
                case "Title":
                  #print("Key is Title")
                  z=0
                case "Agents":
                  #english_val = data.get('value',{}).get('en', [])
                  agents_val = data.get('value',{})
                  for agent in agents_val:
                    strippedRole = agent.replace(' ', '_').strip()
                    involver_role_uri = URIRef(f'http://example.com/mdhn/ROLE_{strippedRole}')
                    if involver_role_uri not in involver_uri_list:
                      involver_uri_list.append(involver_role_uri)
                case "Subject Header":
                  for tgmval in english_val:
                    if tgmval.strip() not in subject_list:
                      subject_list.append(tgmval.strip())
                    segments = tgmval.split(':')
                    if len(segments) > 1:
                      if segments[1].strip() not in tgm_list:
                        tgm_list.append(segments[1].strip())
                case "Subject Place":
                  for tgnval in english_val:
                    segments = tgnval.split(':')
                    if len(segments) > 1:
                      if segments[1].strip() not in tgn_list:
                        tgn_list.append(segments[1].strip())
                case "Subject Agent":
                  for fhkbval in english_val:
                    segments = fhkbval.split(':')
                    if len(segments) > 1:
                      if segments[1].strip() not in fhkb_list:
                        fhkb_list.append(segments[1].strip())
                case "Label":
                  #print("Key is Label"
                  z=0
                case "Author":
                    #print("Key is Author")
                    j1 = 0
                    author_list = english_val
                case "Content Type":
                  #Content type is reqired
                  #content_type = english_val[0].replace(':', '')
                  for ctype in english_val:
                    if ctype.find("aat:")>-1 and ctype.strip() not in contentTypeList:
                      contentTypeList.append(ctype.strip().replace(':', ''))
                case "Language":
                  j1=0
                  #print("Key is Language")
                case "CanvasCount":
                  _canvasCounter = english_val
                  self.canvas_counter += int(_canvasCounter)
                  #print("Key is Language")
                case "Styles":
                    #print("Key is Styles")
                    j1 =0
                    script_style_list = data.get('value',{}).get('en', [])
                    #print(script_style_list)
                case "Depicts":
                  depict_list = data.get('value',{}).get('en', [])
                case "PartOf":
                    j6 =0
                    virtual_collection_list = data.get('value',{}).get('en', [])
                case "Hosted By":
                  #print("Key is Hosted By")
                  glam_source = english_val
                  if english_val not in self.glam_sources:
                    self.glam_sources.append(glam_source)
                case "Unique ID":
                  #print("Key is Unique ID")
                  strippedid = english_val.replace('~', '').replace(',', '').replace('.', '').replace('-', '_').replace(':', '_')
                  manifest_graph_uri = URIRef(f'http://example.com/mdhn/{strippedid}')
            if isinstance(english_val, list):
              j1=0
              #print(english_val[0])
            if isinstance(english_val, dict):
              j1=0
              #print(english_val.get("en"))
            else:
              j1=0
              #print(english_val)
        #print(manifest_graph_uri)
        _stripped = glam_source.replace(' ', '').replace(',', '_')
        _strippedCreationYear = _creationYear.replace(' ', '').replace(':', '_')
        if len(manifest_graph_title) <1:
          manifest_graph_title = english_label
        glam_graph_uri = URIRef(f'http://example.com/mdhn/{_stripped}')
        self.g.add((manifest_graph_uri, self.RDF.type, self.MDHN.DigitalResource))
        nolabel = True
        year_uri = URIRef(f'http://example.com/mdhn/Year_{_strippedCreationYear}')
        if len(manifest_graph_title) > 0:
           self.g.add((manifest_graph_uri, self.RDFS.label, Literal(manifest_graph_title, lang='en') ))
           nolabel = False
        if len(persian_label) > 0:
          self.g.add((manifest_graph_uri, self.RDFS.label, Literal(persian_label, lang='fa') ))
          nolabel = False
        if len(french_label) > 0:
          self.g.add((manifest_graph_uri, self.RDFS.label, Literal(french_label, lang='fr') ))
          nolabel = False
        if nolabel:
          self.g.add((manifest_graph_uri, self.RDFS.label, Literal("No Label found", lang='en') ))

        for agent in agents_val:
          strippedRole = agent.replace(' ', '_').strip()
          individual_agent = agents_val.get(agent, [])
          prop_name = "hasParticipantInRole" + agent.strip()
          # Build full IRI
          prop_iri = URIRef(self.MDHN + prop_name)
          for agent in individual_agent:
            stripped_agent = agent.replace('mdhn:', '').strip()
            fhkbagenturi = URIRef(f'http://example.com/mdhn/{stripped_agent}')
            self.g.add((manifest_graph_uri, prop_iri, fhkbagenturi ))
        for _ctype in contentTypeList:
          self.g.add((manifest_graph_uri, self.MDHN.ofType, URIRef(f'http://example.com/mdhn/{_ctype}') ))
        self.g.add((manifest_graph_uri, self.MDHN.hasUrl, Literal(manifest_id, lang='en')))
        self.g.add((manifest_graph_uri, self.MDHN.PublishedBy, glam_graph_uri))
        self.g.add((manifest_graph_uri, self.MDHN.canvasCount,  Literal(_canvasCounter, datatype=self.XSD.integer)))
        self.g.add((manifest_graph_uri, self.MDHN.hasTemporal, year_uri))
        if folioHasDrawing :
          self.g.add((manifest_graph_uri, self.MDHN.folioHasDrawing, Literal(True, datatype=self.XSD.boolean)))
        if folioHasTable :
          self.g.add((manifest_graph_uri, self.MDHN.folioHasTable, Literal(True, datatype=self.XSD.boolean)))
        if folioIsCover :
          self.g.add((manifest_graph_uri, self.MDHN.folioIsCover, Literal(True, datatype=self.XSD.boolean)))
        if folioIsColophon :
          self.g.add((manifest_graph_uri, self.MDHN.folioIsColophon, Literal(True, datatype=self.XSD.boolean)))
        if folioHasDiagram :
          self.g.add((manifest_graph_uri, self.MDHN.foliohasDiagram, Literal(True, datatype=self.XSD.boolean)))
        if folioIsOpening :
          self.g.add((manifest_graph_uri, self.MDHN.folioIsOpening, Literal(True, datatype=self.XSD.boolean)))
        if folioIsFlyLeaf:
          self.g.add((manifest_graph_uri, self.MDHN.folioIsFlyLeaf, Literal(True, datatype=self.XSD.boolean)))
        for depict in depict_list:
          stripped_depicts = depict.replace('mdhn:', '').strip()
          iconography_uri = URIRef(f'http://example.com/mdhn/{stripped_depicts}')
          self.g.add((manifest_graph_uri, self.MDHN.depicts, iconography_uri))
        for subject in subject_list:
          segments = subject.split(':')
          if len(segments) > 1:
            match segments[0]:
              case "tgm":
                 lctgmuri = URIRef(f'http://example.com/mdhn/{segments[1]}')
                 #self.g.add((lctgmuri, self.RDF.type, self.MDHN.TGMSubject))
                 self.g.add((manifest_graph_uri, self.MDHN.hasSubject, lctgmuri))
              case "lcsh":
                 lcshuri = URIRef(f'http://example.com/mdhn/{segments[1]}')
                 #self.g.add((lcshuri, self.RDF.type, self.MDHN.LCSHSubject))
                 self.g.add((manifest_graph_uri, self.MDHN.hasSubject, lcshuri))
              case "aat":
                 aaturi = URIRef(f'http://example.com/mdhn/aat{segments[1]}')
                 #self.g.add((aaturi, self.RDF.type, self.MDHN.AATSubject))
                 self.g.add((manifest_graph_uri, self.MDHN.hasSubject, aaturi))
        #for lctgm in tgm_list:
          #lctgmuri = URIRef(f'http://example.com/mdhn/{lctgm}')
          #self.g.add((manifest_graph_uri, self.MDHN.hasSubject, lctgmuri))

        for gettytgn in tgn_list:
          gettytgnuri = URIRef(f'http://example.com/mdhn/{gettytgn}')
          self.g.add((manifest_graph_uri, self.MDHN.hasTGNPlace, gettytgnuri))



        for fhkbagent in fhkb_list:
          fhkbagenturi = URIRef(f'http://example.com/mdhn/{fhkbagent}')
          self.g.add((manifest_graph_uri, self.MDHN.hasAgential, fhkbagenturi))
        clean_author_wdid = ""
        clean_author_name = ""
        for author in author_list:
          if author.find(":")>-1:
            clean_author_wdid = author.strip().upper().replace('WD:', '')
          else:
            clean_author_name = author.strip()
        if clean_author_wdid not in self.author_collection:
           self.author_collection[clean_author_wdid] = clean_author_name
        self.g.add((manifest_graph_uri, self.MDHN.hasCreator, URIRef(f'http://example.com/mdhn/{clean_author_wdid}') ))
        if len(script_style_list) > 0:
           for style in script_style_list:
            clean_style = style.replace(':', '')
            self.g.add((manifest_graph_uri, self.MDHN.hasScriptStyle, URIRef(f'http://example.com/mdhn/{clean_style}') ))

        if len(virtual_collection_list) > 0:
          #if "vcol:1000109" not in virtual_collection_list:
            #print(virtual_collection_list)
          for col in virtual_collection_list:
            clean_col = col.replace(':', '')
            #Adding resource to its virtual collection
            self.g.add((manifest_graph_uri, self.MDHN.partOf, URIRef(f'http://example.com/mdhn/{clean_col}') ))
        #Adding resource to its immediate collection where it directly belongs
        #self.g.add((parneturi, self.MDHN.hasResource, manifest_graph_uri ))
        self.g.add((year_uri, self.RDF.type, self.MDHN.TemporalInfo ))
        self.g.add((year_uri, self.RDFS.label, Literal(_creationYear.strip(), lang='en') ))
        self.g.add((manifest_graph_uri, self.MDHN.isInCollection, parneturi ))

    def generate_rdf(self):
        from rdflib import Graph, URIRef, Literal
        self.g.add((self.root_collection_uri, self.RDF.type, self.MDHN.ResourceCollection))
        self.g.add((self.root_collection_uri, self.RDFS.label, Literal("Root Collection", lang='en') ))
        self.g.add((self.root_collection_uri, self.RDFS.comment, Literal("Root collection may has its own resources and multiple levels nested subCollections", lang='en') ))
        self.g.add((self.root_collection_uri, self.MDHN.caption, Literal("First Level Root Collection", lang='en') ))
        self.g.add((self.root_collection_uri, self.MDHN.hasUrl, Literal("https://raw.githubusercontent.com/MehranDHN/IIIFCollection/refs/heads/master/IIIFCollection/IIIF2Collection.json", lang='en') ))

        for c in self.root_collection:
            collection_url = c.get('@id')
            collection_lbl = c.get("label")
            stripped_lbl = collection_lbl.replace('https://raw.githubusercontent.com/MehranDHN/IIIFCollection/refs/heads/master/IIIFCollection/' , '').replace('Collection.json', '').replace(' Collection', '').replace(' ', '').replace('\'','').replace(',', '').replace('.','').replace('-','').replace(',', '')
            collection_uri = URIRef(f'http://example.com/mdhn/{stripped_lbl}')
            self.g.add((collection_uri, self.RDF.type, self.MDHN.ResourceCollection))
            self.g.add((collection_uri, self.RDFS.label, Literal(collection_lbl, lang='en') ))
            self.g.add((collection_uri, self.MDHN.caption, Literal(collection_lbl, lang='en') ))
            self.g.add((collection_uri, self.MDHN.subCollectionOf, self.root_collection_uri ))
            self.g.add((collection_uri, self.MDHN.hasUrl, Literal(collection_url, lang='en') ))
            inner_Collection = fetch_manifest(c.get('@id'))
            if "manifests" in inner_Collection:
                for _manifest in inner_Collection.get('manifests'):
                  self.graph_for_manifest(_manifest, collection_uri)
            if "collections" in inner_Collection:
                for cc in inner_Collection.get('collections'):
                  collection2_url = cc.get('@id')
                  collection2_lbl = cc.get("label")
                  stripped2_lbl = collection2_lbl.replace('https://raw.githubusercontent.com/MehranDHN/IIIFCollection/refs/heads/master/IIIFCollection/' , '').replace('Collection.json', '').replace(' Collection', '').replace(' ', '').replace('\'','').replace('.','').replace('-','').replace(',', '')
                  collection2_uri = URIRef(f'http://example.com/mdhn/{stripped2_lbl}')
                  self.g.add((collection2_uri, self.RDF.type, self.MDHN.ResourceCollection))
                  self.g.add((collection2_uri, self.RDFS.label, Literal(collection2_lbl, lang='en') ))
                  self.g.add((collection2_uri, self.RDFS.comment, Literal("Second Level Collection", lang='en') ))
                  self.g.add((collection2_uri, self.MDHN.caption, Literal(collection2_lbl, lang='en') ))
                  self.g.add((collection2_uri, self.MDHN.subCollectionOf, collection_uri ))
                  self.g.add((collection2_uri, self.MDHN.hasUrl, Literal(collection2_url, lang='en') ))
                  inner_Collection2 = fetch_manifest(collection2_url)
                  if 'manifests' in inner_Collection2:
                      l2_Manifests = inner_Collection2.get('manifests')
                      for _manifest2 in l2_Manifests:
                        self.graph_for_manifest(_manifest2, collection2_uri)
                  if "collections" in inner_Collection2:
                    for cc2 in inner_Collection2.get('collections'):
                      collection3_url = cc2.get('@id')
                      collection3_lbl = cc2.get("label")
                      stripped3_lbl = collection3_lbl.replace('https://raw.githubusercontent.com/MehranDHN/IIIFCollection/refs/heads/master/IIIFCollection/' , '').replace('Collection.json', '').replace(' Collection', '').replace(' ', '').replace('\'','').replace('.','').replace('-','').replace(',', '')
                      collection3_uri = URIRef(f'http://example.com/mdhn/{stripped3_lbl}')
                      self.g.add((collection3_uri, self.RDF.type, self.MDHN.ResourceCollection))
                      self.g.add((collection3_uri, self.RDFS.label, Literal(collection3_lbl, lang='en') ))
                      self.g.add((collection3_uri, self.RDFS.comment, Literal("Third Level Collection", lang='en') ))
                      self.g.add((collection3_uri, self.MDHN.caption, Literal(collection3_lbl, lang='en') ))
                      self.g.add((collection3_uri, self.MDHN.subCollectionOf, collection2_uri ))
                      self.g.add((collection3_uri, self.MDHN.hasUrl, Literal(collection3_url, lang='en') ))
                      inner_Collection3 = fetch_manifest(collection3_url)
                      if 'manifests' in inner_Collection3:
                        l3_Manifests = inner_Collection3.get('manifests')
                        for _manifest3 in l3_Manifests:
                          self.graph_for_manifest(_manifest3, collection3_uri)
        return self.g
    def serialize(self):
        from google.colab import files
        rdf_graph = self.generate_rdf()
        rdf_graph = self.parse_root_manifests()
        rdf_graph = self.graph_for_glams()
        rdf_graph = self.graph_for_authors()
        ttl_data = rdf_graph.serialize(format='turtle')
        with open("output4.ttl", "w") as f:
           f.write(ttl_data)
        files.download("output4.ttl")
    def parse_root_manifests(self):
        from rdflib import Graph, URIRef, Literal
        for _manifest in self.root_manifests:
            self.graph_for_manifest(_manifest, self.root_collection_uri)
        return self.g
def fetch_manifest(url, timeout=30):
    """Download ``url`` and return its decoded JSON body.

    Args:
        url: address of the IIIF collection or manifest to fetch.
        timeout: seconds to wait for the server before giving up, so a
            stalled host cannot hang the whole crawl.

    Returns:
        The parsed JSON document, or ``None`` when the request fails, the
        status code is not 200, or the body is not valid JSON. Every failure
        is reported with a printed message.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Failed to fetch manifest from {url}: {exc}")
        return None
    if response.status_code != 200:
        print(f"Failed to fetch manifest from {url}")
        return None
    try:
        return response.json()
    except ValueError as exc:
        # requests raises a ValueError subclass when the body is not JSON.
        print(f"Invalid JSON in manifest from {url}: {exc}")
        return None
def parse_inner_manifests(root):
    """Print the label and id of every manifest in ``root``.

    Args:
        root: iterable of manifest dicts (``None`` is treated as empty).
            Each manifest's ``'label'`` is expected to be a language map
            such as ``{'en': ['...']}``.

    The English label is preferred; when a manifest has none, the first
    label available in any other language is printed instead, and an empty
    line is printed when the manifest has no label at all.
    """
    for manifest in root or []:
        labels = manifest.get('label') or {}
        if isinstance(labels, str):
            labels = {'en': [labels]}
        candidates = labels.get('en') or next(
            (values for values in labels.values() if values), [])
        if isinstance(candidates, str):
            candidates = [candidates]
        english_label = candidates[0] if candidates else ""
        print(english_label)
        print(manifest.get('id'))

def parse_inner_collection(root):
    """Fetch each collection in ``root`` and print the manifests found beneath it.

    Args:
        root: iterable of collection entries, each a dict with ``'@id'`` and
            ``'label'`` (``None`` is treated as empty).

    For every entry the collection document is fetched; manifests of its
    sub-collections are printed first, then its own manifests. Collections
    that cannot be fetched are skipped.
    """
    root = root or []
    # Report the size of the argument, not of an unrelated global.
    print(f'Parsing {len(root)} resources' )
    for c in root:
        inner_Collection = fetch_manifest(c.get('@id'))
        if not inner_Collection:
            continue
        lbl = c.get("label")
        if "collections" in inner_Collection:
            print(f'Inner collection in {lbl}')
            for cc in inner_Collection.get('collections') or []:
                l2_collection = fetch_manifest(cc.get('@id'))
                if l2_collection and 'manifests' in l2_collection:
                    parse_inner_manifests(l2_collection.get('manifests'))
        if 'manifests' in inner_Collection:
            parse_inner_manifests(inner_Collection.get('manifests'))


Loading the root collection:

In [3]:

from google.colab import files
rootCollection = "https://raw.githubusercontent.com/MehranDHN/IIIFCollection/refs/heads/master/IIIFCollection/IIIF2Collection.json"

# fetch_manifest returns None on failure; stop here with a clear message
# instead of failing later on None.get(...).
root_data = fetch_manifest(rootCollection)
if root_data is None:
    raise RuntimeError(f"Could not load the root collection from {rootCollection}")
# Either key may be absent from the root document; fall back to empty lists
# so the code below can iterate unconditionally.
root_Collection = root_data.get('collections') or []
root_Manifests = root_data.get('manifests') or []
#parse_inner_collection(root_Collection)
def parse_collections(root):
    """Recursively walk a collection document, printing each level's child count.

    Args:
        root: a collection document (dict) as returned by ``fetch_manifest``,
            or ``None`` when the fetch failed — in which case nothing is done.

    For every document that has a ``'collections'`` list, the number of
    child collections is printed and each child is fetched and walked in turn.
    """
    if not root or "collections" not in root:
        return
    innerCollection = root.get('collections') or []
    print(len(innerCollection))
    for c in innerCollection:
        parse_collections(fetch_manifest(c.get('@id')))

# Walk the tree once just to print how many sub-collections each level has.
parse_collections(root_data)
#parse_inner_manifests(root_Manifests)

# Build the graph, write output4.ttl and trigger its download.
g = RDFGenerator(root_Collection, root_Manifests)
g.serialize()
# manifest_list() entries are already newline-terminated. Manifest ids are
# IRIs and may contain non-ASCII characters, so the encoding is explicit.
with open("manifests.txt", "w", encoding="utf-8") as f:
  f.writelines(g.manifest_list())
files.download("manifests.txt")
print(f'Glam Sources {g.glam_sources}')
print(f'Glam Source counts {len(g.glam_sources)}')
#print(f'Manifest2 Counter {g.manifest2_Counter()}')
#print(f'Manifest3 Counter {g.manifest3_Counter()}')
print(f'Manifest Counter {g.manifest_Counter()}')
print(f'Canvas Counter {g.canvas_Counter()}')
#for author in g.author_Collection():
  #print(f'{author} : {g.author_collection[author]}')






72
1
2
1
9
2
1
1
4
1
12
13
2


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Glam Sources ['Internet Archive', 'Berlin', 'RARA', 'Leiden University Libraries', 'Gallica', 'Toyo Bunko Digital Archive', 'McGill University Libraries', 'WaltersArt', 'Ketebe', 'University of Edinburgh', 'Tubingen University', 'INHA', 'QDL', 'LOC', 'Metropolitan Museum', 'Smithsonian', 'Heidelberg', 'NLS', 'Michigan Library', 'Bodleian Libraries', 'Digital Vatican Library', 'Harvard Art Museums', 'Cogapp', 'Chester Beatty', 'Cleveland', 'Harvard Library', 'British Musuem', 'Khalili Collections', 'Manchester University Library', 'Princeton', 'München, Bayerische Staatsbibliothek', 'Bulac', 'Istanbull University', 'SOAS, University of London', 'ARCA', 'Bayerische Staatsbibliothek', 'e-codices', 'SLUB Dresden', 'Trinity College', 'Erfurt University Thuringian', 'NLI', 'University Of Cambridge', 'NYPL', 'David Rumsey', 'University of Wisconsin-Milwaukee', 'DUKE', 'Austrian National Library', 'LCDL', 'British Library', 'Rietberg Museum', 'Swiss Literary Archives', 'SOVA', 'Kenyon College'

In [None]:
import json

# Example JSON data
json_data = '''
[
    {"key1": "value1", "key2": "value2"},
    {"key1": ["value1", "value2"], "key2": "value3"},
    {"key1": {"subkey1": "subvalue1"}, "key2": "value4"}
]
'''

# Load JSON data
data = json.loads(json_data)

# Traverse the array of dictionaries
for entry in data:
    for key, value in entry.items():
        print(f"Key: {key}, Value: {value}")
        if key == "key1":
            if isinstance(value, str):
                print(f"\tString value: {value}")
                # Perform operation for string value
            elif isinstance(value, list):
                print(f"\tList of strings: {value}")
                # Perform operation for list of strings
            elif isinstance(value, dict):
                print(f"\tDictionary value: {value}")
                # Perform operation for dictionary value

Key: key1, Value: value1
	String value: value1
Key: key2, Value: value2
Key: key1, Value: ['value1', 'value2']
	List of strings: ['value1', 'value2']
Key: key2, Value: value3
Key: key1, Value: {'subkey1': 'subvalue1'}
	Dictionary value: {'subkey1': 'subvalue1'}
Key: key2, Value: value4
