In [1]:
# %load dbmodels.py
#!/usr/bin/python

"""
This module contains the schemata for collections in the MongoDB
described by using :py:mod:`mongoengine`.
"""

__author__ = """Giovanni Colavizza, Matteo Romanello"""

import mongoengine
from mongoengine import *
from datetime import datetime

# Shared definition of the `provenance` field: a required string restricted
# to "groundtruth", "processing" or "lbcatalogue".
# NOTE(review): this single field instance is assigned to several document
# classes below (Author, Article, Journal, Disambiguation, Book) -- confirm
# that sharing one instance across classes is intended.
PROVENANCE_FIELD = StringField(required=True
                            , choices=("groundtruth", "processing", "lbcatalogue")
                            )

############
# METADATA #
############
class ImgPageStart(EmbeddedDocument):
    """
    The schema of embedded documents in :py:class:`MetadataIssue`
    (stored in its ``imgpagestart`` field).
    """
    # Required; must be one of "arab", "roman" or the empty string.
    # Presumably the numbering style of the printed pages -- TODO confirm.
    type = StringField(required=True, choices=("arab","roman",""))
    # Both numbers are optional integers.
    img_number = IntField(required=False)
    page_number = IntField(required=False)

class MetadataIssue(EmbeddedDocument):
    """
    The schema of embedded documents in `metadata.issues`
    (see the ``issues`` list field of :py:class:`Metadata`).
    """
    foldername = StringField(required=True)
    year = IntField(required=False)
    # Required embedded :py:class:`ImgPageStart` document.
    imgpagestart = EmbeddedDocumentField(ImgPageStart, required=True)
    imgindex = StringField(required=False)
    digitisation_note = StringField(required=False)
    # Free-form string here (not the constrained PROVENANCE_FIELD).
    provenance = StringField(required=True)
    issue = StringField(required=True)
    operator = StringField(required=True)
    # Flag defaulting to False; presumably a soft-delete marker -- TODO confirm.
    marked_as_removed = BooleanField(required=True, default=False)

class MetadataTitle(EmbeddedDocument):
    """
    The schema of embedded documents in :py:class:`MetadataRelation` and :py:class:`Metadata`
    (both embed it in their ``title`` field).

    All five fields are required strings.
    """
    surface = StringField(required=True)
    responsible = StringField(required=True)
    publisher = StringField(required=True)
    materiality = StringField(required=True)
    specifications = StringField(required=True)

class MetadataRelation(EmbeddedDocument):
    """
    The schema of embedded documents in :py:class:`Metadata`
    (elements of its ``relations`` list).
    """
    # Required free-form string naming the kind of relation.
    type = StringField(required=True)
    # Required embedded title of the related item.
    title = EmbeddedDocumentField(MetadataTitle,required=True)

class Metadata(DynamicDocument):
    """
    The schema of documents in the `metadata` collection in MongoDB.

    Holds catalogue-level information; issues are embedded as a list of
    :py:class:`MetadataIssue` and related titles as a list of
    :py:class:`MetadataRelation`.
    """
    meta = {
        "collection" : "metadata"
    }
    bid = StringField(required=True)
    sbn_id = StringField(required=True)
    creator = StringField(required=True)
    language = StringField(required=True)
    title = EmbeddedDocumentField(MetadataTitle,required=True)
    # Stored as a string, not as a date type.
    date = StringField(required=True)
    relations = ListField(EmbeddedDocumentField(MetadataRelation),required=False)
    provenance = StringField(required=True) # partner libraries, empty if more than 1 (for journals)
    operator = StringField(required=True)
    digitisation_note = StringField(required=False)
    # Untyped lists: no element field is declared for img_bib / subjects.
    img_bib = ListField(required=False)
    type_catalogue = StringField(required=False)
    subjects = ListField(required=False)
    issues = ListField(EmbeddedDocumentField(MetadataIssue), required=False)
    foldername = StringField(required=True)
    # NOTE(review): the allowed values here ("monograph", "journal") differ
    # from LBDocument.type ("monograph", "journal_issue") and
    # Processing.type_document ("monograph", "issue").
    type_document = StringField(required=True, choices=("monograph","journal"))
    marked_as_removed = BooleanField(required=True, default=False)
    # Both timestamps are required and have no default: callers must set them.
    updated_at = DateTimeField(required=True)
    created_at = DateTimeField(required=True)
#########
# PAGES #
#########
class Token(DynamicEmbeddedDocument):
    """
    Schema of the token documents embedded in :py:class:`Line`.

    A token carries its character offsets, its running number, its surface
    string (empty by default) and an optional untyped list of features.
    """
    offset_start = IntField(required=True)
    offset_end = IntField(required=True)
    token_number = IntField(required=True)
    surface = StringField(required=True, default="")
    features = ListField(required=False)

    def __repr__(self):
        """Debug representation showing the token number and surface."""
        template = '<Token: n=%i, surface="%s">'
        values = (self.token_number, self.surface)
        return template % values
class Line(DynamicEmbeddedDocument):
    """
    The schema of embedded documents in :py:class:`Page`
    (elements of its ``lines`` list).
    """
    # Optional list of embedded :py:class:`Token` documents, empty by default.
    tokens = ListField(EmbeddedDocumentField(Token), required=False, default=[])
    line_number = IntField(required=False)
    #split_after_line = BooleanField(default=False, required=False)

###############
# ANNOTATIONS #
###############

class PagePosition(EmbeddedDocument):
    """
    The schema of embedded documents in :py:class:`Annotation`
    (elements of its ``positions`` list).
    """
    # Referenced by name ("Page") because Page is defined further down.
    page_id = ReferenceField("Page")
    # Required integers; presumably character offsets and a line number
    # within the referenced page -- TODO confirm.
    start = IntField(required=True)
    end = IntField(required=True)
    line_n = IntField(required=True)

class Annotation(DynamicDocument):
    """
    The schema of documents in the `annotations` collection in MongoDB.

    An annotation can reference other annotations through the ``contains``
    and ``top_entities`` lists (self-references by class name).
    """
    meta = {
        "collection": "annotations"
    }
    pageid = StringField(required=False)
    entity_type = StringField(required=True)
    # Required and without default: must be set by the caller.
    ingestion_timestamp = DateTimeField(required=True)
    positions = ListField(EmbeddedDocumentField("PagePosition"),required=False, default=[])
    bid = StringField(required=True)
    ann_id = StringField(required=True)
    surface = StringField(required=True)
    annotation_ingester_version = StringField(required=False)
    continuation = BooleanField(required=True, default=False)
    container = BooleanField(required=True, default=False)
    filename = StringField(required=True)
    # is there a way to have a conditional required?
    contains = ListField(ReferenceField("Annotation", required = False))
    top_entities = ListField(ReferenceField("Annotation", required = False))

class Page(DynamicDocument):
    """
    The schema of documents in the `pages` collection in MongoDB.

    A page stores its full text, its lines (embedded :py:class:`Line`
    documents) and references to the annotations made on it.
    """
    meta = {
        "collection" : "pages"
    }
    dbl_side_scan_number = IntField(required=False)
    single_page_file_number = IntField(required=True)
    fulltext = StringField(required=True, default="")
    filename = StringField(required=True)
    in_index = BooleanField(required=True, default=False)
    in_golden = BooleanField(required=True, default=False)
    is_annotated = BooleanField(required=True, default=False)
    # PULL: when a referenced Annotation is deleted it is removed from this list.
    annotations_ids = ListField(ReferenceField(Annotation, required=False, default=[], reverse_delete_rule=mongoengine.PULL))
    lines = ListField(EmbeddedDocumentField(Line),required=False, default=[])
    printed_page_number = ListField(required=False, default=[])
    # Pass the callable, not its result: `datetime.utcnow()` would be evaluated
    # once at import time and every new Page would share that stale timestamp.
    updated_at = DateTimeField(required=True, default=datetime.utcnow)
    #document_id = ReferenceField('LBDocument')

    def __repr__(self):
        """
        Debug representation: printed page number(s), number of lines, the
        `in_golden` flag and the first 20 whitespace-separated words of the
        full text.
        """
        # NOTE(review): "number offset_end lines" looks like search/replace
        # residue for "number of lines"; left untouched because it is a
        # runtime string.
        return "<Page: %s, number offset_end lines = %i, in_golden = %s, fulltext = %s (...truncated...)>"%(
                                                        self.printed_page_number
                                                        , len(self.lines)
                                                        , self.in_golden
                                                        , " ".join(self.fulltext.replace("\n","").split()[:20])
                                                        )
#############
# DOCUMENTS #
#############
class PageRef(EmbeddedDocument):
    """
    Embedded wrapper around a reference to a :py:class:`Page`, used as the
    element type of :py:attr:`Index.page_ids`.
    """
    # Stored in MongoDB under the key "_id" rather than "page_id".
    page_id = ReferenceField(Page, db_field = "_id")

class Index(DynamicEmbeddedDocument):
    """
    The schema of embedded documents in :py:class:`LBDocument`
    (its ``index`` field).
    """
    # Optional list of embedded :py:class:`PageRef` documents, empty by default.
    page_ids = ListField(EmbeddedDocumentField(PageRef), required = False, default=[])

class LBDocument(DynamicDocument):
    """
    The schema of documents in the `documents` collection in MongoDB.

    A document is either a monograph or a journal issue (see ``type``) and
    references its :py:class:`Metadata` record and its :py:class:`Page` list.
    """
    meta = {
        "collection" : "documents"
    }
    metadata_id = ReferenceField(Metadata)
    index = EmbeddedDocumentField(Index)
    ingestion_timestamp = DateTimeField(required=True)
    updated_at = DateTimeField(required=True)
    # Stored in MongoDB under the key "number"; defaults to the empty string.
    issue_number = StringField(required=False, db_field = "number", default="")
    internal_id = StringField(required=True)
    # The pair (bid, issue_number) is declared unique.
    bid = StringField(required=True, unique_with="issue_number")
    content_ingester_version = StringField(required=True)
    # NOTE(review): `reverse_delete_rule` is passed to the ListField here,
    # whereas Page.annotations_ids passes it to the inner ReferenceField.
    # The rule may therefore not be registered at all -- confirm the intent
    # before moving it (CASCADE would delete this document when a referenced
    # Page is deleted).
    pages = ListField(ReferenceField(Page), reverse_delete_rule=mongoengine.CASCADE)
    path = StringField(required=True)
    type = StringField(required=True, choices=("monograph","journal_issue"))
    def __repr__(self):
        # Debug representation; the page count is deliberately left out
        # (see the commented-out variant below).
        #return "<Document: mongoid = %s, bid=%s, internal_id=%s, type=%s, number of pages=%i>"%(
        return "<Document: mongoid = %s, bid=%s, internal_id=%s, type=%s>"%(
                                                                                self.id
                                                                                , self.bid
                                                                                , self.internal_id
                                                                                , self.type
                                                                                #, len(self.pages)
                                                                                )
##############
# PROCESSING #
##############
class Processing(DynamicDocument):
    """
    The schema of documents in the `processing` collection in MongoDB.

    One record per item (identified by ``bid`` and ``number``) with a set of
    boolean flags, all defaulting to False, that track how far the item has
    progressed through the pipeline.
    """
    meta = {
        "collection": "processing"
    }
    bid = StringField(required=True)
    number = StringField(required=True)
    foldername = StringField(required=True)
    type_document = StringField(required=True, choices=("monograph","issue"))
    dont_process = BooleanField(required=True, default=False)
    is_digitized = BooleanField(required=True, default=False) # if it has been digitised and stored in the NAS
    is_img = BooleanField(required=True, default=False) # if it has been split and stored as jpg on the Image Server folders
    is_ocr = BooleanField(required=True, default=False) # if it has been OCRed using ABBYY
    is_ingested_metadata = BooleanField(required=True, default=False) # if its metadata has been ingested. This step generates a new entry here, so it is always True in theory
    is_ingested_ocr = BooleanField(required=True, default=False) # if the OCR has been ingested, thus the collections documents and pages updated
    is_bibliodbed = BooleanField(required=True, default=False) # if the entries for books, articles, contributions and authors have been added to their respective bibliodb collections
    is_parsed = BooleanField(required=True, default=False) # if references have been parsed, results added to the collection references
    is_disambiguated_s = BooleanField(required=True, default=False) # if full references have been disambiguated, SS
    is_disambiguated_p = BooleanField(required=True, default=False)  # if full references have been disambiguated, PS
    is_disambiguated_partial = BooleanField(required=True, default=False)  # if partial references have been disambiguated, both SS and PS
    # Both timestamps are required and have no default: callers must set them.
    updated_at = DateTimeField(required=True)
    created_at = DateTimeField(required=True)


###############
# REFERENCES #
###############
class Reference(DynamicDocument):
    """
    The schema of documents in the `references` collection in MongoDB.

    ``contents`` is a dictionary whose keys are integer-like strings; the
    values are mappings that :py:meth:`get_snippet` reads the keys
    ``page_mongo_id``, ``start`` and ``end`` from.
    """
    meta = {
        "collection": "references"
    }
    ref_type = StringField(required=True, choices=("primary","secondary","meta-annotation"))
    document_id = ReferenceField(LBDocument)
    reference_string = StringField(required=True)
    in_golden = BooleanField(required=True,default=False)
    order_in_page = IntField(required=True)
    continuation_candidate_in = BooleanField(required=False,default=False)
    continuation_candidate_out = BooleanField(required=False, default=False)
    continuation = BooleanField(required=False, default=False)
    bid = StringField(required=True)
    issue = StringField(required=True) # this is the number in Documents...
    contents = DictField(required=True)
    start_img_number = IntField(required=True)
    end_img_number = IntField(required=True)
    updated_at = DateTimeField(required=True)

    def get_snippet(self, character_size=100):
        """
        Returns the text snippet around the reference.

        :param character_size: maximum number of characters of context to
            return on each side of the reference.
        :return: a tuple ``(context_before, reference_string, context_after)``.
            Both contexts are empty strings when ``contents`` is empty or when
            the first/last page cannot be found in the database.
        :raises KeyError: if the first or last entry of ``contents`` lacks
            ``page_mongo_id``, ``start`` or ``end``.
        """
        no_context = ("", self.reference_string, "")
        if not self.contents:
            return no_context

        # Keys are numeric strings: order them numerically, not lexically.
        ordered_keys = [str(number) for number in sorted(int(key) for key in self.contents.keys())]
        first_field = self.contents[ordered_keys[0]]
        last_field = self.contents[ordered_keys[-1]]

        start_page = Page.objects(id=dict(first_field)['page_mongo_id']).first()
        end_page = Page.objects(id=dict(last_field)['page_mongo_id']).first()
        # Explicit check instead of `assert`, which is stripped under `python -O`.
        if start_page is None or end_page is None:
            return no_context

        start = first_field["start"]
        end = last_field["end"]
        # Clamp at 0: a negative slice start would wrap around and return text
        # from the end of the page instead of the text preceding the reference.
        context_before = start_page.fulltext[max(0, start - character_size):start]
        context_after = end_page.fulltext[end:end + character_size]
        return (context_before, self.reference_string, context_after)

    def get_containing_publication(self):
        """
        Returns the publication the reference was found in.

        For a journal issue this is the first :py:class:`Article` of the same
        document whose image range encloses the reference's image range; for a
        monograph it is the :py:class:`Book` of the same document.

        :return: an `Article`, a `Book`, or None when nothing matches, when
            the lookup of the book fails, or for any other document type.
        """
        if self.document_id.type=="journal_issue":
            article = Article.objects(document_id=self.document_id
                                    , start_img_number__lte=self.start_img_number
                                    , end_img_number__gte=self.end_img_number)
            return article.first()
        elif self.document_id.type=="monograph":
            try:
                return Book.objects(document_id=self.document_id).get()
            except Exception as e:
                # Best effort: report the problem (no match, several matches, ...)
                # and carry on.
                print("Reference %s returned error: %s" % (self.id, e))
                return None
        return None


    def __repr__(self):
        return "<Reference: %s>" % self.reference_string

########################
# BIBLIODB COLLECTIONS #
########################
class Author(DynamicDocument):
    """
    The schema of documents in the `bibliodb_authors` collection in MongoDB.
    """
    meta = {
        "collection": "bibliodb_authors",
        "index_background": True,
        # One compound index over both fields; presumably the "$" prefix
        # requests a text index -- TODO confirm against the mongoengine docs.
        "indexes":[
            ("$author_final_form","$surface_forms")
        ]
    }
    # `internal_id` is redundant, that's why I've "removed" it (but still in the DB)
    # The pair (author_final_form, viaf_id) is declared unique.
    author_final_form = StringField(required=False, unique_with="viaf_id")
    notes = StringField(required=True, default="")
    checked = BooleanField(required=True, default=False)
    viaf_id = StringField(required=False, default="")
    surface_forms = ListField(required=False, default=[])
    provenance = PROVENANCE_FIELD

    def get_viaf_link(self, viaf_base_uri="http://viaf.org/viaf/"):
        """
        Returns a link to VIAF or None.

        :param viaf_base_uri: prefix the VIAF identifier is appended to.
        :return: ``viaf_base_uri + viaf_id``, or None when the author has no
            VIAF identifier (empty string or None).
        """
        # Truthiness test rather than `!= ""`: `viaf_id` is optional, so it can
        # be None, which previously produced a bogus ".../None" link.
        if self.viaf_id:
            return "%s%s"%(viaf_base_uri, self.viaf_id)
        return None

    def get_surface_forms(self): #TODO: implement
        """
        To be implemented: find checked disambiguations that refer to this author.
        """
        pass

    def __repr__(self):
        return "<Author: %s (%s)>"%(self.author_final_form, self.get_viaf_link())

class Article(DynamicDocument): #TODO finish
    """
    The schema of documents in the `bibliodb_articles` collection in MongoDB.
    """
    meta = {
        "collection": "bibliodb_articles",
        "index_background": True,
        # Compound index matching the lookup done in
        # Reference.get_containing_publication.
        "indexes":[
            {"fields":("document_id", "start_img_number", "end_img_number")}
        ]

    }
    journal_bid = StringField(required=True)
    journal_short_title = StringField(required=False)
    document_id = ReferenceField(LBDocument)
    title = StringField(required=True)
    internal_id = StringField(required=True)
    year = IntField(required=True)
    volume = StringField(required=True)
    issue_number = StringField(required=False)
    start_img_number = IntField(required=False)
    end_img_number = IntField(required=False)
    start_page_number = IntField(required=False)
    end_page_number = IntField(required=False)
    provenance = PROVENANCE_FIELD
    digitization_provenance = StringField(required=True)

    def get_author(self):
        """
        Get the author(s) of the article by going via the `disambiguation` collection.

        :return: the list of authors linked to this article through
            "author_of_disambiguation" records, or None if `DoesNotExist` is
            raised while collecting them.
        """
        try:
            return [d.author for d in Disambiguation.objects(type="author_of_disambiguation", article=self)]
        except DoesNotExist as e:
            print("Author of article %s does not exist: check this!" % self.id)
            print(e)
            return None

    def __repr__(self):
        """
        Debug representation including the journal's short title.

        The title is looked up in the `Journal` collection by `journal_bid`.
        """
        journal = Journal.objects(bid=self.journal_bid).first()
        # `first()` returns None when no journal matches; fall back to the
        # title stored on the article so that repr() never raises.
        if journal is not None:
            journal_title = journal.short_title
        else:
            journal_title = self.journal_short_title
        return "<Article: \"%s\" in \"%s\", %s(%s) %s, internal_id=%s>"%(self.title
                                                        , journal_title
                                                        , self.volume
                                                        , self.issue_number
                                                        , self.year
                                                        , self.internal_id)

class SBN_Identifier(EmbeddedDocument):
    """
    An identifier embedded in the ``identifiers`` list of
    :py:class:`Journal` and :py:class:`Book`: a required type/value pair of
    strings.

    TODO: document the identifier types in use.
    """
    identifier_type = StringField(required=True)
    value = StringField(required=True)

class Journal(DynamicDocument):
    """
    Schema for the `bibliodb_journals` collection in MongoDB.

    A journal may point to the series preceding and following it through
    self-references.
    """
    meta = {"collection": "bibliodb_journals"}

    bid = StringField(required=True)
    short_title = StringField(required=True)
    full_title = StringField(required=True)
    identifiers = ListField(EmbeddedDocumentField(SBN_Identifier), required=False)
    sbn_link = URLField(required=False)
    provenance = PROVENANCE_FIELD
    document_id = ReferenceField(LBDocument)
    previous_series = ReferenceField("Journal")
    following_series = ReferenceField("Journal")

    def __repr__(self):
        """Debug representation: short title, bid and SBN link."""
        shown = (self.short_title, self.bid, self.sbn_link)
        return "<Journal: %s, bid=%s, link=%s>" % shown

class Disambiguation(DynamicDocument):
    """
    The schema of documents in the `disambiguations` collection in MongoDB.

    A disambiguation links entities of the bibliographic database to each
    other; ``type`` states which kind of link the record expresses.
    """
    meta = {
        "collection": "disambiguations",
        "index_background": True,
        # Compound indexes for the (type, source, target) lookups.
        "indexes":[
            {"fields":("type", "author", "article")}
            , {"fields":("type", "author", "book")}
            , {"fields":("type", "reference", "article")}
            , {"fields":("type", "reference", "book")}
            , {"fields":("type", "reference", "archival_document")}
        ]
    }
    surface = StringField(required=False)
    # Classes defined later in the module are referenced by name.
    reference = ReferenceField('Reference')
    author = ReferenceField(Author)
    book = ReferenceField('Book')
    journal = ReferenceField('Journal')
    bookpart = ReferenceField('BookPart')
    article = ReferenceField('Article')
    document_id = ReferenceField(LBDocument)
    archival_document = ReferenceField('ArchivalRecordASVE')
    checked = BooleanField(required=True, default=False)
    correct = BooleanField(required=True, default=False)
    # Pass the callable, not its result: `datetime.now()` would be evaluated
    # once at import time and every new record would share that stale timestamp.
    updated_at = DateTimeField(required=True, default=datetime.now)
    provenance = PROVENANCE_FIELD
    type = StringField(required=True
                            , choices=(
                                    "author_of_disambiguation"
                                    , "editor_of_disambiguation"
                                    , "reference_disambiguation"
                                    , "in_journal_disambiguation"
                                    , "bookpart_disambiguation"
                                    )
                            )

class Book(DynamicDocument): #TODO finish
    """
    Schema for the `bibliodb_books` collection in MongoDB.
    """
    meta = {"collection": "bibliodb_books"}

    document_id = ReferenceField(LBDocument)
    provenance = PROVENANCE_FIELD
    digitization_provenance = StringField(required=True)
    bid = StringField(required=True)
    title = StringField(required=True)
    publication_year = StringField(required=True)
    publication_place = StringField(required=True)
    publication_country = StringField(required=True)
    publication_language = StringField(required=True)
    publisher = StringField(required=True)
    identifiers = ListField(EmbeddedDocumentField(SBN_Identifier), required=False)

    def get_author(self):
        """
        Collect the distinct author(s) of the book from the
        "author_of_disambiguation" records that point at it.

        Returns a list, or None if `DoesNotExist` is raised on the way.
        """
        try:
            links = Disambiguation.objects(type="author_of_disambiguation", book=self)
            unique_authors = {link.author for link in links}
        except DoesNotExist as err:
            print("Author of book %s does not exist: check this!" % self.id)
            print(err)
            return None
        else:
            return list(unique_authors)

    # Tao
    def get_reference(self):
        """
        Collect the distinct reference(s) of the "reference_disambiguation"
        records that share this book's `document_id`.

        Returns a list, or None if `DoesNotExist` is raised on the way.
        """
        try:
            links = Disambiguation.objects(type="reference_disambiguation", document_id=self.document_id)
            unique_refs = {link.reference for link in links}
        except DoesNotExist as err:
            print("Reference from book %s does not exist: check this!" % self.id)
            print(err)
            return None
        else:
            return list(unique_refs)
    
class ArchivalRecordASVE(DynamicDocument):
    """
    The schema of documents in the `bibliodb_asve` collection in MongoDB.

    ``internal_id`` has the form ``<base>:<seg1>.<seg2>...``: the part after
    the last colon is a dot-separated path, and each prefix of that path is
    the ``internal_id`` of an ancestor record.

    TODO use inheritance (in the future)
    """
    meta = {
        "collection": "bibliodb_asve",
    }
    title = StringField(required=True)
    label = StringField(required=False, default=None) # NEW
    archive = StringField(required=False, default=None) # NEW
    url = StringField(required=True)
    notes = StringField(required=False, default="")
    html = StringField(required=False)
    document_type = StringField(required=True, default="")
    size = StringField(required=False, default=None)
    internal_id = StringField(required=True)

    def _ancestor_internal_ids(self):
        """
        Expand `internal_id` into the ids of every level of its hierarchy,
        from the root down to (and including) this record.
        """
        # Split on the LAST colon only. The previous `str.replace` removed
        # every occurrence of ":<suffix>", which corrupted the base of ids
        # such as "a:b:b".
        head, separator, hierarchical_bit = self.internal_id.rpartition(":")
        base_id = head if separator else self.internal_id
        segments = hierarchical_bit.split('.')
        return ["%s:%s" % (base_id, ".".join(segments[:depth + 1]))
                for depth in range(len(segments))]

    def get_hierarchy(self):
        """
        Describe every level of the record's hierarchy.

        :return: a list of dictionaries, one per level, with the keys `id`
            (string, or None when the level has no record in the database),
            `internal_id`, `level` (0 = root), `title` (empty when the record
            is missing) and `current` (True only for this record's own level).
        """
        hierarchy = []
        for level, internal_id in enumerate(self._ancestor_internal_ids()):
            record = ArchivalRecordASVE.objects(internal_id=internal_id).first()
            # Compare against *this* record's id in both cases; the missing
            # record branch used to compare `internal_id` with itself and was
            # therefore always True.
            is_current = internal_id == self.internal_id

            if record is not None:
                hierarchy.append({"id":str(record.id)
                                , "internal_id":record.internal_id
                                , "level":level
                                , "title":record.title
                                , "current": is_current
                                })
            else:
                hierarchy.append({"id":None
                                , "internal_id": internal_id
                                , "level":level
                                , "title":""
                                , "current": is_current
                                })

        return hierarchy

    def get_label(self, hierarchical_separator=" >> ", skip_errors=False):
        """
        Build a label from the titles of all levels of the hierarchy.

        :param hierarchical_separator: string placed between the titles.
        :param skip_errors: when True, a level without a record in the
            database contributes "?"; when False such a level raises.
        :return: the joined titles, or just this record's title when its id
            has a single level.
        :raises AttributeError: if a level has no record and `skip_errors`
            is False.
        """
        internal_ids = self._ancestor_internal_ids()
        if len(internal_ids) < 2:
            # No "." in the hierarchical part: nothing to join.
            return self.title

        hierarchical_labels = []
        for internal_id in internal_ids:
            record = ArchivalRecordASVE.objects(internal_id=internal_id).first()
            if skip_errors and record is None:
                hierarchical_labels.append("?")
            else:
                hierarchical_labels.append(record.title)

        return hierarchical_separator.join(hierarchical_labels)

    def __repr__(self):
        return "<ArchivalRecordASVE: %s (%s)>"%(self.title, self.internal_id)

class Cluster(DynamicDocument):
    # Placeholder: no fields are declared, so all attributes are dynamic.
    pass

class BookPart(DynamicDocument):
    # Placeholder target of Disambiguation.bookpart; no fields are declared,
    # so all attributes are dynamic.
    pass


Connect to the database.

In [4]:
# Credentials must never be stored in the notebook: read them from the
# environment and prompt for the password when it is not set.
import os
from getpass import getpass

connect(
    db=os.environ.get("LB_MONGO_DB", "linkedbooks_refactored"),
    username=os.environ["LB_MONGO_USER"],
    password=os.environ.get("LB_MONGO_PASSWORD") or getpass("MongoDB password: "),
    host=os.environ.get("LB_MONGO_HOST", "128.178.60.49"),
    port=int(os.environ.get("LB_MONGO_PORT", "27017")),
)

MongoClient(host=['128.178.60.49:27017'], document_class=dict, tz_aware=False, connect=True, read_preference=Primary())

See the connection, if any, between what is in a reference and what is in its reference disambiguation.

In [5]:
# For each of the first five references, print every disambiguation that
# points at it, together with the document ids on both sides.
for ref in Reference.objects[:5]:
    for link in Disambiguation.objects(reference=ref):
        if link is None:
            continue
        print(ref.reference_string)
        print(ref.document_id.id)
        print(link.document_id.id)
        print(link.type)

Hans Tietzte - Tietzte Conrad The drawings of the Venetian painters New York, 1944, p. 222 N. 1180.
56d5a04bfe768325f46b3ca2
56d5a04bfe768325f46b3ca2
reference_disambiguation


  app.launch_new_instance()
  if __name__ == '__main__':


Count the disambiguations of each of the five types.

In [6]:
# Same values as the `choices` of Disambiguation.type.
choices = (
    "author_of_disambiguation",
    "editor_of_disambiguation",
    "reference_disambiguation",
    "in_journal_disambiguation",
    "bookpart_disambiguation",
)
for kind in choices:
    total = Disambiguation.objects(type=kind).count()
    print("%s : %s" % (kind, total))

author_of_disambiguation : 259504
editor_of_disambiguation : 17029
reference_disambiguation : 358156
in_journal_disambiguation : 5496
bookpart_disambiguation : 0


See what the reference disambiguations of a monograph look like.

In [7]:
# Query for all disambiguations of type "reference_disambiguation".
ref_dis = Disambiguation.objects(type="reference_disambiguation")

In [8]:
# Print "<document type> : <publication title> : <reference string>" for a
# sample of ten reference disambiguations.
for rd in ref_dis[10000:10010]:
    r = rd.reference
    d = rd.document_id
    # Previously `b` was left unbound (or stale from the preceding iteration)
    # for journal issues, and `b[0]` raised IndexError when nothing matched.
    if d.type == "journal_issue":
        # presumably journals share the `bid` of their issues -- TODO confirm
        publication = Journal.objects(bid=d.bid).first()
        title = publication.full_title if publication is not None else "<unknown>"
    else:
        publication = Book.objects(bid=d.bid).first()
        title = publication.title if publication is not None else "<unknown>"
    print("{} : {} : {}".format(d.type, title, r.reference_string))

monograph : La quadreria di Agostino e Giovan Donato Correggio nel collezionismo veneziano del Seicento : ASVE, Dieci savi alle decime in Rialto, Catastico di Venezia 1712, reg. 431, nn. 452-454.
monograph : La quadreria di Agostino e Giovan Donato Correggio nel collezionismo veneziano del Seicento : Cancellier grande. Registro degli ordini della Cancelleria, 1638-1657.




monograph : La quadreria di Agostino e Giovan Donato Correggio nel collezionismo veneziano del Seicento : Stato, il Consiglio dei Dieci, impegnato nel controllo della sicurezza pubblica. 56ASVE, Fraterna grande di Sant’Antonin,
monograph : La quadreria di Agostino e Giovan Donato Correggio nel collezionismo veneziano del Seicento : Commissaria Correggio, bb. 3 e 5, cc. 162 e 89.
monograph : La quadreria di Agostino e Giovan Donato Correggio nel collezionismo veneziano del Seicento : Ivi, Avogaria di Común , b. 180,
monograph : La quadreria di Agostino e Giovan Donato Correggio nel collezionismo veneziano del Seicento : Ivi, Fraterna grande di Sant’Antonin,
monograph : La quadreria di Agostino e Giovan Donato Correggio nel collezionismo veneziano del Seicento : Paolo, San Luca, San Samuele e San Giovanni Crisostomo. ASVE, Fraterna grande di Sant’Antonin, Commissaria Correggio, b. 3, c. 58v.
monograph : La quadreria di Agostino e Giovan Donato Correggio nel collezionismo veneziano del Se

  if __name__ == '__main__':


List all reference disambiguations of that book.

In [22]:
# Despite its name, `book_id` holds the document referenced by the book; it is
# used to select the reference disambiguations of that document.
book_title = 'La quadreria di Agostino e Giovan Donato Correggio nel collezionismo veneziano del Seicento'
matching_books = Book.objects(title=book_title).all()
book_id = matching_books[0].document_id
ref_book = set(
    Disambiguation.objects(type="reference_disambiguation", document_id=book_id)
)
for index, link in enumerate(ref_book):
    print(index, link.reference.reference_string)

  from ipykernel import kernelapp as app


0 1913 A. LUZIO, La Galleria dei Gonzaga venduta all’Inghilterra nel 1627-1628, Milano.
1 ASVE, Notarile, Testamenti,  notaio Gaspare Acerbi, b. 1146, n. 319. 15APR, n. 1.
2 L'Arte, II, a cura di R. Pallucchini, Roma, pp. 63-117.
3 1854 M.FOSCARINI, Della letteratura veneziana, Venezia.
4 Fraterna grande di Sant'Antonin, Commissaria Correggio, b. 7)
5 Fraterna Grande di Sant’Antonin,
6 1879 G. Tassini, Alcuni palazzi ed antichi edifici di Venezia storicamente illustrati, Venezia.
7 Ivi, Fraterna grande di Sant’Antonin,
8 Fraterna grande di Sant’Antonin, Commissaria Correggio, b. 7)
9 1870 G. Campori, Raccolta di cataloghi ed inventari inediti, Modena.
10 Cancellier grande. Registro degli ordini della Cancelleria, 1638-1657.
11 Notarile, Atti, notaio Andrea Porta, b. 11189, cc. 289v-291v.
12 F.SANSOVINO - G. MartiniONI, Venetia città nobilissima et singolare descritta in XIIII Libri da M. Francesco Sansovino con aggiunta di tutte le cose notabili fatte et occorse dall’anno 1580 fino al 

Make sure all bids point to the same book.

In [10]:
# Compare the three places a bid can come from: the reference itself, the
# document of the reference and the document of the disambiguation.
ref_dis = Disambiguation.objects(type="reference_disambiguation")
for rd in ref_dis[10000:10010]:
    r = rd.reference
    d = rd.document_id
    bids = (r.bid, r.document_id.bid, d.bid)
    print("{} : {} : {}".format(*bids))

MIL0479269 : MIL0479269 : MIL0479269
MIL0479269 : MIL0479269 : MIL0479269
MIL0479269 : MIL0479269 : MIL0479269
MIL0479269 : MIL0479269 : MIL0479269
MIL0479269 : MIL0479269 : MIL0479269
MIL0479269 : MIL0479269 : MIL0479269
MIL0479269 : MIL0479269 : MIL0479269
MIL0479269 : MIL0479269 : MIL0479269
MIL0479269 : MIL0479269 : MIL0479269
MIL0479269 : MIL0479269 : MIL0479269


  from ipykernel import kernelapp as app


See what the authors look like.

In [11]:
# Print, for the first ten books, the `names` attribute followed by the
# authors found through the disambiguation collection.
books = Book.objects()[:10]
for b in books:
    list_author = b.get_author()
    # `names` is not a declared field of Book, so it is missing on some
    # documents. Use an explicit default instead of a bare `except:`, which
    # also hid every unrelated error.
    names = getattr(b, "names", "")
    print("{}:\n{}".format(names, list_author))

['Schulz, Anne Markham']:
[<Author: Schulz, Anne Markham, 1938- (http://viaf.org/viaf/110676004)>]
['Viggiano, Alfredo']:
[<Author: Viggiano, Alfredo (http://viaf.org/viaf/186348)>]
['Gallo, Andrea', 'Spadavecchia, Fiorella']:
[<Author: Spadavecchia, Fiorella (http://viaf.org/viaf/54010168)>, <Author: Gallo, Andrea, 1928-2013 (http://viaf.org/viaf/3603839)>]
['McCray, W. Patrick']:
[<Author: McCray, Patrick (http://viaf.org/viaf/2641929)>]




['Howard, Deborah']:
[<Author: Howard, Deborah (http://viaf.org/viaf/91775698)>]
:
[]
['Andolfo, Gianluigi', 'Leonardi, Maria']:
[<Author: Leonardi, Maria Lucia Azevedo (http://viaf.org/viaf/26684826)>, <Author: Andolfo, Gianluigi (http://viaf.org/viaf/218053784)>]
:
[]
['Franzina, Emilio']:
[<Author: Franzina, Emilio, 1948- (http://viaf.org/viaf/276219103)>]
['Cacciavillani, Ivone']:
[<Author: Cacciavillani, Ivone (http://viaf.org/viaf/12329146)>]


  from ipykernel import kernelapp as app


# Get references from one book

In [12]:
def ref_str_of_book_from_dis(num):
    """
    Return the sorted reference strings of the `num`-th book, collected
    through the disambiguation collection (`Book.get_reference`).

    An empty list is returned when `get_reference` yields None.
    """
    references = Book.objects()[num].get_reference()
    if references is None:
        return []
    return sorted(ref.reference_string for ref in references)

In [13]:
# First 20 (sorted) reference strings of book 0, via the disambiguations.
ref_str_of_book_from_dis(0)[:20]



["1910\xa0Vasari, Giorgio. Le vite de' più eccellenti pittori scultori e\xa0architettori, nelle redazioni del i550 e i56 8 , ed. by Rosanna Bettarini, commentary by Paola Barocchi.\xa0Florence, 1966-",
 '3Sartori /Luisetto, 1986, pt. 2, pp 2077f, nos. 27, 36, 37, ASV,\xa0Deputazione ad pias causas, Busta 58,',
 'A." 3ASV, Avogaria di Común, Registro 106/ 1, Cronaca matrimoni, i, c.\xa0I32v',
 'ASV, Arch not., Atti, Busta 3346 (Giovanni Maria Cava-gnis), cc. 496v-497r',
 'ASV, Arch, not , Testamenti, Busta 1227 (not. Cristoforo Rizzo), no. 74 (protocol and copy) .. .',
 'ASV, Arch. not., Testamenti, Busta 51 (not. Girolamo de Bossis),',
 'ASV, Archivio notarile, Testamenti, Busta 1227 (not Cristoforo Rizzo), no.',
 'ASV, Archivio notarile, Testamenti, Busta 50 (not. Girolamo de Bossis), no. 214, cc. 192r-193v,',
 'ASV, Archivio notarile, Testamenti, Busta 506 (not. Daniele Giordan), no. 565, c. 3r:',
 'ASV, Avogaria\xa0di Común, Busta 159, Necrologia dei nobili, filza 1, n.c,',
 'ASV, A

In [14]:
def ref_str_of_book_from_ref(num):
    """
    Return the sorted reference strings of the `num`-th book, collected
    directly from the `references` collection by matching the book's `bid`.

    An empty list is returned when the book has no references.
    """
    book = Book.objects()[num]
    # The former `set(...) != {}` test compared a set with an empty dict and
    # was therefore always True; sorting an empty result already gives [].
    return sorted(ref.reference_string for ref in Reference.objects(bid=book.bid))

In [15]:
# First 20 (sorted) reference strings of book 0, via the references collection.
ref_str_of_book_from_ref(0)[:20]



['!S Paoletti, 1893, ii, p. 252.',
 '" Coletti, 1935, p. 150.',
 '" The Venetian Arie degli scarpellini prescribed an apprenticeship of at least five years for all (Sagredo, 1856, p. 298, no. XLÎ),\xa0but no more than five years for those apprenticed to relatives\xa0(ibid., p. 290, no. XXI).',
 '"A di 20... Et eri fu messo la\xa0palla granda di l\'aitar di Santa Maria di Frati Menori suso, depenta\xa0per Ticiano, et prima li fu fato atorno una opera grande di marmo a\xa0spese di maistro Zerman, eh e guardian adesso."',
 '"A lato sinistro\xa0di questo Altare Cassa sepolcrale di marmo grande con nobile arco,\xa0e nelle sommità due Armi gentilizie con Iscrizione in lapida di Bar-tolomea Azzoni, e Pietro Maria Ostiano da Verona."',
 '"Alcune notizie sugli intagliatori\xa0della Zecca di Venezia," Archivio veneto, xxxv, 1888,\xa0pp. 271—7, repr. in Rivista italiana di numismatica, i,\xa01888, pp. 351-9',
 '"Appunti storici," La vita del popolo,\xa0 Feb. 17, 1923, [p 4], Coletti, [1926], pp 8

In [16]:
# A reference string taken from the disambiguation-based listing of book 0.
r = 'ASV, Arch not., Atti, Busta 3346 (Giovanni Maria Cava-gnis), cc. 496v-497r'

In [17]:
# All sorted reference strings of book 0 from the references collection.
r_list = ref_str_of_book_from_ref(0)



In [18]:
# Position of that string in the full listing (raises ValueError if absent).
r_list.index(r)

399

In [19]:
# Neighbourhood of the string in the sorted listing.
r_list[390:400]

['AST, Arch, not., I“ Serie, Busta 366 (not Girolamo di Gaspare da Pederobba), filza 5, cc. 84r-87r',
 'AST, Arch, not., I“ serie, Busta 366 (not. Girolamo di Gaspare da Pederobba), filza 8, c. 59v.',
 'AST, Arch, not., T serie, Busta 422 (not. Giovanni Leonardo di Marco Berenghi), Testamenti, 1482, 1490—1501, cc. 101',
 'AST, Arch, not., Γ serie, Busta 356 (not. Giovanni Matteo Zibetto), filza 12 (1514-15), cc. 120 left-121',
 'AST, Arch, not., Γ serie, Busta 422 (not. Giovanni Leonardo di Marco Berenghi), Testamenti, 1482, 1490—1501, cc. 108r—',
 'AST, Archivio notarile, la serie, Busta 355 (not. Giovanni Matteo Zibetto), filza 8 (1512), c. 98v.',
 'AST, Corporazioni religiose soppresse, S Chiara\xa0di Treviso, Busta 1, Catastico, cc. 2—5.',
 'AST, Corporazioni religiose soppresse, S. Chiara di Treviso, Busta I, Catastico,\xa0c. 7.',
 'AST, Corporazioni religiose soppresse, S. Chiara di Treviso, Busta\xa01, Catastico, c. 7.',
 'ASV, Arch not., Atti, Busta 3346 (Giovanni Maria Cava-gn

In [20]:
# All sorted reference strings of book 0 from the disambiguation collection.
r_dis = ref_str_of_book_from_dis(0)



In [21]:
# NOTE(review): this displays the whole list; consider a slice such as
# r_dis[:20] to keep the output short.
r_dis

["1910\xa0Vasari, Giorgio. Le vite de' più eccellenti pittori scultori e\xa0architettori, nelle redazioni del i550 e i56 8 , ed. by Rosanna Bettarini, commentary by Paola Barocchi.\xa0Florence, 1966-",
 '3Sartori /Luisetto, 1986, pt. 2, pp 2077f, nos. 27, 36, 37, ASV,\xa0Deputazione ad pias causas, Busta 58,',
 'A." 3ASV, Avogaria di Común, Registro 106/ 1, Cronaca matrimoni, i, c.\xa0I32v',
 'ASV, Arch not., Atti, Busta 3346 (Giovanni Maria Cava-gnis), cc. 496v-497r',
 'ASV, Arch, not , Testamenti, Busta 1227 (not. Cristoforo Rizzo), no. 74 (protocol and copy) .. .',
 'ASV, Arch. not., Testamenti, Busta 51 (not. Girolamo de Bossis),',
 'ASV, Archivio notarile, Testamenti, Busta 1227 (not Cristoforo Rizzo), no.',
 'ASV, Archivio notarile, Testamenti, Busta 50 (not. Girolamo de Bossis), no. 214, cc. 192r-193v,',
 'ASV, Archivio notarile, Testamenti, Busta 506 (not. Daniele Giordan), no. 565, c. 3r:',
 'ASV, Avogaria\xa0di Común, Busta 159, Necrologia dei nobili, filza 1, n.c,',
 'ASV, A