In [1]:
import ast
import json
import os
import re
import shutil

import gspread
import matplotlib.pyplot as plt
import pandas as pd
import sddk
from google.oauth2 import service_account # based on google-auth library
from gspread_dataframe import get_as_dataframe, set_with_dataframe
from spacy.tokens import Doc

In [8]:
# Set up communication with the Google Sheet "noscemus_overview".
# To make this work, you need your ServiceAccountsKey.json file located somewhere and to point the path below to it.
# I have it in the data harmonia and listed in the .gitignore file, which makes it invisible to others; you can do the same.
# (1) read the content of the key file; the context manager closes the file handle again
with open(os.path.expanduser("~/ServiceAccountsKey.json"), encoding="utf-8") as key_file:
    file_data = json.load(key_file)
# (2) transform the content into a credentials object
credentials = service_account.Credentials.from_service_account_info(file_data)
# (3) specify your usage of the credentials
scoped_credentials = credentials.with_scopes(['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive'])
# (4) use the constrained credentials for authentication of the gspread package
gc = gspread.Client(auth=scoped_credentials)

# handle to the spreadsheet that the final table is uploaded to at the end of the notebook
noscemus_gs = gc.open_by_url("https://docs.google.com/spreadsheets/d/1ekf9RBfd4yqy0p0IWJ4SDk1kUT70hnoIVX1H6KPdIts/edit?usp=sharing")

# Merging metadata

In [25]:
# load table with filenames and ids
ids_filenames_df = pd.read_csv("../data/ids_filenames_df.csv")
ids_filenames_df.head(5)

Unnamed: 0.1,Unnamed: 0,id,filenames_list
0,0,1031760,"['Bacon,_Francis_-_Instauratio_magna__London_1..."
1,1,1085290,"['Linden,_Johannes_Antonides_van_der_-_Lindeni..."
2,2,1285853,"['de_Conde,_Ioannes_Baptista_-_Aphorismi_seu_a..."
3,3,1285854,"['van_Poort,_Henricus_-_Hippocratis_Aphorismi_..."
4,4,1285855,"['Hippocrates_&_Denisot,_Gérard_-_Hippocratis_..."


In [26]:
ids_filenames_df["id"].dtype

dtype('int64')

In [27]:
# load the table with wiki metadata
metadata_table = pd.read_csv("../data/metadata_table.csv")

In [28]:
len(metadata_table)

994

In [29]:
metadata_table.head(20)

Unnamed: 0,Author,Full title,In,Year,Place,Publisher/Printer,Era,Form/Genre,Discipline/Content,Original,Digital sourcebook,Description,References,Cited in,How to cite this entry,Internal notes,Of interest to,Transkribus text available,Written by,Library and Signature
0,"Pardies, Ignace Gaston",A Latin Letter written to the Publisher April ...,Philosophical Transactions of the Royal Societ...,1672,London,Martyn,17th century,"Letter, Review",Physics,A Latin Letter(JSTOR),607720,"In Feburary of 1672,Isaac Newtonpublished his ...","Harmon; Gross 2007, 22–5","Pardies, Ignace Gaston‎(← links)Mr. Newtons Le...","Pardies, Ignace Gaston:A Latin Letter containi...",RECENSIO,,Yes,IT,
1,"Scheuchzer, Johann Jakob","Acarnania sive Relatio eorum, quae hactenus el...","ΟΥΡΕΣΙΦΟΙΤΗΣ (Ouresiphoites) Helveticus, 609–35",1723,Leiden,"van der Aa, Pieter",18th century,"Biography, Bibliography","Mathematics, Physics, Geography/Cartography, M...","Acarnania (1st edition), inMiscellanea Lipsien...",605919(in:ΟΥΡΕΣΙΦΟΙΤΗΣ Helveticus),The remarknunc primum editain the title of thi...,,Charta invitatoria‎(← links)ΟΥΡΕΣΙΦΟΙΤΗΣ (Oure...,"Scheuchzer, Johann Jakob:Acarnania, in: Noscem...",,MK,Yes,MK,
2,"Morabito, Giuseppe",Ad astronautas Americanos carmen Iosephi Morab...,Fons pacis. Nova aetas. Ad astronautas America...,1969,Amsterdam,Nord-Hollandsche Uitgevers Maatschapij,After 1800,Panegyric poem,Astronomy/Astrology/Cosmography,Download PDF(Transcript),not available,"As late as the 20th century, the imagination o...","Giustiniani 1979, 108 (list of award-winning p...","Morabito, Giuseppe‎(← links)","Morabito, Giuseppe:Ad astronautas Americanos, ...","The Earthrise picture and ""Please be informed ...",IT,,IT,
3,"Addison, Joseph",Ad insignissimum virum dominum Thomam Burnettu...,"Examen poeticum duplex, sive, Musarum anglican...",1698,London,Richard Wellington I.,17th century,Panegyric poem,Meteorology/Earth sciences,Ad Burnettum sacrae theoriae telluris auctorem...,769230,"To the Very Famous Mr. Thomas Burnet, Author o...",,"Addison, Joseph‎(← links)Nova philosophia vete...","Addison, Joseph:Ad Burnettum sacrae theoriae t...",,"MK, IT",Yes,MK,
4,"Lipsius, Justus",Ad Clusii nomen lusus,"L'Ecluse, Charles de, Rariorum aliquot stirpiu...",1583,Antwerp,Plantin,16th century,Panegyric poem,"Biology, Medicine, Other (see description)",Ad Clusii nomen lusus(Biblioteca Digital Real ...,749217,This witty epigram by the famous Flemish philo...,"Ogilvie 2011, 28-9",Rariorum stirpium per Pannoniam et Austriam ob...,"Lipsius, Justus:Ad Clusii nomen lusus, in: Nos...","Possibly, this epigram could be found in Lipsi...",IT,Yes,IT,
5,"Owen, John",Ad Dominum Gilbertum,Epigrammatum libri tres. Auctore Ioanne Owen B...,1606,London,"Windet, John, Waterson, Simon",17th century,Other (see description),Astronomy/Astrology/Cosmography,3rd edition (Amberg 1608)(Google Books)Digital...,"900767(3rd edition, Amberg 1608)",John Owen was the most popular Anglo-Latin epi...,Martyn 1976–1978;Durand 2016(editions)Jansen 2009,"Owen, John‎(← links)An terra moveatur‎(← links)","Owen, John:Ad Gilbertum, in: Noscemus Wiki, UR...",First edition in sharefolder.The epigram was a...,"JL, IT",Yes,IT,
6,"Costus, Petrus",Petrus Costus ad Gulielmum Rondeletium medicum...,"Aquatilium historia, vol. 1, fol. α3r",1554,Lyon,Bonhomme,16th century,Panegyric poem,Biology,"Aquatilium historia, vol. 1(Google Books)",748052,The first volume of Rondelet'sAquatilium histo...,,"Aquatilium historia‎(← links)Costus, Petrus‎(←...","Costus, Petrus:Ad Gulielmum Rondeletium, in: N...",,MK,Yes,MK,
7,"Acidalius, Valens","Ad Iordanum Brunum Nolanum, Italum","Poematum Iani Lernutii, Iani Gulielmi, Valenti...",1603,"Liegnitz, Wrocław","Albert, David",17th century,Panegyric poem,Astronomy/Astrology/Cosmography,Ad Iordanum Brunum (1603)(CAMENA)Ad Iordanum B...,801745,Most of the posthumously edited epigrams of th...,"Kühlmann 1979, 152–53","Acidalius, Valens‎(← links)","Acidalius, Valens:Ad Iordanum Brunum, in: Nosc...",Kühlmann must have overlooked the poem in the ...,"MK, IT",Yes,MK,
8,"Paulinus, Fabius","Ad clarissimum virum Laurentium Massam, sereni...","Avicennae, Arabum medicorum principis, ex Gera...",1595,Venice,I Giunti,16th century,Panegyric poem,Medicine,Ad Laurentium Massam pro Avicenna ode(Google B...,900763,Giovanni Costeo's and Giovanni Paolo Mongio's ...,"Siraisi 1987, 143","Paulinus, Fabius‎(← links)","Paulinus, Fabius:Ad Laurentium Massam pro Avic...","On the title page of the edition, the many acc...",MK,Yes,MK,
9,"Sands, Patrick",Ad lectorem trigonometriae studiosum,Mirifici logarithmorum canonis descriptioeiusq...,1614,Edinburgh,Andro Hart,17th century,Panegyric poem,Mathematics,Ad lectorem trigonometriae studiosum(Google Bo...,801744,When John Napier communicated his invention of...,McOmish 2017,Mirifici logarithmorum canonis descriptio‎(← l...,"Sands, Patrick:Ad lectorem trigonometriae stud...",Sands also wrote a poem for Napier'sRabdologia...,IT,Yes,IT,


In [30]:
# what does the "Digital sourcebook" attribute look like? (first ten raw values)
metadata_table["Digital sourcebook"].tolist()[:10]

['607720',
 '605919(in:ΟΥΡΕΣΙΦΟΙΤΗΣ Helveticus)',
 'not available',
 '769230',
 '749217',
 '900767(3rd edition, Amberg 1608)',
 '748052',
 '801745',
 '900763',
 '801744']

In [31]:
# one specific case for testing:
id_text = metadata_table["Digital sourcebook"].tolist()[21]
id_text

'913057(vol. 1)913059(vol. 2)913060(vol. 3)913061(vol. 4)913062(vol. 5)913063(vol. 6)913064(vol. 7)913065(vol. 8)913066(vol. 9)913058(vol. 10)'

In [32]:
# get all IDs as a list
def clean_id(id_text):
    """Extract all numeric IDs from one "Digital sourcebook" cell.

    Every run of 5 to 8 consecutive digits is treated as an ID, so free-text
    annotations such as "(vol. 1)" or "(3rd edition, Amberg 1608)" are ignored.

    Parameters
    ----------
    id_text : str or any
        Raw cell content. Non-string values (e.g. the float NaN that pandas
        uses for empty cells) are accepted and yield an empty list.

    Returns
    -------
    list of int
        The IDs in order of appearance; empty when none are found.
    """
    # Empty cells arrive as NaN (a float), which the regex engine cannot scan.
    if not isinstance(id_text, str):
        return []
    return [int(digits) for digits in re.findall(r"\d{5,8}", id_text)]

In [33]:
clean_id(id_text)

[913057,
 913059,
 913060,
 913061,
 913062,
 913063,
 913064,
 913065,
 913066,
 913058]

In [34]:
metadata_table["ids"] = metadata_table["Digital sourcebook"].apply(clean_id)

In [35]:
metadata_table.head(10)

Unnamed: 0,Author,Full title,In,Year,Place,Publisher/Printer,Era,Form/Genre,Discipline/Content,Original,...,Description,References,Cited in,How to cite this entry,Internal notes,Of interest to,Transkribus text available,Written by,Library and Signature,ids
0,"Pardies, Ignace Gaston",A Latin Letter written to the Publisher April ...,Philosophical Transactions of the Royal Societ...,1672,London,Martyn,17th century,"Letter, Review",Physics,A Latin Letter(JSTOR),...,"In Feburary of 1672,Isaac Newtonpublished his ...","Harmon; Gross 2007, 22–5","Pardies, Ignace Gaston‎(← links)Mr. Newtons Le...","Pardies, Ignace Gaston:A Latin Letter containi...",RECENSIO,,Yes,IT,,[607720]
1,"Scheuchzer, Johann Jakob","Acarnania sive Relatio eorum, quae hactenus el...","ΟΥΡΕΣΙΦΟΙΤΗΣ (Ouresiphoites) Helveticus, 609–35",1723,Leiden,"van der Aa, Pieter",18th century,"Biography, Bibliography","Mathematics, Physics, Geography/Cartography, M...","Acarnania (1st edition), inMiscellanea Lipsien...",...,The remarknunc primum editain the title of thi...,,Charta invitatoria‎(← links)ΟΥΡΕΣΙΦΟΙΤΗΣ (Oure...,"Scheuchzer, Johann Jakob:Acarnania, in: Noscem...",,MK,Yes,MK,,[605919]
2,"Morabito, Giuseppe",Ad astronautas Americanos carmen Iosephi Morab...,Fons pacis. Nova aetas. Ad astronautas America...,1969,Amsterdam,Nord-Hollandsche Uitgevers Maatschapij,After 1800,Panegyric poem,Astronomy/Astrology/Cosmography,Download PDF(Transcript),...,"As late as the 20th century, the imagination o...","Giustiniani 1979, 108 (list of award-winning p...","Morabito, Giuseppe‎(← links)","Morabito, Giuseppe:Ad astronautas Americanos, ...","The Earthrise picture and ""Please be informed ...",IT,,IT,,[]
3,"Addison, Joseph",Ad insignissimum virum dominum Thomam Burnettu...,"Examen poeticum duplex, sive, Musarum anglican...",1698,London,Richard Wellington I.,17th century,Panegyric poem,Meteorology/Earth sciences,Ad Burnettum sacrae theoriae telluris auctorem...,...,"To the Very Famous Mr. Thomas Burnet, Author o...",,"Addison, Joseph‎(← links)Nova philosophia vete...","Addison, Joseph:Ad Burnettum sacrae theoriae t...",,"MK, IT",Yes,MK,,[769230]
4,"Lipsius, Justus",Ad Clusii nomen lusus,"L'Ecluse, Charles de, Rariorum aliquot stirpiu...",1583,Antwerp,Plantin,16th century,Panegyric poem,"Biology, Medicine, Other (see description)",Ad Clusii nomen lusus(Biblioteca Digital Real ...,...,This witty epigram by the famous Flemish philo...,"Ogilvie 2011, 28-9",Rariorum stirpium per Pannoniam et Austriam ob...,"Lipsius, Justus:Ad Clusii nomen lusus, in: Nos...","Possibly, this epigram could be found in Lipsi...",IT,Yes,IT,,[749217]
5,"Owen, John",Ad Dominum Gilbertum,Epigrammatum libri tres. Auctore Ioanne Owen B...,1606,London,"Windet, John, Waterson, Simon",17th century,Other (see description),Astronomy/Astrology/Cosmography,3rd edition (Amberg 1608)(Google Books)Digital...,...,John Owen was the most popular Anglo-Latin epi...,Martyn 1976–1978;Durand 2016(editions)Jansen 2009,"Owen, John‎(← links)An terra moveatur‎(← links)","Owen, John:Ad Gilbertum, in: Noscemus Wiki, UR...",First edition in sharefolder.The epigram was a...,"JL, IT",Yes,IT,,[900767]
6,"Costus, Petrus",Petrus Costus ad Gulielmum Rondeletium medicum...,"Aquatilium historia, vol. 1, fol. α3r",1554,Lyon,Bonhomme,16th century,Panegyric poem,Biology,"Aquatilium historia, vol. 1(Google Books)",...,The first volume of Rondelet'sAquatilium histo...,,"Aquatilium historia‎(← links)Costus, Petrus‎(←...","Costus, Petrus:Ad Gulielmum Rondeletium, in: N...",,MK,Yes,MK,,[748052]
7,"Acidalius, Valens","Ad Iordanum Brunum Nolanum, Italum","Poematum Iani Lernutii, Iani Gulielmi, Valenti...",1603,"Liegnitz, Wrocław","Albert, David",17th century,Panegyric poem,Astronomy/Astrology/Cosmography,Ad Iordanum Brunum (1603)(CAMENA)Ad Iordanum B...,...,Most of the posthumously edited epigrams of th...,"Kühlmann 1979, 152–53","Acidalius, Valens‎(← links)","Acidalius, Valens:Ad Iordanum Brunum, in: Nosc...",Kühlmann must have overlooked the poem in the ...,"MK, IT",Yes,MK,,[801745]
8,"Paulinus, Fabius","Ad clarissimum virum Laurentium Massam, sereni...","Avicennae, Arabum medicorum principis, ex Gera...",1595,Venice,I Giunti,16th century,Panegyric poem,Medicine,Ad Laurentium Massam pro Avicenna ode(Google B...,...,Giovanni Costeo's and Giovanni Paolo Mongio's ...,"Siraisi 1987, 143","Paulinus, Fabius‎(← links)","Paulinus, Fabius:Ad Laurentium Massam pro Avic...","On the title page of the edition, the many acc...",MK,Yes,MK,,[900763]
9,"Sands, Patrick",Ad lectorem trigonometriae studiosum,Mirifici logarithmorum canonis descriptioeiusq...,1614,Edinburgh,Andro Hart,17th century,Panegyric poem,Mathematics,Ad lectorem trigonometriae studiosum(Google Bo...,...,When John Napier communicated his invention of...,McOmish 2017,Mirifici logarithmorum canonis descriptio‎(← l...,"Sands, Patrick:Ad lectorem trigonometriae stud...",Sands also wrote a poem for Napier'sRabdologia...,IT,Yes,IT,,[801744]


In [36]:
# The problem is that one wiki entry is now often mapped onto multiple ids,
# so let's create an independent row for each id (author and work are repeated in that case).
# Entries whose "ids" list is empty contribute no row at all.
metadata_table_long = [
    {**row_as_dict, "id": single_id}
    for row_as_dict in (
        metadata_table.iloc[position].to_dict()
        for position in range(len(metadata_table))
    )
    for single_id in row_as_dict["ids"]
]

In [37]:
metadata_table_long = pd.DataFrame(metadata_table_long)
metadata_table_long.head(20)

Unnamed: 0,Author,Full title,In,Year,Place,Publisher/Printer,Era,Form/Genre,Discipline/Content,Original,...,References,Cited in,How to cite this entry,Internal notes,Of interest to,Transkribus text available,Written by,Library and Signature,ids,id
0,"Pardies, Ignace Gaston",A Latin Letter written to the Publisher April ...,Philosophical Transactions of the Royal Societ...,1672,London,Martyn,17th century,"Letter, Review",Physics,A Latin Letter(JSTOR),...,"Harmon; Gross 2007, 22–5","Pardies, Ignace Gaston‎(← links)Mr. Newtons Le...","Pardies, Ignace Gaston:A Latin Letter containi...",RECENSIO,,Yes,IT,,[607720],607720
1,"Scheuchzer, Johann Jakob","Acarnania sive Relatio eorum, quae hactenus el...","ΟΥΡΕΣΙΦΟΙΤΗΣ (Ouresiphoites) Helveticus, 609–35",1723,Leiden,"van der Aa, Pieter",18th century,"Biography, Bibliography","Mathematics, Physics, Geography/Cartography, M...","Acarnania (1st edition), inMiscellanea Lipsien...",...,,Charta invitatoria‎(← links)ΟΥΡΕΣΙΦΟΙΤΗΣ (Oure...,"Scheuchzer, Johann Jakob:Acarnania, in: Noscem...",,MK,Yes,MK,,[605919],605919
2,"Addison, Joseph",Ad insignissimum virum dominum Thomam Burnettu...,"Examen poeticum duplex, sive, Musarum anglican...",1698,London,Richard Wellington I.,17th century,Panegyric poem,Meteorology/Earth sciences,Ad Burnettum sacrae theoriae telluris auctorem...,...,,"Addison, Joseph‎(← links)Nova philosophia vete...","Addison, Joseph:Ad Burnettum sacrae theoriae t...",,"MK, IT",Yes,MK,,[769230],769230
3,"Lipsius, Justus",Ad Clusii nomen lusus,"L'Ecluse, Charles de, Rariorum aliquot stirpiu...",1583,Antwerp,Plantin,16th century,Panegyric poem,"Biology, Medicine, Other (see description)",Ad Clusii nomen lusus(Biblioteca Digital Real ...,...,"Ogilvie 2011, 28-9",Rariorum stirpium per Pannoniam et Austriam ob...,"Lipsius, Justus:Ad Clusii nomen lusus, in: Nos...","Possibly, this epigram could be found in Lipsi...",IT,Yes,IT,,[749217],749217
4,"Owen, John",Ad Dominum Gilbertum,Epigrammatum libri tres. Auctore Ioanne Owen B...,1606,London,"Windet, John, Waterson, Simon",17th century,Other (see description),Astronomy/Astrology/Cosmography,3rd edition (Amberg 1608)(Google Books)Digital...,...,Martyn 1976–1978;Durand 2016(editions)Jansen 2009,"Owen, John‎(← links)An terra moveatur‎(← links)","Owen, John:Ad Gilbertum, in: Noscemus Wiki, UR...",First edition in sharefolder.The epigram was a...,"JL, IT",Yes,IT,,[900767],900767
5,"Costus, Petrus",Petrus Costus ad Gulielmum Rondeletium medicum...,"Aquatilium historia, vol. 1, fol. α3r",1554,Lyon,Bonhomme,16th century,Panegyric poem,Biology,"Aquatilium historia, vol. 1(Google Books)",...,,"Aquatilium historia‎(← links)Costus, Petrus‎(←...","Costus, Petrus:Ad Gulielmum Rondeletium, in: N...",,MK,Yes,MK,,[748052],748052
6,"Acidalius, Valens","Ad Iordanum Brunum Nolanum, Italum","Poematum Iani Lernutii, Iani Gulielmi, Valenti...",1603,"Liegnitz, Wrocław","Albert, David",17th century,Panegyric poem,Astronomy/Astrology/Cosmography,Ad Iordanum Brunum (1603)(CAMENA)Ad Iordanum B...,...,"Kühlmann 1979, 152–53","Acidalius, Valens‎(← links)","Acidalius, Valens:Ad Iordanum Brunum, in: Nosc...",Kühlmann must have overlooked the poem in the ...,"MK, IT",Yes,MK,,[801745],801745
7,"Paulinus, Fabius","Ad clarissimum virum Laurentium Massam, sereni...","Avicennae, Arabum medicorum principis, ex Gera...",1595,Venice,I Giunti,16th century,Panegyric poem,Medicine,Ad Laurentium Massam pro Avicenna ode(Google B...,...,"Siraisi 1987, 143","Paulinus, Fabius‎(← links)","Paulinus, Fabius:Ad Laurentium Massam pro Avic...","On the title page of the edition, the many acc...",MK,Yes,MK,,[900763],900763
8,"Sands, Patrick",Ad lectorem trigonometriae studiosum,Mirifici logarithmorum canonis descriptioeiusq...,1614,Edinburgh,Andro Hart,17th century,Panegyric poem,Mathematics,Ad lectorem trigonometriae studiosum(Google Bo...,...,McOmish 2017,Mirifici logarithmorum canonis descriptio‎(← l...,"Sands, Patrick:Ad lectorem trigonometriae stud...",Sands also wrote a poem for Napier'sRabdologia...,IT,Yes,IT,,[801744],801744
9,"Biancani, Giuseppe",Additamentum de natura scientiarum mathematicarum,Aristotelis loca mathematica ex universis ipsi...,1615,Bologna,Cochi,17th century,"Monograph, Other (see description)",Mathematics,Additamentum de natura scientiarum mathematica...,...,"Mancosu 1996, pp. 15-19 and 178-212 (English t...","Biancani, Giuseppe‎(← links)Clarorum mathemati...","Biancani, Giuseppe:Additamentum de natura scie...","I would recommend to include Piccolomini's ""Co...","MK, FB",Yes,"MK, FB",,[653295],653295


In [38]:
# Look at the "Year" attribute: print every value that cannot be read as a plain integer.
for date in metadata_table_long["Year"]:
    try:
        int(date)
    except (TypeError, ValueError):
        # only conversion failures are of interest here; anything else should surface
        print(date)

[c. 1490]
1651; 1665
1749–1790
1749–1790
1749–1790
1749–1790
1749–1790
1749–1790
1749–1790
1749–1790
1749–1790
1749–1790
1675–1679
1754–1770
1754–1770
1817–1826
1554–1555
1554–1555
[1557]
1771–1772
1545 (title page); 1544 (last page)
[1476]
1498 [1554]
[1727]
[1524]
1495 more Veneto / 1496
nan
1554 [1507]
1579 [1576]
1659–1661
[1500]
1612 [1591]
1665 [1616]
1743 [1700; 1713]
nan
1680–1681
1680–1681
1588; 1603 (2nd edition)
1588; 1603 (2nd edition)
1540 [ca. 1484–1487]
1495–1497
1532/1534
1714 [1754]
1576 [editio princeps: 1492]
1500–1504
nan
[1475]
[1700]
1773–1775
1773–1775
1773–1775
1773–1775
1773–1775
1773–1775
1618 (books 1–3); 1620 (book 4); 1621 (books 5–7)
[1617]
1747–1769
Antwerp
1736 [date on title page; in fact, 1735]
1736 [date on title page; in fact, 1735]
1659–1661
1741–1750
1741–1750
1741–1750
1741–1750
1741–1750
1741–1750
1741–1750
1741–1750
1530–1536
1530–1536
1530–1536
1551; 1554; 1555; 1558
1551; 1554; 1555; 1558
1551; 1554; 1555; 1558
1551; 1554; 1555; 1558
1779–1781

In [39]:
# again, we have a problem here, as the year is often much more than a simple number
# quick test: pull every four-digit number out of a multi-year string and sort the results
dates_str = "1718; 1720; 1727; 1729; 1732; 1737; 1739; 1739; 1740"
# raw string so that "\d" reaches the regex engine instead of being an invalid string escape
sorted([int(date) for date in re.findall(r"\d{4}", dates_str)])

[1718, 1720, 1727, 1729, 1732, 1737, 1739, 1739, 1740]

In [40]:
# in such cases, we will extract only the lowest number ("date_min") and the highest number ("date_max")
def _year_bounds(year_value):
    """Return ``(date_min, date_max)`` for one raw "Year" value.

    A value that converts directly to an integer gives that year twice.
    Otherwise every four-digit number in the text is collected and the
    smallest and largest are returned. Values without any usable year
    (missing cells, text without four digits in a row) give ``(None, None)``.
    """
    try:
        single_year = int(year_value)
    except (TypeError, ValueError, OverflowError):
        # not a plain number -> fall through to the pattern search below
        pass
    else:
        return single_year, single_year
    if not isinstance(year_value, str):
        # e.g. NaN for an empty cell: nothing to search in
        return None, None
    found_years = [int(digits) for digits in re.findall(r"\d{4}", year_value)]
    if not found_years:
        return None, None
    return min(found_years), max(found_years)


dates_min_max = [_year_bounds(date_str) for date_str in metadata_table_long["Year"]]

In [41]:
dates_min_max[:50]

[(1672, 1672),
 (1723, 1723),
 (1698, 1698),
 (1583, 1583),
 (1606, 1606),
 (1554, 1554),
 (1603, 1603),
 (1595, 1595),
 (1614, 1614),
 (1615, 1615),
 (1620, 1620),
 (1648, 1648),
 (1648, 1648),
 (1648, 1648),
 (1591, 1591),
 (1597, 1597),
 (1572, 1572),
 (1561, 1561),
 (1490, 1490),
 (1651, 1665),
 (1749, 1790),
 (1749, 1790),
 (1749, 1790),
 (1749, 1790),
 (1749, 1790),
 (1749, 1790),
 (1749, 1790),
 (1749, 1790),
 (1749, 1790),
 (1749, 1790),
 (1712, 1712),
 (1720, 1720),
 (1654, 1654),
 (1723, 1723),
 (1651, 1651),
 (1666, 1666),
 (1675, 1679),
 (1666, 1666),
 (1611, 1611),
 (1580, 1580),
 (1683, 1683),
 (1533, 1533),
 (1787, 1787),
 (1631, 1631),
 (1614, 1614),
 (1747, 1747),
 (1754, 1770),
 (1754, 1770),
 (1817, 1826),
 (1738, 1738)]

In [42]:
# Store the lower and upper year bound of every row as separate columns.
# NOTE(review): both columns are displayed as floats (e.g. 1672.0) in the output below,
# presumably because rows without a year hold a missing value — confirm before relying on the dtype.
metadata_table_long["date_min"] = [el[0] for el in dates_min_max]
metadata_table_long["date_max"] = [el[1] for el in dates_min_max]

In [43]:
metadata_table_long

Unnamed: 0,Author,Full title,In,Year,Place,Publisher/Printer,Era,Form/Genre,Discipline/Content,Original,...,How to cite this entry,Internal notes,Of interest to,Transkribus text available,Written by,Library and Signature,ids,id,date_min,date_max
0,"Pardies, Ignace Gaston",A Latin Letter written to the Publisher April ...,Philosophical Transactions of the Royal Societ...,1672,London,Martyn,17th century,"Letter, Review",Physics,A Latin Letter(JSTOR),...,"Pardies, Ignace Gaston:A Latin Letter containi...",RECENSIO,,Yes,IT,,[607720],607720,1672.0,1672.0
1,"Scheuchzer, Johann Jakob","Acarnania sive Relatio eorum, quae hactenus el...","ΟΥΡΕΣΙΦΟΙΤΗΣ (Ouresiphoites) Helveticus, 609–35",1723,Leiden,"van der Aa, Pieter",18th century,"Biography, Bibliography","Mathematics, Physics, Geography/Cartography, M...","Acarnania (1st edition), inMiscellanea Lipsien...",...,"Scheuchzer, Johann Jakob:Acarnania, in: Noscem...",,MK,Yes,MK,,[605919],605919,1723.0,1723.0
2,"Addison, Joseph",Ad insignissimum virum dominum Thomam Burnettu...,"Examen poeticum duplex, sive, Musarum anglican...",1698,London,Richard Wellington I.,17th century,Panegyric poem,Meteorology/Earth sciences,Ad Burnettum sacrae theoriae telluris auctorem...,...,"Addison, Joseph:Ad Burnettum sacrae theoriae t...",,"MK, IT",Yes,MK,,[769230],769230,1698.0,1698.0
3,"Lipsius, Justus",Ad Clusii nomen lusus,"L'Ecluse, Charles de, Rariorum aliquot stirpiu...",1583,Antwerp,Plantin,16th century,Panegyric poem,"Biology, Medicine, Other (see description)",Ad Clusii nomen lusus(Biblioteca Digital Real ...,...,"Lipsius, Justus:Ad Clusii nomen lusus, in: Nos...","Possibly, this epigram could be found in Lipsi...",IT,Yes,IT,,[749217],749217,1583.0,1583.0
4,"Owen, John",Ad Dominum Gilbertum,Epigrammatum libri tres. Auctore Ioanne Owen B...,1606,London,"Windet, John, Waterson, Simon",17th century,Other (see description),Astronomy/Astrology/Cosmography,3rd edition (Amberg 1608)(Google Books)Digital...,...,"Owen, John:Ad Gilbertum, in: Noscemus Wiki, UR...",First edition in sharefolder.The epigram was a...,"JL, IT",Yes,IT,,[900767],900767,1606.0,1606.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1099,"Scheuchzer, Johann Jakob",ΟΥΡΕΣΙΦΟΙΤΗΣ Helveticus sive itinera per Helve...,,1723,Leiden,"van der Aa, Pieter",18th century,"Report, Bibliography","Geography/Cartography, Meteorology/Earth scien...",ΟΥΡΕΣΙΦΟΙΤΗΣ (Ouresiphoites) Helveticus(e-rara...,...,"Scheuchzer, Johann Jakob:ΟΥΡΕΣΙΦΟΙΤΗΣ (Ouresip...","Tomus primus (= Itinera 1702, 1703, 1704)Praef...",MK,Yes,MK,,[605919],605919,1723.0,1723.0
1100,"Bauhin, Caspar",ΠΙΝΑΞ (Pinax) theatri botanici Caspari Bauhini...,,1623,Basel,König,17th century,"Dictionary/Lexicon, Historia, Encyclopedic work",Biology,Pinax theatri botanici(e-rara.ch)Alternative l...,...,"Bauhin, Caspar:ΠΙΝΑΞ (Pinax) theatri botanici,...",,"DB, MK",Yes,DB,,[632437],632437,1623.0,1623.0
1101,"Colonna, Fabio",ΦΥΤΟΒΑΣΑΝΟΣ (Phytobasanos) sive plantarum aliq...,,1592,Naples,"Salviani, Orazio",16th century,Historia,"Biology, Medicine",Phytobasanos(Biodiversity Heritage Library),...,"Colonna, Fabio:ΦΥΤΟΒΑΣΑΝΟΣ (Phytobasanos), in:...",,DB,Yes,DB,,[752863],752863,1592.0,1592.0
1102,"Scultetus, Johannes","ΧΕΙΡΟΠΛΟΘΗΚΗ seu domini Ioannis Sculteti, phys...",,1655,Ulm,Kühn,17th century,"Monograph, Report, Other (see description)",Medicine,ΧΕΙΡΟΠΛΟΘΗΚΗ(Google Books)German translation (...,...,"Scultetus, Johannes:ΧΕΙΡΟΠΛΟΘΗΚΗ (Cheiroplothe...",Indications regarding the size of the instrume...,MK,Yes,MK,,[918558],918558,1655.0,1655.0


In [45]:
# let's return back to the other table, mapping filenames on the IDs
ids_filenames_df.head(5)

Unnamed: 0.1,Unnamed: 0,id,filenames_list
0,0,1031760,"['Bacon,_Francis_-_Instauratio_magna__London_1..."
1,1,1085290,"['Linden,_Johannes_Antonides_van_der_-_Lindeni..."
2,2,1285853,"['de_Conde,_Ioannes_Baptista_-_Aphorismi_seu_a..."
3,3,1285854,"['van_Poort,_Henricus_-_Hippocratis_Aphorismi_..."
4,4,1285855,"['Hippocrates_&_Denisot,_Gérard_-_Hippocratis_..."


In [47]:
# So far the filenames are encapsulated within a list, in case there were more than one file in the original ID harmonia.
# That never happened in the end, so we can just extract the first value from the list.
def get_filename(filenames_str):
    """Return the first filename from the textual list stored in "filenames_list".

    The CSV stores each list as its Python literal (e.g. ``"['a.txt']"``).
    The text is parsed with ``ast.literal_eval``, which only accepts literals,
    so cell content can never be executed as code (unlike ``eval``).

    Parameters
    ----------
    filenames_str : str or any
        Raw cell content; non-string values such as NaN are tolerated.

    Returns
    -------
    str
        The first list element, or ``""`` when the cell is missing, cannot be
        parsed as a literal, is not a list/tuple, or is empty.
    """
    try:
        filenames = ast.literal_eval(filenames_str)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # missing cell or text that is not a valid Python literal
        return ""
    if isinstance(filenames, (list, tuple)) and filenames:
        return filenames[0]
    return ""
ids_filenames_df["filename"] = ids_filenames_df["filenames_list"].apply(get_filename)

In [48]:
ids_filenames_df.head(10)

Unnamed: 0.1,Unnamed: 0,id,filenames_list,filename
0,0,1031760,"['Bacon,_Francis_-_Instauratio_magna__London_1...","Bacon,_Francis_-_Instauratio_magna__London_162..."
1,1,1085290,"['Linden,_Johannes_Antonides_van_der_-_Lindeni...","Linden,_Johannes_Antonides_van_der_-_Lindenius..."
2,2,1285853,"['de_Conde,_Ioannes_Baptista_-_Aphorismi_seu_a...","de_Conde,_Ioannes_Baptista_-_Aphorismi_seu_axi..."
3,3,1285854,"['van_Poort,_Henricus_-_Hippocratis_Aphorismi_...","van_Poort,_Henricus_-_Hippocratis_Aphorismi_me..."
4,4,1285855,"['Hippocrates_&_Denisot,_Gérard_-_Hippocratis_...","Hippocrates_&_Denisot,_Gérard_-_Hippocratis_Ap..."
5,5,1285856,"['Hippocrates_&_Berigardus,_Petrus_-_Hippocrat...","Hippocrates_&_Berigardus,_Petrus_-_Hippocratis..."
6,6,1365811,['Acta_literaria_Sueciae__Vol__1__Uppsala_[172...,Acta_literaria_Sueciae__Vol__1__Uppsala_[1723–...
7,7,1370560,"['Quensel,_Conrad_&_Eurodius,_Hans_-_De_lumine...","Quensel,_Conrad_&_Eurodius,_Hans_-_De_lumine_n..."
8,8,1378359,"['Celsius,_Anders_-_Observationes_de_lumine_Bo...","Celsius,_Anders_-_Observationes_de_lumine_Bore..."
9,9,1424044,"['Bacci,_Andrea_&_Gabelkover,_Wolfgang_-_De_mo...","Bacci,_Andrea_&_Gabelkover,_Wolfgang_-_De_mono..."


In [49]:
# The year of publication contained in the filename is useful here, since it is not that
# straightforward in the wiki data, as we have seen above. So let's extract the year here.
def get_year(filename):
    """Return the first four-digit number found in ``filename`` as an int.

    Parameters
    ----------
    filename : str or any
        Filename to scan; non-string values are tolerated.

    Returns
    -------
    int or None
        The first run of four digits, or ``None`` when the value is not a
        string or contains no such run.
    """
    if not isinstance(filename, str):
        return None
    # raw string so that "\d" reaches the regex engine instead of being an invalid string escape
    year_match = re.search(r"\d{4}", filename)
    if year_match is None:
        return None
    return int(year_match[0])

ids_filenames_df["file_year"] = ids_filenames_df["filename"].apply(get_year)
ids_filenames_df.head(5)

Unnamed: 0.1,Unnamed: 0,id,filenames_list,filename,file_year
0,0,1031760,"['Bacon,_Francis_-_Instauratio_magna__London_1...","Bacon,_Francis_-_Instauratio_magna__London_162...",1620.0
1,1,1085290,"['Linden,_Johannes_Antonides_van_der_-_Lindeni...","Linden,_Johannes_Antonides_van_der_-_Lindenius...",1686.0
2,2,1285853,"['de_Conde,_Ioannes_Baptista_-_Aphorismi_seu_a...","de_Conde,_Ioannes_Baptista_-_Aphorismi_seu_axi...",1647.0
3,3,1285854,"['van_Poort,_Henricus_-_Hippocratis_Aphorismi_...","van_Poort,_Henricus_-_Hippocratis_Aphorismi_me...",1657.0
4,4,1285855,"['Hippocrates_&_Denisot,_Gérard_-_Hippocratis_...","Hippocrates_&_Denisot,_Gérard_-_Hippocratis_Ap...",1634.0


In [50]:
# Attach the filename and the year parsed from it to every metadata row with a matching id.
# how="inner" (the pandas default, spelled out here) keeps only ids that occur in both tables.
# Columns left over from an earlier run of this cell are dropped first; otherwise a re-run
# would produce suffixed "filename_x"/"filename_y" columns instead of a clean result.
file_columns = ["filename", "file_year"]
metadata_table_long = pd.merge(
    metadata_table_long.drop(columns=file_columns, errors="ignore"),
    ids_filenames_df[["id"] + file_columns],
    on="id",
    how="inner",
)
metadata_table_long.head(40)

Unnamed: 0,Author,Full title,In,Year,Place,Publisher/Printer,Era,Form/Genre,Discipline/Content,Original,...,Of interest to,Transkribus text available,Written by,Library and Signature,ids,id,date_min,date_max,filename,file_year
0,"Pardies, Ignace Gaston",A Latin Letter written to the Publisher April ...,Philosophical Transactions of the Royal Societ...,1672,London,Martyn,17th century,"Letter, Review",Physics,A Latin Letter(JSTOR),...,,Yes,IT,,[607720],607720,1672.0,1672.0,"Pardies,_Ignace_Gaston_-_A_Latin_Letter_contai...",1672.0
1,"Scheuchzer, Johann Jakob","Acarnania sive Relatio eorum, quae hactenus el...","ΟΥΡΕΣΙΦΟΙΤΗΣ (Ouresiphoites) Helveticus, 609–35",1723,Leiden,"van der Aa, Pieter",18th century,"Biography, Bibliography","Mathematics, Physics, Geography/Cartography, M...","Acarnania (1st edition), inMiscellanea Lipsien...",...,MK,Yes,MK,,[605919],605919,1723.0,1723.0,"Scheuchzer,_Johann_Jakob_-_ΟΥΡΕΣΙΦΟΙΤΗΣ_(Oures...",1723.0
2,"Scheuchzer, Johann Jakob",ΟΥΡΕΣΙΦΟΙΤΗΣ Helveticus sive itinera per Helve...,,1723,Leiden,"van der Aa, Pieter",18th century,"Report, Bibliography","Geography/Cartography, Meteorology/Earth scien...",ΟΥΡΕΣΙΦΟΙΤΗΣ (Ouresiphoites) Helveticus(e-rara...,...,MK,Yes,MK,,[605919],605919,1723.0,1723.0,"Scheuchzer,_Johann_Jakob_-_ΟΥΡΕΣΙΦΟΙΤΗΣ_(Oures...",1723.0
3,"Addison, Joseph",Ad insignissimum virum dominum Thomam Burnettu...,"Examen poeticum duplex, sive, Musarum anglican...",1698,London,Richard Wellington I.,17th century,Panegyric poem,Meteorology/Earth sciences,Ad Burnettum sacrae theoriae telluris auctorem...,...,"MK, IT",Yes,MK,,[769230],769230,1698.0,1698.0,Examen_poeticum_duplex__London_1698_pdf.txt,1698.0
4,"Addison, Joseph",Barometri descriptio,"Examen poeticum duplex, sive, Musarum anglican...",1698,London,Richard Wellington I.,17th century,Didactic poem,Physics,"Barometri descriptio, in:Examen poeticum duple...",...,,Yes,,,[769230],769230,1698.0,1698.0,Examen_poeticum_duplex__London_1698_pdf.txt,1698.0
5,"Owen, John",Ad Dominum Gilbertum,Epigrammatum libri tres. Auctore Ioanne Owen B...,1606,London,"Windet, John, Waterson, Simon",17th century,Other (see description),Astronomy/Astrology/Cosmography,3rd edition (Amberg 1608)(Google Books)Digital...,...,"JL, IT",Yes,IT,,[900767],900767,1606.0,1606.0,"Owen,_Johan_-_Epigrammatum_libri_tres__Amberg_...",1608.0
6,"Acidalius, Valens","Ad Iordanum Brunum Nolanum, Italum","Poematum Iani Lernutii, Iani Gulielmi, Valenti...",1603,"Liegnitz, Wrocław","Albert, David",17th century,Panegyric poem,Astronomy/Astrology/Cosmography,Ad Iordanum Brunum (1603)(CAMENA)Ad Iordanum B...,...,"MK, IT",Yes,MK,,[801745],801745,1603.0,1603.0,Janus_Lernutius_et_al__-_Poemata__Liegnitz_160...,1603.0
7,"Paulinus, Fabius","Ad clarissimum virum Laurentium Massam, sereni...","Avicennae, Arabum medicorum principis, ex Gera...",1595,Venice,I Giunti,16th century,Panegyric poem,Medicine,Ad Laurentium Massam pro Avicenna ode(Google B...,...,MK,Yes,MK,,[900763],900763,1595.0,1595.0,"Costeo,_Giovanni_&_Mongio,_Paolo_-_Avicennae_c...",1595.0
8,"Sands, Patrick",Ad lectorem trigonometriae studiosum,Mirifici logarithmorum canonis descriptioeiusq...,1614,Edinburgh,Andro Hart,17th century,Panegyric poem,Mathematics,Ad lectorem trigonometriae studiosum(Google Bo...,...,IT,Yes,IT,,[801744],801744,1614.0,1614.0,"Napier,_John_-_Mrifici_logarithmorum_canonis_d...",1614.0
9,"Napier, John",Mirifici logarithmorum canonis descriptio eius...,,1614,Edinburgh,Andro Hart,17th century,"Monograph, Tables and charts",Mathematics,Mirifici logarithmorum canonis descriptio(Goog...,...,"MK, FB",Yes,"MK, FB",,[801744],801744,1614.0,1614.0,"Napier,_John_-_Mrifici_logarithmorum_canonis_d...",1614.0


In [51]:
len(metadata_table_long)

1054

In [55]:
# there are some duplicates now, so let's remove them
# (rows count as duplicates when they share the same "id"; only the first such row is kept)
# NOTE(review): the merged preview shows different works sharing one id (e.g. two Addison poems
# with id 769230), so keep="first" retains the metadata of only one of them — confirm this is intended.
metadata_table_long.drop_duplicates(subset="id", keep="first", inplace=True)
len(metadata_table_long)

975

In [56]:
metadata_table_long["file_year"].between(1517, 1716).sum()

652

In [58]:
metadata_table_long.sort_values("Author", inplace=True)

In [59]:
metadata_table_long.to_csv("../data/metadata_table_long.csv", index=False)

In [61]:
# Write the final table into a newly added worksheet "metadata_table_long_v4" of the Google Sheet.
# NOTE(review): add_worksheet presumably fails when a worksheet with this title already exists,
# so re-running this cell may require a new title — confirm.
set_with_dataframe(noscemus_gs.add_worksheet("metadata_table_long_v4", 1,1), metadata_table_long)