# Second notebook created to facilitate and separate the generation and evaluation of biographies from the previous parts of the pipeline

In [1]:
# Prepare environment
!pip install transformers

import re
import pandas as pd
import numpy as np
from transformers import GPT2LMHeadModel, GPT2Tokenizer
from tqdm.notebook import tqdm
from nltk.translate import bleu_score, gleu_score

Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.[0m


In [None]:
# Load resources
tokenizer = GPT2Tokenizer.from_pretrained("mbien/fdh-wikibio")
model = GPT2LMHeadModel.from_pretrained("mbien/fdh-wikibio")
model = model.to("cuda:0")

In [None]:
# Define generation function

def generate_biography(model, tokenizer, person_definition, sections, top_p, repetition_penalty, num_beams, no_repeat_ngram_size):
  prompts = ""
  if sections is None:
    prompts = [person_definition]
  else:
    prompts = [f"<|section|>== {section} ==" for section in sections]
    prompts.insert(0, person_definition)

  outputs = ""

  end_token = tokenizer.encode("<|pad|>")[0]

  for i, prompt in enumerate(tqdm(prompts)):
    outputs += prompt 

    encoded_prompt = tokenizer.encode(outputs, add_special_tokens=True, return_tensors="pt")
    if len(encoded_prompt) >= 1024:
      break

    encoded_prompt = encoded_prompt.to("cuda:0")

    outputs_enc = model.generate(encoded_prompt, do_sample=True, max_length=1025, device=0, 
                              top_k=0, top_p=top_p, num_return_sequences=1, num_beams=num_beams, repetition_penalty=repetition_penalty,
                              eos_token_id=end_token, no_repeat_ngram_size=no_repeat_ngram_size)[0]

    outputs = tokenizer.decode(outputs_enc)

    # We don't care about end tokens, we may want to add another section afterwards!
    outputs=outputs.replace("<|end|>", "").replace("<|pad|>", "")
    
    # For some reason GPT-2 is especially bad in digressions
    #outputs = re.sub(r"\([^\)]+\)", "", outputs)

    if sections is not None:
      # Allow only one new section in every iteration
      sections_split = outputs.split("<|section|>")
      outputs = "<|section|>".join(outputs.split("<|section|>")[:min(len(sections_split), i+1)])
    else:
      break

  return outputs

# Use this code to generate biographies for the human evaluation

In [None]:
df_none = pd.read_csv('/content/Formulaire sans titre.csv')
# b = df_none[df_none['itemDescription'].notnull()]
# df_none_sample = b.sample(n=1).copy()
a=df_none#_sample['summary']
a

Unnamed: 0,Horodateur,This biography is rated as 3.,This biography is rated as 5.,This biography is rated as 10.,Rank each text for the quality of the generation. [Text A],Rank each text for the quality of the generation. [Text B],Rank each text for the quality of the generation. [Text C],Rank each text for the quality of the generation. [Text D],Rank each text for the quality of the generation. [Text E],Text A,Text B,Text C,Text D,Text E,Rank each text for the quality of the generation. [Text A].1,Rank each text for the quality of the generation. [Text B].1,Rank each text for the quality of the generation. [Text C].1,Rank each text for the quality of the generation. [Text D].1,Rank each text for the quality of the generation. [Text E].1,Rank each text for the quality of the generation. [Text F],Rank each text for the quality of the generation. [Text G],Text A.1,Text B.1,Text C.1,Text D.1,Text E.1,Rank each text for the quality of the generation. [Text A].2,Rank each text for the quality of the generation. [Text B].2,Rank each text for the quality of the generation. [Text C].2,Rank each text for the quality of the generation. [Text D].2,Rank each text for the quality of the generation. [Text E].2,Rank each text for the quality of the generation. [Text F].1,Rank each text for the quality of the generation. [Text G].1,Text A.2,Text B.2,Text C.2,Text D.2,Text E.2,Rank each text for the quality of the generation. [Text A].3,Rank each text for the quality of the generation. [Text B].3,Rank each text for the quality of the generation. [Text C].3,Rank each text for the quality of the generation. [Text D].3,Rank each text for the quality of the generation. [Text E].3,Rank each text for the quality of the generation. [Text F].2,Rank each text for the quality of the generation. [Text G].2,Text A.3,Text B.3,Text C.3,Text D.3,Text E.3,Rank each text for the quality of the generation. [Text A].4,Rank each text for the quality of the generation. [Text B].4,Rank each text for the quality of the generation. [Text C].4,Rank each text for the quality of the generation. [Text D].4,Rank each text for the quality of the generation. [Text E].4,Rank each text for the quality of the generation. [Text F].3,Rank each text for the quality of the generation. [Text G].3,Text A.4,Text B.4,Text C.4,Text D.4,Text E.4,Rank each text for the quality of the generation. [Text A].5,Rank each text for the quality of the generation. [Text B].5,Rank each text for the quality of the generation. [Text C].5,Rank each text for the quality of the generation. [Text D].5,Rank each text for the quality of the generation. [Text E].5,Rank each text for the quality of the generation. [Text F].4,Rank each text for the quality of the generation. [Text G].4,Text A.5,Text B.5,Text C.5,Text D.5,Text E.5
0,2020/12/01 1:11:22 AM UTC+1,3,5,10,1st,2nd,3rd,4th,5th,4,6,7,8,9,1st,2nd,3rd,4th,7th,6th,5th,3,5,10,8,5,1st,7th,2nd,6th,3rd,5th,4th,5,5,8,7,8,1st,2nd,7th,6th,5th,4th,3rd,9,8,7,1,1,7th,6th,5th,4th,3rd,2nd,1st,3,7,4,10,3,1st,2nd,3rd,7th,4th,6th,5th,3,7,10,3,7


In [None]:
df_none_sample.to_csv('real_sample.csv')

In [None]:
df_none_sample

Unnamed: 0,item,itemLabel,fnames,lnames,itemDescription,birth,death,professions,summary
1658,Q436738,Gian Giorgio Trissino,"Giovanni, Gian, Giorgio",,Italian Renaissance humanist,1478,1550,"writer, poet, playwright, philosopher",Gian Giorgio Trissino (8 July 1478 – 8 Decembe...


In [None]:
with open("Output.txt", "w") as text_file:

    top_p = 0.0 
    beams= [3,8]
    ngram=[0]
    sections = [
        "Biography"
        ]
    for b in beams:
      # no_repeat_ngram_size = n
      num_beams = b

      pen=[3.0,6.0]
      for i in pen:
        repetition_penalty=i  
        text_file.write(f'\n\ntop_p= {top_p}, num_beams= {num_beams}, repetition_penalty= {repetition_penalty}\n')
        for id, row in df_none_sample.iterrows():
          print(row)
          person_definition = f"<|start|> {row.itemLabel} <|description|> {row.itemDescription} <|professions|> {row.professions} <|birth|> {row.birth} <|death|> {row.death} <|summary|>"
          print(person_definition)
          text_file.write(generate_biography(model, tokenizer, person_definition, sections, top_p, repetition_penalty, num_beams, no_repeat_ngram_size))

item                                                         Q436738
itemLabel                                      Gian Giorgio Trissino
fnames                                       Giovanni, Gian, Giorgio
lnames                                                           NaN
itemDescription                         Italian Renaissance humanist
birth                                                           1478
death                                                           1550
professions                    writer, poet, playwright, philosopher
summary            Gian Giorgio Trissino (8 July 1478 – 8 Decembe...
Name: 1658, dtype: object
<|start|> Gian Giorgio Trissino <|description|> Italian Renaissance humanist <|professions|> writer, poet, playwright, philosopher <|birth|> 1478 <|death|> 1550 <|summary|>


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))

Setting `pad_token_id` to `eos_token_id`:50265 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50265 for open-end generation.



item                                                         Q436738
itemLabel                                      Gian Giorgio Trissino
fnames                                       Giovanni, Gian, Giorgio
lnames                                                           NaN
itemDescription                         Italian Renaissance humanist
birth                                                           1478
death                                                           1550
professions                    writer, poet, playwright, philosopher
summary            Gian Giorgio Trissino (8 July 1478 – 8 Decembe...
Name: 1658, dtype: object
<|start|> Gian Giorgio Trissino <|description|> Italian Renaissance humanist <|professions|> writer, poet, playwright, philosopher <|birth|> 1478 <|death|> 1550 <|summary|>


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))

Setting `pad_token_id` to `eos_token_id`:50265 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50265 for open-end generation.



item                                                         Q436738
itemLabel                                      Gian Giorgio Trissino
fnames                                       Giovanni, Gian, Giorgio
lnames                                                           NaN
itemDescription                         Italian Renaissance humanist
birth                                                           1478
death                                                           1550
professions                    writer, poet, playwright, philosopher
summary            Gian Giorgio Trissino (8 July 1478 – 8 Decembe...
Name: 1658, dtype: object
<|start|> Gian Giorgio Trissino <|description|> Italian Renaissance humanist <|professions|> writer, poet, playwright, philosopher <|birth|> 1478 <|death|> 1550 <|summary|>


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))

Setting `pad_token_id` to `eos_token_id`:50265 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50265 for open-end generation.



item                                                         Q436738
itemLabel                                      Gian Giorgio Trissino
fnames                                       Giovanni, Gian, Giorgio
lnames                                                           NaN
itemDescription                         Italian Renaissance humanist
birth                                                           1478
death                                                           1550
professions                    writer, poet, playwright, philosopher
summary            Gian Giorgio Trissino (8 July 1478 – 8 Decembe...
Name: 1658, dtype: object
<|start|> Gian Giorgio Trissino <|description|> Italian Renaissance humanist <|professions|> writer, poet, playwright, philosopher <|birth|> 1478 <|death|> 1550 <|summary|>


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))

Setting `pad_token_id` to `eos_token_id`:50265 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50265 for open-end generation.





In [None]:
df_none_sample.to_csv('generated_samples')

In [None]:
#### MODIFY THESE PARAMETERS TO GET BETTER/WORSE RESULTS ####
# The person's input text - put the inputs from wikidata which is not in wikipages here
person_definition = "<|start|> Baptiste Thiébault <|summary|>"

# Two methods, either num_beams=1 and varying top_p, or num_beams>1 and top_p=0.0
# The text generated by beam search is much less random, but has better coherence
top_p = 0.0
num_beams = 3

# Increase this if the model repeats the same phrases over and over, decrease if the text is not reasonable anymore
repetition_penalty=3.0

# Choose the sections to appear in biography. Remember to make it realistic
sections = [
    "Biography"
    ]

#############################################################


out = generate_biography(model, tokenizer, person_definition, sections, top_p, repetition_penalty, num_beams,2)
print("\n\n==== FINAL OUTPUT ==== \n\n")
print(out.split("<|summary|>")[1].replace("<|section|>", "\n\n"))

HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))

Setting `pad_token_id` to `eos_token_id`:50265 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50265 for open-end generation.





==== FINAL OUTPUT ==== 


 Catholic priest, catholic deacon <|birth|> 1517 <|death|> 1592 


In [None]:
samples = pd.read_csv('/content/generated_samples.csv')
sections = [
    "Biography"
    ]

for id, row in samples.iterrows():


  print(f"\n\n==== {row.itemLabel}==== \n\n")
  print(row.summary.split("<|summary|>")[1].replace("<|section|>", "\n\n"))



==== Giambattista Maderni==== 


 Giambattista Maderni (also known as Giambattista della Vigna) was an Italian painter of the Baroque period. He is best known for his portraits of Madonna and Child with Saints in the church of San Marco in Venice. 

 == Biography ==
Maderni was born in Verona, Italy on 23 January 1758. His father, Giovanni Antonio Maderni, was a merchant who worked at the court of Pope Clement VII. In 1770 he moved to Rome where he painted frescoes depicting the life of Saint Jerome, St. John Chrysostom, St. Anthony of Padua, St. Mark and St. Peter.  The painting was completed in 1802 by Giuseppe Zuccarelli.In 1804 he returned to Verona where he painted two canvases of Santa Maria Maggiore and St. Catherine of Siena.He died in Venice on 9 March 1803.  


==== Giacomino da Verona==== 


 Giacomino da Verona (c. 1255 – c. 1259) was an Italian Renaissance writer and a member of the Venetian School. 

 == Biography ==
Giacomino da Verona was born in Venice to a noble fam

# Automatic evaluation (WIP)

In [None]:
def generate_biographies_autoeval(model, tokenizer, person_definition):
  prompt = person_definition
  end_token = tokenizer.encode("<|pad|>")[0]

  encoded_prompt = tokenizer.encode(prompt, add_special_tokens=True, return_tensors="pt")
  encoded_prompt = encoded_prompt.to("cuda:0")

  outputs_enc = model.generate(encoded_prompt, do_sample=True, max_length=1024, device=0, 
                            top_k=0, top_p=0.95, num_return_sequences=10, repetition_penalty=2.0,
                            eos_token_id=end_token)

  outputs = [tokenizer.decode(output_enc) for output_enc in outputs_enc]

  # We don't care about end tokens, we may want to add another section afterwards!
  outputs=[output.replace("<|end|>", "").replace("<|pad|>", "") for output in outputs]
  # For some reason GPT-2 is especially bad in digressions
  #outputs = [re.sub(r"\s*\([^\)]+\)\s*", " ", output) for output in outputs]
  return outputs

# TODO: put here some random sample of the articles
test_set = [' <|start|> Francesco Aviani <|description|> Italian painter (1662-1715) <|professions|> painter <|birth|> 1662 <|death|> 1715 <|summary|> Francesco Aviani (1662?–1715), a native of Vicenza, flourished about the year 1630. He excelled in painting perspective and architectural views, which were frequently embellished with figures by Giulio Carpioni. His pictures usually represent the most remarkable views in Venice. He also produced some landscapes and seaports.\nHe was born in Venice, most likely on 25 November 1662, by Bernardo and by a Magdalene whose surname is unknown, and was most probably baptized in the cathedral on 3 December  1662. Between 1701 and 1703 he decorated with frescoes (today illegible) the villa Chiericati in Soella (his brother Marco the sculptor was also with him). On 16 October 1703 he married Isabella Carcano. On 26 March 1715 he made a will. On 3 April 1715 he died in Vicenza and it is from his age at the moment of death, about fifty-two years that his date of birth was traced to 1662.\n\n\n<|end|>',
 ' <|start|> Francesco Tironi <|description|> Italian painter (1745-1797) <|professions|> painter <|birth|> 1745 <|death|> 1797 <|summary|> Francesco Tironi (circa 1745-1797) was an Italian painter, active in painting vedute of Venice in a Neoclassical style.\nAmong his works are a vedute of the Riva degli Schiavoni; of a Large Crowd in a Piazza before the Church of Santi Giovanni e Paolo; of the Isola Santa Maria della Grazia, Venice; and of the Meeting of Pope Pius VI and the Doge on the Island of San Giorgio in Alga He also provided the drawings for Antonio Sandi’s twenty-four engravings (1779) of islands in the Venetian Lagoon. Many of the engravings depict thriving communities in islands that are now desolate.\n\n\n<|end|>',
 ' <|start|> Simone Brentana <|description|> Italian painter (1656-1742) <|professions|> painter <|birth|> 1656 <|death|> 1742 <|summary|> Simone Brentana (1656 – 9 June 1742) was an Italian painter of the Baroque period, active in Verona. He was born in Venice to Domenico Brentana, but became orphaned by age nine. After a prolonged desultory education in various fields including music, he trained as a painter in Venice with Pietro Negri, frequenting the Accademia di Belle Arti, moving in 1685 to Verona, where most of his paintings are located.\nAmong his pupils were Antonio Baroni, Michelangelo Spada, Tommaso Dossi, Antonio Elenetti, Giovanni Battista Marcola, and Lodovico Buffetti.\n\n\n<|end|>',
 ' <|start|> Anselmo Costadoni <|description|> Italian monk <|professions|> monk <|birth|> 1714 <|death|> 1785 <|summary|> Dom Anselmo Costadoni, O.S.B. Cam., (1714–1785) was an Italian Camaldolese monk, historian and theologian.\n\n<|section|>\n== Biography ==\nHe was born on 6 October 1714, at Venice and christened Giovanni Domenico. The son of a rich merchant, he sacrificed at an early date his prospects of success in the world and took the religious habit of the Camaldolese monks at the Monastery of St. Michael, situated on the island of Murano in the Venetian lagoon. Here he studied philosophy and theology with more than usual success.\nAt the age of twenty-three he revealed his literary ability in a letter (Lettera critica) written in defense of certain Camaldolese writers, who had been attacked by Giusto Fontanini in his "Library of Italian Eloquence".\nCostadoni subsequently collaborated for eighteen years with the learned Dom Gian Benedetto Mittarelli of his monastery in the publication of the "Annales Camaldulenses ordinis S. Benedicti, ab anno 907 ad annum 1770" (The Annals of the Camaldolese of the Order of St. Benedict, 907-1770), printed in 9 volumes folio (Venice, 1755–73). It follows the plan of Mabillon\'s "Annales ordinis S. Benedicti". (Venice, 1755–73).\nHe died on 23 January 1785, in Venice.\n\n<|section|>\n== Works ==\nSome archæological papers due to his pen, such as "Dissertazione sopra il pesce come simbolo degli antichi cristiani", were published in the voluminous collection of historical essays edited by Calogerà, a monk of the same Order.\nHis works also include: "Avvisi ed istruzioni pratiche intorno ai principali doveri de\' regolari" (Faenza, 1770; Venice, 1771); "Lettere consolatorie" (Venice, 1775); "Lettere sopra questione teologiche" (Venice, 1773).\nCostadoni\'s unpublished manuscripts were transferred, after his death, to St. Gregory\'s monastery at Rome, by order of the Camaldolese abbot, Mauro Cappellari (later Pope Gregory XVI).\n\n\n<|end|>',
 ' <|start|> Nicholas of Crotone <|description|> Venetian bishop, theologist and humanist <|professions|> priest, diplomat <|birth|> 1250 <|death|> 1276 <|summary|> Nicholas of Crotone was a 13th-century, Greek-speaking bishop of Crotone, an Italian coastal city on the Ionian Sea which, from the Middle Ages until 1928, was known as Cotrone.\nAs a Roman Catholic who was fluent in Greek, Nicholas spent much time in Constantinople and as an ambassador between that city and the Pope in Rome.  His fluency in Greek also gave the Byzantine emperor Michael VIII Palaeologus the capacity to begin negotiations for the reconciliation of the Eastern Orthodox and Roman Catholic churches. The rapprochement was predicated on the grounds that previous interpreters had incorrectly conveyed the theological ideas on both sides, leading to an unjustified disagreement.\n\n\n<|end|>',
 ' <|start|> Bartol Kašić <|description|> Croatian linguist and lexicographer <|professions|> writer, translator, linguist, lexicographer, Bible translator <|birth|> 1575 <|death|> 1650 <|summary|> Bartol Kašić (Latin: Bartholomaeus Cassius, Italian: Bartolomeo Cassio; August 15, 1575 – December 28, 1650) was a Jesuit clergyman and grammarian during the Counter-Reformation, who wrote the first Croatian grammar and translated the Bible and the Roman Rite into Croatian.\n\n<|section|>\n== Life ==\nBartol was born in Pag , in the Republic of Venice (in modern Croatia) of his father Ivan Petar Kašić who participated in the 1571 Battle of Lepanto and mother Ivanica. In 1574 Ivan Petar Kašić married for Ivanica Bogdančić and they had a son Bartol next year. His father died when he was a small child, so he was raised by his uncle Luka Deodati Bogdančić, a priest from Pag, who taught him to read and write. He attended the municipal school in the town of Pag. After 1590 he studied at the Illyric College in Loreto near Ancona, in the Papal States (in modern Italy), managed by the Jesuits. As a gifted and industrious pupil, he was sent to further studies in Rome in 1593, where he joined the Society of Jesus in 1595. Kašić continued propaganda activities of Aleksandar Komulović after his death, being even greater Pan-Slav then Komulović was. Kašić censored and edited Komulović\'s 1606 work (Zrcalo od Ispovijesti).Kašić was made a priest in 1606 and served as a confessor in the St. Peter\'s Basilica in Rome. He lived in Dubrovnik from 1609 to 1612. In 1612/13, disguised as a merchant, he went on a mission to the Ottoman provinces of Bosnia, central Serbia and eastern Slavonia (Valpovo, Osijek, Vukovar), which he reported to the pope. From 1614 to 1618 he was the Croatian confessor in Loreto. He went on his second mission in 1618/19. In old age, he described both missions in his incomplete autobiography. His second stay in Dubrovnik lasted from 1620 to 1633. Then he returned to Rome, where he spent the rest of his life.\n\n<|section|>\n== Literary activity ==\nAlready as a student, Kašić started teaching Croatian in the Illyric Academy in Rome, which awakened his interest in the Croatian language. By 1599 he made a Croatian-Italian dictionary, which has been preserved as a manuscript in Dubrovnik since the 18th century. Some experts believe it is one of three dictionaries made by Kašić and that the other two are archived in Perugia and Oxford.\nKašić\'s native dialect was Chakavian. In the 16th century, the Chakavian dialect was prevalent in Croatian works, though it now shifted towards the Shtokavian. Kašić opted for Shtokavian as it was the most common dialect among his South Slavic (Illyrian) people.\n\n<|section|>\n=== The first Croatian grammar ===\n\nIt qualified Kašić for further work in the area of Croatian language. Since the Jesuits took care of the Christians in the Ottoman Empire and tried to teach in the local language, they needed an adequate textbook for working among the Croats. In 1582 Marin Temperica wrote a report to general Claudio Acquaviva in which he emphasized the importance of the Slavic language understandable all over the Balkans. In this report of Temperica requested publishing of the Illyrian language dictionaries and grammars. Based on this request, Kašić provided such a textbook: he published Institutionum linguae illyricae libri duo ("The Structure of the Illyrian Language in Two Books") in Rome in 1604. It was the first Slavic language grammar.In almost 200 pages and two parts ("books"), he provided the basic information on the Croatian language and explained the Croatian morphology in great detail. The language is basically Shtokavian with many Chakavian elements, mixing older and newer forms. For unknown reasons, the grammar was not accompanied by a dictionary, as was the practice with Jesuit dictionaries and grammars of Croatian.\nIn periods 1612–1613 and 1618–1620 Kašić visited various regions of Ottoman Serbia, Bosnia and Croatia. After 1613 Kašić published several works of religious and instructive content and purpose (the lives of the saints Ignatius of Loyola and Francis Xavier, the lives of Jesus and Mary), a hagiographic collection Perivoj od djevstva (Virginal Garden; 1625 and 1628), two catechisms etc. In the late 1627 he completed the spiritual tragedy St Venefrida, subtitled triomfo od čistoće (a triumph of purity), which remained in manuscript until 1938.\n\n<|section|>\n=== Translation of the Bible ===\nIn 1622, Kašić started translating the New Testament into the local Slavic vernacular – more precisely, the Shtokavian dialect of Dubrovnik (Dubrovnik subdialect). In 1625, he was in charge of translating the entire Bible. He submitted the entire translation in Rome in 1633 to obtain the approval for printing, but he encountered difficulties because some Croatians were against translations in that vernacular. The translation was eventually forbidden (non est expediens ut imprimatur).\nConsidering the fact that the translations of the Bible to local languages had a crucial role in the creation of the standard languages of many peoples, the ban on Kašić\'s translation has been described by Josip Lisac as "the greatest catastrophe in the history of Croatian language". The preserved manuscripts were used to publish the translation, with detailed expert notes, in 2000.\nThe great linguistic variety and invention of his translation can be seen from the comparison with the King James Version of the Bible. The King James Version, which has had a profound impact on English, was published in 1611, two decades before Kašić\'s translation. It has 12,143 different words. Kašić\'s Croatian translation, even incomplete (some parts of the Old Testament are missing), has around 20,000 different words – more than the English version and even more than the original Bible!\n\n<|section|>\n=== Roman Rite ===\n\nRitual rimski ("Roman Rite"; 1640), covering more than 400 pages, was the most famous Kašić\'s work, which was used by all Croatian dioceses and archdioceses except for the one in Zagreb, which also accepted it in the 19th century.\nKašić called the language used in Ritual rimski as naški ("our language") or bosanski ("Bosnian"). He used the term "Bosnian" even though he was born in a Chakavian region: instead he decided to adopt a "common language" (lingua communis), a version of Shtokavian Ikavian, spoken by the majority the speakers of Serbo-Croatian. He used the terms dubrovački (from Dubrovnik) for the Ijekavian version used in his Bible, and dalmatinski (Dalmatian) for the Chakavian version.\n\n<|section|>\n== Works ==\nRazlika skladanja slovinska (Croatian-Italian dictionary), Rome, 1599\nInstitutionum linguae illyricae libri duo (The Structure of the Illyrian (Croatian) Language in Two Books), Rome, 1604\nVarious hagiographies; collection Perivoj od djevstva (Virginal Garden; 1625 and 1628) *Two catechisms\nSpiritual tragedy St Venefrida, 1627, published in 1938\nThe Bible, 1633\nRitual rimski (Roman Rite), 1640\n\n\n<|end|>',
 " <|start|> Marco Antonio Bassetti <|description|> Italian painter (1586-1630) <|professions|> painter <|birth|> 1586 <|death|> 1630 <|summary|> Marco Antonio Bassetti (1586–1630) was an Italian painter.\n\n<|section|>\n== Life ==\nHe was born in Verona, and was a pupil of Felice Ricci. He then went to Venice where he was particularly influenced by  the works of Tintoretto, Veronese and Jacopo Bassano. He is known to have been in Rome in 1616, and may have arrived there two years earlier. In Rome he came under the influence of the paintings of Caravaggio and Orazio Borgianni.On his return to Verona he painted a St. Peter and Saints for the church of San Tomaso and a Coronation of the Virgin for Sant' Anastasia. He died from the plague in Verona in 1630. Among his pupils were Fra Semplice and Paolo Massimo.His Dead Christ supported by the Virgin Mary and Mary Magdalene (c. 1616), painted on slate, is in the collection of the Fitzwilliam Museum, Cambridge.\n\n\n<|end|>",
 " <|start|> Sante Piatti <|description|> nan <|professions|> painter <|birth|> 1679 <|death|> 1749 <|summary|> Sante Piatti (1687–1747) was an Italian painter of the Baroque period, active mainly in his native Venice. He is attributed to be a pupil of Giuseppe Diamantini, and possibly Gregorio Lazzarini. He appears to be influenced by Sebastiano Ricci. During 1726 and 1727 he was a member of the Venetian painter's guild (Fraglia). He painted a series of works for the Scuola Grande dei Carmini in Venice and an altarpiece of St Antony for the church of San Nicolò dei Mendicoli.\n\n<|section|>\n== Paintings from the Carmini, Venice ==\n\n\t\t\n\t\t\n\n\n<|end|>",
 ' <|start|> Cristoforo Moro <|description|> Doge of Venice <|professions|> diplomat <|birth|> 1390 <|death|> 1471 <|summary|> Cristoforo Moro (1390 – November 10, 1471) was the 67th Doge of Venice. He reigned from 1462 to 1471.\n\n<|section|>\n== Family ==\nThe Moro family settled in Venice in the mid-12th century when Stephanus Maurus, a great-grandson of Maurus, built a church on the island of Murano. Cristoforo was the eleventh person from the family to be elected doge. His dogaressa was Cristina Sanudo.\n\n<|section|>\n== Life ==\nAfter graduating from university, Moro held various public offices. He was the Venetian ambassador to the Popes Eugene IV and Nicholas V. Saint Bernardino of Siena was said to have prophesied that Moro would one day become doge, and as the fulfillment of a solemn vow Moro had the Church of Saint Giobbe built and dedicated to Bernardino\'s memory. He bequeathed his fortune to various charitable organizations and foundations, including the Church of Saint Giobbe.\n\n<|section|>\n=== Doge ===\nMoro\'s reign was marked by the beginning of a long war between Venice and the Turks. In 1463 Pope Pius II sent Moro a consecrated sword with the intention of convincing Venice to join the anti-Turk alliance. The reaction in Venice was initially hesitant as the Republic\'s main priority was their economic interests.\nIn April 1463, 10 years after the conquest of Constantinople, Turkish troops occupied the Venetian fortress of Argos in Greece. The Latin Patriarch Cardinal Johannes Bessarion traveled to Venice to call on the Republic to join the "defense of the faith"; i.e. join the war against the Turks. That same year a coalition was formed between Venice, Hungary and the Albanian prince Skanderbeg with the blessing of the Pope to counter the threat of Sultan Mehmed II\'s aggressive policy of conquest. The coalition succeeded in temporarily halting Turkish expansion; however, the new territorial limits acquired by the Turks in their conquests had by and large been accepted.\nIn 1469 the Venetian fleet commander Niccolò Canal retook the town of Ainos in Thrace, but he was not able to defend the island of Negroponte (Euboea), a major granary of Venice, from Turkish attack. Euboea was conquered by the Sultan while inflicting enormous losses on the Venetian forces.\nThe Republic faced further threats from the northern Italian cities who coveted Venetian land, as well as from the French king Louis XI who was seeking to expand Lombardy at the expense of Venice.\n\n<|section|>\n== Tomb ==\nMoro\'s tomb is located in the sanctuary of the Church of Saint Giobbe. The tomb is above ground, covered with a marble tombstone.\n\n\n<|end|>',
 " <|start|> Valeria Miani <|description|> Italian playwright <|professions|> writer, playwright <|birth|> 1563 <|death|> 1620 <|summary|> Valeria Miani (1563 – 1620) was a female playwright noted for her works Celinda, a Tragedy, and Amorosa Speranza. Miani married Domenico Negri in 1593, with whom she had five children, Isabetta, Isabella, Lucretia, Guilio, and Anzolo. Miani is known for being the first woman to publish a tragedy prior to the 18th century. In addition, she was the third woman in Italy to write in the newly popular genre, the pastoral. Miani's works explored themes such as cross-dressing, death and punishment, female virtue, and female resilience.\n\n<|section|>\n== Early life ==\nMiani was born in the year 1563, most likely in the northern Italian city of Padua.\n\n<|section|>\n=== Family ===\nWhile it is unknown who Miani's mother was, Miani's father was Vidal Miani, a practicing Paduan lawyer. In addition to earning an income by practicing law, he also taught law, and rented rooms to students. There is written record of Miani having two siblings, a brother who was a priest and  a sister named Cornelia. Miani most likely had more siblings, as there would have been another son who would be the heir in the family. It would have been unlikely in this time period for a family to have their only son enter into the clergy, and have no son to continue the family line.\n\n<|section|>\n=== Education ===\n\nThere is no written record of where Miani attended school. However, many young girls in this century were educated in convents, so it is possible that Miani was educated in a convent as well. Miani did however, have contact with the Ricovrati of Padua, a society made up of intellectuals, and both foreign and domestic professors. This academy was founded in 1599, and was the first academy to be established in Padua. Among its 25 founders was Galileo Galilei, famed Italian astronomer and philosopher. Decades after Valeria Miani had contact with the Ricovrati, the academy became known for its wide acceptance of women into its membership. It is not known however, if the Ricovrati allowed women to become members in Miani's time, and if so, whether or not Miani was a member.\n\n<|section|>\n== Marriage and family ==\nMiani got married on September 22, 1593, at the age of 30. Her husband was Domenico Negri, a man whose occupation is unknown. The formal ceremony was at the Church of the Eremitani, while the marriage contract was signed in the groom's home city of Venice. Compared to other women of a similar background in Italy during this time period, this was considered late in a woman's life to get married. While this may have been late for a typical woman of Miani's background, it was not for women of Miani's occupation. Other female writers during this century were married late in their life as well. Venetian writer and poet Moderata Fonte married at the age of 27, while Venetian author and women's rights activist Lucrezia Marinella married at the age of 35. Negri's death occurred sometime between the years 1612 and 1614. Following the time of his death, Miani no longer published anything else, seemingly ending her writing career.\nMiani and her husband are known to have five children. Their three daughters were Lucretia, Isabella, and Isabetta, and their two sons were Guilio ad Anzolo.\n\n<|section|>\n== Death ==\nVery little is known regarding the death of Valeria Miani. While it is known that she died in the year 1620, a specific date is not given. The place of her death is not known either, though it is likely that she passed somewhere in her home country of Italy.\n\n<|section|>\n== Works ==\n\n<|section|>\n=== Amorosa Speranza ===\n\nMiani's first pastoral drama, Amorosa Speranza was published in 1604. It took Miani six years to have her play published, having submitted it to her publisher in 1598. Her publisher and editor Francesco Bolzetta had the play in his possession for years before he decided to put it into print. Bolzetta was the primary publisher for the Ricovrati of Padua, the same academy which Miani was in contact with. By publishing Amorosa Speranza, Miani became the third woman in Italy to have ever published a nonreligious play. The play Amorosa Speranza appears to have not been written for print, but rather for performance. Multiple times during the play the performer addresses the audience, even so much as having the prologue of the performance primarily spoken to the audience. The plot of Amorosa Speranza revolves around the life of the virtuous sprite Venelia, who has been abandoned by her husband following their wedding night. Venezia struggles throughout the play to rid herself of the unwanted advances of two shepherds, Alliseo, and Isandro.In Amorosa Speranza, Miani shields her two main characters from the satyrs and their deviance by having a third nymph be the one to outsmart and trick the satyr. By this third nymph deceiving the satyr who would want to do her harm, she is emphasizing the independence and autonomy that her female characters have within the story.\n\n<|section|>\n=== Celinda, A Tragedy ===\nMiani's second published work, Celinda, a Tragedy, was published seven years after Amorosa Speranza, in 1611. The publishing of this play marked the first known tragedy written and published by an Italian woman. Just as Amorosa Speranza was, Celinda, a Tragedy was published by the official publisher for the Ricovrati, Francesco Bolzetta. During the time when Celinda was published, the tragedy genre was exploding in popularity, with approximately 60 new tragedies published in 1611. While Celinda enjoyed popularity in print, there is no record of it ever making it to the stage. This was due in part to the fact that audiences considered tragedies bad omens to watch. Additionally, these performances were very expensive to put on, due to the cost of hiring actors, and having to recreate the bloody and violent imagery detailed in the play. The plot of Celinda, a Tragedy follows the story of the title character, 15 year old princess Celinda, and her prohibited relationship with Persian prince Autilio. Despite the fact that the two meet under false pretenses, with Autilio disguised as a woman, they fall in love. The entirety of the play details the horror and tragedy that befalls the two young lovers, complete with suicide and vendettas, both common motifs in the tragedy genre.\n\n\n<|end|>"]

eval_samples = []
for sample in tqdm(test_set):
  # We generate without fixed sections, assuming this should be covered well by the generator
  eval_samples.append(generate_biographies_autoeval(model, tokenizer, sample.split("<|summary|>")[0]+"<|summary|>"))

HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))

Setting `pad_token_id` to `eos_token_id`:50265 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50265 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50265 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50265 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50265 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50265 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50265 for open-end generation.


KeyboardInterrupt: ignored

In [None]:
eval_samples

[["<|start|> Francesco Aviani <|description|> Italian painter (1662-1715) <|professions|> painter <|birth|> 1662 <|death|> 1715 <|summary|> François Fabriuzzi, also known as FRANCò Bastian Paolo or Franca Lucchese Opere da Marcheina had very strong artistic connections to painting in Venice.  He was the first artist painted with a camera situated outside of his workshop located at Bergamo and more recently he seems free from having an established studio near Venetia towards Dubrovnik – priority explained by Frank Gasparino who described him may have asked for permission:\nWhile still young(14), they began collaborating on all sorts postmodern projects.(2nd ed., Aldine). <|section|> == Works ==\nPalazzo Giovane Figueredo di Allegro the Patriarch ;\u2009Opus dix Messonico del Trianna di Lanzi dell'Accademia novitiura classicana antichi par Cosimo sopra Contarini mai capitale; scionegno degli contra il hasta famiglia e countendacchitta; conte futuram fuono immagiana Adoration new latin fo

In [None]:
processed_test_set = [sample.split("<|summary|>")[1].replace("<|section|>", "").replace("<|end|>", "") for sample in test_set]
processed_samples = [[subsample.split("<|summary|>")[1].replace(" ,",",").replace("<|section|>", "") for subsample in sample] for sample in eval_samples]
print(processed_samples[2][0])
print(processed_test_set[2])

 Vittoria Civiti, Il Defenza - Simona della Torre
Tradition: Simone Trevisan was born and died at Ceneda in Vercellina a century before the invention of agriculture. He began his training as an apprenticed draughtsman practicing under Francesco Stefano Cataldi there on 16 January 1727; only after being initiated by Federico Izquierdo for this company did he learn to paint pottery himself with all manner factors drawn from Jacobite style traditions which both Josox Igatto's revivalism contributed towards becoming European standard masterpiece once constituted... In September 1700 Sogamora Canal close is named during construction where following oblique connections it can be seen that Ignazio Diodoro also lived down small Yucatáni tutta il Habsburg who taunted Trentine nobles during their retaliation against those scurrilous lords returning Dalmatian possessions into Poland  Bienvenu valley extends perpendicular formation indicating what had become known throughout history based mainly u

In [None]:
def evaluate_bleu(row):
  return bleu_score.sentence_bleu([sample.split(" ") for sample in row.generated], row.real.split(" "))

def evaluate_gleu(row):
  return gleu_score.sentence_gleu([sample.split(" ") for sample in row.generated], row.real.split(" "))

eval_df = pd.DataFrame(np.array([processed_samples, processed_test_set]).T, columns=["generated", "real"])
eval_df.values[0]

array([list([" François Fabriuzzi, also known as FRANCò Bastian Paolo or Franca Lucchese Opere da Marcheina had very strong artistic connections to painting in Venice.  He was the first artist painted with a camera situated outside of his workshop located at Bergamo and more recently he seems free from having an established studio near Venetia towards Dubrovnik – priority explained by Frank Gasparino who described him may have asked for permission:\nWhile still young(14), they began collaborating on all sorts postmodern projects.(2nd ed., Aldine).  == Works ==\nPalazzo Giovane Figueredo di Allegro the Patriarch ;\u2009Opus dix Messonico del Trianna di Lanzi dell'Accademia novitiura classicana antichi par Cosimo sopra Contarini mai capitale; scionegno degli contra il hasta famiglia e countendacchitta; conte futuram fuono immagiana Adoration new latin formij eleko facilitarie confetti ab mezzone brutabboot eritament cruzontani di Carnestà mediumittori disciplinescopeen Matteousanes ifrin

In [None]:
print(eval_df.apply(evaluate_bleu, axis=1))
print(eval_df.apply(evaluate_gleu, axis=1))

Corpus/Sentence contains 0 counts of 3-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().
Corpus/Sentence contains 0 counts of 4-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


0    0.324775
1    0.127471
2    0.004783
3    0.049870
4    0.077648
5    0.037603
6    0.027574
7    0.025599
8    0.046257
9    0.009868
dtype: float64
0    0.025084
1    0.026477
2    0.022175
3    0.026252
4    0.018987
5    0.013621
6    0.036364
7    0.033138
8    0.019361
9    0.024260
dtype: float64
