In [49]:
import requests
from bs4 import BeautifulSoup
from typing import NamedTuple, Optional, Iterable, TypedDict
import pandas as pd
import os
from tqdm import tqdm


def format_number(number):
    return "{:04d}".format(number)


def get_alto(urn: str, page: int = 1):
    """Get alto from a page in a book from the National Library of Norway.
    Args:
        urn (str): URN number for the book
        page (int): page number
    Returns:
        str: text from the page
    """
    r = requests.get(api_str.format(urn=urn, page=format_number(page)))
    
    if r.status_code != 200:
        # print(r.status_code, api_str.format(urn=urn, page=format_number(page)))
        raise ValueError("Could not get page {} from urn {}".format(page, urn))
    return r.text

def check_alto_style(soup: BeautifulSoup) -> str:
    if soup.find("ComposedBlock"):
        return "alto_2"
    elif soup.find("composedblock"):
        return "alto_3"
    elif soup.find("PrintSpace"):
        return "alto_1"
    else:
        raise ValueError("Could not find alto style")
    


def get_text(soup: BeautifulSoup) -> str:
    """Get text from a alto soup object.

    Args:
        soup (BeautifulSoup): soup object from alto xml

    Returns:
        str: text from the page
    """
    alto_dict : dict = {
        "composed_block": {
            "alto_2" : "ComposedBlock",
            "alto_3" : "composedblock",
            "alto_1" : "PrintSpace"
            },
        "text_block": {
            "alto_2" : "TextBlock",
            "alto_3" : "textblock",
            "alto_1" : "TextBlock"
            },
        "text_line": {
            "alto_2" : "TextLine",
            "alto_3" : "textline",
            "alto_1" : "TextLine"
            },
        "string": {
            "alto_2" : "String",
            "alto_3" : "string",
            "alto_1" : "String"
            },
        "content": {
            "alto_2" : "CONTENT",
            "alto_3" : "content",
            "alto_1" : "CONTENT"
        }
    }   
    
    alto_style = check_alto_style(soup)
    
    text = ""
    for composed_block in soup.find_all(alto_dict["composed_block"][alto_style]):
        for text_block in composed_block.find_all(alto_dict["text_block"][alto_style]):
            for text_line in text_block.find_all(alto_dict["text_line"][alto_style]):
                for string in text_line.find_all(alto_dict["string"][alto_style]):
                    # print(string["CONTENT"])
                    text += string[alto_dict["content"][alto_style]] + " "
                text += "\n"
            text += "\n"
        text += "\n"

    return text


urn = "URN:NBN:no-nb_digibok_2014110308039"
api_str = "https://api.nb.no/catalog/v1/metadata/{urn}/altos/{urn}_{page}"

In [50]:
class Poem(NamedTuple):
    urn: str
    title: str
    page_start: int
    page_end: int
    overlapp: str
    digital_visning: str
    comment: str
    pages : Optional[Iterable[str]] = None


df = pd.read_csv("poems.csv")

# Get poem objects
poem_list = []
for ind, row in df.iterrows():
    poem = Poem(*row)
    poem_list.append(poem)

In [52]:
alto_list = []
errors = []
for poem in tqdm(poem_list):
    if poem.urn in errors:
        continue
    
    for r in range(poem.page_start-1, poem.page_end):
        try: 
            alto = get_alto(poem.urn, r)
            
        except Exception as e:
            print("Error with", poem.urn, poem.title, r, e)
            errors.append(poem.urn)
            break
            
        alto_list.append(alto)

  2%|▏         | 55/2940 [00:12<02:02, 23.63it/s]

Error with URN:NBN:no-nb_digibok_2011031412004 Mudderpramma 30 Could not get page 30 from urn URN:NBN:no-nb_digibok_2011031412004


 21%|██        | 620/2940 [03:47<08:00,  4.83it/s]

Error with URN:NBN:no-nb_digibok_2018020748046 Fred paa jorden! 8 Could not get page 8 from urn URN:NBN:no-nb_digibok_2018020748046


 33%|███▎      | 970/2940 [06:06<02:06, 15.54it/s]

Error with URN:NBN:no-nb_digibok_2018042548139 Innledningsdikt 6 Could not get page 6 from urn URN:NBN:no-nb_digibok_2018042548139
Error with URN:NBN:no-nb_digibok_2017031548123 Ved julekveldstid 11 Could not get page 11 from urn URN:NBN:no-nb_digibok_2017031548123


 42%|████▏     | 1245/2940 [08:03<09:23,  3.01it/s]  

Error with URN:NBN:no-nb_digibok_2017080748090 Forord til 1ste oplag 7 Could not get page 7 from urn URN:NBN:no-nb_digibok_2017080748090


 48%|████▊     | 1424/2940 [08:56<00:45, 33.25it/s]

Error with URN:NBN:no-nb_digibok_2020110907594 Innledningsvers 4 Could not get page 4 from urn URN:NBN:no-nb_digibok_2020110907594


 51%|█████▏    | 1509/2940 [09:19<00:23, 60.33it/s]

Error with URN:NBN:no-nb_digibok_2017060248006 Ved Beresina 9 Could not get page 9 from urn URN:NBN:no-nb_digibok_2017060248006
Error with URN:NBN:no-nb_digibok_2009040303003 Introduction 11 Could not get page 11 from urn URN:NBN:no-nb_digibok_2009040303003


 62%|██████▏   | 1830/2940 [10:34<00:18, 60.82it/s] 

Error with URN:NBN:no-nb_digibok_2021041348661 I. 1 7 Could not get page 7 from urn URN:NBN:no-nb_digibok_2021041348661


 64%|██████▍   | 1890/2940 [10:51<03:47,  4.62it/s]

Error with URN:NBN:no-nb_digibok_2018042748096 Ungdom 7 Could not get page 7 from urn URN:NBN:no-nb_digibok_2018042748096


 66%|██████▋   | 1950/2940 [11:07<06:38,  2.48it/s]

Error with URN:NBN:no-nb_digibok_2016101848013 Ouverture 11 Could not get page 11 from urn URN:NBN:no-nb_digibok_2016101848013


 80%|████████  | 2352/2940 [13:51<03:01,  3.24it/s]

Error with URN:NBN:no-nb_digibok_2009042803030 Kong dag 11 Could not get page 11 from urn URN:NBN:no-nb_digibok_2009042803030
Error with URN:NBN:no-nb_digibok_2009050712001 Tre jenter 11 Could not get page 11 from urn URN:NBN:no-nb_digibok_2009050712001


100%|██████████| 2940/2940 [18:03<00:00,  2.71it/s]

Error with URN:NBN:no-nb_digibok_2010081610001 Sommerpragt 2 Could not get page 2 from urn URN:NBN:no-nb_digibok_2010081610001





In [53]:
# import pickle

# with open("alto_list.pkl", "wb") as f:
#     pickle.dump(alto_list, f)

In [56]:
pd.DataFrame(errors).to_csv("error_urn.csv", index=False)

In [84]:
print(get_text(BeautifulSoup(alto_list[0], "lxml-xml")))

Variation 


(Simplicius barnet). 


a eneboeren dode, 
græd Simplicius først sine oine rode, 
derpå stjal han ham kutten af, 
kasted kroppen ned i den færdige grav 
og flygtede gjennem de ode 
skove bort fra de dode. 


Sulten og bleg 
ved kvældstid han ind i en by sig sneg, 
men hvert eneste hus stod ode, 
og torvet lå fuldt af dode. 





In [85]:
print(get_text(BeautifulSoup(alto_list[1], "lxml-xml")))

Med strittende hår krøb gutten 
hylende sammen i kutten. 
Men da månen steg, 
hans rædsel veg, 

og der legtes i natten en underlig leg 
af barnet mellem de ode 
hus og de stinkende dode. 


Simplicius, du lille, 

det gik mig som dig, da jeg vilde 
flygte bort fra det døde. 
Også jeg kom ad ode 
veie ind blandt de dode. 


ø 





In [95]:
len(alto_list), [page for x in poem_list for page in x.pages ]

TypeError: 'NoneType' object is not iterable

In [96]:
for poem in poem_list:
    for r in range(poem.page_start-1, poem.page_end):
        

[Poem(urn='URN:NBN:no-nb_digibok_2009032303011', title='Variation', page_start=8, page_end=9, overlapp=nan, digital_visning=nan, comment=nan, pages=None),
 Poem(urn='URN:NBN:no-nb_digibok_2009032303011', title='Pigen med fuglefælden', page_start=10, page_end=12, overlapp=nan, digital_visning=nan, comment=nan, pages=None),
 Poem(urn='URN:NBN:no-nb_digibok_2009032303011', title='Troldsøstre', page_start=13, page_end=14, overlapp=nan, digital_visning=nan, comment=nan, pages=None),
 Poem(urn='URN:NBN:no-nb_digibok_2009032303011', title='I Sarons dal', page_start=15, page_end=16, overlapp=nan, digital_visning=nan, comment=nan, pages=None),
 Poem(urn='URN:NBN:no-nb_digibok_2009032303011', title='Idyl', page_start=17, page_end=18, overlapp=nan, digital_visning=nan, comment=nan, pages=None),
 Poem(urn='URN:NBN:no-nb_digibok_2009032303011', title='Jeg bygger mit hus', page_start=19, page_end=20, overlapp=nan, digital_visning=nan, comment=nan, pages=None),
 Poem(urn='URN:NBN:no-nb_digibok_200903

In [None]:
[x for x in page.pages]

In [98]:
from tqdm.contrib.concurrent import process_map

In [99]:
poem_list_w_pages = []
errors = []
for poem in tqdm(poem_list):
    if poem.urn in errors:
        continue
    
    txts = []
    
    for r in range(poem.page_start-1, poem.page_end):
        try: 
            alto = get_alto(poem.urn, r)
            txt = get_text(BeautifulSoup(alto, "lxml-xml"))
            txts.append(txt)
            
            
        except Exception as e:
            print("Error with", poem.urn, poem.title, r, e)
            errors.append(poem.urn)
            break
            
        poem_list_w_pages.append(Poem(*poem[:-1], txts))

  2%|▏         | 55/2940 [00:13<01:57, 24.51it/s]

Error with URN:NBN:no-nb_digibok_2011031412004 Mudderpramma 30 Could not get page 30 from urn URN:NBN:no-nb_digibok_2011031412004


 11%|█         | 316/2940 [01:37<13:26,  3.25it/s]


KeyboardInterrupt: 

In [101]:
# multiprocessing

   
def process_poem(poem):
    txts = []
    
    for r in range(poem.page_start-1, poem.page_end):
        try: 
            alto = get_alto(poem.urn, r)
            txt = get_text(BeautifulSoup(alto, "lxml-xml"))
            txts.append(txt)
            
            
        except Exception as e:
            print("Error with", poem.urn, poem.title, r, e)
            errors.append(poem.urn)
            break
            
        new_poem = Poem(*poem[:-1], txts)
        
        save_poem(new_poem)
        
def save_poem(poem):
    base_path = os.path.join("texts_joined", poem.urn, poem.title)
    # count = 1
    text_list = [page for page in poem.pages]
    
    text = "\n".join(text_list)
    
    path = base_path + ".txt"
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, "w") as f:
        f.write(text)

In [102]:
from tqdm.contrib.concurrent import process_map


if __name__ == '__main__':
    process_map(process_poem, poem_list, max_workers=5)

  process_map(process_poem, poem_list, max_workers=5)


  0%|          | 0/2940 [00:00<?, ?it/s]

Error with URN:NBN:no-nb_digibok_2011031412004 Mudderpramma 30 Could not get page 30 from urn URN:NBN:no-nb_digibok_2011031412004
Error with URN:NBN:no-nb_digibok_2011031412004 Telefonvise 33 Could not get page 33 from urn URN:NBN:no-nb_digibok_2011031412004
Error with URN:NBN:no-nb_digibok_2011031412004 Fakkeltogvise 35 Could not get page 35 from urn URN:NBN:no-nb_digibok_2011031412004
Error with URN:NBN:no-nb_digibok_2011031412004 Bazarvise 37 Could not get page 37 from urn URN:NBN:no-nb_digibok_2011031412004
Error with URN:NBN:no-nb_digibok_2011031412004Error withError with  URN:NBN:no-nb_digibok_2011031412004  VandværksviseError withDa Oscar sak ved Hylla URN:NBN:no-nb_digibok_2011031412004  Torpedovise URN:NBN:no-nb_digibok_201103141200440  4345Prolog  Could not get page 43 from urn URN:NBN:no-nb_digibok_2011031412004 
Could not get page 45 from urn URN:NBN:no-nb_digibok_2011031412004 Could not get page 40 from urn URN:NBN:no-nb_digibok_201103141200447

 Could not get page 47 from

In [91]:
alto_index = 0
poem_list_w_pages = []
for poem in tqdm(poem_list, total=len(poem_list)):
    txts = []
    
    for r in range(poem.page_start-1, poem.page_end):
            #print(r)
            #print(p.urn, p.title)
        try:    
           
            txt = get_text(BeautifulSoup(alto_list[alto_index], "lxml-xml"))
            txts.append(txt)

        except Exception as e:
            print("Error with", poem.urn, poem.title, r, e)
            
        alto_index += 1
        
    poem_list_w_pages.append(Poem(*poem[:-1], txts))
    
        # poem["pages"] = txts
        #poem_list_w_pages.append(poem)
    # except Exception as e:
    #     print("Error with", poem.urn, poem.title, e)
    #     # p.pages = None

100%|██████████| 2940/2940 [00:51<00:00, 56.83it/s]

Error with URN:NBN:no-nb_digibok_2006112401018 Avskje mæ Høvringen 12 list index out of range
Error with URN:NBN:no-nb_digibok_2006112401018 Avskje mæ Høvringen 13 list index out of range
Error with URN:NBN:no-nb_digibok_2006112401018 Songen aat ei finsk Bonnejente 14 list index out of range
Error with URN:NBN:no-nb_digibok_2006112401018 Songen aat ei finsk Bonnejente 15 list index out of range
Error with URN:NBN:no-nb_digibok_2006112401018 Songen aat ei finsk Bonnejente 16 list index out of range
Error with URN:NBN:no-nb_digibok_2006112401018 Sæterliv 17 list index out of range
Error with URN:NBN:no-nb_digibok_2006112401018 Sæterliv 18 list index out of range
Error with URN:NBN:no-nb_digibok_2006112401018 Sæterliv 19 list index out of range
Error with URN:NBN:no-nb_digibok_2006112401018 Sæterliv 20 list index out of range
Error with URN:NBN:no-nb_digibok_2006112401018 Sæterliv 21 list index out of range
Error with URN:NBN:no-nb_digibok_2006112401018 Sæterliv 22 list index out of range




In [6]:
## Parse and save poems by page

# for poem in poem_list_w_pages:
#     base_path = path = os.path.join("texts", poem.urn, poem.title)
#     count = 1
#     for page in poem.pages:
#         path = os.path.join(base_path, str(count) + ".txt")
#         os.makedirs(os.path.dirname(path), exist_ok=True)
#         with open(path, "w") as f:
#             f.write(page)
#         count += 1

In [92]:
## Parse and save poems as one text file

for poem in poem_list_w_pages:
    base_path = os.path.join("texts_joined", poem.urn, poem.title)
    # count = 1
    text_list = [page for page in poem.pages]
    
    text = "\n".join(text_list)
    
    path = base_path + ".txt"
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, "w") as f:
        f.write(text)
    
    
    # for page in poem.pages:
    #     path = os.path.join(base_path,  ".txt")
    #     os.makedirs(os.path.dirname(path), exist_ok=True)
    #     with open(path, "w") as f:
    #         f.write(page)
    #     count += 1

In [80]:
poem_list_w_pages[0].pages

['Variation \n\n\n(Simplicius barnet). \n\n\na eneboeren dode, \ngræd Simplicius først sine oine rode, \nderpå stjal han ham kutten af, \nkasted kroppen ned i den færdige grav \nog flygtede gjennem de ode \nskove bort fra de dode. \n\n\nSulten og bleg \nved kvældstid han ind i en by sig sneg, \nmen hvert eneste hus stod ode, \nog torvet lå fuldt af dode. \n\n\n',
 'Variation \n\n\n(Simplicius barnet). \n\n\na eneboeren dode, \ngræd Simplicius først sine oine rode, \nderpå stjal han ham kutten af, \nkasted kroppen ned i den færdige grav \nog flygtede gjennem de ode \nskove bort fra de dode. \n\n\nSulten og bleg \nved kvældstid han ind i en by sig sneg, \nmen hvert eneste hus stod ode, \nog torvet lå fuldt af dode. \n\n\n']

In [70]:
df

Unnamed: 0,urn,title,page_start,page_end,overlapp,digital_visning,comment
0,URN:NBN:no-nb_digibok_2009032303011,Variation,8,9,,,
1,URN:NBN:no-nb_digibok_2009032303011,Pigen med fuglefælden,10,12,,,
2,URN:NBN:no-nb_digibok_2009032303011,Troldsøstre,13,14,,,
3,URN:NBN:no-nb_digibok_2009032303011,I Sarons dal,15,16,,,
4,URN:NBN:no-nb_digibok_2009032303011,Idyl,17,18,,,
...,...,...,...,...,...,...,...
2935,URN:NBN:no-nb_digibok_2010081610001,Et elskovsdrama,17,18,x,,
2936,URN:NBN:no-nb_digibok_2010081610001,Eneboerens livshistorie,18,21,x,,Del I til V
2937,URN:NBN:no-nb_digibok_2010081610001,Forkröblet kludder,22,32,,,"Replikker, dramatisk dikt"
2938,URN:NBN:no-nb_digibok_2010081610001,For sent,33,33,,,


In [9]:
pd.DataFrame(poem_list_w_pages).urn.value_counts()

urn
URN:NBN:no-nb_digibok_2016051048054    258
URN:NBN:no-nb_digibok_2014110308161    218
URN:NBN:no-nb_digibok_2009030403044    122
URN:NBN:no-nb_digibok_2006082400076     86
URN:NBN:no-nb_digibok_2013072408071     79
                                      ... 
IV.                                      6
URN:NBN:no-nb_digibok_2009042112004      3
URN:NBN:no-nb_digibok_2010031912006      1
URN:NBN:no-nb_digibok_2009010803031      1
URN:NBN:no-nb_digibok_2009040712005      1
Name: count, Length: 78, dtype: int64

In [10]:
df

Unnamed: 0,urn,title,page_start,page_end,overlapp,digital_visning,comment
0,URN:NBN:no-nb_digibok_2009032303011,Variation,8,9,,,
1,URN:NBN:no-nb_digibok_2009032303011,Pigen med fuglefælden,10,12,,,
2,URN:NBN:no-nb_digibok_2009032303011,Troldsøstre,13,14,,,
3,URN:NBN:no-nb_digibok_2009032303011,I Sarons dal,15,16,,,
4,URN:NBN:no-nb_digibok_2009032303011,Idyl,17,18,,,
...,...,...,...,...,...,...,...
2919,URN:NBN:no-nb_digibok_2010081610001,Et elskovsdrama,17,18,x,,
2920,URN:NBN:no-nb_digibok_2010081610001,Eneboerens livshistorie,18,21,x,,Del I til V
2921,URN:NBN:no-nb_digibok_2010081610001,Forkröblet kludder,22,32,,,"Replikker, dramatisk dikt"
2922,URN:NBN:no-nb_digibok_2010081610001,For sent,33,33,,,


In [11]:
target_df = pd.DataFrame(poem_list_w_pages)

In [12]:
len(set(df.title) - set(target_df.title))

0

In [13]:
poem_list_w_pages

[Poem(urn='URN:NBN:no-nb_digibok_2009032303011', title='Variation', page_start=8, page_end=9, overlapp=nan, digital_visning=nan, comment=nan, pages=['Variation \n\n\n(Simplicius barnet). \n\n\na eneboeren dode, \ngræd Simplicius først sine oine rode, \nderpå stjal han ham kutten af, \nkasted kroppen ned i den færdige grav \nog flygtede gjennem de ode \nskove bort fra de dode. \n\n\nSulten og bleg \nved kvældstid han ind i en by sig sneg, \nmen hvert eneste hus stod ode, \nog torvet lå fuldt af dode. \n\n\n', 'Med strittende hår krøb gutten \nhylende sammen i kutten. \nMen da månen steg, \nhans rædsel veg, \n\nog der legtes i natten en underlig leg \naf barnet mellem de ode \nhus og de stinkende dode. \n\n\nSimplicius, du lille, \n\ndet gik mig som dig, da jeg vilde \nflygte bort fra det døde. \nOgså jeg kom ad ode \nveie ind blandt de dode. \n\n\nø \n\n\n']),
 Poem(urn='URN:NBN:no-nb_digibok_2009032303011', title='Pigen med fuglefælden', page_start=10, page_end=12, overlapp=nan, digita

In [14]:
len(poem_list_w_pages)

2924

In [15]:
df.loc[df.title.isin(set(df.title) - set(target_df.title))]

Unnamed: 0,urn,title,page_start,page_end,overlapp,digital_visning,comment


In [16]:
target_df.title.count()

2924

In [17]:
target_df

Unnamed: 0,urn,title,page_start,page_end,overlapp,digital_visning,comment,pages
0,URN:NBN:no-nb_digibok_2009032303011,Variation,8,9,,,,[Variation \n\n\n(Simplicius barnet). \n\n\na ...
1,URN:NBN:no-nb_digibok_2009032303011,Pigen med fuglefælden,10,12,,,,[Pigen med fuglefælden \n\n\n(Gammelægyptisk m...
2,URN:NBN:no-nb_digibok_2009032303011,Troldsøstre,13,14,,,,[Troldsøstre. \n\n\nTr troldsestre sad i bjerg...
3,URN:NBN:no-nb_digibok_2009032303011,I Sarons dal,15,16,,,,[I Sarons dal. \n\n\nRoser der vokser i Sarons...
4,URN:NBN:no-nb_digibok_2009032303011,Idyl,17,18,,,,"[lyl. \n\n\nggla, stygge tussemor, \n\ndu ved,..."
...,...,...,...,...,...,...,...,...
2919,URN:NBN:no-nb_digibok_2010081610001,Et elskovsdrama,17,18,x,,,[]
2920,URN:NBN:no-nb_digibok_2010081610001,Eneboerens livshistorie,18,21,x,,Del I til V,[]
2921,URN:NBN:no-nb_digibok_2010081610001,Forkröblet kludder,22,32,,,"Replikker, dramatisk dikt",[]
2922,URN:NBN:no-nb_digibok_2010081610001,For sent,33,33,,,,[]


In [18]:
len(txts)

0

In [19]:
soup = BeautifulSoup(alto, "lxml")

  soup = BeautifulSoup(alto, "lxml")


In [20]:
soup

<html><body><p>{"timestamp":"2023-12-05T12:08:49.786+00:00","status":401,"error":"Unauthorized","message":"No access","path":"/catalog/v1/metadata/URN:NBN:no-nb_digibok_2010081610001/altos/URN:NBN:no-nb_digibok_2010081610001_0033"}</p></body></html>

In [21]:
bool(soup.find("ComposedBlock"))

False

In [22]:
soup.find_all("composedblock")

[]

In [23]:
get_text(soup)

ValueError: Could not find alto style

In [None]:
print(alto)

<alto xmlns="http://www.loc.gov/standards/alto/ns-v3#" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.loc.gov/standards/alto/ns-v3# http://www.loc.gov/alto/v3/alto-3-0.xsd">
	<Description>
		<MeasurementUnit>mm10</MeasurementUnit>
		<sourceImageInformation>
			<fileName>https://www.nb.no/services/image/resolver/URN:NBN:no-nb_digibok_2006112300015_0040/full/pct:50/0/native.jpg</fileName>
		</sourceImageInformation>
		<OCRProcessing ID="OCR_0">
			<ocrProcessingStep>
				<processingSoftware>
					<softwareName>tesseract 5.2.0</softwareName>
				</processingSoftware>
			</ocrProcessingStep>
		</OCRProcessing>
	</Description>
	<Styles><TextStyle ID="TXT_0" FONTSIZE="32" FONTFAMILY="Times Roman"/><ParagraphStyle ID="PAR_BLOCK" ALIGN="Block"/></Styles><Layout>
		<Page WIDTH="1192" HEIGHT="1686" PHYSICAL_IMG_NR="0" ID="page_0">
			<PrintSpace HPOS="0" VPOS="0" WIDTH="1192" HEIGHT="1686">
				<GraphicalElement ID=

In [None]:
txt

''

In [None]:
p

Poem(urn='URN:NBN:no-nb_digibok_2006112300015', title='36.', page_start=41, page_end=41, overlapp=nan, digital_visning=nan, comment=nan)

In [None]:
soup = BeautifulSoup(altos, "lxml-xml")

In [None]:
txt = get_text(soup)

In [None]:
print(txt)

37 


Nei «fram og atter* er ei Veien lige; 
thi den, der ei gaar frem, han gaar tilbage, 
og den, der ikke giver, han maa tage, 


naar Trinet slutter paa hans Himmelstige. 


Thi ,udenom* formaar han ei at vige, 
saalidtsom tage sine Tvivl afdage, 
og derfor maa hån rundtom Lyset jage 


og stadig brænde sine Vingeflige. 


Men der, hvor Lyset kun formaar at svide, 
er Skylden tit en Safternes Forarmen, 


en Tidsmarasmes Peg mod det Senile. 


Man oparbeider sig og tror at tvivle, 
men væk er Viljekraft og Hjertevarmen, 
og Tvivlen glattet til en Tankesklide. 


41 





In [None]:
p

Poem(urn='URN:NBN:no-nb_digibok_2016070808151', title='Sigrdrifa', page_start=30, page_end=31, overlapp='x', digital_visning=nan, comment=nan)

In [None]:
res = get_alto(urn, 25)

In [None]:
soup = BeautifulSoup(res, "lxml-xml")

In [None]:
print(text)

17 


Ingen Rullader, ingen høie Sving, 
Med sænkte Vinger 

Sidder hun i Ro 

I Herrens Bo 

Og sagte nynner. 

Fast som et Suk, næsten som en Bøn 
Ingen, slet Ingen 

Ved hvad som rører sig i Løn 
Indenfor Vingen. 


I Vintermorgen aarle og trist 

Som Graven derude 

Utænkt hun kommer saa som med Iist 
Og pikker paa min Rude. 

Ol velkommen mumler jeg igjen: 

Er vi alt vaagen? 

Aarle af Seng, min udvalgte Ven, 
Angrer ei Nogen! 


Himmel! hvad Lyst i tidligste Gry 
At høre din Vise, 

Da vil Tanken høit over Sky, 
Herren at prise. 

O, den, der kunde saa ha' i Stel 
Strax paa Stunden 

Bøn og Suk i Morgen og Kvel 
Med Sang i Munden. 


Tysl hvad var det?  En Rovfugleflok, 
Skjærende Luften, 

Liden Spurv over Sten, over Stok 

Flyr til Kirke-Tuften, 

Kjendt i hver Grind 

Smutter hun ind 



