In [1]:
import requests
import json
from datetime import datetime
import pytz
import pandas as pd
import traceback

from liquer import *

HEADERS_FOR_JSON = {
    'accept': "application/json; charset=utf-8",
    'content-type': "application/json"
}

SEARCH_URL = "http://distribution.virk.dk/offentliggoerelser/_search"


def to_datetime(d):
    if isinstance(d, datetime):
        return d.strftime('%Y-%m-%dT%H:%M:%S.%f')[:-3] + 'Z'
    if d in (None, "NOW"):
        return datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%S.%f')[:-3] + 'Z'


NS = "virk"


@first_command(ns=NS)
def offentliggoerelser_raw(day=None, size=2999):
    """Get public register as JSON document.

    Arguments:
    day -- date string in YYYYMMDD format
    size -- size argument in the query
    """
    if day in (None, "TODAY"):
        d = datetime.utcnow()
    else:
        d = datetime.strptime(day, "%Y%m%d")
    dd = d.strftime("%Y-%m-%d")
    gt = f"{dd}T00:00:00.001Z"
    lt = f"{dd}T23:59:59.505Z"
    data = {
        "query": {
            "bool": {
                "must": [
                    {
                        "term": {
                            "dokumenter.dokumentMimeType": "application"
                        }
                    },
                    {
                        "term": {
                            "dokumenter.dokumentMimeType": "xml"
                        }
                    },
                    {
                        "range": {
                            "offentliggoerelsesTidspunkt": {
                                "gt": gt,
                                "lt": lt
                            }
                        }
                    }
                ],
                "must_not": [],
                "should": []
            }
        },
        "size": size
    }

    response = requests.post(SEARCH_URL, data=json.dumps(data),
                             headers=HEADERS_FOR_JSON)
    return response.json()


@command(ns=NS)
def register2df(data):
    """Convert JSON response from offentliggoerelser_raw to DataFrame"""
    df = pd.DataFrame(columns=["_id", "_index", "_score", "cvrNummer"])
    for r in data["hits"]["hits"]:
        d = dict(
            _id=r["_id"],
            _index=r["_index"],
            _score=r["_score"],
            cvrNummer=str(r["_source"]["cvrNummer"]),
            indlaesningsId=r["_source"]["indlaesningsId"],
            indlaesningsTidspunkt=r["_source"]["indlaesningsTidspunkt"],
            offentliggoerelsesTidspunkt=r["_source"]["offentliggoerelsesTidspunkt"],
            offentliggoerelsestype=r["_source"]["offentliggoerelsestype"],
            omgoerelse=r["_source"]["omgoerelse"],
            regNummer=r["_source"]["regNummer"],
            regnskabsperiode_startDato=r["_source"]["regnskab"]["regnskabsperiode"]["startDato"],
            regnskabsperiode_slutDato=r["_source"]["regnskab"]["regnskabsperiode"]["slutDato"],
            sagsNummer=r["_source"]["sagsNummer"],
            sidstOpdateret=r["_source"]["sidstOpdateret"],
        )
        for doc in r["_source"]["dokumenter"]:
            t = {"application/xhtml+xml": "html", "application/pdf": "pdf",
                 "application/xml": "xml"}.get(doc["dokumentMimeType"])
            if t is not None:
                if doc["dokumentType"] == "AARSRAPPORT":
                    d[f"AARSRAPPORT_{t}"] = doc["dokumentUrl"]
        df = df.append(d, ignore_index=True)
    return df


@first_command(ns=NS)
def register(day="TODAY", size=2999):
    """Get public register as dataframe.
    Uses offentliggoerelser_raw to fetch the data and register2df to convert the data afterwards.

    Arguments:
    day -- date string in YYYYMMDD format
    size -- size argument in the query
    """
    return evaluate(f"ns-{NS}/offentliggoerelser_raw-{day}-{size}/register2df").get()


@first_command(ns=NS)
def cvr(cvr, ext="xml", day="TODAY", size=2999):
    """
    Get the document for a specified CVR number.
    Document has to be found in an index (obtained by the 'index' function taking day and size parameters).
    Multiple formats of document are supported by VIRK, format is specified by 'ext' (xml by default).
    """
    df = register(day=day, size=size)
    url = list(df.loc[df.cvrNummer.map(str) == str(cvr), f"AARSRAPPORT_{ext}"])
    if len(url):
        try:
            return requests.get(url[0]).text
        except:
            import traceback
            traceback.print_exc()
            return ""
    else:
        return ""


@command(ns=NS, volatile=True)
def tojson(xml):
    """Convert XML document with financial statements to JSON"""
    from xmljson import yahoo
    from lxml import etree
    try:
        root = etree.fromstring(xml.encode("utf-8"))
    except:
        return {}
    for elem in root.getiterator():
        try:
            tag = etree.QName(elem.tag)
        except:
            traceback.print_exc()
            continue
        elem.tag = tag.localname
        d = {}
        for key, value in elem.attrib.items():
            nkey = etree.QName(key).localname
            d[nkey] = value
            del elem.attrib[key]
        elem.attrib.update(d)

    d = yahoo.data(root)
    return d["xbrl"]


@command(ns=NS, volatile=True)
def json2df(doc, keep_multiline_values=False, init=None):
    """Convert JSON document with financial statements to DataFrame

    Arguments:
    keep_multiline_values -- if True, arguments with multiline strings are kept
    init -- parameter for internal use
    """
    if not isinstance(init, dict):
        init = {}
    df = pd.DataFrame(columns=["entity", "start_date", "end_date", "context"])
    context = {c["id"]: c for c in doc.get("context", [])}
    cdates = []
    for c in doc.get("context", []):
        period = c.get("period", {})
        cid = c["id"]
        if "instant" in period:
            cdates.append((period["instant"], period["instant"], cid))
        elif "startDate" in period and "endDate" in period:
            cdates.append((period["startDate"], period["endDate"], cid))
        else:
            raise Exception("Context without known dates")

    cdates = sorted(cdates)

    cols = sorted(key for key in doc.keys() if key not in ["context", "unit"])
    for start, end, identifier in cdates:
        c = context[identifier]
        entity = c["entity"]["identifier"]["content"]
        d = dict(init, start_date=start, end_date=end,
                 entity=entity, context=identifier)
        for key in cols:
            keydata = doc[key]
            if isinstance(keydata, list):
                row = [r for r in keydata if r["contextRef"] == identifier]
#                if len(row) > 1:
#                    print(
#                        f"Warning: multiple context entries for {entity} {key}, context:{identifier}")
                if len(row):
                    keydata = row[0]
                else:
                    keydata = None
            if keydata is not None:
                if isinstance(keydata, dict) and "content" in keydata:
                    use = keep_multiline_values or "\n" not in keydata["content"]
                    if use:
                        value = keydata["content"]
                        try:
                            value = float(value)
                            if int(value) == value:
                                value = int(value)
                        except:
                            pass
                        d[key] = value
        df = df.append(d, ignore_index=True)
    return df


@first_command(ns=NS)
def cvrdf(cvr, keep_multiline_values=False, day="TODAY", size=2999):
    """
    Get the document for a specified CVR number as DataFrame.
    Document has to be found in an index (obtained by the 'index' function taking day and size parameters).
    If keep_multiline_values is True, arguments with multiline strings are kept, otherwise they are removed (default).
    """
    register_df = register(day=day, size=size)
    register_df = register_df.loc[register_df.cvrNummer.map(
        str) == str(cvr), :]
    df = pd.DataFrame()
    for index, row in register_df.iterrows():
        cvr = str(row.cvrNummer)
        doc = evaluate(f"ns-{NS}/cvr-{cvr}-xml-{day}-{size}/tojson").get()
        cvr_df = json2df(
            doc, keep_multiline_values=keep_multiline_values, init=dict(row))
        df = df.append(cvr_df, ignore_index=True)
    return df

In [2]:
import liquer.ext.basic
import liquer.ext.lq_pandas
from datetime import datetime
from tqdm import tqdm_notebook
import warnings, sys, os
from liquer.cache import FileCache, set_cache
set_cache(FileCache("cache"))



In [3]:
def find_common_attributes(start_date, end_date):
    # Define the dates we are interested in
    dates = [date.strftime('%Y%m%d') for date in pd.date_range(start=start_date, end=end_date)] 

    attributes = []
    for date in tqdm_notebook(dates): # Loop over the dates and get the cvr of each update for that day
        register_df2 = register(date)
        cvrs = register_df2.cvrNummer  
        print(f'Number of cvr numbers in current iteration: {len(cvrs)}')
        for cvr_num in tqdm_notebook(cvrs):             # Find the attributes of that specific company and store it
            attributes.append(set(cvrdf(str(cvr_num), day=date).columns))

    return set.intersection(*attributes)

In [4]:
def find_all_attributes(start_date, end_date):
    # Define the dates we are interested in
    dates = [date.strftime('%Y%m%d') for date in pd.date_range(start=start_date, end=end_date)] 

    attributes = []
    for date in tqdm_notebook(dates): # Loop over the dates and get the cvr of each update for that day
        register_df2 = register(date)
        cvrs = register_df2.cvrNummer  
        print(f'Number of cvr numbers in current iteration: {len(cvrs)}')
        for cvr_num in tqdm_notebook(cvrs):             # Find the attributes of that specific company and store it
            attributes.append(set(cvrdf(str(cvr_num), day=date).columns))

    return set.union(*attributes)

In [5]:
def download_files(start_date, end_date, found_cvrs=set()):
    # Define the dates we are interested in
    dates = [date.strftime('%Y%m%d') for date in pd.date_range(start=start_date, end=end_date)] 
    skipped_cvrs = []
    cvr_set = found_cvrs
    print(type(cvr_set))
    for date in tqdm_notebook(dates): # Loop over the dates and get the cvr of each update for that day
        register_df2 = register(date)
        cvrs = register_df2.cvrNummer    
        print(f'Number of cvr numbers in current iteration: {len(cvrs)}')
        for cvr_num in tqdm_notebook(cvrs):             # Find the attributes of that specific company and store it
            if cvr_num in cvr_set:
                continue
            else:
                try:
                    cvrdf(str(cvr_num), day=date).to_csv(path_or_buf='New_data/' + str(cvr_num) + '.csv')
                except:
                    skipped_cvrs.append(cvr_num)
                    continue
        cvr_set.update(set(cvrs))
    return skipped_cvrs

In [6]:
start_date = '20190101'
end_date = '20190101'
common_attr = find_common_attributes(start_date, end_date)

HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Number of cvr numbers in current iteration: 159


HBox(children=(IntProgress(value=0, max=159), HTML(value='')))

Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cy

ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", l

  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx",

  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, i

Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cy

ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", l

  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx",

  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, i

Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cy

ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", l

  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx",

  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, i

Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cy

ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", l

  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx",

  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, i

Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cy

ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", l

  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx",

  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, i

Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cy

ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", l

  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx",




In [10]:
print(common_attr)
len(common_attr)

{'offentliggoerelsestype', 'NameOfReportingEntity', 'regnskabsperiode_slutDato', 'TypeOfAuditorAssistance', 'offentliggoerelsesTidspunkt', 'AARSRAPPORT_pdf', 'sidstOpdateret', 'cvrNummer', 'LiabilitiesAndEquity', '_index', 'context', 'indlaesningsTidspunkt', 'ProfitLoss', 'indlaesningsId', 'entity', 'regNummer', 'end_date', '_id', 'ClassOfReportingEntity', 'AddressOfSubmittingEnterpriseStreetAndNumber', 'AARSRAPPORT_xml', 'sagsNummer', 'omgoerelse', 'ReportingPeriodEndDate', 'InformationOnTypeOfSubmittedReport', 'AddressOfSubmittingEnterprisePostcodeAndTown', 'ReportingPeriodStartDate', '_score', 'start_date', 'Equity', 'IdentificationNumberCvrOfReportingEntity', 'AARSRAPPORT_html', 'regnskabsperiode_startDato', 'NameAndSurnameOfChairmanOfGeneralMeeting', 'Assets'}


35

In [9]:
all_attr = find_all_attributes(start_date, end_date)
len(all_attr)

HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Number of cvr numbers in current iteration: 159


HBox(children=(IntProgress(value=0, max=159), HTML(value='')))

Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cy

ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", l

  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx",

  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, i

Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cy

ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", l

  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx",

  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, i

Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cy

ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", l

  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx",

  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, i

Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cy

ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", l

  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx",

  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, i

Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cy

ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", l

  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx",

  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, i

Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cy

ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", l

  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx", line 1835, in lxml.etree.QName.__init__
  File "src/lxml/apihelpers.pxi", line 1680, in lxml.etree._tagValidOrRaise
ValueError: Invalid tag name '<cyfunction Comment at 0x000001DB73381C80>'
Traceback (most recent call last):
  File "<ipython-input-1-d648b050eb9b>", line 152, in tojson
    tag = etree.QName(elem.tag)
  File "src/lxml/etree.pyx",




452

In [7]:
import os
current_cvrs = set()
#current_cvrs = set([int(file.split('.')[0]) for file in os.listdir("Data/")])
#current_cvrs.update([int(file.split('.')[0]) for file in os.listdir("Valid_Data/")])
#current_cvrs.update([int(file.split('.')[0]) for file in os.listdir("New_Data/")])
skipped_cvrs = download_files(start_date='20180101', end_date='20181231', found_cvrs=current_cvrs)

<class 'set'>


HBox(children=(IntProgress(value=0, max=365), HTML(value='')))

Number of cvr numbers in current iteration: 53


HBox(children=(IntProgress(value=0, max=53), HTML(value='')))

Number of cvr numbers in current iteration: 314


HBox(children=(IntProgress(value=0, max=314), HTML(value='')))

Number of cvr numbers in current iteration: 333


HBox(children=(IntProgress(value=0, max=333), HTML(value='')))

Number of cvr numbers in current iteration: 425


HBox(children=(IntProgress(value=0, max=425), HTML(value='')))

Number of cvr numbers in current iteration: 373


HBox(children=(IntProgress(value=0, max=373), HTML(value='')))

Number of cvr numbers in current iteration: 50


HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

Number of cvr numbers in current iteration: 66


HBox(children=(IntProgress(value=0, max=66), HTML(value='')))

Number of cvr numbers in current iteration: 510


HBox(children=(IntProgress(value=0, max=510), HTML(value='')))

Number of cvr numbers in current iteration: 665


HBox(children=(IntProgress(value=0, max=665), HTML(value='')))

Number of cvr numbers in current iteration: 309


HBox(children=(IntProgress(value=0, max=309), HTML(value='')))

Number of cvr numbers in current iteration: 328


HBox(children=(IntProgress(value=0, max=328), HTML(value='')))

Number of cvr numbers in current iteration: 380


HBox(children=(IntProgress(value=0, max=380), HTML(value='')))

Number of cvr numbers in current iteration: 42


HBox(children=(IntProgress(value=0, max=42), HTML(value='')))

Number of cvr numbers in current iteration: 65


HBox(children=(IntProgress(value=0, max=65), HTML(value='')))

Number of cvr numbers in current iteration: 440


HBox(children=(IntProgress(value=0, max=440), HTML(value='')))

Number of cvr numbers in current iteration: 354


HBox(children=(IntProgress(value=0, max=354), HTML(value='')))

Number of cvr numbers in current iteration: 343


HBox(children=(IntProgress(value=0, max=343), HTML(value='')))

Number of cvr numbers in current iteration: 407


HBox(children=(IntProgress(value=0, max=407), HTML(value='')))

Number of cvr numbers in current iteration: 429


HBox(children=(IntProgress(value=0, max=429), HTML(value='')))

Number of cvr numbers in current iteration: 62


HBox(children=(IntProgress(value=0, max=62), HTML(value='')))

Number of cvr numbers in current iteration: 80


HBox(children=(IntProgress(value=0, max=80), HTML(value='')))

Number of cvr numbers in current iteration: 463


HBox(children=(IntProgress(value=0, max=463), HTML(value='')))

Number of cvr numbers in current iteration: 344


HBox(children=(IntProgress(value=0, max=344), HTML(value='')))

Number of cvr numbers in current iteration: 350


HBox(children=(IntProgress(value=0, max=350), HTML(value='')))

Number of cvr numbers in current iteration: 472


HBox(children=(IntProgress(value=0, max=472), HTML(value='')))

Number of cvr numbers in current iteration: 522


HBox(children=(IntProgress(value=0, max=522), HTML(value='')))

Number of cvr numbers in current iteration: 85


HBox(children=(IntProgress(value=0, max=85), HTML(value='')))

Number of cvr numbers in current iteration: 83


HBox(children=(IntProgress(value=0, max=83), HTML(value='')))

Number of cvr numbers in current iteration: 565


HBox(children=(IntProgress(value=0, max=565), HTML(value='')))

Number of cvr numbers in current iteration: 545


HBox(children=(IntProgress(value=0, max=545), HTML(value='')))

Number of cvr numbers in current iteration: 527


HBox(children=(IntProgress(value=0, max=527), HTML(value='')))

Number of cvr numbers in current iteration: 278


HBox(children=(IntProgress(value=0, max=278), HTML(value='')))

Number of cvr numbers in current iteration: 888


HBox(children=(IntProgress(value=0, max=888), HTML(value='')))

Number of cvr numbers in current iteration: 80


HBox(children=(IntProgress(value=0, max=80), HTML(value='')))

Number of cvr numbers in current iteration: 81


HBox(children=(IntProgress(value=0, max=81), HTML(value='')))

Number of cvr numbers in current iteration: 663


HBox(children=(IntProgress(value=0, max=663), HTML(value='')))

Number of cvr numbers in current iteration: 599


HBox(children=(IntProgress(value=0, max=599), HTML(value='')))

Number of cvr numbers in current iteration: 622


HBox(children=(IntProgress(value=0, max=622), HTML(value='')))

Number of cvr numbers in current iteration: 651


HBox(children=(IntProgress(value=0, max=651), HTML(value='')))

Number of cvr numbers in current iteration: 699


HBox(children=(IntProgress(value=0, max=699), HTML(value='')))

Number of cvr numbers in current iteration: 93


HBox(children=(IntProgress(value=0, max=93), HTML(value='')))

Number of cvr numbers in current iteration: 132


HBox(children=(IntProgress(value=0, max=132), HTML(value='')))

Number of cvr numbers in current iteration: 760


HBox(children=(IntProgress(value=0, max=760), HTML(value='')))

Number of cvr numbers in current iteration: 601


HBox(children=(IntProgress(value=0, max=601), HTML(value='')))

Number of cvr numbers in current iteration: 613


HBox(children=(IntProgress(value=0, max=613), HTML(value='')))

Number of cvr numbers in current iteration: 512


HBox(children=(IntProgress(value=0, max=512), HTML(value='')))

Number of cvr numbers in current iteration: 479


HBox(children=(IntProgress(value=0, max=479), HTML(value='')))

Number of cvr numbers in current iteration: 89


HBox(children=(IntProgress(value=0, max=89), HTML(value='')))

Number of cvr numbers in current iteration: 106


HBox(children=(IntProgress(value=0, max=106), HTML(value='')))

Number of cvr numbers in current iteration: 616


HBox(children=(IntProgress(value=0, max=616), HTML(value='')))

Number of cvr numbers in current iteration: 667


HBox(children=(IntProgress(value=0, max=667), HTML(value='')))

Number of cvr numbers in current iteration: 653


HBox(children=(IntProgress(value=0, max=653), HTML(value='')))

Number of cvr numbers in current iteration: 718


HBox(children=(IntProgress(value=0, max=718), HTML(value='')))

Number of cvr numbers in current iteration: 783


HBox(children=(IntProgress(value=0, max=783), HTML(value='')))

Number of cvr numbers in current iteration: 136


HBox(children=(IntProgress(value=0, max=136), HTML(value='')))

Number of cvr numbers in current iteration: 207


HBox(children=(IntProgress(value=0, max=207), HTML(value='')))

Number of cvr numbers in current iteration: 1061


HBox(children=(IntProgress(value=0, max=1061), HTML(value='')))

Number of cvr numbers in current iteration: 1204


HBox(children=(IntProgress(value=0, max=1204), HTML(value='')))

Number of cvr numbers in current iteration: 1864


HBox(children=(IntProgress(value=0, max=1864), HTML(value='')))

Number of cvr numbers in current iteration: 1288


HBox(children=(IntProgress(value=0, max=1288), HTML(value='')))

Number of cvr numbers in current iteration: 1142


HBox(children=(IntProgress(value=0, max=1142), HTML(value='')))

Number of cvr numbers in current iteration: 177


HBox(children=(IntProgress(value=0, max=177), HTML(value='')))

Number of cvr numbers in current iteration: 245


HBox(children=(IntProgress(value=0, max=245), HTML(value='')))

Number of cvr numbers in current iteration: 1417


HBox(children=(IntProgress(value=0, max=1417), HTML(value='')))

Number of cvr numbers in current iteration: 1129


HBox(children=(IntProgress(value=0, max=1129), HTML(value='')))

Number of cvr numbers in current iteration: 1088


HBox(children=(IntProgress(value=0, max=1088), HTML(value='')))

Number of cvr numbers in current iteration: 1057


HBox(children=(IntProgress(value=0, max=1057), HTML(value='')))

Number of cvr numbers in current iteration: 1273


HBox(children=(IntProgress(value=0, max=1273), HTML(value='')))

Number of cvr numbers in current iteration: 158


HBox(children=(IntProgress(value=0, max=158), HTML(value='')))

Number of cvr numbers in current iteration: 205


HBox(children=(IntProgress(value=0, max=205), HTML(value='')))

Number of cvr numbers in current iteration: 1317


HBox(children=(IntProgress(value=0, max=1317), HTML(value='')))

Number of cvr numbers in current iteration: 1302


HBox(children=(IntProgress(value=0, max=1302), HTML(value='')))

Number of cvr numbers in current iteration: 1648


HBox(children=(IntProgress(value=0, max=1648), HTML(value='')))

Number of cvr numbers in current iteration: 836


HBox(children=(IntProgress(value=0, max=836), HTML(value='')))

Number of cvr numbers in current iteration: 922


HBox(children=(IntProgress(value=0, max=922), HTML(value='')))

Number of cvr numbers in current iteration: 117


HBox(children=(IntProgress(value=0, max=117), HTML(value='')))

Number of cvr numbers in current iteration: 163


HBox(children=(IntProgress(value=0, max=163), HTML(value='')))

Number of cvr numbers in current iteration: 880


HBox(children=(IntProgress(value=0, max=880), HTML(value='')))

Number of cvr numbers in current iteration: 962


HBox(children=(IntProgress(value=0, max=962), HTML(value='')))

Number of cvr numbers in current iteration: 1001


HBox(children=(IntProgress(value=0, max=1001), HTML(value='')))

Number of cvr numbers in current iteration: 995


HBox(children=(IntProgress(value=0, max=995), HTML(value='')))

Number of cvr numbers in current iteration: 1286


HBox(children=(IntProgress(value=0, max=1286), HTML(value='')))

Number of cvr numbers in current iteration: 146


HBox(children=(IntProgress(value=0, max=146), HTML(value='')))

Number of cvr numbers in current iteration: 132


HBox(children=(IntProgress(value=0, max=132), HTML(value='')))

Number of cvr numbers in current iteration: 1154


HBox(children=(IntProgress(value=0, max=1154), HTML(value='')))

Number of cvr numbers in current iteration: 946


HBox(children=(IntProgress(value=0, max=946), HTML(value='')))

Number of cvr numbers in current iteration: 832


HBox(children=(IntProgress(value=0, max=832), HTML(value='')))

Number of cvr numbers in current iteration: 148


HBox(children=(IntProgress(value=0, max=148), HTML(value='')))

Number of cvr numbers in current iteration: 105


HBox(children=(IntProgress(value=0, max=105), HTML(value='')))

Number of cvr numbers in current iteration: 166


HBox(children=(IntProgress(value=0, max=166), HTML(value='')))

Number of cvr numbers in current iteration: 122


HBox(children=(IntProgress(value=0, max=122), HTML(value='')))

Number of cvr numbers in current iteration: 184


HBox(children=(IntProgress(value=0, max=184), HTML(value='')))

Number of cvr numbers in current iteration: 1189


HBox(children=(IntProgress(value=0, max=1189), HTML(value='')))

Number of cvr numbers in current iteration: 979


HBox(children=(IntProgress(value=0, max=979), HTML(value='')))

Number of cvr numbers in current iteration: 875


HBox(children=(IntProgress(value=0, max=875), HTML(value='')))

Number of cvr numbers in current iteration: 966


HBox(children=(IntProgress(value=0, max=966), HTML(value='')))

Number of cvr numbers in current iteration: 120


HBox(children=(IntProgress(value=0, max=120), HTML(value='')))

Number of cvr numbers in current iteration: 152


HBox(children=(IntProgress(value=0, max=152), HTML(value='')))

Number of cvr numbers in current iteration: 1087


HBox(children=(IntProgress(value=0, max=1087), HTML(value='')))

Number of cvr numbers in current iteration: 1041


HBox(children=(IntProgress(value=0, max=1041), HTML(value='')))

Number of cvr numbers in current iteration: 1004


HBox(children=(IntProgress(value=0, max=1004), HTML(value='')))

Number of cvr numbers in current iteration: 997


HBox(children=(IntProgress(value=0, max=997), HTML(value='')))

Number of cvr numbers in current iteration: 1090


HBox(children=(IntProgress(value=0, max=1090), HTML(value='')))

Number of cvr numbers in current iteration: 131


HBox(children=(IntProgress(value=0, max=131), HTML(value='')))

Number of cvr numbers in current iteration: 118


HBox(children=(IntProgress(value=0, max=118), HTML(value='')))

Number of cvr numbers in current iteration: 1296


HBox(children=(IntProgress(value=0, max=1296), HTML(value='')))

Number of cvr numbers in current iteration: 1226


HBox(children=(IntProgress(value=0, max=1226), HTML(value='')))

Number of cvr numbers in current iteration: 1112


HBox(children=(IntProgress(value=0, max=1112), HTML(value='')))

Number of cvr numbers in current iteration: 1250


HBox(children=(IntProgress(value=0, max=1250), HTML(value='')))

Number of cvr numbers in current iteration: 1064


HBox(children=(IntProgress(value=0, max=1064), HTML(value='')))

Number of cvr numbers in current iteration: 152


HBox(children=(IntProgress(value=0, max=152), HTML(value='')))

Number of cvr numbers in current iteration: 154


HBox(children=(IntProgress(value=0, max=154), HTML(value='')))

Number of cvr numbers in current iteration: 1429


HBox(children=(IntProgress(value=0, max=1429), HTML(value='')))

Number of cvr numbers in current iteration: 1290


HBox(children=(IntProgress(value=0, max=1290), HTML(value='')))

Number of cvr numbers in current iteration: 1267


HBox(children=(IntProgress(value=0, max=1267), HTML(value='')))

Number of cvr numbers in current iteration: 1445


HBox(children=(IntProgress(value=0, max=1445), HTML(value='')))

Number of cvr numbers in current iteration: 315


HBox(children=(IntProgress(value=0, max=315), HTML(value='')))

Number of cvr numbers in current iteration: 125


HBox(children=(IntProgress(value=0, max=125), HTML(value='')))

Number of cvr numbers in current iteration: 178


HBox(children=(IntProgress(value=0, max=178), HTML(value='')))

Number of cvr numbers in current iteration: 1854


HBox(children=(IntProgress(value=0, max=1854), HTML(value='')))

Number of cvr numbers in current iteration: 1495


HBox(children=(IntProgress(value=0, max=1495), HTML(value='')))

Number of cvr numbers in current iteration: 1311


HBox(children=(IntProgress(value=0, max=1311), HTML(value='')))

Number of cvr numbers in current iteration: 1476


HBox(children=(IntProgress(value=0, max=1476), HTML(value='')))

Number of cvr numbers in current iteration: 1450


HBox(children=(IntProgress(value=0, max=1450), HTML(value='')))

Number of cvr numbers in current iteration: 175


HBox(children=(IntProgress(value=0, max=175), HTML(value='')))

Number of cvr numbers in current iteration: 191


HBox(children=(IntProgress(value=0, max=191), HTML(value='')))

Number of cvr numbers in current iteration: 1665


HBox(children=(IntProgress(value=0, max=1665), HTML(value='')))

Number of cvr numbers in current iteration: 1636


HBox(children=(IntProgress(value=0, max=1636), HTML(value='')))

Number of cvr numbers in current iteration: 1569


HBox(children=(IntProgress(value=0, max=1569), HTML(value='')))

Number of cvr numbers in current iteration: 453


HBox(children=(IntProgress(value=0, max=453), HTML(value='')))

Number of cvr numbers in current iteration: 701


HBox(children=(IntProgress(value=0, max=701), HTML(value='')))

Number of cvr numbers in current iteration: 174


HBox(children=(IntProgress(value=0, max=174), HTML(value='')))

Number of cvr numbers in current iteration: 192


HBox(children=(IntProgress(value=0, max=192), HTML(value='')))

Number of cvr numbers in current iteration: 1868


HBox(children=(IntProgress(value=0, max=1868), HTML(value='')))

Number of cvr numbers in current iteration: 1648


HBox(children=(IntProgress(value=0, max=1648), HTML(value='')))

Number of cvr numbers in current iteration: 1974


HBox(children=(IntProgress(value=0, max=1974), HTML(value='')))

Number of cvr numbers in current iteration: 2196


HBox(children=(IntProgress(value=0, max=2196), HTML(value='')))

Number of cvr numbers in current iteration: 2102


HBox(children=(IntProgress(value=0, max=2102), HTML(value='')))

Number of cvr numbers in current iteration: 321


HBox(children=(IntProgress(value=0, max=321), HTML(value='')))

Number of cvr numbers in current iteration: 268


HBox(children=(IntProgress(value=0, max=268), HTML(value='')))

Number of cvr numbers in current iteration: 548


HBox(children=(IntProgress(value=0, max=548), HTML(value='')))

Number of cvr numbers in current iteration: 2604


HBox(children=(IntProgress(value=0, max=2604), HTML(value='')))

Number of cvr numbers in current iteration: 2910


HBox(children=(IntProgress(value=0, max=2910), HTML(value='')))

Number of cvr numbers in current iteration: 2999


HBox(children=(IntProgress(value=0, max=2999), HTML(value='')))

Number of cvr numbers in current iteration: 2999


HBox(children=(IntProgress(value=0, max=2999), HTML(value='')))

Number of cvr numbers in current iteration: 651


HBox(children=(IntProgress(value=0, max=651), HTML(value='')))

Number of cvr numbers in current iteration: 846


HBox(children=(IntProgress(value=0, max=846), HTML(value='')))

Number of cvr numbers in current iteration: 2999


HBox(children=(IntProgress(value=0, max=2999), HTML(value='')))

Number of cvr numbers in current iteration: 2999


HBox(children=(IntProgress(value=0, max=2999), HTML(value='')))

Number of cvr numbers in current iteration: 2999


HBox(children=(IntProgress(value=0, max=2999), HTML(value='')))

Number of cvr numbers in current iteration: 2999


HBox(children=(IntProgress(value=0, max=2999), HTML(value='')))

Number of cvr numbers in current iteration: 2999


HBox(children=(IntProgress(value=0, max=2999), HTML(value='')))

Number of cvr numbers in current iteration: 624


HBox(children=(IntProgress(value=0, max=624), HTML(value='')))

Number of cvr numbers in current iteration: 729


HBox(children=(IntProgress(value=0, max=729), HTML(value='')))

Number of cvr numbers in current iteration: 2999


HBox(children=(IntProgress(value=0, max=2999), HTML(value='')))

Number of cvr numbers in current iteration: 2391


HBox(children=(IntProgress(value=0, max=2391), HTML(value='')))

Number of cvr numbers in current iteration: 2999


HBox(children=(IntProgress(value=0, max=2999), HTML(value='')))

Number of cvr numbers in current iteration: 2999


HBox(children=(IntProgress(value=0, max=2999), HTML(value='')))

Number of cvr numbers in current iteration: 2960


HBox(children=(IntProgress(value=0, max=2960), HTML(value='')))

Traceback (most recent call last):
KeyError: 'xbrl'


Traceback (most recent call last):
KeyError: 'xbrl'

Number of cvr numbers in current iteration: 997


HBox(children=(IntProgress(value=0, max=997), HTML(value='')))

Number of cvr numbers in current iteration: 601


HBox(children=(IntProgress(value=0, max=601), HTML(value='')))

Number of cvr numbers in current iteration: 2999


HBox(children=(IntProgress(value=0, max=2999), HTML(value='')))

Number of cvr numbers in current iteration: 2999


HBox(children=(IntProgress(value=0, max=2999), HTML(value='')))

Number of cvr numbers in current iteration: 2999


HBox(children=(IntProgress(value=0, max=2999), HTML(value='')))

Number of cvr numbers in current iteration: 2837


HBox(children=(IntProgress(value=0, max=2837), HTML(value='')))

Number of cvr numbers in current iteration: 2638


HBox(children=(IntProgress(value=0, max=2638), HTML(value='')))

Number of cvr numbers in current iteration: 524


HBox(children=(IntProgress(value=0, max=524), HTML(value='')))

Number of cvr numbers in current iteration: 711


HBox(children=(IntProgress(value=0, max=711), HTML(value='')))

Number of cvr numbers in current iteration: 2999


HBox(children=(IntProgress(value=0, max=2999), HTML(value='')))

Number of cvr numbers in current iteration: 2999


HBox(children=(IntProgress(value=0, max=2999), HTML(value='')))

Number of cvr numbers in current iteration: 2999


HBox(children=(IntProgress(value=0, max=2999), HTML(value='')))

Number of cvr numbers in current iteration: 1814


HBox(children=(IntProgress(value=0, max=1814), HTML(value='')))

Number of cvr numbers in current iteration: 1257


HBox(children=(IntProgress(value=0, max=1257), HTML(value='')))

Number of cvr numbers in current iteration: 134


HBox(children=(IntProgress(value=0, max=134), HTML(value='')))

Number of cvr numbers in current iteration: 245


HBox(children=(IntProgress(value=0, max=245), HTML(value='')))

Number of cvr numbers in current iteration: 1217


HBox(children=(IntProgress(value=0, max=1217), HTML(value='')))

Number of cvr numbers in current iteration: 600


HBox(children=(IntProgress(value=0, max=600), HTML(value='')))

Number of cvr numbers in current iteration: 564


HBox(children=(IntProgress(value=0, max=564), HTML(value='')))

Number of cvr numbers in current iteration: 666


HBox(children=(IntProgress(value=0, max=666), HTML(value='')))

Number of cvr numbers in current iteration: 956


HBox(children=(IntProgress(value=0, max=956), HTML(value='')))

Number of cvr numbers in current iteration: 413


HBox(children=(IntProgress(value=0, max=413), HTML(value='')))

Number of cvr numbers in current iteration: 183


HBox(children=(IntProgress(value=0, max=183), HTML(value='')))

Number of cvr numbers in current iteration: 506


HBox(children=(IntProgress(value=0, max=506), HTML(value='')))

Number of cvr numbers in current iteration: 477


HBox(children=(IntProgress(value=0, max=477), HTML(value='')))

Number of cvr numbers in current iteration: 631


HBox(children=(IntProgress(value=0, max=631), HTML(value='')))

Number of cvr numbers in current iteration: 750


HBox(children=(IntProgress(value=0, max=750), HTML(value='')))

Number of cvr numbers in current iteration: 807


HBox(children=(IntProgress(value=0, max=807), HTML(value='')))

Number of cvr numbers in current iteration: 72


HBox(children=(IntProgress(value=0, max=72), HTML(value='')))

Number of cvr numbers in current iteration: 114


HBox(children=(IntProgress(value=0, max=114), HTML(value='')))

Number of cvr numbers in current iteration: 452


HBox(children=(IntProgress(value=0, max=452), HTML(value='')))

Number of cvr numbers in current iteration: 333


HBox(children=(IntProgress(value=0, max=333), HTML(value='')))

Number of cvr numbers in current iteration: 224


HBox(children=(IntProgress(value=0, max=224), HTML(value='')))

Number of cvr numbers in current iteration: 77


HBox(children=(IntProgress(value=0, max=77), HTML(value='')))

Number of cvr numbers in current iteration: 312


HBox(children=(IntProgress(value=0, max=312), HTML(value='')))

Number of cvr numbers in current iteration: 35


HBox(children=(IntProgress(value=0, max=35), HTML(value='')))

Number of cvr numbers in current iteration: 31


HBox(children=(IntProgress(value=0, max=31), HTML(value='')))

Number of cvr numbers in current iteration: 195


HBox(children=(IntProgress(value=0, max=195), HTML(value='')))

Number of cvr numbers in current iteration: 137


HBox(children=(IntProgress(value=0, max=137), HTML(value='')))

Number of cvr numbers in current iteration: 110


HBox(children=(IntProgress(value=0, max=110), HTML(value='')))

Number of cvr numbers in current iteration: 78


HBox(children=(IntProgress(value=0, max=78), HTML(value='')))

Number of cvr numbers in current iteration: 89


HBox(children=(IntProgress(value=0, max=89), HTML(value='')))

Number of cvr numbers in current iteration: 13


HBox(children=(IntProgress(value=0, max=13), HTML(value='')))

Number of cvr numbers in current iteration: 19


HBox(children=(IntProgress(value=0, max=19), HTML(value='')))

Number of cvr numbers in current iteration: 67


HBox(children=(IntProgress(value=0, max=67), HTML(value='')))

Number of cvr numbers in current iteration: 69


HBox(children=(IntProgress(value=0, max=69), HTML(value='')))

Traceback (most recent call last):
KeyError: 'xbrl'


Traceback (most recent call last):
KeyError: 'xbrl'

Number of cvr numbers in current iteration: 67


HBox(children=(IntProgress(value=0, max=67), HTML(value='')))

Number of cvr numbers in current iteration: 80


HBox(children=(IntProgress(value=0, max=80), HTML(value='')))

Number of cvr numbers in current iteration: 45


HBox(children=(IntProgress(value=0, max=45), HTML(value='')))

Number of cvr numbers in current iteration: 5


HBox(children=(IntProgress(value=0, max=5), HTML(value='')))

Number of cvr numbers in current iteration: 22


HBox(children=(IntProgress(value=0, max=22), HTML(value='')))

Number of cvr numbers in current iteration: 106


HBox(children=(IntProgress(value=0, max=106), HTML(value='')))

Number of cvr numbers in current iteration: 146


HBox(children=(IntProgress(value=0, max=146), HTML(value='')))

Number of cvr numbers in current iteration: 98


HBox(children=(IntProgress(value=0, max=98), HTML(value='')))

Number of cvr numbers in current iteration: 56


HBox(children=(IntProgress(value=0, max=56), HTML(value='')))

Number of cvr numbers in current iteration: 70


HBox(children=(IntProgress(value=0, max=70), HTML(value='')))

Number of cvr numbers in current iteration: 14


HBox(children=(IntProgress(value=0, max=14), HTML(value='')))

Number of cvr numbers in current iteration: 32


HBox(children=(IntProgress(value=0, max=32), HTML(value='')))

Number of cvr numbers in current iteration: 115


HBox(children=(IntProgress(value=0, max=115), HTML(value='')))

Number of cvr numbers in current iteration: 113


HBox(children=(IntProgress(value=0, max=113), HTML(value='')))

Number of cvr numbers in current iteration: 90


HBox(children=(IntProgress(value=0, max=90), HTML(value='')))

Number of cvr numbers in current iteration: 108


HBox(children=(IntProgress(value=0, max=108), HTML(value='')))

Number of cvr numbers in current iteration: 140


HBox(children=(IntProgress(value=0, max=140), HTML(value='')))

Number of cvr numbers in current iteration: 49


HBox(children=(IntProgress(value=0, max=49), HTML(value='')))

Number of cvr numbers in current iteration: 26


HBox(children=(IntProgress(value=0, max=26), HTML(value='')))

Number of cvr numbers in current iteration: 157


HBox(children=(IntProgress(value=0, max=157), HTML(value='')))

Number of cvr numbers in current iteration: 170


HBox(children=(IntProgress(value=0, max=170), HTML(value='')))

Number of cvr numbers in current iteration: 173


HBox(children=(IntProgress(value=0, max=173), HTML(value='')))

Number of cvr numbers in current iteration: 168


HBox(children=(IntProgress(value=0, max=168), HTML(value='')))

Number of cvr numbers in current iteration: 139


HBox(children=(IntProgress(value=0, max=139), HTML(value='')))

Number of cvr numbers in current iteration: 16


HBox(children=(IntProgress(value=0, max=16), HTML(value='')))

Number of cvr numbers in current iteration: 23


HBox(children=(IntProgress(value=0, max=23), HTML(value='')))

Number of cvr numbers in current iteration: 164


HBox(children=(IntProgress(value=0, max=164), HTML(value='')))

Number of cvr numbers in current iteration: 148


HBox(children=(IntProgress(value=0, max=148), HTML(value='')))

Number of cvr numbers in current iteration: 174


HBox(children=(IntProgress(value=0, max=174), HTML(value='')))

Number of cvr numbers in current iteration: 161


HBox(children=(IntProgress(value=0, max=161), HTML(value='')))

Number of cvr numbers in current iteration: 167


HBox(children=(IntProgress(value=0, max=167), HTML(value='')))

Number of cvr numbers in current iteration: 10


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

Number of cvr numbers in current iteration: 20


HBox(children=(IntProgress(value=0, max=20), HTML(value='')))

Number of cvr numbers in current iteration: 221


HBox(children=(IntProgress(value=0, max=221), HTML(value='')))

Number of cvr numbers in current iteration: 217


HBox(children=(IntProgress(value=0, max=217), HTML(value='')))

Number of cvr numbers in current iteration: 245


HBox(children=(IntProgress(value=0, max=245), HTML(value='')))

Number of cvr numbers in current iteration: 308


HBox(children=(IntProgress(value=0, max=308), HTML(value='')))

Number of cvr numbers in current iteration: 273


HBox(children=(IntProgress(value=0, max=273), HTML(value='')))

Number of cvr numbers in current iteration: 31


HBox(children=(IntProgress(value=0, max=31), HTML(value='')))

Number of cvr numbers in current iteration: 36


HBox(children=(IntProgress(value=0, max=36), HTML(value='')))

Number of cvr numbers in current iteration: 355


HBox(children=(IntProgress(value=0, max=355), HTML(value='')))

Number of cvr numbers in current iteration: 231


HBox(children=(IntProgress(value=0, max=231), HTML(value='')))

Number of cvr numbers in current iteration: 310


HBox(children=(IntProgress(value=0, max=310), HTML(value='')))

Number of cvr numbers in current iteration: 284


HBox(children=(IntProgress(value=0, max=284), HTML(value='')))

Number of cvr numbers in current iteration: 230


HBox(children=(IntProgress(value=0, max=230), HTML(value='')))

Number of cvr numbers in current iteration: 19


HBox(children=(IntProgress(value=0, max=19), HTML(value='')))

Number of cvr numbers in current iteration: 36


HBox(children=(IntProgress(value=0, max=36), HTML(value='')))

Number of cvr numbers in current iteration: 351


HBox(children=(IntProgress(value=0, max=351), HTML(value='')))

Number of cvr numbers in current iteration: 261


HBox(children=(IntProgress(value=0, max=261), HTML(value='')))

Number of cvr numbers in current iteration: 282


HBox(children=(IntProgress(value=0, max=282), HTML(value='')))

Number of cvr numbers in current iteration: 331


HBox(children=(IntProgress(value=0, max=331), HTML(value='')))

Number of cvr numbers in current iteration: 388


HBox(children=(IntProgress(value=0, max=388), HTML(value='')))

Traceback (most recent call last):
KeyError: 'xbrl'


Traceback (most recent call last):
KeyError: 'xbrl'

Number of cvr numbers in current iteration: 23


HBox(children=(IntProgress(value=0, max=23), HTML(value='')))

Number of cvr numbers in current iteration: 43


HBox(children=(IntProgress(value=0, max=43), HTML(value='')))

Number of cvr numbers in current iteration: 410


HBox(children=(IntProgress(value=0, max=410), HTML(value='')))

Number of cvr numbers in current iteration: 291


HBox(children=(IntProgress(value=0, max=291), HTML(value='')))

Number of cvr numbers in current iteration: 376


HBox(children=(IntProgress(value=0, max=376), HTML(value='')))

Number of cvr numbers in current iteration: 333


HBox(children=(IntProgress(value=0, max=333), HTML(value='')))

Number of cvr numbers in current iteration: 344


HBox(children=(IntProgress(value=0, max=344), HTML(value='')))

Number of cvr numbers in current iteration: 37


HBox(children=(IntProgress(value=0, max=37), HTML(value='')))

Number of cvr numbers in current iteration: 46


HBox(children=(IntProgress(value=0, max=46), HTML(value='')))

Number of cvr numbers in current iteration: 490


HBox(children=(IntProgress(value=0, max=490), HTML(value='')))

Number of cvr numbers in current iteration: 482


HBox(children=(IntProgress(value=0, max=482), HTML(value='')))

Number of cvr numbers in current iteration: 592


HBox(children=(IntProgress(value=0, max=592), HTML(value='')))

Number of cvr numbers in current iteration: 593


HBox(children=(IntProgress(value=0, max=593), HTML(value='')))

Number of cvr numbers in current iteration: 676


HBox(children=(IntProgress(value=0, max=676), HTML(value='')))

Number of cvr numbers in current iteration: 40


HBox(children=(IntProgress(value=0, max=40), HTML(value='')))

Number of cvr numbers in current iteration: 137


HBox(children=(IntProgress(value=0, max=137), HTML(value='')))

Number of cvr numbers in current iteration: 829


HBox(children=(IntProgress(value=0, max=829), HTML(value='')))

Number of cvr numbers in current iteration: 631


HBox(children=(IntProgress(value=0, max=631), HTML(value='')))

Number of cvr numbers in current iteration: 552


HBox(children=(IntProgress(value=0, max=552), HTML(value='')))

Number of cvr numbers in current iteration: 651


HBox(children=(IntProgress(value=0, max=651), HTML(value='')))

Number of cvr numbers in current iteration: 461


HBox(children=(IntProgress(value=0, max=461), HTML(value='')))

Number of cvr numbers in current iteration: 38


HBox(children=(IntProgress(value=0, max=38), HTML(value='')))

Number of cvr numbers in current iteration: 38


HBox(children=(IntProgress(value=0, max=38), HTML(value='')))

Number of cvr numbers in current iteration: 634


HBox(children=(IntProgress(value=0, max=634), HTML(value='')))

Number of cvr numbers in current iteration: 527


HBox(children=(IntProgress(value=0, max=527), HTML(value='')))

Number of cvr numbers in current iteration: 506


HBox(children=(IntProgress(value=0, max=506), HTML(value='')))

Number of cvr numbers in current iteration: 505


HBox(children=(IntProgress(value=0, max=505), HTML(value='')))

Number of cvr numbers in current iteration: 585


HBox(children=(IntProgress(value=0, max=585), HTML(value='')))

Number of cvr numbers in current iteration: 24


HBox(children=(IntProgress(value=0, max=24), HTML(value='')))

Number of cvr numbers in current iteration: 57


HBox(children=(IntProgress(value=0, max=57), HTML(value='')))

Number of cvr numbers in current iteration: 623


HBox(children=(IntProgress(value=0, max=623), HTML(value='')))

Number of cvr numbers in current iteration: 549


HBox(children=(IntProgress(value=0, max=549), HTML(value='')))

Number of cvr numbers in current iteration: 303


HBox(children=(IntProgress(value=0, max=303), HTML(value='')))

Number of cvr numbers in current iteration: 275


HBox(children=(IntProgress(value=0, max=275), HTML(value='')))

Number of cvr numbers in current iteration: 218


HBox(children=(IntProgress(value=0, max=218), HTML(value='')))

Number of cvr numbers in current iteration: 23


HBox(children=(IntProgress(value=0, max=23), HTML(value='')))

Number of cvr numbers in current iteration: 33


HBox(children=(IntProgress(value=0, max=33), HTML(value='')))

Number of cvr numbers in current iteration: 470


HBox(children=(IntProgress(value=0, max=470), HTML(value='')))

Number of cvr numbers in current iteration: 372


HBox(children=(IntProgress(value=0, max=372), HTML(value='')))

Number of cvr numbers in current iteration: 317


HBox(children=(IntProgress(value=0, max=317), HTML(value='')))

Number of cvr numbers in current iteration: 433


HBox(children=(IntProgress(value=0, max=433), HTML(value='')))

Number of cvr numbers in current iteration: 454


HBox(children=(IntProgress(value=0, max=454), HTML(value='')))

Number of cvr numbers in current iteration: 36


HBox(children=(IntProgress(value=0, max=36), HTML(value='')))

Number of cvr numbers in current iteration: 53


HBox(children=(IntProgress(value=0, max=53), HTML(value='')))

Number of cvr numbers in current iteration: 581


HBox(children=(IntProgress(value=0, max=581), HTML(value='')))

Number of cvr numbers in current iteration: 541


HBox(children=(IntProgress(value=0, max=541), HTML(value='')))

Number of cvr numbers in current iteration: 663


HBox(children=(IntProgress(value=0, max=663), HTML(value='')))

Number of cvr numbers in current iteration: 497


HBox(children=(IntProgress(value=0, max=497), HTML(value='')))

Number of cvr numbers in current iteration: 536


HBox(children=(IntProgress(value=0, max=536), HTML(value='')))

Number of cvr numbers in current iteration: 63


HBox(children=(IntProgress(value=0, max=63), HTML(value='')))

Number of cvr numbers in current iteration: 65


HBox(children=(IntProgress(value=0, max=65), HTML(value='')))

Number of cvr numbers in current iteration: 672


HBox(children=(IntProgress(value=0, max=672), HTML(value='')))

Number of cvr numbers in current iteration: 605


HBox(children=(IntProgress(value=0, max=605), HTML(value='')))

Number of cvr numbers in current iteration: 502


HBox(children=(IntProgress(value=0, max=502), HTML(value='')))

Number of cvr numbers in current iteration: 485


HBox(children=(IntProgress(value=0, max=485), HTML(value='')))

Number of cvr numbers in current iteration: 505


HBox(children=(IntProgress(value=0, max=505), HTML(value='')))

Number of cvr numbers in current iteration: 42


HBox(children=(IntProgress(value=0, max=42), HTML(value='')))

Number of cvr numbers in current iteration: 116


HBox(children=(IntProgress(value=0, max=116), HTML(value='')))

Number of cvr numbers in current iteration: 749


HBox(children=(IntProgress(value=0, max=749), HTML(value='')))

Number of cvr numbers in current iteration: 576


HBox(children=(IntProgress(value=0, max=576), HTML(value='')))

Number of cvr numbers in current iteration: 774


HBox(children=(IntProgress(value=0, max=774), HTML(value='')))

Number of cvr numbers in current iteration: 612


HBox(children=(IntProgress(value=0, max=612), HTML(value='')))

Number of cvr numbers in current iteration: 654


HBox(children=(IntProgress(value=0, max=654), HTML(value='')))

Number of cvr numbers in current iteration: 55


HBox(children=(IntProgress(value=0, max=55), HTML(value='')))

Number of cvr numbers in current iteration: 70


HBox(children=(IntProgress(value=0, max=70), HTML(value='')))

Number of cvr numbers in current iteration: 774


HBox(children=(IntProgress(value=0, max=774), HTML(value='')))

Number of cvr numbers in current iteration: 651


HBox(children=(IntProgress(value=0, max=651), HTML(value='')))

Number of cvr numbers in current iteration: 747


HBox(children=(IntProgress(value=0, max=747), HTML(value='')))

Number of cvr numbers in current iteration: 773


HBox(children=(IntProgress(value=0, max=773), HTML(value='')))

Number of cvr numbers in current iteration: 797


HBox(children=(IntProgress(value=0, max=797), HTML(value='')))

Number of cvr numbers in current iteration: 58


HBox(children=(IntProgress(value=0, max=58), HTML(value='')))

Number of cvr numbers in current iteration: 147


HBox(children=(IntProgress(value=0, max=147), HTML(value='')))

Number of cvr numbers in current iteration: 1190


HBox(children=(IntProgress(value=0, max=1190), HTML(value='')))

Number of cvr numbers in current iteration: 1086


HBox(children=(IntProgress(value=0, max=1086), HTML(value='')))

Number of cvr numbers in current iteration: 1327


HBox(children=(IntProgress(value=0, max=1327), HTML(value='')))

Number of cvr numbers in current iteration: 1736


HBox(children=(IntProgress(value=0, max=1736), HTML(value='')))

Number of cvr numbers in current iteration: 2623


HBox(children=(IntProgress(value=0, max=2623), HTML(value='')))

Number of cvr numbers in current iteration: 236


HBox(children=(IntProgress(value=0, max=236), HTML(value='')))

Number of cvr numbers in current iteration: 166


HBox(children=(IntProgress(value=0, max=166), HTML(value='')))

Number of cvr numbers in current iteration: 1756


HBox(children=(IntProgress(value=0, max=1756), HTML(value='')))

Number of cvr numbers in current iteration: 1355


HBox(children=(IntProgress(value=0, max=1355), HTML(value='')))

Number of cvr numbers in current iteration: 1058


HBox(children=(IntProgress(value=0, max=1058), HTML(value='')))

Number of cvr numbers in current iteration: 981


HBox(children=(IntProgress(value=0, max=981), HTML(value='')))

Number of cvr numbers in current iteration: 861


HBox(children=(IntProgress(value=0, max=861), HTML(value='')))

Number of cvr numbers in current iteration: 87


HBox(children=(IntProgress(value=0, max=87), HTML(value='')))

Number of cvr numbers in current iteration: 132


HBox(children=(IntProgress(value=0, max=132), HTML(value='')))

Number of cvr numbers in current iteration: 1121


HBox(children=(IntProgress(value=0, max=1121), HTML(value='')))

Number of cvr numbers in current iteration: 897


HBox(children=(IntProgress(value=0, max=897), HTML(value='')))

Number of cvr numbers in current iteration: 894


HBox(children=(IntProgress(value=0, max=894), HTML(value='')))

Number of cvr numbers in current iteration: 817


HBox(children=(IntProgress(value=0, max=817), HTML(value='')))

Number of cvr numbers in current iteration: 964


HBox(children=(IntProgress(value=0, max=964), HTML(value='')))

Number of cvr numbers in current iteration: 107


HBox(children=(IntProgress(value=0, max=107), HTML(value='')))

Number of cvr numbers in current iteration: 1


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Number of cvr numbers in current iteration: 1050


HBox(children=(IntProgress(value=0, max=1050), HTML(value='')))

Number of cvr numbers in current iteration: 948


HBox(children=(IntProgress(value=0, max=948), HTML(value='')))

Number of cvr numbers in current iteration: 927


HBox(children=(IntProgress(value=0, max=927), HTML(value='')))

Number of cvr numbers in current iteration: 1213


HBox(children=(IntProgress(value=0, max=1213), HTML(value='')))

Number of cvr numbers in current iteration: 1233


HBox(children=(IntProgress(value=0, max=1233), HTML(value='')))

Number of cvr numbers in current iteration: 1


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Number of cvr numbers in current iteration: 0


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Number of cvr numbers in current iteration: 0


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Number of cvr numbers in current iteration: 0


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Number of cvr numbers in current iteration: 0


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Number of cvr numbers in current iteration: 758


HBox(children=(IntProgress(value=0, max=758), HTML(value='')))

Number of cvr numbers in current iteration: 347


HBox(children=(IntProgress(value=0, max=347), HTML(value='')))

Number of cvr numbers in current iteration: 0


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Number of cvr numbers in current iteration: 0


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Number of cvr numbers in current iteration: 595


HBox(children=(IntProgress(value=0, max=595), HTML(value='')))




In [91]:
# Function to use later for aggregating our data for each CVR file. Also able to handle strings, return NaN if 
# different strings are present in the column
import numpy as np
from pandas.api.types import is_numeric_dtype

def mean_str(col):
    if is_numeric_dtype(col):
        return col.mean()
    else:
        return tuple(col.unique()) if col.nunique() == 1 else np.NaN

common_attr
#print(common_attr)
#useful_common_att = ['Assets','Equity','_score','ProfitLoss','LiabilitiesAndEquity']
#useful_all_att = ['Assets','Equity','_score','ProfitLoss','LiabilitiesAndEquity','Revenue','Dividend(Paid)',
                   'NumberOfEmployees','AverageNumberOfEmployees']

In [119]:
all_cvrs_df = pd.DataFrame()

for file in tqdm_notebook(os.listdir('Valid_Data/')):
    # Get the CVR number 
    cvr_num = file.split('.')[0]
    # Load the company CSV file into a pandas DF
    company_df = pd.read_csv('Valid_Data/' + file).drop(columns = 'Unnamed: 0')
    
    # Change the start_date column into only containing years for easier aggregation
    for idx in range(0, len(company_df)):
        company_df.at[idx, 'start_date'] = company_df.at[idx, 'start_date'][0:4]
    
    # Aggregate the rows by the 
    company_df = company_df.groupby('start_date').agg(mean_str)
    
    # Make a new column called target which includes next years revenue 
    company_df['Target'] = company_df['Revenue'].shift(-1)
     
    company_df = company_df[~company_df.Target.isna()]    
    
    all_cvrs_df = pd.concat([company_df, all_cvrs_df], ignore_index=True, axis = 0)
    

HBox(children=(IntProgress(value=0, max=23854), HTML(value='')))

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.




In [121]:
all_cvrs_df.to_pickle('all_data.pkl')