In [None]:
import requests
from requests.auth import HTTPBasicAuth
import pandas as pd
from tqdm.notebook import tqdm  

# Define extract function
def _value_or_default(mapping, key, default):
    """Return ``mapping[key]``, or ``default`` if the key is missing or None.

    ``dict.get(key, default)`` only falls back when the key is absent; a key
    that is present with a null value would otherwise end up formatted as the
    literal text "None".
    """
    value = mapping.get(key)
    return default if value is None else value


def _period_bounds(entry, open_end):
    """Return ``(gyldigFra, gyldigTil)`` read from ``entry['periode']``.

    A missing or null start becomes 'N/A'; a missing or null end becomes
    ``open_end``. A missing or null 'periode' is treated as empty.
    """
    periode = entry.get('periode') or {}
    start = _value_or_default(periode, 'gyldigFra', 'N/A')
    end = _value_or_default(periode, 'gyldigTil', open_end)
    return start, end


def _describe_periods(entries, label_key=None):
    """Join one "[label ]from X to Y" fragment per entry; 'N/A' if no entries."""
    if not entries:
        return 'N/A'

    fragments = []
    for entry in entries:
        start, end = _period_bounds(entry, 'now')
        fragment = f"from {start} to {end}"
        if label_key is not None:
            label = _value_or_default(entry, label_key, 'N/A')
            fragment = f"{label} {fragment}"
        fragments.append(fragment)
    return ', '.join(fragments)


def extract_company_features(cvr_number, username, password, timeout=30):
    """Look up one company by CVR number and summarise selected fields.

    Posts a term query on ``Vrvirksomhed.cvrNummer`` to the search endpoint
    using HTTP basic authentication and flattens parts of the first hit into
    display strings.

    Args:
        cvr_number: Value matched against ``Vrvirksomhed.cvrNummer``.
        username: Basic-auth user name.
        password: Basic-auth password.
        timeout: Seconds to wait for the server before giving up, so a single
            stalled connection cannot block a whole batch run.

    Returns:
        A dict with the keys "CVR", "kortBeskrivelse, periode",
        "branchekode, periode", "livsforloeb" and "virksomhedsstatus".
        An empty dict is returned when the request fails, the response status
        is not 200, or the search has no hits.
    """
    # NOTE(review): basic auth over plain http sends the credentials
    # unencrypted; switch to https if the endpoint supports it.
    url = "http://distribution.virk.dk/cvr-permanent/virksomhed/_search"
    body = {
        "query": {
            "bool": {
                "must": [{"term": {"Vrvirksomhed.cvrNummer": cvr_number}}]
            }
        }
    }

    try:
        response = requests.post(
            url,
            json=body,
            auth=HTTPBasicAuth(username, password),
            timeout=timeout,
        )
    except requests.RequestException as exc:
        # Report transport failures the same way as non-200 responses so one
        # bad request does not abort the caller's loop.
        print(f"ERROR: {exc}")
        return {}

    if response.status_code != 200:
        print(f"ERROR: {response.status_code}")
        return {}

    hits = response.json()['hits']['hits']
    if not hits:
        return {}

    company_data = hits[0]['_source']['Vrvirksomhed']

    kortBeskrivelse_periode = _describe_periods(
        company_data.get('virksomhedsform'), 'kortBeskrivelse')
    branchekode_periode = _describe_periods(
        company_data.get('hovedbranche'), 'branchekode')
    livsforloeb = _describe_periods(company_data.get('livsforloeb'))

    virksomhedsstatus = 'N/A'
    status_history = company_data.get('virksomhedsstatus') or []
    if status_history:
        # The last list element is used as the current status
        # (assumes the list is ordered oldest-first -- TODO confirm).
        latest = status_history[-1]
        status = _value_or_default(latest, 'status', 'N/A')
        gyldig_fra, gyldig_til = _period_bounds(latest, 'N/A')
        virksomhedsstatus = f"{status} (fra {gyldig_fra} til {gyldig_til})"

    return {
        "CVR": cvr_number,
        "kortBeskrivelse, periode": kortBeskrivelse_periode,
        "branchekode, periode": branchekode_periode,
        "livsforloeb": livsforloeb,
        "virksomhedsstatus": virksomhedsstatus,
    }


def collect_features_for_multiple_cvrs(cvr_numbers, username, password):
    """Fetch features for every distinct CVR number and return them as a table.

    Args:
        cvr_numbers: Iterable of hashable CVR numbers; duplicates are looked
            up only once.
        username: Passed through to ``extract_company_features``.
        password: Passed through to ``extract_company_features``.

    Returns:
        A ``pandas.DataFrame`` with one row per CVR number for which
        ``extract_company_features`` returned a non-empty dict. Rows follow
        the order in which each number first appears in ``cvr_numbers``.
    """
    # dict.fromkeys removes duplicates while keeping first-seen order; a set
    # would hand the rows back in hash order instead of input order.
    unique_cvr_numbers = list(dict.fromkeys(cvr_numbers))

    all_features = []
    for cvr_number in tqdm(unique_cvr_numbers, desc="#"):
        features = extract_company_features(cvr_number, username, password)
        if features:  # empty dict means the lookup failed or had no hits
            all_features.append(features)

    return pd.DataFrame(all_features)


# Credentials for the Virk CVR search endpoint (fill in before running).
CVRUSER = "INSERT HERE"
CVRPASSWORD = "INSERT HERE"

# CVR numbers to look up. `filtered_df` is not defined in this cell; it must
# already exist. The column is cast to int in place before being listed.
filtered_df['cvr_number'] = filtered_df['cvr_number'].astype(int)
cvr_numbers = filtered_df['cvr_number'].tolist()

# Fetch one row of features per distinct CVR number.
df_cvr = collect_features_for_multiple_cvrs(cvr_numbers, CVRUSER, CVRPASSWORD)

# Print the fetched table for a quick visual check.
print(df_cvr)


# Export the fetched table. The original wrote `df`, a name this cell never
# defines, instead of the `df_cvr` frame built above.
# 'utf-8-sig' prepends a byte-order mark to the file (presumably so
# spreadsheet tools detect the encoding -- confirm with the consumer).
output_path = r'INSERT HERE'
df_cvr.to_csv(output_path, index=False, encoding='utf-8-sig')