In [None]:
import pandas as pd
import requests, os, json
from pandas import json_normalize

In [None]:
suspects = '../../analyses/kindergartens/suspects.csv'

# Load the CSV file. dtype=str keeps every organization number as text.
df = pd.read_csv(suspects, dtype=str)

# Stack all columns into one Series and keep each distinct ID once.
# Empty cells are still parsed as NaN with dtype=str (e.g. when the columns
# have different lengths), so drop them; otherwise NaN is treated as an
# organization number and later looked up as ".../enheter/nan".
all_ids = pd.concat([df[col] for col in df.columns]).dropna().unique()
fd = pd.DataFrame(all_ids, columns=['organization number'])

In [None]:
# # Earlier, uncached version of get_org_details (superseded by the definition in the next cell); kept for reference only.
# def get_org_details(orgnr):
#     url = f'https://data.brreg.no/enhetsregisteret/api/enheter/{orgnr}'
#     response = requests.get(url)
#     if response.status_code == 200:
#         return response.json()
#     else:
#         return {'organisasjonsnummer': orgnr, 'status': '404 - Not Found'}

In [None]:
# Function to get organization details from organization number
def get_org_details(orgnr):
    """Look up one unit in the Enhetsregisteret API, caching the outcome on disk.

    Both outcomes are cached under ``json_data/`` (relative to the working
    directory): a successful response as ``<orgnr>.json`` and a failed lookup
    as ``<orgnr>_error.log``. A later call for the same ``orgnr`` is answered
    from whichever file exists, without a new request.

    Parameters
    ----------
    orgnr : str
        Organization number; interpolated into the URL and the file names.

    Returns
    -------
    dict
        The decoded JSON response on success, otherwise
        ``{'organisasjonsnummer': orgnr, 'status': <error message>}``.
    """
    cache_dir = 'json_data'
    url = f'https://data.brreg.no/enhetsregisteret/api/enheter/{orgnr}'
    filename = f'{cache_dir}/{orgnr}.json'
    error_log_file = f'{cache_dir}/{orgnr}_error.log'

    # A previous successful lookup: reuse the stored response.
    if os.path.exists(filename):
        with open(filename, 'r') as file:
            return json.load(file)
    # A previous failed lookup: reuse the stored error message.
    if os.path.exists(error_log_file):
        with open(error_log_file, 'r') as file:
            return {'organisasjonsnummer': orgnr, 'status': file.read()}

    # Create the cache directory before the request so that BOTH the success
    # path and the error-log path can write to it. Previously it was only
    # created on success, so a failed first request raised FileNotFoundError.
    os.makedirs(cache_dir, exist_ok=True)

    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        data = response.json()
    except requests.exceptions.HTTPError as http_err:
        error_message = f"HTTP error occurred: {http_err}"
    except requests.exceptions.ConnectionError as conn_err:
        error_message = f"Connection error occurred: {conn_err}"
    except requests.exceptions.Timeout as timeout_err:
        error_message = f"Timeout error occurred: {timeout_err}"
    except requests.exceptions.RequestException as req_err:
        error_message = f"An error occurred: {req_err}"
    except ValueError as json_err:
        # Older requests versions raise a plain ValueError subclass from
        # response.json() when the body is not valid JSON.
        error_message = f"Invalid JSON in response: {json_err}"
    else:
        # Save the JSON data to a file
        with open(filename, 'w') as file:
            json.dump(data, file)
        return data

    # Log the error to a file
    with open(error_log_file, 'w') as file:
        file.write(error_message)

    return {'organisasjonsnummer': orgnr, 'status': error_message}

In [None]:
# Number of distinct organization numbers collected from the CSV.
fd.shape[0]

In [None]:
# Spot-check: look up a single organization number and display what comes back.
get_org_details('811616532')

In [None]:
# Spot-check a second organization number.
get_org_details('996920917')

In [None]:
# for id in all_ids[-100:]:
#     print(id, type(id))
#     print(get_org_details(id))

In [None]:
# fd1 = fd.iloc[-30:]
# fd2 = fd.iloc[:5]
# fd = pd.concat((fd1, fd2))

In [None]:
# Look up every collected organization number and store the returned record
# alongside it in a new 'Org Details' column.
fd['Org Details'] = fd['organization number'].map(get_org_details)

In [None]:
# Display the frame of organization numbers with their lookup results.
fd

In [None]:
# Turn each lookup record into one row; keys of nested objects are joined to
# their parent key with '_' to form the column name.
details_df = json_normalize(data=fd['Org Details'], sep='_')

In [None]:
# Display the flattened lookup results.
details_df

In [None]:
# Rows with a 'status' value are lookups that failed (get_org_details adds
# that key when a request errors). reindex() yields an all-NaN 'status' column
# when every lookup succeeded and json_normalize produced no such column;
# the previous attribute access raised AttributeError in that case.
df_404 = details_df.loc[
    details_df.reindex(columns=['status'])['status'].notna(),
    ['organisasjonsnummer'],
]

In [None]:
# Display the organization numbers whose lookup returned an error status.
df_404

In [None]:
# Write the organization numbers with a failed lookup to CSV, omitting the index column.
df_404.to_csv('suspect_ids_not_found.csv', index=False)

In [None]:
# Keep only rows without an error status. reindex() supplies an all-NaN
# 'status' column when no lookup failed (the column is then absent), so every
# row is kept instead of raising AttributeError.
details_df = details_df.loc[details_df.reindex(columns=['status'])['status'].isna()]

In [None]:
# Save the details of the successfully resolved organizations to a new CSV file
# (index column omitted).
details_df.to_csv('suspect_ids_full_details.csv', index=False)

In [None]:
# Display the details frame after the error rows were removed.
details_df


In [None]:
# `colnames` is the R spelling and is not defined in Python or pandas, so the
# original cell raised NameError. List the column labels the pandas way.
details_df.columns.tolist()

In [None]:
# Display the column Index; these flattened names are the keys used in column_translations.
details_df.columns

In [None]:
# Mapping from the flattened Norwegian column names to English names, built
# section by section in the same order as before.
column_translations = {}

# Fields without a nested prefix.
column_translations.update({
    'organisasjonsnummer': 'organization_number',
    'status': 'status',
    'navn': 'name',
    'hjemmeside': 'website',
    'registreringsdatoEnhetsregisteret': 'registration_date_unit_register',
    'registrertIMvaregisteret': 'registered_in_vat_register',
    'frivilligMvaRegistrertBeskrivelser': 'voluntary_vat_registered_descriptions',
    'antallAnsatte': 'number_of_employees',
    'harRegistrertAntallAnsatte': 'has_registered_number_of_employees',
    'stiftelsesdato': 'establishment_date',
    'registrertIForetaksregisteret': 'registered_in_business_register',
    'registrertIStiftelsesregisteret': 'registered_in_foundation_register',
    'registrertIFrivillighetsregisteret': 'registered_in_volunteer_register',
    'sisteInnsendteAarsregnskap': 'last_submitted_annual_accounts',
    'konkurs': 'bankruptcy',
    'underAvvikling': 'under_liquidation',
    'underTvangsavviklingEllerTvangsopplosning': 'under_forced_liquidation_or_dissolution',
    'maalform': 'language_form',
    'vedtektsdato': 'articles_of_association_date',
    'vedtektsfestetFormaal': 'statutory_purpose',
    'aktivitet': 'activity',
})

# organisasjonsform_* (organization form).
column_translations.update({
    'organisasjonsform_kode': 'organization_form_code',
    'organisasjonsform_beskrivelse': 'organization_form_description',
    'organisasjonsform__links_self_href': 'organization_form_links_self_href',
})

# postadresse_* (postal address).
column_translations.update({
    'postadresse_land': 'postal_address_country',
    'postadresse_landkode': 'postal_address_country_code',
    'postadresse_postnummer': 'postal_address_postal_code',
    'postadresse_poststed': 'postal_address_city',
    'postadresse_adresse': 'postal_address_address',
    'postadresse_kommune': 'postal_address_municipality',
    'postadresse_kommunenummer': 'postal_address_municipality_number',
})

# naeringskode1_* (first industry code) and hjelpeenhetskode_* (auxiliary unit).
column_translations.update({
    'naeringskode1_kode': 'industry_code1_code',
    'naeringskode1_beskrivelse': 'industry_code1_description',
    'hjelpeenhetskode_kode': 'auxiliary_unit_code',
    'hjelpeenhetskode_beskrivelse': 'auxiliary_unit_description',
})

# forretningsadresse_* (business address).
column_translations.update({
    'forretningsadresse_land': 'business_address_country',
    'forretningsadresse_landkode': 'business_address_country_code',
    'forretningsadresse_postnummer': 'business_address_postal_code',
    'forretningsadresse_poststed': 'business_address_city',
    'forretningsadresse_adresse': 'business_address_address',
    'forretningsadresse_kommune': 'business_address_municipality',
    'forretningsadresse_kommunenummer': 'business_address_municipality_number',
})

# institusjonellSektorkode_* (institutional sector) and the record's own link.
column_translations.update({
    'institusjonellSektorkode_kode': 'institutional_sector_code',
    'institusjonellSektorkode_beskrivelse': 'institutional_sector_description',
    '_links_self_href': 'links_self_href',
})

# naeringskode2_* / naeringskode3_* (additional industry codes).
column_translations.update({
    'naeringskode2_kode': 'industry_code2_code',
    'naeringskode2_beskrivelse': 'industry_code2_description',
    'naeringskode3_kode': 'industry_code3_code',
    'naeringskode3_beskrivelse': 'industry_code3_description',
})

# Deletion, bankruptcy, liquidation and parent-unit fields.
column_translations.update({
    'slettedato': 'deletion_date',
    'konkursdato': 'bankruptcy_date',
    'overordnetEnhet': 'parent_unit',
    '_links_overordnetEnhet_href': 'links_parent_unit_href',
    'underAvviklingDato': 'under_liquidation_date',
})


In [None]:
# Apply the Norwegian -> English mapping to the column labels; columns without
# an entry in the mapping keep their original name.
details_df_t = details_df.rename(column_translations, axis='columns')

In [None]:
# NOTE(review): the source field 'frivilligMvaRegistrertBeskrivelser' appears
# to be a JSON array in the API response, so these cells would hold Python
# lists -- confirm. Lists are unhashable and make Series.unique() raise
# TypeError. explode() puts each list element on its own row first and leaves
# scalar and NaN cells unchanged.
details_df_t['voluntary_vat_registered_descriptions'].explode().unique()

In [None]:
# Display the details frame with the translated column names.
details_df_t