In [4]:
# Stdlib imports
import re
from pathlib import Path
from datetime import datetime as dt

# 3rd party imports
import pandas as pd

# Local imports
from pymonet import monet_scraper as scr

In [None]:
indicator_table_path = Path("../results/indicator_table.csv")
if not indicator_table_path.exists():
    print("Scraping...")
    monet_soup = await parse_dynamic_webpage('https://www.bfs.admin.ch/bfs/en/home/statistics/sustainable-development/monet-2030/all-indicators.html')
    monet_indicator_df = create_monet_indicator_list(monet_soup)
    monet_indicator_df.to_csv(indicator_table_path)
    print("-> done!")
else:
    print("Reading from disk...")
    monet_indicator_df = pd.read_csv(indicator_table_path).set_index("ID")
    print("-> done!")

In [None]:
# Preview the first ten rows of the indicator table.
monet_indicator_df.head(n=10)

In [None]:
df_list = []
counter = 0
n_indicators = len(monet_indicator_df)

start = dt.now()
for idx, indicator in monet_indicator_df.iterrows():
    counter += 1
    print(f"{counter}/{n_indicators}", end="\r")
    data_elements = await scrape_indicator_info(indicator["Hyperlink"])
    df = extract_all_data_files(data_elements)
    df["Indicator"] = indicator["Indicator"]
    df["SDG"] = indicator["SDG"]
    df["Topic"] = indicator["Topic"]
    df_list.append(df)
end = dt.now()
elapsed = end - start
print(f"Finished after {elapsed.seconds} seconds.")

In [None]:
# Stack the per-indicator tables into a single frame with a fresh row index,
# keeping only the summary columns in the order listed here.
summary_columns = [
    "SDG",
    "Topic",
    "Indicator",
    "Observable",
    "Description",
    "Units",
    "damid",
    "Data_url",
]
complete_data_df = pd.concat(df_list, ignore_index=True)[summary_columns]

In [None]:
# Write the summary table to disk, leaving out the positional row index.
summary_table_path = Path("../results/monet_datafile_summary_table.csv")
complete_data_df.to_csv(summary_table_path, index=False)

In [None]:
# Number of rows in the summary table.
complete_data_df.shape[0]

In [None]:
# Read every referenced Excel file; sheet_name=None makes read_excel return
# all sheets of a workbook as a dict, so `database` holds one dict per file.
database = [
    pd.read_excel(data_url, sheet_name=None)
    for data_url in complete_data_df["Data_url"]
]

In [None]:
# Number of Excel files that were read into `database`.
len(database)