In [9]:
import pandas as pd
from tqdm import tqdm
import json
import os

In [10]:
# Read the BDL JSON export once; later cells use the resulting `data` object.
with open('../Data/BDL/gus_bdl.json', encoding='utf-8') as bdl_file:
    data = json.loads(bdl_file.read())

In [11]:
def parseData(subject: str, dimension: str, folder: str, out: str):
    records = []
    measure_unit =  data['measureUnits'][str(data['subjects'][subject]['dimensions'][dimension]['measureUnit'])]['name']
    for key, value in data['subjects'][subject]['dimensions'][dimension]['records'].items():

        for row in value:
            rok: int
            msc_od: int
            msc_do: int

            match row['dateRange']['tag']:
                case 'YearRange':
                    rok = row['dateRange']['contents']
                    msc_od = 1
                    msc_do = 12
                case 'MonthRange':
                    tmp = row['dateRange']['contents'][0].split('-')
                    rok = int(tmp[0])
                    msc_od = int(tmp[1])

                    tmp = row['dateRange']['contents'][1].split('-')
                    msc_do = int(tmp[1])
                case 'QuarterRange':
                    tmp = row['dateRange']['contents'][0].split('-q')
                    rok = int(tmp[0])
                    q = int(tmp[1])
                    msc_od = 3*(q - 1) + 1

                    tmp = row['dateRange']['contents'][1].split('-q')
                    q = int(tmp[1])
                    msc_do = 3*(q - 1) + 3
                case _:
                    raise ValueError
            new = {
                'wojewodztwo': data['territorialUnitNames'][key],
                'rok': rok,
                'msc_od': msc_od,
                'msc_do': msc_do,
                'wartosc': row['value'],
                'jednostka': measure_unit
            }
            records.append(new)
    df = pd.DataFrame.from_records(records)
    df.to_csv(f'../Data/BDL/csv/{folder}/{out}.csv', index=False)

In [12]:
# Write one CSV per (subject, dimension) pair, grouped into one folder per subject.
for subject, subject_info in tqdm(data['subjects'].items()):
    # Folder name: lower-cased subject name with newlines -> '__',
    # spaces -> '_' and '/' removed so it stays a single path component.
    path = subject_info['name'].lower().replace('\n', '__').replace(' ', '_').replace('/', '')
    # exist_ok makes the call safe to repeat on re-runs without a separate exists() check.
    os.makedirs(f'../Data/BDL/csv/{path}', exist_ok=True)

    for key in subject_info['dimensions']:
        # Same normalisation as the folder name, including dropping '/', which
        # would otherwise point the CSV at a sub-directory that does not exist.
        output = key.lower().replace('\n', '__').replace(' ', '_').replace('/', '')
        parseData(subject, key, path, output)

100%|██████████| 34/34 [00:01<00:00, 27.70it/s]
