In [83]:
import itertools
import requests
import datetime
import re
import json
import collections

In [84]:
class TimeSeries:
    """Plain container tying a series code to its index and its values.

    Attributes:
        code: identifier of the series, stored as passed in.
        index: the index object for the series, stored as passed in.
        values: the values of the series, stored as passed in.
    """

    def __init__(self, code, index, values):
        # The three arguments are kept by reference; nothing is copied or checked.
        self.code, self.index, self.values = code, index, values

In [103]:
class SSBData():
    """Fetch one table from the API at ``url_start`` and reshape it column-wise.

    The constructor loads a JSON query from disk, POSTs it to
    ``url_start + table_id`` and keeps the decoded response in ``content``.
    The response is expected to provide the keys this class reads: ``id``
    (dimension names), ``size`` (dimension lengths), ``dimension`` (per
    dimension ``label`` and ``category``) and ``value`` (flat list of cells).
    NOTE(review): this matches the JSON-stat 2 layout -- confirm that the
    query file requests that response format.

    The dimension named ``'Tid'`` is treated as the time axis; every other
    dimension contributes one level to the column keys.

    Attributes:
        table_id: table identifier appended to ``url_start``.
        meta: the query loaded from ``query_patch``.
        content: decoded API response (left unmodified by this class).
        shape: ``{'Category 0': n0, ..., 'index': n_time}``.
        categories: dimension names without ``'Tid'``.
        columns: ``{dimension: {category code: label}}``.
    """

    url_start = r'https://data.ssb.no/api/v0/en/table/'
    # Seconds to wait for the API; without a timeout requests can block forever.
    request_timeout = 30
    # First calendar month of each quarter, keyed by the digit after 'K'.
    _quarter_start_month = {'1': 1, '2': 4, '3': 7, '4': 10}

    def __init__(self, table_id, query_patch):
        """Load the query at ``query_patch`` and download table ``table_id``.

        Args:
            table_id: table identifier (string) appended to ``url_start``.
            query_patch: path of a UTF-8 JSON file holding the query body.

        Raises:
            OSError / json.JSONDecodeError: the query file cannot be read.
            requests.RequestException: the request failed, timed out, or the
                server answered with an HTTP error status.
        """
        self.table_id = table_id
        self.meta = self._get_api_params(query_patch)
        self.content = self._get_data_from_api(self.url_start + table_id, self.meta)
        self.shape = self._get_shape(self.content['size'], self.content['id'])
        self.categories = self._extract_categories(self.content['id'])
        self.columns = self._extract_columns()

    def _extract_categories(self, catego):
        """Return the dimension names in ``catego`` except ``'Tid'``.

        A new list is returned; ``catego`` itself is left untouched so that
        ``content['id']`` keeps lining up with ``content['size']``.
        """
        return [name for name in catego if name != 'Tid']

    def _extract_columns(self):
        """Return ``{dimension: {category code: label}}`` for every category."""
        dimensions = self.content['dimension']
        return {cate: dict(dimensions[cate]['category']['label'])
                for cate in self.categories}

    def _get_shape(self, size, ids=None):
        """Describe the table shape as a dict.

        Args:
            size: dimension lengths, in dimension order.
            ids: optional dimension names parallel to ``size``. When given and
                containing ``'Tid'``, that position is used as the time axis;
                otherwise the last entry of ``size`` is taken as the time axis.

        Returns:
            ``{'Category 0': n0, 'Category 1': n1, ..., 'index': n_time}``.
        """
        time_pos = len(size) - 1
        if ids is not None and 'Tid' in ids:
            time_pos = list(ids).index('Tid')
        others = [length for pos, length in enumerate(size) if pos != time_pos]
        out = {'Category ' + str(i): length for i, length in enumerate(others)}
        out['index'] = size[time_pos]
        return out

    def _get_api_params(self, patch):
        """Read and decode the JSON query stored at path ``patch``."""
        with open(patch, 'r', encoding='UTF-8') as j_f:
            return json.load(j_f)

    def _get_data_from_api(self, url, params):
        """POST ``params`` as JSON to ``url`` and return the decoded body.

        Raises:
            requests.RequestException: on connection errors, timeouts, or an
                HTTP error status (via ``raise_for_status``).
        """
        raw_data = requests.post(url, json=params, timeout=self.request_timeout)
        # Fail here with the HTTP status instead of later on a missing key.
        raw_data.raise_for_status()
        return json.loads(raw_data.content)

    def _parse_date(self, date, freq):
        """Convert a period code into the first day of that period.

        Args:
            date: period code, e.g. ``'2019K4'`` for a quarter.
            freq: frequency label taken from the ``Tid`` dimension.

        Returns:
            ``datetime.date`` for the first day of the period.

        Raises:
            KeyError: ``freq`` is not supported, or the quarter digit is not
                one of 1-4.

        NOTE(review): the 'month' (``'2020M03'``) and 'year' (``'2020'``)
        branches assume the API's English labels and code formats -- confirm
        against a monthly/annual table before relying on them.
        """
        if freq == 'quarter':
            d_temp = date.split('K')
            return datetime.date(int(d_temp[0]), self._quarter_start_month[d_temp[1]], 1)
        if freq == 'month':
            d_temp = date.split('M')
            return datetime.date(int(d_temp[0]), int(d_temp[1]), 1)
        if freq == 'year':
            return datetime.date(int(date), 1, 1)
        # KeyError is kept because the earlier lookup-table version raised it.
        raise KeyError('unsupported frequency label: %r' % (freq,))

    def get_index(self):
        """Return ``{position: date}`` for the time axis, ordered by position.

        Sorting by position means ``.values()`` yields the dates in table
        order even if the response lists the period codes in another order.
        """
        tid = self.content['dimension']['Tid']
        frequency = tid['label']
        dates = tid['category']['index']
        ordered = sorted(dates.items(), key=lambda item: item[1])
        return {pos: self._parse_date(code, frequency) for code, pos in ordered}

    def iter_columns(self):
        """Yield one tuple of category codes per column (cartesian product)."""
        categories = [list(codes) for codes in self.columns.values()]
        yield from itertools.product(*categories)

    def iter_values(self):
        """Yield the raw cell values in the order the response lists them."""
        yield from self.content['value']

    def _category_positions(self, cat):
        """Return ``{category code: position}`` for dimension ``cat``.

        Uses ``category['index']`` when present (dict or list form) and falls
        back to the order of ``category['label']`` otherwise.
        """
        category = self.content['dimension'][cat]['category']
        index = category.get('index')
        if index is None:
            index = list(category['label'])
        if isinstance(index, dict):
            return index
        return {code: pos for pos, code in enumerate(index)}

    def _dimension_strides(self):
        """Return ``({dimension: stride}, total cell count)``.

        Strides follow row-major order over ``content['id']``: the last
        dimension has stride 1, i.e. varies fastest in ``content['value']``.
        """
        ids = self.content['id']
        sizes = self.content['size']
        if len(ids) != len(sizes):
            raise ValueError(
                "'id' has %d entries but 'size' has %d" % (len(ids), len(sizes)))
        strides = {}
        step = 1
        for name, length in zip(reversed(ids), reversed(sizes)):
            strides[name] = step
            step *= length
        return strides, step

    def serialize(self):
        """Return ``{column code tuple: [values in time order]}``.

        Each value is located through its row-major offset, so the result is
        correct wherever ``'Tid'`` sits among the dimensions and whatever
        order the category labels are listed in.

        Raises:
            ValueError: the number of values does not match the product of
                the dimension sizes.
        """
        idx = self.get_index()
        vals = self.content['value']
        strides, expected = self._dimension_strides()
        if len(vals) != expected:
            raise ValueError(
                'expected %d values from the dimension sizes, got %d'
                % (expected, len(vals)))
        positions = {cat: self._category_positions(cat) for cat in self.columns}
        time_step = strides['Tid']
        out = collections.defaultdict(list)
        for col_name in self.iter_columns():
            # Offset of this column's cell at time position 0.
            base = sum(strides[cat] * positions[cat][code]
                       for cat, code in zip(self.columns, col_name))
            out[col_name].extend(vals[base + time_step * pos] for pos in idx)
        return out

In [104]:
# Download table 09171 using the query stored next to the notebook.
ob = SSBData(table_id='09171', query_patch='tab_09171.json')

In [107]:
# Reshape the downloaded table into {tuple of category codes: list of values}.
dane = ob.serialize()

In [108]:
import pandas as pd

In [113]:
# One row per period (labelled with the dates from get_index()); the tuple
# keys of `dane` become the column labels.
df = pd.DataFrame(data=dane, index=list(ob.get_index().values()))

In [115]:
# Show the assembled frame as this cell's output.
df

Unnamed: 0_level_0,nr23_6,nr23_6,nr23_6,pub2X01_02,pub2X01_02,pub2X01_02,pub2X03,pub2X03,pub2X03
Unnamed: 0_level_1,Prob,Pin,BNPB,Prob,Pin,BNPB,Prob,Pin,BNPB
2019-10-01,1590874,761709,829165,10348,5039,5309,30455,17926,12529
2020-01-01,1534042,732772,801269,9543,4860,4683,29182,14994,14188
2020-04-01,1355521,663782,691740,9811,9998,-187,24700,14174,10526
2020-07-01,1383458,664331,719126,19172,6886,12286,23661,16296,7365
2020-10-01,1535161,737733,797428,10120,4902,5219,27867,18803,9064
