In [1]:
import itertools
import requests
import datetime
import re
import json
import collections

In [40]:
class SSBData():
    """Pull a table from the ssb.no API and parse it to a user friendly format.

    The query is read from a JSON file and POSTed to ``url_start + table_id``
    when the object is created. The decoded response is kept in ``_content``
    and must expose the top-level keys ``id``, ``size``, ``dimension`` and
    ``value``; the time dimension is the one named ``'Tid'``.

    ``value`` is treated as a flat, row-major array over the dimensions listed
    in ``id`` (the last dimension varies fastest), with the order of the codes
    inside a dimension given by that dimension's ``category.index``.
    """

    url_start = r'https://data.ssb.no/api/v0/en/table/'
    # Seconds to wait for the API before giving up instead of hanging forever.
    request_timeout = 60

    def __init__(self, table_id, query_patch):
        """Load the query stored at ``query_patch`` and fetch ``table_id``.

        Args:
            table_id: Table identifier appended to ``url_start``.
            query_patch: Path of the JSON file holding the POST body.
        """
        self.table_id = table_id
        self._query_json = self._get_api_params(query_patch)
        self._content = self._get_data_from_api(self.url_start + table_id, self._query_json)

    @property
    def content(self):
        """The decoded API response, unchanged."""
        return self._content

    @property
    def shape(self):
        """Sizes of the category dimensions plus the length of the time index.

        Returns:
            dict with one ``'Category <i>'`` entry per non-time dimension and
            the number of periods under the key ``'Index: '``.
        """
        sizes = list(self._content['size'])
        # Take the time dimension out wherever it sits, not only at the end.
        index_len = sizes.pop(self._time_position())
        out = {'Category ' + str(i): v for i, v in enumerate(sizes)}
        out['Index: '] = index_len
        return out

    @property
    def categories(self):
        """Dimension ids in response order, without the time dimension.

        Raises:
            ValueError: if the response has no ``'Tid'`` dimension.
        """
        catego = list(self._content['id'])  # copy: never mutate the response
        catego.remove('Tid')
        return catego

    @property
    def columns(self):
        """Map every category dimension to its ``{code: label}`` dict.

        The codes of each dimension are listed in the order given by
        ``category.index`` so that they line up with the layout of ``value``
        even when the ``label`` mapping is ordered differently.
        """
        out = {}
        for cate in self.categories:
            category = self._content['dimension'][cate]['category']
            labels = category['label']
            positions = self._code_positions(category)
            # Codes unknown to the index (not expected) keep label order, last.
            ordered = sorted(labels, key=lambda code: positions.get(code, len(positions)))
            out[cate] = {code: labels[code] for code in ordered}
        return out

    def _time_position(self):
        """Return the position of ``'Tid'`` in ``id`` (last dimension if absent)."""
        ids = self._content['id']
        return ids.index('Tid') if 'Tid' in ids else len(ids) - 1

    @staticmethod
    def _code_positions(category):
        """Return ``{code: position}`` for one dimension's ``category`` object.

        ``index`` may be a ``{code: position}`` mapping or an ordered list of
        codes; when it is missing the order of ``label`` is used instead.
        """
        positions = category.get('index')
        if positions is None:
            return {code: pos for pos, code in enumerate(category['label'])}
        if isinstance(positions, dict):
            return dict(positions)
        return {code: pos for pos, code in enumerate(positions)}

    def _get_api_params(self, patch):
        """Read the JSON query stored in the file at ``patch``."""
        with open(patch, 'r', encoding='UTF-8') as j_f:
            return json.load(j_f)

    def _get_data_from_api(self, url, params):
        """POST ``params`` as JSON to ``url`` and return the decoded response.

        Raises:
            requests.HTTPError: if the API answers with an error status, so an
                error body is never mistaken for table data.
            requests.Timeout: if no answer arrives within ``request_timeout``.
        """
        raw_data = requests.post(url, json=params, timeout=self.request_timeout)
        raw_data.raise_for_status()
        return json.loads(raw_data.content)

    def _parse_date(self, date, freq):
        """Convert a period code such as ``'2020K1'`` to the period's first day.

        Args:
            date: Period code; the year and the period number are separated
                by ``'K'``.
            freq: Frequency label of the time dimension. Only ``'quarter'``
                is supported.

        Raises:
            KeyError: for an unsupported ``freq`` or an unknown period number.
        """
        # First month of each quarter.
        repl = {'quarter': {'1': 1,
                            '2': 4,
                            '3': 7,
                            '4': 10}}
        d_temp = date.split('K')
        return datetime.date(int(d_temp[0]), repl[freq][d_temp[1]], 1)

    @property
    def index(self):
        """Map each time position to its date, ordered by position.

        Sorting by position makes ``index.values()`` follow the layout of
        ``value`` even if the response lists the periods in another order.
        """
        tid = self._content['dimension']['Tid']
        frequency = tid['label']
        positions = self._code_positions(tid['category'])
        by_position = sorted(positions.items(), key=lambda item: item[1])
        return {pos: self._parse_date(code, frequency) for code, pos in by_position}

    def iter_columns(self):
        """Yield one tuple of category codes per output column.

        The tuples are the cartesian product of the codes of every category
        dimension, in the dimension order of ``categories``.
        """
        meta = self.columns
        yield from itertools.product(*(list(meta[cat]) for cat in meta))

    def iter_values(self):
        """Yield the raw values in the order they appear in the response."""
        yield from self._content['value']

    def serialize(self):
        """Return ``{column tuple: [value per period]}``.

        Every value is looked up through its row-major offset, so the result
        is correct wherever ``'Tid'`` sits among the dimensions. When ``'Tid'``
        is the last dimension this is the same as reading ``value`` in order.

        Returns:
            collections.defaultdict(list) keyed by the tuples produced by
            ``iter_columns``; each list follows the order of ``index``.
        """
        idx = self.index
        sizes = list(self._content['size'])
        values = self._content['value']
        time_pos = self._time_position()

        # Row-major strides: distance in ``value`` between neighbours on each axis.
        strides = [1] * len(sizes)
        for pos in range(len(sizes) - 2, -1, -1):
            strides[pos] = strides[pos + 1] * sizes[pos + 1]

        meta = self.columns
        cat_dims = [pos for pos in range(len(sizes)) if pos != time_pos]
        # Position of every code inside its own dimension.
        code_pos = [{code: pos for pos, code in enumerate(meta[cat])} for cat in meta]

        out = collections.defaultdict(list)
        for col_name in self.iter_columns():
            base = sum(lookup[code] * strides[dim]
                       for lookup, code, dim in zip(code_pos, col_name, cat_dims))
            for period in idx:
                out[col_name].append(values[base + period * strides[time_pos]])
        return out

In [41]:
# Fetch table 09171; the POST body is read from tab_09171.json and the request is sent on construction.
ob = SSBData('09171', 'tab_09171.json')

In [44]:
# Show the raw mapping from column-code tuples to their lists of values.
ob.serialize()

defaultdict(list,
            {('nr23_6', 'Prob'): [1590874, 1534042, 1355521, 1383458, 1535161],
             ('nr23_6', 'Pin'): [761709, 732772, 663782, 664331, 737733],
             ('nr23_6', 'BNPB'): [829165, 801269, 691740, 719126, 797428],
             ('pub2X01_02', 'Prob'): [10348, 9543, 9811, 19172, 10120],
             ('pub2X01_02', 'Pin'): [5039, 4860, 9998, 6886, 4902],
             ('pub2X01_02', 'BNPB'): [5309, 4683, -187, 12286, 5219],
             ('pub2X03', 'Prob'): [30455, 29182, 24700, 23661, 27867],
             ('pub2X03', 'Pin'): [17926, 14994, 14174, 16296, 18803],
             ('pub2X03', 'BNPB'): [12529, 14188, 10526, 7365, 9064]})

In [45]:
# Keep the serialized mapping so it can be handed to pandas.
dane = ob.serialize()

In [46]:
import pandas as pd

In [49]:
# The tuple keys become two-level column headers; rows are labelled with the parsed period dates.
df = pd.DataFrame(dane, index=ob.index.values())

In [50]:
# Display the assembled table.
df

Unnamed: 0_level_0,nr23_6,nr23_6,nr23_6,pub2X01_02,pub2X01_02,pub2X01_02,pub2X03,pub2X03,pub2X03
Unnamed: 0_level_1,Prob,Pin,BNPB,Prob,Pin,BNPB,Prob,Pin,BNPB
2019-10-01,1590874,761709,829165,10348,5039,5309,30455,17926,12529
2020-01-01,1534042,732772,801269,9543,4860,4683,29182,14994,14188
2020-04-01,1355521,663782,691740,9811,9998,-187,24700,14174,10526
2020-07-01,1383458,664331,719126,19172,6886,12286,23661,16296,7365
2020-10-01,1535161,737733,797428,10120,4902,5219,27867,18803,9064
