<a href="https://colab.research.google.com/github/BetaUliansyah/automasi-portal-djpk/blob/main/Automasi_Data_APBD.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Portal APBD DJPK v3 (full version)

In [None]:
import requests
import re
from bs4 import BeautifulSoup
from requests.exceptions import ConnectionError
import datetime
from pytz import timezone
import urllib3
import json
from random import randint
import time
import pandas as pd
import time

# Shared requests session; set_global_session() creates it on first call.
session = None
def google_drive():
    """Mount Google Drive in Colab and build a timestamped CSV path on it.

    Returns:
        str: the Drive folder path joined with a ``data-apbd-<timestamp>.csv``
        file name, timestamped in the Asia/Jakarta timezone.
    """
    from google.colab import drive
    drive.mount('/content/drive')

    # The file does ``import datetime`` (the module), so the class has to be
    # reached as ``datetime.datetime``; ``datetime.now`` raises AttributeError.
    now = datetime.datetime.now(timezone('Asia/Jakarta'))
    filename = "data-apbd-" + now.strftime("%Y-%m-%d--%H-%M") + ".csv"
    path = "/content/drive/My Drive/Colab Notebooks/Portal APBD/"
    return path + filename

def set_global_session():
    """Create the module-level ``session`` once and mount retrying adapters.

    Does nothing when ``session`` is already set. The session retries
    HEAD/GET/OPTIONS requests on the listed status codes, has certificate
    verification switched off and silences the matching urllib3 warning.
    """
    from requests.adapters import HTTPAdapter
    from urllib3.util.retry import Retry

    global session
    if session:
        return

    retry_options = dict(
        total=300,
        backoff_factor=0.5,
        status_forcelist=[429, 500, 502, 503, 504],
    )
    retried_methods = ["HEAD", "GET", "OPTIONS"]
    try:
        # Current urllib3 names this option ``allowed_methods``.
        retry_strategy = Retry(allowed_methods=retried_methods, **retry_options)
    except TypeError:
        # Older urllib3 releases only accept the former ``method_whitelist``.
        retry_strategy = Retry(method_whitelist=retried_methods, **retry_options)
    # Credits: https://findwork.dev/blog/advanced-usage-python-requests-timeouts-retries-hooks/

    adapter = HTTPAdapter(max_retries=retry_strategy)
    session = requests.Session()
    session.mount('https://', adapter)
    session.mount('http://', adapter)
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    session.verify = False

def get_token():
    """Fetch the APBD portal page and return the value of its ``_token`` input.

    Returns None when the response is not 200 or the body is empty.
    """
    with session.get('http://www.djpk.kemenkeu.go.id/portal/data/apbd') as response:
        page_ok = response.status_code == 200 and len(response.text) != 0
        if not page_ok:
            return None
        soup = BeautifulSoup(response.text, 'html.parser')
        token_input = soup.find("input", {"name": "_token"})
        return token_input['value']

def filter(token, tahun, provinsi, pemda):
    """POST one query to the portal's filter endpoint and return the raw body.

    Args:
        token: value of the page's ``_token`` form field.
        tahun: year to query.
        provinsi: province code ("--" is what the callers pass for all).
        pemda: local-government code ("--" is what the callers pass for all).

    Returns:
        The response text on HTTP 200; None on any other status or when the
        request raises. The elapsed time is printed in every case.
    """
    # Must be set before the try block: the finally clause reads it.
    t0 = time.time()
    try:
        with session.post('http://www.djpk.kemenkeu.go.id/portal/filter',
            data={'_token': token, 'data': 'apbd ', 'tahun': tahun, 'provinsi': provinsi, 'pemda': pemda}) as response:
            if response.status_code == 200:
                return response.text
            # Only non-200 responses reach this point, so report a failure.
            print('Request failed with status', response.status_code)
    except Exception as e:
        # Best effort: report the error class and let the caller see None.
        print('It failed :(', e.__class__.__name__)
    finally:
        print('Took', time.time() - t0, 'seconds')
    return None
    # Credits: https://www.peterbe.com/plog/best-practice-with-retries-with-requests

                            
def get_all_tahun():
    """Return the ``value`` of every option in the portal's ``tahun`` select.

    Returns:
        list of option values in page order, or None when the response
        status is not 200.
    """
    with session.get('http://www.djpk.kemenkeu.go.id/portal/data/apbd') as response:
        if response.status_code != 200:
            return None
        soup = BeautifulSoup(response.text, 'html.parser')
        # The form token is not needed here, so it is no longer looked up;
        # a page without that field used to make this function raise.
        tahun_select = soup.find("select", {"name": "tahun"})
        return [option['value'] for option in tahun_select.find_all("option")]
                            
def get_all_provinsi():
    """Map each option of the ``sel_prov`` select: option value -> option text.

    Returns None when the response status is not 200.
    """
    with session.get('http://www.djpk.kemenkeu.go.id/portal/data/apbd') as response:
        if response.status_code != 200:
            return None
        soup = BeautifulSoup(response.text, 'html.parser')
        provinsi_select = soup.find("select", {"id": "sel_prov"})
        return {
            option['value']: option.text
            for option in provinsi_select.find_all("option")
        }

def get_pemdas(provinsi):
    """Fetch the pemda listing for one province code and decode it from JSON.

    The callers iterate the result with ``.items()`` as code -> name pairs.
    Returns None when the response is not 200 or the body is empty.
    """
    url = 'http://www.djpk.kemenkeu.go.id/portal/pemda/' + provinsi
    with session.get(url) as response:
        if response.status_code != 200 or len(response.text) == 0:
            return None
        return json.loads(response.text)

def get_kodeakun_tahun():
    """Collect every account code the portal reports, across all years.

    Reads the token and the list of years from the portal page, then queries
    the filter endpoint once per year (all provinces, all pemda) and gathers
    the keys found at the three levels of the ``postur`` tree.

    Returns:
        tuple ``(kodepostur_list, kodeakun_list, kodesubakun_list, alltahun,
        token)``: three sorted lists of unique codes, the list of years and
        the token string. Returns None when the portal page is not HTTP 200.
    """
    with session.get('http://www.djpk.kemenkeu.go.id/portal/data/apbd') as response:
        if response.status_code != 200:
            return None
        print("Preparing prerequisite datas: kode akun, tahun and token")
        bsoup = BeautifulSoup(response.text, 'html.parser')
        token = bsoup.find("input", {"name": "_token"})['value']
        tahunsoup = bsoup.find("select", {"name": "tahun"})
        alltahun = [option['value'] for option in tahunsoup.find_all("option")]

    kodeposturs = set()
    kodeakuns = set()
    kodesubakuns = set()

    for tahun in alltahun:
        print("Populating kode akun from tahun: " + tahun)
        result = filter(token, tahun, "--", "--")
        if result is None:
            # filter() returns None when the request failed; skip this year
            # instead of passing None to json.loads.
            continue
        json_result = json.loads(result)
        if len(json_result) == 0:
            continue
        for kode_postur, postur in json_result['postur'].items():
            kodeposturs.add(kode_postur)
            for kode_akun, akun in postur['child'].items():
                kodeakuns.add(kode_akun)
                kodesubakuns.update(akun['child'].keys())

    print("Populating kode akun completed")
    return sorted(kodeposturs), sorted(kodeakuns), sorted(kodesubakuns), alltahun, token
            
def generate_header():
    """Build the list of CSV column names from the codes the portal reports.

    Starts from the fixed columns (no, tahun, pemda, provinsi, wilayah,
    disclaimer, special_row) and adds ``<kode>_a``, ``<kode>_r`` and
    ``<kode>_p`` for every postur, akun and sub-akun code found in any year,
    in first-seen order.

    Returns:
        list of column names, or None when the portal page is not HTTP 200.
    """
    with session.get('http://www.djpk.kemenkeu.go.id/portal/data/apbd') as response:
        if response.status_code != 200:
            return None
        print("Generating header")
        bsoup = BeautifulSoup(response.text, 'html.parser')
        token = bsoup.find("input", {"name": "_token"})['value']

    # A dict keeps first-seen order and drops repeated codes across years.
    header_dict = {'no': '',
                   'tahun': '',
                   'pemda': '',
                   'provinsi': '',
                   'wilayah': '',
                   'disclaimer': '',
                   'special_row': '',
                   }

    def add_columns(kode):
        for suffix in ("_a", "_r", "_p"):
            header_dict[kode + suffix] = 0

    for tahun in get_all_tahun() or []:
        print("Populating kode akun from", tahun)
        result = filter(token, tahun, "--", "--")
        if result is None:
            # filter() returns None when the request failed; skip this year.
            continue
        json_result = json.loads(result)
        if len(json_result) == 0:
            continue
        for kode_postur, postur in json_result['postur'].items():
            add_columns(kode_postur)
            for kode_akun, akun in postur['child'].items():
                add_columns(kode_akun)
                for kode_subakun in akun['child'].keys():
                    add_columns(kode_subakun)

    # The original returned an undefined ``header_row``; build it from the keys.
    return list(header_dict)

def parse_data(json_result, kodeposturs, kodeakuns, kodesubakuns):
    """Flatten one decoded filter response into a single row dict.

    Args:
        json_result: decoded response; the keys read are 'wilayah',
            'disclaimer', 'special_row' and 'postur'.
        kodeposturs: top-level codes to extract (their 'total' entry).
        kodeakuns: second-level codes; the parent postur code is derived by
            replacing the last two characters with '00'.
        kodesubakuns: third-level codes; the parent akun code is derived by
            replacing the last character with '0'.

    Returns:
        dict with 'wilayah', 'disclaimer', 'special_row' and, for every code
        present in the response, ``<kode>_a`` (anggaran), ``<kode>_r``
        (realisasi) and ``<kode>_p`` (persentase, with ',' turned into '.'
        and spaces removed). Codes missing from the response are skipped.
    """
    postur = json_result['postur']
    data_dict = {
        'wilayah': str(json_result['wilayah']),
        'disclaimer': str(json_result['disclaimer']),
        'special_row': json_result['special_row'],
    }

    def store(kode, values):
        data_dict[kode + "_a"] = str(values['anggaran'])
        data_dict[kode + "_r"] = str(values['realisasi'])
        data_dict[kode + "_p"] = str(values['persentase']).replace(",", ".").replace(" ", "")

    # The three levels are independent, so each gets its own flat loop.
    # Sub-akun codes go last: a code that exists at two levels ends up with
    # the sub-akun figures.
    for kodepostur in kodeposturs:
        if kodepostur in postur:
            store(kodepostur, postur[kodepostur]['total'])

    for kodeakun in kodeakuns:
        parent_postur = kodeakun[:-2] + '00'
        if parent_postur in postur and kodeakun in postur[parent_postur]['child']:
            store(kodeakun, postur[parent_postur]['child'][kodeakun]['total'])

    for kodesubakun in kodesubakuns:
        parent_postur = kodesubakun[:-2] + '00'
        parent_akun = kodesubakun[:-1] + '0'
        if parent_postur not in postur:
            continue
        akun_children = postur[parent_postur]['child']
        if parent_akun in akun_children and kodesubakun in akun_children[parent_akun]['child']:
            store(kodesubakun, akun_children[parent_akun]['child'][kodesubakun]['value'])

    return data_dict
    
def nusasms_kirim_wa(tujuan, pesan, test=0, apikey="110C9015F177631FDF2FD8042CA1A040"):
    """Send a WhatsApp message through the NusaSMS API.

    Args:
        tujuan: destination number.
        pesan: message text.
        test: truthy selects the dev host, falsy the production host.
        apikey: value sent in the ``APIKey`` header.

    Returns:
        True when the API answers HTTP 200, otherwise None.
    """
    # NOTE(review): an API key is embedded as the default argument; consider
    # supplying it from configuration instead of source.
    if test:
        base_url = 'https://dev.nusasms.com/nusasms_api/1.0'
    else:
        base_url = 'https://api.nusasms.com/nusasms_api/1.0'

    request_headers = {"Accept": "application/json", "APIKey": apikey}
    body = {'destination': tujuan, 'message': pesan}

    with session.post(f'{base_url}/whatsapp/message', headers=request_headers, json=body) as response:
        if response.status_code != 200:
            return None
        return True

def wachat_send_message(tujuan, pesan, sender, apikey='F0C584900AB90E1040862FC0B43F561E'):
    """Send a message through the wachat API.

    Args:
        tujuan: destination number.
        pesan: message text.
        sender: sender number.
        apikey: value sent in the ``APIKey`` header.

    Returns:
        True when the API answers HTTP 200, otherwise None.
    """
    # NOTE(review): an API key is embedded as the default argument; consider
    # supplying it from configuration instead of source.
    request_headers = {"Accept": "application/json", "APIKey": apikey}
    body = {'destination': tujuan, 'sender': sender, 'message': pesan}
    endpoint = 'https://api.wachat-api.com/wachat_api/1.0/message'

    with session.post(endpoint, headers=request_headers, json=body) as response:
        if response.status_code != 200:
            return None
        return True

# Combined run: scrape every tahun x provinsi x pemda combination and save the
# rows to CSV, rewriting the file after each row so an interrupted run keeps
# what was fetched so far.
if __name__ == "__main__":
    set_global_session()
    jakarta = timezone('Asia/Jakarta')
    start_time = datetime.datetime.now(jakarta)
    # Day without zero padding, built by hand because "%-d" is not portable.
    print("Start script at: " + str(start_time.day) + start_time.strftime("-%m-%Y") + " at " + str(start_time))

    kodeposturs, kodeakuns, kodesubakuns, alltahun, token = get_kodeakun_tahun()

    header_data = ['no', 'tahun', 'kdpemda', 'nmpemda', 'kdprov', 'nmprov', 'wilayah', 'disclaimer',  'special_row' ]
    # A code can appear at more than one level; set() keeps the columns unique.
    for kode in sorted(set(kodeposturs + kodeakuns + kodesubakuns)):
        header_data += [kode + "_a", kode + "_r", kode + "_p"]

    allprovinsi = get_all_provinsi()  # dict: kdprov -> nmprov

    # Rows are collected in a list and the frame is rebuilt from it, because
    # DataFrame.append no longer exists in pandas 2.x.
    rows = []
    datadf = pd.DataFrame(columns=header_data)
    print("Are you executing this?")
    for tahun in alltahun:
        print("Executing tahun: ", tahun)
        for kdprov, nmprov in allprovinsi.items():
            print("Executing provinsi loop: ", kdprov, nmprov)
            # get_pemdas() returns None on a failed request; treat as empty.
            pemdas = get_pemdas(kdprov) or {}
            for kdpemda, nmpemda in pemdas.items():
                print("Executing pemda loop: ", tahun, kdpemda, nmpemda)
                pemda_result = filter(token, tahun, kdprov, kdpemda)
                if pemda_result is None:
                    continue
                json_result = json.loads(pemda_result)
                if not json_result:
                    # Empty payload: nothing for parse_data() to read.
                    continue
                result_dict = parse_data(json_result, kodeposturs, kodeakuns, kodesubakuns)
                result_dict['tahun'] = tahun
                result_dict['kdpemda'] = kdpemda
                result_dict['nmpemda'] = nmpemda
                result_dict['kdprov'] = kdprov
                result_dict['nmprov'] = nmprov
                print(f"Get data tahun {tahun}, prov {nmprov}, pemda {nmpemda}")
                print(result_dict)
                rows.append(result_dict)
                datadf = pd.DataFrame(rows, columns=header_data)
                filename = "data-apbd-" + datetime.datetime.now(jakarta).strftime("%Y-%m-%d--%H-%M") + ".csv"
                datadf.to_csv(filename)
    print("datadf:")
    print(datadf)
    filename = "data-apbd-" + datetime.datetime.now(jakarta).strftime("%Y-%m-%d--%H-%M") + ".csv"
    datadf.to_csv(filename)
    duration = datetime.datetime.now(jakarta) - start_time
    print("Duration: " + str(duration))

Start script at: 4-10-2021 at 2021-10-04 09:00:31.785457+07:00
Preparing prerequisite datas: kode akun, tahun and token
Populating kode akun from tahun: 2010


KeyboardInterrupt: ignored

In [None]:
# Combined run: scrape every tahun x provinsi x pemda combination and save the
# rows to CSV, rewriting the file after each row so an interrupted run keeps
# what was fetched so far.
if __name__ == "__main__":
    set_global_session()
    jakarta = timezone('Asia/Jakarta')
    start_time = datetime.datetime.now(jakarta)
    # Day without zero padding, built by hand because "%-d" is not portable.
    print("Start script at: " + str(start_time.day) + start_time.strftime("-%m-%Y") + " at " + str(start_time))

    kodeposturs, kodeakuns, kodesubakuns, alltahun, token = get_kodeakun_tahun()

    header_data = ['no', 'tahun', 'kdpemda', 'nmpemda', 'kdprov', 'nmprov', 'wilayah', 'disclaimer',  'special_row' ]
    # A code can appear at more than one level; set() keeps the columns unique.
    for kode in sorted(set(kodeposturs + kodeakuns + kodesubakuns)):
        header_data += [kode + "_a", kode + "_r", kode + "_p"]

    allprovinsi = get_all_provinsi()  # dict: kdprov -> nmprov

    # Rows are collected in a list and the frame is rebuilt from it, because
    # DataFrame.append no longer exists in pandas 2.x.
    rows = []
    datadf = pd.DataFrame(columns=header_data)
    print("Are you executing this?")
    for tahun in alltahun:
        print("Executing tahun: ", tahun)
        for kdprov, nmprov in allprovinsi.items():
            print("Executing provinsi loop: ", kdprov, nmprov)
            # get_pemdas() returns None on a failed request; treat as empty.
            pemdas = get_pemdas(kdprov) or {}
            for kdpemda, nmpemda in pemdas.items():
                print("Executing pemda loop: ", tahun, kdpemda, nmpemda)
                pemda_result = filter(token, tahun, kdprov, kdpemda)
                if pemda_result is None:
                    continue
                json_result = json.loads(pemda_result)
                if not json_result:
                    # Empty payload: nothing for parse_data() to read.
                    continue
                result_dict = parse_data(json_result, kodeposturs, kodeakuns, kodesubakuns)
                result_dict['tahun'] = tahun
                result_dict['kdpemda'] = kdpemda
                result_dict['nmpemda'] = nmpemda
                result_dict['kdprov'] = kdprov
                result_dict['nmprov'] = nmprov
                print(f"Get data tahun {tahun}, prov {nmprov}, pemda {nmpemda}")
                print(result_dict)
                rows.append(result_dict)
                datadf = pd.DataFrame(rows, columns=header_data)
                filename = "data-apbd-" + datetime.datetime.now(jakarta).strftime("%Y-%m-%d--%H-%M") + ".csv"
                datadf.to_csv(filename)
    print("datadf:")
    print(datadf)
    filename = "data-apbd-" + datetime.datetime.now(jakarta).strftime("%Y-%m-%d--%H-%M") + ".csv"
    datadf.to_csv(filename)
    duration = datetime.datetime.now(jakarta) - start_time
    print("Duration: " + str(duration))

# Portal APBD DJPK v3 (per tahun version) slim

In [None]:
import requests
import re
from bs4 import BeautifulSoup
from requests.exceptions import ConnectionError
import datetime
from pytz import timezone
import urllib3
import json
from random import randint
import time
import pandas as pd
import time

# Shared requests session; set_global_session() creates it on first call.
session = None

def set_global_session():
    """Create the module-level ``session`` once and mount retrying adapters.

    Does nothing when ``session`` is already set. The session retries
    HEAD/GET/OPTIONS requests on the listed status codes, has certificate
    verification switched off and silences the matching urllib3 warning.
    """
    from requests.adapters import HTTPAdapter
    from urllib3.util.retry import Retry

    global session
    if session:
        return

    retry_options = dict(
        total=300,
        backoff_factor=0.5,
        status_forcelist=[429, 500, 502, 503, 504],
    )
    retried_methods = ["HEAD", "GET", "OPTIONS"]
    try:
        # Current urllib3 names this option ``allowed_methods``.
        retry_strategy = Retry(allowed_methods=retried_methods, **retry_options)
    except TypeError:
        # Older urllib3 releases only accept the former ``method_whitelist``.
        retry_strategy = Retry(method_whitelist=retried_methods, **retry_options)
    # Credits: https://findwork.dev/blog/advanced-usage-python-requests-timeouts-retries-hooks/

    adapter = HTTPAdapter(max_retries=retry_strategy)
    session = requests.Session()
    session.mount('https://', adapter)
    session.mount('http://', adapter)
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    session.verify = False

def get_token():
    """Fetch the APBD portal page and return the value of its ``_token`` input.

    Returns None when the response is not 200 or the body is empty.
    """
    with session.get('http://www.djpk.kemenkeu.go.id/portal/data/apbd') as response:
        page_ok = response.status_code == 200 and len(response.text) != 0
        if not page_ok:
            return None
        soup = BeautifulSoup(response.text, 'html.parser')
        token_input = soup.find("input", {"name": "_token"})
        return token_input['value']

def filter(token, tahun, provinsi, pemda):
    """POST one query to the portal's filter endpoint and return the raw body.

    Args:
        token: value of the page's ``_token`` form field.
        tahun: year to query.
        provinsi: province code.
        pemda: local-government code.

    Returns:
        The response text on HTTP 200; None on any other status or when the
        request raises. The elapsed time is printed in every case.
    """
    t0 = time.time()
    try:
        with session.post('http://www.djpk.kemenkeu.go.id/portal/filter',
            data={'_token': token, 'data': 'apbd ', 'tahun': tahun, 'provinsi': provinsi, 'pemda': pemda}) as response:
            if response.status_code == 200:
                return response.text
            # Only non-200 responses reach this point, so report a failure
            # rather than the former "It eventually worked" message.
            print('Request failed with status', response.status_code)
    except Exception as e:
        # Best effort: report the error class and let the caller see None.
        print('It failed :(', e.__class__.__name__)
    finally:
        print('Took', time.time() - t0, 'seconds')
    return None

                            
def get_all_tahun():
    """Return the ``value`` of every option in the portal's ``tahun`` select.

    Returns:
        list of option values in page order, or None when the response
        status is not 200.
    """
    with session.get('http://www.djpk.kemenkeu.go.id/portal/data/apbd') as response:
        if response.status_code != 200:
            return None
        soup = BeautifulSoup(response.text, 'html.parser')
        # The form token is not needed here, so it is no longer looked up;
        # a page without that field used to make this function raise.
        tahun_select = soup.find("select", {"name": "tahun"})
        return [option['value'] for option in tahun_select.find_all("option")]
                            
def get_pemdas(provinsi):
    """Fetch the pemda listing for one province code and decode it from JSON.

    The callers iterate the result with ``.items()`` as code -> name pairs.
    Returns None when the response is not 200 or the body is empty.
    """
    url = 'http://www.djpk.kemenkeu.go.id/portal/pemda/' + provinsi
    with session.get(url) as response:
        if response.status_code != 200 or len(response.text) == 0:
            return None
        return json.loads(response.text)

def parse_data(json_result, kodeposturs, kodeakuns, kodesubakuns):
    """Flatten one decoded filter response into a single row dict.

    Args:
        json_result: decoded response; the keys read are 'wilayah',
            'disclaimer', 'special_row' and 'postur'.
        kodeposturs: top-level codes to extract (their 'total' entry).
        kodeakuns: second-level codes; the parent postur code is derived by
            replacing the last two characters with '00'.
        kodesubakuns: third-level codes; the parent akun code is derived by
            replacing the last character with '0'.

    Returns:
        dict with 'wilayah', 'disclaimer', 'special_row' and, for every code
        present in the response, ``<kode>_a`` (anggaran), ``<kode>_r``
        (realisasi) and ``<kode>_p`` (persentase, with ',' turned into '.'
        and spaces removed). Codes missing from the response are skipped.
    """
    postur = json_result['postur']
    data_dict = {
        'wilayah': str(json_result['wilayah']),
        'disclaimer': str(json_result['disclaimer']),
        'special_row': json_result['special_row'],
    }

    def store(kode, values):
        data_dict[kode + "_a"] = str(values['anggaran'])
        data_dict[kode + "_r"] = str(values['realisasi'])
        data_dict[kode + "_p"] = str(values['persentase']).replace(",", ".").replace(" ", "")

    # The three levels are independent, so each gets its own flat loop.
    # Sub-akun codes go last: a code that exists at two levels ends up with
    # the sub-akun figures.
    for kodepostur in kodeposturs:
        if kodepostur in postur:
            store(kodepostur, postur[kodepostur]['total'])

    for kodeakun in kodeakuns:
        parent_postur = kodeakun[:-2] + '00'
        if parent_postur in postur and kodeakun in postur[parent_postur]['child']:
            store(kodeakun, postur[parent_postur]['child'][kodeakun]['total'])

    for kodesubakun in kodesubakuns:
        parent_postur = kodesubakun[:-2] + '00'
        parent_akun = kodesubakun[:-1] + '0'
        if parent_postur not in postur:
            continue
        akun_children = postur[parent_postur]['child']
        if parent_akun in akun_children and kodesubakun in akun_children[parent_akun]['child']:
            store(kodesubakun, akun_children[parent_akun]['child'][kodesubakun]['value'])

    return data_dict

def wachat_send_message(tujuan, pesan, sender, apikey='F0C584900AB90E1040862FC0B43F561E'):
    """Send a message through the wachat API.

    Args:
        tujuan: destination number.
        pesan: message text.
        sender: sender number.
        apikey: value sent in the ``APIKey`` header.

    Returns:
        True when the API answers HTTP 200, otherwise None.
    """
    # NOTE(review): an API key is embedded as the default argument; consider
    # supplying it from configuration instead of source.
    request_headers = {"Accept": "application/json", "APIKey": apikey}
    body = {'destination': tujuan, 'sender': sender, 'message': pesan}
    endpoint = 'https://api.wachat-api.com/wachat_api/1.0/message'

    with session.post(endpoint, headers=request_headers, json=body) as response:
        if response.status_code != 200:
            return None
        return True

# Combined run (per-year, slim variant): uses fixed code and province lists,
# scrapes 2011-2020 for every provinsi x pemda, rewrites the CSV after each
# row and sends a WhatsApp notice after each province.
if __name__ == "__main__":
    set_global_session()
    jakarta = timezone('Asia/Jakarta')
    start_time = datetime.datetime.now(jakarta)
    # Day without zero padding, built by hand because "%-d" is not portable.
    print("Start script at: " + str(start_time.day) + start_time.strftime("-%m-%Y") + " at " + str(start_time))

    # Fixed lists replace the get_kodeakun_tahun() discovery step.
    kodeposturs = ['400', '500', '600']
    kodeakuns = ['410', '420', '430', '510', '520', '530', '540', '610', '620']
    kodesubakuns = ['411', '412', '413', '414', '416', '421', '422', '423', '430', '431', '432', '433', '434', '435', '436', '439', '511', '512', '513', '514', '515', '516', '517', '518', '519', '520', '521', '522', '523', '524', '525', '526', '531', '541', '542', '611', '612', '613', '614', '615', '616', '617', '619', '621', '622', '623', '624', '625', '626', '627', '629']
    token = get_token()

    header_data = ['tahun', 'kdpemda', 'nmpemda', 'kdprov', 'nmprov', 'wilayah', 'disclaimer',  'special_row' ]
    # '430' and '520' are listed at two levels; set() keeps the columns unique.
    for kode in sorted(set(kodeposturs + kodeakuns + kodesubakuns)):
        header_data += [kode + "_a", kode + "_r", kode + "_p"]

    # Fixed mapping kdprov -> nmprov, replacing the get_all_provinsi() call.
    allprovinsi = {'01': 'Prov. Aceh', '29': 'Prov. Babel', '22': 'Prov. Bali', '28': 'Prov. Banten', '07': 'Prov. Bengkulu', '12': 'Prov. DIY', '09': 'Prov. DKI Jakarta', '30': 'Prov. Gorontalo', '10': 'Prov. Jabar', '05': 'Prov. Jambi', '11': 'Prov. Jateng', '13': 'Prov. Jawa Timur', '14': 'Prov. Kalbar', '34': 'Prov. Kalimantan Utara', '16': 'Prov. Kalsel', '15': 'Prov. Kalteng', '17': 'Prov. Kaltim', '31': 'Prov. Kepulauan Riau', '08': 'Prov. Lampung', '25': 'Prov. Maluku', '27': 'Prov. Malut', '23': 'Prov. NTB', '24': 'Prov. NTT', '26': 'Prov. Papua', '32': 'Prov. Papua Barat', '04': 'Prov. Riau', '33': 'Prov. Sulawesi Barat', '20': 'Prov. Sulsel', '19': 'Prov. Sulteng', '21': 'Prov. Sultra', '18': 'Prov. Sulut', '03': 'Prov. Sumatera Barat', '06': 'Prov. Sumsel', '02': 'Prov. Sumut', '--': 'Semua Provinsi'}

    # Rows are collected in a list and the frame is rebuilt from it, because
    # DataFrame.append no longer exists in pandas 2.x.
    rows = []
    datadf = pd.DataFrame(columns=header_data)
    print("Are you executing this?")
    for tahun in range(2011, 2021):
        print("Executing tahun: ", tahun)
        for kdprov, nmprov in allprovinsi.items():
            print("Executing provinsi loop: ", kdprov, nmprov)
            # get_pemdas() returns None on a failed request; treat as empty.
            pemdas = get_pemdas(kdprov) or {}
            for kdpemda, nmpemda in pemdas.items():
                print("Executing pemda loop: ", tahun, kdpemda, nmpemda)
                pemda_result = filter(token, tahun, kdprov, kdpemda)
                if pemda_result is None:
                    continue
                json_result = json.loads(pemda_result)
                if not json_result:
                    # Empty payload: nothing for parse_data() to read.
                    continue
                result_dict = parse_data(json_result, kodeposturs, kodeakuns, kodesubakuns)
                result_dict['tahun'] = tahun
                result_dict['kdpemda'] = kdpemda
                result_dict['nmpemda'] = nmpemda
                result_dict['kdprov'] = kdprov
                result_dict['nmprov'] = nmprov
                print("Get data tahun "+ str(tahun) +", prov " + nmprov + ", pemda " + nmpemda)
                print(result_dict)
                rows.append(result_dict)
                datadf = pd.DataFrame(rows, columns=header_data)
                filename = "data-apbd-" + datetime.datetime.now(jakarta).strftime("%Y-%m-%d--%H-%M") + ".csv"
                datadf.to_csv(filename)
            # NOTE(review): phone numbers and the API key are hard-coded here.
            pesan = 'Get data APBD tahun ' + str(tahun) + ', prov ' + nmprov + ' sudah selesai'
            wachat_send_message('628567074554', pesan, '6282189096866', apikey='F0C584900AB90E1040862FC0B43F561E')
    print("datadf:")
    print(datadf)
    filename = "final-data-apbd-" + datetime.datetime.now(jakarta).strftime("%Y-%m-%d--%H-%M") + ".csv"
    datadf.to_csv(filename)
    duration = datetime.datetime.now(jakarta) - start_time
    print("Duration: " + str(duration))

# Portal APBD DJPK v3 (custom)

In [None]:
import requests
import re
from bs4 import BeautifulSoup
from requests.exceptions import ConnectionError
import datetime
from pytz import timezone
import urllib3
import json
from random import randint
import time
import pandas as pd

session = None
def google_drive():
    """Mount Google Drive in Colab and build a timestamped CSV path on it.

    Returns:
        str: ``<drive folder>/data-apbd-<YYYY-MM-DD--HH-MM>.csv`` using the
        current Asia/Jakarta time. Earlier versions returned nothing, so
        callers that ignore the return value are unaffected.
    """
    from google.colab import drive
    drive.mount('/content/drive')

    # `datetime` is imported as a module in this file, so the class must be
    # reached as datetime.datetime (plain datetime.now raised AttributeError).
    stamp = datetime.datetime.now(timezone('Asia/Jakarta')).strftime("%Y-%m-%d--%H-%M")
    filename = "data-apbd-" + stamp + ".csv"
    path = "/content/drive/My Drive/Colab Notebooks/Portal APBD/"
    return path + filename

def set_global_session():
    """Create the module-level ``session`` once, with retrying HTTP adapters.

    Does nothing when ``session`` is already set. The session retries up to
    3 times with exponential backoff on the listed status codes, and has TLS
    certificate verification switched off.
    """
    from requests.adapters import HTTPAdapter
    from urllib3.util.retry import Retry

    global session
    if session:
        return

    retry_options = {
        'total': 3,
        'backoff_factor': 0.5,
        'status_forcelist': [429, 500, 502, 503, 504],
    }
    # NOTE(review): POST is not listed, so status-based retries presumably do
    # not apply to the POST calls made elsewhere in this file -- confirm.
    retried_methods = ["HEAD", "GET", "OPTIONS"]
    try:
        # urllib3 >= 1.26 names this argument `allowed_methods`.
        retry_strategy = Retry(allowed_methods=retried_methods, **retry_options)
    except TypeError:
        # Older urllib3 releases only know the previous `method_whitelist` name.
        retry_strategy = Retry(method_whitelist=retried_methods, **retry_options)
    # Credits: https://findwork.dev/blog/advanced-usage-python-requests-timeouts-retries-hooks/

    adapter = HTTPAdapter(max_retries=retry_strategy)
    session = requests.Session()
    session.mount('https://', adapter)
    session.mount('http://', adapter)
    # Certificate verification is disabled below, so silence the warning
    # urllib3 would otherwise emit on every HTTPS request.
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    session.verify = False

def get_token():
    """Fetch the APBD data page and return the value of its ``_token`` input.

    Returns None when the response is not HTTP 200 or its body is empty.
    """
    page_url = 'http://www.djpk.kemenkeu.go.id/portal/data/apbd'
    with session.get(page_url) as page:
        if page.status_code != 200 or len(page.text) == 0:
            return None
        soup = BeautifulSoup(page.text, 'html.parser')
        token_field = soup.find("input", {"name": "_token"})
        return token_field['value']

def filter(token, tahun, provinsi, pemda):
    """POST one filter query to the portal and return the raw response text.

    Args:
        token: value of the page's ``_token`` form field.
        tahun: year to query.
        provinsi: province code.
        pemda: local-government code.

    Returns:
        The response body as text on HTTP 200; None on any other status or
        when the request raised. Callers must handle None.
    """
    try:
        with session.post('http://www.djpk.kemenkeu.go.id/portal/filter',
            data={'_token': token, 'data': 'apbd ', 'tahun': tahun, 'provinsi': provinsi, 'pemda': pemda}) as response:
            if response.status_code == 200:
                return response.text
    except Exception as e:
        # Best-effort: report the failure, pause briefly, and let the caller
        # decide what to do with the None result. Only `randint` is imported
        # from `random` in this file, so it is called directly here.
        randomtime = randint(1, 5)
        print('ERROR - request failed (%s), pausing %d secs' % (e.__class__.__name__, randomtime))
        time.sleep(randomtime)
    return None
                            
def get_all_tahun():
    """Return the option values of the ``tahun`` <select> on the APBD page.

    Returns:
        list: the ``value`` attribute of every <option>, in page order;
        None when the page does not answer with HTTP 200.
    """
    with session.get('http://www.djpk.kemenkeu.go.id/portal/data/apbd') as response:
        if response.status_code != 200:
            return None
        bsoup = BeautifulSoup(response.text, 'html.parser')
        # The form token is not needed to list the years, so it is no longer
        # looked up here (the unused lookup could fail on its own).
        tahun_select = bsoup.find("select", {"name": "tahun"})
        return [option['value'] for option in tahun_select.find_all("option")]
                            
def get_all_provinsi(): # return dict with kdprov as key, nmprov as value
    """Read the ``sel_prov`` <select> on the APBD page into a dict.

    Keys are the <option> ``value`` attributes and values are the option
    texts. Returns None when the page does not answer with HTTP 200.
    """
    page_url = 'http://www.djpk.kemenkeu.go.id/portal/data/apbd'
    with session.get(page_url) as page:
        if page.status_code != 200:
            return None
        soup = BeautifulSoup(page.text, 'html.parser')
        prov_select = soup.find("select", {"id": "sel_prov"})
        return {opt['value']: opt.text for opt in prov_select.find_all("option")}

def get_pemdas(provinsi): # return dict with kdpemda as key, nmpemda as value
    """Fetch and JSON-decode the pemda listing for one province code.

    Returns None when the response is not HTTP 200 or its body is empty.
    """
    endpoint = 'http://www.djpk.kemenkeu.go.id/portal/pemda/' + provinsi
    with session.get(endpoint) as reply:
        if reply.status_code != 200 or len(reply.text) == 0:
            return None
        return json.loads(reply.text)

def get_kodeakun_tahun(): # return 5 list values
    """Collect every postur/akun/subakun code the portal reports, across all years.

    Loads the APBD page for the form token and the list of years, then runs
    one filter query per year (province and pemda both "--") and gathers the
    codes found in each answer.

    Returns:
        tuple: (kodepostur_list, kodeakun_list, kodesubakun_list, alltahun,
        token); the three code lists are sorted and free of duplicates.
        None when the APBD page does not answer with HTTP 200.
    """
    with session.get('http://www.djpk.kemenkeu.go.id/portal/data/apbd') as response:
        if response.status_code != 200:
            return None
        print("Preparing prerequisite datas: kode akun, tahun and token")
        bsoup = BeautifulSoup(response.text, 'html.parser')
        token = bsoup.find("input", {"name": "_token"})['value']
        tahunsoup = bsoup.find("select", {"name": "tahun"})
        alltahun = [option['value'] for option in tahunsoup.find_all("option")]

        kodeposturs = set()
        kodeakuns = set()
        kodesubakuns = set()

        for tahun in alltahun:
            print("Populating kode akun from tahun: " + tahun)
            result = filter(token, tahun, "--", "--")
            if result is None:
                # filter() returns None on a failed request; skip that year
                # instead of crashing in json.loads(None).
                continue
            json_result = json.loads(result)
            if not json_result:
                continue
            for kode_postur, postur in json_result['postur'].items():
                kodeposturs.add(kode_postur)
                for kode_akun, akun in postur['child'].items():
                    kodeakuns.add(kode_akun)
                    kodesubakuns.update(akun['child'].keys())

        print("Populating kode akun completed")
        return sorted(kodeposturs), sorted(kodeakuns), sorted(kodesubakuns), alltahun, token
            
def generate_header():
    """Build the list of column names from the codes the portal reports.

    Starts with the fixed columns, then adds ``<code>_a``, ``<code>_r`` and
    ``<code>_p`` for every postur, akun and subakun code seen in any year,
    in first-seen order.

    Returns:
        list: the column names; None when the APBD page does not answer
        with HTTP 200.
    """
    with session.get('http://www.djpk.kemenkeu.go.id/portal/data/apbd') as response:
        if response.status_code != 200:
            return None
        print("Generating header")
        bsoup = BeautifulSoup(response.text, 'html.parser')
        token = bsoup.find("input", {"name": "_token"})['value']
        alltahun = get_all_tahun() or []

        # A dict is used as an insertion-ordered set of column names.
        header_dict = {'no': '',
                       'tahun': '',
                       'pemda': '',
                       'provinsi': '',
                       'wilayah': '',
                       'disclaimer': '',
                       'special_row': '',
                       }

        def _add_columns(kode):
            for suffix in ("_a", "_r", "_p"):
                header_dict[kode + suffix] = 0

        for tahun in alltahun:
            print("Populating kode akun from", tahun)
            result = filter(token, tahun, "--", "--")
            if result is None:
                # filter() returns None on a failed request; skip that year.
                continue
            json_result = json.loads(result)
            if not json_result:
                continue
            for kode_postur, postur in json_result['postur'].items():
                _add_columns(kode_postur)
                for kode_akun, akun in postur['child'].items():
                    _add_columns(kode_akun)
                    for kode_subakun in akun['child'].keys():
                        _add_columns(kode_subakun)

        # The original returned `header_row`, a name that only existed in
        # commented-out code, so every call ended in NameError.
        return list(header_dict)

def parse_data(json_result, kodeposturs, kodeakuns, kodesubakuns):
    """Flatten one decoded filter response into a single-level dict.

    Args:
        json_result: decoded response with the keys 'wilayah', 'disclaimer',
            'special_row' and 'postur' (postur -> child akun -> child subakun).
        kodeposturs: postur codes to extract (figures read from 'total').
        kodeakuns: akun codes to extract (figures read from 'total').
        kodesubakuns: subakun codes to extract (figures read from 'value').

    Returns:
        dict: 'wilayah', 'disclaimer', 'special_row', plus '<code>_a',
        '<code>_r' and '<code>_p' (anggaran, realisasi, persentase as
        strings) for every requested code present in the response. Codes
        missing from the response are left out.

    The three code lists are processed independently. They used to be nested
    inside one another, which repeated the same lookups for every outer item
    and skipped akun/subakun entirely when an outer list was empty.
    """
    data_dict = {
        'wilayah': str(json_result['wilayah']),
        'disclaimer': str(json_result['disclaimer']),
        'special_row': json_result['special_row'],
    }
    postur = json_result['postur']

    def _store(kode, figures):
        data_dict[kode + "_a"] = str(figures['anggaran'])
        data_dict[kode + "_r"] = str(figures['realisasi'])
        # Decimal comma -> dot, spaces dropped ("99,89 %" -> "99.89%").
        # NOTE(review): a value such as "1.036,02 %" becomes "1.036.02%";
        # the thousands dot is kept as-is -- confirm downstream parsing.
        data_dict[kode + "_p"] = str(figures['persentase']).replace(",", ".").replace(" ", "")

    for kodepostur in kodeposturs:
        if kodepostur in postur:
            _store(kodepostur, postur[kodepostur]['total'])

    for kodeakun in kodeakuns:
        # The parent postur code is the akun code with its last two digits zeroed.
        induk_postur = kodeakun[:-2] + '00'
        if induk_postur in postur and kodeakun in postur[induk_postur]['child']:
            _store(kodeakun, postur[induk_postur]['child'][kodeakun]['total'])

    for kodesubakun in kodesubakuns:
        # Parent akun: last digit zeroed; parent postur: last two digits zeroed.
        induk_postur = kodesubakun[:-2] + '00'
        induk_akun = kodesubakun[:-1] + '0'
        if induk_postur not in postur:
            continue
        akun_level = postur[induk_postur]['child']
        if induk_akun in akun_level and kodesubakun in akun_level[induk_akun]['child']:
            _store(kodesubakun, akun_level[induk_akun]['child'][kodesubakun]['value'])

    return data_dict
    
def nusasms_kirim_wa(tujuan, pesan, test=0, apikey="110C9015F177631FDF2FD8042CA1A040"):
    """POST a message to the NusaSMS ``/whatsapp/message`` endpoint.

    Args:
        tujuan: destination, sent as 'destination'.
        pesan: message text, sent as 'message'.
        test: truthy to use the dev.nusasms.com host instead of api.nusasms.com.
        apikey: value of the 'APIKey' request header.

    Returns:
        True on HTTP 200, otherwise None.
    """
    # NOTE(review): the default API key is hard-coded in the signature;
    # consider moving it out of the source.
    base_url = ('https://dev.nusasms.com/nusasms_api/1.0' if test
                else 'https://api.nusasms.com/nusasms_api/1.0')
    request_headers = {"Accept": "application/json", "APIKey": apikey}
    body = {'destination': tujuan, 'message': pesan}

    with session.post(f'{base_url}/whatsapp/message', headers=request_headers, json=body) as reply:
        if reply.status_code != 200:
            return None
        return True

def wachat_send_message(tujuan, pesan, sender, apikey='F0C584900AB90E1040862FC0B43F561E'):
    """POST a message to the wachat-api ``/message`` endpoint.

    Args:
        tujuan: destination, sent as 'destination'.
        pesan: message text, sent as 'message'.
        sender: sender identifier, sent as 'sender'.
        apikey: value of the 'APIKey' request header.

    Returns:
        True on HTTP 200, otherwise None.
    """
    # NOTE(review): the default API key is hard-coded in the signature;
    # consider moving it out of the source.
    endpoint = 'https://api.wachat-api.com/wachat_api/1.0/message'
    request_headers = {"Accept": "application/json", "APIKey": apikey}
    body = {'destination': tujuan, 'sender': sender, 'message': pesan}

    with session.post(endpoint, headers=request_headers, json=body) as reply:
        if reply.status_code != 200:
            return None
        return True

# gabungan
if __name__ == "__main__":
    # Scrape the 2011 APBD figures for a fixed set of provinces, checkpointing
    # to CSV after every pemda and writing a final CSV at the end.
    set_global_session()
    data_dict = {}
    start_time = datetime.datetime.now(timezone('Asia/Jakarta'))
    # "%-d" (day without zero padding) is a platform-specific strftime flag.
    print("Start script at: " + start_time.strftime("%-d-%m-%Y") + " at " + str(start_time))

    # Hard-coded code lists; get_kodeakun_tahun() can regenerate them from the portal.
    kodeposturs = ['400', '500', '600']
    kodeakuns = ['410', '420', '430', '510', '520', '530', '540', '610', '620']
    kodesubakuns = ['411', '412', '413', '414', '416', '421', '422', '423', '430', '431', '432', '433', '434', '435', '436', '439', '511', '512', '513', '514', '515', '516', '517', '518', '519', '520', '521', '522', '523', '524', '525', '526', '531', '541', '542', '611', '612', '613', '614', '615', '616', '617', '619', '621', '622', '623', '624', '625', '626', '627', '629']
    token = get_token()

    header_data = ['no', 'tahun', 'kdpemda', 'nmpemda', 'kdprov', 'nmprov', 'wilayah', 'disclaimer', 'special_row']
    # '430' and '520' appear in both kodeakuns and kodesubakuns; de-duplicate
    # so the CSV does not end up with repeated column names.
    gabung = sorted(set(kodeposturs + kodeakuns + kodesubakuns))
    gabung_tail = [kode + suffix for kode in gabung for suffix in ("_a", "_r", "_p")]
    header_data = header_data + gabung_tail

    # Hard-coded subset of provinces; get_all_provinsi() returns the full list.
    provinsi = {'26': 'Prov. Papua', '28': 'Prov. Banten', '29': 'Prov. Babel', '30': 'Prov. Gorontalo', '31': 'Prov. Kepulauan Riau', '32': 'Prov. Papua Barat', '34': 'Prov. Kalimantan Utara'}

    # Rows are collected in a list and the frame is rebuilt from it, because
    # DataFrame.append no longer exists in pandas 2.x.
    rows = []
    datadf = pd.DataFrame(columns=header_data)
    print("Are you executing this?")
    tahun = '2011'
    print("Executing tahun: ", tahun)
    for kdprov, nmprov in provinsi.items():
        print("Executing provinsi loop: ", kdprov, nmprov)
        pemdas = get_pemdas(kdprov)
        if not pemdas:
            # get_pemdas() returns None on a bad or empty response.
            print("No pemda list for provinsi", kdprov, nmprov)
            continue
        for kdpemda, nmpemda in pemdas.items():
            print("Executing pemda loop: ", tahun, kdpemda, nmpemda)
            pemda_result = filter(token, tahun, kdprov, kdpemda)
            print(pemda_result)
            if pemda_result is None:
                continue
            json_result = json.loads(pemda_result)

            print(json_result)
            if not json_result:
                # Nothing to parse for this pemda/year.
                continue
            result_dict = parse_data(json_result, kodeposturs, kodeakuns, kodesubakuns)
            result_dict['tahun'] = tahun
            result_dict['kdpemda'] = kdpemda
            result_dict['nmpemda'] = nmpemda
            result_dict['kdprov'] = kdprov
            result_dict['nmprov'] = nmprov
            # The f prefix was missing, so the placeholders were printed literally.
            print(f"Get data tahun {tahun}, prov {nmprov}, pemda {nmpemda}")
            print(result_dict)
            rows.append(result_dict)
            datadf = pd.DataFrame(rows, columns=header_data)
            # Checkpoint after every pemda; the file name changes once a minute.
            filename = "data-apbd-" + datetime.datetime.now(timezone('Asia/Jakarta')).strftime("%Y-%m-%d--%H-%M") + ".csv"
            datadf.to_csv(filename)
    print("datadf:")
    print(datadf)
    filename = "final-data-apbd-" + datetime.datetime.now(timezone('Asia/Jakarta')).strftime("%Y-%m-%d--%H-%M") + ".csv"
    datadf.to_csv(filename)
    duration = datetime.datetime.now(timezone('Asia/Jakarta')) - start_time
    print("Duration: " + str(duration))

Start script at: 4-10-2021 at 2021-10-04 05:32:03.756814+07:00
Are you executing this?
Executing tahun:  2011
Executing provinsi loop:  26 Prov. Papua
Executing pemda loop:  2011 21 Kab. Mamberamo Raya
{"postur":{"400":{"text":"Pendapatan","child":{"410":{"text":"PAD","child":{"414":{"text":"Lain-lain PAD yang sah","value":{"anggaran":1500000000,"realisasi":15540264457,"persentase":"1.036,02 %"}}},"total":{"anggaran":1500000000,"realisasi":15540264457,"persentase":"1.036,02 %"}},"420":{"text":"Daper","child":{"421":{"text":"Dana bagi hasil pajak\/bagi hasil bukan pajak","value":{"anggaran":61143096679,"realisasi":68874048071,"persentase":"112,64 %"}},"422":{"text":"Dana alokasi umum","value":{"anggaran":438951482000,"realisasi":438464821000,"persentase":"99,89 %"}},"423":{"text":"Dana alokasi khusus","value":{"anggaran":52612600000,"realisasi":52611600000,"persentase":"100,00 %"}}},"total":{"anggaran":552707178679,"realisasi":559950469071,"persentase":"101,31 %"}},"430":{"text":"Lain-l

KeyboardInterrupt: ignored

# Get Single Pemda-Tahun v3

In [None]:
import requests
import re
from bs4 import BeautifulSoup
from requests.exceptions import ConnectionError
import datetime
from pytz import timezone
import urllib3
import json
from random import randint
import time
import pandas as pd
import time

session = None

def set_global_session():
    """Create the module-level ``session`` once, with retrying HTTP adapters.

    Does nothing when ``session`` is already set. The session retries up to
    300 times with exponential backoff on the listed status codes, and has
    TLS certificate verification switched off.
    """
    from requests.adapters import HTTPAdapter
    from urllib3.util.retry import Retry

    global session
    if session:
        return

    retry_options = {
        'total': 300,
        'backoff_factor': 0.5,
        'status_forcelist': [429, 500, 502, 503, 504],
    }
    # NOTE(review): POST is not listed, so status-based retries presumably do
    # not apply to the POST calls made elsewhere in this file -- confirm.
    retried_methods = ["HEAD", "GET", "OPTIONS"]
    try:
        # urllib3 >= 1.26 names this argument `allowed_methods`.
        retry_strategy = Retry(allowed_methods=retried_methods, **retry_options)
    except TypeError:
        # Older urllib3 releases only know the previous `method_whitelist` name.
        retry_strategy = Retry(method_whitelist=retried_methods, **retry_options)
    # Credits: https://findwork.dev/blog/advanced-usage-python-requests-timeouts-retries-hooks/

    adapter = HTTPAdapter(max_retries=retry_strategy)
    session = requests.Session()
    session.mount('https://', adapter)
    session.mount('http://', adapter)
    # Certificate verification is disabled below, so silence the warning
    # urllib3 would otherwise emit on every HTTPS request.
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    session.verify = False

def get_token():
    """Fetch the APBD data page and return the value of its ``_token`` input.

    Returns None when the response is not HTTP 200 or its body is empty.
    """
    page_url = 'http://www.djpk.kemenkeu.go.id/portal/data/apbd'
    with session.get(page_url) as page:
        if page.status_code != 200 or len(page.text) == 0:
            return None
        soup = BeautifulSoup(page.text, 'html.parser')
        token_field = soup.find("input", {"name": "_token"})
        return token_field['value']

def filter(token, tahun, provinsi, pemda):
    """POST one filter query to the portal, report the outcome and the timing.

    Args:
        token: value of the page's ``_token`` form field.
        tahun: year to query.
        provinsi: province code.
        pemda: local-government code.

    Returns:
        The response body as text on HTTP 200; None on any other status or
        when the request raised. Callers must handle None.
    """
    t0 = time.time()
    try:
        with session.post('http://www.djpk.kemenkeu.go.id/portal/filter',
            data={'_token': token, 'data': 'apbd ', 'tahun': tahun, 'provinsi': provinsi, 'pemda': pemda}) as response:
            if response.status_code == 200:
                # The success message used to sit in a try/else branch, which
                # is skipped whenever the try body returns, so it was printed
                # only for non-200 answers.
                print('It eventually worked', response.status_code)
                return response.text
            print('It failed :(', 'HTTP status', response.status_code)
    except Exception as e:
        print('It failed :(', e.__class__.__name__)
    finally:
        # Runs on every path, including the early return above.
        print('Took', time.time() - t0, 'seconds')
    return None

                            
def get_all_tahun():
    """Return the option values of the ``tahun`` <select> on the APBD page.

    Returns:
        list: the ``value`` attribute of every <option>, in page order;
        None when the page does not answer with HTTP 200.
    """
    with session.get('http://www.djpk.kemenkeu.go.id/portal/data/apbd') as response:
        if response.status_code != 200:
            return None
        bsoup = BeautifulSoup(response.text, 'html.parser')
        # The form token is not needed to list the years, so it is no longer
        # looked up here (the unused lookup could fail on its own).
        tahun_select = bsoup.find("select", {"name": "tahun"})
        return [option['value'] for option in tahun_select.find_all("option")]
                            
def get_pemdas(provinsi): # return dict with kdpemda as key, nmpemda as value
    """Fetch and JSON-decode the pemda listing for one province code.

    Returns None when the response is not HTTP 200 or its body is empty.
    """
    endpoint = 'http://www.djpk.kemenkeu.go.id/portal/pemda/' + provinsi
    with session.get(endpoint) as reply:
        if reply.status_code != 200 or len(reply.text) == 0:
            return None
        return json.loads(reply.text)

def parse_data(json_result, kodeposturs, kodeakuns, kodesubakuns):
    """Flatten one decoded filter response into a single-level dict.

    Args:
        json_result: decoded response with the keys 'wilayah', 'disclaimer',
            'special_row' and 'postur' (postur -> child akun -> child subakun).
        kodeposturs: postur codes to extract (figures read from 'total').
        kodeakuns: akun codes to extract (figures read from 'total').
        kodesubakuns: subakun codes to extract (figures read from 'value').

    Returns:
        dict: 'wilayah', 'disclaimer', 'special_row', plus '<code>_a',
        '<code>_r' and '<code>_p' (anggaran, realisasi, persentase as
        strings) for every requested code present in the response. Codes
        missing from the response are left out.

    The three code lists are processed independently. They used to be nested
    inside one another, which repeated the same lookups for every outer item
    and skipped akun/subakun entirely when an outer list was empty.
    """
    data_dict = {
        'wilayah': str(json_result['wilayah']),
        'disclaimer': str(json_result['disclaimer']),
        'special_row': json_result['special_row'],
    }
    postur = json_result['postur']

    def _store(kode, figures):
        data_dict[kode + "_a"] = str(figures['anggaran'])
        data_dict[kode + "_r"] = str(figures['realisasi'])
        # Decimal comma -> dot, spaces dropped ("99,89 %" -> "99.89%").
        # NOTE(review): a value such as "1.036,02 %" becomes "1.036.02%";
        # the thousands dot is kept as-is -- confirm downstream parsing.
        data_dict[kode + "_p"] = str(figures['persentase']).replace(",", ".").replace(" ", "")

    for kodepostur in kodeposturs:
        if kodepostur in postur:
            _store(kodepostur, postur[kodepostur]['total'])

    for kodeakun in kodeakuns:
        # The parent postur code is the akun code with its last two digits zeroed.
        induk_postur = kodeakun[:-2] + '00'
        if induk_postur in postur and kodeakun in postur[induk_postur]['child']:
            _store(kodeakun, postur[induk_postur]['child'][kodeakun]['total'])

    for kodesubakun in kodesubakuns:
        # Parent akun: last digit zeroed; parent postur: last two digits zeroed.
        induk_postur = kodesubakun[:-2] + '00'
        induk_akun = kodesubakun[:-1] + '0'
        if induk_postur not in postur:
            continue
        akun_level = postur[induk_postur]['child']
        if induk_akun in akun_level and kodesubakun in akun_level[induk_akun]['child']:
            _store(kodesubakun, akun_level[induk_akun]['child'][kodesubakun]['value'])

    return data_dict

def wachat_send_message(tujuan, pesan, sender, apikey='F0C584900AB90E1040862FC0B43F561E'):
    """Send a message through the wachat-api HTTP endpoint.

    Args:
        tujuan: Value sent as the ``destination`` field of the JSON payload.
        pesan: Message text, sent as the ``message`` field.
        sender: Value sent as the ``sender`` field.
        apikey: Value of the ``APIKey`` request header.
            NOTE(review): a key is hard-coded as the default here; consider
            rotating it and passing it in from configuration instead.

    Returns:
        bool: True when the API answers with HTTP 200, False otherwise.
    """
    headers = {"Accept": "application/json", "APIKey": apikey}
    payload = {'destination': tujuan, 'sender': sender, 'message': pesan}

    # ``session`` is the module-level requests session that
    # set_global_session() creates; it must have been called beforehand.
    with session.post(
        'https://api.wachat-api.com/wachat_api/1.0/message',
        headers=headers,
        json=payload,
    ) as response:
        # Report failure explicitly instead of falling through to None.
        return response.status_code == 200

# gabungan
if __name__ == "__main__":
    # End-to-end run for a single (tahun, kdprov, kdpemda) combination:
    # fetch the JSON, flatten it with parse_data() and write one CSV file.
    set_global_session()
    data_dict = {}
    start_time = datetime.datetime.now(timezone('Asia/Jakarta'))
    print("Start script at: " + datetime.datetime.now(timezone('Asia/Jakarta')).strftime("%-d-%m-%Y") + " at " + str(start_time))

    # The account codes are pinned here instead of being fetched on every run.
    # kodeposturs, kodeakuns, kodesubakuns, alltahun, token = get_kodeakun_tahun()
    kodeposturs = ['400', '500', '600']
    kodeakuns = ['410', '420', '430', '510', '520', '530', '540', '610', '620']
    kodesubakuns = ['411', '412', '413', '414', '416', '421', '422', '423', '430', '431', '432', '433', '434', '435', '436', '439', '511', '512', '513', '514', '515', '516', '517', '518', '519', '520', '521', '522', '523', '524', '525', '526', '531', '541', '542', '611', '612', '613', '614', '615', '616', '617', '619', '621', '622', '623', '624', '625', '626', '627', '629']
    token = get_token()

    # Header = fixed identification columns followed by
    # <code>_a / <code>_r / <code>_p for every account code.
    # header_data = generate_header()
    header_data = ['tahun', 'kdpemda', 'nmpemda', 'kdprov', 'nmprov', 'wilayah', 'disclaimer', 'special_row']
    # '430' and '520' are listed in both kodeakuns and kodesubakuns; de-duplicate
    # so the frame does not end up with repeated column labels.
    gabung = sorted(set(kodeposturs + kodeakuns + kodesubakuns))
    gabung_tail = [kode + suffix for kode in gabung for suffix in ("_a", "_r", "_p")]
    header_data = header_data + gabung_tail

    # allprovinsi = get_all_provinsi()
    allprovinsi = {'01': 'Prov. Aceh', '29': 'Prov. Babel', '22': 'Prov. Bali', '28': 'Prov. Banten', '07': 'Prov. Bengkulu', '12': 'Prov. DIY', '09': 'Prov. DKI Jakarta', '30': 'Prov. Gorontalo', '10': 'Prov. Jabar', '05': 'Prov. Jambi', '11': 'Prov. Jateng', '13': 'Prov. Jawa Timur', '14': 'Prov. Kalbar', '34': 'Prov. Kalimantan Utara', '16': 'Prov. Kalsel', '15': 'Prov. Kalteng', '17': 'Prov. Kaltim', '31': 'Prov. Kepulauan Riau', '08': 'Prov. Lampung', '25': 'Prov. Maluku', '27': 'Prov. Malut', '23': 'Prov. NTB', '24': 'Prov. NTT', '26': 'Prov. Papua', '32': 'Prov. Papua Barat', '04': 'Prov. Riau', '33': 'Prov. Sulawesi Barat', '20': 'Prov. Sulsel', '19': 'Prov. Sulteng', '21': 'Prov. Sultra', '18': 'Prov. Sulut', '03': 'Prov. Sumatera Barat', '06': 'Prov. Sumsel', '02': 'Prov. Sumut', '--': 'Semua Provinsi'}

    print("Are you executing this?")
    # for tahun in alltahun:
    tahun = '2010'
    kdprov = '17'
    kdpemda = '--'
    rows = []  # one flattened dict per successfully fetched response
    result_dict = {}
    pemda_result = filter(token, tahun, kdprov, kdpemda)
    if pemda_result is None:
        # Nothing to parse; json.loads(None) would raise TypeError.
        print("Kosong!")
    else:
        json_result = json.loads(pemda_result)
        result_dict = parse_data(json_result, kodeposturs, kodeakuns, kodesubakuns)
        result_dict['tahun'] = tahun
        result_dict['kdpemda'] = kdpemda
        # result_dict['nmpemda'] = nmpemda
        result_dict['kdprov'] = kdprov
        # result_dict['nmprov'] = nmprov
        print(result_dict)
        rows.append(result_dict)

    # DataFrame.append() no longer exists in pandas 2.x; build the frame once
    # from the collected rows. Columns missing from a row are left empty.
    datadf = pd.DataFrame(rows, columns=header_data)
    filename = "data-apbd-"+ datetime.datetime.now(timezone('Asia/Jakarta')).strftime("%Y-%m-%d--%H-%M") + ".csv"
    datadf.to_csv(filename)

    print("datadf:")
    print(datadf)
    duration = datetime.datetime.now(timezone('Asia/Jakarta')) - start_time
    print("Duration: " + str(duration))

Start script at: 17-10-2021 at 2021-10-17 05:02:17.972862+07:00
Are you executing this?
Took 7.381716966629028 seconds
{'wilayah': 'se-Prov. Kalimantan Timur', 'disclaimer': 'Data per 20 Juli 2010 (Anggaran)', 'special_row': False, '400_a': '18846928860695', '400_r': '21383700136573', '400_p': '113.46%', '410_a': '3315240077590', '410_r': '3469520184836', '410_p': '104.65%', '411_a': '1693774531430', '411_r': '2218206081165', '411_p': '130.96%', '412_a': '164871786973', '412_r': '145782029566', '412_p': '88.42%', '413_a': '246420256579', '413_r': '238541849943', '413_p': '96.80%', '414_a': '1210173502608', '414_r': '866990224162', '414_p': '71.64%', '421_a': '13030612697188', '421_r': '15013315565968', '421_p': '115.22%', '422_a': '829818193000', '422_r': '777741895400', '422_p': '93.72%', '423_a': '139042290600', '423_r': '149837268296', '423_p': '107.76%', '431_a': '7000000000', '431_r': '140993880120', '431_p': '2.014.20%', '433_a': '663237571317', '433_r': '1084123795826', '433_p':

In [None]:
# Display the DataFrame assembled by the run above as the cell output.
datadf

Unnamed: 0,tahun,kdpemda,nmpemda,kdprov,nmprov,wilayah,disclaimer,special_row,400_a,400_r,400_p,410_a,410_r,410_p,411_a,411_r,411_p,412_a,412_r,412_p,413_a,413_r,413_p,414_a,414_r,414_p,416_a,416_r,416_p,420_a,420_r,420_p,421_a,421_r,421_p,422_a,422_r,422_p,423_a,423_r,...,614_p,615_a,615_r,615_p,616_a,616_r,616_p,617_a,617_r,617_p,619_a,619_r,619_p,620_a,620_r,620_p,621_a,621_r,621_p,622_a,622_r,622_p,623_a,623_r,623_p,624_a,624_r,624_p,625_a,625_r,625_p,626_a,626_r,626_p,627_a,627_r,627_p,629_a,629_r,629_p
0,2010,--,,17,,se-Prov. Kalimantan Timur,Data per 20 Juli 2010 (Anggaran),False,18846928860695,21383700136573,113.46%,3315240077590,3469520184836,104.65%,1693774531430,2218206081165,130.96%,164871786973,145782029566,88.42%,246420256579,238541849943,96.80%,1210173502608,866990224162,71.64%,,,,13999473180788,15940894729664,113.87%,13030612697188,15013315565968,115.22%,829818193000,777741895400,93.72%,139042290600,149837268296,...,0.00%,5500000000,531041667,9.66%,,,,,,,,,,675852821627,653814619572,96.74%,,,,459297332175,407700000000,88.77%,206255489452,246114619572,119.33%,10300000000,0,0.00%,,,,,,,,,,,,


# Parsial test

## Generate Header

In [None]:
# generate header
if __name__ == "__main__":
    # Partial test: build the CSV header via generate_header() and time it.
    set_global_session()
    jakarta_tz = timezone('Asia/Jakarta')
    start_time = datetime.datetime.now(jakarta_tz)
    date_label = datetime.datetime.now(jakarta_tz).strftime("%-d-%m-%Y")
    print("Start script at: " + date_label + " at " + str(start_time))

    header_data = generate_header()
    print(header_data)

    duration = datetime.datetime.now(jakarta_tz) - start_time
    print("Duration: " + str(duration))

Start script at: 3-10-2021 at 2021-10-03 10:17:04.932570+07:00
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
['no', 'tahun', 'pemda', 'provinsi', 'wilayah', 'disclaimer', 'special_row', '400_a', '400_r', '400_p', '410_a', '410_r', '410_p', '411_a', '411_r', '411_p', '412_a', '412_r', '412_p', '413_a', '413_r', '413_p', '414_a', '414_r', '414_p', '420_a', '420_r', '420_p', '421_a', '421_r', '421_p', '422_a', '422_r', '422_p', '423_a', '423_r', '423_p', '430_a', '430_r', '430_p', '431_a', '431_r', '431_p', '432_a', '432_r', '432_p', '433_a', '433_r', '433_p', '434_a', '434_r', '434_p', '435_a', '435_r', '435_p', '436_a', '436_r', '436_p', '500_a', '500_r', '500_p', '510_a', '510_r', '510_p', '511_a', '511_r', '511_p', '512_a', '512_r', '512_p', '513_a', '513_r', '513_p', '514_a', '514_r', '514_p', '515_a', '515_r', '515_p', '516_a', '516_r', '516_p', '517_a', '517_r', '517_p', '518_a', '518_r', '518_p', '520_a', '520_r', '520_p', '521_a', '521_r', '521_p', '522_a', '52

In [None]:
# Re-print the header list produced by the previous cell.
print(header_data)

['no', 'tahun', 'pemda', 'provinsi', 'wilayah', 'disclaimer', 'special_row', '400_a', '400_r', '400_p', '410_a', '410_r', '410_p', '411_a', '411_r', '411_p', '412_a', '412_r', '412_p', '413_a', '413_r', '413_p', '414_a', '414_r', '414_p', '420_a', '420_r', '420_p', '421_a', '421_r', '421_p', '422_a', '422_r', '422_p', '423_a', '423_r', '423_p', '430_a', '430_r', '430_p', '431_a', '431_r', '431_p', '432_a', '432_r', '432_p', '433_a', '433_r', '433_p', '434_a', '434_r', '434_p', '435_a', '435_r', '435_p', '436_a', '436_r', '436_p', '500_a', '500_r', '500_p', '510_a', '510_r', '510_p', '511_a', '511_r', '511_p', '512_a', '512_r', '512_p', '513_a', '513_r', '513_p', '514_a', '514_r', '514_p', '515_a', '515_r', '515_p', '516_a', '516_r', '516_p', '517_a', '517_r', '517_p', '518_a', '518_r', '518_p', '520_a', '520_r', '520_p', '521_a', '521_r', '521_p', '522_a', '522_r', '522_p', '523_a', '523_r', '523_p', '600_a', '600_r', '600_p', '610_a', '610_r', '610_p', '611_a', '611_r', '611_p', '612_

## Get All Kode Provinsi

In [None]:
# provinsi
if __name__ == "__main__":
    # Partial test: fetch the province mapping, then show it ordered by name.
    set_global_session()
    jakarta_tz = timezone('Asia/Jakarta')
    start_time = datetime.datetime.now(jakarta_tz)
    date_label = datetime.datetime.now(jakarta_tz).strftime("%-d-%m-%Y")
    print("Start script at: " + date_label + " at " + str(start_time))

    allprovinsi = get_all_provinsi()
    print(allprovinsi)

    # Rebuild the dict so its entries are ordered by the value (province name)
    # rather than by the order the portal returned them in.
    pairs_by_name = sorted(allprovinsi.items(), key=lambda pair: pair[1])
    allprovinsi = dict(pairs_by_name)
    print(allprovinsi)

    duration = datetime.datetime.now(jakarta_tz) - start_time
    print("Duration: " + str(duration))

Start script at: 4-10-2021 at 2021-10-04 09:00:52.893709+07:00
{'--': 'Semua Provinsi', '33': 'Prov. Sulawesi Barat', '27': 'Prov. Malut', '01': 'Prov. Aceh', '02': 'Prov. Sumut', '03': 'Prov. Sumatera Barat', '04': 'Prov. Riau', '05': 'Prov. Jambi', '06': 'Prov. Sumsel', '07': 'Prov. Bengkulu', '08': 'Prov. Lampung', '09': 'Prov. DKI Jakarta', '10': 'Prov. Jabar', '11': 'Prov. Jateng', '12': 'Prov. DIY', '13': 'Prov. Jawa Timur', '14': 'Prov. Kalbar', '15': 'Prov. Kalteng', '17': 'Prov. Kaltim', '16': 'Prov. Kalsel', '22': 'Prov. Bali', '23': 'Prov. NTB', '24': 'Prov. NTT', '20': 'Prov. Sulsel', '19': 'Prov. Sulteng', '18': 'Prov. Sulut', '21': 'Prov. Sultra', '25': 'Prov. Maluku', '26': 'Prov. Papua', '28': 'Prov. Banten', '29': 'Prov. Babel', '30': 'Prov. Gorontalo', '31': 'Prov. Kepulauan Riau', '32': 'Prov. Papua Barat', '34': 'Prov. Kalimantan Utara'}
{'01': 'Prov. Aceh', '29': 'Prov. Babel', '22': 'Prov. Bali', '28': 'Prov. Banten', '07': 'Prov. Bengkulu', '12': 'Prov. DIY', '09

## Get All Pemdas in All Provinsi

In [None]:
# pemda in a provinsi
if __name__ == "__main__":
    # Partial test: list the pemdas of every province code and collect the
    # number of entries returned for each one.
    set_global_session()
    # Timezone-aware start time, as in the other cells, so the printed time
    # agrees with the Asia/Jakarta date printed next to it.
    start_time = datetime.datetime.now(timezone('Asia/Jakarta'))
    print("Start script at: " + datetime.datetime.now(timezone('Asia/Jakarta')).strftime("%-d-%m-%Y") + " at " + str(start_time))
    provs = ['--', '01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34']
    jml = []  # number of pemdas found per province code
    for prov in provs:
        print("Kode Provinsi", prov)
        print("Pemda in prov", prov, ": ")
        pemdas = get_pemdas(prov)

        print(pemdas)
        print(len(pemdas))
        jml.append(len(pemdas))
    jml.sort()
    print(jml)

    duration = datetime.datetime.now(timezone('Asia/Jakarta')) - start_time
    print("Duration: " + str(duration))

Start script at: 3-10-2021 at 2021-10-03 06:33:46.684366
Kode Provinsi --
Pemda in prov -- : 
<class 'dict'>
{'--': 'Nasional'}
1
Kode Provinsi 01
Pemda in prov 01 : 
<class 'dict'>
{'22': 'Kab. Pidie Jaya', '23': 'Kota Subulussalam', '04': 'Kab. Aceh Singkil', '09': 'Kab. Bireuen', '21': 'Kab. Bener Meriah', '00': 'Prov. Aceh', '02': 'Kab. Aceh Besar', '10': 'Kab. Pidie', '08': 'Kab. Aceh Utara', '07': 'Kab. Aceh Timur', '03': 'Kab. Aceh Selatan', '01': 'Kab. Aceh Barat', '05': 'Kab. Aceh Tengah', '06': 'Kab. Aceh Tenggara', '12': 'Kota Banda Aceh', '13': 'Kota Sabang', '11': 'Kab. Simeulue', '14': 'Kota Langsa', '15': 'Kota Lhokseumawe', '17': 'Kab. Aceh Barat Daya', '16': 'Kab. Gayo Lues', '18': 'Kab. Aceh Jaya', '19': 'Kab. Nagan Raya', '20': 'Kab. Aceh Tamiang', '--': 'Semua pemda'}
25
Kode Provinsi 02
Pemda in prov 02 : 
<class 'dict'>
{'07': 'Kab. Mandailing Natal', '13': 'Kab. Toba Samosir', '26': 'Kab. Batu Bara', '27': 'Kab. Padang Lawas', '28': 'Kab. Padang Lawas Utara', '30

## Get All Tahuns

In [None]:
# get all tahun
if __name__ == "__main__":
    # Partial test: fetch the list of available years and time the call.
    set_global_session()
    # Timezone-aware start time, as in the other cells, so the printed time
    # agrees with the Asia/Jakarta date printed next to it.
    start_time = datetime.datetime.now(timezone('Asia/Jakarta'))
    print("Start script at: " + datetime.datetime.now(timezone('Asia/Jakarta')).strftime("%-d-%m-%Y") + " at " + str(start_time))
    alltahun = get_all_tahun()
    print(alltahun)
    duration = datetime.datetime.now(timezone('Asia/Jakarta')) - start_time
    print("Duration: " + str(duration))

Start script at: 3-10-2021 at 2021-10-03 10:25:07.692468
['2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020', '2021', '2022']
['2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020', '2021', '2022']
Duration: 0:00:12.091179


## Loop All Pemdas in a Provinsi (return data)

In [None]:
# loop all pemda in a provinsi
if __name__ == "__main__":
    # Partial test: request the 2021 data of every pemda in one province and
    # print each raw response.
    set_global_session()
    jakarta_tz = timezone('Asia/Jakarta')
    start_time = datetime.datetime.now(jakarta_tz)
    date_label = datetime.datetime.now(jakarta_tz).strftime("%-d-%m-%Y")
    print("Start script at: " + date_label + " at " + str(start_time))

    kode_prov = "01"
    pemdas = get_pemdas(kode_prov)
    print(pemdas)

    # Iterating the dict yields its keys; a new token is requested per call.
    for pemda in pemdas:
        request_token = get_token()
        pemda_result = filter(request_token, "2021", kode_prov, pemda)
        print(pemda_result)

    duration = datetime.datetime.now(jakarta_tz) - start_time
    print("Duration: " + str(duration))

Start script at: 3-10-2021 at 2021-10-03 16:10:53.532413+07:00
{'22': 'Kab. Pidie Jaya', '23': 'Kota Subulussalam', '04': 'Kab. Aceh Singkil', '09': 'Kab. Bireuen', '21': 'Kab. Bener Meriah', '00': 'Prov. Aceh', '02': 'Kab. Aceh Besar', '10': 'Kab. Pidie', '08': 'Kab. Aceh Utara', '07': 'Kab. Aceh Timur', '03': 'Kab. Aceh Selatan', '01': 'Kab. Aceh Barat', '05': 'Kab. Aceh Tengah', '06': 'Kab. Aceh Tenggara', '12': 'Kota Banda Aceh', '13': 'Kota Sabang', '11': 'Kab. Simeulue', '14': 'Kota Langsa', '15': 'Kota Lhokseumawe', '17': 'Kab. Aceh Barat Daya', '16': 'Kab. Gayo Lues', '18': 'Kab. Aceh Jaya', '19': 'Kab. Nagan Raya', '20': 'Kab. Aceh Tamiang', '--': 'Semua pemda'}
{"postur":{"400":{"text":"Pendapatan","child":{"410":{"text":"Pendapatan Asli Daerah","child":{"411":{"text":"Pajak Daerah","value":{"anggaran":7865000000,"realisasi":4105026365,"persentase":"52,19 %"}},"412":{"text":"Retribusi Daerah","value":{"anggaran":63126146000,"realisasi":29123608691.119999,"persentase":"46,14 %

KeyboardInterrupt: ignored

In [None]:
# List every pemda of one province twice: once by unpacking the items, once
# by looking each key up in the mapping.
kode_prov = "01"
pemdas = get_pemdas(kode_prov)
print(pemdas)
for kdpemda, nmpemda in pemdas.items():
    print(kdpemda)
    print(nmpemda)

# Iterate the keys themselves: looping over .items() yields (key, value)
# tuples, and indexing the dict with such a tuple raises KeyError.
for kdpemda in pemdas:
    print(kdpemda)
    print(pemdas[kdpemda])

{'22': 'Kab. Pidie Jaya', '23': 'Kota Subulussalam', '04': 'Kab. Aceh Singkil', '09': 'Kab. Bireuen', '21': 'Kab. Bener Meriah', '00': 'Prov. Aceh', '02': 'Kab. Aceh Besar', '10': 'Kab. Pidie', '08': 'Kab. Aceh Utara', '07': 'Kab. Aceh Timur', '03': 'Kab. Aceh Selatan', '01': 'Kab. Aceh Barat', '05': 'Kab. Aceh Tengah', '06': 'Kab. Aceh Tenggara', '12': 'Kota Banda Aceh', '13': 'Kota Sabang', '11': 'Kab. Simeulue', '14': 'Kota Langsa', '15': 'Kota Lhokseumawe', '17': 'Kab. Aceh Barat Daya', '16': 'Kab. Gayo Lues', '18': 'Kab. Aceh Jaya', '19': 'Kab. Nagan Raya', '20': 'Kab. Aceh Tamiang', '--': 'Semua pemda'}
22
Kab. Pidie Jaya
23
Kota Subulussalam
04
Kab. Aceh Singkil
09
Kab. Bireuen
21
Kab. Bener Meriah
00
Prov. Aceh
02
Kab. Aceh Besar
10
Kab. Pidie
08
Kab. Aceh Utara
07
Kab. Aceh Timur
03
Kab. Aceh Selatan
01
Kab. Aceh Barat
05
Kab. Aceh Tengah
06
Kab. Aceh Tenggara
12
Kota Banda Aceh
13
Kota Sabang
11
Kab. Simeulue
14
Kota Langsa
15
Kota Lhokseumawe
17
Kab. Aceh Barat Daya
16
Kab. 

KeyError: ignored

In [None]:
# Print the header list that an earlier cell stored in ``header_data``.
if __name__ == "__main__":
    print(header_data)

['no', 'tahun', 'pemda', 'provinsi', 'wilayah', 'disclaimer', 'special_row', '400_a', '400_r', '400_p', '410_a', '410_r', '410_p', '411_a', '411_r', '411_p', '412_a', '412_r', '412_p', '413_a', '413_r', '413_p', '414_a', '414_r', '414_p', '420_a', '420_r', '420_p', '421_a', '421_r', '421_p', '422_a', '422_r', '422_p', '423_a', '423_r', '423_p', '430_a', '430_r', '430_p', '431_a', '431_r', '431_p', '432_a', '432_r', '432_p', '433_a', '433_r', '433_p', '434_a', '434_r', '434_p', '435_a', '435_r', '435_p', '436_a', '436_r', '436_p', '500_a', '500_r', '500_p', '510_a', '510_r', '510_p', '511_a', '511_r', '511_p', '512_a', '512_r', '512_p', '513_a', '513_r', '513_p', '514_a', '514_r', '514_p', '515_a', '515_r', '515_p', '516_a', '516_r', '516_p', '517_a', '517_r', '517_p', '518_a', '518_r', '518_p', '520_a', '520_r', '520_p', '521_a', '521_r', '521_p', '522_a', '522_r', '522_p', '523_a', '523_r', '523_p', '600_a', '600_r', '600_p', '610_a', '610_r', '610_p', '611_a', '611_r', '611_p', '612_

## Get Kode Akun

In [None]:
# get_kodeakun
if __name__ == "__main__":
    # Partial test: fetch the account codes and print them, followed by the
    # combined, sorted list of all codes.
    set_global_session()
    # Bind the results to the same names that are printed and combined below;
    # the earlier spelling mismatch meant stale notebook globals were used.
    # NOTE(review): assumes get_kodeakun() returns four values - confirm.
    kodeposturs, kodeakuns, kodesubakuns, alltahun = get_kodeakun()
    print(kodeposturs)
    print(kodeakuns)
    print(kodesubakuns)
    # sorted() returns the new list; list.sort() returns None, and wrapping
    # the concatenation in another list would sort a one-element list.
    gabung = sorted(kodeposturs + kodeakuns + kodesubakuns)
    print(gabung)

2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
['400', '500', '600']
['410', '420', '430', '510', '520', '530', '540', '610', '620']
['411', '412', '413', '414', '416', '421', '422', '423', '430', '431', '432', '433', '434', '435', '436', '439', '511', '512', '513', '514', '515', '516', '517', '518', '519', '520', '521', '522', '523', '524', '525', '526', '531', '541', '542', '611', '612', '613', '614', '615', '616', '617', '619', '621', '622', '623', '624', '625', '626', '627', '629']


In [None]:
# gabungan
if __name__ == "__main__":
    # Partial test: fetch one (tahun, kdprov, kdpemda) response and show the
    # decoded JSON.
    set_global_session()
    pemda_result = filter(get_token(), '2010', '26', '21')
    if pemda_result is None:
        # Nothing came back; json.loads(None) would raise TypeError.
        print("Kosong!")
    else:
        json_result = json.loads(pemda_result)
        print(json_result)

{'postur': {'400': {'text': 'Pendapatan', 'child': {'410': {'text': 'PAD', 'child': {'412': {'text': 'Retribusi daerah', 'value': {'anggaran': 0, 'realisasi': 47900000, 'persentase': '-'}}, '413': {'text': 'Hasil pengelolaan kekayaan daerah yang dipisahkan', 'value': {'anggaran': 0, 'realisasi': 877371805, 'persentase': '-'}}, '414': {'text': 'Lain-lain PAD yang sah', 'value': {'anggaran': 1500000000, 'realisasi': 15595348835, 'persentase': '1.039,69 %'}}}, 'total': {'anggaran': 1500000000, 'realisasi': 16520620640, 'persentase': '1.101,37 %'}}, '420': {'text': 'Daper', 'child': {'421': {'text': 'Dana bagi hasil pajak/bagi hasil bukan pajak', 'value': {'anggaran': 61143096679, 'realisasi': 87499388329, 'persentase': '143,11 %'}}, '422': {'text': 'Dana alokasi umum', 'value': {'anggaran': 391523731000, 'realisasi': 390047731000, 'persentase': '99,62 %'}}, '423': {'text': 'Dana alokasi khusus', 'value': {'anggaran': 34677900000, 'realisasi': 34677900000, 'persentase': '100,00 %'}}}, 'tot

# Test

In [None]:
# gabungan
if __name__ == "__main__":
    # Test run: fetch the 2021 data of every pemda in one province, flatten
    # each response with parse_data() and collect the rows in a DataFrame.
    set_global_session()
    data_dict = {}
    start_time = datetime.datetime.now(timezone('Asia/Jakarta'))
    print("Start script at: " + datetime.datetime.now(timezone('Asia/Jakarta')).strftime("%-d-%m-%Y") + " at " + str(start_time))
    # kodeposturs, kodeakuns, kodesubakuns = get_kodeakun()
    kodeposturs = ['400', '500', '600']
    kodeakuns = ['410', '420', '430', '510', '520', '530', '540', '610', '620']
    kodesubakuns = ['411', '412', '413', '414', '416', '421', '422', '423', '430', '431', '432', '433', '434', '435', '436', '439', '511', '512', '513', '514', '515', '516', '517', '518', '519', '520', '521', '522', '523', '524', '525', '526', '531', '541', '542', '611', '612', '613', '614', '615', '616', '617', '619', '621', '622', '623', '624', '625', '626', '627', '629']
    kode_prov = "09" # get_all_prov return dict with kdprov as key and nama prov as value
    nmprov = "Test wae DKI dul"
    pemdas = get_pemdas(kode_prov)

    header_data = ['tahun', 'kdpemda', 'pemda', 'kdprov', 'prov', 'wilayah', 'disclaimer', 'special_row', '400_a', '400_r', '400_p', '410_a', '410_r', '410_p', '411_a', '411_r', '411_p', '412_a', '412_r', '412_p', '413_a', '413_r', '413_p', '414_a', '414_r', '414_p', '420_a', '420_r', '420_p', '421_a', '421_r', '421_p', '422_a', '422_r', '422_p', '423_a', '423_r', '423_p', '430_a', '430_r', '430_p', '431_a', '431_r', '431_p', '432_a', '432_r', '432_p', '433_a', '433_r', '433_p', '434_a', '434_r', '434_p', '435_a', '435_r', '435_p', '436_a', '436_r', '436_p', '500_a', '500_r', '500_p', '510_a', '510_r', '510_p', '511_a', '511_r', '511_p', '512_a', '512_r', '512_p', '513_a', '513_r', '513_p', '514_a', '514_r', '514_p', '515_a', '515_r', '515_p', '516_a', '516_r', '516_p', '517_a', '517_r', '517_p', '518_a', '518_r', '518_p', '520_a', '520_r', '520_p', '521_a', '521_r', '521_p', '522_a', '522_r', '522_p', '523_a', '523_r', '523_p', '600_a', '600_r', '600_p', '610_a', '610_r', '610_p', '611_a', '611_r', '611_p', '612_a', '612_r', '612_p', '613_a', '613_r', '613_p', '614_a', '614_r', '614_p', '615_a', '615_r', '615_p', '620_a', '620_r', '620_p', '621_a', '621_r', '621_p', '622_a', '622_r', '622_p', '623_a', '623_r', '623_p', '624_a', '624_r', '624_p', '626_a', '626_r', '626_p', '625_a', '625_r', '625_p', '616_a', '616_r', '616_p', '617_a', '617_r', '617_p', '627_a', '627_r', '627_p', '439_a', '439_r', '439_p', '519_a', '519_r', '519_p', '619_a', '619_r', '619_p', '629_a', '629_r', '629_p', '416_a', '416_r', '416_p', '524_a', '524_r', '524_p', '525_a', '525_r', '525_p', '526_a', '526_r', '526_p', '530_a', '530_r', '530_p', '531_a', '531_r', '531_p', '540_a', '540_r', '540_p', '541_a', '541_r', '541_p', '542_a', '542_r', '542_p']
    #     header_data = generate_header()

    rows = []  # one flattened dict per pemda that returned data
    for kdpemda, nmpemda in pemdas.items():
        pemda_result = filter(get_token(), "2021", kode_prov, kdpemda)
        if pemda_result is None:
            # No payload for this pemda; json.loads(None) would raise TypeError.
            print("Kosong!")
            continue
        json_result = json.loads(pemda_result)
        result_dict = parse_data(json_result, kodeposturs, kodeakuns, kodesubakuns)
        result_dict['tahun'] = '2021'
        result_dict['kdpemda'] = kdpemda
        result_dict['pemda'] = nmpemda
        result_dict['kdprov'] = kode_prov
        result_dict['prov'] = nmprov
        print(result_dict)
        rows.append(result_dict)

    # DataFrame.append() no longer exists in pandas 2.x (and copied the whole
    # frame on every call); build the frame once from the collected rows.
    datadf = pd.DataFrame(rows, columns=header_data)
    print("datadf:")
    print(datadf)
    duration = datetime.datetime.now(timezone('Asia/Jakarta')) - start_time
    print("Duration: " + str(duration))

Start script at: 3-10-2021 at 2021-10-03 16:06:27.097907+07:00
{'wilayah': 'Prov. DKI Jakarta', 'disclaimer': 'APBD per 12 Agustus 2021 dan Realisasi APBD per 27 September 2021 (data realisasi bersifat sementara)', 'special_row': False, '400_a': '72187510759990', '400_r': '35464841161236.31', '400_p': '49,13%', '500_a': '72967009600455', '500_r': '32430285269777', '500_p': '44,45%', '600_a': '779498840465', '600_r': '5930225559451', '600_p': '760,77%', '410_a': '51891120970162', '410_r': '25551369641540.31', '410_p': '49,24%', '420_a': '16873544789828', '420_r': '9876665591859', '420_p': '58,53%', '430_a': '3422845000000', '430_r': '36805927837', '430_p': '1,08%', '510_a': '60404641013089', '510_r': '29767322419094', '510_p': '49,28%', '520_a': '9931324102738', '520_r': '1920683213705', '520_p': '19,34%', '530_a': '2133030303357', '530_r': '482260971585', '530_p': '22,61%', '540_a': '498014181271', '540_r': '260018665393', '540_p': '52,21%', '610_a': '12009082856051', '610_r': '5963855

In [None]:
import time

import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry


def requests_retry_session(
    retries=300,
    backoff_factor=0.3,
    status_forcelist=(500, 502, 504),
    session=None,
):
    """Return a requests session whose HTTP(S) adapters retry failed requests.

    Args:
        retries: Used as the total, read and connect retry limit.
        backoff_factor: Passed through to ``Retry(backoff_factor=...)``.
        status_forcelist: HTTP status codes passed to ``Retry`` as
            ``status_forcelist``.
        session: Existing session to configure; a new ``requests.Session``
            is created when this is falsy.

    Returns:
        The session passed in (or the newly created one) with the retrying
        adapter mounted for both ``http://`` and ``https://``.
    """
    if not session:
        session = requests.Session()

    retrying_adapter = HTTPAdapter(
        max_retries=Retry(
            total=retries,
            read=retries,
            connect=retries,
            backoff_factor=backoff_factor,
            status_forcelist=status_forcelist,
        )
    )
    for scheme in ('http://', 'https://'):
        session.mount(scheme, retrying_adapter)
    return session

# Smoke test of the retrying session against a local port, reporting the
# outcome and the total elapsed time.
t0 = time.time()
try:
    retrying_session = requests_retry_session()
    response = retrying_session.get('http://localhost:9999')
except Exception as err:
    print('It failed :(', type(err).__name__)
else:
    print('It eventually worked', response.status_code)
finally:
    # Runs on success and on failure alike.
    t1 = time.time()
    print('Took', t1 - t0, 'seconds')

Took 20.107375621795654 seconds


KeyboardInterrupt: ignored

# Gabung dan Remove Duplicate hasilnya

In [None]:
# Credits: https://stackoverflow.com/questions/20906474/import-multiple-csv-files-into-pandas-and-concatenate-into-one-dataframe
import pandas as pd

# load all csv

import pandas as pd
import glob

import os

path = r'.' # use your path
# Skip frame.csv: it is the merged output this notebook writes into the same
# directory, so re-running this cell would otherwise merge the previous result
# back in and multiply the rows.
all_files = [
    csv_path
    for csv_path in glob.glob(path + "/*.csv")
    if os.path.basename(csv_path) != "frame.csv"
]

# One DataFrame per input file, in the order glob returned them.
li = [pd.read_csv(filename, index_col=None, header=0) for filename in all_files]

# pd.concat raises ValueError on an empty list; fall back to an empty frame
# when there is nothing to merge.
frame = pd.concat(li, axis=0, ignore_index=True) if li else pd.DataFrame()

In [None]:
# Write the merged frame to frame.csv in the current working directory.
# The row index is written as the first, unnamed column.
frame.to_csv("frame.csv")

In [None]:
# Show the merged rows ordered by year, province code and pemda code.
# sort_values() returns a new frame here; ``frame`` itself is not reordered.
frame.sort_values(by=["tahun", "kdprov", "kdpemda"], ascending=True)

Unnamed: 0.1,Unnamed: 0,tahun,kdpemda,nmpemda,kdprov,nmprov,wilayah,disclaimer,special_row,400_a,400_r,400_p,410_a,410_r,410_p,411_a,411_r,411_p,412_a,412_r,412_p,413_a,413_r,413_p,414_a,414_r,414_p,416_a,416_r,416_p,420_a,420_r,420_p,421_a,421_r,421_p,422_a,422_r,422_p,423_a,...,615_p,616_a,616_r,616_p,617_a,617_r,617_p,619_a,619_r,619_p,620_a,620_r,620_p,621_a,621_r,621_p,622_a,622_r,622_p,623_a,623_r,623_p,624_a,624_r,624_p,625_a,625_r,625_p,626_a,626_r,626_p,627_a,627_r,627_p,629_a,629_r,629_p,no,pemda,provinsi
5875,30.0,2010,--,Semua pemda,26,Prov. Papua,se-Prov. Papua,Data per 20 Juli 2010 (Anggaran),False,2.112641e+13,2.222993e+13,105.22%,1.017093e+12,8.803176e+11,86.55%,4.161017e+11,3.601770e+11,86.56%,1.259419e+11,1.145637e+11,90.97%,1.440900e+11,1.410351e+11,97.88%,3.309591e+11,2.645418e+11,79.93%,,,,1.471858e+13,1.525745e+13,103.66%,2.221564e+12,2.697276e+12,121.41%,1.100030e+13,1.103977e+13,100.36%,1.496721e+12,...,2.54%,,,,,,,,,,7.035789e+11,5.965183e+11,84.78%,1.016351e+11,3.000000e+11,295.17%,2.423000e+11,7.230000e+10,29.84%,3.096437e+11,2.075338e+11,67.02%,5.000000e+10,0.0,0.00%,,,,0.0,1.668447e+10,-,,,,,,,,,
5855,10.0,2010,00,Prov. Papua,26,Prov. Papua,Prov. Papua,Data per 20 Juli 2010 (Anggaran),False,5.284526e+12,5.661737e+12,107.14%,3.578017e+11,3.800256e+11,106.21%,2.910297e+11,2.524348e+11,86.74%,2.271700e+10,2.117195e+10,93.20%,2.917500e+10,3.034083e+10,104.00%,1.488000e+10,7.607802e+10,511.28%,,,,1.516928e+12,1.762534e+12,116.19%,4.587000e+11,5.389813e+11,117.50%,1.058228e+12,1.148498e+12,108.53%,0.000000e+00,...,,,,,,,,,,,1.600000e+11,1.533642e+11,95.85%,1.000000e+11,1.000000e+11,100.00%,6.000000e+10,1.000000e+10,16.67%,0.000000e+00,4.336421e+10,-,,,,,,,,,,,,,,,,,,
5857,12.0,2010,01,Kab. Biak Numfor,26,Prov. Papua,Kab. Biak Numfor,Data per 20 Juli 2010 (Anggaran),False,5.219799e+11,5.516495e+11,105.68%,2.102688e+10,1.729951e+10,82.27%,2.675500e+09,2.078738e+09,77.70%,9.285659e+09,6.280654e+09,67.64%,6.000000e+08,6.460826e+09,1.076.80%,8.465721e+09,2.479291e+09,29.29%,,,,4.111070e+11,4.148614e+11,100.91%,4.047009e+10,4.422451e+10,109.28%,3.215854e+11,3.215854e+11,100.00%,4.905150e+10,...,,,,,,,,,,,7.211975e+10,4.300626e+10,59.63%,,,,3.000000e+09,3.000000e+09,100.00%,6.911975e+10,4.000626e+10,57.88%,,,,,,,,,,,,,,,,,,
5856,11.0,2010,02,Kab. Jayapura,26,Prov. Papua,Kab. Jayapura,Data per 20 Juli 2010 (Anggaran),False,5.309455e+11,6.670335e+11,125.63%,1.733870e+10,2.674493e+10,154.25%,2.622000e+09,4.701707e+09,179.32%,3.806700e+09,8.549482e+09,224.59%,3.800000e+09,4.785625e+09,125.94%,7.110000e+09,8.708118e+09,122.48%,,,,4.636593e+11,4.859000e+11,104.80%,4.010000e+10,6.234073e+10,155.46%,3.783672e+11,3.783672e+11,100.00%,4.519210e+10,...,,,,,,,,,,,2.000000e+09,1.868447e+10,934.22%,,,,2.000000e+09,2.000000e+09,100.00%,,,,,,,,,,0.0,1.668447e+10,-,,,,,,,,,
5860,15.0,2010,03,Kab. Jayawijaya,26,Prov. Papua,Kab. Jayawijaya,Data per 20 Juli 2010 (Anggaran),False,5.794545e+11,6.539642e+11,112.86%,2.340000e+10,2.232758e+10,95.42%,3.404903e+09,1.837844e+09,53.98%,7.992147e+09,5.498149e+09,68.79%,5.230000e+09,4.989408e+09,95.40%,6.772950e+09,1.000218e+10,147.68%,,,,4.947399e+11,5.082214e+11,102.72%,3.500000e+10,4.848155e+10,138.52%,3.874157e+11,3.874157e+11,100.00%,7.232420e+10,...,1.00%,,,,,,,,,,9.000000e+10,4.000000e+10,44.44%,,,,6.000000e+10,1.000000e+10,16.67%,3.000000e+10,3.000000e+10,100.00%,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8458,2527.0,2020,03,Kab. Nunukan,34,Prov. Kalimantan Utara,Kab. Nunukan,Data APBD per 8 Juli 2021 dan Realisasi APBD 2...,False,1.434942e+12,1.354212e+12,94.37%,9.615113e+10,1.296318e+11,134.82%,1.612045e+10,2.834364e+10,175.82%,3.529481e+09,3.930832e+09,111.37%,5.100000e+09,2.994239e+09,58.71%,7.140120e+10,9.436304e+10,132.16%,,,,1.013993e+12,8.822661e+11,87.01%,2.151173e+11,1.753401e+11,81.51%,5.560868e+11,4.979182e+11,89.54%,2.427894e+11,...,-,0.0,0.0,-,0.0,6300000.0,-,0.0,0.0,-,3.000000e+09,4.488698e+10,1.496.23%,0.000000e+00,0.000000e+00,-,3.000000e+09,3.000000e+09,100.00%,0.000000e+00,4.188698e+10,-,0.000000e+00,0.0,-,0.0,0.0,-,0.0,0.000000e+00,-,,,,0.0,0.0,-,,,
5487,2525.0,2020,04,Kota Tarakan,34,Prov. Kalimantan Utara,Kota Tarakan,Data APBD per 8 Juli 2021 dan Realisasi APBD 2...,False,1.008918e+12,9.244486e+11,91.63%,1.105152e+11,1.078503e+11,97.59%,7.005500e+10,4.453178e+10,63.57%,2.073971e+10,1.088077e+10,52.46%,6.000000e+09,2.923988e+09,48.73%,1.372050e+10,4.951373e+10,360.87%,,,,7.646963e+11,6.919104e+11,90.48%,1.860940e+11,1.788803e+11,96.12%,4.714073e+11,4.259607e+11,90.36%,1.071950e+11,...,-,0.0,0.0,-,0.0,0.0,-,0.0,0.0,-,1.600000e+10,1.830510e+10,114.41%,0.000000e+00,0.000000e+00,-,1.600000e+10,1.830510e+10,114.41%,0.000000e+00,0.000000e+00,-,0.000000e+00,0.0,-,0.0,0.0,-,0.0,0.000000e+00,-,,,,0.0,0.0,-,,,
8456,2525.0,2020,04,Kota Tarakan,34,Prov. Kalimantan Utara,Kota Tarakan,Data APBD per 8 Juli 2021 dan Realisasi APBD 2...,False,1.008918e+12,9.244486e+11,91.63%,1.105152e+11,1.078503e+11,97.59%,7.005500e+10,4.453178e+10,63.57%,2.073971e+10,1.088077e+10,52.46%,6.000000e+09,2.923988e+09,48.73%,1.372050e+10,4.951373e+10,360.87%,,,,7.646963e+11,6.919104e+11,90.48%,1.860940e+11,1.788803e+11,96.12%,4.714073e+11,4.259607e+11,90.36%,1.071950e+11,...,-,0.0,0.0,-,0.0,0.0,-,0.0,0.0,-,1.600000e+10,1.830510e+10,114.41%,0.000000e+00,0.000000e+00,-,1.600000e+10,1.830510e+10,114.41%,0.000000e+00,0.000000e+00,-,0.000000e+00,0.0,-,0.0,0.0,-,0.0,0.000000e+00,-,,,,0.0,0.0,-,,,
5488,2526.0,2020,05,Kab. Tana Tidung,34,Prov. Kalimantan Utara,Kab. Tana Tidung,Data APBD per 8 Juli 2021 dan Realisasi APBD 2...,False,7.871412e+11,7.303595e+11,92.79%,2.616040e+10,1.194172e+10,45.65%,4.200000e+09,3.440104e+09,81.91%,6.000000e+08,9.722951e+08,162.05%,8.000000e+09,4.029894e+09,50.37%,1.336040e+10,3.499429e+09,26.19%,,,,6.785863e+11,6.210959e+11,91.53%,1.169272e+11,1.333586e+11,114.05%,4.621333e+11,4.123792e+11,89.23%,9.952585e+10,...,-,0.0,0.0,-,0.0,0.0,-,0.0,0.0,-,0.000000e+00,0.000000e+00,-,0.000000e+00,0.000000e+00,-,0.000000e+00,0.000000e+00,-,0.000000e+00,0.000000e+00,-,0.000000e+00,0.0,-,0.0,0.0,-,0.0,0.000000e+00,-,,,,0.0,0.0,-,,,


In [None]:
# DataFrame.drop_duplicates() returns a new frame instead of modifying `frame`
# in place, so the result must be assigned back for the shape to reflect it.
# NOTE(review): without `subset=`, two rows are duplicates only when every
# column matches, including the leftover "Unnamed: 0"/"Unnamed: 0.1" index
# columns - confirm whether a subset of columns should be used instead.
frame = frame.drop_duplicates()
frame.shape

(10252, 201)

In [None]:
frame.head()


Unnamed: 0.1,Unnamed: 0,tahun,kdpemda,nmpemda,kdprov,nmprov,wilayah,disclaimer,special_row,400_a,400_r,400_p,410_a,410_r,410_p,411_a,411_r,411_p,412_a,412_r,412_p,413_a,413_r,413_p,414_a,414_r,414_p,416_a,416_r,416_p,420_a,420_r,420_p,421_a,421_r,421_p,422_a,422_r,422_p,423_a,...,615_p,616_a,616_r,616_p,617_a,617_r,617_p,619_a,619_r,619_p,620_a,620_r,620_p,621_a,621_r,621_p,622_a,622_r,622_p,623_a,623_r,623_p,624_a,624_r,624_p,625_a,625_r,625_p,626_a,626_r,626_p,627_a,627_r,627_p,629_a,629_r,629_p,no,pemda,provinsi
0,0.0,2011,22,Kab. Pidie Jaya,1,Prov. Aceh,Kab. Pidie Jaya,Data per 5 Juli 2011 (Anggaran) dan 4 Nov 2016...,False,352381800000.0,440530100000.0,125.02%,11440850000.0,8243166000.0,72.05%,3856335000.0,3245651000.0,84.16%,3675446000.0,1850179000.0,50.34%,365000000.0,241835900.0,66.26%,3544067000.0,2905500000.0,81.98%,,,,334987100000.0,350404400000.0,104.60%,20258570000.0,31629820000.0,156.13%,268569800000.0,272626800000.0,101.51%,46158800000.0,...,,,,,,,,,,,1000000000.0,1000000000.0,100.00%,,,,1000000000.0,1000000000.0,100.00%,,,,,,,,,,,,,,,,,,,,,
1,1.0,2011,23,Kota Subulussalam,1,Prov. Aceh,Kota Subulussalam,Data per 5 Juli 2011 (Anggaran) dan 4 Nov 2016...,False,272180000000.0,323457100000.0,118.84%,8581757000.0,6825439000.0,79.53%,2520000000.0,3799005000.0,150.75%,1595800000.0,703010400.0,44.05%,5000000.0,60458970.0,1.209.18%,4460957000.0,2262965000.0,50.73%,,,,241478700000.0,240664200000.0,99.66%,24859280000.0,27975230000.0,112.53%,185847700000.0,181919400000.0,97.89%,30771700000.0,...,-,,,,,,,,,,0.0,13480990000.0,-,,,,,,,0.0,13480990000.0,-,,,,,,,,,,,,,,,,,,
2,2.0,2011,4,Kab. Aceh Singkil,1,Prov. Aceh,Kab. Aceh Singkil,Data per 5 Juli 2011 (Anggaran) dan 4 Nov 2016...,False,365958500000.0,406766600000.0,111.15%,14077900000.0,10926050000.0,77.61%,2745000000.0,1927370000.0,70.21%,5317898000.0,4153665000.0,78.11%,1500000000.0,1078185000.0,71.88%,4515000000.0,3766830000.0,83.43%,,,,325343900000.0,326429000000.0,100.33%,29063380000.0,34090170000.0,117.30%,257048200000.0,253117400000.0,98.47%,39232300000.0,...,0.00%,,,,,,,,,,400000000.0,0.0,0.00%,,,,400000000.0,0.0,0.00%,,,,,,,,,,,,,,,,,,,,,
3,3.0,2011,9,Kab. Bireuen,1,Prov. Aceh,Kab. Bireuen,Data per 5 Juli 2011 (Anggaran) dan 4 Nov 2016...,False,732517800000.0,706002500000.0,96.38%,60535080000.0,16524720000.0,27.30%,9600000000.0,7656534000.0,79.76%,6600000000.0,4503750000.0,68.24%,2025000000.0,2247058000.0,110.97%,42310080000.0,2117374000.0,5.00%,,,,572417300000.0,571517500000.0,99.84%,46834970000.0,46752000000.0,99.82%,480010000000.0,479211500000.0,99.83%,45572300000.0,...,,,,,,,,,,,26770330000.0,27328270000.0,102.08%,,,,,,,26770330000.0,27128270000.0,101.34%,,,,,,,0.0,200000000.0,-,,,,,,,,,
4,4.0,2011,21,Kab. Bener Meriah,1,Prov. Aceh,Kab. Bener Meriah,Data per 5 Juli 2011 (Anggaran) dan 4 Nov 2016...,False,429028400000.0,436769800000.0,101.80%,20470100000.0,13046220000.0,63.73%,1618000000.0,1307414000.0,80.80%,7977100000.0,3045580000.0,38.18%,2500000000.0,1117955000.0,44.72%,8375000000.0,7575270000.0,90.45%,,,,336657900000.0,330449700000.0,98.16%,17925420000.0,11931300000.0,66.56%,285611100000.0,285406100000.0,99.93%,33121400000.0,...,,,,,,,,,,,3900000000.0,3300000000.0,84.62%,,,,3900000000.0,3300000000.0,84.62%,,,,,,,,,,,,,,,,,,,,,


# Versi Awal

In [None]:
# Portal Data DJPK Grabber by beta.uliansyah@pknstan.ac.id
#
# Early version of the scraper. It
#   1. loads the portal landing page to obtain the form token plus the lists
#      of years and provinces,
#   2. queries the national figures for every year to collect the account
#      codes that become CSV columns,
#   3. queries every pemda of every province for the years 2019-2020 and
#      appends one CSV row per non-empty response.
import requests
from bs4 import BeautifulSoup
import json
import csv
import logging
import random
import time
from datetime import datetime
from pytz import timezone
import sys
from requests.exceptions import ConnectionError
from google.colab import drive
drive.mount('/content/drive')

debug = True
i = 0
data_row = {}
logger = logging.getLogger(__name__)

PORTAL_URL = 'http://www.djpk.kemenkeu.go.id/portal'
MAX_ATTEMPTS = 5  # attempts per request before the item is skipped

filename = "data-apbd-2019-"+ datetime.now(timezone('Asia/Jakarta')).strftime("%Y-%m-%d--%H-%M") + ".csv"
path = F"/content/drive/My Drive/Colab Notebooks/Portal APBD/"

s = requests.Session()


def request_with_retry(method, url, **kwargs):
    """Send a request through the shared session, retrying on network errors.

    Returns the response, or None when every attempt raised a
    requests exception. Waits a random 1-5 seconds between attempts.
    """
    for attempt in range(1, MAX_ATTEMPTS + 1):
        try:
            return s.request(method, url, **kwargs)
        except requests.exceptions.RequestException as e:
            logger.error(e)
            if attempt == MAX_ATTEMPTS:
                break
            randomtime = random.randint(1, 5)
            logger.warning('ERROR - Retrying again website %s, retrying in %d secs', url, randomtime)
            time.sleep(randomtime)
    logger.error('Giving up on %s after %d attempts', url, MAX_ATTEMPTS)
    return None


def parse_filter_response(response):
    """Return the decoded JSON payload of a /filter response, or None.

    None is returned when the request failed, the status is not 200, the
    body contains "html", or the decoded payload is empty.
    """
    if response is None or response.status_code != 200 or "html" in response.text:
        return None
    result = json.loads(response.text)
    return result or None


def iter_account_nodes(postur):
    """Yield (code, node, figures_key) for all three levels of `postur`.

    The first two levels keep their figures under 'total', the third level
    under 'value'. Codes are yielded parent first, then its children.
    """
    for kode_postur, node_postur in postur.items():
        yield kode_postur, node_postur, 'total'
        for kode_akun, node_akun in node_postur['child'].items():
            yield kode_akun, node_akun, 'total'
            for kode_subakun, node_subakun in node_akun['child'].items():
                yield kode_subakun, node_subakun, 'value'


r = request_with_retry('get', PORTAL_URL + '/data/apbd')
if r is None or r.status_code != 200:
    # Nothing below can work without the token and the option lists.
    raise RuntimeError('Could not load the APBD portal landing page')

bsoup = BeautifulSoup(r.text, 'html.parser')
token = bsoup.find("input", {"name":"_token"})['value']
alltahun = bsoup.find("select", {"name":"tahun"}) # print this var is essential
if debug:
    print(alltahun)
allprovinsi = bsoup.find("select", {"id":"sel_prov"}) # print this var is essential
if debug:
    print(allprovinsi)

# Create the headers (taken from the national APBD data of all years).
# The dict is created once, before the loop, so codes from every year
# accumulate instead of being overwritten by the last year queried.
header_dict = {'no': '',
               'tahun': '',
               'pemda': '',
               'provinsi': '' ,
               'wilayah': '',
               'disclaimer': '',
               'special_row': '',
               }

for tiap_tahun in alltahun.find_all("option"): # populate all tahun
    if debug:
        print(tiap_tahun['value'])

    # 'apbd ' (with trailing space) is sent exactly as before.
    # NOTE(review): confirm whether the portal requires the trailing space.
    r = request_with_retry('post', PORTAL_URL + '/filter',
            data={'_token': token, 'data': 'apbd ', 'tahun': tiap_tahun['value'], 'provinsi': '--', 'pemda': '--'}) # national query only
    result = parse_filter_response(r)
    if result is None:
        continue

    if debug:
        print(result)

    # populate all account codes
    for kode, _node, _figures_key in iter_account_nodes(result['postur']):
        if debug:
            print(kode)
        for suffix in ("_a", "_r", "_p"):
            header_dict.setdefault(kode + suffix, 0)

# write the complete header to the file
header_row = list(header_dict)

if debug:
    print("Saving to " + path + filename)
with open(path+filename, mode='w', newline='') as apbdcsv_file:
    csv.writer(apbdcsv_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL).writerow(header_row)
if debug:
    print(header_dict)

# Looping all tahun
#for tiap_tahun in alltahun.find_all("option"): # populate all tahun
for tiap_tahun in range(2019, 2021, 1):
    if debug:
        print(str(tiap_tahun))
    for option_provinsi in allprovinsi.find_all("option"): # populate all provinsi

        r = request_with_retry('get', PORTAL_URL + '/pemda/' + option_provinsi['value'])
        if r is None:
            continue
        daerah = json.loads(r.text) # populate all kab/kota under provinsi

        for pemda in daerah:
            if debug:
                print("Populating data APBD Pemda " + daerah[pemda] + ", " + option_provinsi.text + " tahun " + str(tiap_tahun))
            r = request_with_retry('post', PORTAL_URL + '/filter',
                    data={'_token': token, 'data': 'apbd ', 'tahun': tiap_tahun, 'provinsi': str(option_provinsi['value']), 'pemda': str(pemda)})
            result = parse_filter_response(r)
            if result is None:
                # Failed or empty response: skip instead of re-using the
                # previous pemda's response.
                continue

            i = i + 1
            if debug:
                print(r.text)

            # 'wilayah', 'disclaimer' and 'special_row' stay disabled as in
            # the original script; their columns are filled with 0 below.
            data_dict = {'no': i,
                         'tahun': tiap_tahun,
                         'pemda': str(pemda),
                         'provinsi': str(option_provinsi['value']) ,
                         }

            # flatten the three account levels into <code>_a/_r/_p columns
            for kode, node, figures_key in iter_account_nodes(result['postur']):
                if debug:
                    print(kode)
                figures = node[figures_key]
                data_dict[kode+"_a"] = str(figures['anggaran'])
                data_dict[kode+"_r"] = str(figures['realisasi'])
                data_dict[kode+"_p"] = str(figures['persentase'])

            # place the results in header order; missing columns become 0
            data_row = {column_name: data_dict.get(column_name, 0) for column_name in header_row}
            if debug:
                print("Isi data_row:")
                print(data_row)
            data_list = data_row.values()
            if debug:
                print("Saving to " + path + filename)
            # Re-open in append mode per row so each row is written out
            # before the next request is made.
            with open(path+filename, mode='a+', newline='') as apbdcsv_file:
                csv.writer(apbdcsv_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL).writerow(data_list)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
512
513
514
515
516
517
518
520
521
522
523
600
610
611
612
613
614
615
620
621
622
623
624
625
626
2012
{'postur': {'400': {'text': 'Pendapatan', 'child': {'410': {'text': 'PAD', 'child': {'411': {'text': 'Pajak daerah', 'value': {'anggaran': 81737186043154, 'realisasi': 95068617468023, 'persentase': '116,31 %'}}, '412': {'text': 'Retribusi daerah', 'value': {'anggaran': 8139734317636, 'realisasi': 9747604370075, 'persentase': '119,75 %'}}, '413': {'text': 'Hasil pengelolaan kekayaan daerah yang dipisahkan', 'value': {'anggaran': 5278998686166, 'realisasi': 5176263607205, 'persentase': '98,05 %'}}, '414': {'text': 'Lain-lain PAD yang sah', 'value': {'anggaran': 17588785762592, 'realisasi': 21834750461175, 'persentase': '124,14 %'}}}, 'total': {'anggaran': 112744704809548, 'realisasi': 131827235906478, 'persentase': '116,93 %'}}, '420': {'text': 'Daper', 'child': {'421': {'text': 'Dana bagi hasil pajak/bagi hasil bukan pa

ConnectionError: ignored

# tes POST satu daerah

In [None]:
# Portal Data DJPK Grabber by beta.uliansyah@pknstan.ac.id
import requests
from bs4 import BeautifulSoup
import json

#headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.90 Safari/537.36'}

# Smoke test: fetch the landing page for its form token, then POST a single
# national-level query for 2020 and print what comes back.
s = requests.Session()
r = s.get('http://www.djpk.kemenkeu.go.id/portal/data/apbd')
bsoup = BeautifulSoup(r.text, 'html.parser')

landing_ok = r.status_code==200
if landing_ok:
    token_input = bsoup.find("input", {"name":"_token"})
    token = token_input['value']
    print(token)

    form = {'_token': token, 'data': 'apbd ', 'tahun': '2020', 'provinsi': '--', 'pemda': '--'}
    r = s.post('http://www.djpk.kemenkeu.go.id/portal/filter', data=form)

    print(r.status_code)
    if r.status_code==200:
        body = r.text
        print(body)
        # number of top-level entries in the decoded payload
        print(len(json.loads(body)))

IAIMXtBvjuoNUBJCfZY2rnHudwPULTf9cfvQkncs
200
{"postur":{"400":{"text":"Pendapatan","child":{"410":{"text":"PAD","child":{"411":{"text":"Pajak daerah","value":{"anggaran":235267331454771.78,"realisasi":175067357638537,"persentase":"74,41 %"}},"412":{"text":"Retribusi daerah","value":{"anggaran":12816701901373.66,"realisasi":8186505378062,"persentase":"63,87 %"}},"413":{"text":"Hasil pengelolaan kekayaan daerah yang dipisahkan","value":{"anggaran":9920929966058.8008,"realisasi":8228422205772,"persentase":"82,94 %"}},"414":{"text":"Lain-lain PAD yang sah","value":{"anggaran":70413754983807.859,"realisasi":63031530879031,"persentase":"89,52 %"}}},"total":{"anggaran":328418718306012.12,"realisasi":254513816101402,"persentase":"77,50 %"}},"420":{"text":"Daper","child":{"421":{"text":"Dana bagi hasil pajak\/bagi hasil bukan pajak","value":{"anggaran":94771156686338.031,"realisasi":89647483899402,"persentase":"94,59 %"}},"422":{"text":"Dana alokasi umum","value":{"anggaran":425216415146714.38,

# Dibuang sayang

In [None]:
def find_keys(json_result):
    """Print leaf entries of a nested dict and return the first top-level leaf key.

    Entries are visited in order. A dict value is descended into (the result
    of that recursive call is ignored); the first non-dict value is printed
    as "key : value" and its key is returned immediately, which ends the
    walk at this level. Returns None when no non-dict value is found.

    NOTE(review): the early return stops the walk at the first leaf of each
    level - confirm whether visiting every leaf was intended.
    """
    # Credits: https://stackoverflow.com/questions/10756427/loop-through-all-nested-dictionary-values
    for name, item in json_result.items():
        if not isinstance(item, dict):
            print("{0} : {1}".format(name, item))
            return name
        find_keys(item)

def keys_exists(element, *keys):
    '''
    Check if *keys (nested) exists in `element` (dict).

    Each key is looked up in the value found by the previous one, e.g.
    keys_exists(d, 'a', 'b') tests that d['a']['b'] can be read.

    Returns True when the whole chain resolves and False otherwise. A chain
    that runs into a value that cannot be subscripted with the next key
    (a number, None, a string, an out-of-range list index, ...) counts as
    "does not exist" instead of raising.

    Raises AttributeError when `element` is not a dict or no keys are given.
    '''
    if not isinstance(element, dict):
        raise AttributeError('keys_exists() expects dict as first argument.')
    if not keys:
        raise AttributeError('keys_exists() expects at least two arguments, one given.')

    _element = element
    for key in keys:
        try:
            _element = _element[key]
        except (KeyError, IndexError, TypeError):
            # KeyError: missing dict key; IndexError: list index out of
            # range; TypeError: intermediate value is not subscriptable
            # with this key (e.g. int, None) or the key is unhashable.
            return False
    return True
    # Credits: https://stackoverflow.com/questions/43491287/elegant-way-to-check-if-a-nested-key-exists-in-a-dict

def json_extract(obj, key):
    """Collect every scalar value stored under `key` in a nested JSON tree.

    Dicts and lists are walked depth-first in iteration order. A value is
    collected only when its dict key equals `key` and the value itself is
    neither a dict nor a list; dict/list values are always descended into
    instead. Anything that is not a dict or list contributes nothing.

    Returns a new list of the matching values in visiting order.
    """

    def walk(node):
        """Yield matching values found beneath `node`."""
        if isinstance(node, dict):
            for name, child in node.items():
                if isinstance(child, (dict, list)):
                    yield from walk(child)
                elif name == key:
                    yield child
        elif isinstance(node, list):
            for element in node:
                yield from walk(element)

    return list(walk(obj))
    # Credits: https://hackersandslackers.com/extract-data-from-complex-json-python/

def DictListUpdate( lis1, lis2):
    """Append to `lis2` each item of `lis1` that it does not already contain.

    Membership is re-checked against the growing `lis2`, so repeated items
    in `lis1` are added only once. `lis2` is modified in place and returned.
    """
    for candidate in lis1:
        if candidate in lis2:
            continue
        lis2.append(candidate)
    return lis2