In [10]:
import pandas as pd
import glob
from pathlib import Path
from typing import List, Dict

# Convert all data from JSON to a single CSV and Excel file

## East Java data for all years

In [11]:
# Root of the scraped data; the cells below expect one sub-directory per semester.
dapo_kemdikdasmen_dir = Path('../data/dapo-kemdikdasmen')

# 'ALL' is the output directory of the nationwide aggregate, not a semester,
# so it must not be processed as one. Sorting keeps the processing order
# stable, because iterdir() yields entries in arbitrary order.
semester_list = sorted(
    p.name
    for p in dapo_kemdikdasmen_dir.iterdir()
    if p.is_dir() and p.name != 'ALL'
)

In [None]:
# Map each semester directory name to the per-school JSON files of East Java.
# Files named 'ALL.json' are left out of the per-school list.
files_jatim_all_years = {}

for year in semester_list:
    jatim_matches = glob.glob(f'../data/dapo-kemdikdasmen/{year}/Prov. Jawa Timur/*/*/*.json')
    files_jatim_all_years[year] = [
        match for match in jatim_matches
        if Path(match).name != 'ALL.json'
    ]

def convert_all_schools_json_to_single_csv(semester_id: str, file_path: List[str]) -> None:
    """Merge East Java per-school JSON files of one semester into one CSV and one Excel file.

    Every file is read with ``pd.read_json`` and receives three leading
    columns taken from its location on disk
    (``.../<kabupaten>/<kecamatan>/<sekolah>.json``). The combined table is
    written to ``../data/dapo-kemdikdasmen/<semester_id>/Prov. Jawa Timur/ALL/``
    as ``jatim-csv.csv`` and ``jatim-excel.xlsx``.

    Args:
        semester_id: Name of the semester directory the output is written under.
        file_path: Paths of the per-school JSON files to merge.

    Returns:
        None. When ``file_path`` is empty nothing is read or written.
    """
    temp_df_jatim = []

    for f in file_path:
        p = Path(f)

        sekolah = p.stem
        kecamatan = p.parent.name
        kabupaten = p.parent.parent.name

        df = pd.read_json(p)

        # insert `kabupaten`, `kecamatan`, and `sekolah` column
        df.insert(0, 'kabupaten/kota', kabupaten)
        df.insert(1, 'kecamatan', kecamatan)
        df.insert(2, 'sekolah', sekolah)

        temp_df_jatim.append(df)

    # pd.concat raises ValueError on an empty list, so skip semesters
    # for which no school file was found.
    if not temp_df_jatim:
        return

    df_jatim = pd.concat(temp_df_jatim, ignore_index=True)

    # The output location is driven by the caller-supplied semester_id; it is
    # no longer overwritten from the path of the last file read.
    target_all_dir = Path(f'../data/dapo-kemdikdasmen/{semester_id}/Prov. Jawa Timur/ALL')
    target_all_dir.mkdir(parents=True, exist_ok=True)

    df_jatim.to_csv(target_all_dir / 'jatim-csv.csv', index=False)
    df_jatim.to_excel(target_all_dir / 'jatim-excel.xlsx', index=False)

# Build the East Java aggregate for every semester; the dict is keyed by
# semester name, so the unused enumerate() index is not needed.
for semester, files in files_jatim_all_years.items():
    convert_all_schools_json_to_single_csv(semester_id=semester, file_path=files)

## Each Province in Indonesia

In [12]:
# semester name -> province directory name -> list of per-school JSON paths.
files_all_prov_all_years: Dict[str, Dict[str, List[str]]] = {}

for semester in semester_list:
    # Province directories of this semester; 'ALL' is skipped.
    province_names = [
        entry.name
        for entry in (dapo_kemdikdasmen_dir / semester).iterdir()
        if entry.is_dir() and entry.name != 'ALL'
    ]

    files_by_province: Dict[str, List[str]] = {}
    for prov in province_names:
        prov_matches = glob.glob(f'../data/dapo-kemdikdasmen/{semester}/{prov}/*/*/*.json')
        # Files named 'ALL.json' are not per-school files.
        files_by_province[prov] = [
            match for match in prov_matches
            if Path(match).name != 'ALL.json'
        ]

    files_all_prov_all_years[semester] = files_by_province

def convert_all_schools_json_to_single_csv(
    semester_id: str,
    prov: str,
    file_path: List[str]
) -> None:
    """Merge the per-school JSON files of one province and semester.

    Each file is loaded with ``pd.read_json`` and prefixed with the columns
    ``kabupaten/kota``, ``kecamatan`` and ``sekolah``, taken from the file's
    grandparent directory, parent directory and stem. The concatenated table
    is saved under ``../data/dapo-kemdikdasmen/<semester_id>/<prov>/ALL/`` as
    ``<prov>-csv.csv`` and ``<prov>-excel.xlsx``.

    Args:
        semester_id: Semester directory name used in the output path.
        prov: Province directory name used in the output path and file names.
        file_path: Paths of the per-school JSON files to merge.

    Returns:
        None. Returns early without writing anything if ``file_path`` is empty.
    """

    def _load_school(json_file: Path) -> pd.DataFrame:
        # Read one school file and prepend its location columns.
        school_df = pd.read_json(json_file)
        location_columns = (
            ('kabupaten/kota', json_file.parent.parent.name),
            ('kecamatan', json_file.parent.name),
            ('sekolah', json_file.stem),
        )
        for position, (column, value) in enumerate(location_columns):
            school_df.insert(position, column, value)
        return school_df

    school_frames = [_load_school(Path(raw_path)) for raw_path in file_path]

    # Nothing to merge for this province.
    if len(school_frames) == 0:
        return

    merged = pd.concat(school_frames, ignore_index=True)

    out_dir = Path(f'../data/dapo-kemdikdasmen/{semester_id}/{prov}/ALL')
    out_dir.mkdir(parents=True, exist_ok=True)

    merged.to_csv(out_dir / f'{prov}-csv.csv', index=False)
    merged.to_excel(out_dir / f'{prov}-excel.xlsx', index=False)

# Write one aggregate per (semester, province) pair.
for semester in semester_list:
    province_files = files_all_prov_all_years[semester]
    for prov in province_files:
        convert_all_schools_json_to_single_csv(
            semester_id=semester,
            prov=prov,
            file_path=province_files[prov],
        )

## All schools in Indonesia

In [4]:
# Collect every per-school JSON file of every semester and province.
# The cells above read school files from
# <semester>/<provinsi>/<kabupaten>/<kecamatan>/<sekolah>.json, so the pattern
# needs five path components below the data root (it previously had four,
# which matched one directory level too high and mislabelled every column).
files_indonesia_slb = [
    f for f in glob.glob('../data/dapo-kemdikdasmen/*/*/*/*/*.json')
    if Path(f).name != "ALL.json"
]


temp_df_indonesia_slb = []

for f in files_indonesia_slb:
    p = Path(f)

    sekolah = p.stem
    kecamatan = p.parent.name
    kabupaten = p.parent.parent.name
    provinsi = p.parent.parent.parent.name
    semester = p.parent.parent.parent.parent.name

    df = pd.read_json(p)

    # insert `semester`, `provinsi`, `kabupaten`, `kecamatan`, and `sekolah` column;
    # `semester` keeps rows of the same school from different semesters apart
    df.insert(0, 'semester', semester)
    df.insert(1, 'provinsi', provinsi)
    df.insert(2, 'kabupaten/kota', kabupaten)
    df.insert(3, 'kecamatan', kecamatan)
    df.insert(4, 'sekolah', sekolah)

    temp_df_indonesia_slb.append(df)

df_indonesia_slb = pd.concat(temp_df_indonesia_slb, ignore_index=True)

# Create the output directory first so the writes do not fail on a fresh checkout.
target_all_dir = Path('../data/dapo-kemdikdasmen/ALL')
target_all_dir.mkdir(parents=True, exist_ok=True)

df_indonesia_slb.to_csv(target_all_dir / 'indonesia-slb-csv.csv', index=False)
df_indonesia_slb.to_excel(target_all_dir / 'indonesia-slb-excel.xlsx', index=False)