## Data Mining from Badan Pangan Nasional
Mine the price of each food ingredient for every Kabupaten (regency) in Indonesia.

### 0. Importing dependencies

In [41]:
import requests, json
import pandas as pd
import numpy as np
from typing import Dict
from datetime import datetime, timedelta

Helper that builds the date range (the last 7 days up to now) used in the API URLs, so every request fetches current data.

In [42]:
def get_api_date(days: int = 7, now: "datetime | None" = None) -> str:
  """Build the date-range segment that is appended to the API URLs.

  Args:
    days: Length of the look-back window in days. Defaults to 7.
    now: End of the window. Defaults to the current local time
      (``datetime.now()``); pass a fixed value for reproducible runs.

  Returns:
    A string of the form ``"DD-MM-YYYY/DD-MM-YYYY"``: the window start
    followed by the window end.

  Raises:
    ValueError: If ``days`` is negative (the start would fall after the end).
  """
  if days < 0:
    raise ValueError("days must be non-negative")

  end_time = datetime.now() if now is None else now
  start_time = end_time - timedelta(days=days)

  date_format = "%d-%m-%Y"
  return start_time.strftime(date_format) + "/" + end_time.strftime(date_format)

api_date = get_api_date()
print("Datetime used:", api_date)

Datetime used: 21-10-2024/28-10-2024


In [43]:
def save_id_json(save_data: Dict, save_name: str) -> None:
    """Write ``save_data`` to the file ``save_name`` as JSON.

    The file is opened in write mode, so an existing file is overwritten.

    Args:
        save_data: JSON-serialisable mapping to store.
        save_name: Path of the file to write.
    """
    with open(save_name, 'w') as f:
        json.dump(save_data, f)

def load_id_json(json_file: str) -> Dict:
    """Read the file ``json_file`` and return its parsed JSON content."""
    with open(json_file, 'r') as handle:
        parsed = json.loads(handle.read())
    return parsed

### 1. Get Each Provinsi Id

In [44]:
def get_prov_id(api_date: str, timeout: float = 30) -> Dict:
  """Fetch the province name -> province id mapping from the API.

  Args:
    api_date: Date-range string that is appended to the endpoint URL
      (the notebook builds it with ``get_api_date``).
    timeout: Seconds to wait for the server before giving up.

  Returns:
    Dict mapping each entry's ``nama`` to its ``id``, taken from the
    ``data`` list of the JSON response.

  Raises:
    requests.HTTPError: If the server answers with an error status. Without
      this check an error page would only show up as a JSON decode error.
  """
  api_prov_id = "https://panelharga.badanpangan.go.id/data/provinsi-by-levelharga/3/{}".format(api_date)
  response = requests.get(api_prov_id, timeout=timeout)
  response.raise_for_status()

  return {prov['nama']: prov['id'] for prov in response.json()['data']}

province_dataset = get_prov_id(api_date)

province_dataset

{'Aceh': 1,
 'Sumatera Utara': 2,
 'Sumatera Barat': 3,
 'Riau': 4,
 'Jambi': 5,
 'Sumatera Selatan': 6,
 'Bengkulu': 7,
 'Lampung': 8,
 'Kepulauan Bangka Belitung': 9,
 'Kepulauan Riau': 10,
 'DKI Jakarta': 11,
 'Jawa Barat': 12,
 'Jawa Tengah': 13,
 'D.I Yogyakarta': 14,
 'Jawa Timur': 15,
 'Banten': 16,
 'Bali': 17,
 'Nusa Tenggara Barat': 18,
 'Nusa Tenggara Timur': 19,
 'Kalimantan Barat': 20,
 'Kalimantan Tengah': 21,
 'Kalimantan Selatan': 22,
 'Kalimantan Timur': 23,
 'Kalimantan Utara': 24,
 'Sulawesi Utara': 25,
 'Sulawesi Tengah': 26,
 'Sulawesi Selatan': 27,
 'Sulawesi Tenggara': 28,
 'Gorontalo': 29,
 'Sulawesi Barat': 30,
 'Maluku': 31,
 'Maluku Utara': 32,
 'Papua Barat': 33,
 'Papua': 34,
 'Papua Barat Daya': 35,
 'Papua Pegunungan': 36,
 'Papua Tengah': 37,
 'Papua Selatan': 38}

### 2. Get Each Kabupaten Id

In [45]:
def get_kab_id(id: int, timeout: float = 30) -> Dict:
  """Fetch the kabupaten/kota name -> id mapping of one province.

  The date range is read from the module-level ``api_date`` variable, so that
  variable must be defined before this function is called.

  Args:
    id: Province id that is inserted into the endpoint URL.
    timeout: Seconds to wait for the server before giving up.

  Returns:
    Dict mapping each entry's ``nama`` to its ``id``, taken from the
    ``data`` list of the JSON response.

  Raises:
    requests.HTTPError: If the server answers with an error status.
  """
  api_kab_id = "https://panelharga.badanpangan.go.id/data/kabkota-by-levelharga/3/{}/{}".format(id, api_date)
  response = requests.get(api_kab_id, timeout=timeout)
  response.raise_for_status()

  return {kab['nama']: kab['id'] for kab in response.json()['data']}

In [46]:
def get_kab_id_2(id: int, province_dataset, timeout: float = 30) -> Dict:
  """Fetch one flat kabupaten/kota name -> id mapping covering all provinces.

  One request is sent per province id found in ``province_dataset``. The date
  range is read from the module-level ``api_date`` variable.

  Args:
    id: Unused. Kept so existing calls such as
      ``get_kab_id_2(2, province_dataset)`` keep working.
    province_dataset: Mapping whose values are province ids.
    timeout: Seconds to wait for each response before giving up.

  Returns:
    Dict mapping each entry's ``nama`` to its ``id``. If the same name occurs
    in more than one province, the entry of the later province wins.

  Raises:
    requests.HTTPError: If the server answers any request with an error status.
  """
  kab_id = {}
  for province_id in province_dataset.values():
    api_kab_id = "https://panelharga.badanpangan.go.id/data/kabkota-by-levelharga/3/{}/{}".format(province_id, api_date)
    response = requests.get(api_kab_id, timeout=timeout)
    response.raise_for_status()

    # Merge this province's entries into the accumulated mapping in place.
    kab_id.update({kab['nama']: kab['id'] for kab in response.json()['data']})

  return kab_id

In [47]:
kab_id = get_kab_id_2(2, province_dataset)

In [31]:
save_id_json(kab_id, "kab_id.json")

In [21]:
kab_id

{'Kab. Aceh Barat': 7,
 'Kab. Aceh Barat Daya': 12,
 'Kab. Aceh Besar': 8,
 'Kab. Aceh Jaya': 16,
 'Kab. Aceh Selatan': 3,
 'Kab. Aceh Singkil': 2,
 'Kab. Aceh Tamiang': 14,
 'Kab. Aceh Tengah': 6,
 'Kab. Aceh Tenggara': 4,
 'Kab. Aceh Timur': 5,
 'Kab. Aceh Utara': 11,
 'Kab. Bener Meriah': 17,
 'Kab. Bireuen': 10,
 'Kab. Gayo Lues': 13,
 'Kab. Nagan Raya': 15,
 'Kab. Pidie': 9,
 'Kab. Pidie Jaya': 18,
 'Kab. Simeulue': 1,
 'Kota Banda Aceh': 19,
 'Kota Langsa': 21,
 'Kota Lhokseumawe': 22,
 'Kota Sabang': 20,
 'Kota Subulussalam': 23,
 'Kab. Asahan': 31,
 'Kab. Batu Bara': 42,
 'Kab. Dairi': 33,
 'Kab. Deli Serdang': 35,
 'Kab. Humbang Hasundutan': 38,
 'Kab. Karo': 34,
 'Kab. Labuhan Batu': 30,
 'Kab. Labuhan Batu Selatan': 45,
 'Kab. Labuhan Batu Utara': 46,
 'Kab. Langkat': 36,
 'Kab. Mandailing Natal': 25,
 'Kab. Nias': 24,
 'Kab. Nias Barat': 48,
 'Kab. Nias Selatan': 37,
 'Kab. Nias Utara': 47,
 'Kab. Padang Lawas': 44,
 'Kab. Padang Lawas Utara': 43,
 'Kab. Pakpak Bharat': 39,

In [23]:
# Replace every province id with that province's {kabupaten name: id} mapping.
# NOTE: province_dataset is overwritten in place, so this cell must run only
# once per fresh province_dataset; a second run would hand the nested dicts
# (instead of ids) to get_kab_id.
for province, province_id in province_dataset.items():
    province_dataset[province] = get_kab_id(province_id)

### 3. Get Data Each Kabupaten

In [8]:
data = load_id_json("region_id.json")

In [13]:
# Flatten the nested {province: {district: value}} mapping into one row per district.
df = pd.DataFrame(
    [
        {'Province': province_name, 'District': district_name, 'Value': district_value}
        for province_name, district_map in data.items()
        for district_name, district_value in district_map.items()
    ]
)

In [14]:
df

Unnamed: 0,Province,District,Value
0,Aceh,Kab. Aceh Barat,7
1,Aceh,Kab. Aceh Barat Daya,12
2,Aceh,Kab. Aceh Besar,8
3,Aceh,Kab. Aceh Jaya,16
4,Aceh,Kab. Aceh Selatan,3
...,...,...,...
492,Papua Tengah,Kab. Intan Jaya,497
493,Papua Tengah,Kab. Mimika,479
494,Papua Tengah,Kab. Nabire,474
495,Papua Tengah,Kab. Puncak Jaya,478


In [37]:
def get_cost_kab(id_kab: int, timeout: float = 30) -> Dict:
  """Fetch the price series of every commodity for one kabupaten/kota.

  The date range is read from the module-level ``api_date`` variable.

  Args:
    id_kab: Kabupaten/kota id that is inserted into the endpoint URL.
    timeout: Seconds to wait for the server before giving up.

  Returns:
    Dict mapping each commodity ``name`` to the list of ``geomean`` values of
    its ``by_date`` entries. Values are passed through unchanged; the recorded
    notebook output shows that an entry can be the string ``'-'`` instead of
    a number.

  Raises:
    requests.HTTPError: If the server answers with an error status.
  """
  api_cost_kab = "https://panelharga.badanpangan.go.id/data/kabkota-range-by-levelharga/{}/3/{}".format(id_kab, api_date)
  response = requests.get(api_cost_kab, timeout=timeout)
  response.raise_for_status()

  return {
    costs['name']: [cost['geomean'] for cost in costs['by_date']]
    for costs in response.json()['data']
  }

In [41]:
def get_ingredients_cost(ingredient: str, id_kab: int = 188):
    """Return the price series of one commodity for one kabupaten/kota.

    Args:
        ingredient: Commodity name as used in the API response,
            e.g. ``"Beras Premium"``.
        id_kab: Kabupaten/kota id to query. Defaults to 188, the id this
            helper originally had hard-coded.

    Returns:
        The list stored under ``ingredient`` in the result of ``get_cost_kab``.

    Raises:
        KeyError: If ``ingredient`` is not among the returned commodities.
    """
    return get_cost_kab(id_kab)[ingredient]

In [45]:
test_log = get_ingredients_cost("Beras Premium")

In [46]:
test_log

[14500, 14500, 14500, 14500, 14500, 14500, 14500, 14500]

In [39]:
get_cost_kab(188)["Beras Premium"]

[14500, 14500, 14500, 14500, 14500, 14500, 14500, 14500]

### 4. Implementation

Example: fetch the chicken egg price for Kab. Banyumas, Jawa Tengah, with Python code.

In [77]:
class DataScraper:
    """Base client for the panelharga.badanpangan.go.id price endpoints.

    Creating an instance computes the query date range and downloads the
    province and kabupaten/kota id mappings. That is one HTTP request for the
    province list plus one request per province, so construction is slow and
    needs network access.
    """

    # Seconds to wait for each HTTP response before giving up.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        # URL templates; the "{}" placeholders are filled with ids and the date range.
        self.api_prov_id_  : str  = "https://panelharga.badanpangan.go.id/data/provinsi-by-levelharga/3/{}"
        self.api_kab_id_   : str  = "https://panelharga.badanpangan.go.id/data/kabkota-by-levelharga/3/{}/{}"
        self.api_cost_     : str  = "https://panelharga.badanpangan.go.id/data/kabkota-range-by-levelharga/{}/3/{}"
        # Order matters: the id lookups below read api_datetime_ / prov_id_.
        self.api_datetime_ : str  = self.get_api_date()
        self.prov_id_      : Dict = self.get_prov_id()
        self.kab_id_       : Dict = self.get_kab_id()

    def _get_json(self, url: str) -> Dict:
        """GET ``url`` and return the decoded JSON body.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.json()

    def get_api_date(self) -> str:
        """Return the last-7-days range as ``"DD-MM-YYYY/DD-MM-YYYY"``."""
        current_time = datetime.now()
        week_before = current_time - timedelta(days=7)

        date_format = "%d-%m-%Y"
        return week_before.strftime(date_format) + "/" + current_time.strftime(date_format)

    def get_prov_id(self) -> Dict:
        """Download and return the province name -> id mapping."""
        payload = self._get_json(self.api_prov_id_.format(self.api_datetime_))
        return {prov['nama']: prov['id'] for prov in payload['data']}

    def get_kab_id(self) -> Dict:
        """Download and return a flat kabupaten/kota name -> id mapping.

        One request is sent per province in ``prov_id_``. If the same name
        occurs in more than one province, the later province's entry wins.
        """
        kab_id = {}

        for prov_id in self.prov_id_.values():
            payload = self._get_json(self.api_kab_id_.format(prov_id, self.api_datetime_))
            # Merge once per province (not once per kabupaten).
            kab_id.update({kab["nama"]: kab["id"] for kab in payload["data"]})

        return kab_id

class Province(DataScraper):
    """Scraper bound to a single province.

    Besides the mappings loaded by ``DataScraper``, it downloads the
    kabupaten/kota name -> id mapping of the given province.
    """

    def __init__(self, province_name: str) -> None:
        """
        Args:
            province_name: Province name exactly as it appears as a key of
                ``prov_id_``, e.g. ``"Jawa Tengah"``.

        Raises:
            KeyError: If ``province_name`` is not a known province.
        """
        super().__init__()
        self.prov_name_ : str = province_name
        self.prov_kab_id_ : Dict = self.get_prov_kab_id()

    def get_prov_kab_id(self) -> Dict:
        """Download the kabupaten/kota name -> id mapping of this province.

        Raises:
            KeyError: If ``prov_name_`` is not a key of ``prov_id_``.
            requests.HTTPError: If the server answers with an error status.
        """
        if self.prov_name_ not in self.prov_id_:
            # Still a KeyError, but with a message that says what is accepted.
            raise KeyError(
                "Unknown province {!r}. Available: {}".format(
                    self.prov_name_, ", ".join(sorted(self.prov_id_))
                )
            )

        api_prov_kab = self.api_kab_id_.format(self.prov_id_[self.prov_name_], self.api_datetime_)
        response = requests.get(api_prov_kab, timeout=30)
        response.raise_for_status()

        return {kab['nama']: kab['id'] for kab in response.json()['data']}
    


class Kabupaten(DataScraper):
    """Scraper bound to a single kabupaten/kota, used to read commodity prices."""

    def __init__(self, kabupaten_name: str) -> None:
        """
        Args:
            kabupaten_name: Either the bare kabupaten name (``"Banyumas"``,
                looked up as ``"Kab. Banyumas"``) or a full key of ``kab_id_``
                such as ``"Kota Tegal"`` or ``"Kab. Banyumas"``.

        Raises:
            KeyError: If neither ``"Kab. " + kabupaten_name`` nor
                ``kabupaten_name`` itself is a key of ``kab_id_``.
        """
        super().__init__()

        # Try the original "Kab. <name>" lookup first so existing calls behave
        # the same; fall back to the name as given so "Kota ..." entries and
        # already-prefixed names can be used too.
        prefixed_name = "Kab. " + kabupaten_name
        if prefixed_name in self.kab_id_:
            resolved_name = prefixed_name
        elif kabupaten_name in self.kab_id_:
            resolved_name = kabupaten_name
        else:
            raise KeyError(
                "Unknown kabupaten/kota {!r} (also tried {!r})".format(
                    kabupaten_name, prefixed_name
                )
            )

        self.kab_name_     : str  = resolved_name
        # Reuse the URL template defined by DataScraper instead of repeating it.
        self.api_kab_cost_ : str  = self.api_cost_.format(self.kab_id_[self.kab_name_], self.api_datetime_)
        self.prices        : Dict = {}

    def get_ingredients_cost(self, ingredient: str) -> list:
        """Download the current prices and return the series of one commodity.

        Every call sends a request and refreshes ``self.prices`` with the
        series of all commodities in the response.

        Args:
            ingredient: Commodity name as used in the API response,
                e.g. ``"Bawang Merah"``.

        Returns:
            List of ``geomean`` values, one per ``by_date`` entry. Values are
            passed through unchanged; the recorded notebook output shows that
            an entry can be the string ``'-'`` instead of a number.

        Raises:
            KeyError: If ``ingredient`` is not among the returned commodities.
            requests.HTTPError: If the server answers with an error status.
        """
        response = requests.get(self.api_kab_cost_, timeout=30)
        response.raise_for_status()

        for costs in response.json()['data']:
            self.prices[costs['name']] = [cost['geomean'] for cost in costs['by_date']]

        return self.prices[ingredient]

In [86]:
test_kab = Kabupaten("Aceh Barat")
# test_kab.get_ingredients_cost("Jagung")

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [88]:
test_kab.prices

{'Beras Premium': [14250, 14250, 14490, 14490, 14490, 14490, 14490, '-'],
 'Beras Medium': [12750, 12750, 12750, 12750, 12750, 12750, 12750, '-'],
 'Kedelai Biji Kering (Impor)': [10670,
  10670,
  10670,
  10670,
  10670,
  10670,
  10670,
  '-'],
 'Bawang Merah': [25920, 26460, 26460, 26460, 26460, 26460, 26460, '-'],
 'Bawang Putih Bonggol': [35500,
  35500,
  35500,
  35500,
  35500,
  35500,
  35500,
  '-'],
 'Cabai Merah Keriting': [22360,
  22360,
  21910,
  21910,
  21910,
  23240,
  22360,
  '-'],
 'Cabai Rawit Merah': [42430, 40000, 40000, 40000, 40000, 40000, 40000, '-'],
 'Daging Sapi Murni': [132480,
  132480,
  132480,
  132480,
  132480,
  132480,
  132480,
  '-'],
 'Daging Ayam Ras': [36410, 36410, 36880, 36880, 36880, 36880, 36880, '-'],
 'Telur Ayam Ras': [26000, 27500, 26500, 26500, 26500, 26500, 26500, '-'],
 'Gula Konsumsi': [17000, 17000, 17000, 17000, 17000, 17000, 17000, '-'],
 'Minyak Goreng Kemasan Sederhana': [17730,
  17730,
  17730,
  17730,
  17730,
  1773

In [87]:
test_kab.get_ingredients_cost("Bawang Merah") 

[25920, 26460, 26460, 26460, 26460, 26460, 26460, '-']

In [79]:
test_kab.api_kab_cost_

'https://panelharga.badanpangan.go.id/data/kabkota-range-by-levelharga/187/3/21-10-2024/28-10-2024'

In [67]:
test_province = Province("Jawa Tengah")

In [68]:
test_province.prov_kab_id_

{'Kab. Banjarnegara': 189,
 'Kab. Banyumas': 187,
 'Kab. Batang': 210,
 'Kab. Blora': 201,
 'Kab. Boyolali': 194,
 'Kab. Brebes': 214,
 'Kab. Cilacap': 186,
 'Kab. Demak': 206,
 'Kab. Grobogan': 200,
 'Kab. Jepara': 205,
 'Kab. Karanganyar': 198,
 'Kab. Kebumen': 190,
 'Kab. Kendal': 209,
 'Kab. Klaten': 195,
 'Kab. Kudus': 204,
 'Kab. Magelang': 193,
 'Kab. Pati': 203,
 'Kab. Pekalongan': 211,
 'Kab. Pemalang': 212,
 'Kab. Purbalingga': 188,
 'Kab. Purworejo': 191,
 'Kab. Rembang': 202,
 'Kab. Semarang': 207,
 'Kab. Sragen': 199,
 'Kab. Sukoharjo': 196,
 'Kab. Tegal': 213,
 'Kab. Temanggung': 208,
 'Kab. Wonogiri': 197,
 'Kab. Wonosobo': 192,
 'Kota Magelang': 215,
 'Kota Pekalongan': 219,
 'Kota Salatiga': 217,
 'Kota Semarang': 218,
 'Kota Surakarta': 216,
 'Kota Tegal': 220}

In [58]:
test_prov = Province()

In [None]:
test

In [63]:
jawa_tengah = Kabupaten("Banyumas")

NameError: name 'Kabupaten' is not defined

In [64]:
jawa_tengah.prov_id_

{'Aceh': 1,
 'Sumatera Utara': 2,
 'Sumatera Barat': 3,
 'Riau': 4,
 'Jambi': 5,
 'Sumatera Selatan': 6,
 'Bengkulu': 7,
 'Lampung': 8,
 'Kepulauan Bangka Belitung': 9,
 'Kepulauan Riau': 10,
 'DKI Jakarta': 11,
 'Jawa Barat': 12,
 'Jawa Tengah': 13,
 'D.I Yogyakarta': 14,
 'Jawa Timur': 15,
 'Banten': 16,
 'Bali': 17,
 'Nusa Tenggara Barat': 18,
 'Nusa Tenggara Timur': 19,
 'Kalimantan Barat': 20,
 'Kalimantan Tengah': 21,
 'Kalimantan Selatan': 22,
 'Kalimantan Timur': 23,
 'Kalimantan Utara': 24,
 'Sulawesi Utara': 25,
 'Sulawesi Tengah': 26,
 'Sulawesi Selatan': 27,
 'Sulawesi Tenggara': 28,
 'Gorontalo': 29,
 'Sulawesi Barat': 30,
 'Maluku': 31,
 'Maluku Utara': 32,
 'Papua Barat': 33,
 'Papua': 34,
 'Papua Barat Daya': 35,
 'Papua Pegunungan': 36,
 'Papua Tengah': 37,
 'Papua Selatan': 38}

In [62]:
jawa_tengah.get_ingredients_cost('Beras Premium')

[14250, 14250, 14250, 14490, 14490, 14490, 14490, 14490]

Clean up the code

In [32]:
kab_id["Kab. Banyumas"]

187

In [74]:
from get_price import Kabupaten

In [75]:
banyumas = Kabupaten("Banyumas")

In [76]:
banyumas.get_ingredients_cost("Jagung")

JSONDecodeError: Expecting value: line 1 column 1 (char 0)