<a href="https://colab.research.google.com/github/Albert7293/ImEx2025/blob/main/Export_Import_Indonesia_BPS_2025.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install langchain_community
!pip install replicate

In [69]:
from langchain_community.llms import Replicate
import os
from google.colab import userdata
# Identifier of the hosted model that every request is sent to.
model = "ibm-granite/granite-3.3-8b-instruct"

# Read the token from the Colab secret store and also publish it through the
# environment variable.
api_token = userdata.get('BPS_Indonesia1')
os.environ["REPLICATE_API_TOKEN"] = api_token

# LLM wrapper handed to BPSSummarizer further down.
output = Replicate(model=model, replicate_api_token=api_token)



In [70]:
from transformers import AutoTokenizer

# Load the tokenizer registered under the same model id string as `model`.
# NOTE(review): `tokenizer` is not referenced by any later code visible in this
# file — confirm whether this cell is still needed.
tokenizer = AutoTokenizer.from_pretrained(model)

In [None]:
import itertools
from typing import Iterator, Callable, List, Dict


class BPSSummarizer:
    """Processes BPS Indonesia HS Code commodity data and generates hierarchical summaries"""

    def __init__(self, model_instance, api_token: str):

        self.model = model_instance
        self.api_token = api_token

 self.parameters = {
            "top_k": 0,
            "top_p": 1.0,
            "max_tokens": 10000,
            "min_tokens": 0,
            "random_seed": None,
            "repetition_penalty": 1.0,
            "stopping_criteria": "length (10000 tokens)",
            "stopping_sequence": None
        }



    def load_hs_code_data(self) -> List[Dict]:
        """Load the fixed HS Code commodity data you provided"""
        return[
        {'hs_code': '01', 'commodity': 'Binatang hidup', 'export_value': 143805.1254, 'export_weight': 15454.8706, 'import_value': 69239214.496, 'import_weight': 23383518.33},
        {'hs_code': '02', 'commodity': 'Daging hewan', 'export_value': 394386.559, 'export_weight': 33176.45, 'import_value': 79981831.0018, 'import_weight': 25376892.08},
        {'hs_code': '03', 'commodity': 'Ikan, krustasea, dan moluska', 'export_value': 307931800.1552, 'export_weight': 62801065.6493, 'import_value': 36404449.1843, 'import_weight': 17390080.3056},
        {'hs_code': '04', 'commodity': 'Susu, mentega, dan telur', 'export_value': 26424775.2464, 'export_weight': 3552664.2116, 'import_value': 148125356.3884, 'import_weight': 45086748.193},
        {'hs_code': '05', 'commodity': 'Produk hewani', 'export_value': 3643862.8209, 'export_weight': 2023474.1499, 'import_value': 10144553.8108, 'import_weight': 2998629.7},
        {'hs_code': '06', 'commodity': 'Pohon hidup dan bunga potong', 'export_value': 1739732.7512, 'export_weight': 506527.4638, 'import_value': 856472.2172, 'import_weight': 95990.4014},
        {'hs_code': '07', 'commodity': 'Sayuran', 'export_value': 5226298.94, 'export_weight': 5934974.804, 'import_value': 86337985.8714, 'import_weight': 74334154.7886},
        {'hs_code': '08', 'commodity': 'Buah-buahan', 'export_value': 107403796.6136, 'export_weight': 131180867.46, 'import_value': 136856567.0428, 'import_weight': 67557563.792},
        {'hs_code': '09', 'commodity': 'Kopi, teh, dan rempah-rempah', 'export_value': 221569256.2163, 'export_weight': 41608344.6358, 'import_value': 35071726.9626, 'import_weight': 12426305.8141},
        {'hs_code': '10', 'commodity': 'Serealia', 'export_value': 81968.9568, 'export_weight': 104996.955, 'import_value': 300774117.1162, 'import_weight': 1051850497.3548},
        {'hs_code': '11', 'commodity': 'Hasil penggilingan', 'export_value': 6266384.3591, 'export_weight': 11632199.2235, 'import_value': 25377867.4618, 'import_weight': 50859850.56},
        {'hs_code': '12', 'commodity': 'Biji dan buah mengandung minyak', 'export_value': 51311831.5102, 'export_weight': 24800203.483, 'import_value': 134577645.7726, 'import_weight': 235610499.186},
        {'hs_code': '13', 'commodity': 'Lak, getah, dan damar', 'export_value': 13595448.1744, 'export_weight': 4646087.73, 'import_value': 7147447.6694, 'import_weight': 1426937.331},
        {'hs_code': '14', 'commodity': 'Bahan anyaman nabati', 'export_value': 40894384.7698, 'export_weight': 376400373.7, 'import_value': 402421.1132, 'import_weight': 1065574.1},
        {'hs_code': '15', 'commodity': 'Lemak dan minyak hewani/nabati', 'export_value': 1839992945.056, 'export_weight': 1592967596.892, 'import_value': 19424425.6918, 'import_weight': 9130363.1202},
        {'hs_code': '16', 'commodity': 'Olahan dari daging, ikan, krustasea, dan moluska', 'export_value': 93674416.3922, 'export_weight': 13536094.474, 'import_value': 5747083.2179, 'import_weight': 1428887.4227},
        {'hs_code': '17', 'commodity': 'Gula dan kembang gula', 'export_value': 23983251.3039, 'export_weight': 27822584.095, 'import_value': 338163052.5244, 'import_weight': 626156765.3246},
        {'hs_code': '18', 'commodity': 'Kakao dan olahannya', 'export_value': 267036627.7167, 'export_weight': 23342331.7092, 'import_value': 224043314.5708, 'import_weight': 25357612.6084},
        {'hs_code': '19', 'commodity': 'Olahan dari tepung', 'export_value': 111685208.8296, 'export_weight': 48574145.505, 'import_value': 35419155.7874, 'import_weight': 12121436.3525},
        {'hs_code': '20', 'commodity': 'Olahan dari sayuran, buah, dan kacang', 'export_value': 27731523.6143, 'export_weight': 16601136.1719, 'import_value': 26067292.0629, 'import_weight': 15840095.083},
        {'hs_code': '21', 'commodity': 'Berbagai makanan olahan', 'export_value': 145554520.3408, 'export_weight': 51070915.6781, 'import_value': 105664642.4558, 'import_weight': 29918671.3424},
        {'hs_code': '22', 'commodity': 'Minuman, alkohol, dan cuka', 'export_value': 23432686.0156, 'export_weight': 35090285.1273, 'import_value': 9474314.1829, 'import_weight': 7534325.2917},
        {'hs_code': '23', 'commodity': 'Ampas dan sisa industri makanan', 'export_value': 68866712.0411, 'export_weight': 434029355.802, 'import_value': 182043691.8862, 'import_weight': 395271689.0189},
        {'hs_code': '24', 'commodity': 'Tembakau dan rokok', 'export_value': 188777154.444, 'export_weight': 15952735.3085, 'import_value': 133401003.4349, 'import_weight': 22506901.38},
        {'hs_code': '25', 'commodity': 'Garam, belerang, batu, dan semen', 'export_value': 48523351.0133, 'export_weight': 2126518202.27, 'import_value': 114966425.4437, 'import_weight': 957694942.1602},
        {'hs_code': '26', 'commodity': 'Bijih logam, terak, dan abu', 'export_value': 810284198.9795, 'export_weight': 368579209.5, 'import_value': 208581439.3608, 'import_weight': 1947495261.442},
        {'hs_code': '27', 'commodity': 'Bahan bakar mineral', 'export_value': 3614441185.4681, 'export_weight': 41563824654.3352, 'import_value': 821657516.1746, 'import_weight': 6155315644.8323},
        {'hs_code': '28', 'commodity': 'Bahan kimia anorganik', 'export_value': 344249346.5556, 'export_weight': 637804366.6132, 'import_value': 265773427.8386, 'import_weight': 534123114.7813},
        {'hs_code': '29', 'commodity': 'Bahan kimia organik', 'export_value': 301643097.1001, 'export_weight': 266557671.62, 'import_value': 549042619.9017, 'import_weight': 499649987.4965},
        {'hs_code': '30', 'commodity': 'Produk farmasi', 'export_value': 59523410.9332, 'export_weight': 1755942.5784, 'import_value': 131991635.7657, 'import_weight': 2199079.9022},
        {'hs_code': '31', 'commodity': 'Pupuk', 'export_value': 76201310.0876, 'export_weight': 199686464, 'import_value': 234126120.0797, 'import_weight': 810586593.1084},
        {'hs_code': '32', 'commodity': 'Sari bahan samak dan celup', 'export_value': 33545530.6676, 'export_weight': 10024569.2635, 'import_value': 130613212.2141, 'import_weight': 39409836.5582},
        {'hs_code': '33', 'commodity': 'Minyak atsiri, wewangian, dan kosmetik', 'export_value': 90573741.4885, 'export_weight': 11755395.6746, 'import_value': 126412400.8208, 'import_weight': 8970036.1205},
        {'hs_code': '34', 'commodity': 'Sabun dan preparat pembersih', 'export_value': 109084694.9821, 'export_weight': 92831167.7358, 'import_value': 64642691.9072, 'import_weight': 26103819.629},
        {'hs_code': '35', 'commodity': 'Perekat dan enzim', 'export_value': 5109228.2875, 'export_weight': 2174011.2737, 'import_value': 52174601.0396, 'import_weight': 25493285.2109},
        {'hs_code': '36', 'commodity': 'Bahan peledak, korek api, dan kembang api', 'export_value': 1019116.94, 'export_weight': 472662.76, 'import_value': 10046665.1321, 'import_weight': 1000112.9466},
        {'hs_code': '37', 'commodity': 'Barang fotografi atau sinematografi', 'export_value': 98602.6441, 'export_weight': 1596.85, 'import_value': 3878693.4998, 'import_weight': 798675.4973},
        {'hs_code': '38', 'commodity': 'Berbagai produk kimia', 'export_value': 718308773.9491, 'export_weight': 497015619.0848, 'import_value': 396795092.6962, 'import_weight': 176471122.7644},
        {'hs_code': '39', 'commodity': 'Plastik dan barang dari plastik', 'export_value': 217689327.5399, 'export_weight': 154852108.9501, 'import_value': 873190843.6843, 'import_weight': 545247679.4304},
        {'hs_code': '40', 'commodity': 'Karet dan barang dari karet', 'export_value': 497630489.7101, 'export_weight': 197159999.126, 'import_value': 242712129.8085, 'import_weight': 74195027.3346},
        {'hs_code': '41', 'commodity': 'Jangat dan kulit mentah/samak', 'export_value': 4340001.5071, 'export_weight': 149757.1039, 'import_value': 60002282.9499, 'import_weight': 8591656.3629},
        {'hs_code': '42', 'commodity': 'Barang dari kulit samak', 'export_value': 106528172.256, 'export_weight': 7086026.7886, 'import_value': 33699039.6464, 'import_weight': 6333765.5601},
        {'hs_code': '43', 'commodity': 'Kulit berbulu, bulu tiruan, dan barang daripadanya', 'export_value': 2644.25, 'export_weight': 247.2, 'import_value': 687430.602, 'import_weight': 61114.3667},
        {'hs_code': '44', 'commodity': 'Kayu dan barang dari kayu', 'export_value': 260619000.9785, 'export_weight': 385203491.634, 'import_value': 67398402.9009, 'import_weight': 345712306.229},
        {'hs_code': '45', 'commodity': 'Gabus dan barang dari gabus', 'export_value': 1696.2861, 'export_weight': 1514, 'import_value': 376146.7313, 'import_weight': 40031.0947},
        {'hs_code': '46', 'commodity': 'Barang anyaman', 'export_value': 5673728.5339, 'export_weight': 1302467.5608, 'import_value': 44425.7781, 'import_weight': 1871.8073},
        {'hs_code': '47', 'commodity': 'Pulp dari kayu', 'export_value': 295125381.5911, 'export_weight': 543068402.7, 'import_value': 144295589.6243, 'import_weight': 382671625.4038},
        {'hs_code': '48', 'commodity': 'Kertas, karton, dan barang daripadanya', 'export_value': 389099770.6014, 'export_weight': 513345287.3245, 'import_value': 154031196.1534, 'import_weight': 103314555.6677},
        {'hs_code': '49', 'commodity': 'Produk industri percetakan', 'export_value': 1276408.1504, 'export_weight': 217619.9836, 'import_value': 20603129.8062, 'import_weight': 3152974.6633},
        {'hs_code': '50', 'commodity': 'Sutra', 'export_value': 1496.4, 'export_weight': 547, 'import_value': 220241.9171, 'import_weight': 12571.6254},
        {'hs_code': '51', 'commodity': 'Wol, bulu hewan halus atau kasar', 'export_value': 52856.1784, 'export_weight': 1522.21, 'import_value': 9107149.5738, 'import_weight': 380646.9781},
        {'hs_code': '52', 'commodity': 'Kapas', 'export_value': 34803232.1908, 'export_weight': 12380679.8601, 'import_value': 117353490.3802, 'import_weight': 36778511.7337},
        {'hs_code': '53', 'commodity': 'Serat tekstil nabati lainnya', 'export_value': 485544.2376, 'export_weight': 2700714.516, 'import_value': 4133326.8128, 'import_weight': 1775291.5172},
        {'hs_code': '54', 'commodity': 'Filamen buatan', 'export_value': 45620733.4157, 'export_weight': 14401533.2029, 'import_value': 194824913.3804, 'import_weight': 62314844.2207},
        {'hs_code': '55', 'commodity': 'Serat stapel buatan', 'export_value': 133220124.239, 'export_weight': 74809099.3413, 'import_value': 88462270.836, 'import_weight': 25871933.6853},
        {'hs_code': '56', 'commodity': 'Kain kempa, benang khusus, dan benang pintal', 'export_value': 13113629.1834, 'export_weight': 3311564.4442, 'import_value': 52215912.9221, 'import_weight': 15903200.7194},
        {'hs_code': '57', 'commodity': 'Karpet dan tekstil penutup lantai lainnya', 'export_value': 2657200.8141, 'export_weight': 861508.7365, 'import_value': 330336.0029, 'import_weight': 69475.3229},
        {'hs_code': '58', 'commodity': 'Kain tenunan khusus', 'export_value': 2791422.3464, 'export_weight': 164042.9553, 'import_value': 32873003.0359, 'import_weight': 2358398.4159},
        {'hs_code': '59', 'commodity': 'Kain tekstil dilapisi atau dilaminasi', 'export_value': 4250160.0148, 'export_weight': 752112.269, 'import_value': 84852283.8843, 'import_weight': 11289632.0292},
        {'hs_code': '60', 'commodity': 'Kain rajutan', 'export_value': 10680158.787, 'export_weight': 1727179.2178, 'import_value': 216010770.5269, 'import_weight': 27193878.3855},
        {'hs_code': '61', 'commodity': 'Pakaian dan aksesorinya (rajutan)', 'export_value': 271561588.6459, 'export_weight': 13793532.4155, 'import_value': 30777771.2249, 'import_weight': 2954949.0582},
        {'hs_code': '62', 'commodity': 'Pakaian dan aksesorinya (bukan rajutan)', 'export_value': 270239550.1813, 'export_weight': 9150887.397, 'import_value': 24491484.7504, 'import_weight': 2075320.4755},
        {'hs_code': '63', 'commodity': 'Barang tekstil jadi lainnya', 'export_value': 8626172.3791, 'export_weight': 2370491.4029, 'import_value': 12521599.158, 'import_weight': 4455453.1865},
        {'hs_code': '64', 'commodity': 'Alas kaki', 'export_value': 534719031.6823, 'export_weight': 28174561.3326, 'import_value': 99592760.7395, 'import_weight': 10819748.9312},
        {'hs_code': '65', 'commodity': 'Tutup kepala dan bagiannya', 'export_value': 7334150.8151, 'export_weight': 341128.2028, 'import_value': 3585756.9224, 'import_weight': 557433.0986},
        {'hs_code': '66', 'commodity': 'Payung, tongkat, dan bagiannya', 'export_value': 114750.089, 'export_weight': 28478.3144, 'import_value': 1505169.3161, 'import_weight': 1206497.9127},
        {'hs_code': '67', 'commodity': 'Barang dari bulu unggas, bunga artifisial, dan wig', 'export_value': 14355920.1008, 'export_weight': 524707.8024, 'import_value': 3554525.7971, 'import_weight': 1054871.7771},
        {'hs_code': '68', 'commodity': 'Barang dari batu, semen, asbes, atau mika', 'export_value': 14759971.3917, 'export_weight': 29770622.4364, 'import_value': 41735324.2422, 'import_weight': 101209620.6555},
        {'hs_code': '69', 'commodity': 'Produk keramik', 'export_value': 20894789.4625, 'export_weight': 27245423.2886, 'import_value': 47608911.1456, 'import_weight': 78341261.9775},
        {'hs_code': '70', 'commodity': 'Kaca dan barang dari kaca', 'export_value': 34971017.5473, 'export_weight': 76903183.253, 'import_value': 49643428.3433, 'import_weight': 55710956.2011},
        {'hs_code': '71', 'commodity': 'Logam mulia dan perhiasan/permata', 'export_value': 652245146.1876, 'export_weight': 207827.3113, 'import_value': 1361442679.0586, 'import_weight': 786882.8067},
        {'hs_code': '72', 'commodity': 'Besi dan baja', 'export_value': 2316118338.8101, 'export_weight': 1856760090.5371, 'import_value': 992782259.0255, 'import_weight': 1465050315.2577},
        {'hs_code': '73', 'commodity': 'Barang dari besi dan baja', 'export_value': 122944353.2354, 'export_weight': 65206322.8843, 'import_value': 382748135.4167, 'import_weight': 220523768.7119},
        {'hs_code': '74', 'commodity': 'Tembaga dan barang daripadanya', 'export_value': 321479940.9233, 'export_weight': 35788627.6255, 'import_value': 138812187.7373, 'import_weight': 15000900.7739},
        {'hs_code': '75', 'commodity': 'Nikel dan barang daripadanya', 'export_value': 595852620.2296, 'export_weight': 157726271.7126, 'import_value': 5998256.4738, 'import_weight': 209157.3986},
        {'hs_code': '76', 'commodity': 'Aluminium dan barang daripadanya', 'export_value': 158199634.9856, 'export_weight': 53655135.5619, 'import_value': 200489563.2286, 'import_weight': 63029742.4707},
        {'hs_code': '78', 'commodity': 'Timbal dan barang daripadanya', 'export_value': 8860183.868, 'export_weight': 5296669.92, 'import_value': 6789706.0727, 'import_weight': 2694904.7822},
        {'hs_code': '79', 'commodity': 'Seng dan barang daripadanya', 'export_value': 3210824.1375, 'export_weight': 1384007.4424, 'import_value': 37241793.6428, 'import_weight': 12113447.4656},
        {'hs_code': '80', 'commodity': 'Timah dan barang daripadanya', 'export_value': 154648349.88, 'export_weight': 4789722.6, 'import_value': 12549081.5962, 'import_weight': 349361.5725},
        {'hs_code': '81', 'commodity': 'Logam tidak mulia lainnya', 'export_value': 22040269.5, 'export_weight': 4488885.7355, 'import_value': 6044304.1732, 'import_weight': 104619.3663},
        {'hs_code': '82', 'commodity': 'Perkakas dan peralatan dari logam tidak mulia', 'export_value': 6084916.5619, 'export_weight': 645370.7119, 'import_value': 53121888.055, 'import_weight': 11124849.4175},
        {'hs_code': '83', 'commodity': 'Berbagai barang logam tidak mulia', 'export_value': 8252567.5939, 'export_weight': 2150999.5229, 'import_value': 86234374.0325, 'import_weight': 29716451.7363},
        {'hs_code': '84', 'commodity': 'Mesin dan peralatan mekanis serta bagiannya', 'export_value': 526585892.6331, 'export_weight': 49534324.3832, 'import_value': 3015443204.5593, 'import_weight': 380548824.9388},
        {'hs_code': '85', 'commodity': 'Mesin dan perlengkapan elektrik serta bagiannya', 'export_value': 1578954132.4043, 'export_weight': 87692668.1902, 'import_value': 2543907032.7433, 'import_weight': 148324266.9202},
        {'hs_code': '86', 'commodity': 'Kereta api, trem, dan bagiannya', 'export_value': 1154033.2028, 'export_weight': 487939.84, 'import_value': 54503888.1134, 'import_weight': 11278519.8681},
        {'hs_code': '87', 'commodity': 'Kendaraan dan bagiannya', 'export_value': 894311880.1025, 'export_weight': 93555678.3361, 'import_value': 867502059.5836, 'import_weight': 134414070.4252},
        {'hs_code': '88', 'commodity': 'Kendaraan udara dan bagiannya', 'export_value': 8891703.2301, 'export_weight': 37623.1368, 'import_value': 25182552.5135, 'import_weight': 37114.5335},
        {'hs_code': '89', 'commodity': 'Kapal, perahu, dan struktur terapung', 'export_value': 22615575.0759, 'export_weight': 4010826.05, 'import_value': 93389328.9092, 'import_weight': 56920202.5828},
        {'hs_code': '90', 'commodity': 'Instrumen optik, fotografi, sinematografi, dan medis', 'export_value': 63078581.3619, 'export_weight': 2291655.1556, 'import_value': 308650501.2945, 'import_weight': 9138928.792},
        {'hs_code': '91', 'commodity': 'Jam dan arloji serta bagiannya', 'export_value': 1775784.4411, 'export_weight': 25144.1213, 'import_value': 30439538.3128, 'import_weight': 1153312.9416},
        {'hs_code': '92', 'commodity': 'Instrumen musik dan bagiannya', 'export_value': 45704026.4004, 'export_weight': 2946522.9369, 'import_value': 12375823.1602, 'import_weight': 765224.8886},
        {'hs_code': '93', 'commodity': 'Senjata dan amunisi serta bagiannya', 'export_value': 9165, 'export_weight': 11343, 'import_value': 2333965.7759, 'import_weight': 44061.1649},
        {'hs_code': '94', 'commodity': 'Perabotan, lampu, dan alat penerangan', 'export_value': 153797691.4993, 'export_weight': 35679905.2792, 'import_value': 135949978.8272, 'import_weight': 83823825.2431},
        {'hs_code': '95', 'commodity': 'Mainan, permainan dan keperluan olahraga', 'export_value': 60085756.0195, 'export_weight': 4889150.8908, 'import_value': 48711455.569, 'import_weight': 13880586.8805},
        {'hs_code': '96', 'commodity': 'Berbagai barang buatan pabrik', 'export_value': 22355362.9494, 'export_weight': 7986690.0751, 'import_value': 70462784.8347, 'import_weight': 15640159.347},
        {'hs_code': '97', 'commodity': 'Karya seni, barang kolektor, dan barang antik', 'export_value': 522878.2568, 'export_weight': 27583.5579, 'import_value': 230545.5642, 'import_weight': 95848.7415},
        {'hs_code': '98', 'commodity': 'Kendaraan bermotor dan komponennya (terbongkar tidak lengkap)', 'export_value': 3779.8141, 'export_weight': 41.7, 'import_value': 68984762.004, 'import_weight': 6904779.5576},
        {'hs_code': '99', 'commodity': 'Piranti lunak, barang digital dan barang kiriman', 'export_value': 855068.083, 'export_weight': 72465.9078, 'import_value': 48803401.7506, 'import_weight': 628.7214}
    ]

    def chunk_data(self,
                 dropwhile: Callable[[Dict], bool] = lambda c: False,
                 takewhile: Callable[[Dict], bool] = lambda c: True) -> Iterator[Dict]:
        """Chunk the HS Code data with filtering"""
        raw_data = self.load_hs_code_data()
        return itertools.takewhile(takewhile, itertools.dropwhile(dropwhile, raw_data))

    def merge_by_hs_category(self,
                           chunks: Iterator[Dict],
                           group_by: Callable[[Dict], str] = lambda c: c['hs_code'][0] + '0') -> Iterator[Dict]:
        """Merge data by HS Code category (first digit)"""
        prior_category = None
        document = {}
        doc_id = 0

        for chunk in chunks:
            current_category = group_by(chunk)
            if prior_category != current_category:
                if document:
                    yield document
                prior_category = current_category
                document = {
                    'doc_id': str(doc_id := doc_id + 1),
                    'hs_category': current_category,
                    'title': f"HS {current_category} - {self._get_category_name(current_category)}",
                    'text': self._format_hs_data(chunk),
                    'commodities': [{
                        'hs_code': chunk['hs_code'],
                        'name': chunk['commodity'],
                        'export_value': chunk['export_value'],
                        'import_value': chunk['import_value']
                    }]
                }
            else:
                document['text'] += f"\n\n{self._format_hs_data(chunk)}"
                document['commodities'].append({
                    'hs_code': chunk['hs_code'],
                    'name': chunk['commodity'],
                    'export_value': chunk['export_value'],
                    'import_value': chunk['import_value']
                })

        if document:
            yield document

    def _get_category_name(self, hs_category: str) -> str:
        """Map HS Code categories to names"""
        categories = {
        '01': 'Live Animals',
        '02': 'Meat and Edible Meat Offal',
        '03': 'Fish, Crustaceans, and Aquatic Invertebrates',
        '04': 'Dairy Products, Eggs, Honey',
        '05': 'Products of Animal Origin',
        '06': 'Live Trees and Plants',
        '07': 'Edible Vegetables',
        '08': 'Edible Fruits and Nuts',
        '09': 'Coffee, Tea, and Spices',
        '10': 'Cereals',
        '11': 'Milling Products',
        '12': 'Oil Seeds and Oleaginous Fruits',
        '13': 'Lac, Gums, Resins',
        '14': 'Vegetable Plaiting Materials',
        '15': 'Animal/Vegetable Fats and Oils',
        '16': 'Prepared Meat, Fish, or Crustaceans',
        '17': 'Sugars and Sugar Confectionery',
        '18': 'Cocoa and Cocoa Preparations',
        '19': 'Preparations of Cereals or Starch',
        '20': 'Preparations of Vegetables, Fruits, or Nuts',
        '21': 'Miscellaneous Food Preparations',
        '22': 'Beverages, Spirits, and Vinegar',
        '23': 'Food Industry Residues and Wastes',
        '24': 'Tobacco and Manufactured Tobacco Substitutes',
        '25': 'Salt, Sulfur, Earths, and Stone',
        '26': 'Metalliferous Ores, Slag, and Ash',
        '27': 'Mineral Fuels and Oils',
        '28': 'Inorganic Chemicals',
        '29': 'Organic Chemicals',
        '30': 'Pharmaceutical Products',
        '31': 'Fertilizers',
        '32': 'Tanning or Dyeing Extracts',
        '33': 'Essential Oils and Resinoids',
        '34': 'Soaps and Cleaning Preparations',
        '35': 'Albuminoidal Substances, Glues, Enzymes',
        '36': 'Explosives, Matches, Pyrotechnics',
        '37': 'Photographic or Cinematographic Goods',
        '38': 'Miscellaneous Chemical Products',
        '39': 'Plastics and Articles Thereof',
        '40': 'Rubber and Articles Thereof',
        '41': 'Raw Hides and Skins',
        '42': 'Articles of Leather',
        '43': 'Furskins and Artificial Fur',
        '44': 'Wood and Articles of Wood',
        '45': 'Cork and Articles of Cork',
        '46': 'Manufactures of Straw or Plaiting Materials',
        '47': 'Pulp of Wood or Fibrous Cellulosic Material',
        '48': 'Paper and Paperboard',
        '49': 'Printed Books, Newspapers, Pictures',
        '50': 'Silk',
        '51': 'Wool and Fine Animal Hair',
        '52': 'Cotton',
        '53': 'Other Vegetable Textile Fibers',
        '54': 'Man-Made Filaments',
        '55': 'Man-Made Staple Fibers',
        '56': 'Wadding, Felt, and Nonwovens',
        '57': 'Carpets and Other Textile Floor Coverings',
        '58': 'Special Woven Fabrics',
        '59': 'Impregnated, Coated, or Laminated Textiles',
        '60': 'Knitted or Crocheted Fabrics',
        '61': 'Knitted or Crocheted Apparel',
        '62': 'Non-Knitted or Non-Crocheted Apparel',
        '63': 'Other Made-Up Textile Articles',
        '64': 'Footwear, Gaiters, and Similar Articles',
        '65': 'Headgear and Parts Thereof',
        '66': 'Umbrellas, Walking Sticks, and Similar Articles',
        '67': 'Prepared Feathers and Articles Made Thereof',
        '68': 'Articles of Stone, Plaster, Cement',
        '69': 'Ceramic Products',
        '70': 'Glass and Glassware',
        '71': 'Pearls, Precious Stones, and Metals',
        '72': 'Iron and Steel',
        '73': 'Articles of Iron or Steel',
        '74': 'Copper and Articles Thereof',
        '75': 'Nickel and Articles Thereof',
        '76': 'Aluminum and Articles Thereof',
        '78': 'Lead and Articles Thereof',
        '79': 'Zinc and Articles Thereof',
        '80': 'Tin and Articles Thereof',
        '81': 'Other Base Metals',
        '82': 'Tools and Implements of Base Metals',
        '83': 'Miscellaneous Articles of Base Metals',
        '84': 'Machinery and Mechanical Appliances',
        '85': 'Electrical Machinery and Equipment',
        '86': 'Railway or Tramway Locomotives',
        '87': 'Vehicles Other Than Railway',
        '88': 'Aircraft, Spacecraft, and Parts Thereof',
        '89': 'Ships, Boats, and Floating Structures',
        '90': 'Optical, Photographic, and Medical Instruments',
        '91': 'Clocks and Watches',
        '92': 'Musical Instruments',
        '93': 'Arms and Ammunition',
        '94': 'Furniture, Lighting, and Prefabricated Buildings',
        '95': 'Toys, Games, and Sports Equipment',
        '96': 'Miscellaneous Manufactured Articles',
        '97': 'Works of Art and Collectibles',
        '98': 'Motor Vehicles and Components (Incomplete)',
        '99': 'Software, Digital Goods, and Special Shipments'
    }
        return categories.get(hs_category, 'Other')

    def _format_hs_data(self, data: Dict) -> str:
        """Format HS Code data for LLM processing"""
        return (
            f"HS Code: {data['hs_code']}\n"
            f"Commodity: {data['commodity']}\n"
            f"Export Value (USD): {data['export_value']:,.2f}\n"
            f"Export Weight (Kg): {data['export_weight']:,.2f}\n"
            f"Import Value (USD): {data['import_value']:,.2f}\n"
            f"Import Weight (Kg): {data['import_weight']:,.2f}"
        )

    def generate_chunk_summaries(self, documents: List[Dict]) -> List[Dict]:
        """Generate summaries for each HS Code category"""
        user_prompt = """Analyze this HS Code category trade data. Focus on:
1. Key export/import comparisons
2. Notable commodity performances
3. Significant weight/value ratios
Provide only the summary without commentary."""

        summaries = []
        for doc in documents:
            prompt = self._build_prompt(user_prompt, doc)

            summary = self.model(prompt, replicate_api_token=self.api_token)
            summaries.append({
                'doc_id': doc['doc_id'],
                'title': doc['title'],
                'text': summary.split("Summary:")[-1].strip(),
                'hs_category': doc['hs_category']
            })

        return summaries

    def generate_final_summary(self, summaries: List[Dict]) -> str:
        """Generate unified summary of all HS Code categories"""
        user_prompt = """Synthesize these HS Code category summaries into a national trade report:
1. Create 3 Import Commdities that Indonesia actually can produce and reduce dependency
2. Create 1 Export Commocities that Indoneisa should improve based on their country potential
Provide concise analysis with quantitative highlights."""

        prompt = self._build_prompt(user_prompt, summaries)

        return self.model(prompt, replicate_api_token=self.api_token).split("Analysis:")[-1].strip()

    def _build_prompt(self, instruction: str, data: Dict | List[Dict]) -> str:
        """Construct the prompt with instruction and data"""
        if isinstance(data, list):
            documents = "\n\n".join([f"### {doc['title']}\n{doc['text']}" for doc in data])
        else:
            documents = f"### {data['title']}\n{data['text']}"

        return f"""Task: {instruction}

Input Data:
{documents}

Analysis:"""


if __name__ == "__main__":
    # End-to-end run: build one document per two-digit HS code, summarize each
    # with the model, then ask for a single national report.

    if not api_token:
        # Report the missing secret explicitly; printing the bare identifier
        # BPS_Indonesia1 would raise NameError because no such variable exists.
        print("Replicate API token not found: set the 'BPS_Indonesia1' secret in Colab.")
    else:
        summarizer = BPSSummarizer(output, api_token)

        def hs_dropwhile(chunk: Dict) -> bool:
            """Skip leading rows whose HS code is below 1."""
            return int(chunk['hs_code']) < 1

        def hs_takewhile(chunk: Dict) -> bool:
            """Keep rows until the first HS code above 99."""
            return int(chunk['hs_code']) <= 99

        hs_chunks = summarizer.chunk_data(
            dropwhile=hs_dropwhile,
            takewhile=hs_takewhile
        )

        # Grouping on the full two-digit code puts every row in its own
        # document, so one model call is made per HS code.
        documents = list(summarizer.merge_by_hs_category(
            hs_chunks,
            group_by=lambda c: c['hs_code'][:2]
        ))

        print(f"Created {len(documents)} HS Code category documents")

        print("\nGenerating category summaries...")
        category_summaries = summarizer.generate_chunk_summaries(documents)
        print(f"Generated {len(category_summaries)} category summaries")

        print("\nGenerating final national trade summary...")
        national_summary = summarizer.generate_final_summary(category_summaries)

        print("\n=== INDONESIA TRADE SUMMARY BY HS CODE ===")
        print(national_summary)