In [2]:
import os
import json
import glob
import numpy as np
from pathlib import Path
import duckdb
import pandas as pd
import pyarrow as pa

from fnmatch import fnmatch 

import tqdm

from natsort import natsorted, index_natsorted

In [4]:
df = pd.read_csv("pali_canon.csv", index_col=0)

  df = pd.read_csv("pali_canon.csv", index_col=0)


In [171]:
df[df['nikaya'] == 'sn']


Unnamed: 0_level_0,nikaya,vagga,sutta_id,text
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
sn1.1:0.1,sn,sn1,,Saṁyutta Nikāya 1.1
sn1.1:0.2,sn,sn1,,1. Naḷavagga
sn1.1:0.3,sn,sn1,,Oghataraṇasutta
sn1.1:1.1,sn,sn1,,Evaṁ me sutaṁ—
sn1.1:1.2,sn,sn1,,ekaṁ samayaṁ bhagavā sāvatthiyaṁ viharati jeta...
...,...,...,...,...
sn56.131:5.4,sn,sn56,,jhānānāpānasaṁyutaṁ;
sn56.131:5.5,sn,sn56,,"Sotāpatti saccañcāti,"
sn56.131:5.6,sn,sn56,,mahāvaggoti vuccatīti.
sn56.131:5.7,sn,sn56,,Mahāvaggasaṁyuttapāḷi niṭṭhitā.


In [12]:
df_titles = df[df.index.str.contains(":0.2")]


df_titles.iloc[index_natsorted(df_titles.index)]

df_titles



Unnamed: 0_level_0,nikaya,vagga,sutta_id,text
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
an1.1:0.2,an,an1,,1. Rūpādivagga
an1.11:0.2,an,an1,,2. Nīvaraṇappahānavagga
an1.21:0.2,an,an1,,3. Akammaniyavagga
an1.31:0.2,an,an1,,4. Adantavagga
an1.41:0.2,an,an1,,5. Paṇihitaacchavagga
...,...,...,...,...
vv81:0.2,kn,vv81:0,,Purisavimāna
vv82:0.2,kn,vv82:0,,Purisavimāna
vv83:0.2,kn,vv83:0,,Purisavimāna
vv84:0.2,kn,vv84:0,,Purisavimāna


## AN Index Creation

In [147]:
with open("an-name_root-misc-site.json", "r") as f:
    data = json.load(f)

data = {k.split(".", 1)[1]: v for k, v in data.items()}

data

{'an1': 'Ekakanipāta',
 'an1-cittapariyadanavagga': 'Cittapariyādānavagga',
 'an1.1-10': 'Cittapariyādānavagga',
 'an1-nivaranappahanavagga': 'Nīvaraṇappahānavagga',
 'an1.11-20': 'Nīvaraṇappahānavagga',
 'an1-akammaniyavagga': 'Akammaniyavagga',
 'an1.21-30': 'Akammaniyavagga',
 'an1-adantavagga': 'Adantavagga',
 'an1.31-40': 'Adantavagga',
 'an1-panihitaacchavagga': 'Paṇihitaacchavagga',
 'an1.41-50': 'Paṇihitaacchavagga',
 'an1-accharasanghatavagga': 'Accharāsaṅghātavagga',
 'an1.51-60': 'Accharāsaṅghātavagga',
 'an1-viriyarambhadivagga': 'Vīriyārambhādivagga',
 'an1.61-70': 'Vīriyārambhādivagga',
 'an1-kalyanamittadivagga': 'Kalyāṇamittādivagga',
 'an1.71-81': 'Kalyāṇamittādivagga',
 'an1-pamadadivagga': 'Pamādādivagga',
 'an1.82-97': 'Pamādādivagga',
 'an1-dutiyapamadadivagga': 'Dutiyapamādādivagga',
 'an1.98-139': 'Dutiyapamādādivagga',
 'an1-adhammavagga': 'Adhammavagga',
 'an1.140-149': 'Adhammavagga',
 'an1-anapattivagga': 'Anāpattivagga',
 'an1.150-169': 'Anāpattivagga',
 'an

In [167]:
indexed_dict = {}

nipata_key = None
vagga_key = None


for k,v in data.items():
    if v.endswith('nipāta'):
        nipata_key = v
        indexed_dict.setdefault(nipata_key, {})
        continue

    elif '-' in k and '.' not in k:
        vagga_key = v
        indexed_dict[nipata_key].setdefault(vagga_key, {})
        continue


    elif '.' in k:
        indexed_dict[nipata_key][vagga_key][k] = v

        if 'an2' in k:
            print(nipata_key, vagga_key, k)
        
        
        

with open("an-index-tree.json", "w", encoding="utf-8") as f:
    json.dump(indexed_dict, f, ensure_ascii=False, indent=4)        

Dukanipāta Kammakaraṇavagga an2.1-10
Dukanipāta Adhikaraṇavagga an2.11-20
Dukanipāta Bālavagga an2.21-31
Dukanipāta Samacittavagga an2.32-41
Dukanipāta Parisavagga an2.42-51
Dukanipāta Puggalavagga an2.52-63
Dukanipāta Sukhavagga an2.64-76
Dukanipāta Sanimittavagga an2.77-86
Dukanipāta Dhammavagga an2.87-97
Dukanipāta Bālavagga an2.98-117
Dukanipāta Āsāduppajahavagga an2.118-129
Dukanipāta Āyācanavagga an2.130-140
Dukanipāta Dānavagga an2.141-150
Dukanipāta Santhāravagga an2.151-162
Dukanipāta Samāpattivagga an2.163-179
Dukanipāta Kodhapeyyālavagga an2.180-229
Dukanipāta Akusalapeyyālavagga an2.230-279
Dukanipāta Vinayapeyyālavagga an2.280-309
Dukanipāta Rāgapeyyālavagga an2.310-479


In [158]:
nipata_key="Ekakanipāta"

print(repr(nipata_key))
print(nipata_key == "Ekakanipāta")


'Ekakanipāta'
True


## SN Index Creation

In [135]:
with open("pali_canon_index.json", "r") as f:
    data = json.load(f)

In [136]:
data_new = {}
for k, v in data.items():
    k = k.split(":")[1].split(".", 1)[1]
    data_new[k] = v
    


In [137]:
indexed_dict = {}
vs_key = None
s_key = None
vagga_key = None

for k,v in data_new.items():


    if v.endswith('vaggasaṁyutta'):
        vs_key = v
        indexed_dict.setdefault(vs_key, {})
        continue 

    elif v.endswith('saṁyutta'):
        s_key = f"{k}.{v}"
        indexed_dict[vs_key].setdefault(s_key, {})
        continue

    elif v.endswith('vagga') :
        vagga_key = v
        indexed_dict[vs_key][s_key].setdefault(vagga_key, {})
        indexed_dict[vs_key][s_key][vagga_key].setdefault(k, {})
        continue

    elif v.endswith('sutta'):
        
  
        indexed_dict[vs_key][s_key][vagga_key][k]=v
    
    



In [138]:
suffix = "vagga"

filtered_indexed_dict = {
    vs_k: {
        samyutta_k: {
            vagga_k: {
                sutta_k: sutta_v
                for sutta_k, sutta_v in vagga_v.items()
                if not sutta_k.endswith(suffix)
            }
            for vagga_k, vagga_v in samyutta_v.items()
        }
        for samyutta_k, samyutta_v in vs_v.items()
    }
    for vs_k, vs_v in indexed_dict.items()
}





In [139]:
with open("result.json", "w", encoding="utf-8") as f:
    json.dump(filtered_indexed_dict, f, ensure_ascii=False, indent=4) 

## ABHIDHAMMA

#### Abhidhamma Text

In [4]:
root=Path('../pali_cannon/abhidhamma/')

dfs = []

for file_path in root.rglob("*.json"):
    


    # Get relative path parts
    relative_path = file_path.relative_to(root)
    parts = relative_path.parts
    
    # Separate folders and filename
    *folders, filename = parts
   

    with open(file_path, "r", encoding="utf-8") as f:
        data = json.load(f)

        df = pd.DataFrame(data.items(), columns=["verse_id", "text"])
        dfs.append(df)

        try:
             pakarana = folders[0] 
             df['pakarana'] = pakarana
        except:
            print(f"Error: {file_path}")
            pass
        
        try:
            chapter = folders[1]
            df['chapter'] = chapter
        except:
            df['chapter'] = df['verse_id'].str.split(":").str[0]

            pass

        if  df.empty:
            print(f"Empty df: {file_path}")
            

  

df_final = pd.concat(dfs)



#replace patthana with pt for verse_id and chapter consistency
df_final['chapter'] = df_final['chapter'].str.replace("patthana", "pt")
df_final['verse_id'] = df_final['verse_id'].str.replace("patthana", "pt")
df_final['pakarana'] = df_final['pakarana'].str.replace("patthana", "pt")


df_final['subchapter'] = df_final['verse_id'].astype(str).str.split(pat=":", n=1).str[0]



df_final = df_final[['pakarana', 'chapter', 'subchapter', 'text', 'verse_id']]


df_final['pakarana_id'] = df_final['pakarana'].map({'ds':1, 'vb':2, 'dt':3, 'pp':4, 'kv':5, 'ya':6, 'pt':7})

df_final.set_index('verse_id', inplace=True)






index_natsorted(df_final.index)
df_final.sort_values(by='pakarana_id', inplace=True)

df_final.drop(columns=['pakarana_id'], inplace=True)


df_final.to_csv("../abhidhamma_text.csv")


df_final


Unnamed: 0_level_0,pakarana,chapter,subchapter,text
verse_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ds1.1:0.1,ds,ds1,ds1.1,Dhammasaṅgaṇī
ds2.1.1:87.2,ds,ds2,ds2.1.1,"Saddhindriyaṁ, vīriyindriyaṁ, satindriyaṁ, sam..."
ds2.1.1:87.1,ds,ds2,ds2.1.1,Katamāni tasmiṁ samaye aṭṭhindriyāni honti?
ds2.1.1:86.4,ds,ds2,ds2.1.1,Ime tasmiṁ samaye tayo āhārā honti.
ds2.1.1:86.3,ds,ds2,ds2.1.1,ayaṁ tasmiṁ samaye viññāṇāhāro hoti.
...,...,...,...,...
pt2.11:157.1,pt,pt2,pt2.11,Upādānasampayuttaṁ dhammaṁ saṁsaṭṭho upādānasa...
pt2.11:157.0.2,pt,pt2,pt2.11,2.11.3.5.1–4 Paccayānulomādi
pt2.11:157.0.1,pt,pt2,pt2.11,2.11.3.5. Saṁsaṭṭhavāra
pt2.11:175.1,pt,pt2,pt2.11,Upādānavippayutto dhammo upādānasampayuttassa ...


#### Abhidhamma Index

In [26]:
ds_root=Path('../pali_cannon/abhidhamma/ds')

chapter_dict = {}

ds_index = {}


for file_path in ds_root.rglob("*.json"):
    relative_path = file_path.relative_to(root)
    parts = relative_path.parts
    *folders, filename = parts

    with open(file_path, "r", encoding="utf-8") as f:
        data = json.load(f)

        if folders[1] == 'ds1':
            chapter_value = list(data.values())[1].strip()
            chapter_key = list(data.keys())[2].split(":")[0]
        
        else:
            chapter_key = list(data.keys())[1].split(":")[0].rsplit(".", 1)[0]
            chapter_value = list(data.values())[2].strip().split(" ", 1)[1]
            



        chapter_dict[chapter_key] = chapter_value

chapter_dict = dict(natsorted(chapter_dict.items()))



# ds_index['dt1.Matika'] = {k: v for k, v in chapter_dict.items() if k.split('.')[0]=='ds1'}
# ds_index['dt2.Niddesa'] = {k: v for k, v in chapter_dict.items() if k.split('.')[0]=='ds2'}

# ds_index


chapter_dict

{'ds1.1': 'Tikamātikā',
 'ds1.2': 'Dukamātikā',
 'ds1.3': 'Suttantikadukamātikā',
 'ds2.1': 'Cittuppādakaṇḍa',
 'ds2.2': 'Rūpakaṇḍa',
 'ds2.3': 'Nikkhepakaṇḍa',
 'ds2.4': 'Aṭṭhakathākaṇḍa'}

In [6]:
# Vibhanga

VB_root=Path('../pali_cannon/abhidhamma/vb/')

vb_index = {}

for file_path in VB_root.rglob("*.json"):

    with open(file_path, "r", encoding="utf-8") as f:
        data = json.load(f)


        chapter = list(data.values())[1].strip()
        chapter_id = list(data.keys())[1].split(":")[0]

        vb_index[chapter_id] = chapter

sorted_dict = {k: vb_index[k] for k in natsorted(vb_index)}

vb_index = sorted_dict

In [8]:
#Dhatukatha index - PERFECTED
dt_root=Path('../pali_cannon/abhidhamma/dt/')

chapter_dict = {}
dt_index = {}


for file_path in dt_root.rglob("*.json"):
    relative_path = file_path.relative_to(root)
    parts = relative_path.parts
    *folders, filename = parts

    with open(file_path, "r", encoding="utf-8") as f:
        data = json.load(f)

        chapter_value = list(data.values())[2].strip().split(" ", 1)[1]
        chapter_key = list(data.keys())[2].split(":")[0]

        chapter_dict[chapter_key] = chapter_value

chapter_dict = dict(natsorted(chapter_dict.items()))



dt_index['dt1.Uddesa'] = {k: v for k, v in chapter_dict.items() if k.split('.')[0]=='dt1'}
dt_index['dt2.Niddesa'] = {k: v for k, v in chapter_dict.items() if k.split('.')[0]=='dt2'}

dt_index

{'dt1.Uddesa': {'dt1.1': 'Nayamātikā',
  'dt1.2': 'Abbhantaramātikā',
  'dt1.3': 'Nayamukhamātikā',
  'dt1.4': 'Lakkhaṇamātikā',
  'dt1.5': 'Bāhiramātikā'},
 'dt2.Niddesa': {'dt2.1': 'Paṭhamanaya saṅgahāsaṅgahapadaniddesa',
  'dt2.2': 'Dutiyanaya Saṅgahitenaasaṅgahitapadaniddesa',
  'dt2.3': 'Tatiyanaya Asaṅgahitenasaṅgahitapadaniddesa',
  'dt2.4': 'Catutthanaya Saṅgahitenasaṅgahitapadaniddesa',
  'dt2.5': 'Pañcamanaya Asaṅgahitenaasaṅgahitapadaniddesa',
  'dt2.6': 'Chaṭṭhanaya Sampayogavippayogapadaniddesa',
  'dt2.7': 'Sattamanaya Sampayuttenavippayuttapadaniddesa',
  'dt2.8': 'Aṭṭhamanaya Vippayuttenasampayuttapadaniddesa',
  'dt2.9': 'Navamanaya Sampayuttenasampayuttapadaniddesa',
  'dt2.10': 'Dasamanaya Vippayuttenavippayuttapadaniddesa',
  'dt2.11': 'Ekādasamanaya Saṅgahitenasampayuttavippayuttapadaniddesa',
  'dt2.12': 'Dvādasamanaya Sampayuttenasaṅgahitāsaṅgahitapadaniddesa',
  'dt2.13': 'Terasamanaya Asaṅgahitenasampayuttavippayuttapadaniddesa',
  'dt2.14': 'Cuddasamanaya Vipp

In [22]:
#Kathavatthu Index Creation - PERFECTED
ya_root=Path('../pali_cannon/abhidhamma/kv/')

chapter_dict={}
kv_index={}

#eg: file_path = ..pali_cannon/abhidhamma/kv/kv2/kv2.2_root-pli-ms.json
for file_path in ya_root.rglob("*.json"):
    relative_path = file_path.relative_to(root)
    parts = relative_path.parts

    #eg: filename = kv2.2_root-pli-ms.json
    #eg: folders = ['kv', 'kv2']
    *folders, filename = parts
    
    with open(file_path, "r", encoding="utf-8") as f:
        data = json.load(f)      
        #Values or text level - reading unit - one higher than verse level 
        #In Abhidhamma pitaka this level corresponds to individual JSON files
        #get value and lowest key of the dictionary
        chapter_key = list(data.keys())[3].split(":")[0]
        chapter_value = list(data.values())[3].strip()
        chapter_dict[chapter_key] = chapter_value

        vagga_key = list(data.keys())[2].split(".")[0]
        vagga_value = list(data.values())[2].strip()
        vagga_key = f"{vagga_key}.{vagga_value}"
        kv_index[vagga_key] = 'chapter_dict'

       

    
    chapter_dict = dict(natsorted(chapter_dict.items()))
    kv_index = dict(natsorted(vagga_dict.items()))
    #kv_index = dict(natsorted(kv_index.items()))

#select sub-dict from chapter_dict and insert to vagga_dict
for k,v in kv_index.items():
     sub_key = k.split('.')[0]
     kv_index[k] = {k: v for k, v in chapter_dict.items() if k.split('.')[0]==(sub_key)}

kv_index

{'kv1.Paṭhamavagga': {'kv1.1': 'Puggalakathā',
  'kv1.2': 'Parihānikathā',
  'kv1.3': 'Brahmacariyakathā',
  'kv1.4': 'Odhisokathā',
  'kv1.5': 'Jahatikathā',
  'kv1.6': 'Sabbamatthītikathā',
  'kv1.7': 'Atītakkhandhādikathā',
  'kv1.8': 'Ekaccaṁatthītikathā',
  'kv1.9': 'Satipaṭṭhānakathā',
  'kv1.10': 'Hevatthikathā'},
 'kv2.Dutiyavagga': {'kv2.1': 'Parūpahārakathā',
  'kv2.2': 'Aññāṇakathā',
  'kv2.3': 'Kaṅkhākathā',
  'kv2.4': 'Paravitāraṇakathā',
  'kv2.5': 'Vacībhedakathā',
  'kv2.6': 'Dukkhāhārakathā',
  'kv2.7': 'Cittaṭṭhitikathā',
  'kv2.8': 'Kukkuḷakathā',
  'kv2.9': 'Anupubbābhisamayakathā',
  'kv2.10': 'Vohārakathā',
  'kv2.11': 'Nirodhakathā'},
 'kv3.Tatiyavagga': {'kv3.1': 'Balakathā',
  'kv3.2': 'Ariyantikathā',
  'kv3.3': 'Vimuttikathā',
  'kv3.4': 'Vimuccamānakathā',
  'kv3.5': 'Aṭṭhamakakathā',
  'kv3.6': 'Aṭṭhamakassaindriyakathā',
  'kv3.7': 'Dibbacakkhukathā',
  'kv3.8': 'Dibbasotakathā',
  'kv3.9': 'Yathākammūpagatañāṇakathā',
  'kv3.10': 'Saṁvarakathā',
  'kv3.11

In [None]:
#puggalapannatti
pp_root=Path('../pali_cannon/abhidhamma/pp/')

pp_index = {}
matika_index = {}
niddesa_index = {}

for file_path in pp_root.rglob("*.json"):
    relative_path = file_path.relative_to(root)
    parts = relative_path.parts
    *folders, filename = parts

    with open(file_path, "r", encoding="utf-8") as f:
        data = json.load(f)

        if folders[1] == 'pp1':

            chapter = list(data.values())[2].strip().split(" ", 1)[1]
            chapter_id = list(data.keys())[2].split(":")[0]

            matika_index[chapter_id] = chapter      

        elif folders[1] == 'pp2':

            chapter = list(data.values())[2].strip().split(" ", 1)[1]
            chapter_id = list(data.keys())[2].split(":")[0]

            niddesa_index[chapter_id] = chapter

        matika_index_sorted = {k: matika_index[k] for k in natsorted(matika_index)}
        niddesa_index_sorted = {k: niddesa_index[k] for k in natsorted(niddesa_index)}



pp_index = {"Mātikā": matika_index_sorted, 
            "Niddesa": niddesa_index_sorted
}

pp_index

{'Mātikā': {'pp1.1': 'Ekakauddesa',
  'pp1.2': 'Dukauddesa',
  'pp1.3': 'Tikauddesa',
  'pp1.4': 'Catukkauddesa',
  'pp1.5': 'Pañcakauddesa',
  'pp1.6': 'Chakkauddesa',
  'pp1.7': 'Sattakauddesa',
  'pp1.8': 'Aṭṭhakauddesa',
  'pp1.9': 'Navakauddesa',
  'pp1.10': 'Dasakauddesa'},
 'Niddesa': {'pp2.1': 'Ekakapuggalapaññatti',
  'pp2.2': 'Dukapuggalapaññatti',
  'pp2.3': 'Tikapuggalapaññatti',
  'pp2.4': 'Catukkapuggalapaññatti',
  'pp2.5': 'Pañcakapuggalapaññatti',
  'pp2.6': 'Chakkapuggalapaññatti',
  'pp2.7': 'Sattakapuggalapaññatti',
  'pp2.8': 'Aṭṭhakapuggalapaññatti',
  'pp2.9': 'Navakapuggalapaññatti',
  'pp2.10': 'Dasakapuggalapaññatti'}}

In [None]:
#Yamaka Index Creation - PERFECTED
ya_root=Path('../pali_cannon/abhidhamma/ya/')

ya_index = {}
chapter_dict={}
yamaka_dict={}
#eg: file_path = ../pali_cannon/abhidhamma/ya/ya2/ya2.2.2_root-pli-ms.json
for file_path in ya_root.rglob("*.json"):
    relative_path = file_path.relative_to(root)
    parts = relative_path.parts

    #eg: filename = ya2.2.2_root-pli-ms.json
    #eg: folders = ['ya', 'ya2']
    *folders, filename = parts
    
    with open(file_path, "r", encoding="utf-8") as f:
        data = json.load(f)      
        #Values or text level - reading unit - one higher than verse level 
        #In Abhidhamma pitaka this level corresponds to individual JSON files
        #get value and lowest key of the dictionary
        filename_key = filename.split("_")[0]
        

        #JSON files are less deep both chapter level and text level are identical
        if len(filename.split(".")) == 3:
            filename_value = list(data.values())[1].strip().split(" ")[1]
            #chapter key eg:  "ya4.3:0.2": "4.3. Pariññāvāra "  -> Pavattivāra is chapter name
            #get 2nd value of JSON, strip whitespace and split by space
            chapter_value = list(data.values())[1].strip().split(" ")[1]
            chapter_key = list(data.keys())[1].split(":")[0]
            #join the list with a period and add "ya" prefix
            chapter_key =  f"{chapter_key}.{chapter_value}"

        else:
            filename_value = list(data.values())[2].strip().split(" ")[1]
            #chapter key eg:  "ya2.2.2:0.2": "2.2 Pavattivāra"  -> Pavattivāra is chapter name
            #get 2nd value of JSON, strip whitespace and split by space
            chapter_key = list(data.values())[1].strip().split(" ")
            #join the list with a period and add "ya" prefix
            chapter_key =  f"ya{'.'.join(chapter_key)}"

            #get the value of the highest key of the dict for YAMAKA PAKARANA
            yamaka_value = list(data.values())[0].strip().split(" ")[1]
            yamaka_key = folders[1]
            yamaka_key = f"{yamaka_key}.{yamaka_value}"
     
        #chapter_dict is a dictionary of chapter names (Hetuvārādiniddesa, Uppādavāra) and their corresponding nested dict
        chapter_dict[filename_key] = filename_value
        #yamaka_dict is a dictionary of yamaka names (eg: Uddesa, Niddesa, etc) and their corresponding nested dict
        yamaka_dict[chapter_key] = 'chapter_dict'
        #ya_index is a dictionary of the 10 yamakas names (eg: Mūlayamaka) and their corresponding nested dict
        ya_index[yamaka_key] = 'yamaka_dict'
       

    
    chapter_dict = dict(natsorted(chapter_dict.items()))
    yamaka_dict = dict(natsorted(yamaka_dict.items()))
    ya_index = dict(natsorted(ya_index.items()))


#select sub-dict from chapter_dict and insert to yamaka_dict
for k,v in yamaka_dict.items():
    sub_key = k.rpartition('.')[0]
    yamaka_dict[k] = {k: v for k, v in chapter_dict.items() if k.rpartition('.')[0]==(sub_key)}


#select sub-dict from yamaka_dict and insert to ya_index
for k,v in ya_index.items():
    sub_key = k.split('.')[0]
    ya_index[k] = {k: v for k, v in yamaka_dict.items() if k.split('.')[0]==(sub_key)}

#ya_index

In [None]:
#puggalapannatti
pt_root=Path('../pali_cannon/abhidhamma/patthana/')

chapter_dict={}

for file_path in pt_root.rglob("*.json"):
    relative_path = file_path.relative_to(root)
    parts = relative_path.parts
    *folders, filename = parts

    with open(file_path, "r", encoding="utf-8") as f:
        data = json.load(f)
        
        chapter_key = list(data.values())[2].strip().split(" ", 1)[0]
        chapter_value = list(data.values())[2].strip().split(" ", 1)[1]
        chapter_key = f"pt{chapter_key}"

        chapter_dict[chapter_key] = chapter_value

chapter_dict = dict(natsorted(chapter_dict.items()))

chapter_dict


{'pt1.1.': 'Nikkhepavāra',
 'pt1.2.': 'Kusalattika',
 'pt1.3.': 'Vedanāttika',
 'pt1.4.': 'Vipākattika',
 'pt1.5.': 'Upādinnattika',
 'pt1.6.': 'Saṅkiliṭṭhattika',
 'pt1.7.': 'Vitakkattika',
 'pt1.8.': 'Pītittika',
 'pt1.9.': 'Dassanenapahātabbattika',
 'pt1.10.': 'Dassanenapahātabbahetukattika',
 'pt1.11.': 'Ācayagāmittika',
 'pt1.12.': 'Sekkhattika',
 'pt1.13.': 'Parittattika',
 'pt1.14.': 'Parittārammaṇattika',
 'pt1.15.': 'Hīnattika',
 'pt1.16.': 'Micchattaniyatattika',
 'pt1.17.': 'Maggārammaṇattika',
 'pt1.18.': 'Uppannattika',
 'pt1.19.': 'Atītattika',
 'pt1.20.': 'Atītārammaṇattika',
 'pt1.21.': 'Ajjhattattika',
 'pt1.22.': 'Ajjhattārammaṇattika',
 'pt1.23.': 'Sanidassanasappaṭighattika',
 'pt2.1.': 'Hetugocchaka',
 'pt2.2.': 'Cūḷantaraduka',
 'pt2.3.': 'Āsavagocchaka',
 'pt2.4.': 'Saññojanagocchaka',
 'pt2.5.': 'Ganthagocchaka',
 'pt2.6–7.': 'Oghayogagocchaka',
 'pt2.8.': 'Nīvaraṇagocchaka',
 'pt2.9.': 'Parāmāsagocchaka',
 'pt2.10.': 'Mahantaraduka',
 'pt2.11.': 'Upādānagoccha

In [27]:
abhidhamma_index = {
    "Dhammasangani": ds_index,
    "Vibhanga": vb_index,
    "Dhatukatha": dt_index,
    "Kathavatthu": kv_index,
    "Puggalapannatti": pp_index, 
    "Yamaka": ya_index,
    "Patthana": {}
}

with open("../abhidhamma_index.json", "w", encoding="utf-8") as f:
    json.dump(abhidhamma_index, f, ensure_ascii=False, indent=4)






In [115]:
pakarana = df_index[df_index['verse_id'].str.split(":", n=1).str[1]=='0.1']


pakarana['text'].unique()


<ArrowStringArray>
[   'Dhammasaṅgaṇī ',         'Vibhaṅga ',       'Dhātukathā ',
  'Puggalapaññatti ',      'Kathāvatthu ', '10 Indriyayamaka ',
  '3 Āyatanayamaka ',    '5 Saccayamaka ',  '2 Khandhayamaka ',
    '4 Dhātuyamaka ',  '7 Anusayayamaka ',     '1 Mūlayamaka ',
   '9 Dhammayamaka ', '6 Saṅkhārayamaka ',    '8 Cittayamaka ',
 'Paṭṭhānapakaraṇa ']
Length: 16, dtype: str

In [None]:
df_index[df_index['verse_id'].str.split(":").str[1]=='0.2']

Unnamed: 0,verse_id,text
272,ds2.1.1:0.2,2 Niddesa
927,ds2.3.2:0.2,2 Niddesa
1127,ds2.2.2:0.2,2 Niddesa
1194,ds2.1.9:0.2,2 Niddesa
1227,ds2.3.1:0.2,2 Niddesa
...,...,...
84415,pt2.13:0.2,"Dhammānuloma (1), Dukapaṭṭhānapāḷi (2)"
85442,pt2.4:0.2,"Dhammānuloma (1), Dukapaṭṭhānapāḷi (2)"
87427,pt2.5:0.2,"Dhammānuloma (1), Dukapaṭṭhānapāḷi (2)"
87676,pt2.11:0.2,"Dhammānuloma (1), Dukapaṭṭhānapāḷi (2)"


In [43]:
!uv pip install "psycopg[binary]"

[2K[2mResolved [1m3 packages[0m [2min 675ms[0m[0m                                         [0m
[2K[37m⠙[0m [2mPreparing packages...[0m (0/1)                                                   
[2K[1A[37m⠙[0m [2mPreparing packages...[0m (0/1)-------------------[0m[0m     0 B/4.44 MiB            [1A
[2K[1A[37m⠙[0m [2mPreparing packages...[0m (0/1)-------------------[0m[0m 16.00 KiB/4.44 MiB          [1A
[2K[1A[37m⠙[0m [2mPreparing packages...[0m (0/1)-------------------[0m[0m 32.00 KiB/4.44 MiB          [1A
[2K[1A[37m⠙[0m [2mPreparing packages...[0m (0/1)-------------------[0m[0m 39.99 KiB/4.44 MiB          [1A
[2K[1A[37m⠙[0m [2mPreparing packages...[0m (0/1)-------------------[0m[0m 53.33 KiB/4.44 MiB          [1A
[2K[1A[37m⠙[0m [2mPreparing packages...[0m (0/1)-------------------[0m[0m 61.32 KiB/4.44 MiB          [1A
[2K[1A[37m⠙[0m [2mPreparing packages...[0m (0/1)-------------------[0m[0m 61.40 KiB/4.44 MiB      

In [51]:
import psycopg


conn = psycopg.connect(
    host="localhost",          # Docker host
    port=5432,                 # Postgres port from docker-compose
    dbname="pali_canon_db",    # Database name
    user="pali_canon",         # DB user
    password="anicca"          # DB password
)

cur = conn.cursor()

In [None]:
from sqlmodel import SQLModel, Field, create_engine, Session
from typing import Optional, List, Dict
from sqlalchemy.dialects.postgresql import JSONB, ARRAY, FLOAT




In [59]:
import sys
sys.path.insert(0, '..')

from pali_text_models import Sutta, get_session