<a href="https://colab.research.google.com/github/AnshumanAI/Indh/blob/main/Indh_Assm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Vibe-coded implementation of the assignment.

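Uses `requests` to download the source texts and BeautifulSoup to strip HTML markup when a source is fetched as HTML; the verses are then extracted from the resulting plain-text lines.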

In [9]:
!pip install --quiet requests beautifulsoup4

In [10]:
import requests, re, json, textwrap
from bs4 import BeautifulSoup
from pathlib import Path

# Source texts named in the assignment, keyed by the short name used in the
# rest of the notebook. Both entries point at GRETIL plain-text files so that
# a single parser can handle them.
URLS = dict(
    astavakra="https://gretil.sub.uni-goettingen.de/gretil/corpustei/transformations/plaintext/sa_aSTAvakragItA.txt",
    narayaniya="https://gretil.sub.uni-goettingen.de/gretil/corpustei/transformations/plaintext/sa_nArAyaNIya.txt",
)

def fetch(url, is_html=False):
    """Download ``url`` and return its text content as a list of lines.

    Parameters
    ----------
    url : str
        Address of the document to download.
    is_html : bool, optional
        When true, the payload is parsed as HTML and reduced to its text
        (tags stripped, text nodes joined with newlines) before splitting.

    Returns
    -------
    list[str]
        The lines of the document, without line terminators.

    Raises
    ------
    requests.HTTPError
        If the server responds with an error status.
    """
    r = requests.get(url, timeout=30)
    r.raise_for_status()

    # For text/* responses that do not declare a charset, requests assumes
    # ISO-8859-1, which would garble UTF-8 diacritics. In that case fall back
    # to the encoding detected from the payload itself.
    if "charset" not in r.headers.get("Content-Type", "").lower():
        r.encoding = r.apparent_encoding

    data = r.text
    if is_html:
        data = BeautifulSoup(data, "html.parser").get_text("\n")   # strip tags, keep line-breaks
    return data.splitlines()

# Download every text once; `lines` maps each short name in URLS to the
# list of lines returned by fetch() for that URL.
lines = dict(zip(URLS, map(fetch, URLS.values())))

In [11]:
from collections import deque

def extract_verses(raw_lines):
    """Collect verses from lines whose verse ends in a ``// <index>`` tag.

    A line counts as the end of a verse when it contains ``//`` followed by an
    optional alphabetic/underscore prefix and a numeric index made of digits,
    dots and commas (e.g. ``... // Avg_1.5``). The verse text is the part of
    that line before ``//``, preceded by up to two untagged lines seen since
    the previous tagged line.

    Blank lines and lines starting with ``#`` are skipped. Untagged lines that
    appear before the first tagged line are discarded, so the first verse
    consists only of the text on its tagged line.

    Parameters
    ----------
    raw_lines : iterable of str
        Lines of the source text.

    Returns
    -------
    list[dict]
        One ``{"verse": str, "index": str}`` record per non-empty verse, in
        source order. Commas in the index are replaced by dots.
    """
    verse_tag = re.compile(r"(.*?)//\s*(?:[A-Za-z_]+)?([0-9][0-9,\.]*)\s*$")
    verses = []
    pending = deque(maxlen=2)   # only the two most recent untagged lines are kept
    seen_tag = False

    for raw in raw_lines:
        text = raw.strip()
        if text == "" or text.startswith("#"):
            continue

        hit = verse_tag.match(text)
        if hit is None:
            # Untagged line: carry it over to the next verse, but only once
            # the first tag has been seen.
            if seen_tag:
                pending.append(text)
            continue

        seen_tag = True
        pieces = list(pending) + [hit.group(1).strip()]
        pending.clear()

        verse_text = "\n".join(pieces).strip()
        if not verse_text:
            continue
        verses.append({
            "verse": verse_text,
            "index": hit.group(2).replace(",", "."),
        })

    return verses

In [12]:
# Run the parser on both downloaded texts; each result is a list of
# {"verse", "index"} records.
astavakra   = extract_verses(lines["astavakra"])
narayaniya  = extract_verses(lines["narayaniya"])

# Print one record from each text as a quick visual check: the fifth record of
# the first text and the first record of the second.
print(f"Aṣṭāvakragītā sample →\n{json.dumps(astavakra[4], ensure_ascii=False, indent=2)}\n")
print(f"Nārāyaṇīya sample   →\n{json.dumps(narayaniya[0], ensure_ascii=False, indent=2)}")

Aṣṭāvakragītā sample →
{
  "verse": "na tvaṃ viprādiko varṇo nāśramī nākṣagocaraḥ\nasaṅgo 'si nirākāro viśvasākṣī sukhī bhava",
  "index": "1.5"
}

Nārāyaṇīya sample   →
{
  "verse": "ya icchet siddhim āsthātuṃ devatāṃ kāṃ yajeta saḥ",
  "index": "12.321.1"
}


In [13]:
# Concatenate the records of both texts and save them as a single JSON file.
all_verses = astavakra + narayaniya
# ensure_ascii=False keeps the diacritics as literal characters, so the file
# encoding is set explicitly; without it write_text() uses the platform's
# locale encoding, which may be unable to represent them.
Path("verses.json").write_text(
    json.dumps(all_verses, ensure_ascii=False, indent=2),
    encoding="utf-8",
)

180661