In [None]:
import json
import re

def convert_surah_to_json(surah_text, surah_name, surah_number):
    """Split a Surah into numbered Ayahs and save them as a JSON file.

    Every Ayah in ``surah_text`` is expected to be followed by its number
    wrapped in ornate parentheses (for example ``﴿2﴾``). The marker is
    removed from the stored text, as are the ``۩`` and ``۞`` signs.

    Args:
        surah_text: Raw Surah text containing the Ayah markers.
        surah_name: Stored under the ``"surah"`` key and used to build the
            output path ``f"{surah_name}.json"``.
        surah_number: Stored under the ``"number"`` key.

    Returns:
        The dictionary that was written to disk, shaped as
        ``{"surah": ..., "number": ..., "ayahs": [{"ayah_number": int,
        "text": str}, ...]}``. Returns ``None`` (after printing a message
        and without writing any file) when no Ayah marker is found.

    Side effects:
        Writes ``f"{surah_name}.json"`` as UTF-8 with non-ASCII characters
        left unescaped and an indent of 4.

    Note:
        Text that follows the final marker is kept and becomes part of the
        last Ayah's text.
    """
    # One Ayah: a run of whitespace-separated words, then its number marker.
    ayah_pattern = re.compile(r'([^\s﴿]+(?:\s+[^\s﴿]+)*)\s*﴿(\d+)﴾')
    # Matches a marker together with the whitespace in front of it.
    marker_pattern = re.compile(r'\s*﴿\d+﴾')
    # Translation table that deletes the ۩ and ۞ signs in a single pass.
    sign_table = str.maketrans("", "", "۩۞")

    matches = list(ayah_pattern.finditer(surah_text))
    if not matches:
        print("No matches found.")
        return None

    # Each Ayah's slice stops where the next match begins; the last slice
    # runs to the end of the input so trailing text is not dropped.
    slice_ends = [later.start() for later in matches[1:]]
    slice_ends.append(len(surah_text))

    ayahs = []
    for match, slice_end in zip(matches, slice_ends):
        raw = surah_text[match.start():slice_end]
        cleaned = marker_pattern.sub('', raw).translate(sign_table).strip()
        ayahs.append({
            "ayah_number": int(match.group(2)),
            "text": cleaned,
        })

    surah_json = {
        "surah": surah_name,
        "number": surah_number,
        "ayahs": ayahs,
    }

    with open(f"{surah_name}.json", "w", encoding="utf-8") as file:
        json.dump(surah_json, file, ensure_ascii=False, indent=4)

    return surah_json

    

# Demo input: metadata for the sample Surah
surah_number = 1
surah_name = "الفاتحة"

# Sample text; each Ayah is followed by its number marker (2 through 7 here)
surah_text = """
الْحَمْدُ لِلَّهِ رَبِّ الْعَالَمِينَ ﴿2﴾ الرَّحْمَٰنِ الرَّحِيمِ ﴿3﴾ مَالِكِ يَوْمِ الدِّينِ ﴿4﴾ إِيَّاكَ نَعْبُدُ وَإِيَّاكَ نَسْتَعِينُ ﴿5﴾ اهْدِنَا الصِّرَاطَ الْمُسْتَقِيمَ ﴿6﴾ صِرَاطَ الَّذِينَ أَنْعَمْتَ عَلَيْهِمْ غَيْرِ الْمَغْضُوبِ عَلَيْهِمْ وَلَا الضَّالِّينَ ﴿7﴾
"""

# Parse the sample and write the result to "<surah_name>.json"
convert_surah_to_json(
    surah_text=surah_text,
    surah_name=surah_name,
    surah_number=surah_number,
)