In [2]:
import json
import re

def convert_surah_to_json(surah_text: str, surah_name: str, surah_number: int):
    """Split a Surah's text into numbered Ayahs and wrap them in a dict.

    Every Ayah in ``surah_text`` is expected to be followed by its number
    written between ornate parentheses, e.g. ``... ﴿3﴾``.  The text of an
    Ayah runs from the start of its match up to the start of the next
    match; any text that follows the final marker is therefore kept as
    part of the last Ayah.

    The markers themselves and the ornament characters ``۩`` and ``۞`` are
    removed, and every run of whitespace (including line breaks) is
    collapsed to a single space so that removing a symbol from the middle
    of an Ayah does not leave a doubled space behind.

    Args:
        surah_text: Raw Surah text containing ``﴿N﴾`` markers.
        surah_name: Stored unchanged under the ``"surah"`` key.
        surah_number: Stored unchanged under the ``"number"`` key.

    Returns:
        ``{"surah": ..., "number": ..., "ayahs": [{"ayah_number": int,
        "text": str}, ...]}``, or ``None`` (after printing a message) when
        no numbered Ayah is found in ``surah_text``.
    """
    # One or more whitespace-separated words followed by a ﴿number﴾ marker
    ayah_pattern = re.compile(r'([^\s﴿]+(?:\s+[^\s﴿]+)*)\s*﴿(\d+)﴾')
    matches = list(ayah_pattern.finditer(surah_text))

    # Nothing to convert when the text holds no numbered Ayah at all
    if not matches:
        print("No matches found.")
        return None

    # Each Ayah ends where the next one starts; the last runs to end of text
    slice_ends = [m.start() for m in matches[1:]] + [len(surah_text)]

    ayahs = []
    for match, slice_end in zip(matches, slice_ends):
        ayah_text = surah_text[match.start():slice_end]

        # Remove the marker and number, then the ornament symbols
        ayah_text = re.sub(r'\s*﴿\d+﴾', '', ayah_text)
        ayah_text = ayah_text.replace("۩", "").replace("۞", "")

        # Trim the ends and collapse whitespace runs left by the removals
        ayah_text = " ".join(ayah_text.split())

        ayahs.append({
            "ayah_number": int(match.group(2)),
            "text": ayah_text,
        })

    # Create JSON structure
    return {
        "surah": surah_name,
        "number": surah_number,
        "ayahs": ayahs,
    }

def print_surah_json(surah_json):
    """Pretty-print a Surah structure as JSON on standard output.

    A falsy argument (``None``, an empty dict, ...) produces a short
    diagnostic message instead of JSON.
    """
    # Guard clause: there is nothing meaningful to serialise
    if not surah_json:
        print("Invalid or empty Surah JSON data.")
        return

    # ensure_ascii=False keeps non-ASCII text readable instead of \u-escaped
    print(json.dumps(surah_json, ensure_ascii=False, indent=4))

# Sample input: the Surah's metadata, then its raw text with ﴿N﴾ markers
surah_number = 1
surah_name = "الفاتحة"
surah_text = """
الْحَمْدُ لِلَّهِ رَبِّ الْعَالَمِينَ ﴿2﴾ الرَّحْمَٰنِ الرَّحِيمِ ﴿3﴾ مَالِكِ يَوْمِ الدِّينِ ﴿4﴾ إِيَّاكَ نَعْبُدُ وَإِيَّاكَ نَسْتَعِينُ ﴿5﴾ اهْدِنَا الصِّرَاطَ الْمُسْتَقِيمَ ﴿6﴾ صِرَاطَ الَّذِينَ أَنْعَمْتَ عَلَيْهِمْ غَيْرِ الْمَغْضُوبِ عَلَيْهِمْ وَلَا الضَّالِّينَ ﴿7﴾
"""

# Build the structured representation of the sample Surah
surah_json = convert_surah_to_json(
    surah_text=surah_text,
    surah_name=surah_name,
    surah_number=surah_number,
)

# Display it through the dedicated printing helper
print_surah_json(surah_json)

{
    "surah": "الفاتحة",
    "number": 1,
    "ayahs": [
        {
            "ayah_number": 2,
            "text": "الْحَمْدُ لِلَّهِ رَبِّ الْعَالَمِينَ"
        },
        {
            "ayah_number": 3,
            "text": "الرَّحْمَٰنِ الرَّحِيمِ"
        },
        {
            "ayah_number": 4,
            "text": "مَالِكِ يَوْمِ الدِّينِ"
        },
        {
            "ayah_number": 5,
            "text": "إِيَّاكَ نَعْبُدُ وَإِيَّاكَ نَسْتَعِينُ"
        },
        {
            "ayah_number": 6,
            "text": "اهْدِنَا الصِّرَاطَ الْمُسْتَقِيمَ"
        },
        {
            "ayah_number": 7,
            "text": "صِرَاطَ الَّذِينَ أَنْعَمْتَ عَلَيْهِمْ غَيْرِ الْمَغْضُوبِ عَلَيْهِمْ وَلَا الضَّالِّينَ"
        }
    ]
}
