In [None]:
import re
import json

def parse_text_to_json(text):
    """Parse OCR-extracted multiple-choice questions into a JSON string.

    The input is split into sections on the literal ``"JAM - "`` marker. The
    first four-digit run found in a section is taken as its year. Inside a
    section every ``Q<number>. `` marker starts a new question, and the text
    up to the next marker (or the end of the section) belongs to that
    question only, so each question is paired with its own options.

    Args:
        text: Raw text containing year headers, question markers such as
            ``Q7. `` and options labelled ``(A)`` to ``(D)``.

    Returns:
        A JSON string (``indent=4``) mapping every year from ``"2005"`` to
        ``"2025"`` to a list of ``{"question", "options", "answer"}``
        objects. ``options`` always has four entries ordered A, B, C, D; it
        is four empty strings when the question does not carry all four
        labels. ``answer`` is always an empty string.

    Notes:
        * Questions that have no ``(A)`` option are skipped.
        * Sections whose detected year is outside 2005-2025 are skipped
          instead of raising ``KeyError``.
    """
    text = text.replace("\n", " ")  # Line breaks carry no structure here
    year_pattern = re.compile(r'(\d{4})')
    question_start = re.compile(r'Q\d+\.\s')
    option_pattern = re.compile(r'\((A|B|C|D)\)\s([^()]+)')
    labels = ("A", "B", "C", "D")

    data = {str(year): [] for year in range(2005, 2026)}

    for section in text.split("JAM - "):
        year_match = year_pattern.search(section)
        if not year_match:
            continue
        current_year = year_match.group(1)
        if current_year not in data:
            # A four-digit number that is not a supported year (e.g. "1024").
            continue

        markers = list(question_start.finditer(section))
        for index, marker in enumerate(markers):
            # A question's text runs until the next question marker, which
            # keeps options (and stems) from leaking between questions.
            if index + 1 < len(markers):
                chunk_end = markers[index + 1].start()
            else:
                chunk_end = len(section)
            chunk = section[marker.end():chunk_end]

            stem, found, remainder = chunk.partition("(A)")
            if not found:
                continue  # Not a multiple-choice question

            # Key options by label so a two-column layout read as
            # "(A) (C) (B) (D)" still ends up in A, B, C, D order. The first
            # occurrence of a label wins.
            by_label = {}
            for label, option_text in option_pattern.findall(found + remainder):
                by_label.setdefault(label, option_text.strip())

            if len(by_label) == len(labels):
                option_list = [by_label[label] for label in labels]
            else:
                option_list = ["", "", "", ""]

            data[current_year].append({
                "question": stem.strip(),
                "options": option_list,
                "answer": ""
            })

    return json.dumps(data, indent=4)


# Example usage: an OCR-extracted sample whose sections are introduced by "JAM - <year>" headers.
text = """2020\nQ7. Approximately 71% of the planetary mass in the\nsolar system is concentrated in\n(A) Uranus (B) Mercury\n(C) Saturn (D) Jupiter\n\nQ9. The most abundant element in the Earth’s\ncontinental crust is\n(A) Silicon\n(C) Oxygen\n\n(B) Aluminium\n(D) Iron\n\nQ27. Match the seismic discontinuity in Group I with\ntheir occurrence in Earth’s interior in Group II.\nGroup I Group II\nP. Conrad 1. Between lower mantle\n\nand outer core\n\nQ. Mohorovicic 2. Between crust and upper\n\nmantle\n\nR. .Gutenberg 3. Between inner and outer\ncore\n\nS. Lehmann 4. Between lower and\nupper crust\n\n(A) P-4, Q-2, R-1,S-3 (B) P-4, Q-2, R-3, S-1\n(C) P-3, Q-2, R-4,S-1  (D) P-2, Q-4, R-1, S-3\n\nJAM - 2019\nQ2. Shear waves do not travel through the\n(A) Upper continental crust\n(B) Upper mantle\n(C) Lower mantle\n(D) Outer core\n\nQ41. The intensity of an earthquake of magnitude 8 on\nthe Richter scale is greater than the intensity of\nan earthquake of magnitude 5 on the same scale\nby times.\n\nJAM - 2018\nQl. Which one among the following planets in the\nSolar system is most similar in size to the Earth?\n(A) Mercury (B) Venus\n(C) Neptune (D) Uranus\n\nQ2. In which one of the following tectonic settings\nare the highest mountain chains and thickest crust\nfound?\n\n(A) Island arc\n(B) Continental arc\n\n_\n\nQ41.\n\n(C) Continental collision\n(D) Transcurrent\nQ3. The second-most abundant oxide in the Earth’s\n\ncrust is\n(A) ALO, (B) SiO,\n(C) CaO (D) Na,O\n\ncontinental mountain\n\n. Isostasy involves\nbelts.\n(A) compensation in (B) creation of\n(C) destruction of (D) thrusting in\n\nJAM - 2017\nQ7. Conservative plate boundary is represented by\n\nQl\n\n(A) Normal fault (B) Growth fault\n(C) Transform fault (D) Reverse fault\nJAM - 2016\n\nQI. The most abundant metal (by weight %) in the\nEarth’s crust is\n(A) Al (B) Fe\n(C) Na (D) Mg\n\nQ6. 
The amplitude of ground motion during an\n\nearthquake of magnitude 7 in Richter scale is how\n\\many times more than that of a magnitude 5?\n\n(A) 10 (B) 100\n(C) 1000 (D) 10,000\nQ10. Which is the most abundant ion in the normal\nseawater?\n(A) Cr (B) SO,>\n(C) Na (D) K\nJAM - 2015\n\nQ28. S-wave is terminated at:\n(A) Crust—Mantle boundary\n(B) Lithosphere—Asthenosphere boundary\n(C) Mantle—Core boundary\n(D) Inner and Outer core boundary\n\nQ9. A radioactive isotope has 1024 atoms. How many\natoms will remain after 4 half-lives?\n\nQ15. Calculate the average atomic weight (answer to\nbe given up to 3 decimal places) of Rubidium using\n\nthe given data.\n\nIsotope Abundance | Atomic weight |\nry | }\n(%) (a.m.u.)\n\n84.912 |\n\n86.909\n\n72.17 |\n27.83"""

# Parse the sample text and print the resulting JSON string (one key per year, 2005-2025).
json_output = parse_text_to_json(text)
print(json_output)

{
    "2005": [],
    "2006": [],
    "2007": [],
    "2008": [],
    "2009": [],
    "2010": [],
    "2011": [],
    "2012": [],
    "2013": [],
    "2014": [],
    "2015": [
        {
            "question": "S-wave is terminated at:",
            "options": [
                "Crust\u2014Mantle boundary",
                "Lithosphere\u2014Asthenosphere boundary",
                "Mantle\u2014Core boundary",
                "Inner and Outer core boundary"
            ],
            "answer": ""
        }
    ],
    "2016": [
        {
            "question": "The amplitude of ground motion during an  earthquake of magnitude 7 in Richter scale is how \\many times more than that of a magnitude 5?",
            "options": [
                "Al",
                "Fe",
                "Na",
                "Mg"
            ],
            "answer": ""
        },
        {
            "question": "Which is the most abundant ion in the normal seawater?",
            "options": [
             