In [1]:
import fitz  # PyMuPDF

def _has_label(line, letter):
    """Return True when *line* starts with *letter* immediately followed by a digit."""
    # Slicing (not indexing) yields "" for a one-character line, so a bare
    # "A" or "B" is simply "not a label" instead of raising IndexError.
    return line.startswith(letter) and line[1:2].isdigit()


def _strip_question_label(line):
    """Return the text that follows a leading ``B<digits>`` question label.

    All digits of the label are consumed (so "B12. text" works as well as
    "B1. text"), followed by at most one punctuation separator such as ".".
    """
    end = 1
    while end < len(line) and line[end].isdigit():
        end += 1
    # Drop a single separator glued to the number; whitespace is handled by
    # the final strip() and letters/digits are kept as part of the text.
    if end < len(line) and not line[end].isalnum() and not line[end].isspace():
        end += 1
    return line[end:].strip()


def extract_questions_from_pdf(pdf_path):
    """Extract questions and their answer choices from a PDF.

    The text of every page is scanned line by line:

    * a line starting with ``B`` followed by a digit opens a new question;
    * a line starting with ``❍`` opens a new answer choice for the current
      question;
    * any other line continues whichever of the two was opened last;
    * a line containing ``" Quick"`` or ``"The Details:"`` is dropped and
      ends the current choice, so the lines after it are ignored until the
      next question or choice starts;
    * while continuing a choice, lines starting with ``A`` followed by a
      digit are dropped;
    * blank lines are ignored.

    Parser state is kept across page boundaries, so a question may span
    several pages.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        A list of dicts, one per question in document order, with the keys
        ``'number'`` (running counter starting at 1, not the number printed
        in the label), ``'text'`` (question text) and ``'choices'`` (list of
        choice strings).
    """
    questions = []
    current_question = None
    is_question_text = False
    is_choice_text = False
    question_number = 1

    # The context manager guarantees the document is closed even if parsing
    # raises part-way through.
    with fitz.open(pdf_path) as document:
        for page_num in range(document.page_count):
            page = document.load_page(page_num)

            for raw_line in page.get_text("text").split('\n'):
                line = raw_line.strip()

                # Blank lines carry no content; appending them would only
                # add stray spaces to the question/choice text.
                if not line:
                    continue

                if " Quick" in line or "The Details:" in line:
                    is_choice_text = False
                    continue

                if _has_label(line, "B"):
                    # Starting a new question: flush the previous one first.
                    if current_question is not None:
                        questions.append(current_question)
                    current_question = {
                        'number': question_number,
                        'text': _strip_question_label(line),
                        'choices': []
                    }
                    question_number += 1
                    is_question_text = True
                    is_choice_text = False
                elif line.startswith("❍"):
                    # Starting a new choice (ignored if no question is open).
                    is_question_text = False
                    is_choice_text = True
                    if current_question is not None:
                        current_question['choices'].append(line[1:].strip())
                elif current_question is None:
                    # Text before the first question: nothing to attach it to.
                    continue
                elif is_question_text:
                    # Continuing the question text.
                    current_question['text'] += " " + line
                elif is_choice_text and not _has_label(line, "A"):
                    # Continuing the most recent choice.
                    current_question['choices'][-1] += " " + line

    if current_question is not None:
        questions.append(current_question)

    return questions

def write_markdown(questions, md_path):
    """Write questions to a Markdown file.

    Each question becomes a ``# Question <number>`` heading, followed by the
    question text on its own line, one ``- <choice>`` bullet per choice, and
    a blank line.

    Args:
        questions: Iterable of dicts with the keys ``'number'``, ``'text'``
            and ``'choices'`` (an iterable of strings).
        md_path: Path of the output file; an existing file is overwritten.
    """
    # Explicit UTF-8: the text may contain non-ASCII characters, which the
    # platform default encoding (e.g. cp1252 on Windows) may fail to encode.
    with open(md_path, 'w', encoding='utf-8') as md_file:
        for question in questions:
            md_file.write(f"# Question {question['number']}\n")
            md_file.write(f"{question['text']}\n")
            md_file.writelines(f"- {choice}\n" for choice in question['choices'])
            md_file.write("\n")

# Input PDF and output Markdown locations for this run.
pdf_path, md_path = 'testing_2.pdf', 'questions_2.md'

# Parse the PDF, then dump the extracted questions as Markdown.
questions = extract_questions_from_pdf(pdf_path)
write_markdown(questions, md_path)
