In [None]:
import re
import pandas as pd

# Read the complete LaTeX source of the exercise sheet into memory.
with open("Cerinte2.0.tex", "r", encoding="utf-8") as tex_file:
    full_text = tex_file.read()

# Scan the document once, in order of appearance, collecting chapter headings,
# sub-chapter headings and exercises. The regex has three alternatives:
#   groups 1-2  -> \section*{title}
#   groups 3-4  -> \subsection*{title}
#   groups 5-10 -> "<major>.<minor><A|B|C> <question> \ a) <choices>", running up to
#                  the next exercise id, the next heading, or the end of the text
structure_regex = re.compile(
    r'(\\section\*\{([^\}]+)\})|'  # chapter heading
    r'(\\subsection\*\{([^\}]+)\})|'  # sub-chapter heading
    r'((\d+)\.(\d+)([ABC])\s+(.+?)\\\s*a\)(.+?))(?=(?:\d+\.\d+[ABC]|\\section|\\subsection|\Z))',  # exercise
    re.DOTALL
)

structure = []
for match in structure_regex.finditer(full_text):
    section_title, subsection_title, exercise_text = match.group(2, 4, 5)
    position = match.start()

    if section_title:
        entry = {"type": "section", "value": section_title.strip(), "start": position}
    elif subsection_title:
        # Drop a leading numeric prefix such as "3" or "3.2" from the heading.
        stripped_title = re.sub(r'^\d+(\.\d+)?\s*', '', subsection_title.strip())
        entry = {"type": "subsection", "value": stripped_title, "start": position}
    elif exercise_text:
        major_txt, minor_txt, level, question_raw, choices_raw = match.group(6, 7, 8, 9, 10)
        minor_no = int(minor_txt)
        entry = {
            "type": "exercise",
            "id": f"{int(major_txt)}.{minor_no}{level}",
            "exercise_number": minor_no,
            "difficulty": level,
            # Remove the backslashes left dangling at the end of the question text.
            "question": re.sub(r'(\\\\|\\)+$', '', question_raw.strip()).strip(),
            # The regex consumed the literal "a)" marker, so it is put back here.
            "choices_text": "a)" + choices_raw.strip(),
            "start": position,
        }
    else:
        continue
    structure.append(entry)

# Attach to every exercise the chapter and sub-chapter heading most recently seen
# before it. Note that a new chapter does not reset the remembered sub-chapter.
current_section = ""
current_subsection = ""
exercises = []

copied_keys = ("id", "exercise_number", "difficulty", "question", "choices_text")
for item in structure:
    kind = item["type"]
    if kind == "section":
        current_section = item["value"]
        continue
    if kind == "subsection":
        current_subsection = item["value"]
        continue
    if kind == "exercise":
        record = {key: item[key] for key in copied_keys}
        record["chapter"] = current_section
        record["subchapter"] = current_subsection
        exercises.append(record)

# Collect the correct answers and the hints, each keyed by the integer that precedes it.
# NOTE(review): both searches run over the entire document, so any "<number> <letter>)"
# or "<number> <text>" sequence is captured, not only those in an answer key; when a
# number repeats, the last occurrence wins. Confirm this matches the file layout.
responses = dict(
    (int(found.group(1)), found.group(2).strip(")"))
    for found in re.finditer(r'(\d+)\s*([a-f]\))', full_text)
)
explanations = dict(
    (int(found.group(1)), found.group(2).strip().replace("\n", " "))
    for found in re.finditer(r'(\d+)\s+(.+?)(?=(?:\n\d+\s)|$)', full_text, re.DOTALL)
)

# Helper that splits the answer-choice text into the individual options a-f.
# A label only counts when it is NOT glued to a preceding word character, opening
# bracket or arithmetic operator, so math such as "f(a)", "(a+b)" or "2b)" inside a
# choice is not mistaken for the start of the next option.
_CHOICE_LABEL_GUARD = r'(?<![\w(\[{+\-*/=^])'
_CHOICE_PATTERN = re.compile(
    _CHOICE_LABEL_GUARD + r'([a-f])\)\s*(.*?)(?=' + _CHOICE_LABEL_GUARD + r'[a-f]\)|\Z)',
    re.DOTALL,
)


def extract_choices_definitive(text):
    """Split a block of answer choices into a ``{label: choice_text}`` dict.

    Parameters
    ----------
    text : str or None
        Text of the form ``"a) ... b) ... c) ..."`` with labels from ``a`` to ``f``.

    Returns
    -------
    dict
        Maps each label found to its text, with surrounding spaces, semicolons,
        periods and newlines stripped. Empty or ``None`` input gives ``{}``.
        If a label occurs more than once, the last occurrence wins.
    """
    if not text:
        return {}
    # Each choice runs up to the next guarded label or the true end of the text;
    # no sentinel is appended, so a choice containing "z)" is no longer cut short.
    return {label: body.strip(" ;.\n") for label, body in _CHOICE_PATTERN.findall(text)}

# Assemble one flat record per exercise. The answer and the hint are looked up by
# the exercise's 1-based position in the list, not by its "major.minor" id.
final_data = []
for position, exercise in enumerate(exercises, start=1):
    choices = extract_choices_definitive(exercise["choices_text"])
    row = {
        "id": exercise["id"],
        "numar_exercitiu": exercise["exercise_number"],
        "dificultate": exercise["difficulty"],
        "capitol": exercise["chapter"],
        "subcapitol": exercise["subchapter"],
        "intrebare": exercise["question"],
    }
    # Always emit the six choice columns; missing ones become empty strings.
    for label in "abcdef":
        row[label] = choices.get(label, "")
    row["raspuns_corect"] = responses.get(position, "")
    row["indicatii"] = explanations.get(position, "")
    final_data.append(row)

# Write the result to CSV (UTF-8 with BOM, no index column).
df = pd.DataFrame(final_data)
df.to_csv("exercitii_extrase_structurat.csv", index=False, encoding="utf-8-sig")
print("Fișierul 'exercitii_extrase_structurat.csv' a fost generat cu succes!")

In [None]:
import pandas as pd
import re

csv_path = "exercitii_extrase_structurat.csv"
tex_path = "Rezolvari.tex"

# Read every column as plain text. With pandas' default type inference a mostly-empty
# column of numeric-looking choices comes back as float (3 -> 3.0), and literal
# strings such as "NA" or "null" become missing values, so the re-exported CSV would
# silently differ from the input. The encoding matches the BOM-prefixed UTF-8 writer.
df = pd.read_csv(csv_path, dtype=str, keep_default_na=False, encoding="utf-8-sig")

with open(tex_path, "r", encoding="utf-8") as f:
    tex_content = f.read()

# Extract the worked solutions keyed by exercise id.
# Each entry has the form "<major>.<minor><A|B|C> <whitespace> <content>" and runs
# until the next line that starts with an id, or until the end of the file.
# Newlines inside a solution are flattened to spaces; a repeated id keeps the last one.
solutie = {
    exercise_id: body.strip().replace("\n", " ")
    for exercise_id, body in re.findall(
        r'(\d+\.\d+[ABC])\s+(.+?)(?=(?:\n\d+\.\d+[ABC]\s)|\Z)', tex_content, re.DOTALL
    )
}

# Add the solutions as a new column; exercises without a solution get an empty string.
df["solutie"] = df["id"].map(solutie).fillna("")

# Export a new CSV file that includes the "solutie" column.
df.to_csv("exercitii_extrase_structurat_rezolvari.csv", index=False, encoding="utf-8-sig")
print("Fișierul final cu rezolvări a fost salvat: exercitii_extrase_structurat_rezolvari.csv")