In [45]:
import json
import re
from pathlib import Path
from xml.sax.saxutils import escape

import pandas as pd
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak

In [19]:
# Default destination for exported quiz PDFs, located under the parent of the
# current working directory.
QUIZ_DIR = Path.cwd().parent.joinpath('outputs/quizzes')

In [14]:
# Video identifier: selects the raw response file to load and names the exported PDF.
video_id = "teCubd25XwI"

In [15]:
# Load the saved raw response JSON for this video. The encoding is pinned to
# UTF-8 so the read does not depend on the platform's locale default
# (e.g. cp1252 on Windows), which can fail on non-ASCII quiz text.
raw_quiz_path = Path.cwd().parent / f'raw_openai/{video_id}.json'
with open(raw_quiz_path, 'r', encoding='utf-8') as file:
    raw_quiz = json.load(file)

In [None]:
def export_csv(data, filename="flashcards.csv"):
    """
    Export quiz items as a two-column flashcard CSV.

    Args:
        data (list): Dicts with at least the keys 'question' and 'answer'.
        filename (str | Path): Destination path of the CSV file.

    Returns:
        pd.DataFrame: The frame that was written, with columns 'Front' and 'Back'.

    Raises:
        KeyError: If an item lacks 'question' or 'answer'.
    """
    rows = [{"Front": d["question"], "Back": d["answer"]} for d in data]
    # Name the columns explicitly so that an empty `data` still produces a
    # frame (and a CSV) with the 'Front,Back' header instead of no columns.
    df = pd.DataFrame(rows, columns=["Front", "Back"])
    df.to_csv(filename, index=False, encoding="utf-8")
    return df

def export_pdf(data, video_id, output_dir=QUIZ_DIR):
    """
    Export quiz data to a single-section PDF file.

    NOTE: a later cell in this notebook defines another ``export_pdf`` with the
    same signature; once that cell runs it replaces this definition.

    Args:
        data (str | list): Either a JSON string or a list of dicts
                           with keys 'question', 'answer', 'explanation'.
        video_id (str): YouTube video ID (used as filename).
        output_dir (Path | str): Directory where the PDF will be saved.

    Returns:
        Path: Path to the saved PDF file.

    Raises:
        ValueError: If ``data`` is a string that is not valid JSON.
        KeyError: If an item lacks 'question', 'answer' or 'explanation'.
    """
    # Convert JSON string to Python list if needed
    if isinstance(data, str):
        try:
            data = json.loads(data)
        except json.JSONDecodeError as exc:
            # Chain the decoder error so the offending position stays visible.
            raise ValueError("Provided data is a string but not valid JSON") from exc

    def _markup_safe(value):
        # Paragraph parses its text as XML-like markup, so '<', '>' and '&'
        # coming from the quiz content must be escaped to render literally.
        return escape(str(value))

    # Make sure output directory exists
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Construct full path with .pdf extension
    file_path = output_dir / f"{video_id}.pdf"

    # Build PDF
    doc = SimpleDocTemplate(str(file_path))
    styles = getSampleStyleSheet()
    flowables = []

    flowables.append(Paragraph("Open-Ended Quiz", styles["Title"]))
    flowables.append(Spacer(1, 20))

    for i, d in enumerate(data, 1):
        flowables.append(Paragraph(f"Q{i}: {_markup_safe(d['question'])}", styles["Normal"]))
        flowables.append(Paragraph(f"Answer: {_markup_safe(d['answer'])}", styles["Italic"]))
        flowables.append(Paragraph(f"Explanation: {_markup_safe(d['explanation'])}", styles["Normal"]))
        flowables.append(Spacer(1, 12))

    doc.build(flowables)
    return file_path



In [53]:
def export_pdf(data, video_id, output_dir=QUIZ_DIR):
    """
    Export a quiz PDF with:
    - Section 1: Questions only (spaced out)
    - Section 2: Questions + Answers + Explanations

    Args:
        data (str | list): JSON string (raw OpenAI output) or parsed list of dicts
        video_id (str): YouTube video ID, used for filename
        output_dir (Path | str): Directory where PDF will be saved

    Returns:
        Path: Path to the saved PDF file.

    Raises:
        ValueError: If ``data`` is a string without a JSON array, the extracted
            array is not valid JSON, or the quiz contains no items.
        TypeError: If ``data`` is not a list whose items are all dicts.
        KeyError: If an item lacks 'question', 'answer' or 'explanation'.
    """
    # If data is a string, extract JSON array (first '[' through last ']')
    if isinstance(data, str):
        match = re.search(r"\[.*\]", data, re.DOTALL)
        if not match:
            raise ValueError("Could not find valid JSON array in string")
        data = json.loads(match.group(0))

    # Ensure it's a list of dicts; every item is checked, not just the first
    if not isinstance(data, list) or not all(isinstance(item, dict) for item in data):
        raise TypeError("Data must be a list of dicts with 'question', 'answer', 'explanation'")

    # An empty list used to surface as an IndexError from data[0]
    if not data:
        raise ValueError("Data must contain at least one question")

    def _markup_safe(value):
        # Paragraph parses its text as XML-like markup, so '<', '>' and '&'
        # coming from the quiz content must be escaped to render literally.
        return escape(str(value))

    # Ensure output directory exists
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    file_path = output_dir / f"{video_id}.pdf"

    # Build PDF
    doc = SimpleDocTemplate(str(file_path))
    styles = getSampleStyleSheet()
    flowables = []

    # -----------------------------
    # Section 1: Student Version
    # -----------------------------
    flowables.append(Paragraph("Open-Ended Quiz (Student Version)", styles["Title"]))
    flowables.append(Spacer(1, 20))

    for i, d in enumerate(data, 1):
        flowables.append(Paragraph(f"{i}. {_markup_safe(d['question'])}", styles["Normal"]))
        flowables.append(Spacer(1, 84))  # add extra blank space for writing

    flowables.append(PageBreak())

    # -----------------------------
    # Section 2: Teacher Version
    # -----------------------------
    flowables.append(Paragraph("Open-Ended Quiz (Teacher Version)", styles["Title"]))
    flowables.append(Spacer(1, 20))

    for i, d in enumerate(data, 1):
        flowables.append(Paragraph(f"{i}. {_markup_safe(d['question'])}", styles["Normal"]))
        flowables.append(Paragraph(f"Answer: {_markup_safe(d['answer'])}", styles["Italic"]))
        flowables.append(Paragraph(f"Explanation: {_markup_safe(d['explanation'])}", styles["Normal"]))
        flowables.append(Spacer(1, 12))

    doc.build(flowables)
    return file_path

In [37]:
def clean_raw_quiz_data(raw_quiz):
    """
    Pull the quiz out of a raw chat-completion response.

    Reads the first choice's message content, takes the text between the
    first "```json" marker and the next "```", and parses it as JSON.

    Args:
        raw_quiz (dict): Response with the text at
                         ['choices'][0]['message']['content'].

    Returns:
        The parsed JSON value found inside the fenced block.

    Raises:
        ValueError: If the content has no "```json" marker
                    (json.JSONDecodeError, a ValueError subclass, is raised
                    when the fenced text is not valid JSON).
    """
    fence_open = "```json"
    content = raw_quiz['choices'][0]['message']['content']

    # Guard clause: without an opening fence there is nothing to parse.
    if fence_open not in content:
        raise ValueError("JSON block not found")

    # Text after the first opening fence, cut at the next fence of any kind.
    after_fence = content.split(fence_open, 2)[1]
    payload = after_fence.split("```", 1)[0].strip()

    return json.loads(payload)

In [39]:
# Parse the fenced JSON quiz out of the raw response loaded earlier.
data = clean_raw_quiz_data(raw_quiz)

In [54]:
# Write the quiz PDF; the cell output is the path returned by export_pdf.
export_pdf(data, video_id)

WindowsPath('c:/Users/kidsa/Documents/learning_pipeline/outputs/quizzes/teCubd25XwI.pdf')