# FlashGen Colab Notebook
A single notebook to convert PDFs into Anki flashcards using GPT-4 Turbo.

In [None]:
# Install the notebook's dependencies. openai is pinned to 0.28.0 because
# generate_flashcards below calls the legacy openai.ChatCompletion interface,
# which is not available in openai>=1.0.
!pip install PyPDF2 genanki openai==0.28.0


In [None]:
import os
import getpass   # if you’re hiding your key
from google.colab import files

from google.colab import drive, files
# Option A: Mount Google Drive so a PDF stored there can be read by path.
drive.mount('/content/drive')

# Option B: Upload interactively. files.upload() returns a dict keyed by the
# uploaded file names.
uploaded = files.upload()
if not uploaded:
    # Fail with an actionable message instead of a bare IndexError when
    # nothing was uploaded.
    raise RuntimeError('No file was uploaded; re-run this cell and choose a PDF.')
# The first uploaded file is the PDF that the later cells convert.
pdf_path = next(iter(uploaded))

In [None]:
import os, json
from PyPDF2 import PdfReader
import openai, genanki


def extract_text(pdf_path):
    """Return the text of every page of the PDF at *pdf_path*, newline-joined.

    Pages for which ``extract_text()`` yields nothing (``None`` or an empty
    string) are left out of the result.
    """
    pdf = PdfReader(pdf_path)
    extracted = (page.extract_text() for page in pdf.pages)
    return "\n".join(piece for piece in extracted if piece)

def chunk_text(text, max_chars):
    """Split *text* into chunks of at most *max_chars* characters.

    Paragraphs (separated by a blank line) are packed greedily into a chunk
    while they fit; each paragraph is budgeted with its two-character
    separator. A paragraph longer than *max_chars* is cut into fixed-size
    slices. Every chunk is stripped of surrounding whitespace, and chunks
    that are empty after stripping are dropped, so empty or whitespace-only
    input yields ``[]``.

    Args:
        text: The full text to split.
        max_chars: Maximum chunk length; must be positive.

    Returns:
        A list of non-empty chunk strings in document order.

    Raises:
        ValueError: If *max_chars* is not positive (a negative value would
            otherwise silently discard oversized paragraphs).
    """
    if max_chars <= 0:
        raise ValueError(f"max_chars must be a positive integer, got {max_chars!r}")

    chunks = []

    def emit(piece):
        # Whitespace-only pieces carry no content, so never emit them.
        piece = piece.strip()
        if piece:
            chunks.append(piece)

    current = ""
    for para in text.split('\n\n'):
        if len(current) + len(para) + 2 <= max_chars:
            current += para + "\n\n"
            continue
        # The paragraph does not fit: close the chunk being built first.
        emit(current)
        if len(para) <= max_chars:
            current = para + "\n\n"
        else:
            # Oversized paragraph: hard-split it into max_chars-sized slices.
            for start in range(0, len(para), max_chars):
                emit(para[start:start + max_chars])
            current = ""
    emit(current)
    return chunks

def _valid_cards(payload):
    """Return the well-formed flashcard dicts contained in decoded JSON *payload*."""
    if isinstance(payload, dict):
        # A lone object instead of an array: treat it as a one-element array.
        payload = [payload]
    if not isinstance(payload, list):
        return []
    return [
        card for card in payload
        if isinstance(card, dict) and 'question' in card and 'answer' in card
    ]


def generate_flashcards(chunks, model):
    """Ask the chat model for flashcards covering each text chunk.

    Each non-blank chunk is sent in its own request. The reply is parsed as
    JSON; if that fails, the first ``[...]`` span in the reply is tried. Only
    dict items that carry both ``question`` and ``answer`` keys are kept, so
    the returned list never contains stray strings or incomplete cards.
    Chunks whose reply cannot be used are reported with ``print`` and skipped.

    Args:
        chunks: Iterable of text chunks.
        model: Name of the chat model to pass to the OpenAI API.

    Returns:
        A list of flashcard dicts collected from all chunks.
    """
    import json
    import re

    flashcards = []
    for i, chunk in enumerate(chunks, start=1):
        if not chunk or not chunk.strip():
            # Nothing to build cards from; skip instead of paying for a call.
            continue
        messages = [
            {
                "role": "system",
                "content": (
                    "You are a JSON-only generator. "
                    "Respond *only* with a JSON array of objects, each with keys: "
                    "question (string), answer (string), tags (list of strings), "
                    "difficulty (easy|medium|hard). "
                    "Do NOT include any other text, markdown, or commentary."
                )
            },
            {
                "role": "user",
                "content": f"Text:\n{chunk}"
            }
        ]
        resp = openai.ChatCompletion.create(
            model=model,
            messages=messages,
            temperature=0,
            max_tokens=1000
        )
        # Guard against a missing message body so .strip() cannot fail.
        raw = (resp.choices[0].message.content or "").strip()

        # First, try a direct JSON parse.
        try:
            payload = json.loads(raw)
        except json.JSONDecodeError:
            # Fallback: extract the first [...] block (e.g. from a fenced reply).
            m = re.search(r'\[.*\]', raw, re.S)
            if not m:
                print(f"Chunk {i}: no JSON array found. Raw start:\n{raw[:200]!r}\n")
                continue
            fragment = m.group(0)
            try:
                payload = json.loads(fragment)
            except json.JSONDecodeError:
                print(f"Chunk {i}: still invalid JSON after regex. Fragment start:\n{fragment[:200]!r}\n")
                continue

        # Valid JSON is not necessarily a list of cards; keep only usable items.
        cards = _valid_cards(payload)
        if not cards:
            print(f"Chunk {i}: response contained no usable flashcards.\n")
            continue
        flashcards.extend(cards)

    return flashcards


def build_deck(flashcards, deck_name, deck_id):
    """Build a genanki deck with one note per flashcard.

    Each note has four fields: Question, Answer, Tags (space-separated) and
    Difficulty. Entries that are not dicts, or that lack ``question`` or
    ``answer``, are reported with ``print`` and skipped instead of aborting
    the whole deck.

    Args:
        flashcards: Iterable of dicts with ``question`` and ``answer`` keys
            and optional ``tags`` and ``difficulty`` keys.
        deck_name: Name given to the deck.
        deck_id: Integer identifier given to the deck.

    Returns:
        The populated ``genanki.Deck``.
    """
    deck = genanki.Deck(deck_id, deck_name)
    model = genanki.Model(
        1607392319, 'Simple Model',
        fields=[{'name':'Question'},{'name':'Answer'},{'name':'Tags'},{'name':'Difficulty'}],
        templates=[{
            'name':'Card 1',
            'qfmt':'{{Question}}<br><small>Tags: {{Tags}}</small>',
            'afmt':'{{FrontSide}}<hr id="answer">{{Answer}}<br><small>Difficulty: {{Difficulty}}</small>'
        }]
    )
    for card in flashcards:
        if not isinstance(card, dict) or 'question' not in card or 'answer' not in card:
            print(f"Skipping malformed flashcard: {card!r}")
            continue

        tags = card.get('tags') or []
        if isinstance(tags, str):
            # A bare string would otherwise be joined character by character.
            tags = [tags]
        elif not isinstance(tags, (list, tuple)):
            tags = [tags]
        tag_text = ' '.join(str(tag) for tag in tags)

        difficulty = card.get('difficulty') or ''
        # Coerce every field to str; model output may contain numbers or null.
        note = genanki.Note(
            model=model,
            fields=[str(card['question']), str(card['answer']), tag_text, str(difficulty)],
        )
        deck.add_note(note)
    return deck


In [None]:
import getpass
import hashlib

# === Configuration ===
# If you mounted Drive, set pdf_path appropriately above.
deck_name = 'My Flashcards'
model = 'gpt-4-turbo'
max_chars = 1000
output = 'flashcards.apkg'
# Prompt with getpass so the key is not echoed into the notebook output.
openai.api_key = os.getenv('OPENAI_API_KEY') or getpass.getpass('OpenAI API Key: ').strip()

# Derive the deck id from a digest of the name. The built-in hash() of a str
# is salted per interpreter process, so it would give the same deck a new id
# on every runtime and re-imports would create duplicate decks.
deck_id = int.from_bytes(
    hashlib.sha256(deck_name.encode('utf-8')).digest()[:8], 'big'
) % 10**12

# === Run ===
text = extract_text(pdf_path)
chunks = chunk_text(text, max_chars)
print(f'{len(chunks)} chunks')
cards = generate_flashcards(chunks, model)
print(f'{len(cards)} cards')
deck = build_deck(cards, deck_name, deck_id)
genanki.Package(deck).write_to_file(output)
print('Deck saved to', output)
files.download(output)