In [95]:
!pip install transformers sentencepiece huggingface_hub

Defaulting to user installation because normal site-packages is not writeable


In [96]:
import os
import re

import pandas as pd
import PyPDF2
from huggingface_hub import InferenceClient
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

In [97]:
def extract_text_from_pdf(file_path):
    """Return the text of every page of the PDF stored at ``file_path``.

    The file is opened in binary mode and read with ``PyPDF2.PdfReader``.
    Pages are visited in order; each page whose ``extract_text()`` result is
    non-empty contributes that text followed by a newline. Pages that yield
    nothing (``None`` or an empty string) are skipped.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        A single string with the newline-terminated text of all pages, or an
        empty string when no page produced any text.
    """
    chunks = []
    with open(file_path, "rb") as pdf_file:
        for page in PyPDF2.PdfReader(pdf_file).pages:
            content = page.extract_text()
            if not content:
                # Nothing extractable on this page; move on.
                continue
            chunks.append(content + "\n")
    return "".join(chunks)

In [98]:
# PDF to turn into flashcards; the relative path is resolved against the notebook's working directory.
pdf_path = "sample_data/sample_textbook.pdf"
# Extracted text of the whole document; passed to the flashcard generator in a later cell.
pdf_text = extract_text_from_pdf(pdf_path)

In [99]:
# Never commit access tokens: read the Hugging Face token from the environment.
# The token that was previously hard-coded here must be treated as leaked and revoked.
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    raise RuntimeError(
        "Set the HF_TOKEN environment variable to a Hugging Face access token "
        "before running this notebook."
    )

# Shared inference client used by the flashcard-generation function.
client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta", token=hf_token)

In [100]:
def generate_flashcards_with_zephyr(text, max_flashcards=5, max_chars=1000,
                                    max_new_tokens=512, temperature=0.7):
    """Ask the module-level ``client`` to write flashcards for ``text``.

    Only the first ``max_chars`` characters of ``text`` are placed in the
    prompt, so longer documents are silently truncated; raise ``max_chars``
    (or pass ``None`` to disable slicing) to cover more of the source.

    Args:
        text: Source material the flashcards should be based on.
        max_flashcards: Number of flashcards requested in the prompt. This is
            an instruction to the model, not an enforced limit.
        max_chars: Number of leading characters of ``text`` included in the
            prompt. Defaults to 1000.
        max_new_tokens: Forwarded to ``client.text_generation``.
        temperature: Forwarded to ``client.text_generation``.

    Returns:
        Whatever ``client.text_generation`` returns for the prompt; the prompt
        asks for ``Q: ...`` / ``A: ...`` pairs.
    """
    excerpt = text[:max_chars]

    # Prompt wrapped in <|user|> / <|assistant|> markers, with a strict output format request.
    prompt = (
        "<|user|>\n"
        f"You are a tutor. Generate {max_flashcards} educational flashcards based on this text:\n\n"
        f"{excerpt}\n\n"
        "Use this format only:\n"
        "Q: ...\nA: ...\n\n"
        "Return flashcards only, no explanation.\n<|assistant|>\n"
    )

    return client.text_generation(
        prompt=prompt,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
    )


In [101]:
# Request flashcards for the extracted PDF text using the function's default settings.
flashcard_output = generate_flashcards_with_zephyr(pdf_text)
# Print the raw response so the Q:/A: layout can be checked before parsing.
# NOTE: the recorded run returned nine cards although five are requested by default.
print(flashcard_output)


1. Q: Which organ in the digestive system begins the process of digestion?
A: Mouth

2. Q: What is the function of the esophagus in the digestive system?
A: Carries food from the mouth to the stomach

3. Q: What organ produces acid and enzymes that break down food in the digestive system?
A: Stomach

4. Q: Which organ in the digestive system absorbs nutrients into the bloodstream?
A: Small intestine

5. Q: What organ in the digestive system absorbs water and forms waste?
A: Large intestine

6. Q: Which organ in the digestive system produces bile to help digest fats?
A: Liver

7. Q: What organ in the digestive system releases enzymes to help in digestion?
A: Pancreas

8. Q: What is a fun fact about the small intestine in the digestive system?
A: The small intestine is about 6 meters long!

9. Q: What is the conclusion about the digestive system?
A: The digestive system is crucial for breaking food into nutrients that fuel the body.


In [102]:
# Matches a flashcard line such as "Q: ...", "A: ...", "3. Q: ..." or "- A: ...".
# The marker must open the line (after optional list numbering or a bullet) so
# that text like "FAQ:" inside an answer is not mistaken for a new question.
_CARD_LINE = re.compile(r"^(?:(?:\d+[.)]|[-*])\s*)?([QA]):\s*(.*)$")


def parse_flashcards_safe(text_output):
    """Parse ``Q:`` / ``A:`` lines from model output into a DataFrame.

    Each line is stripped and checked for a leading ``Q:`` or ``A:`` marker,
    optionally preceded by list numbering (``1.``, ``2)``) or a ``-``/``*``
    bullet. A card is emitted when an ``A:`` line with non-empty text follows a
    ``Q:`` line with non-empty text; lines without a marker are ignored. A
    question that is never answered is replaced by the next question, and an
    answer with no pending question is dropped. Only the text on the marker
    line itself is captured (continuation lines are not appended).

    Args:
        text_output: Raw text containing the flashcards.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Question`` and ``Answer``,
        one row per parsed card. The columns are present even when no card
        could be parsed.
    """
    cards = []
    question = None

    for raw_line in text_output.strip().splitlines():
        match = _CARD_LINE.match(raw_line.strip())
        if match is None:
            continue

        marker, content = match.group(1), match.group(2).strip()
        if marker == "Q":
            question = content
        elif question and content:
            cards.append({"Question": question, "Answer": content})
            question = None  # Reset so a stray answer cannot reuse this question

    # Explicit columns keep the schema stable for downstream cells when nothing was parsed.
    return pd.DataFrame(cards, columns=["Question", "Answer"])


In [103]:
# Parse the raw model output into a table of flashcards.
df = parse_flashcards_safe(flashcard_output)
# A bare trailing expression uses the notebook's rich DataFrame display; print() produced a
# wrapped, truncated plain-text table.
df


                                            Question  \
0  Which organ in the digestive system begins the...   
1  What is the function of the esophagus in the d...   
2  What organ produces acid and enzymes that brea...   
3  Which organ in the digestive system absorbs nu...   
4  What organ in the digestive system absorbs wat...   
5  Which organ in the digestive system produces b...   
6  What organ in the digestive system releases en...   
7  What is a fun fact about the small intestine i...   
8  What is the conclusion about the digestive sys...   

                                              Answer  
0                                              Mouth  
1         Carries food from the mouth to the stomach  
2                                            Stomach  
3                                    Small intestine  
4                                    Large intestine  
5                                              Liver  
6                                           Pancreas  

In [104]:
# Make sure the target directory exists; writing into a missing directory raises an error.
os.makedirs("outputs", exist_ok=True)

# Save to CSV
df.to_csv("outputs/flashcards.csv", index=False)

# Save to JSON
df.to_json("outputs/flashcards.json", orient="records", indent=2)

# Preview top 5 cards
df.head()


Unnamed: 0,Question,Answer
0,Which organ in the digestive system begins the...,Mouth
1,What is the function of the esophagus in the d...,Carries food from the mouth to the stomach
2,What organ produces acid and enzymes that brea...,Stomach
3,Which organ in the digestive system absorbs nu...,Small intestine
4,What organ in the digestive system absorbs wat...,Large intestine
