In [1]:
!pip install pyPDF2



In [2]:
from google.colab import drive

# Expose Google Drive inside the Colab filesystem; the PDF is read from there.
drive.mount(mountpoint='/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
from PyPDF2 import PdfReader

reader = PdfReader(stream=r"/content/drive/MyDrive/Fed_Meta_Allign.pdf")

# Extract every page once and join with a newline so the last word of one page
# is not fused to the first word of the next (which would also hide the page
# boundary from the newline-based splitter used later).
# `or ""` keeps the join safe if a page yields no text at all.
text = "\n".join((page.extract_text() or "") for page in reader.pages)

In [19]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Split on paragraph breaks first, then on single newlines, using
# chunk_size=500 and chunk_overlap=50 (both measured in characters by default).
splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n"],
    chunk_size=500,
    chunk_overlap=50,
)

# `splitted` is the list of text chunks consumed by the Q&A generation loop.
splitted = splitter.split_text(text)

In [6]:
!pip install langchain_google_genai



In [49]:
import google.generativeai as genai
from google.colab import userdata

# Authenticate with the API key stored under the Colab secret named 'google',
# then create the module-level model handle that generate_qa_pairs() calls.
genai.configure(api_key=userdata.get('google'))
model = genai.GenerativeModel(model_name='models/gemma-3-27b-it')

def generate_qa_pairs(text):
    """Ask the model for three question/answer pairs grounded in ``text``.

    The passage is embedded in a fixed instruction prompt and sent through the
    module-level ``model`` object.

    Args:
        text: Source passage the questions should be based on.

    Returns:
        The reply text from the model. If sending the request or reading the
        reply text raises an ``Exception``, the string ``"Error: <message>"``
        is returned instead of propagating the exception.
    """
    request = f"""
    You are a Technical AI Assistant specialized in Federated Learning and TinyML.
    Based on the text below, generate exactly 3 Question and Answer pairs about Fed-Meta-Align.

    TEXT: {text}

    Generate in this format:
    Q1: [Specific technical question]
    A1: [Brief 2-3 line answer]

    Q2: [Specific technical question]
    A2: [Brief 2-3 line answer]

    Q3: [Specific technical question]
    A3: [Brief 2-3 line answer]
    """

    try:
        reply = model.generate_content(request)
        # Reading .text stays inside the try block: it is part of what the
        # error-to-string conversion below is meant to cover.
        answer = reply.text
    except Exception as exc:
        return f"Error: {exc}"
    return answer

# Process batches: one model call per chunk. `all_outputs` stays index-aligned
# with `splitted`; skipped or failed chunks are recorded as "".
all_outputs = []
for index, sentence in enumerate(splitted):
    print(f"Processing Batch-{index+1}/{len(splitted)}...")

    # Only process meaningful text: skip blank or very short chunks.
    if not sentence.strip() or len(sentence) <= 50:
        print(f"Batch-{index+1} skipped - too short")
        all_outputs.append("")
        continue

    try:
        response = generate_qa_pairs(sentence)
    except Exception as e:
        print(f"Batch-{index+1} processing failed: {e}")
        all_outputs.append("")
        continue

    # generate_qa_pairs() does not raise on API failures; it returns
    # "Error: <message>". Treat that as a failed batch instead of logging it
    # as done and storing the error text among the model outputs.
    if response.startswith("Error: "):
        print(f"Batch-{index+1} processing failed: {response[len('Error: '):]}")
        all_outputs.append("")
        continue

    all_outputs.append(response)
    print(f"Batch-{index+1} processing done.")
    print(f"Preview: {response[:100]}...")

Processing Batch-1/95...
Batch-1 processing done.
Preview: Here are 3 Question and Answer pairs about Fed-Meta-Align, based on the provided text:

Q1: What is ...
Processing Batch-2/95...
Batch-2 processing done.
Preview: Q1: What is the primary goal of the Fed-Meta-Align framework as described in the text?
A1: Fed-Meta-...
Processing Batch-3/95...
Batch-3 processing done.
Preview: Here are 3 Question and Answer pairs about Fed-Meta-Align, based on the provided text:

Q1: What is ...
Processing Batch-4/95...
Batch-4 processing done.
Preview: Q1: What is the reported average test accuracy of Fed-Meta-Align across heterogeneous IoT devices?
A...
Processing Batch-5/95...
Batch-5 processing done.
Preview: Okay, here are 3 Question and Answer pairs about Fed-Meta-Align, based on the provided (limited) tex...
Processing Batch-6/95...
Batch-6 processing done.
Preview: Please provide the text about Fed-Meta-Align. The provided text only discusses TinyML generally and ...
Processing Batch-7/95.

In [57]:
import re

qa_pairs = []

# A pair is a "Q<n>: ..." block followed on a later line by an "A<n>: ..."
# block. The answer runs until the next line that starts a new "Q<n>:" label
# (or the end of the reply); requiring the full label in the lookahead keeps an
# answer from being cut short at a line that merely begins with the letter Q.
pattern = re.compile(r"(Q\d*:\s*.*?)\n(A\d*:\s*.*?)(?=\nQ\d*:|\Z)", re.DOTALL)

# The loop variable is deliberately NOT called `text`: that name holds the
# extracted PDF contents used by the splitter cell and must not be overwritten.
for output in all_outputs:
    for q_full, a_full in pattern.findall(output):
        qa_pairs.append((q_full.strip(), a_full.strip()))

# Explicit UTF-8 so non-ASCII characters from the paper are written the same
# way regardless of the platform's default encoding.
with open("filtered_qna.txt", "w", encoding="utf-8") as f:
    for question, answer in qa_pairs:
        f.write(f"{question}\n{answer}\n\n")

print(f"Extracted {len(qa_pairs)} Q&A pairs and saved to 'filtered_qna.txt'")

Extracted 273 Q&A pairs and saved to 'filtered_qna.txt'
