In [54]:
import os

import faiss
import pdfplumber

from llama_index.core import Document, StorageContext, VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.groq import Groq
from llama_index.vector_stores.faiss import FaissVectorStore

import warnings
warnings.filterwarnings("ignore", category=UserWarning)

from dotenv import load_dotenv
load_dotenv()

True

In [55]:
# Fail fast with an actionable message when the key is missing. Assigning the
# result of os.getenv() straight back into os.environ raises an opaque
# "TypeError: str expected, not NoneType" when the variable is not set.
_groq_api_key = os.getenv("GROQ_API_KEY")
if not _groq_api_key:
    raise RuntimeError(
        "GROQ_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )
os.environ["GROQ_API_KEY"] = _groq_api_key

In [56]:
# Each template section pairs a title with the instruction a submitted
# section is expected to satisfy.
template_sections = [
    {"title": heading, "instruction": guidance}
    for heading, guidance in (
        (
            "Summary",
            "Summarize the business goals, scope, and high-level outcomes of the proposed system.",
        ),
        (
            "Functional Requirements",
            "Detail the main features and capabilities the system must deliver from the user's perspective.",
        ),
        (
            "Technical Architecture",
            "Provide an overview of the system's technical design including components and their interactions.",
        ),
    )
]

# One Document per template section: the instruction is the text that gets
# indexed, and the title travels along as metadata.
template_docs = [
    Document(text=section["instruction"], metadata={"title": section["title"]})
    for section in template_sections
]

In [57]:
# Embedding model used for the template instructions at index time and for the
# submitted sections at query time.
embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
nodes = SentenceSplitter().get_nodes_from_documents(template_docs)

# Size the FAISS index from the model's actual output rather than hard-coding
# 384, so changing the embedding model cannot silently mismatch the index.
embed_dim = len(embed_model.get_text_embedding("dimension probe"))
faiss_index = faiss.IndexFlatL2(embed_dim)
vector_store = FaissVectorStore(faiss_index=faiss_index)

# The vector store has to be handed over through a StorageContext. Passing
# `vector_store=` directly to VectorStoreIndex is not a recognised parameter,
# so the FAISS store would be ignored in favour of the default in-memory store.
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex(nodes, storage_context=storage_context, embed_model=embed_model)

# Only the single closest template instruction is needed per submitted section.
retriever = index.as_retriever(similarity_top_k=1)


In [58]:
def extract_sections_from_pdf(pdf_path, min_chars: int = 50) -> list:
    """Extract the text of a PDF and split it into blank-line separated sections.

    Args:
        pdf_path: Path of the PDF file, passed unchanged to ``pdfplumber.open``.
        min_chars: A section is kept only when its stripped length is strictly
            greater than this value. Defaults to 50.

    Returns:
        A list of stripped section strings in document order. The list is empty
        when no page yields text or no section is long enough.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # Pages for which extract_text() returns nothing (None or "") are skipped.
        page_texts = [text for text in (page.extract_text() for page in pdf.pages) if text]

    # Each page's text is terminated with a newline; join once instead of
    # growing a string inside the loop.
    content = "".join(text + "\n" for text in page_texts)

    # NOTE(review): sections are delimited by an empty line ("\n\n"). If the
    # extracted text contains no empty lines, the whole document comes back as
    # a single section - confirm this matches the PDFs being reviewed.
    stripped_chunks = (chunk.strip() for chunk in content.split("\n\n"))
    return [chunk for chunk in stripped_chunks if len(chunk) > min_chars]

In [59]:
# Groq-hosted model that writes the feedback for every reviewed section.
GROQ_MODEL_NAME = "meta-llama/llama-4-scout-17b-16e-instruct"
llm = Groq(model=GROQ_MODEL_NAME)

def review_section(section_text, retrieved_instruction):
    """Ask the module-level ``llm`` to review one submitted section.

    Args:
        section_text: Text of the submitted section; embedded in the prompt in
            double quotes.
        retrieved_instruction: Template instruction the section is judged
            against; also embedded in double quotes.

    Returns:
        The ``text`` attribute of the response returned by ``llm.complete``.
    """
    # The leading and trailing empty entries keep the blank first line and the
    # final newline of the prompt.
    prompt_lines = [
        "",
        "You are reviewing a technical document section.",
        "",
        "Template Instruction:",
        f'"{retrieved_instruction}"',
        "",
        "Submitted Section:",
        f'"{section_text}"',
        "",
        "Task: Provide detailed, constructive feedback and whether the section meets expectations.",
        "",
    ]
    return llm.complete("\n".join(prompt_lines)).text


In [60]:
# Document under review, split into candidate sections.
pdf_path = "Leela Chess Zero and the Human Play.pdf"
submitted_sections = extract_sections_from_pdf(pdf_path)

# For every section: look up the closest template instruction, then ask the
# LLM to review the section against it.
results = []
for section_text in submitted_sections:
    retrieved_nodes = retriever.retrieve(section_text)
    best_match = retrieved_nodes[0]  # the retriever was built with similarity_top_k=1
    instruction = best_match.text
    section_title = best_match.metadata.get("title", "Unknown")

    results.append(
        {
            "section": section_title,
            "instruction": instruction,
            "text": section_text,
            "feedback": review_section(section_text, instruction),
        }
    )

CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


In [61]:
# Print one report per reviewed section, each followed by a rule of 80 "=".
separator = "=" * 80
for res in results:
    print(
        f"Section: {res['section']}",
        f"Instruction: {res['instruction']}",
        f"Feedback:\n{res['feedback']}\n",
        separator,
        sep="\n",
    )


Section: Technical Architecture
Instruction: Provide an overview of the system's technical design including components and their interactions.
Feedback:
**Overview of Technical Design and Feedback**

The provided section appears to be a research paper discussing the comparison between Leela Chess Zero (Lc0), a neural network-based chess engine, and elite human chess players. The study aims to explore how artificial intelligence, reinforcement learning, and neural network-based systems are reshaping our understanding of chess.

**Technical Design Components and Interactions**

The paper discusses the following technical design components:

1. **Lc0**: A neural network-based chess engine that uses reinforcement learning and self-play to improve its performance.
2. **AlphaZero**: A reinforcement learning-based chess engine that utilizes neural networks to analyze and solve chess positions.
3. **Stockfish**: A deterministic, rule-based engine that relies on handcrafted evaluation functions