In [1]:
import os
from dotenv import load_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_chroma import Chroma
from langchain.document_loaders import TextLoader


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import boto3
from pdf2image import convert_from_path
import os

# OCR pipeline: render each PDF page to a PNG, send it to AWS Textract, and
# collect every detected LINE block into `extracted_text` (one line per row).

# Initialize AWS Textract client
textract = boto3.client('textract', region_name='us-east-1')  # Change region if needed

# Input PDF and scratch folder for the per-page images
pdf_path = "22101A0029 - Assignment 111.pdf"  # Replace with your PDF file
output_folder = "pdf"  # Folder to save images temporarily

os.makedirs(output_folder, exist_ok=True)  # Create folder if it doesn't exist

# Collect lines in a list and join once at the end instead of growing a string
# inside the loop.
text_lines = []

try:
    images = convert_from_path(pdf_path, dpi=300)  # Convert PDF to high-resolution images

    for i, image in enumerate(images):
        image_path = os.path.join(output_folder, f"page_{i+1}.png")
        try:
            image.save(image_path, "PNG")  # Save image temporarily

            # Textract's synchronous API takes the raw image bytes
            with open(image_path, "rb") as img_file:
                image_bytes = img_file.read()

            response = textract.detect_document_text(Document={'Bytes': image_bytes})

            # Keep only LINE blocks
            text_lines.extend(
                block["Text"]
                for block in response.get("Blocks", [])
                if block["BlockType"] == "LINE"
            )
        finally:
            # Always delete the page image, even when saving or the Textract
            # call fails, so a failed run does not leave files behind.
            if os.path.exists(image_path):
                os.remove(image_path)
finally:
    # Remove the scratch folder only if nothing else lives in it
    if os.path.isdir(output_folder) and not os.listdir(output_folder):
        os.rmdir(output_folder)

# Every detected line is terminated by a newline
extracted_text = "".join(f"{line}\n" for line in text_lines)


In [4]:
from langchain.schema import Document
# Load secrets (e.g. HF_TOKEN) from the process environment or a local .env
# file instead of embedding them in the notebook. The token that used to be
# hardcoded here has been exposed and should be revoked.
load_dotenv()

# Create text embeddings and vector store
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Convert extracted text into a LangChain Document object
doc = Document(page_content=extracted_text)

# Split the document into overlapping chunks sized for retrieval
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents([doc])  # Pass as a list of Document objects

# Fail early with a clear message rather than letting the vector store
# reject an empty batch when OCR produced no text.
if not splits:
    raise ValueError("No text was extracted from the PDF; nothing to index.")

# Create a vector store and expose it as a retriever
vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)
retriever = vectorstore.as_retriever()


In [7]:
# Initialize LLM
# The API key is read from the environment (or a local .env file) rather than
# being committed in the notebook. The key that used to be hardcoded here has
# been exposed and should be revoked.
load_dotenv()
gemini_api_key = os.environ.get("GOOGLE_API_KEY")
if not gemini_api_key:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; export it or add it to a .env file before running this cell."
    )
llm = ChatGoogleGenerativeAI(google_api_key=gemini_api_key, model="gemini-2.0-flash")


# Grading rubric given to the model as the system message. It fixes the allowed
# grades, the output format, and ends with the retrieved assignment text.
system_prompt = "".join(
    [
        "You are an AI teacher grading a student's assignment based on quality, accuracy, and completeness.\n\n",
        "Instructions:\n",
        "1. If the student's name is not found in the retrieved content, assign a grade of 0/10.\n",
        "2. Otherwise, provide a grade from [4/10, 6/10, 8/10, or 10/10] based on the assignment quality.\n",
        "3. Always return the response in the format:\n",
        "   **Grade:** X/10\n",
        "   **Feedback:** [Provide 1-2 lines of constructive feedback on how the student can improve.]\n\n",
        "{context}",
    ]
)

# Two-message chat prompt: the rubric above, then the student's name ({input})
# followed by the assignment text ({context}).
# NOTE(review): {context} is substituted into both messages, so the assignment
# text is sent twice per request — confirm this is intended.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "Student Name: {input}\n\nStudent Assignment:\n{context}"),
    ]
)


In [8]:
def get_relevant_docs(student_name):
    """Return the retrieved assignment text for a student, or "" if none matches.

    The module-level ``retriever`` is queried with the student's name. The
    retrieved chunks are returned, joined by blank lines, only when at least
    one of them mentions the name.

    Args:
        student_name: Name to look up. Leading/trailing whitespace is ignored.

    Returns:
        The retrieved chunks joined with "\\n\\n", or "" when the name is blank
        or does not appear in any retrieved chunk.
    """
    name = (student_name or "").strip()
    if not name:
        # An empty string is a substring of every chunk, so without this guard
        # a blank name would be graded against arbitrary content.
        return ""

    retrieved_docs = retriever.invoke(name)  # Retrieve related assignment
    context_list = [retrieved.page_content for retrieved in retrieved_docs]

    # Compare case-insensitively: the text comes from OCR, so its casing may
    # differ from the name the caller typed.
    needle = name.casefold()
    if not any(needle in chunk.casefold() for chunk in context_list):
        return ""  # No retrieved chunk mentions this student

    return "\n\n".join(context_list)

def generate_grade_and_feedback(student_name):
    """Grade a student's assignment with the LLM and return its text reply.

    Looks up the student's assignment via ``get_relevant_docs``. When nothing
    relevant is found, a fixed 0/10 response is returned without calling the
    model; otherwise the chat prompt is filled in and sent to ``llm``.

    Args:
        student_name: Name of the student whose assignment should be graded.

    Returns:
        The model's response content, or the fixed "no assignment" message.
    """
    assignment_text = get_relevant_docs(student_name)

    if assignment_text.strip():
        # Fill the template with the name and retrieved text, then ask the model
        chat_messages = prompt.format_messages(input=student_name, context=assignment_text)
        return llm.invoke(chat_messages).content

    # Nothing retrieved for this student: skip the LLM call entirely
    return "**Grade:** 0/10\n**Feedback:** No assignment found for this student."

# Demo run: grade a single student and print the model's verdict.
demo_student = "Pratik"
result = generate_grade_and_feedback(demo_student)
print(result)


**Grade:** 8/10
**Feedback:** The assignment is well-structured and covers the key aspects of ethics in AI. To improve, consider providing real-world examples to illustrate your points and enhance clarity.

