In [None]:
!pip install  pdfplumber
!pip install langchain_google_genai

In [None]:
import pdfplumber
import os
from langchain_google_genai import GoogleGenerativeAI as genai
from google.colab import userdata
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

In [None]:
def extract_text_from_pdf(pdf_path):
  """Return the text of every page of a PDF, one page after another.

  Args:
    pdf_path: Path of the PDF file, passed straight to ``pdfplumber.open``.

  Returns:
    A single string holding each page's extracted text followed by a
    newline. A page that yields no text contributes only its newline.
  """
  with pdfplumber.open(pdf_path) as pdf:
    # extract_text() can come back as None for a page with no text layer
    # (e.g. a scanned image) on some pdfplumber versions; `or ""` keeps such
    # a page from raising TypeError on the concatenation.
    return "".join((page.extract_text() or "") + "\n" for page in pdf.pages)

In [None]:
# Setting up Gemini: read the API key stored under the secret name
# 'GEMINI_KEY' through google.colab's userdata helper, so the key itself is
# never written in the notebook.

api_key = userdata.get('GEMINI_KEY')



In [None]:
# Prompt template sent to the model. It has two placeholders that are filled
# in at call time: {chapters} (the allowed chapter names) and {pdf_text}
# (the text extracted from the exam paper).
template = """
You are an expert at analyzing exam papers and classifying questions.

Given exam paper contains questions in both English and Hindi.
Your tasks:
1. Extract ONLY the English questions (completely ignore Hindi text)
2. Classify each English question into the appropriate chapter
3. If a question doesn't clearly fit any chapter, classify it under "Others".
4. For Multiple Choice Questions (MCQs), include the options.

Available chapters:
{chapters}

Please organize the output as:

Chapter: [Chapter Name]
1. [Full question text in English]
2. [Full question text in English]
(and so on)

for MCQ questions organize the output as:

Chapter: [Chapter Name]
1. [Full question text in English]
   A. [Option A]
   B. [Option B]
   C. [Option C]
   D. [Option D]
2. [Full question text in English]
   A. [Option A]
   B. [Option B]
   C. [Option C]
   D. [Option D]
(and so on)

If a question doesn't clearly fit any chapter, classify it under "Others".

Exam Paper Content:
{pdf_text}
"""

In [None]:
# Wrap the template text in a LangChain PromptTemplate, declaring the two
# placeholders it expects to be filled.
prompt = PromptTemplate(template=template, input_variables=["pdf_text", "chapters"])

In [None]:
def classify_questions(pdf_path, chapters, output_file="results.txt"):
  """Classify the English questions of an exam-paper PDF by chapter.

  The PDF text is extracted with ``extract_text_from_pdf``, inserted into the
  module-level ``prompt`` together with the chapter list, and sent to the
  Gemini model. The model's reply is written to ``output_file`` and returned.

  Args:
    pdf_path: Path of the exam-paper PDF.
    chapters: Iterable of chapter-name strings; they are joined with
      newlines to fill the ``{chapters}`` placeholder.
    output_file: Where the model's reply is saved (UTF-8). Defaults to
      "results.txt", the path this function used before the parameter
      existed.

  Returns:
    The model's reply as returned by the chain.

  Raises:
    ValueError: If no text could be extracted from the PDF.
  """
  pdf_text = extract_text_from_pdf(pdf_path)
  if not pdf_text.strip():
    # Nothing to classify (e.g. a scanned PDF without a text layer); stop
    # before spending an API call on an empty paper.
    raise ValueError(f"No text could be extracted from {pdf_path!r}")

  llm = genai(model='gemini-2.0-flash',
              temperature=0.3,
              api_key=api_key)

  # `prompt | llm` plus .invoke() replaces LLMChain(...).run(...), both of
  # which LangChain has deprecated.
  chain = prompt | llm
  result = chain.invoke({
      "chapters": "\n".join(chapters),
      "pdf_text": pdf_text
  })

  # Save the reply to a text file.
  with open(output_file, 'w', encoding='utf-8') as file:
    file.write(result)
  print(f"Results have been saved to {output_file}")

  return result


In [None]:
# Chapter names offered to the model as classification targets; inside
# classify_questions they are joined with newlines to fill the {chapters}
# placeholder of the prompt.
chapters = [
    "Electric Charges and Fields",
    "Electrostatic Potential and Capacitance",
    "Current Electricity",
    "Moving Charges and Magnetism",
    "Magnetism and Matter",
    "Electromagnetic Induction",
    "Alternating Currents",
    "Electromagnetic Waves",
    "Ray Optics and Optical Instruments",
    "Wave Optics",
    "Dual Nature of Radiation and Matter",
    "Atoms",
    "Nuclei",
    "Semiconductor Electronics: Materials, Devices and Simple Circuits"
]

# Run the classifier on the exam paper and print the model's reply.
result = classify_questions("/12th Physics.pdf", chapters)
print(result)

Results have been saved to results.txt
Chapter: Electric Charges and Fields
1. The intensity of electric field inside a charged hollow sphere is :-
   A. infinite
   B. zero
   C. one
   D. Equal to radius
2. Two electric lines of force never intersect each other, why?
3. The mass of an isolated conductor decreases, when it becomes positively charged, why?
4. Establish the expression for the electric field intensity at a point, due to the electric dipole located in the axial position.

Chapter: Current Electricity
1. SI unit of current density is.
   A. coulomb per metre
   B. Ampere per metre2
   C. coulomb per metre2
   D. Ampere per metre
2. In which device a high resistance is joined in series with the coil of galvanometer:
   A. Voltmeter
   B. Ammeter
   C. Voltameter
   D. Resistance Box
3. Write any two differences between electric resistance and specific resistance (resistivity)
4. Write the definition of internal resistance and write its SI unit and dimensional formula.
5. Wr