# Setup

## Load libraries and environment variables

In [1]:
# Vector storage
from langchain_community.vectorstores import InMemoryVectorStore

# Embeddings Model and LLM
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI

# Load Environment Variables
import os
from dotenv import load_dotenv
# Presumably picks up settings from a local .env file before the key is read below.
load_dotenv()
# os.getenv returns None when GOOGLE_API_KEY is unset; google_api is not
# referenced again in the cells visible here.
google_api = os.getenv("GOOGLE_API_KEY")

# Text Splitter
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Document Loader
from langchain_community.document_loaders import TextLoader, PyPDFLoader

# Prompt Template
from langchain_core.prompts import PromptTemplate

# RegEx for JSON responses
import re
import json

# For randomizing
import random

## Prepare the embeddings model

In [2]:
# Dimensionality requested for every embedding vector.
out_dim = 768

# Embedding model shared by both vector stores built below.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/gemini-embedding-001",
    output_dimensionality=out_dim,
)

## Prepare the vector store

## Read Toy PDF

In [3]:
# Load the file
pdf_loader = PyPDFLoader("Lecture 1.pdf")
pdf_documents = pdf_loader.load()
# Raw text of every loaded document, kept for inspection.
pdf_contents = [doc.page_content for doc in pdf_documents]

# Split the content into chunks.
# split_documents() chunks the same page_content that create_documents() would,
# but also copies each source document's metadata onto its chunks, so a
# retrieved chunk can be traced back to where it came from.
pdf_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
pdf_chunks = pdf_splitter.split_documents(pdf_documents)

# Embed every chunk and index it in an in-memory vector store
vector_store_pdf = InMemoryVectorStore.from_documents(pdf_chunks, embeddings)

## Read Toy TXT

In [4]:
# Load the file
# NOTE(review): no explicit encoding is passed to TextLoader -- confirm the
# platform default is right for this transcript.
txt_loader = TextLoader("transcript_1.txt")
txt_documents = txt_loader.load()
# Raw text of every loaded document, kept for inspection.
txt_contents = [doc.page_content for doc in txt_documents]

# Split the content into chunks.
# split_documents() chunks the same page_content that create_documents() would,
# but also copies each source document's metadata onto its chunks, so a
# retrieved chunk can be traced back to where it came from.
txt_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
txt_chunks = txt_splitter.split_documents(txt_documents)

# Embed every chunk and index it in an in-memory vector store
vector_store_txt = InMemoryVectorStore.from_documents(txt_chunks, embeddings)

## Test similarity search

In [5]:
# Smoke-test retrieval against the PDF store with a sample question.
docs = vector_store_pdf.similarity_search_with_score(
    "What does weak AI refer to?",
    k=5,
)

# Each hit is a (Document, score) pair; keep the full text of every hit.
contexts = [doc[0].page_content for doc in docs]

# Print a 300-character preview of each hit together with its score.
for doc in docs:
    text, similarity = doc[0].page_content, doc[1]
    print(f"Content: {text[:300]}\n\nScore: {similarity}\n---\n")

Content: AI - Definitions
1. D efinitions
1.1
 Artificial Intelligence (AI)
The definitions of AI outlined in textbooks  
(a) 'The exciting new effort to make 
computers think ... machines with
minds, in the full and literal sense' 
(Haugeland, 1985) 
'The automation of activities that we 
associate with hum

Score: 0.8548709876539596
---

Content: Introduction 
 What  is  Artificial  Intelligence ? 
• John McCarthy, who coined the term Artificial Intelligence in 1956,
defines it as "the science and engineering of making
intelligent machines",  especially  intelligent  computer  programs.
• Artificial Intelligence (AI) is the intelligence of m

Score: 0.8512835612803957
---

Content: AI - Definitions 
1.6 Soft  or   Weak   AI 
▪ Weak AI  refers to the use of software to study or accomplish specific
problem solving or reasoning tasks that do not encompass the full
range of human cognitive abilities.
▪ Example : a chess program such as  Deep Blue.
▪ Weak AI  does not achieve self-

Score

## Create the system prompt template

In [6]:
# System prompt for MCQ generation. {num_questions} and {context} are the only
# placeholders; the schema is spelled with json(...) instead of braces so that
# literal braces do not clash with the template's placeholder syntax.
# The fourth option label reads "(d) Option 4" so the example shows four
# distinct options.
template = """You are a college professor creating an exam for students.
Create {num_questions} MCQs for an upcoming exam with the following qualities:
- Exactly one correct answer grounded in the source text.
- Three plausible but incorrect "distractor" options.
- Do not include “All of the above” or “None of the above.”
- The question must be clear and answerable based only on the source text.
- Don't directly say things like "According to the source text" or "In Page X it was mentioned..."

Each question should follow this JSON schema:
json("Question Number" (an integer): json(
  "Question": "string",
  "Options": ["(a) Option 1", "(b) Option 2", "(c) Option 3", "(d) Option 4"],
  "Answer": "string"
))
Respond ONLY in valid JSON format
---
Source Text:
{context}"""

# Wrap the raw template text; its two placeholders are filled in at invoke time.
prompt_template = PromptTemplate(
    template=template,
    input_variables=["num_questions", "context"],
)

In [7]:
llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash")

# Fixed seed so the same chunks are drawn from a given chunk list on every run.
random.seed(42)
chunk_content = [chunk.page_content for chunk in pdf_chunks]

# random.sample raises ValueError when k exceeds the population size, so cap
# the sample at the number of available chunks (short PDFs may have fewer than 5).
sample_size = min(5, len(chunk_content))
context = "\n\n".join(random.sample(chunk_content, k=sample_size))
prompt = prompt_template.invoke({"num_questions": 2, "context": context})

response = llm.invoke(prompt)

In [8]:
# Extract the JSON payload from the model reply.
raw = response.content.strip()

# Accept a fenced block (```json ... ``` or a bare ``` ... ```); when the reply
# has no fence at all, treat the whole reply as the JSON document. This keeps
# json_response from being left undefined (or stale from an earlier run).
pattern = r"```(?:json)?\s*(.*?)\s*```"
match = re.search(pattern, raw, re.DOTALL | re.IGNORECASE)
json_string = match.group(1) if match else raw

try:
    json_response = json.loads(json_string)
except json.JSONDecodeError as exc:
    # Fail here with the offending text instead of a NameError in a later cell.
    raise ValueError(f"LLM reply is not valid JSON: {raw[:200]!r}") from exc

In [9]:
# Display the parsed questions to check their structure by eye.
json_response

{'1': {'Question': 'In the context of AI, which item is categorized as an AI technique?',
  'Options': ['(a) Cognitive Science',
   '(b) Constraint satisfaction',
   '(c) Engineering based AI Goal',
   '(d) Intelligent behavior'],
  'Answer': '(b) Constraint satisfaction'},
 '2': {'Question': 'A described disadvantage of the "Generate and test" approach is that it:',
  'Options': ['(a) Often struggles with problems requiring unique solutions, such as the 12 unique solutions of a board puzzle.',
   '(b) Is not very efficient and generates many wrong assignments.',
   '(c) Requires a predefined set of variables and domains for each variable, making it inflexible.',
   '(d) Lacks the ability to maintain logical relations among variables when positions change.'],
  'Answer': '(b) Is not very efficient and generates many wrong assignments.'}}

In [10]:
# Persist the parsed questions to response.json in the working directory.
with open('response.json', 'w') as f:
    f.write(json.dumps(json_response))

In [11]:
# Drop generated questions whose retrieved source chunks score below the
# similarity threshold, printing a preview of the chunks that pass.
min_score = 0.6

# Iterate over a snapshot of the keys: entries can then be deleted while
# looping, and the cell no longer assumes the keys are exactly "1".."n"
# (which broke on a re-run after a question had been removed).
for question_id in list(json_response):
    docs = vector_store_pdf.similarity_search_with_score(
        json_response[question_id]["Question"], k=5
    )
    for doc in docs:
        score = doc[1]
        content = doc[0].page_content
        if score < min_score:
            # Delete once and stop scanning this question; continuing the inner
            # loop would attempt a second delete of the same key (KeyError).
            # NOTE(review): a question is dropped as soon as any of the k hits
            # is below the threshold -- confirm this is the intended criterion.
            del json_response[question_id]
            break
        print(f"Content: {content[:300]}\n\nScore: {score}\n---\n")

Content: AI Techniques 
4. AI Techniques
Various techniques that have evolved, can be applied to a variety of AI tasks.
The techniques are concerned with how  we represent,  manipulate and reason
with  knowledge  in  order  to  solve  problems.
Example  
• Techniques, not all "intelligent" but used to behave

Score: 0.8427080987652675
---

Content: AI Techniques 
• Genetic Algorithms (GA)
■ GAs are part of evolutionary computing, a rapidly growing area of AI.
■ Genetic algorithms are implemented as a computer simulation, where
techniques are inspired by evolutionary biology.
■ Mechanics of biological evolution
◊ Every organism has a set of rul

Score: 0.8414914468750248
---

Content: Introduction 
 What  is  Artificial  Intelligence ? 
• John McCarthy, who coined the term Artificial Intelligence in 1956,
defines it as "the science and engineering of making
intelligent machines",  especially  intelligent  computer  programs.
• Artificial Intelligence (AI) is the intelligence of m

Score