In [1]:
import os 
from dotenv import load_dotenv 
import warnings 
warnings.filterwarnings("ignore") 
from pprint import pprint 
from langchain.document_loaders import PyPDFLoader , DirectoryLoader 
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI 
from langchain.text_splitter import RecursiveCharacterTextSplitter 
from langchain.prompts import PromptTemplate 
from langchain.chains.question_answering import load_qa_chain 
from langchain_community.vectorstores import FAISS  

In [2]:
load_dotenv()

GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

#### Load Model Gemini

In [3]:
def load_model(name_model):
    
    llm = ChatGoogleGenerativeAI(
        model=name_model,
        temperature=0.2,
        google_api_key=GOOGLE_API_KEY
    )
    return llm

model = load_model("gemini-1.5-flash")

In [4]:
model.invoke("Hi!")

AIMessage(content='Hi there! How can I help you today?', additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-1.5-flash', 'safety_ratings': []}, id='run--8bb593b9-21da-4037-add0-ce59a1ee3360-0', usage_metadata={'input_tokens': 2, 'output_tokens': 11, 'total_tokens': 13, 'input_token_details': {'cache_read': 0}})

#### Load Data

In [5]:
def data_extraction(path_data):

    if not os.path.exists(path_data):
        raise FileNotFoundError(f"The directory {path_data} does not exist.")
    
    loader = DirectoryLoader(path=path_data, glob="*.pdf", loader_cls=PyPDFLoader, show_progress=True)
    documents = loader.load()
    
    print(f"Number of documents loaded: {len(documents)}")
    if not documents:
        print(f"No PDF files found in {path_data}")
    
    return documents

extracted_data = data_extraction("../DataSets/")

100%|██████████| 1/1 [00:04<00:00,  4.25s/it]

Number of documents loaded: 132





#### Text Chunking 

In [6]:
def text_chunking(extracted_data):
    text_split = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=200)
    chunks = text_split.split_documents(extracted_data)
    return chunks

# Extract data and create chunks
text_chunks = text_chunking(extracted_data)

In [7]:
print(f"Number of chunks: {len(text_chunks)}")

Number of chunks: 131


In [8]:
for i, chunk in enumerate(text_chunks):
            print(f"Chunk {i+1} content preview: {chunk.page_content[:100]}...")

Chunk 1 content preview: Learn	Python	in	One	Day	and	Learn	It	Well
Python	for	Beginners	with	Hands-on	Project
The	only	book	y...
Chunk 2 content preview: In	addition,	as	Richard	Branson	puts	it:	"The	best	way	of	learning	about
anything	is	by	doing".	At	t...
Chunk 3 content preview: Table	of	Contents
	
Chapter	1:	Python,	what	Python?
What	is	Python?
Why	Learn	Python?
Chapter	2:	Get...
Chunk 4 content preview: Condition	Statements
If	Statement
Inline	If
For	Loop
While	Loop
Break
Continue
Try,	Except
Chapter	7...
Chunk 5 content preview: Appendix	D:	Working	With	Dictionaries
Appendix	E:	Project	Answers
One	Last	Thing…...
Chunk 6 content preview: Chapter	1:	Python,	what	Python?
	
Welcome	to	the	exciting	world	of	programming.	I'm	so	glad	you	pick...
Chunk 7 content preview: What	is	Python?
	
Python	is	a	widely	used	high-level	programming	language	created	by
Guido	van	Rossu...
Chunk 8 content preview: Why	Learn	Python?
	
There	are	a	large	number	of	high	level	programming	languages	available,

#### Embedding model

In [9]:
embedding = GoogleGenerativeAIEmbeddings(model="models/embedding-001" , google_api_key=GOOGLE_API_KEY)

In [10]:
def vector_store(text_chunks : list[str]) :
    text_chunks = [i.page_content for i in text_chunks]
    vector_db = FAISS.from_texts(texts=text_chunks , embedding=embedding)
    vector_db = vector_db.save_local(folder_path="faiss-index")
    return vector_db

vector_db = vector_store(text_chunks=text_chunks)

### Pipeline for Questions Generation

In [11]:
def user_query(question, num_questions, difficulty_level, question_types, include_answers):

    embedding = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=GOOGLE_API_KEY)
    
    db = FAISS.load_local(
        folder_path="faiss-index",
        embeddings=embedding,
        allow_dangerous_deserialization=True
    )
    
    docs = db.similarity_search(query=question, k=10)
    

    chain_input = {
        "input_documents": docs,
        "input": question,
        "num_questions": num_questions,
        "difficulty_level": difficulty_level,
        "question_types": question_types,
        "include_answers": include_answers
    }

    # Define exam prompt
    exam_prompt = """You are a knowledgeable and professional AI assistant that specializes in generating high-quality exam questions.

                    Your role is to:

                    1. Generate clear, accurate, and well-structured exam questions based on the provided context and requirements.
                    2. Ensure that questions are relevant to the given subject, topic, and difficulty level.
                    3. Vary the question types if requested (e.g., multiple choice, true/false, short answer, essay).
                    4. Provide correct answers or model answers if specified.
                    5. Follow academic standards and avoid overly simplistic or overly complex wording unless specified.
                    6. If context is not enough to generate meaningful questions, acknowledge the limitation and ask for more detail.

                    Remember:
                    - Do not make up unrelated information.
                    - Stick closely to the topic and subject area.
                    - Maintain clarity and educational value in all questions.
                    - Avoid repetition unless explicitly instructed.
                    - Ensure consistency with the question format and style.
                    - Do not put words in " " or ' '

                    Context:
                    {context}

                    Instructions:
                    - Number of Questions: {num_questions} and print them under each other
                    - Difficulty Level: {difficulty_level}
                    - Question Type(s): {question_types}
                    - Include Answers: {include_answers}

                    Now, generate the questions based on the above."""


    exam_prompt_template = PromptTemplate(
        input_variables=["context", "num_questions", "difficulty_level", "question_types", "include_answers"],
        template=exam_prompt
    )

    
    model = ChatGoogleGenerativeAI(model="gemini-1.5-flash", google_api_key=GOOGLE_API_KEY)

    
    chain = load_qa_chain(llm=model, prompt=exam_prompt_template)

    
    response = chain(chain_input, return_only_outputs=True)

    return response["output_text"]


user_inputs = {
    "num_questions": "50",
    "difficulty_level": "Medium",
    "question_types": "essay",
    "include_answers": "Yes"
}


try:
    response = user_query(
        question="Give me Questions on Chapter 3 only",
        num_questions=user_inputs["num_questions"],
        difficulty_level=user_inputs["difficulty_level"],
        question_types=user_inputs["question_types"],
        include_answers=user_inputs["include_answers"]
    )
    
    
    print(response)

except Exception as e:
    print(f"An error occurred: {str(e)}")

stuff: https://python.langchain.com/docs/versions/migrating_chains/stuff_docs_chain
map_reduce: https://python.langchain.com/docs/versions/migrating_chains/map_reduce_chain
refine: https://python.langchain.com/docs/versions/migrating_chains/refine_chain
map_rerank: https://python.langchain.com/docs/versions/migrating_chains/map_rerank_docs_chain

See also guides on retrieval and question-answering here: https://python.langchain.com/docs/how_to/#qa-with-rag
  chain = load_qa_chain(llm=model, prompt=exam_prompt_template)
  response = chain(chain_input, return_only_outputs=True)


Given the provided text, generating 50 distinct essay questions of medium difficulty is challenging. The text snippets focus on specific coding tasks and concepts within a Python programming tutorial, rather than a broad range of topics suitable for 50 unique essay questions.  The information is insufficient to create such a large number of diverse, medium-difficulty essay questions.


To generate 50 meaningful essay questions, I need significantly more context.  Please provide a more comprehensive syllabus or textbook chapters that cover the material.  This would allow for the creation of questions covering various aspects of Python programming, file handling, data structures, and problem-solving techniques illustrated in the provided excerpts.


However, I can offer a few example essay questions based on the limited context:


1. **Essay Question:**  Describe the process of updating user scores in the `userScores.txt` file, explaining the roles of `userScores.txt`, `userScores.tmp`, 

## END