In [84]:
OLLAMA_API_URL = "http://localhost:11434/api/generate"

In [85]:
!pip install -r requirements.txt

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




In [86]:
import PyPDF2

def extract_text_from_pdf_in_chunks(pdf_path, char_limit=300):
    with open(pdf_path, 'rb') as file:
        pdf_reader = PyPDF2.PdfReader(file)

        chunks = []
        current_chunk_text = ""
        current_chunk_metadata = []
        for page_num, page in enumerate(pdf_reader.pages):
            page_text = page.extract_text()
            if len(current_chunk_text) + len(page_text) <= char_limit:
                current_chunk_text += page_text
                current_chunk_metadata.append({'page_number': page_num + 1, 'text': page_text})
            else:
                chunks.append((current_chunk_text, current_chunk_metadata))
                current_chunk_text = page_text
                current_chunk_metadata = [{'page_number': page_num + 1, 'text': page_text}]

        if current_chunk_text:
            chunks.append((current_chunk_text, current_chunk_metadata))
    return chunks

chunks = []
for i in range(1, 9):
  chunks.extend(extract_text_from_pdf_in_chunks(f"flamingo-class-12/ch{i}.pdf"))



In [87]:
print(chunks[0])

('The Last Lesson\nAlphonse Daudet\nLost Spring\nAnees Jung\nDeep Water\nWilliam  Douglas\nThe Rattrap\nSelma Lagerlof\nIndigo\nLouis Fischer\nPoets and Pancakes\nAsokamitran\nThe Interview\nChristopher Silvester\nUmberto Eco\nGoing Places\nA. R. Barton\nProse..\nReprint 2024-25\n', [{'page_number': 1, 'text': 'The Last Lesson\nAlphonse Daudet\nLost Spring\nAnees Jung\nDeep Water\nWilliam  Douglas\nThe Rattrap\nSelma Lagerlof\nIndigo\nLouis Fischer\nPoets and Pancakes\nAsokamitran\nThe Interview\nChristopher Silvester\nUmberto Eco\nGoing Places\nA. R. Barton\nProse..\nReprint 2024-25\n'}])


In [92]:
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss

# Load a pre-trained SentenceTransformer model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Vectorize the text chunks
chunk_texts = [chunk[0] for chunk in chunks]
print(chunk_texts)
chunk_embeddings = model.encode(chunk_texts)

# Convert embeddings to float32 (required by FAISS)
chunk_embeddings = np.array(chunk_embeddings, dtype=np.float32)

# Store the embeddings in FAISS
index = faiss.IndexFlatL2(chunk_embeddings.shape[1])
index.add(chunk_embeddings)

# Save the index to disk (optional)
faiss.write_index(index, 'chunks.index')

import pickle
with open('chunks.pkl', 'wb') as f:
    pickle.dump(chunks, f)




['The Last Lesson\nAlphonse Daudet\nLost Spring\nAnees Jung\nDeep Water\nWilliam  Douglas\nThe Rattrap\nSelma Lagerlof\nIndigo\nLouis Fischer\nPoets and Pancakes\nAsokamitran\nThe Interview\nChristopher Silvester\nUmberto Eco\nGoing Places\nA. R. Barton\nProse..\nReprint 2024-25\n', '2/Flamingo\nThe Last Lesson\nAbout the author\nAlphonse Daudet  (1840-1897) was a French novelist\nand short-story writer . The Last Lesson is set in the\ndays of the Franco-Prussian W ar (1870-1871) in which\nFrance was defeated by Prussia led by Bismarck.\nPrussia then consisted of what now are the nations of\nGermany, Poland and parts of Austria. In this story the\nFrench districts of Alsace and Lorraine have passed\ninto Prussian hands. Read the story to find out what\neffect this had on life at school.\nNotice these expressions in the text.\nInfer their meaning from the context\n/circle5in great dread of /circle5in unison\n/circle5counted on /circle5a great bustle\n/circle5thumbed at the edges /circle

In [89]:
def retrieve_chunks(query, model, index, chunks, top_k=5):
    query_embedding = model.encode([query])
    D, I = index.search(np.array(query_embedding, dtype=np.float32), top_k)

    retrieved_chunks = []

    for idx in I[0]:
        chunk_text = chunks[idx][0]
        page_number = chunks[idx][1][0]['page_number']

        # Check if the chunk contains undesired content (e.g., questions)
        if 'think as you read' not in chunk_text.lower() and 'answer' not in chunk_text.lower() and 'understanding the text' not in chunk_text.lower() and 'talking about the text' not in chunk_text.lower() and 'working with words' not in chunk_text.lower() and 'noticing form' not in chunk_text.lower() and 'things to do' not in chunk_text.lower():
            retrieved_chunks.append((chunk_text, page_number))

    return retrieved_chunks


# Example query
query = "who was peddler?"

# Load the FAISS index
index = faiss.read_index('chunks.index')

# Retrieve relevant chunks based on the query
retrieved_chunks = retrieve_chunks(query, model, index, chunks)
print("Retrieved Chunks:")
for text, page_number in retrieved_chunks:
    print(f"Page {page_number}: {text[:200]}... (first 200 characters)")


Retrieved Chunks:
Page 12: The Rattrap /43
Understanding  the text
1.How does the peddler interpret the acts of kindness and
hospitality shown by the cr ofter , the ir onmaster and his
daughter?
2.What are the instances in the ... (first 200 characters)
Page 3: 34/Flamingo
then stuffed them back into the
pouch.
The next day both men got up
in good season. The crofter was in
a hurry to milk his cow, and the
other man probably thought he
should not stay in bed... (first 200 characters)
Page 7: 38/Flamingo
stretched himself out on the floor and lay with a piece of
pig iron under his head and his hat pulled down over his
eyes. As soon as the young girl caught sight of him, she
went up and lif... (first 200 characters)
Page 3: 70/Flamingo
interviews,” Denis Brian has written. “Almost everything
of moment reaches us through one man asking questions
of another . Because of this, the interviewer holds a position
of unprecedent... (first 200 characters)


In [90]:
import requests
import json

def generate_response(query, ret):
    context = ""
    for text, page_number in ret:
        context += f"{text}\n"
    prompt = f"""You are a bot to answer the Questions from class 12 NCERT textbooks
Answer the question based only on the following context:

{context}

---

Answer the question based on the above context: {query}
"""
    # print(prompt)
    data = {
        "model": "llama3",
        "prompt": prompt
    }

    s = requests.Session()

    with s.post(OLLAMA_API_URL, headers=None, data=json.dumps(data), stream=True) as resp:
        for line in resp.iter_lines():
            if line:
                print(json.loads(line)['response'], end='')

    # responses = []
    # done = False
    # Keep fetching responses until 'done' is True in all responses
    # while True:
    # response = requests.post(OLLAMA_API_URL, json=data)  # Use stream=True for chunked responses

    # if response.status_code == 200:
    #     return response.json()['response']
    #     # print(response)
    # else:
    #     print(f"Error: {response.status_code}, {response.text}")


# Example usage
query = "How did the incident at the YMCA pool affect Douglas?"
ret = retrieve_chunks(query, model, index, chunks)
generate_response(query, ret)
# print("Generated Response:", response)

The incident at the YMCA pool was a traumatic experience for Douglas, who almost drowned and felt a sense of panic as he struggled to breathe. He described the sensation as "stark terror" that knows no understanding or control. The experience had a profound impact on him, causing him to develop a fear of water that affected his ability to enjoy activities like fishing, canoeing, and swimming.

This incident had a lasting effect on Douglas, making him determined to overcome his fear of water. He eventually learned to swim with the help of an instructor who taught him techniques such as putting his face under water and exhaling, raising his nose and inhaling, and kicking with his legs. Through this process, Douglas gradually built up his confidence and conquered his fear.

In fact, Douglas's experience at the YMCA pool had a profound impact on his life, teaching him valuable lessons about facing and overcoming fears. He recounts the incident as an adult to illustrate how he overcame his 