In [1]:
# Modification of code to work with Ollama
# Import Required Libraries

from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
from chromadb.config import DEFAULT_TENANT, DEFAULT_DATABASE, Settings
from llama_index.core.node_parser import MarkdownElementNodeParser
from langchain.text_splitter import RecursiveCharacterTextSplitter
import chromadb.utils.embedding_functions as embedding_functions
from langchain_ollama import OllamaEmbeddings
from langchain_community.llms import Cohere
from llama_index.llms.ollama import Ollama 
from langchain_cohere import CohereRerank
from langchain_chroma import Chroma 
from llama_parse import LlamaParse
from uuid import uuid4
import gradio as gr
import chromadb
import ollama
import pickle
import os

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# llama-parse is async-first, running the async code in a notebook requires the use of nest_asyncio

import nest_asyncio
nest_asyncio.apply()

# Load environment variables from a local .env file before any client is built.
# NOTE(review): presumably this supplies the LlamaParse and Cohere API keys — confirm.
from dotenv import load_dotenv
load_dotenv()

# Ollama model names: one for embeddings, one for text generation.
EMBEDDING_MODEL = "nomic-embed-text"
GENERATION_MODEL = "llama3.1"

# Two embedding wrappers over the same Ollama model:
#   - embed_model is passed to the LangChain Chroma vector store further down,
#   - ollama_ef is passed to the raw chromadb collection created below.
embed_model = OllamaEmbeddings(model=EMBEDDING_MODEL)
ollama_ef = embedding_functions.OllamaEmbeddingFunction(
    url="http://127.0.0.1:11434/api/embeddings",
    model_name=EMBEDDING_MODEL,
)
# LLM handed to the markdown node parser.
# NOTE(review): request_timeout=5.0 looks short for a local generation model — confirm it is intentional.
llm = Ollama(model=GENERATION_MODEL, request_timeout=5.0)

# On-disk Chroma store under ./embeddings; allow_reset=True is set so the reset() call below is permitted.
chroma_client = chromadb.PersistentClient(path=os.path.join('.', 'embeddings'),     
                               settings=Settings(allow_reset=True),
                               tenant=DEFAULT_TENANT,
                               database=DEFAULT_DATABASE,
                               )

# Reset the store on every run, then (re)create the collection that the ingestion cell fills.
# NOTE(review): this discards anything persisted by a previous run — confirm that is desired.
chroma_client.reset()
col_name = "LlamaParse"
col = chroma_client.get_or_create_collection(col_name, embedding_function=ollama_ef)

In [3]:
# Custom instruction text that is handed to LlamaParse when the PDF is parsed.
# NOTE(review): the wording reads like a question-answering prompt rather than a
# parsing instruction — confirm this is the text the parser should receive.

parsing_instructions = '''Answer questions using the information in this pdf and be precise. Avoid Hallucinations, and say you don't know if given data is not enough to answer the question'''

In [4]:
# Parse ./data/<name>.pdf with LlamaParse, caching the parsed documents as a pickle
# next to the PDF so that re-running the notebook does not call the parsing service again.
name = "Resume"
path = os.path.join('.', 'data', f'{name}.pdf')
pickle_path = os.path.join('.', 'data', f'parsed_{name}_documents.pkl')

if os.path.exists(pickle_path):
    # SECURITY: pickle.load executes arbitrary code from the file; only load caches
    # that this notebook wrote itself.
    with open(pickle_path, 'rb') as file:
        documents = pickle.load(file)
    print("Loaded documents")
else:
    # The LlamaParse keyword is `parsing_instruction` (singular); the previous plural
    # spelling is not a declared field, so the custom instruction was not applied.
    documents = LlamaParse(
        result_type="markdown",
        parsing_instruction=parsing_instructions,
    ).load_data(path)
    # Do not cache an empty result, otherwise a failed parse would be re-loaded
    # from the pickle on every later run instead of being retried.
    if documents:
        with open(pickle_path, 'wb') as pickle_file:
            pickle.dump(documents, pickle_file)

Loaded documents


In [5]:
# Build the markdown element parser with the local Ollama LLM.
# `from_defaults()` is a classmethod that constructs a *new* parser; calling it on an
# already-configured instance threw away `llm` and `num_workers`, so it is not used here.
node_parser = MarkdownElementNodeParser(llm=llm, num_workers=8)

# Retrieve nodes (text) and objects (table)

nodes = node_parser.get_nodes_from_documents(documents)
base_nodes, objects = node_parser.get_nodes_and_objects(nodes)


0it [00:00, ?it/s]


In [6]:
# Inspect the extraction result: text nodes first, a blank line, then the table objects.

print(base_nodes, objects, sep="\n\n")

[TextNode(id_='cc5fc93e-ec7b-41e0-aa13-53c1beaf0760', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='cade575f-a988-4dd7-8016-5e0769e74cfa', node_type=<ObjectType.DOCUMENT: '4'>, metadata={}, hash='9346a9481d8d224cd8680fcd472bea893812a99f887b1eb2b97f93544bd6d4a8')}, text='Aditya Kushal\n\n+91 97664 58874 | adityakushal23@gmail.com | linkedin.com/in/akxy4321 | github.com/AKxy4321\n\n Education\n\nRV University - CGPA: 9.683/10\n\nBengaluru, KA\n\nBachelor of Technology in Computer Science, Minor in FinTech\n\nNov. 2022 – June 2026\n\n Experience\n\n Summer Internship\n\nJuly 2024 – Present\n\nRV University, Bengaluru, KA\n\n- Led a team of 3 on pruning deep learning models to reduce its size while retaining accuracy.\n- Implemented Cosine Similarity to find and prune similar filters to reduce information loss.\n- Pruned LeNet5 model by 14.5x while having accuracy loss of 

In [7]:
# Split every node into chunks, embed each chunk with Ollama and store it in Chroma.

# The splitter does not depend on the node, so it is built once outside the loop.
# NOTE(review): "\n" is listed before "\n\n", so the paragraph separator is
# effectively never chosen first — confirm the order is intentional.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0, separators=[
    "\n",
    "\n\n"
])

# Running counter across ALL nodes. It used to restart at 0 for every node, which made
# chunks of different nodes share the same ids, so only the first chunk per id was kept.
chunk_id = 0
for node in base_nodes + objects:
    for text in text_splitter.split_text(node.text):
        embedding = ollama_ef(text)
        col.add(documents=text, ids=str(chunk_id), embeddings=embedding)
        chunk_id += 1

In [8]:
# Run a plain similarity search over the stored chunks for a sample question.
# NOTE(review): the retriever uses its default result count, while the reranker
# further down asks for top_n=10 — confirm enough candidates are retrieved.

query = "What internships have Aditya completed"

db = Chroma(
    collection_name=col_name,
    client=chroma_client,
    embedding_function=embed_model,
)
retriever = db.as_retriever()
docs = retriever.invoke(query)

In [9]:
# Display the documents returned by the plain similarity retriever (before reranking).

docs

[Document(page_content='- Leveraged OpenAI to build a Chatbot on company’s custom data.\n- Implemented Cohere’s API to rerank documents and retrieve relevant information.\n- Implemented Tavily’s API to provide Web Search functionality to the Chatbot.\n\n Projects\n\n Obstacle Avoidance System for Visually Impaired\n\nPython, PyTorch, Ultralytics\n\nJan 2024 – May 2024\n\n- Trained a Yolov8-nano object detection model to detect potholes, light poles, vehicles, and roadside stalls.'),
 Document(page_content='Aditya Kushal\n\n+91 97664 58874 | adityakushal23@gmail.com | linkedin.com/in/akxy4321 | github.com/AKxy4321\n\n Education\n\nRV University - CGPA: 9.683/10\n\nBengaluru, KA\n\nBachelor of Technology in Computer Science, Minor in FinTech\n\nNov. 2022 – June 2026\n\n Experience\n\n Summer Internship\n\nJuly 2024 – Present\n\nRV University, Bengaluru, KA\n\n- Led a team of 3 on pruning deep learning models to reduce its size while retaining accuracy.'),
 Document(page_content='- Implem

In [10]:
# Wrap the similarity retriever with a Cohere reranker and re-run the same query.

compressor = CohereRerank(model="rerank-english-v3.0", top_n=10)
compression_retriever = ContextualCompressionRetriever(
    base_retriever=retriever,
    base_compressor=compressor,
)
compressed_docs = compression_retriever.invoke(query)

In [11]:
# Display the reranked documents; each one carries a `relevance_score` in its
# metadata, which makes the ordering easier to judge than the raw similarity output.

compressed_docs

[Document(metadata={'relevance_score': 0.54664737}, page_content='Aditya Kushal\n\n+91 97664 58874 | adityakushal23@gmail.com | linkedin.com/in/akxy4321 | github.com/AKxy4321\n\n Education\n\nRV University - CGPA: 9.683/10\n\nBengaluru, KA\n\nBachelor of Technology in Computer Science, Minor in FinTech\n\nNov. 2022 – June 2026\n\n Experience\n\n Summer Internship\n\nJuly 2024 – Present\n\nRV University, Bengaluru, KA\n\n- Led a team of 3 on pruning deep learning models to reduce its size while retaining accuracy.'),
 Document(metadata={'relevance_score': 0.0006386796}, page_content='- Implemented Cosine Similarity to find and prune similar filters to reduce information loss.\n- Pruned LeNet5 model by 14.5x while having accuracy loss of 0.85.\n\n Deep Learning Intern\n\nSep. 2023 – Present\n\nVectraTech Global, Bengaluru, KA\n\n- Trained deep learning models for breast cancer detection.\n- Trained BIRADS Classification on VGG16 and Breast Cancer Object Detection on Yolov7.\n\n Artificia

In [12]:
# System message inserted into the generation prompt: restricts answers to the
# supplied documents and asks for "I don't know." when they do not contain the answer.
SYSTEM_PROMPT = """
You are a PDF expert assistant with a focus on accurate and reliable information retrieval from the documents provided to you. 
You must only answer questions based on the content of these documents. 
If you do not find the answer in the documents, respond with "I don't know." 
Avoid providing speculative or unrelated information, and do not pull in knowledge from external sources beyond what is contained in the given documents. 
Always prioritize correctness and clarity in your responses.
"""

In [13]:
# Assemble the Llama 3 chat-formatted prompt from the system message, the user
# query and the reranked context.

# Pass the model the plain text of the reranked chunks. Interpolating the list
# itself put the Python repr of the Document objects (metadata, escaped "\n")
# into the prompt instead of the document text.
context = "\n\n".join(doc.page_content for doc in compressed_docs)

prompt = f"""
            <|begin_of_text|><|start_header_id|>system<|end_header_id|>
            {SYSTEM_PROMPT}<|eot_id|>
            <|start_header_id|>user<|end_header_id|>
            Query: {query}
            Answer: Answer using {context}<|eot_id|> 
            <|start_header_id|>assistant<|end_header_id|>
            """

In [14]:
# `prompt` is already a complete Llama 3 chat template (header and eot tokens included),
# so send it with raw=True; otherwise Ollama wraps it in the model's template again.
ollama.generate(prompt=prompt, model=GENERATION_MODEL, raw=True)['response']

"Based on the provided documents, Aditya's internships are:\n\n1. Summer Internship at RV University (July 2024 - Present)\n2. Deep Learning Intern at VectraTech Global (Sep. 2023 - Present)\n3. Artificial Intelligence Intern at Shaale (Aug 2023 - May 2024)"

In [15]:
# # Disabled: Gradio chat UI that wraps the rerank + generate steps above (uncomment this cell to launch it)
# import nest_asyncio

# nest_asyncio.apply()

# def upload_file(files):
#     file_paths = [file.name for file in files]
#     return file_paths

# def chat(query):
#     global compression_retriever
#     compressed_docs = compression_retriever.invoke(query)
#     prompt = f"""
#                <|begin_of_text|><|start_header_id|>system<|end_header_id|>
#                {SYSTEM_PROMPT}<|eot_id|>
#                <|start_header_id|>user<|end_header_id|>
#                Query: {query}
#                Answer: Answer using {compressed_docs}<|eot_id|> 
#                <|start_header_id|>assistant<|end_header_id|>
#                """
               
#     return ollama.generate(prompt=prompt, model=GENERATION_MODEL)['response']
    
# chatbot = gr.ChatInterface(fn=chat, title="PDFBot")
# chatbot.launch()