In [None]:
from langchain_ollama import ChatOllama

# Chat model served through Ollama, configured with temperature 0.
llm = ChatOllama(model="llama3.2", temperature=0)

In [141]:
from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader, TextLoader

def custom_loader(file_path: str):
    """Pick a document loader for ``file_path`` based on its extension.

    The extension check is case-insensitive, so ``report.PDF`` and
    ``notes.TXT`` are treated like their lowercase forms.

    Args:
        file_path: Path of the file to load.

    Returns:
        A ``PyPDFLoader`` for ``.pdf`` files or a ``TextLoader`` for ``.txt``
        files, constructed with the path exactly as it was given.

    Raises:
        ValueError: If the extension is neither ``.pdf`` nor ``.txt``.
    """
    # Only the comparison uses the lowercased copy; the loader still receives
    # the original path so lookups on case-sensitive filesystems keep working.
    lowered = file_path.lower()
    if lowered.endswith(".pdf"):
        return PyPDFLoader(file_path)
    if lowered.endswith(".txt"):
        return TextLoader(file_path)
    raise ValueError(f"Unsupported file type: {file_path}")

# Load every path under "personal" that matches the glob, letting
# custom_loader choose the concrete loader for each file.
loader = DirectoryLoader(
    "personal",
    glob="**/*",
    loader_cls=custom_loader,
    show_progress=True,
)
docs = loader.load()


[A
 33%|███▎      | 2/6 [04:16<08:33, 128.27s/it]

[A
[A
 86%|████████▌ | 6/7 [00:00<00:00,  6.51it/s]


In [146]:
from langchain_ollama import OllamaEmbeddings

# Embedding model served through Ollama, using the same "llama3.2" model name as the chat model.
embeddings = OllamaEmbeddings(model="llama3.2")

In [226]:
import getpass
import os
import time

from pinecone import Pinecone, ServerlessSpec

# Prompt for the Pinecone key only when the environment does not already
# hold a non-empty value; the prompt result is stored back in the environment.
if os.environ.get("PINECONE_API_KEY", "") == "":
    os.environ["PINECONE_API_KEY"] = getpass.getpass("Enter your Pinecone API key: ")

pinecone_api_key = os.environ["PINECONE_API_KEY"]

# Client used by the following cells to list, create and open indexes.
pc = Pinecone(api_key=pinecone_api_key)

In [227]:
import time

index_name = "personal"

# Names of the indexes that already exist for this client.
existing_indexes = [info["name"] for info in pc.list_indexes()]

if index_name not in existing_indexes:
    # NOTE(review): 3072 has to match the length of the vectors produced by
    # the embedding model used with this index; the bge-large-en-v1.5 model
    # loaded later in this notebook reports a 1024-dimensional output.
    # Confirm which model this index is meant for.
    pc.create_index(
        name=index_name,
        dimension=3072,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
    # Poll once per second until the new index reports itself ready.
    while True:
        if pc.describe_index(index_name).status["ready"]:
            break
        time.sleep(1)

index = pc.Index(index_name)

In [221]:
from langchain_pinecone import PineconeVectorStore

# LangChain vector store backed by the Pinecone index, using the current `embeddings` object.
vector_store = PineconeVectorStore(
    index=index,
    embedding=embeddings,
)

In [222]:
# One-time ingestion step: uncomment to add `docs` to the vector store.
# vector_store.add_documents(docs)

In [159]:
# Fetch the index statistics so the next cell can display them.
stats = index.describe_index_stats()

In [160]:
# Display the statistics object as the cell output.
stats

{'dimension': 3072,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 37}},
 'total_vector_count': 37}

In [169]:
# Sanity check of retrieval: ask for the 4 stored chunks closest to the query "gre".
vector_store.similarity_search("gre", k=4)

[Document(id='27d5e11e-10cc-47e0-ae7d-9dff0f711bee', metadata={'page': 9.0, 'source': 'personal/paper-crash-detection.pdf'}, page_content='                                                                                                    Nabaraj Subedi, Nirajan Paudel, Manish Chhetri, Sudarshan Acharya, Nabin Lamichhane  \nJournal of IoT in Social, Mobile, Analytics, and Cloud, March 2024, Volume 6, Issue 1 63 \n \nThe facial detection system accurately identifies yawning and eye conditions as in \nFigure 10, and the crash false notification triggering mechanism responds as depicted in Figure \n7. \n Conclusion \nThe results obtained from the drowsiness and crash detection system demonstrate its \neffectiveness in improving road safety. By detecting driver drowsiness and accurately \nidentifying crash events while promptly notifying relevant parties, the system facilit ates swift \nemergency response and aids in preventing potential accidents by monitoring the driver\'s facial \ncondi

In [170]:
# Retriever over the vector store; no arguments are passed, so the library defaults apply.
doc_retriever = vector_store.as_retriever()

Tool for reading marks from a PDF (earlier draft, kept commented out below)

In [191]:
# from pypdf import PdfReader
# from langchain_core.tools import tool

# @tool
# def marks_reader(pdf_path: str):
#     """
#     Read the marks of subjects from the pdf and return subject marks
#     """
#     pdf_reader = PdfReader(pdf_path)
#     text = ""
    
#     for page_num in range(len(pdf_reader.pages)):
#         text += pdf_reader.pages[page_num].extract_text()
    
#     return text

Tool for reading marks from a text file

In [200]:
from langchain_core.tools import tool

# Tool that returns the full contents of a UTF-8 text file, or an "Error: ..."
# string instead of raising, so a failure can be reported back as text.
#
# The function takes only `file_path`: this is a module-level function, not a
# method, so a leading `self` parameter would be one more argument the caller
# has to supply (the tool call recorded in this notebook only passes
# `file_path`).
#
# The docstring is left unchanged on purpose; NOTE(review): @tool is expected
# to use it as the tool description shown to the model, so confirm before
# rewording it.
@tool
def marks_reader(file_path: str) -> str:
    """Read content from a text file."""
    try:
        if not os.path.exists(file_path):
            return f"Error: File not found at path {file_path}"

        with open(file_path, 'r', encoding='utf-8') as file:
            content = file.read()

        return content
    except Exception as e:
        # Deliberate best-effort: any read or decode failure becomes a message.
        return f"Error reading file: {str(e)}"

In [201]:
# Bind the marks_reader tool to the llama3.2 chat model so it can request tool calls.
tools = [marks_reader]
llm_with_tools = llm.bind_tools(tools)

In [203]:
# Smoke test of tool binding. The recorded output is an AIMessage with empty
# content and a `marks_reader` tool call; nothing in this cell executes that call.
llm_with_tools.invoke("can you tell me my first year marks using the markreader tool?")

AIMessage(content='', additional_kwargs={}, response_metadata={'model': 'llama3.2', 'created_at': '2025-03-10T15:35:52.216445Z', 'done': True, 'done_reason': 'stop', 'total_duration': 1705163208, 'load_duration': 37503375, 'prompt_eval_count': 177, 'prompt_eval_duration': 700000000, 'eval_count': 22, 'eval_duration': 964000000, 'message': Message(role='assistant', content='', images=None, tool_calls=None)}, id='run-285bafbe-fd71-452e-b19b-dcbad821fbbf-0', tool_calls=[{'name': 'marks_reader', 'args': {'file_path': 'your_first_year_marks.txt'}, 'id': 'a3e4bf09-9b42-41dc-9c58-42ad34fe1a83', 'type': 'tool_call'}], usage_metadata={'input_tokens': 177, 'output_tokens': 22, 'total_tokens': 199})

In [204]:
from langchain.prompts import PromptTemplate

# Prompt text with two placeholders: {user_query} and {retrieved_documents}.
# The leading indentation and the trailing newline are part of the string
# that is sent to the model.
template = """
    You are a helpful assistant that can provide information based on both vector search from a database and other tools.

    ### Task:
    You will be provided with a user query. Your goal is to respond to the query by doing the following:
    1. Retrieve relevant information from the vector database (Pinecone) that best matches the query.
    2. If additional context or information is required that can be retrieved from tools (such as reading a PDF), use the appropriate tool.
    3. Combine the information from the database and tools to provide a well-structured and complete response.

    ### User Query:
    {user_query}

    ### Relevant Documents (from Pinecone vector database):
    {retrieved_documents}

    ### Tool Usage:
    If the relevant documents or context are not sufficient to answer the query, use the tools you have. For example, if the user is asking for marks of certain subjects, use the marks_reader tool to extract information from the document.
    If you used any tools, describe how the tool was used in your response.

    ### Answer:
    Your answer should be concise, clear, and comprehensive, using the retrieved documents and any tool-assisted information.
 """

# Wrap the text in a PromptTemplate that declares both placeholders.
prompt = PromptTemplate(
    input_variables=["user_query", "retrieved_documents"],
    template=template,
)
print(prompt)

input_variables=['retrieved_documents', 'user_query'] input_types={} partial_variables={} template='\n    You are a helpful assistant that can provide information based on both vector search from a database and other tools.\n\n    ### Task:\n    You will be provided with a user query. Your goal is to respond to the query by doing the following:\n    1. Retrieve relevant information from the vector database (Pinecone) that best matches the query.\n    2. If additional context or information is required that can be retrieved from tools (such as reading a PDF), use the appropriate tool.\n    3. Combine the information from the database and tools to provide a well-structured and complete response.\n\n    ### User Query:\n    {user_query}\n\n    ### Relevant Documents (from Pinecone vector database):\n    {retrieved_documents}\n\n    ### Tool Usage:\n    If the relevant documents or context are not sufficient to answer the query, use the tools you have. For example, if the user is asking fo

In [205]:
# Pipe the formatted prompt into the tool-enabled llama3.2 model.
chain = prompt | llm_with_tools
print(chain)

first=PromptTemplate(input_variables=['retrieved_documents', 'user_query'], input_types={}, partial_variables={}, template='\n    You are a helpful assistant that can provide information based on both vector search from a database and other tools.\n\n    ### Task:\n    You will be provided with a user query. Your goal is to respond to the query by doing the following:\n    1. Retrieve relevant information from the vector database (Pinecone) that best matches the query.\n    2. If additional context or information is required that can be retrieved from tools (such as reading a PDF), use the appropriate tool.\n    3. Combine the information from the database and tools to provide a well-structured and complete response.\n\n    ### User Query:\n    {user_query}\n\n    ### Relevant Documents (from Pinecone vector database):\n    {retrieved_documents}\n\n    ### Tool Usage:\n    If the relevant documents or context are not sufficient to answer the query, use the tools you have. For example, 

In [206]:
user_query = input("Your query :: \n")
retrieved_documents = doc_retriever.invoke(user_query)

# Give the model the text of the retrieved chunks. Passing the list itself
# would insert the repr of the Document objects (ids, metadata, escaped
# newlines) into the prompt.
retrieved_text = "\n\n".join(doc.page_content for doc in retrieved_documents)

response = chain.invoke({"user_query": user_query, "retrieved_documents": retrieved_text})
print(response.content)

# The chain uses the tool-enabled model, whose reply can have empty content
# and only a tool-call request. Nothing here executes the tool, so report the
# request instead of leaving a blank output.
pending_tool_calls = getattr(response, "tool_calls", None)
if not response.content and pending_tool_calls:
    print(f"Model requested tool call(s) that were not executed: {pending_tool_calls}")

Based on the provided document, I can infer that the author is working on a project related to crash detection for vehicles. The text mentions "Nabaraj Subedi, Nirajan Paudel, Manish Chhetri, Sudarshan Acharya, Nabin Lamichhane" as authors and "Journal of IoT in Social, Mobile, Analytics, and Cloud" as the publication.

The document also discusses the use of Arduino Uno and MPU 6050 components in the project. The author mentions that the Arduino Uno receives velocity and tilt values from the accelerometer and verifies if these values meet predefined conditions before proceeding with further operations.

Unfortunately, I couldn't find any information on marks or grades related to specific subjects in the provided document. If you could provide more context or clarify what you're looking for, I'd be happy to try and assist you further.


## Let's try the same pipeline with the DeepSeek model

In [247]:
from langchain_ollama import ChatOllama

# Second chat model served through Ollama ("deepseek-r1"), also at temperature 0.
deepseek = ChatOllama(model="deepseek-r1", temperature=0)

In [248]:
# Quick check that the deepseek-r1 model responds.
deepseek.invoke("which model are you?")

AIMessage(content="<think>\n\n</think>\n\nHi! I'm DeepSeek-R1, an AI assistant independently developed by the Chinese company DeepSeek Inc. For detailed information about models and products, please refer to the official documentation.", additional_kwargs={}, response_metadata={'model': 'deepseek-r1', 'created_at': '2025-03-10T16:09:19.630264Z', 'done': True, 'done_reason': 'stop', 'total_duration': 10288325625, 'load_duration': 645679042, 'prompt_eval_count': 8, 'prompt_eval_duration': 3571000000, 'eval_count': 42, 'eval_duration': 6067000000, 'message': Message(role='assistant', content='', images=None, tool_calls=None)}, id='run-219ec175-17e1-461f-ada6-44e4dc6df53a-0', usage_metadata={'input_tokens': 8, 'output_tokens': 42, 'total_tokens': 50})

In [209]:
from langchain.embeddings import HuggingFaceEmbeddings

# Rebinds `embeddings`, which an earlier cell set to the Ollama llama3.2
# embedder; every cell run after this one sees the HuggingFace model instead.
# The repr shown in the next cell reports a 1024-dimensional output for it.
embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-large-en-v1.5")

 29%|██▊       | 2/7 [1:44:19<4:20:49, 3129.93s/it]


In [210]:
# Display the embedding object's configuration as the cell output.
embeddings

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': True}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='BAAI/bge-large-en-v1.5', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

In [228]:
from langchain_pinecone import PineconeVectorStore

# Rebuild the vector store so it uses the HuggingFace `embeddings` object.
# NOTE(review): `index` is the "personal" index, which the notebook creates
# with dimension=3072, while bge-large-en-v1.5 reports 1024-dimensional
# vectors. Confirm the index dimension matches before adding or querying.
vector_store = PineconeVectorStore(
    index=index,
    embedding=embeddings,
)

In [230]:
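# One-time ingestion step: uncomment to add `docs` to the vector store using the current embeddings.
# vector_store.add_documents(docs)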

In [233]:
# index_ids

In [237]:
# Sanity check of retrieval: ask for the single closest chunk to "statement of purpose".
vector_store.similarity_search("statement of purpose", k=1)

[Document(id='150ee64b-47c0-4b00-b6bd-2c53858c1f70', metadata={'page': 2.0, 'source': 'personal/cu-sop.pdf'}, page_content='knowledge  and  skills  needed  to  make  significant  contributions  to  this  field.  I  look  \nforward\n \nto\n \nbringing\n \nmy\n \nunique\n \nperspective,\n \ndedication,\n \nand\n \nenthusiasm\n \nto\n \nthe\n \nUniversity,\n \nand\n \nI\n \nam\n \nexcited\n \nabout\n \nthe\n \nopportunities\n \nto\n \ncollaborate\n \nwith\n \nlike-minded\n \nindividuals\n \nand\n \ndistinguished\n \nprofessors.\n \nUpon\n \ncompleting\n \nmy\n \ngraduate\n \nstudies,\n \nI\n \naspire\n \nto\n \nreturn\n \nto\n \nNepal\n \nand\n \ncontribute\n \nto\n \nthe\n \nadvancement\n \nof\n \nAI\n \ntechnology,\n \nparticularly\n \nin\n \napplications\n \nthat\n \nenhance\n \naccessibility\n \nand\n \nimprove\n \nthe\n \nquality\n \nof\n \nlife\n \nfor\n \nindividuals\n \nwith\n \ndisabilities.\n   Here  is  the  link  of  published  articles  :  Nepali  Image  Captioning:  Generati

In [238]:
# The number of results must be passed inside `search_kwargs`; a bare `k=3`
# keyword on as_retriever() is not a search setting and does not limit the
# results to 3.
retriever_deepseek = vector_store.as_retriever(search_kwargs={"k": 3})

In [249]:
from langchain_core.tools import tool

# Re-declares the marks_reader tool for the DeepSeek experiment. It returns
# the full contents of a UTF-8 text file, or an "Error: ..." string instead
# of raising.
#
# The function takes only `file_path`: this is a module-level function, not a
# method, so a leading `self` parameter would be one more argument the caller
# has to supply.
#
# The docstring is left unchanged on purpose; NOTE(review): @tool is expected
# to use it as the tool description shown to the model, so confirm before
# rewording it.
@tool
def marks_reader(file_path: str) -> str:
    """Read content from a text file."""
    try:
        if not os.path.exists(file_path):
            return f"Error: File not found at path {file_path}"

        with open(file_path, 'r', encoding='utf-8') as file:
            content = file.read()

        return content
    except Exception as e:
        # Deliberate best-effort: any read or decode failure becomes a message.
        return f"Error reading file: {str(e)}"
    
# Bind the marks_reader tool to the deepseek-r1 model.
# NOTE(review): the chain built later in this notebook pipes the prompt into
# `deepseek`, not `deepseek_with_tools`. Confirm whether that is intentional.
tools = [marks_reader]
deepseek_with_tools = deepseek.bind_tools(tools)

In [242]:
from langchain.prompts import PromptTemplate

# Same prompt text as the template defined earlier in this notebook, with the
# {user_query} and {retrieved_documents} placeholders. The leading indentation
# and the trailing newline are part of the string that is sent to the model.
template = """
    You are a helpful assistant that can provide information based on both vector search from a database and other tools.

    ### Task:
    You will be provided with a user query. Your goal is to respond to the query by doing the following:
    1. Retrieve relevant information from the vector database (Pinecone) that best matches the query.
    2. If additional context or information is required that can be retrieved from tools (such as reading a PDF), use the appropriate tool.
    3. Combine the information from the database and tools to provide a well-structured and complete response.

    ### User Query:
    {user_query}

    ### Relevant Documents (from Pinecone vector database):
    {retrieved_documents}

    ### Tool Usage:
    If the relevant documents or context are not sufficient to answer the query, use the tools you have. For example, if the user is asking for marks of certain subjects, use the marks_reader tool to extract information from the document.
    If you used any tools, describe how the tool was used in your response.

    ### Answer:
    Your answer should be concise, clear, and comprehensive, using the retrieved documents and any tool-assisted information.
 """

# Wrap the text in a PromptTemplate that declares both placeholders.
prompt = PromptTemplate(
    input_variables=["user_query", "retrieved_documents"],
    template=template,
)

In [252]:
# Pipe the formatted prompt into the plain deepseek-r1 model.
# NOTE(review): no tools are bound on this chain, although the prompt text
# tells the model to use the marks_reader tool. Confirm this is intended.
chain = prompt | deepseek
print(chain)

first=PromptTemplate(input_variables=['retrieved_documents', 'user_query'], input_types={}, partial_variables={}, template='\n    You are a helpful assistant that can provide information based on both vector search from a database and other tools.\n\n    ### Task:\n    You will be provided with a user query. Your goal is to respond to the query by doing the following:\n    1. Retrieve relevant information from the vector database (Pinecone) that best matches the query.\n    2. If additional context or information is required that can be retrieved from tools (such as reading a PDF), use the appropriate tool.\n    3. Combine the information from the database and tools to provide a well-structured and complete response.\n\n    ### User Query:\n    {user_query}\n\n    ### Relevant Documents (from Pinecone vector database):\n    {retrieved_documents}\n\n    ### Tool Usage:\n    If the relevant documents or context are not sufficient to answer the query, use the tools you have. For example, 

In [254]:
user_query = input("Your query :: \n")
retrieved_documents = retriever_deepseek.invoke(user_query)

# Give the model the text of the retrieved chunks. Passing the list itself
# would insert the repr of the Document objects (ids, metadata, escaped
# newlines) into the prompt.
retrieved_text = "\n\n".join(doc.page_content for doc in retrieved_documents)

response = chain.invoke({"user_query": user_query, "retrieved_documents": retrieved_text})
print(response.content)

<think>
Okay, so I need to figure out how to help this user. They provided a chunk of text from a document about someone's application to CU Boulder for an AI program. The user is asking me to analyze their journey into AI, focusing on their motivation, key research themes, and how they plan to contribute.

First, I'll read through the text carefully. It starts with a paragraph image captioning thesis, which caught my attention because that's exactly what the user is working on. They mention collaborating with Professor Danna Gurari and the Image and Video Computing Group. That seems important for their research focus.

Next, there's a section about their application motivation. They were inspired by an experience with visually impaired individuals in Nepal using Envision glasses, which only work in English. This led them to want to develop solutions in Nepali, making it more accessible. That's a strong motivator and shows the real-world impact they're aiming for.

Then, there's a part