In [1]:
# Dependencies for this notebook. Uncomment to install into the kernel's environment.
# %pip install torch -q 
# %pip install transformers -q 
# %pip install numpy -q 
# %pip install langchain -q 
# %pip install langchain_community -q 
# %pip install langchain-chroma -q 
# NOTE(review): the cells below import `langchain_huggingface`, `langchain_text_splitters`
# and `dotenv`; none of them is listed here, and `langchain_transformers` is never
# imported. Presumably the next line was meant to be `langchain-huggingface` - confirm.
# %pip install langchain_transformers -q

Initialize HuggingFace LLM


In [2]:
# Install python-dotenv to load .env files
# %pip install python-dotenv -q

In [3]:
import os
from dotenv import load_dotenv
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint

# Pull variables defined in a local .env file into the process environment.
load_dotenv()

# The Hugging Face token is read from the environment; it is never hardcoded here.
api_key = os.environ.get("HUGGINGFACE_API_TOKEN")
if not api_key:
    # Stop immediately: the endpoint below is configured with this token.
    raise ValueError("HUGGINGFACE_API_TOKEN not found in .env file. Please check your .env file.")

# Text-generation endpoint for the Mistral instruct model.
llm_base = HuggingFaceEndpoint(
    huggingfacehub_api_token=api_key,
    repo_id="mistralai/Mistral-7B-Instruct-v0.3",
    max_new_tokens=500,
    temperature=0.1,
)

# Chat-style wrapper around the endpoint; this is the object the chain uses.
llm = ChatHuggingFace(llm=llm_base)

print("✅ HuggingFace LLM initialized successfully!")



  from .autonotebook import tqdm as notebook_tqdm


✅ HuggingFace LLM initialized successfully!


Initialize Embedding model

In [4]:
# %pip install sentence-transformers


In [5]:
# %pip install hf_xet

In [9]:
# `all-mpnet-base-v2` is a general sentence-transformers model, not a BGE model.
# The BGE-specific wrapper is meant for BAAI/bge-* checkpoints and applies its own
# query instruction, so the generic Hugging Face embeddings wrapper is used instead.
from langchain_huggingface import HuggingFaceEmbeddings

# Embedding model shared by the vector store (indexing) and the retriever (queries).
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2"
)

Initialize output parser

In [12]:
from langchain_core.output_parsers import StrOutputParser




# Final chain step: turns the chat model's message into a plain string.
output_parser = StrOutputParser()

Load the PDF Document

In [None]:
# %pip install pypdf -qU

Note: you may need to restart the kernel to use updated packages.


In [13]:
from langchain_community.document_loaders import PyPDFLoader

# Point a PDF loader at the source file (path is relative to the working directory).
loader = PyPDFLoader("Chapter 01.pdf")

# Read the file; `docs` is indexable and holds Document objects.
docs = loader.load()

In [14]:
# Sanity check: display the first loaded Document (its metadata and page_content).
docs[0]

Document(metadata={'producer': 'Microsoft® PowerPoint® LTSC', 'creator': 'Microsoft® PowerPoint® LTSC', 'creationdate': '2024-10-12T10:35:42+05:30', 'title': '', 'author': 'Roshani Wijesuriya', 'moddate': '2024-10-12T10:35:42+05:30', 'source': 'Chapter 01.pdf', 'total_pages': 19, 'page': 0, 'page_label': '1'}, page_content='IN2311:Operating systems\nRoshani Wijesuriya')

Split Document into Chunks

In [15]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Recursive splitter configured with chunk_size=400 and chunk_overlap=50.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=50,
    chunk_size=400,
)

# Break the loaded documents into chunks for embedding.
splits = text_splitter.split_documents(docs)

In [16]:
# Number of chunks produced by the splitter.
len(splits)

23

Create Vector Store and Retriever

In [17]:
from langchain_chroma import Chroma

# Build a Chroma vector store from the chunks, embedding them with `embedding_model`.
vectorstore = Chroma.from_documents(
    embedding=embedding_model,
    documents=splits,
)

In [18]:
# Expose the vector store through a retriever, using its default settings.
retriever = vectorstore.as_retriever()

Define Prompt Template

In [20]:
from langchain_core.prompts import ChatPromptTemplate
# Prompt text with two placeholders: {question} (the user's query) and
# {context} (the retrieved material the answer must be based on).
template = """
Answer this question using the provided context only.

{question}

Context:
{context}

Answer:
"""

# Wrap the raw text as a chat prompt template.
prompt = ChatPromptTemplate.from_template(template)

In [None]:
# Display the template object to confirm its input variables (context, question).
prompt

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template='\nAnswer this question using the provided context only.\n\n{question}\n\nContext:\n{context}\n\nAnswer:\n'), additional_kwargs={})])

Chain Retriever and Prompt Template with LLM

In [22]:
from langchain_core.runnables import RunnablePassthrough


def format_docs(documents):
    """Join the text of retrieved documents into a single context string.

    Args:
        documents: Iterable of objects exposing a ``page_content`` string
            (the results returned by the retriever).

    Returns:
        The ``page_content`` values separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in documents)


# The retriever returns Document objects. Without the formatting step, the
# prompt's {context} slot would receive the repr of that list (metadata
# included) instead of the chunk text.
chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | output_parser
)

In [23]:
# NOTE(review): this queries `llm` directly, so the retriever and prompt wired
# into `chain` are not used here - presumably a no-context baseline; confirm.
response = llm.invoke("what is operating system?")

# Print the full message object returned by the model.
print(response)

content=" An operating system (OS) is a software that manages computer hardware resources and provides common services for computer programs. It acts as an intermediary between the user and the computer hardware, allowing users to interact with the computer in a user-friendly manner.\n\nThe operating system is responsible for tasks such as managing the computer's memory, processing input and output, managing files and directories, and coordinating the execution of multiple programs. Some common examples of operating systems include Microsoft Windows, macOS, Linux, and Android.\n\nThe operating system is essential for the proper functioning of a computer, as it provides the necessary infrastructure for other software to run on the system. Without an operating system, a computer would be little more than a collection of hardware components with no way to interact with them." additional_kwargs={} response_metadata={'token_usage': {'completion_tokens': 164, 'prompt_tokens': 9, 'total_token