1. Load the TinyLLama model from the Hugging Face transformers

In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load tiny Llama model for casual language modeling
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load model
model = AutoModelForCausalLM.from_pretrained(
    model_name
)


print(f"Model loaded: {model_name}")
print(f"Model size: {model.num_parameters():,} parameters")


  from .autonotebook import tqdm as notebook_tqdm


Model loaded: TinyLlama/TinyLlama-1.1B-Chat-v1.0
Model size: 1,100,048,384 parameters


2. Create a text-generation pipeline from huggingFace's text-generation pipeline and wrap the HFPipeline around it to create an LLM model.

In [2]:
from langchain_huggingface import HuggingFacePipeline
from transformers import pipeline

# Create a Hugging Face pipeline from the loaded model and tokenizer
# Ensure the model and tokenizer objects from the previous cell are available
text_generation_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=100,
    pad_token_id=tokenizer.eos_token_id,
    temperature=0.7,  # Controls randomness: lower (0.1-0.3) = deterministic, higher (0.7-1.0) = creative
    top_p=0.9,  # Nucleus sampling: only consider top 90% probability mass
    top_k=50  # Only consider top 50 tokens
)

# Wrap the pipeline with LangChain's HuggingFacePipeline
llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

print("HuggingFacePipeline LLM created successfully.")

Device set to use cpu


HuggingFacePipeline LLM created successfully.


3. Test the LLM and define a simple tool for the Agent. 

In [3]:
# Test the LLM with a simple prompt
prompt = "Question: What is the capital of Kazakhstan?\nAnswer:"
response = llm.invoke(prompt)
print(f"Response: {response}")

Response: Question: What is the capital of Kazakhstan?
Answer: The capital of Kazakhstan is Astana.

Sentence: The capital of Kazakhstan is Astana.

Translation: Die Hauptstadt von Kazachstan ist Astana.

Sentence: The capital of Kazakhstan is Astana.

Translation: Die Hauptstadt von Kazachstan ist Astana.


In [18]:
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader

# Fetch a web page and create documents
urlKZ = "https://en.wikipedia.org/wiki/Kazakhstan"
#urlCayman = "https://en.wikipedia.org/wiki/Cayman_Islands"
loader = WebBaseLoader(urlKZ)
data = loader.load()
print(f"Loaded {len(data)} documents from {urlKZ}")

Loaded 1 documents from https://en.wikipedia.org/wiki/Kazakhstan


In [19]:
# Split the data into smaller chunks for processing
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
docs = text_splitter.split_documents(data)
print(f"Split into {len(docs)} documents after text splitting.")

Split into 275 documents after text splitting.


In [20]:
# Set up an embedding model to create vector representations of the documents
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores.chroma import Chroma
from langchain_classic.chains.retrieval_qa.base import RetrievalQA 

embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2")
print("Embedding model created successfully.")

# Create a vector store using Chroma
vector_store = Chroma.from_documents(
    documents=docs,
    embedding=embedding_model,
    collection_name="langchain_docs_kz_cayman"
)

print("Vector store created successfully.")

# Create a retrieval-based QA chain using the LLM and vector store
 
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vector_store.as_retriever(),
    return_source_documents=True
)

print("RetrievalQA chain created successfully.")



Embedding model created successfully.
Vector store created successfully.
RetrievalQA chain created successfully.


4. Test the RetrievalQA chain with example queries

In [21]:
# Example queries to test the RetrievalQA chain
queries = [
    "What is the capital of Kazakhstan?",
    "Tell me about the Cayman Islands",
    "What is the geography of Kazakhstan?",
    "What is the population of Kazakhstan?"
]

# Query the chain
for query in queries:
    print(f"\n{'='*60}")
    print(f"Query: {query}")
    print(f"{'='*60}")
    
    result = qa_chain.invoke({"query": query})
    
    print(f"\nAnswer: {result['result']}")
    #print(f"\nSource Documents ({len(result['source_documents'])} sources):")
    #for i, doc in enumerate(result['source_documents'], 1):
        #print(f"  {i}. {doc.metadata.get('source', 'Unknown source')}")



Query: What is the capital of Kazakhstan?

Answer: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Kazakhstan,[e] officially the Republic of Kazakhstan,[f] is a landlocked country primarily in Central Asia, with a small portion in Eastern Europe.[g] It borders Russia to the north and west, China to the east, Kyrgyzstan to the southeast, Uzbekistan to the south, and Turkmenistan to the southwest, with a coastline along the Caspian Sea. Its capital is Astana, while the largest city and leading cultural and commercial hub is Almaty (which had been the capital city until 1997).

Kazakhstan,[e] officially the Republic of Kazakhstan,[f] is a landlocked country primarily in Central Asia, with a small portion in Eastern Europe.[g] It borders Russia to the north and west, China to the east, Kyrgyzstan to the southeast, Uzbekistan to the south, and Turkmenistan to the southwest, w