In [None]:
### Install Required Packages
# !pip install langchain llama-cpp-python faiss-cpu chromadb duckduckgo-search transformers

# Example 1: Retrieval-Augmented Generation (RAG) with LLaMA

In [10]:
### Load LLaMA Model
from langchain.llms import LlamaCpp
llm = LlamaCpp(model_path="../llm/llama-7b.ggmlv3.q4_0.bin")

### Retrieval-Augmented Generation (RAG) with FAISS
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA

# Load and process documents
loader = TextLoader("../data/kb.txt")
documents = loader.load()
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
texts = splitter.split_documents(documents)

# Create FAISS vector store
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vector_store = FAISS.from_documents(texts, embeddings)
retriever = vector_store.as_retriever()

# Create Retrieval-QA Chain
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, chain_type="stuff")

# Ask a question
question = "What are the main applications of Bayesian inference?"
response = qa_chain.run(question)
print(response)


llama_model_loader: loaded meta data with 19 key-value pairs and 291 tensors from ../llm/llama-7b.ggmlv3.q4_0.bin (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = LLaMA v2
llama_model_loader: - kv   2:                       llama.context_length u32              = 4096
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 11008
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:                 llama.attention.head_count u32            

 Bayesian inference is used in machine learning, AI, and decision-making systems for tasks such as spam filtering, medical diagnosis, predictive modeling, reinforcement learning, and more.



# Example 2: AI Agent Using LLaMA

In [11]:
### AI Agent Using LLaMA
from langchain.tools import Tool
from langchain.agents import initialize_agent, AgentType
import requests

def search_web(query):
    response = requests.get(f"https://api.duckduckgo.com/?q={query}&format=json")
    return response.json().get("Abstract", "No results found.")

search_tool = Tool(
    name="Web Search",
    func=search_web,
    description="Search the web for relevant information."
)

agent = initialize_agent(
    tools=[search_tool],
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True
)

response = agent.run("What are the recent advancements in deep learning?")
print(response)

  agent = initialize_agent(
Llama.generate: 1 prefix-match hit, remaining 161 prompt tokens to eval




[1m> Entering new AgentExecutor chain...[0m


llama_perf_context_print:        load time =   29590.75 ms
llama_perf_context_print: prompt eval time =    8772.10 ms /   161 tokens (   54.49 ms per token,    18.35 tokens per second)
llama_perf_context_print:        eval time =   17790.75 ms /   107 runs   (  166.27 ms per token,     6.01 tokens per second)
llama_perf_context_print:       total time =   26637.91 ms /   268 tokens
Llama.generate: 269 prefix-match hit, remaining 89 prompt tokens to eval


[32;1m[1;3m I should read the papers that summarize these advancements
Action: Search for [Deep Learning](https://www.google.com/search?q=deep+learning&oq=deep+learning&aqs=chrome.0.57j69i57j0l3.1421j0j7&sourceid=chrome&ie=UTF-8)
Action Input: The input is the query used to search for deep learning papers.[0m
Observation: Search for [Deep Learning](https://www.google.com/search?q=deep+learning&oq=deep+learning&aqs=chrome.0.57j69i57j0l3.1421j0j7&sourceid=chrome&ie=UTF-8) is not a valid tool, try one of [Web Search].
Thought:

llama_perf_context_print:        load time =   29590.75 ms
llama_perf_context_print: prompt eval time =    5055.18 ms /    89 tokens (   56.80 ms per token,    17.61 tokens per second)
llama_perf_context_print:        eval time =   17555.66 ms /   103 runs   (  170.44 ms per token,     5.87 tokens per second)
llama_perf_context_print:       total time =   22680.93 ms /   192 tokens


[32;1m[1;3m I should find papers that are more recent
Action: Search for [Deep Learning](https://www.google.com/search?q=deep+learning&oq=deep+learning&aqs=chrome.0.57j69i57j0l3.1421j0j7&sourceid=chrome&ie=UTF-8)
Action Input: The input is the query used to search for deep learning papers.[0m
Observation: Search for [Deep Learning](https://www.google.com/search?q=deep+learning&oq=deep+learning&aqs=chrome.0.57j69i57j0l3.1421j0j7&sourceid=chrome&ie=UTF-8) is not a valid tool, try one of [Web Search].
Thought:

ValueError: Requested tokens (550) exceed context window of 512

# Expanding on Memory Integration in LangChain with LLaMA

In [None]:

### Memory Integration (Short-Term & Long-Term Memory)
from langchain.memory import ConversationBufferMemory, VectorStoreRetrieverMemory, CombinedMemory
from langchain.chains import ConversationalRetrievalChain

# Short-Term Memory
buffer_memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

# Long-Term Memory
retriever_memory = VectorStoreRetrieverMemory(retriever=vector_store.as_retriever())

# Hybrid Memory
memory = CombinedMemory(memories=[buffer_memory, retriever_memory])

conversation = ConversationalRetrievalChain.from_llm(llm, retriever=vector_store.as_retriever(), memory=memory)

# User interaction
dialogue = [
    "My favorite programming language is Python.",
    "What is my favorite programming language?"
]

for message in dialogue:
    print(conversation.run(message))