<h1>Custom Chat Model</h1>

In [23]:
from customchatmodel import CustomChatModel
from langchain_core.messages import (
    AIMessage,
    AIMessageChunk,
    BaseMessage,
    HumanMessage,
    SystemMessage
)


model = CustomChatModel(
    model="llama3.2:1b",
)

response = model.invoke([HumanMessage(content="Hello, what's your name")])  # Replace with your input
print(response)  # Print the model's response

INFO:customchatmodel:Messages: Hello, what's your name
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
INFO:customchatmodel:Response: I'm an artificial intelligence model known as Llama. Llama stands for "Large Language Model Meta AI."
INFO:customchatmodel:Response type: <class 'str'>


content='I\'m an artificial intelligence model known as Llama. Llama stands for "Large Language Model Meta AI."' additional_kwargs={} response_metadata={'time_in_seconds': 3, 'model_name': 'llama3.2:1b'} id='run--bbd5c629-589f-4e01-9582-6244f0d7e981-0'


<h1>Custom Embedding</h1>

In [24]:
from customembedding import CustomEmbeddings

embeddings = CustomEmbeddings(
    model="llama3.2:1b",
)
embeddings_response = embeddings.embed_query("Hello, what's your name") 
print(embeddings_response)  

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"


[-0.03418502, 0.051423963, 0.028751202, 0.015932208, 0.026842665, -0.006169097, 0.04432092, 0.004276944, -0.008132363, 0.039445937, 0.0048868516, 0.001317669, -0.031817265, -0.027940579, -0.020138387, -0.0048444704, -0.009676678, 0.04719823, 0.012169251, 0.008606751, 0.0027995822, 0.056166276, -0.019544866, 0.011486821, 0.009820095, -0.0023257863, -0.047486797, 0.022317247, -0.017454246, 0.006601091, 0.031872004, 0.020653006, 0.0064871972, 0.015931537, -0.0071272585, -0.015431345, 0.03245299, -0.0066599227, 0.002315344, -0.003914186, 0.02014854, -0.027143152, -0.013680257, 0.026852993, -0.03746, 0.013351, 0.009235829, -0.0009939444, 0.0122098625, 0.040399317, -0.0011279369, 0.0057453723, 0.06265314, 0.040310852, -0.0003982326, 0.049701568, 0.014464867, -0.015251153, -0.004298773, 0.011154066, 0.016652044, 0.013125055, -0.026291305, 0.009402062, 0.011876801, 0.0013673686, -0.0036793363, 0.04136717, 0.0058027324, 0.010019802, 0.054438476, 0.011888701, -0.031556383, -0.032811716, -0.03636

In [25]:
embeddings_response = embeddings.embed_documents(["Hello, what's your name", "hi", "hey"]) 
print(embeddings_response)  
print(len(embeddings_response)) 

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"


[[-0.03418502, 0.051423963, 0.028751202, 0.015932208, 0.026842665, -0.006169097, 0.04432092, 0.004276944, -0.008132363, 0.039445937, 0.0048868516, 0.001317669, -0.031817265, -0.027940579, -0.020138387, -0.0048444704, -0.009676678, 0.04719823, 0.012169251, 0.008606751, 0.0027995822, 0.056166276, -0.019544866, 0.011486821, 0.009820095, -0.0023257863, -0.047486797, 0.022317247, -0.017454246, 0.006601091, 0.031872004, 0.020653006, 0.0064871972, 0.015931537, -0.0071272585, -0.015431345, 0.03245299, -0.0066599227, 0.002315344, -0.003914186, 0.02014854, -0.027143152, -0.013680257, 0.026852993, -0.03746, 0.013351, 0.009235829, -0.0009939444, 0.0122098625, 0.040399317, -0.0011279369, 0.0057453723, 0.06265314, 0.040310852, -0.0003982326, 0.049701568, 0.014464867, -0.015251153, -0.004298773, 0.011154066, 0.016652044, 0.013125055, -0.026291305, 0.009402062, 0.011876801, 0.0013673686, -0.0036793363, 0.04136717, 0.0058027324, 0.010019802, 0.054438476, 0.011888701, -0.031556383, -0.032811716, -0.0363

<h1>Document Loader</h1>

In [26]:
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import WebBaseLoader
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin


def get_all_pages(base_url):
    visited = set()  # To keep track of visited URLs
    pages = []       # To store all the pages

    def crawl(url):
        if url in visited:
            return
        visited.add(url)

        try:
            response = requests.get(url)
            response.raise_for_status()  # Raise an error for bad status codes
            soup = BeautifulSoup(response.text, 'html.parser')
            pages.append(url)

            # Find all links on the page
            for link in soup.find_all('a', href=True):
                full_url = urljoin(base_url, link['href'])  # Resolve relative URLs
                if base_url in full_url:  # Ensure the link is part of the same domain
                    crawl(full_url)
        except Exception as e:
            print(f"Failed to crawl {url}: {e}")

    crawl(base_url)
    return pages

# base_url = "https://lonyinchan.com"  # Replace with your target URL
# all_pages = get_all_pages(base_url)
# print(all_pages)  # Print all crawled pages

loader = WebBaseLoader(
    web_paths=(["https://lilianweng.github.io/posts/2023-06-23-agent/"])
)
docs = loader.load()

print(docs)

[Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'title': "LLM Powered Autonomous Agents | Lil'Log", 'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\n\nPlanning\n\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality of final resu

<h1>Chunking</h1>

In [39]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,  # chunk size (characters)
    chunk_overlap=0,  # chunk overlap (characters)
    add_start_index=True,  # track index in original document
    separators=["\n\n", "\n", " ", ""]
)
chunked_docs = text_splitter.split_documents(docs)

print(chunked_docs)  # Print chunked documents

print(len(chunked_docs))
print(len(docs))

[Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'title': "LLM Powered Autonomous Agents | Lil'Log", 'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\n\nPlanning\n\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality of final resu

<h1>Indexing</h1>

In [28]:
from langchain.vectorstores import FAISS
import os

if not os.path.exists("faiss_index"):
    vectorstore = FAISS.from_documents(
        chunked_docs,
        embeddings,
    )
    vectorstore.save_local("faiss_index")
    print("Created new index")
else:   
    vectorstore = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
    print("Loaded existing index")



print(vectorstore)  # Print the loaded vector store

Loaded existing index
<langchain_community.vectorstores.faiss.FAISS object at 0x000001EC40A8CA10>


<h1>RetrievalQA</h1>

In [None]:
from langchain.chains import RetrievalQA

qa = RetrievalQA.from_chain_type(
    llm=model,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(search_kwargs={"k": 1}),
    return_source_documents=True,
)

response = qa("Tell me about this site")

print(response)  # Print the response
print(response['result'])  # Print the answer
print(response['source_documents'])  # Print the source documents

ValidationError: 1 validation error for RetrievalQA
return_generated_question
  Extra inputs are not permitted [type=extra_forbidden, input_value=True, input_type=bool]
    For further information visit https://errors.pydantic.dev/2.11/v/extra_forbidden

<h1>Tuning</h1>

In [30]:
messages = [
    SystemMessage(
        content="You are a very helpful assistant! Your name is Lonyin."
    ),
    HumanMessage(
        content="What is your name?"
    )
]
response = model.invoke(messages)
print(response)  # Print the model's response

INFO:customchatmodel:Messages: You are a very helpful assistant! Your name is Lonyin.What is your name?
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
INFO:customchatmodel:Response: Hello! I'm Lonyin, nice to meet you. I'm here to provide information and help with any questions or tasks you may have. How can I assist you today?
INFO:customchatmodel:Response type: <class 'str'>


content="Hello! I'm Lonyin, nice to meet you. I'm here to provide information and help with any questions or tasks you may have. How can I assist you today?" additional_kwargs={} response_metadata={'time_in_seconds': 3, 'model_name': 'llama3.2:1b'} id='run--e425c3bf-414e-485a-b31c-07dd9d6a6b37-0'


<h1>Tooling</h1>

<h1>Chat History</h1>

In [53]:
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables import RunnablePassthrough
from typing import List, Optional
from langchain.chains import LLMChain
from langchain_core.prompts import PromptTemplate

from langchain_core.pydantic_v1 import BaseModel, Field




# Create a memory object to store chat history
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True,
    output_key="answer"
)


followup_prompt = PromptTemplate(
    input_variables=["answer", "chat_history", "question", "retrieved_docs"],
    template="""
You are Lonyin, an AI assistant. Based on the previous conversation and retrieved content, suggest 3 relevant follow-up questions.

Previous conversation:
{chat_history}

Current question: {question}
Answer that was provided: {answer}

Retrieved documents for context:
{retrieved_docs}

Provide exactly 3 concise, specific follow-up questions that would help explore this topic further:
"""
)

# structured_llm = model.with_structured_output(Search)

# Create a conversational retrieval chain that combines the model, retriever, and memory
conversational_qa = ConversationalRetrievalChain.from_llm(
    llm=model,
    retriever=vectorstore.as_retriever(search_kwargs={"k": 3}),
    memory=memory,
    chain_type="stuff",
    return_source_documents=True,
)

# structured_llm = conversational_qa.with_structured_output(Search)

# query_analyzer = {"question": RunnablePassthrough()} | prompt | conversational_qa


# query_analyzer.invoke({"question": "Tell me about this site"})

# Example conversation with context from indexed documents
# question1 = "What pages does lonyinchan.com have?"
# result1 = conversational_qa({"question": question1})

# print("Question:", question1)
# print("Answer:", result1["answer"])
# print("Source Documents:", result1["source_documents"])

# # Ask a follow-up question that references the previous question
# question2 = "hi"
# result2 = conversational_qa({"question": question2})

# print("\nQuestion:", question2)
# print("Answer:", result2["answer"])
# print("Source Documents:", result2["source_documents"])

# # Print the chat history
# print("\nChat History:")
# print(memory.chat_memory.messages)

# response = conversational_qa.invoke()
# Create a wrapper function that enhances the conversational_qa with suggestion
# 
followup_chain = LLMChain(
    llm=model,
    prompt=followup_prompt,
    output_key="suggested_questions"
)
def enhanced_qa(question):
    # Handle both string input and dict input
    if isinstance(question, str):
        query = {"question": question}
    else:
        query = question
        
    # Get the answer from the conversational QA chain
    result = conversational_qa(query)
    
    # Extract needed elements for generating follow-ups
    answer = result["answer"]
    question_text = query["question"]
    
    # Format the retrieved documents to a readable string
    retrieved_docs_text = "\n\n".join([f"Document {i+1}:\n{doc.page_content}" 
                                     for i, doc in enumerate(result["source_documents"][:2])])
    
    # Format chat history for the prompt
    chat_history_text = str(memory.chat_memory.messages)
    
    # Generate follow-up suggestions
    followup_result = followup_chain.run(
        answer=answer,
        chat_history=chat_history_text,
        question=question_text,
        retrieved_docs=retrieved_docs_text
    )
    
    # Add suggested questions to the original result
    result["suggested_questions"] = followup_result
    
    return result

question = "Tell me about AI agents described in the document"
result = enhanced_qa(question)

print(result)

print(result["answer"])

print(result["suggested_questions"])

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
INFO:customchatmodel:Messages: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
How to store


(Image credit: Ledger)

Unlike the traditional currency, crypto is entirely digital which allows you to store using different approaches. This is because you technically dont store a unit of the cryptocurrency, instead it is a private key that is used
                to sign transactions.


For the privacy focused person, you can opt to use a hardware wallet like the Ledger Wallet which acts like a usb flash drive. There are also cold (offline) wallets that can be stored on your device or online wallets affilated
                with exchanges or independent platforms. Unfortunately this does mean that if you do not backup your wallet and if the data is all lost, you also lose all the cr

{'question': 'Tell me about AI agents described in the document', 'chat_history': [HumanMessage(content='Tell me about AI agents described in the document', additional_kwargs={}, response_metadata={}), AIMessage(content='I don\'t know anything about AI agents from the provided text. The conversation only mentions cryptocurrency and investing in it, using terms like "private key," "hardware wallet," "cold wallets," and discussing brokerages, stock exchanges, index funds, and ETFs. There\'s no mention of artificial intelligence or agents. Is there something specific you\'d like to know about AI?', additional_kwargs={}, response_metadata={})], 'answer': 'I don\'t know anything about AI agents from the provided text. The conversation only mentions cryptocurrency and investing in it, using terms like "private key," "hardware wallet," "cold wallets," and discussing brokerages, stock exchanges, index funds, and ETFs. There\'s no mention of artificial intelligence or agents. Is there something

<h1>Test Chat History</h1>

In [32]:
response = conversational_qa("Did I say hi already?")  # Ask a follow-up question
print(response)  # Print the model's response

INFO:customchatmodel:Messages: Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:

Human: What pages does lonyinchan.com have?
Assistant: According to the text, Lonyin Chan's website is likely "LonyinChan.com". However, it's not explicitly stated which page(s) this website has.
Human: hi
Assistant: I don't know if LonyinChan.com has any specific pages.
Follow Up Input: Did I say hi already?
Standalone question:
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
INFO:customchatmodel:Response: Since the conversation started with "What pages does lonyinchan.com have?" and there was a follow-up input of "Did I say hi already?", it's likely that the human is referring to their initial greeting. Here's a rephrased standalone question:

Assistant: Did you greet someone?
INFO:customchatmodel:Response type: <class 'str'>
INFO:httpx:HTTP Request: POST htt

{'question': 'Did I say hi already?', 'chat_history': [HumanMessage(content='What pages does lonyinchan.com have?', additional_kwargs={}, response_metadata={}), AIMessage(content='According to the text, Lonyin Chan\'s website is likely "LonyinChan.com". However, it\'s not explicitly stated which page(s) this website has.', additional_kwargs={}, response_metadata={}), HumanMessage(content='hi', additional_kwargs={}, response_metadata={}), AIMessage(content="I don't know if LonyinChan.com has any specific pages.", additional_kwargs={}, response_metadata={}), HumanMessage(content='Did I say hi already?', additional_kwargs={}, response_metadata={}), AIMessage(content='Based on the provided context, yes, the user did greet someone by saying "Hi" twice before asking about various topics related to cryptocurrency and trading platforms.', additional_kwargs={}, response_metadata={})], 'answer': 'Based on the provided context, yes, the user did greet someone by saying "Hi" twice before asking ab

In [33]:
print(response['answer'])  # Print the answer
print(response['source_documents'])  # Print the source documents

Based on the provided context, yes, the user did greet someone by saying "Hi" twice before asking about various topics related to cryptocurrency and trading platforms.
[Document(id='41cbae46-14f4-470e-b5fd-3aed87182020', metadata={'source': 'https://lonyinchan.com/blog/crypto_review', 'title': 'Lonyin Chan', 'language': 'en', 'start_index': 1777}, page_content='Spotify: Up to $9.99 equiv montly subscription fees\nNetflix: Up to $12.99 equiv standard monthly subscription - HD, 2 screens\nAmazon Prime: Up to $12.99\nExpedia: Up to $50\nAirbnb: Up to $100\n\n\nThese rebates are paid in the native crypto CRO and these benefits are not due to a result of a partnership between Crypto.com and those companies so Crypto.com may modify this offer at any time.\n\n\nIf you want to try out the card with no risk, you can get the Midnight Blue card which is free to sign up to, for the higher tier metal cards, you will need to stake the following amounts:\n\n\nRuby Steel = Stake 1K CRO ≅ £130 \nRoyal 