<h1>Custom Chat Model</h1>

In [1]:
from customchatmodel import CustomChatModel
from langchain_core.messages import (
    AIMessage,
    AIMessageChunk,
    BaseMessage,
    HumanMessage,
    SystemMessage
)


model = CustomChatModel(
    model="llama3",
)

response = model.invoke([HumanMessage(content="Hello, what's your name")])  # Replace with your input
print(response)  # Print the model's response

INFO:customchatmodel:Messages: Hello, what's your name
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
INFO:customchatmodel:Response: Nice to meet you! I don't have a personal name, as I'm just an AI designed to assist and communicate with humans. You can call me "Assistant" or simply "AI" if you like! What brings you here today?
INFO:customchatmodel:Response type: <class 'str'>


content='Nice to meet you! I don\'t have a personal name, as I\'m just an AI designed to assist and communicate with humans. You can call me "Assistant" or simply "AI" if you like! What brings you here today?' additional_kwargs={} response_metadata={'time_in_seconds': 3, 'model_name': 'llama3'} id='run--4a9706ad-92e5-4a62-920f-faaa03b591c8-0'


<h1>Custom Embedding</h1>

In [2]:
from customembedding import CustomEmbeddings

embeddings = CustomEmbeddings(
    model="llama3",
)
embeddings_response = embeddings.embed_query("Hello, what's your name") 
print(embeddings_response)  

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"


[-0.016260805, -0.004910103, 0.018197004, -0.010385191, -0.0015933511, 0.0044628796, -0.008589921, -0.0057891984, -0.016772594, 0.016140139, 0.0004573324, 0.001535176, -0.03309529, -0.00071306544, -0.014465894, 0.03578517, 0.0009131527, -0.005472246, 0.004115192, 0.00601783, -0.006367761, -0.008125224, 0.03365267, 0.0042866366, -0.025401121, 0.01237864, 0.034958337, 0.003582914, 0.025351735, -0.016474884, -0.0020551574, 0.027074192, -0.017952522, -0.003952482, 0.028064752, -0.005315904, -0.002933831, -0.0033869531, 0.020620966, 0.008274718, 0.0045495247, 0.004408731, 0.01932795, -0.008810706, -0.0050550248, -0.0045889434, 0.01314585, -0.014842568, 0.014136675, -0.0033909858, -0.0066454317, -0.012173731, -0.023511035, -0.009429082, 0.008088046, -0.0025798623, -0.016866308, 0.00237712, -0.012087473, 0.0092963595, -0.016449183, -0.017065821, -0.006369788, -0.005690944, -0.001654282, 0.013422571, -0.0022851788, 0.0046388055, 0.0028503977, 0.011331693, 0.016631113, -0.027718222, 0.01563984,

In [3]:
embeddings_response = embeddings.embed_documents(["Hello, what's your name", "hi", "hey"]) 
print(embeddings_response)  
print(len(embeddings_response)) 

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"


[[-0.016260805, -0.004910103, 0.018197004, -0.010385191, -0.0015933511, 0.0044628796, -0.008589921, -0.0057891984, -0.016772594, 0.016140139, 0.0004573324, 0.001535176, -0.03309529, -0.00071306544, -0.014465894, 0.03578517, 0.0009131527, -0.005472246, 0.004115192, 0.00601783, -0.006367761, -0.008125224, 0.03365267, 0.0042866366, -0.025401121, 0.01237864, 0.034958337, 0.003582914, 0.025351735, -0.016474884, -0.0020551574, 0.027074192, -0.017952522, -0.003952482, 0.028064752, -0.005315904, -0.002933831, -0.0033869531, 0.020620966, 0.008274718, 0.0045495247, 0.004408731, 0.01932795, -0.008810706, -0.0050550248, -0.0045889434, 0.01314585, -0.014842568, 0.014136675, -0.0033909858, -0.0066454317, -0.012173731, -0.023511035, -0.009429082, 0.008088046, -0.0025798623, -0.016866308, 0.00237712, -0.012087473, 0.0092963595, -0.016449183, -0.017065821, -0.006369788, -0.005690944, -0.001654282, 0.013422571, -0.0022851788, 0.0046388055, 0.0028503977, 0.011331693, 0.016631113, -0.027718222, 0.01563984

<h1>Document Loader</h1>

In [4]:
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import WebBaseLoader
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin


def get_all_pages(base_url):
    visited = set()  # To keep track of visited URLs
    pages = []       # To store all the pages

    def crawl(url):
        if url in visited:
            return
        visited.add(url)

        try:
            response = requests.get(url)
            response.raise_for_status()  # Raise an error for bad status codes
            soup = BeautifulSoup(response.text, 'html.parser')
            pages.append(url)

            # Find all links on the page
            for link in soup.find_all('a', href=True):
                full_url = urljoin(base_url, link['href'])  # Resolve relative URLs
                if base_url in full_url:  # Ensure the link is part of the same domain
                    crawl(full_url)
        except Exception as e:
            print(f"Failed to crawl {url}: {e}")

    crawl(base_url)
    return pages

base_url = "https://lonyinchan.com"  # Replace with your target URL
all_pages = get_all_pages(base_url)
print(all_pages)  # Print all crawled pages

loader = WebBaseLoader(
    web_paths=(all_pages)
)
docs = loader.load()

print(docs)



Failed to crawl https://lonyinchan.com/lonyinchan@snowbytesarcade.com: 404 Client Error: Not Found for url: https://www.lonyinchan.com/lonyinchan@snowbytesarcade.com
Failed to crawl https://lonyinchan.com/www.trading212.com/invite/FfyLlFFx: 404 Client Error: Not Found for url: https://www.lonyinchan.com/www.trading212.com/invite/FfyLlFFx
['https://lonyinchan.com', 'https://lonyinchan.com/', 'https://lonyinchan.com/games', 'https://lonyinchan.com/old_projects', 'https://lonyinchan.com/blog', 'https://lonyinchan.com/about_me', 'https://lonyinchan.com/contact_me', 'https://lonyinchan.com/blog/trading_212_review', 'https://lonyinchan.com/blog/guide_to_stock_market', 'https://lonyinchan.com/blog/crypto_review', 'https://lonyinchan.com/blog/guide_to_crypto', 'https://lonyinchan.com/blog/page_2', 'https://lonyinchan.com/old_projects/mars_rover', 'https://lonyinchan.com/assets/old_projects/mars_rover/fullscreen/', 'https://lonyinchan.com/old_projects/fire_sim', 'https://lonyinchan.com/assets/o

<h1>Chunking</h1>

In [5]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,  # chunk size (characters)
    chunk_overlap=200,  # chunk overlap (characters)
    add_start_index=True,  # track index in original document
)
chunked_docs = text_splitter.split_documents(docs)

print(chunked_docs)  # Print chunked documents

[Document(metadata={'source': 'https://lonyinchan.com', 'title': 'Lonyin Chan', 'description': 'A custom made website with Games, Simulations and a Blog with interesting articles on it!', 'language': 'en', 'start_index': 6}, page_content='Lonyin Chan\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nLonyin Chan\n\n\n\n\n\n\n\n\n\n\n\n\n\nLonyin Chan\n\n\n\nHome\n\n\nGames\n\n\nOld Projects\n\n\nBlog\n\n\n\nAbout Me\nContact Me\n\n\n\n\n\n\n\n\nLonyin Chan\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nMade with lots of ☕ by Lonyin\n\n\n\n1 web designer\n1 web developer\n31 Red Bulls\n458 Cups of Coffee\n12 Sleepless nights'), Document(metadata={'source': 'https://lonyinchan.com/', 'title': 'Lonyin Chan', 'description': 'A custom made website with Games, Simulations and a Blog with interesting articles on it!', 'language': 'en', 'start_index': 6}, page_content='Lonyin Chan\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nLonyin Chan\n\n\n\n\n\n\n\n\n\n\n\n\n\nLonyin Chan\n\n\n\nHome\n\n\nGames\n\n\nOld 

<h1>Indexing</h1>

In [6]:
from langchain.vectorstores import FAISS

vectorstore = FAISS.from_documents(
    chunked_docs,
    embeddings,
)

print(vectorstore)  # Print the vector store

vectorstore.save_local("faiss_index")

loaded_vectorstore = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)

print(loaded_vectorstore)  # Print the loaded vector store

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
INFO:faiss.loader:Loading faiss with AVX2 support.
INFO:faiss.loader:Successfully loaded faiss with AVX2 support.
INFO:faiss:Failed to load GPU Faiss: name 'GpuIndexIVFFlat' is not defined. Will not load constructor refs for GPU indexes. This is only an error if you're trying to use GPU Faiss.


<langchain_community.vectorstores.faiss.FAISS object at 0x0000022BF4A0DEE0>
<langchain_community.vectorstores.faiss.FAISS object at 0x0000022BF4A0DF70>


<h1>RetrievalQA</h1>

In [7]:
from langchain.chains import RetrievalQA

qa = RetrievalQA.from_chain_type(
    llm=model,
    chain_type="stuff",
    retriever=loaded_vectorstore.as_retriever(search_kwargs={"k": 1}),
    return_source_documents=True,
)

response = qa("What pages does this lonyinchan.com have?")

print(response)  # Print the response
print(response['result'])  # Print the answer
print(response['source_documents'])  # Print the source documents

  response = qa("What pages does this lonyinchan.com have?")
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
INFO:customchatmodel:Messages: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
How to buy crypto?


Crypto uses a system called blockchain that acts like the bank recording and storing all crypto transactions. The blockchain is run by crypto miners who create crypto by running complex equations that creates code and encrypts
                the software. This makes crypto transactions very secure with the codes generated making it difficult to hack. To get into crypto is as easy as finding a platform such as Coinbase or Crypto.com and using fiat(real-world currency) to
                exchange for the cryptocurrency.


What can you do with crypto?


Buy goods


As cryptocurrency contains value, you can use crypto to buy

{'query': 'What pages does this lonyinchan.com have?', 'result': "Based on the provided context, I don't see any information about the website lonyinchan.com or its pages. The text only discusses how to buy crypto and what you can do with it. If you're asking about the pages of a specific website, I'd be happy to help if you provide more context or clarify which website you're referring to!", 'source_documents': [Document(id='1c89579e-6a30-4695-ba48-e5e2d8857bd6', metadata={'source': 'https://lonyinchan.com/blog/guide_to_crypto', 'title': 'Lonyin Chan', 'language': 'en', 'start_index': 902}, page_content='How to buy crypto?\n\n\nCrypto uses a system called blockchain that acts like the bank recording and storing all crypto transactions. The blockchain is run by crypto miners who create crypto by running complex equations that creates code and encrypts\n                the software. This makes crypto transactions very secure with the codes generated making it difficult to hack. To get i

<h1>Tuning</h1>

In [8]:
messages = [
    SystemMessage(
        content="You are a very helpful assistant! Your name is Lonyin."
    ),
    HumanMessage(
        content="What is your name?"
    )
]
response = model.invoke(messages)
print(response)  # Print the model's response

INFO:customchatmodel:Messages: You are a very helpful assistant! Your name is Lonyin.What is your name?
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
INFO:customchatmodel:Response: Hello there! *big smile* My name is Lonyin, and I'm thrilled to be your helpful assistant today! I'm here to assist you with any questions, tasks, or topics you'd like to discuss. What can I help you with first?
INFO:customchatmodel:Response type: <class 'str'>


content="Hello there! *big smile* My name is Lonyin, and I'm thrilled to be your helpful assistant today! I'm here to assist you with any questions, tasks, or topics you'd like to discuss. What can I help you with first?" additional_kwargs={} response_metadata={'time_in_seconds': 3, 'model_name': 'llama3'} id='run--ba419701-c5c6-46e1-ab5d-1226edbbdda4-0'


<h1>Tooling</h1>

<h1>Chat History</h1>

In [9]:
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

# Create a memory object to store chat history
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True,
    output_key="answer"
)

# Create a conversational retrieval chain that combines the model, retriever, and memory
conversational_qa = ConversationalRetrievalChain.from_llm(
    llm=model,
    retriever=loaded_vectorstore.as_retriever(search_kwargs={"k": 3}),
    memory=memory,
    chain_type="stuff",
    return_source_documents=True
)

# Example conversation with context from indexed documents
question1 = "What pages does lonyinchan.com have?"
result1 = conversational_qa({"question": question1})

print("Question:", question1)
print("Answer:", result1["answer"])
print("Source Documents:", result1["source_documents"])

# Ask a follow-up question that references the previous question
question2 = "hi"
result2 = conversational_qa({"question": question2})

print("\nQuestion:", question2)
print("Answer:", result2["answer"])
print("Source Documents:", result2["source_documents"])

# Print the chat history
print("\nChat History:")
print(memory.chat_memory.messages)

  memory = ConversationBufferMemory(
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
INFO:customchatmodel:Messages: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
For example, using the 4-to-1 ratio, when you already own 50 shares worth $300 per share. You will have 200 shares worth $75 each after a stock split. This only changes the number of shares you own of a company where the overall
                value that you own remaing the same. Stock splits usually occur when the share price is at such a high price that smaller investors cannot own the share where after a stock split it becomes more accessible and appealing. Although some
                brokerages(More on this later) offer fractional shares which allow you to own part of a share at the corresponding price.


Dividends


(Image credit: Fool)

Popular cryptocurren

Question: What pages does lonyinchan.com have?
Answer: I don't know. The provided context appears to be about stock splits, dividends, cryptocurrencies, mining, and a trading platform (Trading 212). It doesn't mention Lonyinchan.com or any pages related to it. If you're asking about a specific website, I'd be happy to help if you provide more information!
Source Documents: [Document(id='68c28e72-a92a-4db8-835d-058a0f4d48da', metadata={'source': 'https://lonyinchan.com/blog/guide_to_stock_market', 'title': 'Lonyin Chan', 'language': 'en', 'start_index': 3041}, page_content='For example, using the 4-to-1 ratio, when you already own 50 shares worth $300 per share. You will have 200 shares worth $75 each after a stock split. This only changes the number of shares you own of a company where the overall\n                value that you own remaing the same. Stock splits usually occur when the share price is at such a high price that smaller investors cannot own the share where after a stock s

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
INFO:customchatmodel:Response: What is the context about that Lonyinchan.com is mentioned in?
INFO:customchatmodel:Response type: <class 'str'>
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
INFO:customchatmodel:Messages: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
For example, using the 4-to-1 ratio, when you already own 50 shares worth $300 per share. You will have 200 shares worth $75 each after a stock split. This only changes the number of shares you own of a company where the overall
                value that you own remaing the same. Stock splits usually occur when the share price is at such a high price that smaller investors cannot own the share where after a stock split it becomes more accessible and appealing. Although some
   


Question: hi
Answer: The context does not mention Lonyinchan.com at all.
Source Documents: [Document(id='68c28e72-a92a-4db8-835d-058a0f4d48da', metadata={'source': 'https://lonyinchan.com/blog/guide_to_stock_market', 'title': 'Lonyin Chan', 'language': 'en', 'start_index': 3041}, page_content='For example, using the 4-to-1 ratio, when you already own 50 shares worth $300 per share. You will have 200 shares worth $75 each after a stock split. This only changes the number of shares you own of a company where the overall\n                value that you own remaing the same. Stock splits usually occur when the share price is at such a high price that smaller investors cannot own the share where after a stock split it becomes more accessible and appealing. Although some\n                brokerages(More on this later) offer fractional shares which allow you to own part of a share at the corresponding price.\n\n\nDividends\n\n\n(Image credit: Fool)'), Document(id='303fb0e0-3b81-4496-a398-d3d

<h1>Test Chat History</h1>

In [10]:
response = conversational_qa("Did I say hi already?")  # Ask a follow-up question
print(response)  # Print the model's response

INFO:customchatmodel:Messages: Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:

Human: What pages does lonyinchan.com have?
Assistant: I don't know. The provided context appears to be about stock splits, dividends, cryptocurrencies, mining, and a trading platform (Trading 212). It doesn't mention Lonyinchan.com or any pages related to it. If you're asking about a specific website, I'd be happy to help if you provide more information!
Human: hi
Assistant: The context does not mention Lonyinchan.com at all.
Follow Up Input: Did I say hi already?
Standalone question:
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
INFO:customchatmodel:Response: Did I ask what pages lonyinchan.com has already?
INFO:customchatmodel:Response type: <class 'str'>
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
INFO:customchatmodel:

{'question': 'Did I say hi already?', 'chat_history': [HumanMessage(content='What pages does lonyinchan.com have?', additional_kwargs={}, response_metadata={}), AIMessage(content="I don't know. The provided context appears to be about stock splits, dividends, cryptocurrencies, mining, and a trading platform (Trading 212). It doesn't mention Lonyinchan.com or any pages related to it. If you're asking about a specific website, I'd be happy to help if you provide more information!", additional_kwargs={}, response_metadata={}), HumanMessage(content='hi', additional_kwargs={}, response_metadata={}), AIMessage(content='The context does not mention Lonyinchan.com at all.', additional_kwargs={}, response_metadata={}), HumanMessage(content='Did I say hi already?', additional_kwargs={}, response_metadata={}), AIMessage(content='It seems like you\'re asking me to find information about the pages on Lonyin Chan\'s website, specifically "lonyinchan.com".\n\nFrom the context provided, it appears tha

In [11]:
print(response['answer'])  # Print the answer
print(response['source_documents'])  # Print the source documents

It seems like you're asking me to find information about the pages on Lonyin Chan's website, specifically "lonyinchan.com".

From the context provided, it appears that Lonyin Chan's website has the following pages:

1. Home
2. Games
3. Old Projects
4. Blog
5. About Me
6. Contact Me

Please note that I'm not sure if this information is up-to-date or accurate, as it might have changed since the context was written.
[Document(id='a3ba4ee5-4524-4a7d-9f13-468bf3f77e6d', metadata={'source': 'https://lonyinchan.com/blog/trading_212_review', 'title': 'Lonyin Chan', 'language': 'en', 'start_index': 2949}, page_content='to invest from 50 exchanges, you choice is sometimes limited although the pros do outweigh this small con, especially when most well-established stocks are available on Trading 212.'), Document(id='c9553677-4bb7-4539-a107-eb76510afdfa', metadata={'source': 'https://lonyinchan.com/blog/trading_212_review', 'title': 'Lonyin Chan', 'language': 'en', 'start_index': 5}, page_content='