In [1]:
# ! pip install pymilvus milvus langchain sentence-transformers tiktoken octoai-sdk

In [1]:
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain_community.llms.octoai_endpoint import OctoAIEndpoint

In [2]:
from dotenv import load_dotenv
import os

load_dotenv()
os.environ["OCTOAI_API_TOKEN"] = os.getenv("OCTOAI_API_TOKEN")

In [3]:
template = """Below is an instruction that describes a task. Write a response that appropriately completes the request.\n Instruction:\n{question}\n Response: """
prompt = PromptTemplate.from_template(template)

In [4]:
llm = OctoAIEndpoint(
    endpoint_url="https://text.octoai.run/v1/chat/completions",
    model_kwargs={
        "model": "mixtral-8x7b-instruct-fp16",
        "max_tokens": 128,
        "presence_penalty": 0,
        "temperature": 0.01,
        "top_p": 0.9,
        "messages": [
            {
                "role": "system",
                "content": "You are a helpful assistant. Keep your responses limited to one short paragraph if possible.",
            },
        ],
    },
)

In [5]:
question = "Who was leonardo davinci?"

llm_chain = LLMChain(prompt=prompt, llm=llm)

print(llm_chain.invoke(question)["text"])

 Leonardo da Vinci (1452-1519) was an Italian polymath who is often regarded as one of the greatest painters in history. He is also celebrated for his technological ingenuity, scientific curiosity, and philosophical wisdom. Da Vinci is widely known for his masterpieces such as 'The Last Supper' and 'Mona Lisa.' As an artist, scientist, mathematician, engineer, inventor, anatomist, geologist, cartographer, botanist, musician, and writer, da Vinci embodied the Renaissance ideal. His thirst for


In [6]:
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Milvus

In [7]:
embeddings = HuggingFaceEmbeddings()

  from .autonotebook import tqdm as notebook_tqdm


In [9]:
from milvus import default_server

In [10]:
default_server.start()

In [8]:
from langchain.text_splitter import CharacterTextSplitter
from langchain.schema import Document
import os

In [10]:
files = os.listdir("./data")
files

['Boston.txt',
 'Cambridge, Massachusetts.txt',
 'Chicago.txt',
 'Houston.txt',
 'San Francisco.txt',
 'Seattle.txt',
 'Toronto.txt',
 'Washington, D.C..txt']

In [11]:
file_texts = []

In [13]:
for file in files:
    with open(f"./data/{file}", encoding="utf8") as f:
        file_text = f.read()
    text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=512, chunk_overlap=64, 
    )
    texts = text_splitter.split_text(file_text)
    for i, chunked_text in enumerate(texts):
        file_texts.append(Document(page_content=chunked_text, 
                metadata={"doc_title": file.split(".")[0], "chunk_num": i}))

Created a chunk of size 713, which is longer than the specified 512
Created a chunk of size 543, which is longer than the specified 512
Created a chunk of size 838, which is longer than the specified 512
Created a chunk of size 666, which is longer than the specified 512
Created a chunk of size 690, which is longer than the specified 512
Created a chunk of size 758, which is longer than the specified 512
Created a chunk of size 1142, which is longer than the specified 512
Created a chunk of size 1014, which is longer than the specified 512
Created a chunk of size 531, which is longer than the specified 512
Created a chunk of size 1038, which is longer than the specified 512
Created a chunk of size 585, which is longer than the specified 512
Created a chunk of size 716, which is longer than the specified 512
Created a chunk of size 631, which is longer than the specified 512
Created a chunk of size 972, which is longer than the specified 512
Created a chunk of size 696, which is longer 

In [18]:
vector_store = Milvus.from_documents(
    file_texts,
    embedding=embeddings,
    connection_args={"host": "localhost", "port": 19530},
    collection_name="filter_test",
)

TypeError: Milvus.__init__() got an unexpected keyword argument 'filter'

In [16]:
file_texts[0]

Document(page_content="Boston (US: ), officially the City of Boston, is the capital and most populous city of the U.S. state of Massachusetts, and the cultural and financial center of New England in the Northeastern United States, with an area of 48.4 sq mi (125 km2) and a population of 675,647 in 2020. Greater Boston metropolitan statistical area is the eleventh-largest in the country.Boston is one of the United States's oldest municipalities. It was founded on the Shawmut Peninsula in 1630 by Puritan settlers from Boston, Lincolnshire. During the American Revolution, Boston was the location of several key events, including the Boston Massacre, the Boston Tea Party, the hanging of Paul Revere's lantern signal in Old North Church, the Battle of Bunker Hill, and the siege of Boston. Following American independence from Great Britain, the city continued to play an important role as a port, manufacturing hub, and center for education and culture. The city has expanded beyond the original 

In [18]:
retriever = vector_store.as_retriever()

# https://www.e2enetworks.com/blog/implementing-a-rag-pipeline-with-mixtral-8x7b
# retriever = db.as_retriever(
#    search_type="similarity",
#    search_kwargs={'k': 20}
# )

In [19]:
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = PromptTemplate.from_template(template)

In [20]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [21]:
chain.invoke("How big is the city of Seattle?")

' The city of Seattle has a population of 749,256 as of 2022, and the Seattle metropolitan area has a population of 4.02 million.'

In [27]:
# Let's make this a bit more fun and showcase the multilingual capabilities of Mixtal which really outshine other open source models

# Our Vector DB is populated with entries from english text - even the embedding model we're using here, GTE-Large
# works best on english text. However Mixtral has good mutlilingual capabilities in French, German, Spanish and Italian.
# So what we'll do is ask the assistant to only answer in french in the system and user prompt. RAG here is performed based on 
# english text, but upon producing the user response, the Mixtral LLM will generate tokens in a different language here (french)
french_llm = OctoAIEndpoint(
    endpoint_url="https://text.octoai.run/v1/chat/completions",
    model_kwargs={
        "model": "mixtral-8x7b-instruct-fp16",
        "max_tokens": 128,
        "presence_penalty": 0,
        "temperature": 0.1,
        "top_p": 0.9,
        "messages": [
            {
                "role": "system",
                "content": "You are a helpful assistant who responds in French and not in English.",
            },
        ],
    },
)

french_template = """Answer the question in French based only on the following context:
{context}

Question: {question}
"""
french_prompt = PromptTemplate.from_template(template)

In [28]:
french_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | french_prompt
    | french_llm
    | StrOutputParser()
)

In [29]:
french_chain.invoke("How big is the city of Seattle?")

" Based on the document provided, the city of Seattle is the most populous city in both the state of Washington and the Pacific Northwest region of North America with a population of 749,256 in 2022. The Seattle metropolitan area's population is 4.02 million, making it the 15th-largest in the United States."

In [None]:
# default_server.stop()
# default_server.cleanup()