In [1]:
# ! pip install -q langchain-openai langchain langchain-text-splitters lxml octoai-sdk pymilvus

In [3]:
from dotenv import load_dotenv
import os

load_dotenv()
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
OCTOAI_API_TOKEN = os.environ["OCTOAI_API_TOKEN"]

ModuleNotFoundError: No module named 'dotenv'

In [4]:
from langchain_text_splitters import RecursiveCharacterTextSplitter, HTMLHeaderTextSplitter

url = "https://en.wikipedia.org/wiki/Star_Wars"

headers_to_split_on = [
    ("h1", "Header 1"),
    ("h2", "Header 2"),
    ("h3", "Header 3"),
    ("h4", "Header 4"),
    ("div", "Divider")
]

html_splitter = HTMLHeaderTextSplitter(headers_to_split_on=headers_to_split_on)

# for local file use html_splitter.split_text_from_file(<path_to_file>)
html_header_splits = html_splitter.split_text_from_url(url)

ModuleNotFoundError: No module named 'langchain_text_splitters'

In [4]:
chunk_size = 1024
chunk_overlap = 128
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=chunk_size,
    chunk_overlap=chunk_overlap,
)

# Split
splits = text_splitter.split_documents(html_header_splits)

In [5]:
from langchain_community.vectorstores import Milvus

In [6]:
from langchain_community.embeddings import OctoAIEmbeddings
from langchain_community.llms.octoai_endpoint import OctoAIEndpoint
llm = OctoAIEndpoint(
        model="llama-2-13b-chat-fp16",
        max_tokens=1024,
        presence_penalty=0,
        temperature=0.1,
        top_p=0.9,
        
    )
embeddings = OctoAIEmbeddings(endpoint_url="https://text.octoai.run/v1/embeddings")

                model was transferred to model_kwargs.
                Please confirm that model is what you intended.


In [7]:
vector_store = Milvus.from_documents(
    splits,
    embedding=embeddings,
    connection_args={"host": "localhost", "port": 19530},
    collection_name="starwars"
)

In [8]:
retriever = vector_store.as_retriever()

In [None]:
from langchain.prompts import ChatPromptTemplate
template="""You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: {question} 
Context: {context} 
Answer:"""
prompt = ChatPromptTemplate.from_template(template)

In [61]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [62]:
chain.invoke("Who is Luke's Father?")

" Luke's father is Darth Vader."

In [9]:
from langchain.prompts import ChatPromptTemplate
template="""You are a literary critic. You are given some context and asked to answer questions based on only that context.
Question: {question} 
Context: {context} 
Answer:"""
lit_crit_prompt = ChatPromptTemplate.from_template(template)

In [12]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
lcchain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | lit_crit_prompt
    | llm
    | StrOutputParser()
)

In [14]:
from pprint import pprint

pprint(lcchain.invoke("What is the worst thing about Darth Vader's story line?"))

(" The worst thing about Darth Vader's storyline in the Star Wars prequels is "
 'that it is overly simplistic and lacks depth and complexity compared to the '
 'original trilogy. The prequels fail to explore the nuances of Anakin '
 "Skywalker's fall to the dark side and his transformation into Darth Vader, "
 'instead portraying him as a one-dimensional villain with little motivation '
 'or backstory. This lack of development makes it difficult for audiences to '
 "empathize with Anakin's journey and invest in his character arc. "
 "Additionally, the prequels' focus on political intrigue and CGI-heavy action "
 "scenes detracts from the emotional resonance of Anakin's story, making it "
 'feel shallow and unfulfilling.')
