In [1]:
import json
import os
import requests
from langchain.chains import RetrievalQA
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms.base import LLM
from langchain.prompts import PromptTemplate
from langchain.text_splitter import MarkdownTextSplitter
from langchain.vectorstores import Chroma
from typing import Optional, List, Mapping, Any
import transformers

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

tokenizer = AutoTokenizer.from_pretrained("lmsys/fastchat-t5-3b-v1.0")
model = AutoModelForSeq2SeqLM.from_pretrained("lmsys/fastchat-t5-3b-v1.0")

In [22]:
input_ids = tokenizer("Translate English to Spanish: The house is wonderful", return_tensors="pt").input_ids
outputs = model.generate(input_ids)
tokenizer.decode(outputs[0], skip_special_tokens=True)

'The house is wonderful'

In [4]:
class ApiLLM(LLM):
    
    model: transformers.models.t5.modeling_t5.T5ForConditionalGeneration
    tokenizer: transformers.models.t5.tokenization_t5_fast.T5TokenizerFast

    @property
    def _llm_type(self) -> str:
        return "custom"
    
    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        """Call the LLM."""
        input_ids = tokenizer(prompt, return_tensors="pt").input_ids
        outputs = model.generate(input_ids)
        return tokenizer.decode(outputs[0], skip_special_tokens=True)
    
    # @property
    # def _identifying_params(self) -> Mapping[str, Any]:
    #     """Get the identifying parameters."""
    #     return {"api_url": self.api_url, "api_params": self.api_params}

In [5]:
loader = DirectoryLoader('../data/external', glob="**/*.md", loader_cls=TextLoader)
documents = loader.load()  # FYI with the current dataset, documents[42] is the FAQ

text_splitter = MarkdownTextSplitter(chunk_overlap=0, chunk_size=500)  # Consider setting chunk_size=1000
texts = text_splitter.split_documents(documents)
print(f"{len(documents)} documents were loaded in {len(texts)} chunks")

74 documents were loaded in 10349 chunks


In [6]:
max_len = max(len(text.page_content) for text in texts)
max_len_idx = [i for i, text in enumerate(texts) if len(text.page_content) == max_len][0]
print(f"The longest text chunk is index {max_len_idx}, with lenght {max_len}")

The longest text chunk is index 334, with lenght 500


In [7]:
embeddings = HuggingFaceEmbeddings()

# https://langchain.readthedocs.io/en/latest/modules/indexes/vectorstore_examples/chroma.html#persist-the-database
db_dir = "../data/interim"
docsearch = None
if os.path.isdir(os.path.join(db_dir, "index")):
    # Load the existing vector store
    docsearch = Chroma(persist_directory=db_dir, embedding_function=embeddings)
else:
    # Create a new vector store
    docsearch = Chroma.from_documents(texts[:1000], embeddings, persist_directory=db_dir)
    docsearch.persist()

Using embedded DuckDB with persistence: data will be stored in: ../data/interim


In [8]:
llm = ApiLLM(model=model, tokenizer=tokenizer)

In [9]:
template = """You are a talkative AI model who loves to explain how things work. You are smart and constantly learning.
Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}
Detailed answer:"""
qa_prompt = PromptTemplate(template=template, input_variables=["question", "context"])
chain_type_kwargs = {"prompt": qa_prompt}

qa_chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=docsearch.as_retriever(), chain_type_kwargs=chain_type_kwargs)

In [12]:
queries = ['Where can I see a roadmap or make feature requests for the service?',
           'How is the pricing of Red Hat OpenShift Service on AWS calculated?',
           'Is there an upfront commitment?',
           'How can I delete ROSA cluster?',
           'Can I shut down my VMs temporarily?', # https://docs.openshift.com/rosa/rosa_architecture/rosa_policy_service_definition/rosa-service-definition.html#rosa-sdpolicy-instance-types_rosa-service-definition
           'How can I automatically deploy ROSA cluster?',
           'How can my ROSA cluster autoscale?',
           'How can I install aws load balancer controller',
           'How can I install Prometheus Operator with my ROSA cluster?',
           'What time is it?',
           'How can I federate metrics to a centralized Prometheus Cluster?',
           'What is the meaning of life?']

In [14]:
answers = []
for query in queries:
    answers.append(qa_chain(query))

# Print the answers
for result in answers:
    print("Question:", result["query"])
    # split the answer into chunks, separating context and answer
    print(result["result"])
    answer = result["result"].split("Detailed answer:")[1]
    print("Answer: ", answer)



Question: Where can I see a roadmap or make feature requests for the service?
Answer: Where can I see a roadmap or make feature requests for the service?


IndexError: list index out of range