In [None]:
import getpass
import os

import openai
from langchain.chains import RetrievalQA
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings

In [None]:
# Keep real credentials out of the notebook: reuse OPENAI_API_KEY when the
# environment already provides it (instead of overwriting it with a pasted
# literal), otherwise prompt for it without echoing the input.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

In [None]:
directory = "contents"


def load_docs(directory):
    """Load every document found under ``directory``.

    Args:
        directory: Path handed to ``DirectoryLoader``.

    Returns:
        Whatever ``DirectoryLoader(directory).load()`` returns.
    """
    return DirectoryLoader(directory).load()

# Load the raw documents from the configured directory.
documents = load_docs(directory)
# Report only how many documents were loaded; printing the list itself would
# dump the full text of every file into the cell output.
print(len(documents))

In [None]:
def split_docs(documents, chunk_size=1000, chunk_overlap=50):
    """Split ``documents`` into smaller chunks.

    Args:
        documents: Documents passed to the splitter's ``split_documents``.
        chunk_size: Forwarded to ``RecursiveCharacterTextSplitter``.
        chunk_overlap: Forwarded to ``RecursiveCharacterTextSplitter``.

    Returns:
        The result of ``split_documents`` for the given documents.
    """
    return RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    ).split_documents(documents)

# Chunk the loaded documents using split_docs' default size/overlap,
# then print how many chunks were produced.
docs = split_docs(documents)
print(len(docs))

In [None]:
# Embedding client created with the library defaults; the same object is
# reused by every vector store built or opened later in the notebook.
embeddings = OpenAIEmbeddings()

# Index the chunks in a Chroma store (no persist_directory is given here).
db = Chroma.from_documents(documents=docs, embedding=embeddings)

In [None]:
# Sample question used to sanity-check retrieval against the store.
query = "What is the Login Procedure for Requests Requiring Approval?"
# No k is passed, so the library's default number of results applies.
matching_docs = db.similarity_search(query)
# Show the first hit as the cell output (this indexing fails if no results came back).
matching_docs[0]

In [None]:
# Same query, limited to 3 results and returned together with a score per hit.
# NOTE(review): for Chroma the score is presumably a distance (lower = closer) — confirm.
matching_docs2 = db.similarity_search_with_score(query,k=3)
matching_docs2

In [None]:
persist_directory = "chroma_db"

# Chroma.from_documents adds to whatever collection already lives in
# persist_directory, so re-running this cell would embed (and pay for) every
# chunk again and later searches would return duplicate hits.
# Build the index only when no persisted store exists yet; otherwise reopen it.
# Delete the "chroma_db" folder to force a rebuild after the source documents change.
if os.path.isdir(persist_directory) and os.listdir(persist_directory):
    vectordb = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
else:
    vectordb = Chroma.from_documents(documents=docs, embedding=embeddings, persist_directory=persist_directory)
    # Flush the freshly built collection to disk.
    vectordb.persist()

In [None]:
# Open the store saved under persist_directory with the same embedding function
# and repeat the scored query (top 3) against it.
new_db = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
matching_final = new_db.similarity_search_with_score(query,k=3)
matching_final

In [None]:
# Chat model used to generate the answers; only the model name is set,
# every other option is left at the library default.
model_name = "gpt-3.5-turbo"
llm = ChatOpenAI(model_name=model_name)

In [None]:
# Question-answering chain driven by the chat model above.
# NOTE(review): the "stuff" chain type presumably places all supplied documents
# into a single prompt — confirm against the LangChain docs.
chain = load_qa_chain(llm, chain_type="stuff")

In [None]:
query_final = "What is Ticket System?"

# Retrieve the chunks most similar to the question from the reopened store.
matching_docs_final = new_db.similarity_search(query_final)

# The chain is given both the question and the retrieved documents.
input_data = dict(
    question=query_final,
    input_documents=matching_docs_final,
)
answer = chain.invoke(input_data)
answer

In [None]:
# Print only the 'output_text' entry of the chain's result.
print(answer['output_text'])