In [1]:
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFDirectoryLoader
from dotenv import load_dotenv
import os
from langchain.document_loaders import UnstructuredExcelLoader



In [3]:
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.chains.llm import LLMChain
from langchain import PromptTemplate
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from dotenv import load_dotenv
import os


In [4]:

# Read variables from the local .env file into the process environment.
load_dotenv()

# Fail early with a readable message when the key is absent. The previous
# `os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")` was a no-op when
# the key was set and raised an opaque TypeError (str expected, not NoneType)
# when it was not.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; add it to the .env file or export it "
        "before running this cell."
    )

# load the documents from the docs directory
loader = PyPDFDirectoryLoader("docs/")
docs = loader.load()

# split the documents into overlapping chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
texts = text_splitter.split_documents(docs)

# directory where the vector store is persisted
persist_directory = 'db'
## here we are using OpenAI embeddings but in future we will swap out to local embeddings
embedding = OpenAIEmbeddings()

# Build the index only when no persisted store exists yet. Re-running
# Chroma.from_documents against an existing directory embeds every chunk again
# and appends the copies to the stored collection, so repeated runs of this
# cell would pay for the embeddings again and return duplicated sources.
index_exists = os.path.isdir(persist_directory) and bool(os.listdir(persist_directory))
if index_exists:
    print(
        f"Vector store already present in '{persist_directory}'; skipping rebuild. "
        "Delete that directory to re-index after adding or changing PDFs."
    )
else:
    # create the vector store
    vectordb = Chroma.from_documents(documents=texts,
                                     embedding=embedding,
                                     persist_directory=persist_directory)
    # persist the db to disk
    vectordb.persist()

# Drop the in-memory handle; the store is re-opened from disk in a later cell.
vectordb = None

In [5]:

# Re-open the Chroma store that was previously persisted under `persist_directory`.
persist_directory = 'db'

## OpenAI embeddings for now; the plan is to swap to local embeddings later
embedding = OpenAIEmbeddings()

vectordb = Chroma(
    embedding_function=embedding,
    persist_directory=persist_directory,
)


In [7]:
# Prompt text.
# CONDENSE_PROMPT turns a follow-up plus the chat history into a standalone question.
CONDENSE_PROMPT = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""

# QA_PROMPT is the answering prompt; its placeholders are {summaries} and {question}.
QA_PROMPT = """You will act as an AI Assistant that I am having a conversation with. You have access to a clients training records and will be asked questions relating to their performance. You will ask follow-up questions to clarify the last response and provide more accurate and personalized answers. If the answer is not included in your knowledge base, you will say 'Hmm, I am not sure.' and stop after that. Your goal is to provide the best possible guidance and support to help me with my queries and problems. You think things through step by step every time and show your working. You break down problems into simple steps before solving and always double check your answers. 

{summaries}

Question: {question}
Helpful answer in markdown or latex where equations are included:"""

# Wrap the raw strings in PromptTemplate objects, declaring their placeholders.
CONDENSE_QUESTION_PROMPT = PromptTemplate(
    template=CONDENSE_PROMPT,
    input_variables=["chat_history", "question"],
)
QA_PROMPT_TEMPLATE = PromptTemplate(
    template=QA_PROMPT,
    input_variables=["summaries", "question"],
)

# Two chat models at temperature 0: gpt-3.5-turbo rephrases the question,
# gpt-4 produces the final answer.
gpt3 = ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0)
gpt4 = ChatOpenAI(model_name='gpt-4', temperature=0)

# Sub-chains: question condensing and "stuff"-type answering over documents.
question_generator = LLMChain(prompt=CONDENSE_QUESTION_PROMPT, llm=gpt3)
doc_chain = load_qa_with_sources_chain(
    gpt4,
    prompt=QA_PROMPT_TEMPLATE,
    chain_type="stuff",
)

# Main retrieval chain; it also returns the source documents with each result.
chain = ConversationalRetrievalChain(
    combine_docs_chain=doc_chain,
    question_generator=question_generator,
    retriever=vectordb.as_retriever(),
    return_source_documents=True,
)

def chat_bot_response(message, chat_history):
    """Run one question through the module-level retrieval ``chain``.

    Args:
        message: The user's question, passed to the chain as ``"question"``.
        chat_history: Prior conversation, passed to the chain as
            ``"chat_history"``. This function does not append to it itself,
            so callers must extend it between turns if they want follow-up
            context.

    Returns:
        Whatever ``chain`` returns for these inputs.
    """
    chain_inputs = {"question": message, "chat_history": chat_history}
    return chain(chain_inputs)


In [8]:
# Query 1: bench-press performance, asked with an empty conversation history.
message = "What is the best bench press result?"
chat_history = []
res = chat_bot_response(message=message, chat_history=chat_history)

In [12]:
# Display only the answer text of the most recent result.
# NOTE(review): this cell's execution count (12) is later than the cells below
# it (10, 11), so the saved output reflects whichever query ran last rather
# than the bench-press question above. Re-run top to bottom to refresh it.
res["answer"]

'The client has reported discomfort in the right shoulder, specifically in the pec minor region. This discomfort seems to restrict certain movements. The client also mentioned that they are unable to perform the decline DB press without damaging the DBs, possibly due to difficulty in getting up. Additionally, they have requested to remove pull-ups from their routine, which could suggest difficulty or discomfort in performing this exercise.'

In [10]:
# Query 2: injuries / training difficulties, again with an empty history.
message = "What injuries or difficulties training does the client have?"
chat_history = []
res = chat_bot_response(message=message, chat_history=chat_history)

In [11]:
# Show the complete chain result: question, chat_history, answer and source_documents.
res

{'question': 'What injuries or difficulties training does the client have?',
 'chat_history': [],
 'answer': 'The client has reported discomfort in the right shoulder, specifically in the pec minor region. This discomfort seems to restrict certain movements. The client also mentioned that they are unable to perform the decline DB press without damaging the DBs, possibly due to difficulty in getting up. Additionally, they have requested to remove pull-ups from their routine, which could suggest difficulty or discomfort in performing this exercise.',
 'source_documents': [Document(page_content="000000\nCant do decline DB press cause cant get up without hurting the DB's\nTake pull ups out\nDiscomfort in right shoulder (pec minor region) restriction \nGive floor press option", metadata={'source': 'docs\\test.pdf', 'page': 60}),
  Document(page_content='000000\nWants arms to grow - inparticular biceps', metadata={'source': 'docs\\test.pdf', 'page': 14}),
  Document(page_content='Week 6 Cros