In [1]:
import os
from langchain_community.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
import config

# Folder holding the manufacturer manuals (PDF) that get indexed below.
MANUALS_DIR = "/teamspace/studios/this_studio/para_chat/manuals"

# Embedding model used for every store; the app must query with the same model.
EMBEDDING_MODEL = "text-embedding-3-large"

text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000,
                                               chunk_overlap=200)

# One embeddings client shared by all stores instead of one per manual.
embeddings = OpenAIEmbeddings(model=EMBEDDING_MODEL)


def build_vector_db(pdf_filename, persist_directory):
    """Index one PDF manual into its own persisted Chroma store.

    Args:
        pdf_filename: File name of the manual inside ``MANUALS_DIR``.
        persist_directory: Directory the Chroma store is persisted to
            (relative paths resolve against the current working directory).

    Returns:
        A ``(docs, vector_db)`` tuple: the chunks produced by
        ``text_splitter`` and the Chroma store built from them.
    """
    pages = PyMuPDFLoader(os.path.join(MANUALS_DIR, pdf_filename)).load()
    docs = text_splitter.split_documents(pages)
    # NOTE(review): re-running this against an existing persist_directory
    # presumably adds the same chunks again -- confirm, and clear the
    # directory before a rebuild if so.
    vector_db = Chroma.from_documents(docs,
                                      embedding=embeddings,
                                      persist_directory=persist_directory)
    vector_db.persist()
    return docs, vector_db


# One store per container model; names and directories are unchanged.
javelin_docs, vector_db_javelin = build_vector_db("javelin_manual.pdf", "./javelin_db/")
vector3_docs, vector_db_vector3 = build_vector_db("vector3_manual.pdf", "./vector3_db/")
firebird_docs, vector_db_firebird = build_vector_db("firebird_2024_manual.pdf", "./firebird_db/")
icon_docs, vector_db_icon = build_vector_db("icon_manual.pdf", "./icon_db/")
mirage_G4_docs, vector_db_mirage_G4 = build_vector_db("mirage_G4_manual.pdf", "./mirage_G4_db/")
wings_docs, vector_db_wings = build_vector_db("Wings_manual.pdf", "./wings_db/")

In [1]:
%%writefile /teamspace/studios/this_studio/para_chat/code/ParaChat_app.py

# importing dependencies

from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
from langchain.chains import ConversationalRetrievalChain
from langchain_openai import ChatOpenAI
import streamlit as st
import config


# setting the llm model:

# Chat model handed to the memory and the retrieval chain below
# (gpt-4o with a low sampling temperature).
llm = ChatOpenAI(model_name="gpt-4o", temperature=0.2)




#@st.cache_resource
# Maps each selectable container model to the directory of its persisted
# Chroma store. The directory names mirror the persist directories used when
# the stores were built (javelin_db, vector3_db, firebird_db, icon_db,
# mirage_G4_db, wings_db).
BRAND_DB_DIRS = {
    "SunPath Javelin": "/teamspace/studios/this_studio/javelin_db",
    "UPT Vector 3": "/teamspace/studios/this_studio/vector3_db",
    "Firebird EVO": "/teamspace/studios/this_studio/firebird_db",
    "Aerodyne Icon": "/teamspace/studios/this_studio/icon_db",
    # Was ".../mirage_db", which does not match the "mirage_G4_db" directory
    # the ingestion step persists to.
    "Mirage G4": "/teamspace/studios/this_studio/mirage_G4_db",
    "Sunrise Manufacturing Wings": "/teamspace/studios/this_studio/wings_db",
}


def load_vector_database(brand):
    """Open the persisted Chroma store for the given container model.

    Args:
        brand: Display name of the container model; must be a key of
            ``BRAND_DB_DIRS``.

    Returns:
        A ``Chroma`` store opened on the model's persist directory, using
        the "text-embedding-3-large" OpenAI embedding model.

    Raises:
        ValueError: If ``brand`` is not a known container model (previously
            this fell through and returned ``None``).
    """
    try:
        persist_directory = BRAND_DB_DIRS[brand]
    except KeyError:
        raise ValueError(
            f"Unknown container model {brand!r}; "
            f"expected one of {sorted(BRAND_DB_DIRS)}"
        ) from None

    return Chroma(
        persist_directory=persist_directory,
        embedding_function=OpenAIEmbeddings(model="text-embedding-3-large"),
    )
                        



#@st.cache_resource
def create_retriever(vector_db, k=3):
    """Wrap a vector store in a retriever.

    Args:
        vector_db: Any object exposing ``as_retriever(search_kwargs=...)``.
        k: Value passed as ``search_kwargs["k"]``. Defaults to 3, the value
            that used to be hard-coded.

    Returns:
        Whatever ``vector_db.as_retriever`` returns.
    """
    return vector_db.as_retriever(search_kwargs={"k": k})

# Load the vector database

# Container models offered in the sidebar; each must be a brand that
# load_vector_database() knows how to open.
CONTAINER_MODELS = [
    "SunPath Javelin",
    "UPT Vector 3",
    "Firebird EVO",
    "Aerodyne Icon",
    "Mirage G4",
    "Sunrise Manufacturing Wings",
]

with st.sidebar:
    st.title("Select your container model")
    # Streamlit warns about empty widget labels, so give the selectbox a real
    # label and hide it; the title above already serves as the visible prompt.
    brand = st.selectbox("Container model",
                         CONTAINER_MODELS,
                         label_visibility="collapsed")

vector_db = load_vector_database(brand)

# Create the retriever
retriever = create_retriever(vector_db)


# Memory

def init_memory(_llm):
    """Return the conversation memory belonging to the current browser session.

    The memory used to be wrapped in ``@st.cache_resource``, which shares a
    single object across every user and session of the app, so one user's
    chat history would feed into another user's answers. It now lives in
    ``st.session_state``: it still survives Streamlit reruns, but is private
    to each session.

    Args:
        _llm: Chat model forwarded to ``ConversationBufferMemory``. The
            parameter name is kept for backward compatibility; the body
            previously ignored it and read the global ``llm`` instead.

    Returns:
        The session's ``ConversationBufferMemory`` (created on first call).
    """
    if "memory" not in st.session_state:
        st.session_state["memory"] = ConversationBufferMemory(
            llm=_llm,
            output_key='answer',
            memory_key='chat_history',
            return_messages=True)
    return st.session_state["memory"]
memory = init_memory(llm)


# prompt

# Prompt used for the answer-generation step of the retrieval chain.
# It has two placeholders: {question} (wrapped in <q> tags) and {context}
# (wrapped in <c> tags), matching the input_variables declared below.
#
# NOTE(review): the <s> / [INST] markers look like an instruction format for
# a different model family than the "gpt-4o" model configured above, and the
# final "[INST]" after "Helpful Answer:" is not a closing "[/INST]" -- confirm
# whether these markers are intentional.
# NOTE(review): the text contains typos ("You are polite and professional ...
# assistant", "understandeable"); left untouched here because the string is
# sent to the model verbatim.
template = """
<s> [INST]
You are polite and professional question-answering AI assistant specialized in answering technical questions about skydiving equipment.
It is very important that you answer the question specific to each manufacturer and container model.
If you are not sure which manufacturer the user is asking about, please ask for it again.
The user needs to input the manufacturer and the name of the skydive Container system, if that doesn't happen, you should ask for it.

In your response, PLEASE ALWAYS:
  (0) Be a detail-oriented reader: read the question and context and understand both before answering
  (1) Start your answer with a friendly tone, and reiterate the question so the user is sure you understood it
  (2) If the context enables you to answer the question, write a detailed, helpful, and easily understandable answer. If you can't find the answer, respond with an explanation, starting with: "I couldn't find the answer in the information I have access to".
  (3) Ensure your answer answers the question, is helpful and professional, it only needs to be understandeable by professionals.
[/INST]
[INST]
Answer the following question using the context provided.
The question is surrounded by the tags <q> </q>.
The context is surrounded by the tags <c> </c>.
<q>
{question}
</q>
<c>
{context}
</c>
[/INST]
</s>
[INST]
Helpful Answer:
[INST]
"""

# Wrap the raw string so the chain can fill in "context" and "question".
prompt = PromptTemplate(template=template,
                        input_variables=["context", "question"])


#chain everything:

# Conversational retrieval chain: combines the chat model, the retriever for
# the selected container model and the conversation memory. The custom prompt
# is passed to the document-combining step, and the retrieved source
# documents are returned alongside the answer.
chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    combine_docs_chain_kwargs={"prompt": prompt},
    return_source_documents=True,
)



##### streamlit ####

st.title("Ask me about nylon")


# Transcript shown in the UI. Streamlit re-runs this script on every
# interaction, so the messages are kept in session state and replayed.
if "messages" not in st.session_state:
    st.session_state.messages = []

# Replay the earlier messages on each rerun
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# React to user input. The input gets its own name so it no longer overwrites
# the module-level `prompt` (the PromptTemplate given to the chain).
if user_question := st.chat_input("write away.."):

    # Show the user's message and record it in the transcript
    st.chat_message("user").markdown(user_question)
    st.session_state.messages.append({"role": "user", "content": user_question})

    # Keep the spinner visible for the whole time the answer is produced
    with st.spinner("Going to the riggingloft for answers..."):

        # Ask the chain; invoke() replaces the deprecated chain(...) call form
        answer = chain.invoke({"question": user_question})

        # The chain returns a dict holding the answer text and the retrieved
        # source documents
        response = answer["answer"]
        source_documents = answer["source_documents"]

        # Display chatbot response in chat message container
        with st.chat_message("assistant"):
            st.markdown(response)

        # Display source documents
        with st.expander("Source Documents"):
            for doc in source_documents:
                # Fall back to a placeholder rather than raising KeyError when
                # a document carries no 'source' metadata.
                source_name = doc.metadata.get("source", "unknown source")
                st.markdown(f"**Document: {source_name}**")
                st.markdown(doc.page_content)

        # Add assistant response to chat history
        st.session_state.messages.append({"role": "assistant", "content": response})

Overwriting /teamspace/studios/this_studio/para_chat/code/ParaChat_app.py
