In [None]:
import os
import streamlit as st
from dotenv import load_dotenv
from openai import OpenAI
import tiktoken

# Resolve the OpenAI API key. When load_dotenv('.env') reports success
# (local development) the key is read from the process environment;
# otherwise it is taken from Streamlit's secrets store.
OPENAI_KEY = (
    os.getenv('OPENAI_API_KEY')
    if load_dotenv('.env')
    else st.secrets['OPENAI_API_KEY']
)

# Shared OpenAI client, constructed with the key resolved above.
client = OpenAI(api_key=OPENAI_KEY)

# Helper that requests embeddings through the shared OpenAI client
def get_embedding(input, model='text-embedding-3-small'):
    """Request embeddings for ``input`` and return them as a list.

    Args:
        input: Value forwarded unchanged as the ``input`` argument of
            ``client.embeddings.create``.
        model: Name of the embedding model to request.

    Returns:
        A list holding the ``embedding`` attribute of every entry in the
        response's ``data``, in the order the response provides them.
    """
    request_args = {"input": input, "model": model}
    result = client.embeddings.create(**request_args)

    vectors = []
    for entry in result.data:
        vectors.append(entry.embedding)
    return vectors

# Note that this function takes the full "messages" list directly as its parameter.
def get_completion(messages, model="gpt-4o-mini", temperature=0, top_p=1.0, max_tokens=1024, n=1):
    """Send ``messages`` to the chat completions endpoint and return the reply text.

    Args:
        messages: Chat messages forwarded unchanged as the ``messages``
            argument of ``client.chat.completions.create``.
        model: Name of the chat model to request.
        temperature: Sampling temperature forwarded to the API.
        top_p: Nucleus-sampling value forwarded to the API.
        max_tokens: Completion token limit forwarded to the API.
        n: Number of completions to request. It is forwarded to the API, but
            only the first completion's text is returned.

    Returns:
        The ``message.content`` of the first choice in the response.
    """
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=temperature,
        top_p=top_p,
        max_tokens=max_tokens,
        # Forward the caller's value; it was previously hard-coded to 1,
        # which silently ignored the ``n`` parameter.
        n=n
    )
    return response.choices[0].message.content





In [None]:
import os
import streamlit as st
from dotenv import load_dotenv
from openai import OpenAI
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from langchain.document_loaders import PyPDFLoader
from langchain_core.documents import Document
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from helper_functions.utility import check_password

# Load environment variables (via python-dotenv) before anything below runs.
load_dotenv()

# region <--------- Streamlit App Configuration --------->
# Page configuration is issued before any other Streamlit element in this
# script (the title, form and outputs all come later).
st.set_page_config(
    layout="centered",
    page_title="CPF Policy Assistant"
)
# Password gate: when check_password() returns a falsy value, st.stop() ends
# this script run, so none of the code below executes.
# NOTE(review): check_password lives in helper_functions.utility and is not
# visible here; presumably it renders its own password prompt - confirm.
if not check_password():
    st.stop()

# endregion <--------- Streamlit App Configuration --------->

# Page heading. The leading emoji had been stored as mojibake (its UTF-8
# bytes decoded as cp1252); it is restored to the intended books emoji.
st.title("📚 My CPF Policy Assistant")

### Provide the bot with CPF news releases published since October 2023,
### because GPT-4o mini has a knowledge cutoff of October 2023.
dir_path = r'./News Releases'


@st.cache_resource(show_spinner="Indexing CPF news releases...")
def _build_knowledge_base(folder):
    """Load the PDFs in ``folder`` and index them in the persisted Chroma store.

    Streamlit re-executes the whole script on every widget interaction.
    Without caching, each rerun re-parsed and re-embedded every PDF and
    appended another copy of each document to the persisted collection.
    ``st.cache_resource`` makes this run once per server process, and the
    file name is used as a stable document id so that re-indexing the same
    file after a restart replaces its entry instead of duplicating it.

    New or changed PDFs are only picked up after the cache is cleared or the
    app is restarted.

    NOTE(review): each PDF is still embedded as one whole document even
    though the collection is called "naive_splitter" and a text splitter is
    imported by this file - confirm whether chunking was intended.

    Args:
        folder: Directory that contains the news-release PDF files.

    Returns:
        A tuple ``(filenames, documents, embeddings_model, vectordb)``:
        the names of all regular files found in ``folder``, the loaded
        ``Document`` objects, the embeddings model, and the Chroma store.

    Raises:
        ValueError: If no PDF in ``folder`` yields any text to index.
    """
    # Keep regular files only; sorted so processing order is deterministic.
    filenames = sorted(
        name for name in os.listdir(folder)
        if os.path.isfile(os.path.join(folder, name))
    )

    documents = []
    for filename in filenames:
        if not filename.lower().endswith('.pdf'):
            continue
        try:
            pages = PyPDFLoader(os.path.join(folder, filename)).load()
        except Exception as e:
            # Best effort: one unreadable PDF must not take the whole app down.
            print(f"Error loading {filename}: {e}")
            continue

        # Merge all pages of the PDF into a single text.
        full_text = ' '.join(page.page_content for page in pages)
        if not full_text.strip():
            # Nothing to embed (e.g. a PDF with no extractable text).
            print(f"Skipping {filename}: no extractable text")
            continue

        documents.append(
            Document(page_content=full_text, metadata={"source": filename})
        )

    if not documents:
        raise ValueError(f"No readable PDF documents found in {folder!r}")

    # Create embeddings and vector database
    embeddings = OpenAIEmbeddings(model='text-embedding-3-small')
    store = Chroma.from_documents(
        documents=documents,
        embedding=embeddings,
        # File names are unique within the folder, so they serve as stable ids.
        ids=[doc.metadata["source"] for doc in documents],
        collection_name="naive_splitter",
        persist_directory="./vector_db"
    )
    return filenames, documents, embeddings, store


filename_list, list_of_documents_loaded, embeddings_model, vectordb = _build_knowledge_base(dir_path)

# Build the retrieval-augmented QA chain: a gpt-4o-mini chat model
# (temperature 0, seed 42) answering over the vector store's retriever.
llm_model = ChatOpenAI(
    model='gpt-4o-mini',
    temperature=0,
    seed=42,
)
rag_chain = RetrievalQA.from_llm(
    llm=llm_model,
    retriever=vectordb.as_retriever(),
)

# Input form in which the user types a CPF policy question
form = st.form(key="form")
form.subheader("CPF Policy Explainer")
user_prompt2 = form.text_area(
    "Enter your query regarding CPF Policy here",
    height=200,
)

# Make sure the session-state slots that hold the previous reply and the
# previous user prompt exist, starting each one as an empty string.
for state_key in ('LLM_reply', 'user_prompt1'):
    if state_key not in st.session_state:
        st.session_state[state_key] = ''

# Form submission: answer the query with the RAG chain, using the previous
# exchange (if any) as conversation history.
if form.form_submit_button("Submit"):
    # Prepare the system message
    system_message = """You are a helpful assistant from the Central Provident Fund (CPF) of Singapore,
    you are well-versed in CPF Policy. 

    Understand the customer service query and decide if the query is related to CPF policy.
    If the query is related to CPF policy, proceed to reply using information from https://www.cpf.gov.sg/ and ensure it is based on the Singapore context.
    If the query is NOT related to CPF policy, reply: I'm unable to assist as the enquiry is not related to CPF policy.
    """

    query = user_prompt2.strip()
    if not query:
        # Blank submission: do not spend a retrieval + LLM call on it.
        st.warning("Please enter a query before submitting.")
    else:
        # Prepare the conversation history. The previous turn is included
        # only when it exists, so the first submission does not send empty
        # "User:" / "Assistant:" placeholder lines. The current query is
        # left out here because it is appended once as "User query" below.
        messages = [{"role": "system", "content": system_message}]
        if st.session_state['user_prompt1']:
            messages.append({"role": "user", "content": st.session_state['user_prompt1']})
        if st.session_state['LLM_reply']:
            messages.append({"role": "assistant", "content": st.session_state['LLM_reply']})

        # Concatenate the conversation history into a single string
        conversation_history = "".join(
            f"{message['role'].capitalize()}: {message['content']}\n"
            for message in messages
        )

        # Prepare the full input (conversation history + latest user query)
        full_input = f"Conversation history:\n{conversation_history}\nUser query: {query}"

        # Get the response from the RAG chain. `invoke` replaces the
        # deprecated `run`; RetrievalQA reads the "query" key and returns
        # its answer under "result".
        response = rag_chain.invoke({"query": full_input})["result"]

        # Update session state with the new user input and LLM response
        st.session_state['user_prompt1'] = query
        st.session_state['LLM_reply'] = response

        # This displays the response generated by the LLM onto the frontend
        st.write(response)
        print(f"User Input is {user_prompt2}")
