In [1]:
# Common imports
import os
from dotenv import load_dotenv
from openai import OpenAI
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings

# Importing Dependencies for RAG
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

#Import the key CrewAI classes
# from crewai import Agent, Task, Crew

import json
import lolviz


In [3]:
# Load environment variables (such as the OpenAI API key read later via os.getenv) from the local .env file
load_dotenv('.env')

True

In [5]:
# Check which OpenAI model we are using.
# The quotes inside the replacement field differ from the outer ones on purpose:
# reusing the same quote type inside an f-string is a SyntaxError before Python 3.12.
print(f'OPENAI_MODEL_NAME = "{os.getenv("OPENAI_MODEL_NAME")}"')

OPENAI_MODEL_NAME = "gpt-4o-mini"


In [7]:
# NOTE: the CPF website cannot be accessed using this method (WebsiteSearchTool), so the code below is left commented out.

# from crewai_tools import WebsiteSearchTool
    # Create a new instance of the WebsiteSearchTool
    # Set the base URL of a website so that the tool can search for sub-pages on that website
# tool_websearch = WebsiteSearchTool("https://abc-notes.data.tech.gov.sg/")


In [5]:
# Note that this function directly takes "messages" as the parameter.
def get_completion(messages, model="gpt-4o-mini", temperature=0, top_p=1.0, max_tokens=1024, n=1):
    """Send a chat conversation to the OpenAI API and return the reply text.

    Args:
        messages: List of chat message dicts (``role`` / ``content``) passed
            straight through to the chat completions endpoint.
        model: Name of the chat model to call.
        temperature: Sampling temperature forwarded to the API.
        top_p: Nucleus-sampling value forwarded to the API.
        max_tokens: Upper bound on the number of generated tokens.
        n: Number of completions to request from the API.

    Returns:
        The message content of the first returned choice. Only the first
        choice is returned even when ``n`` is greater than 1.
    """
    # `client` is the module-level OpenAI client; it must exist before the first call.
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=temperature,
        top_p=top_p,
        max_tokens=max_tokens,
        # Forward the caller's value; it used to be hard-coded to 1, which
        # silently ignored the `n` argument.
        n=n,
    )
    return response.choices[0].message.content

In [7]:
# Read the OpenAI key from the environment and build the shared API client
API_KEY = os.environ.get("OPENAI_API_KEY")
client = OpenAI(api_key=API_KEY)

In [9]:
# This function is for calculating the tokens given the "message"
# ⚠️ This is simplified implementation that is good enough for a rough estimation

import tiktoken

def count_tokens(text, model='gpt-4o-mini'):
    """Return the number of tokens `text` encodes to for `model`.

    Args:
        text: The string to measure. Empty or None input counts as 0 tokens.
        model: Model name used to look up the tiktoken encoding. Defaults to
            'gpt-4o-mini', the value that was previously hard-coded.

    Returns:
        The length of the encoded token list.
    """
    # Nothing to encode, so skip the encoding lookup entirely.
    if not text:
        return 0
    encoding = tiktoken.encoding_for_model(model)
    return len(encoding.encode(text))

def count_tokens_from_message_rough(messages, model='gpt-4o-mini'):
    """Roughly estimate the token count of a list of chat messages.

    Only the `content` of each message is counted; the contents are joined
    with single spaces and encoded once, so the result is an approximation.

    Args:
        messages: Iterable of message dicts; each may carry a 'content' key.
        model: Model name used to look up the tiktoken encoding. Defaults to
            'gpt-4o-mini', the value that was previously hard-coded.

    Returns:
        The number of tokens in the joined message contents.
    """
    encoding = tiktoken.encoding_for_model(model)
    # A message whose content is missing or None is counted as empty text
    # rather than making str.join raise a TypeError.
    value = ' '.join(x.get('content') or '' for x in messages)
    return len(encoding.encode(value))

In [11]:
# Embedding model used throughout this session
embeddings_model = OpenAIEmbeddings(
    model="text-embedding-3-small",
)

# Chat model used by the RAG pipelines in this notebook
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0,
    seed=42,
)

In [13]:
# Opening conversation: the system persona followed by the user's question.
# The run of spaces after "Singapore," is kept on purpose so the prompt text
# stays exactly the same as before.
messages = [
    {
        "role": "system",
        "content": (
            "You are a helpful assistant from the Central Provident Fund (CPF) of Singapore, "
            "        you are well versed in CPF Policy. "
            "Information must come from https://www.cpf.gov.sg/ and always base on Singapore context."
        ),
    },
    {
        "role": "user",
        "content": "tell me about three basic needs in retirement",
    },
]

In [51]:
#get_completion (messages)

"In retirement, individuals typically focus on three basic needs to ensure a comfortable and secure lifestyle. These needs are:\n\n1. **Income for Daily Living Expenses**: Retirees need a steady income to cover their daily living expenses, such as food, housing, utilities, and transportation. In Singapore, the Central Provident Fund (CPF) provides various schemes to help retirees generate income, such as the CPF LIFE scheme, which offers lifelong monthly payouts to help meet these expenses.\n\n2. **Healthcare Costs**: As individuals age, healthcare needs often increase. It is essential for retirees to plan for medical expenses, including regular check-ups, medications, and potential hospitalizations. In Singapore, the MediSave scheme allows individuals to set aside savings for medical expenses, and the MediShield Life insurance provides coverage for large hospital bills.\n\n3. **Housing Needs**: Having a stable and comfortable living environment is crucial in retirement. Many retirees 

In [29]:
# `MultiQueryRetriever` improves on the user query by generating multiple queries from it.
# NOTE(review): this cell does not run as written. Its recorded output is
# "NameError: name 'vectordb' is not defined", and neither a definition of
# `vectordb` nor an import of `MultiQueryRetriever` appears in the visible
# cells. Both must exist before this cell runs (TODO: confirm where the
# vector store is meant to be built).
retriever_multiquery = MultiQueryRetriever.from_llm(
  retriever=vectordb.as_retriever(), llm=llm,
)

NameError: name 'vectordb' is not defined

In [15]:
# Get the names of the files in the folder

# folder path
dir_path = r'./News Releases'

# os.listdir() returns entries in arbitrary order. Sort them so that positions
# in `filename_list` (and in anything loaded from it and indexed by position)
# are the same on every run and on every machine.
filename_list = sorted(
    entry
    for entry in os.listdir(dir_path)
    # keep regular files only; sub-directories are skipped
    if os.path.isfile(os.path.join(dir_path, entry))
)
print(filename_list)

['cpf.gov.sg-Extends 4 percent interest rate floor until 31 Dec 25.pdf', 'cpf.gov.sg-Guide to your healthcare insurance coverage.pdf', 'cpf.gov.sg-Lowering of maximum Daily Withdrawal Limit to 50000 for online CPF withdrawals.pdf', 'cpf.gov.sg-New default daily limit of 2000 for online CPF withdrawals.pdf']


In [None]:
# Read every page of a PDF and merge the page texts into one string
def load_entire_pdf_as_document(path):
    """Return the text of all pages of the PDF at `path`, joined by newlines.

    Args:
        path: Location of the PDF file, handed to PyPDFLoader.

    Returns:
        A single string: each page's `page_content`, separated by "\\n".
    """
    page_texts = [page.page_content for page in PyPDFLoader(path).load()]
    return "\n".join(page_texts)

# Read each listed PDF into one text string per file
list_of_documents_loaded = []
for filename in filename_list:
    # Anything that is not a PDF is skipped
    if not filename.endswith('.pdf'):
        continue
    try:
        # Build the path of the PDF inside the news-release folder
        pdf_path = os.path.join('News Releases', filename)
        # Whole file as a single string
        full_text = load_entire_pdf_as_document(pdf_path)
        list_of_documents_loaded.append(full_text)
        print(f"Loaded {filename}")
    except Exception as err:
        # Report the failure and carry on with the next file
        print(f"Error loading {filename}: {err}")

In [None]:
# `langchain.document_loaders` is a deprecated import location; the loaders
# live in `langchain_community`, which this notebook already uses for Chroma.
from langchain_community.document_loaders import PyPDFLoader

# Load the documents page by page
list_of_documents_loaded = []
for filename in filename_list:
    if filename.endswith('.pdf'):  # Check file type
        try:
            # Path of the file to load (despite its name, this is a PDF path)
            markdown_path = os.path.join('News Releases', filename)
            loader = PyPDFLoader(markdown_path)

            # load() returns a list of Document objects
            data = loader.load()
            # use extend() so every Document lands in the flat list
            list_of_documents_loaded.extend(data)
            print(f"Loaded {filename}")

        except Exception as e:
            # If a document fails to load, report it; the loop then moves on
            # to the next file by itself.
            print(f"Error loading {filename}: {e}")

In [35]:
# Display the first loaded item
list_of_documents_loaded[0]

"1 / 2Government extends 4% interest rate floor on Special,\nMediSave and Retirement Account monies until 31\nDecember 2025\ncpf.gov .sg/member/infohub/news/news-releases/government-extends-4-per-cent-interest-rate-floor-on-special-\nmedisave-and-retirement-account-monies-until-31-december-2025\nOne-year extension of minimum 4% interest rate floor on Special, MediSave and\nRetirement Account monies from 1 January to 31 December 2025\nCPF Interest Rates from 1 October to 31 December 2024\nOrdinary Account Special, MediSave and Retirement Accounts\nInterest Rate 2.5% 4.14%\nCPF members aged below 55\nMembers earn an extra 1% interest  on the first $60,000 of their combined CPF\nbalances\nCPF members aged 55 and above\nMembers earn an extra 2% interest  on the first $30,000 of their combined CPF\nbalances, and an extra 1%  on the next $30,000\nHDB Concessionary Interest Rate from 1 October to 31 December 2024\nRemains unchanged at 2.6%\nNote: All interest rates are quoted on a per annum b

In [23]:
# Metadata of the first loaded item; this needs the list to hold Document objects rather than plain strings
list_of_documents_loaded[0].metadata

{'source': 'News Releases\\cpf.gov.sg-Extends 4 percent interest rate floor until 31 Dec 25.pdf',
 'page': 0}

In [33]:
# Number of items currently in the loaded list
len(list_of_documents_loaded)

4

In [29]:
# Metadata of the second loaded item; this needs the list to hold Document objects rather than plain strings
list_of_documents_loaded[1].metadata

{'source': 'News Releases\\cpf.gov.sg-Extends 4 percent interest rate floor until 31 Dec 25.pdf',
 'page': 1}

In [None]:
# Initialize session state if not already done
if 'LLM_reply' not in st.session_state:
    st.session_state['LLM_reply'] = ''

if 'user_prompt1' not in st.session_state:
    st.session_state['user_prompt1'] = ''

# Form submission
# NOTE(review): `st`, `form` and `user_prompt2` are not defined in the visible
# cells; this looks like Streamlit app code and must be given those names
# before it can run (TODO confirm).
if form.form_submit_button("Submit"):
    # Prepare the messages for the LLM
    system_message = """ You are a helpful assistant from the Central Provident Fund (CPF) of Singapore,\
    you are well versed in CPF Policy. 

    Understand the customer service query and decide if the query is related CPF policy.
    Query is related to CPF policy, procced to reply using information from https://www.cpf.gov.sg/ and it must be based on Singapore context.
    Query NOT related to CPF policy, reply: I'm unable to assist as the enquiry is not related to CPF policy. 
    """
    messages = [{"role": "system", "content": system_message}]
    # Replay the previous exchange only when there is one. On the first
    # submission both slots still hold '' and would otherwise be sent as an
    # empty user turn followed by an empty assistant turn.
    if st.session_state['user_prompt1']:
        messages.append({"role": "user", "content": st.session_state['user_prompt1']})
        messages.append({"role": "assistant", "content": st.session_state['LLM_reply']})
    messages.append({"role": "user", "content": user_prompt2})

    # Call the helper function exactly once. It used to be called a second
    # time for display, which doubled the cost and could show a reply
    # different from the one stored in session state.
    # NOTE(review): `llm.get_completion` presumably refers to the
    # `helper_functions` module imported as `llm`, not to a ChatOpenAI
    # instance — confirm.
    response = llm.get_completion(messages)

    # Remember this exchange so the next submission can replay it
    st.session_state['user_prompt1'] = user_prompt2
    st.session_state['LLM_reply'] = response

    # Display the response generated by the LLM on the frontend
    st.write(response)
    print(f"User Input is {user_prompt2}")



In [None]:
from dotenv import load_dotenv
from helper_functions import llm