In [1]:
import pandas as pd
import minsearch
from openai import OpenAI
import os

In [2]:
# Load the sample parquet file and strip the literal "to ask the " fragment
# from the responder column in one chained expression.
df = pd.read_parquet('_sample_clean.parquet.brotli').assign(
    responder=lambda frame: frame['responder'].str.replace('to ask the ', '', regex=False)
)


In [3]:
# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0,date,mp,question,answer,id,responder
0,2024-10-11,"Bodlani, Ms T",Whether his department has put any plans in pl...,Find replyhere,NW801,Minister of Communications and Digital Technol...
1,2024-10-11,"Moela, Mr MS",How will the SA Police Service develop mechani...,The workload of the Detectives is created by a...,NW223,Minister of Police
2,2024-10-11,"Ndlozi, Dr MQ","What total (a) amount in State incentives, suc...",The automobile industry benefits from the Auto...,NW990,Minister of Finance
3,2024-10-11,"Mrara, Mr M",(a) What is the long-term strategy of his depa...,I have been advised by the Department as follo...,NW771,Minister of Communications and Digital Technol...
4,2024-10-11,"Mdluli, Mr MM",Since the start of the migration from analogue...,I have been advised by the Department as follo...,NW909,Minister of Communications and Digital Technol...


In [4]:
# Check the row index (its range also shows how many rows were loaded).
df.index

RangeIndex(start=0, stop=300, step=1)

In [5]:
# Read the full question text of the row labelled 7.
df.loc[7, 'question']

'With reference to the importance of expanding digital infrastructure and prioritising it in development plans to deal with hurdles such as high mobile data costs and limited access to high-speed internet, what plans has his department developed to improve the much-needed digital infrastructure?'

In [6]:
# List the column names available on each record.
df.columns

Index(['date', 'mp', 'question', 'answer', 'id', 'responder'], dtype='object')

In [7]:
# Convert the frame to a list with one dict per row, keyed by column name.
# The date values stay as pandas Timestamp objects (see the output of the next cell).
docs = df.to_dict(orient='records')

In [8]:
# Preview only the first two records instead of dumping the whole list
# (one dict per DataFrame row) into the notebook output.
docs[:2]

[{'date': Timestamp('2024-10-11 00:00:00'),
  'mp': 'Bodlani, Ms T',
  'question': 'Whether his department has put any plans in place to (a) rationalise the 11 entities reporting to him that have overlapping mandates and (b) pursue public-private partnerships to strengthen the performance of the entities, to lower the pressure on public finances; if not, in each case, what is the position in this regard; if so, what are the relevant details in each case?',
  'answer': 'Find replyhere',
  'id': 'NW801',
  'responder': 'Minister of Communications and Digital Technologies'},
 {'date': Timestamp('2024-10-11 00:00:00'),
  'mp': 'Moela, Mr MS',
  'question': 'How will the SA Police Service develop mechanisms that will reduce the workload of detectives to ensure a speedy resolution of criminal cases as investigations and finalisation of criminal cases depend on the efficiency and availability of police detectives?',
  'answer': 'The workload of the Detectives is created by attrition in the de

In [9]:
# Index every column except `date` as a text field; no keyword fields are declared.
index = minsearch.Index(
    text_fields=['id', 'mp', 'question', 'answer', 'responder'],
    keyword_fields=[]
)

In [10]:
# Fit the index on all records; fit() returns the index object, which is what gets displayed.
index.fit(docs)

<minsearch.Index at 0x173e85100>

In [11]:
def search(query, num_results=5):
    """Run a text query against the module-level minsearch ``index``.

    Args:
        query: Text to search for.
        num_results: Number of hits to request from the index. Defaults to 5,
            so existing ``search(query)`` calls behave as before.

    Returns:
        Whatever ``index.search`` returns for the query; the rest of this
        notebook iterates over it as a sequence of record dicts.
    """
    return index.search(
        query=query,
        filter_dict={},  # no filtering yet; might filter per ministry
        boost_dict={},  # no per-field boosting
        num_results=num_results,
    )


In [12]:
# No credentials are passed explicitly; presumably the client reads the API key from
# the environment (e.g. OPENAI_API_KEY) — confirm before running on a new machine.
client = OpenAI()


In [13]:
# Inspect the first record and the keys it carries.
docs[0]

{'date': Timestamp('2024-10-11 00:00:00'),
 'mp': 'Bodlani, Ms T',
 'question': 'Whether his department has put any plans in place to (a) rationalise the 11 entities reporting to him that have overlapping mandates and (b) pursue public-private partnerships to strengthen the performance of the entities, to lower the pressure on public finances; if not, in each case, what is the position in this regard; if so, what are the relevant details in each case?',
 'answer': 'Find replyhere',
 'id': 'NW801',
 'responder': 'Minister of Communications and Digital Technologies'}

In [14]:
# Example question used to exercise the pipeline in the cells below.
query = "what steps has she taken to ensure that a culture of human rights and racial tolerance is embraced in schools?"

# Instruction wrapper sent to the model; {question} and {context} are filled in
# by build_prompt.
prompt_template = """
You're a political analyst. Answer the QUESTION based on the CONTEXT from the PMG database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: {question}

CONTEXT: 
{context}
""".strip()

# Layout of a single retrieved record inside the CONTEXT section; each
# placeholder is filled from the record key of the same name.
entry_template = """
date: {date}
mp: {mp}
question: {question}
answer: {answer}
responder: {responder}
""".strip()


def build_prompt(query, search_results):
    """Assemble the LLM prompt for ``query`` from the retrieved records.

    Args:
        query: The user's question; substituted for ``{question}`` in the
            module-level ``prompt_template``.
        search_results: Iterable of record dicts. Each must supply every
            placeholder used by the module-level ``entry_template``; a
            missing key raises ``KeyError``.

    Returns:
        The filled-in ``prompt_template`` with surrounding whitespace stripped.
    """
    # One formatted entry per record, separated by a blank line. No separator is
    # needed after the last entry because the final result is stripped.
    context = "\n\n".join(entry_template.format(**doc) for doc in search_results)
    return prompt_template.format(question=query, context=context).strip()

def llm(prompt, model='gpt-4o-mini'):
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: Full prompt text to send.
        model: Chat model name passed to the API. Defaults to 'gpt-4o-mini',
            so existing ``llm(prompt)`` calls behave as before.

    Returns:
        The ``content`` of the first choice in the chat-completion response
        obtained from the module-level ``client``.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )
    return response.choices[0].message.content

def rag(query):
    """Answer ``query`` end to end: retrieve records, build the prompt, ask the model."""
    hits = search(query)
    return llm(build_prompt(query, hits))

In [15]:
# Run retrieval and prompt assembly by hand (the same first two steps rag() performs)
# so the intermediate prompt can be inspected before calling the model.
search_results = search(query=query)
prompt = build_prompt(query, search_results)

In [16]:
# Show the assembled prompt (displayed as its repr, so newlines appear as \n).
prompt

"You're a politcal analyst. Answer the QUESTION based on the CONTEXT from the PMG database.\nUse only the facts from the CONTEXT when answering the QUESTION.\n\nQUESTION: what steps has she taken to ensure that a culture of human rights and racial tolerance is embraced in schools?\n\nCONTEXT: \ndate: 2024-10-09 00:00:00\nmp: Mathafa, Mr OM\nquestion: Considering that incidents of racism amongst young and old have been spiking in schools throughout the Republic, what steps has she taken to ensure that a culture of human rights and racial tolerance is embraced in schools?\nanswer: \nresponder: Minister of Justice and Constitutional Development\n\ndate: 2024-10-03 00:00:00\nmp: Mohlala, Ms MR\nquestion: (a) What specific actions has she taken to address the (i) severe delays and (ii) mismanagement issues plaguing the Snake Park Water, Sanitation and Roads Project in Ward 16 of Kimberley which is managed by her department, including the completion of the remaining infrastructure and (b) ho

In [17]:
# Full pipeline run. NOTE: in the prompt displayed above, the context entry whose
# question matches this query has an empty answer field, so the reply is not backed
# by any facts from the context and should be treated with caution.
rag(query)

'The Minister of Justice and Constitutional Development has taken several steps to ensure that a culture of human rights and racial tolerance is embraced in schools. These steps include initiatives aimed at addressing the rising incidents of racism within the educational environment, although specific actions were not detailed in the provided context. The focus appears to be on fostering an inclusive atmosphere that promotes human rights awareness among students and staff in schools across the Republic.'