In [13]:
import pandas as pd
import os
import sys
import importlib
from dotenv import load_dotenv

In [14]:
# Prerequisite: the `openai` package must be installed (e.g. run `%pip install openai` once in this kernel).

In [15]:
# Import the project's minsearch module.
# The parent of the current working directory is appended to the module
# search path so that the "med-conv-ai" package can be resolved from there.
# NOTE(review): assumes the notebook is run from a directory whose parent
# contains the "med-conv-ai" package — confirm.
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

# The package name contains hyphens, which a plain `import` statement cannot
# express, so the module is loaded by its dotted name through importlib.
minsearch = importlib.import_module("med-conv-ai.minsearch")

## Loading data

In [16]:
# Load the question/answer dataset from a CSV file; the path is relative to
# the notebook's working directory.
df = pd.read_csv("../data/qa_data.csv")

In [30]:
# Display the last 10 rows as a quick sanity check of the loaded data.
df.tail(10)

Unnamed: 0,id,question_type,question,answer
200,224,outlook,What is the outlook for Narcolepsy ?,None of the currently available medications en...
201,158,outlook,What is the outlook for Inclusion Body Myositis ?,IBM is generally resistant to all therapies an...
202,109,outlook,What is the outlook for Dyssynergia Cerebellar...,The progression of the disorder is usually 10 ...
203,266,outlook,What is the outlook for Peripheral Neuropathy ?,"In acute neuropathies, such as Guillain-Barr s..."
204,61,outlook,What is the outlook for Cerebral Palsy ?,Cerebral palsy doesnt always cause profound di...
205,114,outlook,What is the outlook for Encephaloceles ?,The prognosis for individuals with encephaloce...
206,330,outlook,What is the outlook for Todd's Paralysis ?,Todd's paralysis is an indication that an indi...
207,110,outlook,What is the outlook for Dystonias ?,The initial symptoms can be very mild and may ...
208,290,outlook,What is the outlook for Restless Legs Syndrome ?,RLS is generally a life-long condition for whi...
209,138,outlook,What is the outlook for Gestational Trophoblas...,Certain factors affect prognosis (chance of re...


## Ingestion test

In [17]:
# Load variables from a .env file into the process environment (python-dotenv).
load_dotenv()

# Read the API key from the environment; .get() yields None instead of raising
# when the variable is unset.
# NOTE(review): this name is not referenced by any later cell shown here.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')

In [18]:
# Convert the DataFrame into a list of per-row dicts (column name -> value);
# this list is what gets handed to index.fit().
docs = df.to_dict(orient='records')

In [19]:
# Create the (still empty) search index and declare which document fields it
# should use.
# NOTE(review): by minsearch's naming, text_fields are presumably searched as
# free text and keyword_fields used for exact-match filtering — confirm
# against the minsearch module.
index = minsearch.Index(
    text_fields=['question_type', 'question', 'answer'],
    keyword_fields=["id"],
)

In [20]:
# Fit the index on the documents. fit() returns the index object itself,
# which is why this cell echoes the Index repr as its output.
index.fit(docs)

<med-conv-ai.minsearch.Index at 0x727fe31ceb10>

## Construction of RAG

In [21]:
# NOTE(review): this import sits mid-notebook; consider moving it into the
# import cell at the top so all dependencies are visible in one place.
from openai import OpenAI

# No api_key is passed explicitly. The client is expected to pick up
# OPENAI_API_KEY from the environment populated by load_dotenv() — confirm
# against the openai SDK documentation.
client = OpenAI()

In [22]:
def search(query, num_results=10):
    """Search the notebook-level ``index`` for ``query``.

    Args:
        query: Query string forwarded to ``index.search``.
        num_results: Value forwarded as ``num_results`` to ``index.search``.
            Defaults to 10, the value that used to be hard-coded, so existing
            ``search(query)`` calls behave exactly as before.

    Returns:
        The value returned by ``index.search``, unmodified.
    """
    # No filtering and no per-field boosting: both dicts are passed empty.
    return index.search(
        query=query,
        filter_dict={},
        boost_dict={},
        num_results=num_results,
    )

In [23]:
# Top-level prompt sent to the model. {question} is the user's query and
# {context} is the block of retrieved Q&A entries built by build_prompt().
prompt_template = """
You're a medical doctor. Answer the QUESTION based on the CONTEXT from our medical Q&A database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: {question}

CONTEXT:
{context}
""".strip()

# Rendering of a single retrieved document inside the CONTEXT section.
entry_template = """
question: {question}
answer: {answer}
""".strip()

def build_prompt(query, search_results):
    """Build the full LLM prompt for ``query`` from retrieved documents.

    Args:
        query: The user's question; substituted for ``{question}``.
        search_results: Iterable of dicts, each providing at least the
            ``question`` and ``answer`` keys used by ``entry_template``.
            Extra keys are ignored by ``str.format``.

    Returns:
        The formatted prompt string with surrounding whitespace stripped.
        With no search results the prompt ends with an empty CONTEXT section.

    Raises:
        KeyError: If a document lacks ``question`` or ``answer``.
    """
    # Entries are separated by a blank line; joining avoids rebuilding the
    # string on every iteration and leaves no trailing separator to strip.
    context = "\n\n".join(entry_template.format(**doc) for doc in search_results)
    return prompt_template.format(question=query, context=context).strip()

In [24]:
def llm(prompt, model='gpt-4o-mini'):
    """Send ``prompt`` to the chat model and return the text of its reply.

    Args:
        prompt: Text sent as the content of a single ``user`` message.
        model: Model name passed to the chat-completions call.

    Returns:
        The ``message.content`` of the first choice in the response.
    """
    user_message = {"role": "user", "content": prompt}
    completion = client.chat.completions.create(
        model=model,
        messages=[user_message],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [25]:
def rag(query, model='gpt-4o-mini'):
    """Answer ``query`` with retrieval-augmented generation.

    Runs the three pipeline stages in order: retrieve documents with
    ``search``, turn them into a prompt with ``build_prompt``, and ask the
    model through ``llm``.

    Args:
        query: The user's question.
        model: Model name forwarded to ``llm``.

    Returns:
        The value returned by ``llm`` for the built prompt.
    """
    retrieved = search(query)
    return llm(build_prompt(query, retrieved), model=model)

In [33]:
# End-to-end smoke test: send one question through the full RAG pipeline
# (search -> prompt -> model) and print the model's answer.
question = 'What is the prognosis for Cerebral Palsy ?'
answer = rag(question)
print(answer)

The prognosis for cerebral palsy (CP) varies widely among individuals. Many people with CP do not experience profound disabilities, and for most, the disorder does not affect life expectancy. Many children with CP have average to above-average intelligence and can attend the same schools as their peers. Through supportive treatments, medications, and surgery, many individuals can improve their motor skills and ability to communicate. However, the severity of CP can differ greatly; while some children may not need special assistance, others with severe CP may be unable to walk and require extensive, lifelong care.
