# Use Giskard create test cases

In [5]:
import os
from dotenv import load_dotenv

load_dotenv()

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
MODEL = "gpt-3.5-turbo"

In [6]:
from langchain_community.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)

loader = DirectoryLoader('../../Consultations20240916', glob="**/*.txt")
documents = loader.load_and_split(text_splitter)
print(documents)

[Document(metadata={'source': '../../Consultations20240916/Lawrence_320361.txt'}, page_content="Doctor: Lawrence On\n\nPatient: Lauren Demers, F, 34, Allergies:\n\nPast medications:\n\nCondition description: full body rash;\n\nPhone transcript: Dr: Hi Lauren, my name is Lauren, I'm one of the doctors. How are you?\n\nPt: Yeah, where are you?\n\nDr: Okay, do you want to send me a photo of your rash?\n\nPt: Yep, how do I do that?\n\nDr: If you click on the link, it will take you to our website.\n\nPt: Sorry, did I lose you?\n\nDr: Yeah, if you click on the link, it will take you to our webpage where you can send it.\n\nPt: Oh, I see it, okay.\n\nDr: Have you been sick recently?\n\nPt: Nope.\n\nDr: New medications?\n\nPt: Nope.\n\nDr: Okay, yeah it just looks like a urticaria is what we call it. Have you taken any antihistamines?\n\nPt: Yeah.\n\nDr: Okay. Is it quite itchy?\n\nPt: It's so itchy.\n\nDr: Okay, we'll probably start you on some steroids.\n\nPt: Okay.\n\nDr: So it's just two t

In [8]:
import pandas as pd

df = pd.DataFrame([d.page_content for d in documents], columns=["text"])
df.head(10)

Unnamed: 0,text
0,"Doctor: Lawrence On\n\nPatient: Lauren Demers,..."
1,Dr: So we recommend during the morning take a ...
2,"Pt: Thank you. Yeah, I've tried and nothing he..."
3,Dr: Okay.\n\nPt: But the fact that it just got...
4,"Doctor: Ayad Jbreeta\n\nPatient: Oliver Brun, ..."
5,Dr: Any swelling in the face?\n\nPt: They were...
6,"Pt: Yes.\n\nDr: Okay, so we'll say he is fit t..."
7,Doctor: Sharib Yusuf\n\nPatient: Jayden Hayter...
8,Pt: Thank you.\n\nDr: Thank you very much.\n\n...
9,"Doctor: Ayad Jbreeta\n\nPatient: Lynn Godfrey,..."


In [9]:
from giskard.rag import KnowledgeBase

knowledge_base = KnowledgeBase(df)

  from .autonotebook import tqdm as notebook_tqdm


In [10]:
from giskard.rag import generate_testset

testset = generate_testset(
    knowledge_base,
    num_questions=60,
    agent_description="A chatbot answering questions about the Proper Treatment for patients",
)

2024-09-16 20:04:16,775 pid:2455721 MainThread giskard.rag  INFO     Finding topics in the knowledge base.
2024-09-16 20:04:30,783 pid:2455721 MainThread giskard.rag  INFO     Found 5 topics in the knowledge base.


Generating questions: 100%|██████████| 60/60 [06:04<00:00,  6.07s/it]


In [11]:
test_set_df = testset.to_pandas()

for index, row in enumerate(test_set_df.head(3).iterrows()):
    print(f"Question {index + 1}: {row[1]['question']}")
    print(f"Reference answer: {row[1]['reference_answer']}")
    print("Reference context:")
    print(row[1]['reference_context'])
    print("******************", end="\n\n")


Question 1: What is the most likely diagnosis for a 49 year old female with symptoms of cough, myalgia, and incontinence?
Reference answer: The most likely diagnosis is Influenza (SNOMED: 6142004)
Reference context:
Document 65: Dr: Take care.

Pt: Thank you very much. Bye.

Dr: Bye.

Most likely diagnosis: Influenza (SNOMED: 6142004)

Differential diagnoses: Acute bronchitis (SNOMED: 10509002), LRTI (SNOMED: 50417007), Pneumonia (SNOMED: 233604007)

Summary: 39M w/ cough, sore chest, muscle pain

Document 8: Pt: Thank you.

Dr: Thank you very much.

Pt: To my phone? Okay.

Dr: Thank you.

Pt: Okay, all right, thank you, bye.

Dr: Bye.

Pt: Bye.

Most likely diagnosis: Pneumonia (SNOMED: 233604007)

Differential diagnoses: LRTI (SNOMED: 50417007), Influenza (SNOMED: 6142004), Acute bronchitis (SNOMED: 10509002)

Summary: 16M w/ fever, cough, stomach ache

Document 15: Pt: Thank you.

Dr: It's not coming up on his thing, so we'll see what comes up with. Please don't spit. No! No, no, I'

In [12]:
testset.save("test-set.jsonl")