# EmergencyZIP AI

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/patient-symptom-dataset/patient_symptom_data.csv


In [2]:
!pip uninstall -qqy jupyterlab  # Remove unused packages from Kaggle's base image that conflict
!pip install -U -q "google-genai==1.7.0"

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m144.7/144.7 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m100.9/100.9 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [3]:
patient_symptom_df = pd.read_csv('/kaggle/input/patient-symptom-dataset/patient_symptom_data.csv')
patient_symptom_df

Unnamed: 0,PatientID,ZipCode,Age,Gender,Symptoms
0,P001,70112,32,Female,Joint pain;Runny nose;Loss of taste;Sore throat
1,P002,15213,87,Female,Cough;Fever;Fatigue;Chest pain;Joint pain
2,P003,46201,21,Non-binary,Swelling;Blurred vision;Rash
3,P004,20001,46,Male,Chest tightness;Sneezing;Fever;Breathing diffi...
4,P005,96813,38,Non-binary,Vomiting;Headache;Runny nose;Chest pain;Breath...
...,...,...,...,...,...
95,P096,92101,71,Non-binary,Low-grade fever;Joint pain;Headache
96,P097,20001,80,Female,Vomiting;Abdominal cramps;Chest tightness;Abdo...
97,P098,85001,77,Female,Loss of smell;Abdominal pain
98,P099,84101,90,Non-binary,Fatigue;Blurred vision


In [4]:
from google import genai
from google.genai import types

from IPython.display import HTML, Markdown, display

In [5]:
from google.api_core import retry


is_retriable = lambda e: (isinstance(e, genai.errors.APIError) and e.code in {429, 503})

genai.models.Models.generate_content = retry.Retry(
    predicate=is_retriable)(genai.models.Models.generate_content)

In [6]:
from kaggle_secrets import UserSecretsClient

GOOGLE_API_KEY = UserSecretsClient().get_secret("GOOGLE_API_KEY")

In [7]:
client = genai.Client(api_key=GOOGLE_API_KEY)

In [8]:
patient_symptom_df.iloc[0].to_string()

'PatientID                                               P001\nZipCode                                                70112\nAge                                                       32\nGender                                                Female\nSymptoms     Joint pain;Runny nose;Loss of taste;Sore throat'

In [9]:
import typing_extensions as typing

class MedicalPatient(typing.TypedDict): 
    zipcode: int
    age: int
    gender: str
    symptoms: list[str]

def get_json_form(patient_symptom_entry): 
    entry_str = patient_symptom_entry.to_string()
    json_response = client.models.generate_content(
        model='gemini-2.0-flash', 
        config=types.GenerateContentConfig(
            temperature=0.1, 
            response_mime_type="application/json", 
            response_schema=MedicalPatient
        ), 
        contents=entry_str
    )
    json_form = json_response.text
    return json_form

print(get_json_form(patient_symptom_df.iloc[0]))

{
  "zipcode": 70112,
  "age": 32,
  "gender": "Female",
  "symptoms": [
    "Joint pain",
    "Runny nose",
    "Loss of taste",
    "Sore throat"
  ]
}


In [10]:
def get_general_model_config(temperature, top_p, max_output_tokens): 
    return types.GenerateContentConfig(
        temperature=temperature, 
        top_p=top_p, 
        max_output_tokens=max_output_tokens
    )

def few_shot_func(model_input, prompt, config): 
    few_shot_response = client.models.generate_content(
        model='gemini-2.0-flash', 
        config=config,
        contents=[prompt, model_input]
    )
    str_response = few_shot_response.text
    return str_response

model_config = get_general_model_config(0.1, 1, 500)

few_shot_prompt = """Parse JSON into a string request and only return output: 

EXAMPLE: 
{
"zipcode": 78664, 
"age": 30, 
"gender": "Female", 
"symptoms": ["Cough", "Fever", "Sore throat"]
}
"30-year old female patient from zipcode 78664 currently experiencing symptoms of cough, fever, and sore throat."

EXAMPLE: 
{
"zipcode": 77055, 
"age": 25, 
"gender": "Non-binary", 
"symptoms": ["Joint pain", "Loss of smell"]
}
"25-year old non-binary patient from zipcode 77055 currently experiencing symptoms of joint pain and loss of smell."

EXAMPLE: 
{
"zipcode": 78681, 
"age": 15, 
"gender" "Male", 
"symptoms": ["Fatigue", "Loss of Vision", "Headache", "Joint pain"]
}
"15-year old male patient from zipcode 78681 currently experiencing symptoms of fatigue, loss of vision, headache, and joint pain."
"""

print(few_shot_func(get_json_form(patient_symptom_df.iloc[0]), few_shot_prompt, model_config))

"32-year old female patient from zipcode 70112 currently experiencing symptoms of joint pain, runny nose, loss of taste, and sore throat."



In [11]:
def enhance_prompt(prompt_config, orig_prompt): 
    generate_prompt = 'Enhance this prompt to be as precise and return most accurate results as possible: ' + orig_prompt + " and return enhanced prompt only"
    prompt_response = client.models.generate_content(
        model='gemini-2.0-flash', 
        config=prompt_config, 
        contents=generate_prompt
    )
    prompt = prompt_response.text
    return prompt

prompt_config = get_general_model_config(0.1, 1, 1000)

generate_prompt_prompt = few_shot_func(get_json_form(patient_symptom_df.iloc[0]), few_shot_prompt, model_config) + "What disease are they most likely to experience now?"

print(enhance_prompt(prompt_config, generate_prompt_prompt))

Here's an enhanced prompt designed to elicit a more accurate and relevant response regarding the potential diagnosis:

**Enhanced Prompt:**

"Given a 32-year-old female patient residing in the 70112 zip code (New Orleans, Louisiana), presenting with the following acute symptoms: joint pain (arthralgia), rhinorrhea (runny nose), ageusia (loss of taste), and pharyngitis (sore throat), what are the most likely differential diagnoses, ranked by probability, considering the following factors:

*   **Current prevalent illnesses in the New Orleans, Louisiana area (including but not limited to respiratory viruses, bacterial infections, and mosquito-borne illnesses).**
*   **Seasonal considerations (e.g., time of year, typical viral circulation patterns).**
*   **The patient's age and sex as risk factors for specific conditions.**
*   **The relative likelihood of each diagnosis given the specific combination of symptoms.**
*   **Include considerations for both common and less common, but potent

In [12]:
def grounding_func(input_prompt): 
    config_with_search = types.GenerateContentConfig(
        tools=[types.Tool(google_search=types.GoogleSearch())],
    )
    response = client.models.generate_content(
        model='gemini-2.0-flash', 
        contents=input_prompt, 
        config=config_with_search,
    )
    rc = response.candidates[0]
    return rc.content.parts[0].text

prompt_config = get_general_model_config(0.1, 1, 1000)

few_shot_result = few_shot_func(get_json_form(patient_symptom_df.iloc[0]), few_shot_prompt, model_config)

generate_prompt_prompt = few_shot_result + "What disease are they most likely to experience now?"

enhanced_prompt = enhance_prompt(prompt_config, generate_prompt_prompt)

Markdown(grounding_func(enhanced_prompt))

Okay, I can help you with a differential diagnosis for this patient. I'll generate some search queries to get the most up-to-date information about prevalent illnesses and outbreaks in the 70112 zip code, as well as common conditions presenting with those symptoms.



In [13]:
"""
config_with_search = types.GenerateContentConfig(
    tools=[types.Tool(google_search=types.GoogleSearch())],
)

response = client.models.generate_content(
    model='gemini-2.0-flash', 
    contents=prompt, 
    config=config_with_search,
)

rc = response.candidates[0]

Markdown(rc.content.parts[0].text)
"""

"\nconfig_with_search = types.GenerateContentConfig(\n    tools=[types.Tool(google_search=types.GoogleSearch())],\n)\n\nresponse = client.models.generate_content(\n    model='gemini-2.0-flash', \n    contents=prompt, \n    config=config_with_search,\n)\n\nrc = response.candidates[0]\n\nMarkdown(rc.content.parts[0].text)\n"