In [None]:
!pip install -U -q "google-generativeai>=0.7.2"

In [None]:
# Make Google Drive visible to this Colab runtime under /content/drive so the
# input CSV can be read with an ordinary file path.
from google.colab import drive

drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
import google.generativeai as genai
import vertexai
import pandas as pd
import json

In [None]:
# Configure the Gemini SDK with an API key that is NOT stored in the notebook.
# The key is taken from the GOOGLE_API_KEY environment variable when present;
# otherwise the user is prompted for it (input is not echoed), so the secret
# never ends up in a saved or shared copy of this file.
import getpass
import os

api_key = os.environ.get("GOOGLE_API_KEY") or getpass.getpass("Gemini API key: ")
if not api_key:
    # Fail here with a clear message instead of at the first model call.
    raise RuntimeError("No Gemini API key provided; set GOOGLE_API_KEY or enter it when prompted.")

genai.configure(api_key=api_key)

In [None]:
# Google Cloud project and region used to initialise the Vertex AI SDK.
# The project ID must be the bare identifier: the square brackets found in
# template placeholders (e.g. "[your-project-id]") are not legal characters in
# a project ID, so they are removed here.
PROJECT_ID = "med-extractor"
# NOTE(review): confirm that the services this project needs are offered in
# this region before relying on it.
LOCATION = "africa-south1"


vertexai.init(project=PROJECT_ID, location=LOCATION)

In [None]:
# System prompt for the model. It describes the task (turning a list of
# flattened medical-note strings into a list of structured JSON objects), names
# the required top-level keys (patient_info, visit_motivation, symptoms,
# vital_signs), states how missing values are represented, and gives one worked
# input/output example with two notes.
# NOTE(review): the sentence "...example below.Ensure each entry..." is missing
# a space after the period. It is left as-is because this text is sent to the
# model verbatim and any edit changes the prompt.
system_instructions = """
You are an intelligent and highly capable agent tasked with assisting medical practitioners in standardizing and structuring medical notes into JSON format.

Your role is to process unstructured medical notes provided in a list and convert each entry into a well-structured JSON object.

The JSON output must **exactly match the structure and format** described in the example below.Ensure each entry starts on a new line, with no unnecessary indentation, incorrect line breaks, or formatting deviations

### Instructions:
1. **Input Format**: The medical notes will be provided as a list of text entries. Each entry contains details about a patient's medical visit.
2. **Output Format**: Convert each entry into a structured JSON object. Ensure the output adheres to JSON standards with proper nesting, key-value pairs, and an organized structure.
3. **Key Sections**: Each JSON object should include the following:
    - **patient_info**: Contains the patient's age and gender.
    - **visit_motivation**: Describes the reason for the visit.
    - **symptoms**: A list of symptoms reported by the patient.
    - **vital_signs**: Includes measurements such as blood pressure, heart rate, respiratory rate, and other relevant vitals.
4. **Error Handling**: If any required information is missing, infer the structure but leave the value as `null` or an empty array where appropriate.
5. **Consistency**: Ensure the keys and structure are consistent across all JSON objects.

### Example:
**Input**:
medical_notes = [
  '"patient_info": "age": 45, "gender": "Female", "visit_motivation": "Hypertension (High Blood Pressure)", "symptoms": ["headache", "difficulty_breathing", "chest_pain", "dizziness", "sneezing", "blurred_vision", "wheezing", "pale_skin"], "vital_signs": "blood_pressure": "systolic": "value": 136, "unit": "mmHg", "diastolic": "value": 115, "unit": "mmHg", "heart_rate": "value": 88, "unit": "bpm", "respiratory_rate": "value": 16, "unit": "breaths/min", "cholesterol_level": "value": 187.5, "unit": "mg/dL"',
  '"patient_info": "age": 18, "gender": "Female", "visit_motivation": "Common Cold", "symptoms": ["cough", "fatigue", "runny_nose", "rash", "sneezing", "blurred_vision", "wheezing", "swollen_lymph_nodes", "anxiety"], "vital_signs": "respiratory_rate": "value": 19, "unit": "breaths/min", "oxygen_saturation": "value": 98.0, "unit": "%"'
]

**Output**:
[
  {"patient_info": {"age": 45, "gender": "Female"}, "visit_motivation": "Hypertension (High Blood Pressure)", "symptoms": ["headache", "difficulty_breathing", "chest_pain", "dizziness", "sneezing", "blurred_vision", "wheezing", "pale_skin"], "vital_signs": {"blood_pressure": {"systolic": {"value": 136, "unit": "mmHg"}, "diastolic": {"value": 115, "unit": "mmHg"}}, "heart_rate": {"value": 88, "unit": "bpm"}, "respiratory_rate": {"value": 16, "unit": "breaths/min"}, "cholesterol_level": {"value": 187.5, "unit": "mg/dL"}}},
  {"patient_info": {"age": 18, "gender": "Female"}, "visit_motivation": "Common Cold", "symptoms": ["cough", "fatigue", "runny_nose", "rash", "sneezing", "blurred_vision", "wheezing", "swollen_lymph_nodes", "anxiety"], "vital_signs": {"respiratory_rate": {"value": 19, "unit": "breaths/min"}, "oxygen_saturation": {"value": 98.0, "unit": "%"}}}
]

### Performance Goals:
- **Accuracy**: Ensure the extracted information matches the input notes precisely.
- **Efficiency**: Handle long and complex notes effectively while maintaining performance.
"""


In [None]:
# Sampling settings forwarded to the model: output length cap plus the
# temperature / top-k / top-p sampling knobs.
generation_config = dict(
    max_output_tokens=6000,
    temperature=0.2,
    top_k=85,
    top_p=0.95,
)

In [None]:
# Instantiate the generative model with the sampling settings and the system
# prompt defined earlier. `safety_settings` and `tools` are not passed, so the
# library defaults apply for both.
model_name = "gemini-1.5-pro"

model = genai.GenerativeModel(
    model_name=model_name,
    system_instruction=system_instructions,
    generation_config=generation_config,
)


In [None]:
# Open a chat session on the model; automatic function calling is switched on
# for the session.
chat = model.start_chat(enable_automatic_function_calling=True)

In [None]:
# prompt = ['"patient_info": "age": 33, "gender": "Male", "visit_motivation": "Allergies", "symptoms": ["cough", "sore_throat", "nausea", "runny_nose", "rash", "sneezing", "itchy_eyes", "loss_of_taste_smell", "weight_loss"], "vital_signs": "blood_pressure": "systolic": "value": 104, "unit": "mmHg", "diastolic": "value": 71, "unit": "mmHg", "temperature": "value": 37.3, "unit": "u00b0C", "respiratory_rate": "value": 16, "unit": "breaths/min"'
# '"patient_info": "age": 33, "gender": "Male", "visit_motivation": "Allergies", "symptoms": ["cough", "sore_throat", "nausea", "runny_nose", "rash", "sneezing", "itchy_eyes", "loss_of_taste_smell", "weight_loss"], "vital_signs": "blood_pressure": "systolic": "value": 104, "unit": "mmHg", "diastolic": "value": 71, "unit": "mmHg", "temperature": "value": 37.3, "unit": "u00b0C", "respiratory_rate": "value": 16, "unit": "breaths/min"']

In [None]:
# Load the input table from the mounted Drive folder; the preview shown by the
# next cell lists its columns as "ID" and "json".
data = pd.read_csv(
    "/content/drive/MyDrive/Med_extraction/test_submission1 (3).csv"
)

In [None]:
# Preview the first two rows to sanity-check the columns that were loaded.
data.head(n=2)

Unnamed: 0,ID,json
0,10823,"""patient_info"": ""age"": 48, ""gender"": ""Male"", ""..."
1,10824,"""patient_info"": ""age"": 53, ""gender"": ""Male"", ""..."


In [None]:
# data.json.to_dict()

In [None]:
# Collect every flattened note string from the "json" column into a plain list.
prompt = data["json"].tolist()

In [None]:
# Keep only the first five notes for this request.
prompt1 = prompt[0:5]

In [None]:
# prompt1

In [None]:
# Send the selected notes to the chat session. The list is interpolated with
# its Python repr, wrapped in double quotes after a "Prompt:" label.
response = chat.send_message(
    f"""Prompt: "{prompt1}" """
)

In [None]:
# response.text

In [None]:
# Show the ID stored in the row labelled 0 of the input table.
data.loc[0, "ID"]

10823

In [None]:
# Parse the model's reply and build the results table.
#
# Reads:  `response` (the chat reply), `prompt1` (the notes that were sent) and
#         `data` (the source frame whose "ID" column identifies each note).
# Writes: `data_list` (list of parsed note objects) and `results_df`
#         (columns "ID" and "json", one row per note).
# Raises: ValueError if the reply is not a JSON array with exactly one entry
#         per note sent; json.JSONDecodeError if the reply is not valid JSON.

# In case the model wraps its JSON in a Markdown code fence (``` or ```json),
# drop the opening fence line and everything from the closing fence onward.
raw_text = response.text.strip()
if raw_text.startswith("```"):
    raw_text = raw_text.partition("\n")[2].rsplit("```", 1)[0]

data_list = json.loads(raw_text)

# A reply of the wrong shape or length cannot be matched to the input rows,
# so stop here rather than attach IDs to the wrong notes.
if not isinstance(data_list, list):
    raise ValueError(
        f"Expected a JSON array from the model, got {type(data_list).__name__}."
    )
if len(data_list) != len(prompt1):
    raise ValueError(
        f"Model returned {len(data_list)} entries for {len(prompt1)} input notes."
    )

# `prompt1` holds the first rows of `data`, so reuse their original IDs instead
# of renumbering from 1; this keeps each result joinable to its source row.
results_df = pd.DataFrame({
    "ID": data["ID"].iloc[:len(data_list)].to_list(),
    "json": [json.dumps(entry) for entry in data_list],
})

In [None]:
# Preview the first five result rows (five is the default for head()).
results_df.head()

Unnamed: 0,ID,json
0,1,"{""patient_info"": {""age"": 48, ""gender"": ""Male""}..."
1,2,"{""patient_info"": {""age"": 53, ""gender"": ""Male""}..."
2,3,"{""patient_info"": {""age"": 71, ""gender"": ""Male""}..."
3,4,"{""patient_info"": {""age"": 73, ""gender"": ""Male""}..."
4,5,"{""patient_info"": {""age"": 48, ""gender"": ""Female..."


In [None]:
# Number of rows in the results table.
results_df.shape[0]

5