In [None]:
############################################################################
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
############################################################################
#  DISCLAIMER
# The code shared by CEs with customers must be limited to non-production
# demo/example code or infrastructure automation scripts.
############################################################################
## Colab: @scavaletto, blackbelt team

# Vertex AI SDK for Python: ejemplos aplicados, clasificador **modelo text-bison**
<table align="left">
  <td>
    <a href="https://colab.sandbox.google.com/drive/1rlfzFxaWxUr3tjRSt2n-7IsTr-nFzEZl?resourcekey=0-3_sbCFpJ7OChy_uIm4X5Rg#scrollTo=9y2D2Fp99-7b">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Colab logo"> Run in Colab
    </a>
  </td>
  <td>
    <a href=" ">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo">
      View on GitHub
    </a>
  </td>
  <td>
<a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/vertex-ai-samples/main/notebooks/official/automl/sdk_automl_forecasting_hierarchical_batch.ipynb" target='_blank'>
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo">
      Open in Vertex AI Workbench
    </a>
  </td>
</table>

## Objetivo

Crear un clasificador a partir de ejemplos. Los ejemplos provienen de un dataset público existente que contiene datos de diagnóstico médico con su respectiva clasificación.

## Instalación de paquetes básicos

In [None]:
# Authenticate the current Colab user via google.colab.auth before any Google Cloud call is made.
from google.colab import auth as google_auth
google_auth.authenticate_user()

In [None]:
!pip install google-cloud-aiplatform

# Install HuggingFace Datasets and Evaluation
! pip install datasets
! pip install evaluate

! ^^^^ No olvidar hacer click en botón "Restart runtime" que se encuentra arriba.

## Autenticación

Cada usuario debe introducir los parámetros específicos de su proyecto/consola de GCP

In [None]:
# Enter the parameters of your own GCP project
PROJECT_ID = "cloud-llm-preview1"  # @param {type:"string"}
LOCATION = "us-central1"  # @param {type:"string"}

In [None]:
# Libraries used to work with the public dataset
from typing import Union
from ast import literal_eval
import textwrap

from datasets import load_dataset
import evaluate
import pandas as pd

# Vertex AI SDK (preview) libraries for LLMs

from google.cloud import aiplatform
import vertexai
from vertexai.preview.language_models import TextGenerationModel, ChatModel
# Initialise the Vertex AI SDK with the project and location set in the configuration cell
vertexai.init(project=PROJECT_ID, location=LOCATION)

## Carga de dataset médico de ejemplo

Para este ejemplo vamos a utilizar un dataset existente en la librería datasets y el dataset específico se llama argilla/medical-domain

In [None]:
# Load the public argilla/medical-domain dataset and split its 'train' split 70/30
# into train/test. A fixed seed keeps the split (and therefore every row-indexed
# example further down) reproducible across kernel restarts.
dataset = load_dataset("argilla/medical-domain")
dataset = dataset['train'].train_test_split(test_size=0.3, seed=42)



  0%|          | 0/1 [00:00<?, ?it/s]

In [None]:
dataset.shape # (rows, columns) per split: e.g. 3,476 training cases with 13 fields each, one of which is the prediction

{'train': (3476, 13), 'test': (1490, 13)}

In [None]:
# inspect the structure of the data (splits, feature names, row counts)
dataset

DatasetDict({
    train: Dataset({
        features: ['text', 'inputs', 'prediction', 'prediction_agent', 'annotation', 'annotation_agent', 'multi_label', 'explanation', 'id', 'metadata', 'status', 'event_timestamp', 'metrics'],
        num_rows: 3476
    })
    test: Dataset({
        features: ['text', 'inputs', 'prediction', 'prediction_agent', 'annotation', 'annotation_agent', 'multi_label', 'explanation', 'id', 'metadata', 'status', 'event_timestamp', 'metrics'],
        num_rows: 1490
    })
})

In [None]:
# look at a single training example: the row at index 200 (not the first 200 rows)
dataset['train'][200]

{'text': 'REASON FOR CATHETERIZATION:,  ST-elevation myocardial infarction.,PROCEDURES UNDERTAKEN,1.  Left coronary system cineangiography.,2.  Right coronary system cineangiography.,3.  Left ventriculogram.,4.  PCI to the left circumflex with a 3.5 x 12 and a 3.5 x 8 mm Vision bare-metal stents postdilated with a 3.75-mm noncompliant balloon x2.,PROCEDURE: , After all risks and benefits were explained to the patient, informed consent was obtained.  The patient was brought to the cardiac cath suite.  Right groin was prepped in usual sterile fashion.  Right common femoral artery was cannulated with the modified Seldinger technique.  A 6-French sheath was introduced.  Next, Judkins right catheter was used to engage the right coronary artery and cineangiography was recorded in multiple views.  Next, an EBU 3.5 guide was used to engage the left coronary system.  Cineangiography was recorded in several views and it was noted to have a 99% proximal left circumflex stenosis.  Angiomax bolus a

In [None]:
def _build_io_frame(split) -> pd.DataFrame:
    """Turn one dataset split into a two-column (input_text, output_text) frame.

    Args:
        split: one split of the loaded dataset (e.g. ``dataset['train']``); only
            its 'text' and 'prediction' columns are used.

    Returns:
        A new DataFrame with columns ``input_text`` (the transcript) and
        ``output_text`` (the stripped label of the first prediction entry).
    """
    frame = pd.DataFrame(split, columns=['text', 'prediction'])
    # 'prediction' holds a list of dicts; the label of its first entry is the class.
    labels = frame['prediction'].apply(lambda x: literal_eval(str(x))[0]['label'].strip())
    # Build a new frame instead of mutating in place, so re-running the cell is safe.
    return (
        frame.assign(label=labels)
        .drop(columns=['prediction'])
        .rename(columns={'text': 'input_text', 'label': 'output_text'})
    )


# separate dataframes for training and testing, built by the same pipeline
df_train = _build_io_frame(dataset['train'])
df_test = _build_io_frame(dataset['test'])

df_train.head()

Unnamed: 0,input_text,output_text
0,"PROCEDURES:,1. Esophagogastroduodenoscopy.,2....",Surgery
1,"EXAM:, CT Abdomen & Pelvis W&WO Contrast, ,RE...",Gastroenterology
2,"Chief Complaint:, Dark urine and generalized w...",Consult - History and Phy.
3,"REFERRING DIAGNOSIS: , Motor neuron disease.,P...",Neurology
4,"PREOPERATIVE DIAGNOSIS: , Gangrene osteomyelit...",Podiatry


In [None]:
# How many training cases there are per category
df_train['output_text'].value_counts()

Surgery                          755
Consult - History and Phy.       373
Orthopedic                       253
Cardiovascular / Pulmonary       251
Radiology                        181
General Medicine                 178
Neurology                        167
Gastroenterology                 159
Urology                          108
SOAP / Chart / Progress Notes    105
Obstetrics / Gynecology          103
Discharge Summary                 76
ENT - Otolaryngology              69
Neurosurgery                      66
Hematology - Oncology             64
Ophthalmology                     60
Emergency Room Reports            56
Nephrology                        51
Pediatrics - Neonatal             47
Pain Management                   43
Psychiatry / Psychology           37
Podiatry                          35
Office Notes                      33
Cosmetic / Plastic Surgery        23
Dentistry                         21
Physical Medicine - Rehab         17
Dermatology                       16
L

## Llamado al modelo fundacional (en este caso es Text-Bison)

In [None]:
# Generation settings passed to the predict calls in the cells below.
parameters = dict(
    max_output_tokens=256,
    temperature=0.1,
    top_k=40,
    top_p=0.8,
)

# Handle to the text-bison foundation model.
model = TextGenerationModel.from_pretrained("text-bison")

## Zero-shot


In [None]:
df_test['input_text'][0] # We want to see how the model classifies an arbitrary case when the prompt carries no labelled examples

"PREOPERATIVE DIAGNOSIS: , Right profound mixed sensorineural conductive hearing loss.,POSTOPERATIVE DIAGNOSIS:,  Right profound mixed sensorineural conductive hearing loss.,PROCEDURE PERFORMED:,  Right middle ear exploration with a Goldenberg TORP reconstruction.,ANESTHESIA:,  General ,ESTIMATED BLOOD LOSS:,  Less than 5 cc.,COMPLICATIONS:,  None.,DESCRIPTION OF FINDINGS:,  The patient consented to revision surgery because of the profound hearing loss in her right ear.  It was unclear from her previous operative records and CT scan as to whether or not she was a reconstruction candidate.  She had reports of stapes fixation as well as otosclerosis on her CT scan.,At surgery, she was found to have a mobile malleus handle, but her stapes was fixed by otosclerosis.  There was no incus.  There was no specific round window niche.  There was a very minute crevice; however, exploration of this area did not reveal a niche to a round window membrane.  The patient had a type of TORP prosthesis, 

In [None]:
# Zero-shot: classify one test transcript with no labelled examples in the prompt.
# A single index selects both the transcript and its ground-truth label, so the
# printed "Actual value" always belongs to the transcript that was classified.
sample_idx = 0
context = df_test['input_text'][sample_idx]
prompt = f"Text: {context}, \n\nLabel: "

print(prompt)
print("*"*80)

response = model.predict(
    prompt=prompt,
    **parameters)
print(str(response))

print(f"Actual value: {df_test['output_text'][sample_idx]}")

Text: SUMMARY:  ,This patient is one-day postop open parathyroid exploration with subtotal parathyroidectomy and intraoperative PTH monitoring for parathyroid hyperplasia.  She has had an uneventful postoperative night.  She put out 1175 mL of urine since surgery.  Her incision looks good.  IV site and extremities are unremarkable.,LABORATORY DATA:  ,Her calcium level was 7.5 this morning.  She has been on three Tums orally b.i.d. and I am increasing three Tums orally q.i.d. before meals and at bedtime.,PLAN:,  I will heparin lock her IV, advance her diet, and ambulate her.  I have asked her to increase her prednisone when she goes home.  She will double her regular dose for the next five days.  I will advance her diet.  I will continue to monitor her calcium levels throughout the day.  If they stabilize, I am hopeful that she will be ready for discharge either later today or tomorrow.  She will be given Lortab Elixir 2 to 4 teaspoons orally every four hours p.r.n. pain, dispensed #240

El modelo realiza su propia clasificación generada a base de lo que conoce

### Few-Shot

In [None]:
# Few-shot: prepend five labelled examples to the prompt.
# The examples are taken from the training split so that no test row (and its
# label) leaks into a prompt that is then judged against the test split.
exemplers = [f'Text: {input_text} \nLabel: {output_text.strip()}'
           for input_text, output_text in df_train.head(5).values]
exemplers = '\n\n'.join(exemplers)

# One index selects both the transcript to classify and its ground-truth label.
sample_idx = 200
context = df_test['input_text'][sample_idx]
prompt = f"{exemplers} \n\nText: {context}, \nLabel: " # five labelled examples first, then the case to classify (test row 200)

print(prompt)
print("-"*80)

response = model.predict(
    prompt=prompt,
    **parameters)
print(f"Predicted: {str(response)}")
print("-"*80)

print(f"Actual value: {df_test['output_text'][sample_idx]}")

Text: PREOPERATIVE DIAGNOSIS: , Right profound mixed sensorineural conductive hearing loss.,POSTOPERATIVE DIAGNOSIS:,  Right profound mixed sensorineural conductive hearing loss.,PROCEDURE PERFORMED:,  Right middle ear exploration with a Goldenberg TORP reconstruction.,ANESTHESIA:,  General ,ESTIMATED BLOOD LOSS:,  Less than 5 cc.,COMPLICATIONS:,  None.,DESCRIPTION OF FINDINGS:,  The patient consented to revision surgery because of the profound hearing loss in her right ear.  It was unclear from her previous operative records and CT scan as to whether or not she was a reconstruction candidate.  She had reports of stapes fixation as well as otosclerosis on her CT scan.,At surgery, she was found to have a mobile malleus handle, but her stapes was fixed by otosclerosis.  There was no incus.  There was no specific round window niche.  There was a very minute crevice; however, exploration of this area did not reveal a niche to a round window membrane.  The patient had a type of TORP prosthe

In [None]:
# `response` holds the few-shot prediction for test row 200 (computed in the
# previous cell), so the transcript and label printed next to it must be row 200 too.
few_shot_idx = 200
print(f"Transcription: {df_test['input_text'][few_shot_idx]}")
print(f"Actual value: {df_test['output_text'][few_shot_idx]}")
print(f"Predicted value: {str(response)}")

Transcription: PREOPERATIVE DIAGNOSIS: , Blighted ovum, severe cramping.,POSTOPERATIVE DIAGNOSIS:,  Blighted ovum, severe cramping.,OPERATION PERFORMED: , Vacuum D&C.,DRAINS: , None.,ANESTHESIA: , General.,HISTORY: , This 21-year-old white female gravida 1, para 0 who was having severe cramping and was noted to have a blighted ovum with her first ultrasound in the office.  Due to the severe cramping, a decision to undergo vacuum D&C was made.  At the time of the procedure, moderate amount of tissue was obtained.,PROCEDURE:  ,The patient was taken to the operating room and placed in a supine position, at which time a general form of anesthesia was administered by the anesthesia department.  The patient was then repositioned in a modified dorsal lithotomy position and then prepped and draped in the usual fashion.  A weighted vaginal speculum was placed in the posterior vaginal vault.  Anterior lip of the cervix was grasped with single tooth tenaculum, and the cervix was dilated to approx

In [None]:
# load the "accuracy" metric from the `evaluate` library (no score is computed here)
accuracy_few_shot = evaluate.load("accuracy")

In [None]:
# print the loaded metric module (its description and usage text), not an accuracy score
print(accuracy_few_shot)

EvaluationModule(name: "accuracy", module_type: "metric", features: {'predictions': Value(dtype='int32', id=None), 'references': Value(dtype='int32', id=None)}, usage: """
Args:
    predictions (`list` of `int`): Predicted labels.
    references (`list` of `int`): Ground truth labels.
    normalize (`boolean`): If set to False, returns the number of correctly classified samples. Otherwise, returns the fraction of correctly classified samples. Defaults to True.
    sample_weight (`list` of `float`): Sample weights Defaults to None.

Returns:
    accuracy (`float` or `int`): Accuracy score. Minimum possible value is 0. Maximum possible value is 1.0, or the number of examples input, if `normalize` is set to `True`.. A higher score means higher accuracy.

Examples:

    Example 1-A simple example
        >>> accuracy_metric = evaluate.load("accuracy")
        >>> results = accuracy_metric.compute(references=[0, 1, 2, 0, 1, 2], predictions=[0, 1, 1, 2, 1, 0])
        >>> print(results)
    

In [None]:
# Five labelled examples used as the few-shot prefix of every prompt.
# They come from the training split: taking them from df_test would put the very
# rows evaluated below (with their labels) inside the prompt.
exemplers = [f'Text: {input_text} \nLabel: {output_text.strip()}'
           for input_text, output_text in df_train.head(5).values]
exemplers = '\n\n'.join(exemplers)

def predict(model, input_text, exemplers, parameters):
    """Classify one transcript with a few-shot prompt.

    Args:
        model: object exposing ``predict(prompt=..., **kwargs)``.
        input_text: the transcript to classify.
        exemplers: already-joined block of labelled examples placed before the transcript.
        parameters: mapping of keyword arguments forwarded to ``model.predict``.

    Returns:
        The model's response converted to ``str``.
    """
    few_shot_prompt = f"{exemplers} \n\nText: {input_text}, \nLabel: "
    raw_response = model.predict(prompt=few_shot_prompt, **parameters)
    return str(raw_response)


#df_test['pred'] = df_test.apply(lambda row: predict(model, row['input_text'], exemplers, parameters), axis=1)

# Run the few-shot classifier on the first five test rows and print each
# prediction next to its ground-truth label.
divider = "*" * 80
for transcript, expected_label in df_test.head(5).itertuples(index=False, name=None):
    predicted_label = predict(model, transcript, exemplers, parameters)
    print(f"Predicted: {predicted_label}")
    print(f"Actual value: {expected_label}")
    print(divider)

Predicted: ENT - Otolaryngology
Actual value: Surgery
********************************************************************************
Predicted: Gastroenterology
Actual value: Nephrology
********************************************************************************
Predicted: Gastroenterology
Actual value: ENT - Otolaryngology
********************************************************************************
Predicted: Gastroenterology
Actual value: ENT - Otolaryngology
********************************************************************************
Predicted: Orthopedic
Actual value: Orthopedic
********************************************************************************


### Tuning

Usamos algunos ejemplos para ajustar (tuning) el modelo fundacional: se toman hasta dos casos por cada una de las etiquetas y se ajusta el modelo con ellos. Posteriormente revisamos qué tan eficiente resulta con esta modificación.

In [None]:
# Presumably the display name meant for the tuned model.
# NOTE(review): not referenced by any of the visible later cells — confirm whether it should be passed to the tuning call.
model_display_name = 'medical_transcript_classifier'

In [None]:
# Handle to the base text-bison model on which the tuning call is invoked.
# `from_pretrained` is called on the class directly: constructing an instance
# first (with a made-up endpoint name) only produced an object that was discarded.
tuned_model = TextGenerationModel.from_pretrained("text-bison")

In [None]:
# Build the tuning set: for each label, sample up to two training rows (all of them
# when a label has fewer than two); random_state=43 makes the sample repeatable.
# The index is reset afterwards so rows are numbered 0..n-1.
df_tune = df_train.groupby('output_text', as_index=False, group_keys=False).apply(lambda x: x.sample(min(2, len(x)), random_state=43)).reset_index(drop=True)
# display the resulting tuning set
df_tune

Unnamed: 0,input_text,output_text
0,"HISTORY: , A 34-year-old male presents today s...",Allergy / Immunology
1,"HISTORY:, I had the pleasure of meeting and e...",Allergy / Immunology
2,"EXTERNAL EXAMINATION: , The autopsy is begun a...",Autopsy
3,"EXTERNAL EXAMINATION: , The body is that of a ...",Autopsy
4,"PAST MEDICAL HISTORY: ,She had a negative str...",Bariatrics
...,...,...
75,"CURRENT MEDICATIONS:, Lortab.,PREVIOUS MEDICA...",Speech - Language
76,"PREOPERATIVE DIAGNOSES:,1. Clinical stage T2,...",Surgery
77,"PREOPERATIVE DIAGNOSIS: , Adenocarcinoma of th...",Surgery
78,"PREOPERATIVE DIAGNOSIS: , Phimosis.,POSTOPERAT...",Urology


In [None]:
# Number of tuning examples per label.
df_tune['output_text'].value_counts()

Allergy / Immunology             2
Autopsy                          2
Neurology                        2
Neurosurgery                     2
Obstetrics / Gynecology          2
Office Notes                     2
Ophthalmology                    2
Orthopedic                       2
Pain Management                  2
Pediatrics - Neonatal            2
Physical Medicine - Rehab        2
Podiatry                         2
Psychiatry / Psychology          2
Radiology                        2
Rheumatology                     2
SOAP / Chart / Progress Notes    2
Sleep Medicine                   2
Speech - Language                2
Surgery                          2
Nephrology                       2
Letters                          2
Lab Medicine - Pathology         2
Diets and Nutritions             2
Bariatrics                       2
Cardiovascular / Pulmonary       2
Chiropractic                     2
Consult - History and Phy.       2
Cosmetic / Plastic Surgery       2
Dentistry           

In [None]:
# Launch the tuning job using the examples in df_tune.
# NOTE(review): train_steps=1000 is above the 100-500 range that this notebook's
# "Configuraciones recomendadas" section suggests for classification — confirm it is intended.
tuned_model.tune_model(
    training_data=df_tune,
    # Optional:
    train_steps=1000,
    tuning_job_location="europe-west4",
    tuned_model_location="us-central1",
)

In [None]:
# Load a tuned model by its resource name. The names are looked up from the base
# model, so this cell does not depend on any variable left over from another cell.
# NOTE(review): this picks the last listed name — confirm it is the model tuned
# in this notebook, or paste the exact resource name here instead.
tuned_model_names = TextGenerationModel.from_pretrained("text-bison").list_tuned_model_names()
tuned_model = TextGenerationModel.get_tuned_model(tuned_model_name=tuned_model_names[-1])

In [None]:
# Classify one test transcript with the tuned model.
# One index selects both the transcript and its ground-truth label.
sample_idx = 35
context = df_test['input_text'][sample_idx]

# The tuning examples used the raw transcript as input_text, so the tuned model is
# queried with the raw transcript (no "Text: ... Label:" wrapper). Print exactly
# what is sent to the model rather than a template that is never used.
print(context)
print("*"*80)

response = tuned_model.predict(
    prompt=context,
    **parameters)

print(f"Actual value: {df_test['output_text'][sample_idx]}")
print(f"Predicted value: {str(response)}")

### Configuraciones recomendadas

Aquí tenemos algunas recomendaciones de configuración para el tuning del modelo fundacional según la tarea

# Clasificación:

*   Se recomienda 100+ train set size.
*   Learning Rate = 3.0.
*   Training steps [100-500]. Pueden intentar mas de un valor para obtener el mejor performance en un dataset en particular (e.g. 100, 200, 500).

# Extractive QA:

* Se recomienda 100+ train set size
* Learning Rate = 3.0.
* Training steps [100-500]. Pueden intentar mas de un valor para obtener el mejor performance en un dataset en particular (e.g. 100, 200, 500).
# Summarization:

* Se recomienda 500+ train set size, pero al menos 100 para resultados decentes.
* Learning Rate = 3.0
* Training steps [200-1000]. Pueden intentar mas de un valor para obtener el mejor performance en un dataset en particular (e.g. 200, 500, 800).