In [1]:
# PATIENT-CONCERN CLASSIFIER

# INSTALLATION: Use pip installs for required packages
!pip install -q pandas ibm-watson-machine-learning
!pip install wget -q
!pip install scikit-learn -q
!pip install "ibm-watson-machine-learning>=1.0.310" -q
!pip install ibm-cos-sdk -q

In [2]:
# IBM Cloud credentials
#
# SECURITY: an API key must never be stored in the notebook itself. Any key
# that was ever committed alongside this notebook should be treated as
# compromised and rotated in IBM Cloud IAM.
import os
from getpass import getpass

# Regional watsonx.ai / Watson Machine Learning endpoint (us-south).
url = "https://us-south.ml.cloud.ibm.com"

# Prefer the IBMCLOUD_API_KEY environment variable so the notebook can run
# unattended; otherwise prompt without echoing the secret to the output.
apikey = os.environ.get("IBMCLOUD_API_KEY") or getpass(
    "Please enter your IBM Cloud API key (hit enter): "
)

# Credential mapping passed to the foundation-model client.
credentials = {
    "url": url,
    "apikey": apikey,
}

In [3]:
# PROJECT ID
import os

# Take the project id from the PROJECT_ID environment variable when it is
# set; only fall back to an interactive prompt when the variable is absent.
project_id = os.environ.get("PROJECT_ID")
if project_id is None:
    project_id = input("Please enter your project_id (hit enter): ")

# Echo the value so the reader can confirm which project is in use.
project_id

'a599d7d2-6918-4960-aad3-7af3e9f4851d'

In [4]:
# IBM COS (Cloud Object Storage)
import pandas as pd
import types
from botocore.client import Config
import ibm_boto3

In [5]:
# Connection settings for the IBM Cloud Object Storage client (S3 API,
# IAM API-key authentication, us-south public endpoint).
cos_settings = dict(
    service_name='s3',
    ibm_api_key_id=apikey,
    ibm_auth_endpoint='https://iam.cloud.ibm.com/oidc/token',
    config=Config(signature_version='oauth'),
    endpoint_url='https://s3.us-south.cloud-object-storage.appdomain.cloud',
)
cos_client = ibm_boto3.client(**cos_settings)

# Bucket and object that hold the labelled patient-concern dataset.
bucket = 'patient-concern-classifier'
object_key = 'patient_concern_dataset.csv'

In [6]:
# Load dataset
import io

# Fetch the CSV object from Cloud Object Storage.
body = cos_client.get_object(Bucket=bucket, Key=object_key)['Body']

# Read the whole payload into memory and give pandas a regular seekable
# buffer. This replaces patching `__iter__` onto the stream instance (Python
# looks dunder methods up on the type, so an instance-level patch does not
# make the object iterable) and ensures the HTTP stream is always closed.
try:
    data = pd.read_csv(io.BytesIO(body.read()))
finally:
    body.close()

# Preview the first rows.
data.head()

Unnamed: 0,ID,Concern,Department
0,1,Bleeding gums during brushing every morning.,Dentistry
1,2,High blood pressure and dizziness occasionally.,Cardiology
2,3,Twisted ankle that's now swollen.,Orthopedics
3,4,Unusual spots on my arm that are spreading.,Dermatology
4,5,Joint stiffness in the morning and after rest.,Orthopedics


In [7]:
# Class balance: number of concerns recorded for each department
department_counts = data['Department'].value_counts()
department_counts

Department
Dentistry           89
Dermatology         86
Orthopedics         85
General Medicine    84
Pediatrics          82
Cardiology          74
Name: count, dtype: int64

In [8]:
# Train-test split
from sklearn.model_selection import train_test_split

In [9]:
# Features (free-text concern) and target (department) taken from the dataset.
concern_column = data['Concern']
department_column = data['Department']

# 70/30 split, stratified on the department so both parts keep the same
# class proportions; the fixed seed makes the split repeatable.
split_parts = train_test_split(
    concern_column,
    department_column,
    test_size=0.3,
    random_state=33,
    stratify=department_column,
)
data_train, data_test, y_train, y_test = split_parts

# Re-assemble each part as a two-column frame (text + label).
data_train = pd.DataFrame({'Concern': data_train, 'Department': y_train})
data_test = pd.DataFrame({'Concern': data_test, 'Department': y_test})

In [10]:
# Select model
from ibm_watson_machine_learning.foundation_models.utils.enums import ModelTypes
from ibm_watson_machine_learning.foundation_models import Model
from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams

In [11]:
# Foundation model used for the classification prompts: the FLAN_T5_XXL
# member of the SDK's ModelTypes enum.
model_id = ModelTypes.FLAN_T5_XXL

In [12]:
# Text-generation parameters.
# Greedy decoding keeps the output deterministic. The token budget must be
# large enough to spell a complete department name: a one-token limit cuts
# off multi-word labels such as "General Medicine", so allow up to 10 tokens
# (generation still stops early at the model's end-of-sequence token).
parameters = {
    GenParams.DECODING_METHOD: "greedy",
    GenParams.RANDOM_SEED: 33,
    GenParams.REPETITION_PENALTY: 1,
    GenParams.MIN_NEW_TOKENS: 1,
    GenParams.MAX_NEW_TOKENS: 10
}

# Client bound to the selected foundation model, credentials and project.
model = Model(
    model_id=model_id,
    params=parameters,
    credentials=credentials,
    project_id=project_id
)

# Display the model's metadata as a sanity check that the client is usable.
model.get_details()



{'model_id': 'google/flan-t5-xxl',
 'label': 'flan-t5-xxl-11b',
 'provider': 'Google',
 'source': 'Hugging Face',
 'functions': [{'id': 'text_generation'}],
 'short_description': 'flan-t5-xxl is an 11 billion parameter model based on the Flan-T5 family.',
 'long_description': 'flan-t5-xxl (11B) is an 11 billion parameter model based on the Flan-T5 family. It is a pretrained T5 - an encoder-decoder model pre-trained on a mixture of supervised / unsupervised tasks converted into a text-to-text format, and fine-tuned on the Fine-tuned Language Net (FLAN) with instructions for better zero-shot and few-shot performance.',
 'terms_url': 'https://huggingface.co/google/flan-t5-xxl/blob/main/README.md',
 'input_tier': 'class_2',
 'output_tier': 'class_2',
 'number_params': '11b',
 'min_shot_size': 0,
 'task_ids': ['question_answering',
  'summarization',
  'retrieval_augmented_generation',
  'classification',
  'generation',
  'extraction'],
 'tasks': [{'id': 'question_answering', 'ratings': {'

In [13]:
# Prepare few-shot prompt
# Task description placed at the top of every prompt: it names the task,
# lists the six allowed departments and announces the examples that follow.
instruction = (
    "Classify the patient's concern into the correct department.\n"
    "Choose one of the following:\n"
    "General Medicine, Pediatrics, Dermatology, Dentistry, Orthopedics, Cardiology.\n"
    "\n"
    "Use the examples below for reference.\n"
)

In [14]:
# Few-shot examples
# Work on a separate frame (training concerns plus their department labels)
# so that sampling examples never touches `data_train` itself.
data_train_and_labels = data_train.assign(Department=y_train)

In [15]:
# Draw two labelled concerns per department to serve as in-prompt examples.
# `groupby(...).sample` is seeded so the prompt is identical on every run,
# and it avoids `groupby(...).apply`, whose handling of the grouping column
# is deprecated in recent pandas releases.
sampled_examples = (
    data_train_and_labels
    .groupby('Department')
    .sample(n=2, random_state=33)
)

# Render each example with labels that match the task (concern -> department).
# The previous template began with "\s", an invalid escape sequence that put
# a literal backslash into the prompt, and used "sentence"/"sentiment"
# labels that do not describe this classification task.
few_shot_example = [
    f"\tConcern:\t{concern}\n\tDepartment: {department}\n"
    for concern, department in zip(
        sampled_examples['Concern'], sampled_examples['Department']
    )
]

# Single text block that is inserted between the instruction and the query.
few_shot_examples = '\n'.join(few_shot_example)

In [16]:
# Prepare input
# Wrap every test concern in a small dict keyed by "input".
few_shot_inputs_ = []
for text in data_test['Concern'].values:
    few_shot_inputs_.append({"input": text})

# Show the first two test concerns next to their true department.
for idx in (0, 1):
    sample_text = few_shot_inputs_[idx]['input']
    print(f"The sentence example {idx+1} is:\n {sample_text}\n")
    actual_department = y_test.iloc[idx]
    print(f"\tActual Category: {actual_department}\n")

The sentence example 1 is:
 Severe toothache and gum bleeding started last night.

	Actual Category: Dentistry

The sentence example 2 is:
 Itchy and flaky scalp.

	Actual Category: Dermatology



In [17]:
# Generate predictions
# The instruction and the few-shot examples are the same for every request,
# so that part of the prompt is assembled once up front.
prompt_prefix = instruction + '\n' + few_shot_examples + '\n'

# Query the model for the first ten test concerns and keep the first result
# entry of each response.
results = []
for inp in few_shot_inputs_[:10]:
    prompt = prompt_prefix + inp['input']
    results.append(model.generate(prompt)["results"][0])


In [18]:
# Evaluate results
# Ground-truth departments for the ten evaluated rows, and the text the
# model generated for each of them (same order).
y_true = list(y_test.values[:10])
y_pred = [generation['generated_text'] for generation in results]

In [19]:
# Prepare DataFrame for CSV export
# Only the first ten test rows were sent to the model, so the export is
# limited to those rows as well.
evaluated_rows = data_test.iloc[:10]
export_columns = {
    "Concern": evaluated_rows["Concern"].values,
    "Predicted Department": y_pred,
    "Actual Department": evaluated_rows["Department"].values,
}
export_df = pd.DataFrame(export_columns)

# Save to CSV (without the positional index column)
export_df.to_csv("patient_concern_predictions.csv", index=False)
print("✅ Results saved to 'patient_concern_predictions.csv'")


✅ Results saved to 'patient_concern_predictions.csv'


In [20]:
# Local CSV produced by the export step; it is stored in the bucket under
# the same name.
file_name = 'patient_concern_predictions.csv'
upload_target = {"Bucket": bucket, "Key": file_name}

# Stream the file in binary mode to Cloud Object Storage.
with open(file_name, 'rb') as file_data:
    cos_client.upload_fileobj(Fileobj=file_data, **upload_target)

print(f"✅ Uploaded '{file_name}' to bucket '{bucket}'")


✅ Uploaded 'patient_concern_predictions.csv' to bucket 'patient-concern-classifier'
