<a href="https://colab.research.google.com/github/KAMAL0657/KAMAL-HUSSAIN/blob/main/Copy_of_KamalHusain_Question17_FHIR_integration.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Q 17 — FHIR-integrated Implementation (Decision Tree)

**Author:** KAMAL HUSSAIN

This notebook shows how to fetch lab test data from a FHIR server (if available), preprocess it, train a cost-sensitive Decision Tree, produce interpretability artifacts (rule export + SHAP), and export the model for mobile deployment.


In [None]:
# Install required packages in Colab (uncomment if running in Colab)
# !pip install requests pandas scikit-learn joblib shap

import os
import requests
import json
import numpy as np
import pandas as pd
from collections import Counter
from sklearn.tree import DecisionTreeClassifier, export_text
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.metrics import classification_report, confusion_matrix
import joblib

# One seed, reused for NumPy's global generator and every scikit-learn call below.
RANDOM_STATE = 42
np.random.seed(RANDOM_STATE)

# FHIR connection settings are read from the environment; an empty URL means "not configured".
FHIR_SERVER_URL = os.environ.get('FHIR_SERVER_URL', '')  # e.g., 'https://hapi.fhir.org/baseR4'
FHIR_BEARER_TOKEN = os.environ.get('FHIR_BEARER_TOKEN', '')  # if auth needed

# Demo feature schema: marker_1 ... marker_15
FEATURE_NAMES = ['marker_{}'.format(idx) for idx in range(1, 16)]
print('FHIR configured?', len(FHIR_SERVER_URL) > 0)


FHIR configured? False


In [None]:
def fetch_observations_for_patient(patient_id, server_url=FHIR_SERVER_URL, token=FHIR_BEARER_TOKEN, count=100):
    """Fetch the first page of Observation resources for one patient.

    Parameters
    ----------
    patient_id : str
        Value sent as the ``patient`` search parameter.
    server_url : str
        FHIR base URL; a trailing slash is tolerated.
    token : str
        Optional bearer token; when empty no Authorization header is sent.
    count : int
        Value sent as ``_count`` (page size requested from the server).

    Returns
    -------
    list of dict
        The Observation resources found in the returned Bundle. Only the
        first page is read; ``next`` links are not followed.

    Raises
    ------
    ValueError
        If ``server_url`` is empty.
    requests.HTTPError
        If the server answers with an error status.
    """
    if not server_url:
        raise ValueError('FHIR_SERVER_URL not configured')
    headers = {'Accept': 'application/fhir+json'}
    if token:
        headers['Authorization'] = f'Bearer {token}'
    params = {'patient': patient_id, '_count': count}
    # Strip a trailing slash so the request path never becomes '...//Observation'.
    base_url = server_url.rstrip('/')
    resp = requests.get(f'{base_url}/Observation', headers=headers, params=params, timeout=10)
    resp.raise_for_status()
    bundle = resp.json()
    # 'entry' may be absent or null when there are no matches.
    entries = bundle.get('entry') or []
    observations = []
    for entry in entries:
        resource = entry.get('resource') if isinstance(entry, dict) else None
        # A search Bundle can carry non-Observation entries (e.g. an
        # OperationOutcome); keep only the Observations.
        if isinstance(resource, dict) and resource.get('resourceType') == 'Observation':
            observations.append(resource)
    return observations


def parse_observations_to_features(observations, default_features=FEATURE_NAMES):
    """Collapse a patient's Observations into a flat ``{feature_name: value}`` dict.

    Every name in ``default_features`` starts as NaN. An observation fills a
    feature when it has a numeric ``valueQuantity.value`` and any of its
    ``code.coding`` entries has a ``code`` or ``display`` equal to that
    feature name. When several observations map to the same feature, the last
    one in ``observations`` wins.

    Parameters
    ----------
    observations : iterable of dict
        FHIR Observation resources (already decoded from JSON).
    default_features : list of str
        Feature names to look for.

    Returns
    -------
    dict
        Mapping of feature name to float (NaN where nothing matched).
    """
    feat = {f: np.nan for f in default_features}
    for obs in observations:
        if not isinstance(obs, dict):
            continue

        # Only quantity-valued observations can be turned into a number.
        quantity = obs.get('valueQuantity')
        raw_value = quantity.get('value') if isinstance(quantity, dict) else None
        if raw_value is None:
            continue
        try:
            value = float(raw_value)
        except (TypeError, ValueError):
            # Non-numeric payload: leave the feature untouched.
            continue

        code_block = obs.get('code')
        codings = code_block.get('coding') if isinstance(code_block, dict) else None
        if not isinstance(codings, list):
            continue

        # Naive mapping: the first coding whose code (preferred) or display
        # equals a feature name decides where the value goes.
        for coding in codings:
            if not isinstance(coding, dict):
                continue
            matched = None
            for candidate in (coding.get('code'), coding.get('display')):
                if isinstance(candidate, str) and candidate in feat:
                    matched = candidate
                    break
            if matched is not None:
                feat[matched] = value
                break
    return feat


In [None]:
# Build `df`: use the FHIR server when one is configured and yields usable
# training data, otherwise fall back to a synthetic dataset.
USE_FHIR = bool(FHIR_SERVER_URL)

if USE_FHIR:
    try:
        headers = {'Accept': 'application/fhir+json'}
        if FHIR_BEARER_TOKEN:
            headers['Authorization'] = f'Bearer {FHIR_BEARER_TOKEN}'
        # Let requests encode the query string instead of hand-building the URL.
        resp = requests.get(
            f"{FHIR_SERVER_URL.rstrip('/')}/Patient",
            headers=headers,
            params={'_count': 50},
            timeout=10,
        )
        resp.raise_for_status()
        bundle = resp.json()
        patient_entries = bundle.get('entry') or []
        # Skip entries that carry no resource id rather than failing on a KeyError.
        patient_ids = [
            p['resource']['id']
            for p in patient_entries
            if isinstance(p.get('resource'), dict) and p['resource'].get('id')
        ]
        records = []
        for pid in patient_ids:
            obs = fetch_observations_for_patient(pid)
            features = parse_observations_to_features(obs)
            features['patient_id'] = pid
            records.append(features)
        fhir_df = pd.DataFrame(records)
        print('Dataframe from FHIR shape:', fhir_df.shape)
        if fhir_df.empty:
            raise ValueError('FHIR server returned no patients')
        # The cells below train a supervised model and read df['label'].
        # Observations parsed above provide only marker columns and patient_id,
        # so without a label column the data cannot be used for training yet.
        if 'label' not in fhir_df.columns:
            raise ValueError("FHIR data has no 'label' column; labels must be joined in before training")
        df = fhir_df
    except Exception as e:
        # Deliberate best-effort: any failure routes to the synthetic demo data.
        print('FHIR fetch failed; falling back to synthetic dataset. Exception:', e)
        USE_FHIR = False

if not USE_FHIR:
    print('Generating synthetic dataset (10,000 patients, 15 features)')
    N = 10000
    n_features = len(FEATURE_NAMES)
    # Features are pure noise around 0.5, clipped to [0, 1]; labels are
    # assigned independently of the features (30% / 25% / 45% class mix).
    X = np.random.normal(loc=0.5, scale=0.15, size=(N, n_features))
    X = np.clip(X, 0, 1)
    y = np.array([0]*3000 + [1]*2500 + [2]*4500)
    perm = np.random.permutation(N)
    X = X[perm]
    y = y[perm]
    df = pd.DataFrame(X, columns=list(FEATURE_NAMES))
    df['label'] = y
    print('Synthetic df shape:', df.shape)


Generating synthetic dataset (10,000 patients, 15 features)
Synthetic df shape: (10000, 16)


In [None]:
feature_cols = [c for c in df.columns if c.startswith('marker_')]
print('Feature cols:', feature_cols[:5])

# Splits: 15% test, then 0.17647 of the remaining 85% (about 15% of the total)
# for validation, both stratified on the label.
train_df, test_df = train_test_split(df, test_size=0.15, stratify=df['label'], random_state=RANDOM_STATE)
train_df, val_df = train_test_split(train_df, test_size=0.17647, stratify=train_df['label'], random_state=RANDOM_STATE)

# Work on copies so `df` stays untouched and re-running this cell is idempotent.
train_df, val_df, test_df = train_df.copy(), val_df.copy(), test_df.copy()

# Impute missing values with the median learned from the training split only,
# so validation/test statistics do not leak into preprocessing.
imp = SimpleImputer(strategy='median')
train_df[feature_cols] = imp.fit_transform(train_df[feature_cols])
val_df[feature_cols] = imp.transform(val_df[feature_cols])
test_df[feature_cols] = imp.transform(test_df[feature_cols])
print('Splits:', train_df.shape, val_df.shape, test_df.shape)


Feature cols: ['marker_1', 'marker_2', 'marker_3', 'marker_4', 'marker_5']
Splits: (7000, 16) (1500, 16) (1500, 16)


In [None]:
from collections import Counter
# Per-class weights handed to the tree: classes 0 and 1 count 10x as much as class 2.
class_weight = {0: 10, 1: 10, 2: 1}

# Feature matrix / label vector pairs for each split, as NumPy arrays.
X_train, y_train = train_df[feature_cols].values, train_df['label'].values
X_val, y_val = val_df[feature_cols].values, val_df['label'].values
X_test, y_test = test_df[feature_cols].values, test_df['label'].values

print('Train class dist:', Counter(y_train))


Train class dist: Counter({np.int64(2): 3150, np.int64(0): 2100, np.int64(1): 1750})


In [None]:
# Shallow, size-limited tree trained with the class weights defined above.
clf = DecisionTreeClassifier(
    criterion='gini',
    max_depth=6,
    min_samples_leaf=50,
    max_leaf_nodes=64,
    class_weight=class_weight,
    random_state=RANDOM_STATE,
)
clf.fit(X_train, y_train)

print('Validation report:')
y_val_pred = clf.predict(X_val)
# zero_division=0 reports 0.0 for a class that is never predicted without
# emitting an UndefinedMetricWarning for each affected metric.
print(classification_report(y_val, y_val_pred, digits=4, zero_division=0))
print('Confusion matrix:\n', confusion_matrix(y_val, y_val_pred))

# Human-readable rule dump; only the first 30 lines are shown to keep output short.
rules = export_text(clf, feature_names=feature_cols)
print('\nSample rules:\n', '\n'.join(rules.split('\n')[:30]))


Validation report:
              precision    recall  f1-score   support

           0     0.3198    0.7889    0.4551       450
           1     0.2487    0.2587    0.2536       375
           2     0.0000    0.0000    0.0000       675

    accuracy                         0.3013      1500
   macro avg     0.1895    0.3492    0.2362      1500
weighted avg     0.1581    0.3013    0.1999      1500

Confusion matrix:
 [[355  95   0]
 [278  97   0]
 [477 198   0]]

Sample rules:
 |--- marker_12 <= 0.35
|   |--- marker_14 <= 0.47
|   |   |--- marker_11 <= 0.46
|   |   |   |--- marker_10 <= 0.57
|   |   |   |   |--- marker_11 <= 0.35
|   |   |   |   |   |--- class: 0
|   |   |   |   |--- marker_11 >  0.35
|   |   |   |   |   |--- class: 0
|   |   |   |--- marker_10 >  0.57
|   |   |   |   |--- class: 1
|   |   |--- marker_11 >  0.46
|   |   |   |--- marker_3 <= 0.57
|   |   |   |   |--- marker_3 <= 0.38
|   |   |   |   |   |--- class: 1
|   |   |   |   |--- marker_3 >  0.38
|   |   |   |   |

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
# Persist the fitted classifier together with the column order it was trained on.
model_bundle = {'model': clf, 'feature_cols': feature_cols}
os.makedirs('models', exist_ok=True)
joblib.dump(model_bundle, 'models/decision_tree_model.joblib')

# Convert to JSON structure
from sklearn.tree import _tree

def tree_to_dict(tree, feature_names):
    """Convert a fitted decision tree into a nested, JSON-serialisable dict.

    Parameters
    ----------
    tree : fitted estimator
        Object exposing ``tree_`` (with ``feature``, ``threshold``,
        ``children_left``, ``children_right`` and ``value`` arrays) and,
        optionally, ``classes_``.
    feature_names : sequence of str
        Name for each feature index used by the tree.

    Returns
    -------
    dict
        Split nodes look like ``{'feature', 'threshold', 'left', 'right'}``
        (samples with ``value <= threshold`` go left). Leaves look like
        ``{'leaf': True, 'value': [...], 'predicted_class': label}`` where
        ``value`` is the first output's row of ``tree_.value`` for that node.
    """
    tree_ = tree.tree_
    # assumes a single-output classifier, where classes_ is one flat array — TODO confirm for other estimators
    classes = getattr(tree, 'classes_', None)

    def _label_for(index):
        # Map the argmax position to the actual class label; fall back to the
        # position itself when the estimator exposes no classes_.
        if classes is None:
            return index
        label = classes[index]
        # NumPy scalars are not JSON-serialisable; unwrap them to Python types.
        return label.item() if hasattr(label, 'item') else label

    def recurse(node):
        left = int(tree_.children_left[node])
        right = int(tree_.children_right[node])
        # A leaf has identical (sentinel) left/right children; a split node
        # always has two distinct child ids.
        if left != right:
            return {
                'feature': feature_names[int(tree_.feature[node])],
                'threshold': float(tree_.threshold[node]),
                'left': recurse(left),
                'right': recurse(right),
            }
        value = tree_.value[node][0].tolist()
        predicted = _label_for(int(np.argmax(value)))
        return {'leaf': True, 'value': value, 'predicted_class': predicted}

    return recurse(0)

# Serialise the fitted tree and write it next to the joblib artifact.
tree_json = tree_to_dict(clf, feature_cols)
with open('models/tree_structure.json', 'w') as f:
    f.write(json.dumps(tree_json, indent=2))
print('Saved models/decision_tree_model.joblib and models/tree_structure.json')


Saved models/decision_tree_model.joblib and models/tree_structure.json


In [None]:
# Provide a simple pure-Python inference using tree JSON for mobile porting
def predict_with_tree_json(x_dict, tree_dict):
    """Walk the nested tree dict for one sample and return the leaf's class.

    ``x_dict`` maps feature name to value; a feature absent from it is read
    as 0.0. At each split the sample goes left when its value is less than
    or equal to the threshold, otherwise right.
    """
    current = tree_dict
    # Descend until a node flagged as a leaf is reached.
    while not current.get('leaf'):
        sample_value = x_dict.get(current['feature'], 0.0)
        branch = 'left' if sample_value <= current['threshold'] else 'right'
        current = current[branch]
    return current['predicted_class']

# Sanity check: the JSON walker should agree with scikit-learn on the first test row.
first_row = X_test[0]
x0 = dict(zip(feature_cols, (float(v) for v in first_row)))
print('clf predict:', int(clf.predict(X_test[:1])[0]))
print('json predict:', predict_with_tree_json(x0, tree_json))


clf predict: 0
json predict: 0
