In [None]:
# imports
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler

# Load the scikit-learn breast-cancer dataset and wrap it in pandas objects.
data = load_breast_cancer()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target)  # integer class codes; data.target_names[code] is the class name

# The labelling logic later in the notebook treats code 0 as malignant and
# code 1 as benign, so check that mapping here instead of only eyeballing it.
target_names = data.target_names
print("target names:", target_names)  # inspect in notebook
assert list(target_names) == ['malignant', 'benign'], (
    "unexpected class order in data.target_names; the priority labelling "
    "assumes 0 = malignant and 1 = benign"
)




In [None]:
# Build a three-level priority label (low / medium / high).
# Malignant tumours are always 'high'; benign tumours are ranked by the
# tertile of their mean radius, which serves as a continuous risk proxy.
df = X.assign(target=y)
# Find the radius column by name among the dataset's feature names.
radius_matches = [name for name in X.columns if 'mean radius' in name]
radius_col = radius_matches[0]
radius = df[radius_col]

# Map to 3-level priority
def make_priority(row, thresholds=None):
    """Return the priority name ('low', 'medium' or 'high') for one row.

    Malignant rows (``target == 0``) are always 'high'. Benign rows are
    placed by the tertile of their radius among benign cases: at or below
    the lower cut point -> 'low', at or below the upper cut point ->
    'medium', otherwise 'high'.

    Parameters
    ----------
    row : mapping-like
        Must provide ``row['target']`` and, for benign rows,
        ``row[radius_col]``.
    thresholds : tuple of (lower, upper), optional
        Precomputed tertile cut points. When omitted they are derived from
        the module-level ``radius`` and ``df`` on every call, which is fine
        for a single row but wasteful inside ``DataFrame.apply``.

    Returns
    -------
    str
        One of 'low', 'medium', 'high'.
    """
    if row['target'] == 0:  # malignant => high
        return 'high'

    if thresholds is None:
        # Cut points are taken over benign cases only (target == 1).
        benign = radius[df['target'] == 1]
        thresholds = (benign.quantile(1/3), benign.quantile(2/3))
    lower, upper = thresholds

    value = row[radius_col]
    if value <= lower:
        return 'low'
    if value <= upper:
        return 'medium'
    return 'high'

# Tertile cut points of the radius, taken over benign cases only (target == 1).
benign_radius = radius.loc[df['target'].eq(1)]
low_thr, high_thr = (benign_radius.quantile(frac) for frac in (1/3, 2/3))

def priority_from_row(row):
    """Map one row to its priority name.

    Rows with ``target == 0`` are 'high'. Any other row is compared against
    the module-level cut points: radius <= ``low_thr`` -> 'low',
    radius <= ``high_thr`` -> 'medium', anything larger -> 'high' (so benign
    rows above the upper cut point share the 'high' class).

    Reads the module-level names ``radius_col``, ``low_thr`` and ``high_thr``.
    """
    if row['target'] == 0:
        return 'high'

    value = row[radius_col]
    if value <= low_thr:
        return 'low'
    return 'medium' if value <= high_thr else 'high'

# Ordered class names: the position in this list is the integer code used for
# training and for every report below.
PRIORITY_NAMES = ['low', 'medium', 'high']
PRIORITY_CODES = list(range(len(PRIORITY_NAMES)))

# label every row with its priority name
df['priority'] = df.apply(priority_from_row, axis=1)

# encode labels (low -> 0, medium -> 1, high -> 2)
df['priority_label'] = df['priority'].map(dict(zip(PRIORITY_NAMES, PRIORITY_CODES)))
y_priority = df['priority_label']

# features (drop target/priority cols)
# NOTE(review): the radius column used to derive the benign priorities is still
# part of X_features, so the model can partly read the label off that feature;
# the scores below are likely optimistic -- confirm this is intended.
X_features = df.drop(columns=['target','priority','priority_label'])

# split (stratified so each priority class keeps its share in both parts)
X_train, X_test, y_train, y_test = train_test_split(
    X_features,
    y_priority,
    test_size=0.2,
    random_state=42,
    stratify=y_priority,
)

# scale: fit on the training part only, then reuse the fitted scaler on the test part
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# train Random Forest
clf = RandomForestClassifier(n_estimators=200, random_state=42)
clf.fit(X_train_scaled, y_train)

# predict + evaluate
y_pred = clf.predict(X_test_scaled)
acc = accuracy_score(y_test, y_pred)
f1_macro = f1_score(y_test, y_pred, average='macro')
print("Accuracy:", acc)
print("F1 (macro):", f1_macro)
# Pin `labels` so the class names and matrix rows/columns stay aligned with the
# integer codes even if a class is absent from y_test or y_pred.
print(classification_report(y_test, y_pred, labels=PRIORITY_CODES, target_names=PRIORITY_NAMES))
print("Confusion matrix:\n", confusion_matrix(y_test, y_pred, labels=PRIORITY_CODES))

Accuracy: 0.9736842105263158
F1 (macro): 0.9719582489484343
              precision    recall  f1-score   support

         low       1.00      1.00      1.00        24
      medium       0.92      0.96      0.94        24
        high       0.98      0.97      0.98        66

    accuracy                           0.97       114
   macro avg       0.97      0.98      0.97       114
weighted avg       0.97      0.97      0.97       114

Confusion matrix:
 [[24  0  0]
 [ 0 23  1]
 [ 0  2 64]]
