# In [None]:
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, classification_report
import numpy as np

# Load the Breast Cancer dataset that ships with scikit-learn as a DataFrame
data = load_breast_cancer(as_frame=True)
df = data['frame']

# --- Goal Modification: Build a Three-Class Target ---
# The dataset's own target is binary: 0 (Malignant) / 1 (Benign).
# To simulate an "Issue Priority" label, the diagnosis is combined further
# down with a size/texture score into three classes:
# High (3), Medium (2), or Low (1) Priority.

# Size/texture score: element-wise sum of the two mean measurements
df['complexity'] = df['mean radius'].add(df['mean texture'])

def map_to_priority(row, complexity_median=None):
    """Map one dataset row to a simulated issue-priority class.

    Args:
        row: Mapping-like row (e.g. a ``pandas.Series``) exposing the keys
            ``'target'`` (0 = malignant, 1 = benign) and ``'complexity'``.
        complexity_median: Threshold separating low from high complexity.
            When omitted, the median of ``df['complexity']`` is looked up on
            every call; that keeps the single-argument form working but
            costs a full column scan per row, so callers processing many
            rows should pass a precomputed value.

    Returns:
        3 (High) when the row is malignant and at/above the threshold,
        2 (Medium) when exactly one of those two conditions holds,
        1 (Low) when the row is benign and below the threshold.
    """
    if complexity_median is None:
        # Backward-compatible fallback for callers that pass only the row.
        complexity_median = df['complexity'].median()

    is_malignant = row['target'] == 0
    is_high_complexity = row['complexity'] >= complexity_median

    if is_malignant and is_high_complexity:
        return 3
    if is_malignant or is_high_complexity:
        return 2
    return 1


# Compute the threshold once instead of once per row; DataFrame.apply forwards
# extra keyword arguments to the applied function.
complexity_median = df['complexity'].median()
df['priority'] = df.apply(
    map_to_priority, axis=1, complexity_median=complexity_median
)

# The label to predict is the simulated priority class
y = df['priority']
# Features: everything except the original diagnosis, the engineered
# complexity score, and the label itself
X = df.drop(['target', 'complexity', 'priority'], axis='columns')

# Hold out 20% of the rows for testing; stratify so both subsets keep the
# same class proportions as the full label column
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

print(f"Dataset Size: {len(df)}")
print(f"Train Set Size: {len(X_train)}")
print(f"Test Set Size: {len(X_test)}")
print("\nSimulated Priority Class Distribution (Training):")
# Relative frequency of each priority class, ordered by class code
train_class_share = y_train.value_counts(normalize=True)
print(train_class_share.sort_index())

# --- Model Training: Random Forest Classifier ---
# 100 trees with a fixed seed so repeated runs give the same model.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# --- Prediction ---
y_pred = model.predict(X_test)

# --- Evaluation: Performance Metrics ---
# Accuracy: (Total Correct Predictions) / (Total Predictions)
accuracy = accuracy_score(y_test, y_pred)
# F1-Score (Macro): unweighted mean of the per-class F1-scores, so every
# class counts equally regardless of how many samples it has
f1_macro = f1_score(y_test, y_pred, average='macro')

print("\n--- Model Performance Metrics ---")
print(f"Accuracy Score: {accuracy:.4f}")
print(f"F1-Score (Macro): {f1_macro:.4f}")

print("\n--- Detailed Classification Report ---")
# Pass the class codes explicitly so each display name is bound to its code.
# Without `labels`, names are matched positionally to whichever classes happen
# to appear in y_test/y_pred, which misaligns (or errors) if one is absent.
priority_labels = [1, 2, 3]
priority_names = ['Low (1)', 'Medium (2)', 'High (3)']
print(
    classification_report(
        y_test,
        y_pred,
        labels=priority_labels,
        target_names=priority_names,
    )
)