In [1]:
# Notebook: predictive_resource_allocation.ipynb
# pip install scikit-learn pandas matplotlib

import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [2]:
# Load the scikit-learn breast-cancer dataset as a stand-in for the real
# (Kaggle) dataset.
data = load_breast_cancer()
X = pd.DataFrame(data.data, columns=data.feature_names)
y_binary = pd.Series(data.target)  # 0/1

# Synthetic 'priority' target for the demo: a direct relabelling of the binary
# target (0 -> "low", 1 -> "high"). No feature threshold is involved.
# (In real project: replace with actual issue-priority labels.)
PRIORITY_BY_TARGET = {0: "low", 1: "high"}
y = y_binary.map(PRIORITY_BY_TARGET)

# Series.map yields NaN for any value missing from the mapping; fail here
# rather than let an unlabelled row reach the encoder.
assert y.notna().all(), "unmapped target value in y_binary"

# Encode the string labels as integers. LabelEncoder orders classes by sorted
# name, so the codes are "high" -> 0 and "low" -> 1 (the reverse of y_binary).
le = LabelEncoder()
y_enc = le.fit_transform(y)

In [3]:
# Hold out 20% of the rows for testing, stratified on the encoded labels,
# with a fixed seed so the partition is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_enc,
    test_size=0.2,
    random_state=42,
    stratify=y_enc,
)

# Learn the scaling statistics from the training rows only, then apply the
# same fitted transform to both partitions.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_s = scaler.transform(X_train)
X_test_s = scaler.transform(X_test)

# Fit a 100-tree random forest (fixed seed) on the scaled training data.
clf = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train_s, y_train)

# Score the held-out rows: plain accuracy plus support-weighted F1.
y_pred = clf.predict(X_test_s)
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
f1 = f1_score(y_true=y_test, y_pred=y_pred, average="weighted")

In [4]:
# Headline metrics first, one per line, in the order they were computed.
for metric_name, metric_value in (("Accuracy:", acc), ("F1 (weighted):", f1)):
    print(metric_name, metric_value)

# Per-class precision/recall/F1 table, with rows named after the encoder's
# classes so the integer codes read as priority labels.
report = classification_report(y_test, y_pred, target_names=le.classes_)
print(report)

Accuracy: 0.9736842105263158
F1 (weighted): 0.9734654095556351
              precision    recall  f1-score   support

        high       0.96      1.00      0.98        72
         low       1.00      0.93      0.96        42

    accuracy                           0.97       114
   macro avg       0.98      0.96      0.97       114
weighted avg       0.97      0.97      0.97       114

