<a href="https://colab.research.google.com/github/MorganPhillips02/Day-3-GitHub-Exercise/blob/main/CTM_hackathon.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:


# Modify priority assignment to introduce uncertainty
def assign_priority_with_uncertainty(task, comorbidities, time_since_requested, critical_result):
    base_priority = assign_priority(task, comorbidities, time_since_requested, critical_result)

    # Introduce randomness to create edge cases
    if np.random.rand() < 0.1:  # 10% chance of assigning a random priority
        return np.random.choice(["red", "amber", "green"])

    # Add uncertainty based on time since requested
    if base_priority == "green" and time_since_requested > 60 and np.random.rand() < 0.3:
        return "amber"  # Some green tasks escalate faster than expected

    if base_priority == "amber" and time_since_requested < 24 and np.random.rand() < 0.3:
        return "green"  # Some amber tasks remain lower priority

    if base_priority == "red" and time_since_requested < 12 and np.random.rand() < 0.3:
        return "amber"  # Some red tasks might be slightly over-prioritized

    return base_priority

# Generate dataset with uncertainty
data_with_uncertainty = []
for _ in range(num_samples):
    task = np.random.choice(task_types)
    comorbidity_flags = {c: np.random.choice([0, 1], p=[0.85, 0.15]) for c in comorbidities}
    bmi = np.random.uniform(18, 40)
    time_since_requested = np.random.randint(1, 100)  # Hours
    estimated_duration = np.random.uniform(5, 120)  # Minutes

    critical_result = 0
    if task == "review results":
        critical_result = np.random.choice([0, 1], p=[0.8, 0.2])

    priority = assign_priority_with_uncertainty(task, comorbidity_flags, time_since_requested, critical_result)

    row = [task] + list(comorbidity_flags.values()) + [bmi, time_since_requested, estimated_duration, critical_result, priority]
    data_with_uncertainty.append(row)

# Create DataFrame
df_uncertain = pd.DataFrame(data_with_uncertainty, columns=columns)

# Show sample of dataset with uncertainty
df_uncertain.head()


Unnamed: 0,Task Type,epilepsy,historical heart attack / stroke,diabetes,mental health,BMI,Time since task requested,Estimated task duration,Critical result flag,Priority
0,referral letters,1,0,0,0,21.431879,75,57.813623,0,amber
1,sick notes,0,1,0,0,22.000149,21,76.010374,0,green
2,medical reports,0,0,0,0,24.427182,80,31.768704,0,amber
3,sick notes,0,1,0,1,32.966766,73,12.480933,0,green
4,"patient communication (via phone, NHS app, Acc...",0,0,0,0,33.031797,44,61.945345,0,amber


In [8]:
# Define additional features specific to prescription requests
import pandas as pd
import numpy as np


# Generate synthetic dataset
np.random.seed(42)
num_samples = 500

# Define possible task types
task_types = [
    "prescription request", "patient communication (via phone, NHS app, AccuRX)",
    "sick notes", "referral letters", "medical reports", "review results"
]

# Define comorbidities
comorbidities = ["epilepsy", "historical heart attack / stroke", "diabetes", "mental health"]

medication_types = {
    "high": ["insulin", "anti-epileptics", "immunosuppressants"],
    "medium": ["blood pressure medication", "statins", "SSRI"],
    "low": ["vitamins", "emollients", "mild pain relief"],
}

request_types = {
    "high": ["out-of-hours gp", "pharmacist", "phone"],
    "medium": ["online", "NHS app"],
    "low": ["routine scheduled repeat"]
}

# Function to assign prescription-related features
def generate_prescription_features():
    med_risk = np.random.choice(["high", "medium", "low"], p=[0.2, 0.4, 0.4])
    medication = np.random.choice(medication_types[med_risk])
    days_until_out = np.random.randint(-3, 10)  # Can be negative if already run out
    polypharmacy = np.random.choice([0, 1], p=[0.8, 0.2])  # 20% chance of polypharmacy
    request_type_risk = np.random.choice(["high", "medium", "low"], p=[0.3, 0.5, 0.2])
    request_type = np.random.choice(request_types[request_type_risk])
    monitoring_required = np.random.choice([0, 1], p=[0.2, 0.8])  # 20% chance of needing monitoring
    return medication, days_until_out, polypharmacy, request_type, monitoring_required

# Updated priority function
def assign_priority_with_prescription_features(task, comorbidities, time_since_requested,
                                               critical_result, medication, days_until_out,
                                               polypharmacy, request_type, monitoring_required):
    if task == "review results":
        return "red" if critical_result else "amber"

    if task.startswith("patient communication"):
        return "red"

    if task == "prescription request":
        # Higher risk medication & emergency supply needed
        if days_until_out <= 1:
            return "red"
        if days_until_out <= 3:
            return "amber"

        # Medication risk level
        if medication in medication_types["high"]:
            return "red" if days_until_out <= 3 else "amber"
        if medication in medication_types["medium"]:
            return "amber" if days_until_out <= 3 else "green"

        # Polypharmacy and monitoring requirement influence
        if polypharmacy or monitoring_required:
            return "red" if days_until_out <= 3 else "amber"

        # Request type influence
        if request_type in request_types["high"]:
            return "red"
        if request_type in request_types["medium"]:
            return "amber"

        return "green"

    if task in ["sick notes", "referral letters", "medical reports"]:
        return "green"

    # Escalation over time
    if time_since_requested > 72:
        return "red"
    elif time_since_requested > 48:
        return "amber"

    return "green"

# Generate dataset with new features for prescription requests
data_expanded = []
for _ in range(num_samples):
    task = np.random.choice(task_types)
    comorbidity_flags = {c: np.random.choice([0, 1], p=[0.85, 0.15]) for c in comorbidities}
    bmi = np.random.uniform(18, 40)
    time_since_requested = np.random.randint(1, 100)  # Hours
    estimated_duration = np.random.uniform(5, 120)  # Minutes

    critical_result = 0
    medication = None
    days_until_out = None
    polypharmacy = None
    request_type = None
    monitoring_required = None

    if task == "review results":
        critical_result = np.random.choice([0, 1], p=[0.8, 0.2])

    if task == "prescription request":
        medication, days_until_out, polypharmacy, request_type, monitoring_required = generate_prescription_features()

    priority = assign_priority_with_prescription_features(task, comorbidity_flags, time_since_requested,
                                                          critical_result, medication, days_until_out,
                                                          polypharmacy, request_type, monitoring_required)

    row = [task] + list(comorbidity_flags.values()) + [bmi, time_since_requested, estimated_duration,
                                                       critical_result, medication, days_until_out,
                                                       polypharmacy, request_type, monitoring_required, priority]
    data_expanded.append(row)

# Create DataFrame with expanded features
columns_expanded = ["Task Type"] + comorbidities + ["BMI", "Time since task requested",
                   "Estimated task duration", "Critical result flag", "Medication",
                   "Days until out of supply", "Polypharmacy", "Request Type",
                   "Monitoring Required", "Priority"]

df_expanded = pd.DataFrame(data_expanded, columns=columns_expanded)

# Show sample of updated dataset
df_expanded.head()


Unnamed: 0,Task Type,epilepsy,historical heart attack / stroke,diabetes,mental health,BMI,Time since task requested,Estimated task duration,Critical result flag,Medication,Days until out of supply,Polypharmacy,Request Type,Monitoring Required,Priority
0,medical reports,1,0,0,0,19.688677,60,24.535348,0,,,,,,green
1,referral letters,0,0,0,0,18.528111,97,7.555125,0,,,,,,green
2,prescription request,0,1,0,0,24.938002,94,61.39698,0,anti-epileptics,-2.0,0.0,online,0.0,red
3,referral letters,0,0,0,0,39.835322,53,84.03899,0,,,,,,green
4,review results,0,0,0,0,39.520855,23,6.917719,0,,,,,,amber


In [15]:
# prompt: write me code to use scikit-learn to train a classification model. outcome is 'Priority' from df, all other columns are input features. assess performance with classification matrix (plot this)

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

categorical_features = df_expanded.select_dtypes(include=["object_"]).columns
# Convert categorical features to numerical using one-hot encoding
df_expanded = pd.get_dummies(df_expanded, columns=['Task Type'], drop_first=True)

# Define features (X) and target (y)
X = df_expanded.drop('Priority', axis=1)
y = df_expanded['Priority']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train a RandomForestClassifier
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
print(classification_report(y_test, y_pred))

# Create and plot the confusion matrix
cm = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=['amber', 'green', 'red'], yticklabels=['amber', 'green', 'red'])
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()


KeyError: "None of [Index(['Task Type'], dtype='object')] are in the [columns]"

In [16]:
import numpy as np
import pandas as pd

# Define task types
task_types = ["prescription request", "patient communication (via phone, NHS app, AccuRX)",
              "sick notes", "referral letters", "medical reports", "review results"]

# Define comorbidities
comorbidities = ["epilepsy", "historical heart attack / stroke", "diabetes", "mental health"]

# Define severity levels for test results
severity_levels = {
    "high": ["K+ > 6", "eGFR < 15", "PSA > 100", "New AF on ECG", "Hb < 70", "INR > 8 on warfarin", "Highly suspicious CXR", "Troponin"],
    "medium": ["HbA1c > 48", "eGFR 30-45", "Mildly raised LTFs"],
    "low": ["Slightly raised cholesterol", "Stable thyroid levels", "Minor anaemia"]
}

# Define investigation types
investigation_types = {
    "high": ["Troponin", "D-dimer", "eGFR drop", "Hb drop"],
    "medium": ["ECG", "Chest X-ray"],
    "low": ["Other tests"]
}

# Define test request urgency
test_request_urgency = {
    "high": ["Hospital-initiated urgent test"],
    "medium": ["Routine GP-initiated screening"],
    "low": ["Monitoring bloods with minor changes"]
}

# Function to generate investigation-related features
def generate_investigation_features():
    severity_level = np.random.choice(["high", "medium", "low"], p=[0.2, 0.4, 0.4])
    severity = np.random.choice(severity_levels[severity_level])

    investigation_urgency_level = np.random.choice(["high", "medium", "low"], p=[0.3, 0.5, 0.2])
    investigation = np.random.choice(investigation_types[investigation_urgency_level])

    request_urgency_level = np.random.choice(["high", "medium", "low"], p=[0.3, 0.5, 0.2])
    request_urgency = np.random.choice(test_request_urgency[request_urgency_level])

    return severity, investigation, request_urgency

# Function to assign priority with investigations
def assign_priority_with_investigation(task, comorbidities, time_since_requested,
                                       critical_result, severity, investigation, request_urgency):
    if task == "review results":
        # High severity or high urgency test types are automatically red
        if severity in severity_levels["high"] or investigation in investigation_types["high"] or request_urgency in test_request_urgency["high"]:
            return "red"
        if severity in severity_levels["medium"] or investigation in investigation_types["medium"] or request_urgency in test_request_urgency["medium"]:
            return "amber"
        return "green"

    # Escalation over time for non-investigation tasks
    if time_since_requested > 72:
        return "red"
    elif time_since_requested > 48:
        return "amber"

    return "green"

# Generate dataset with investigation features
num_samples = 1000
data_investigations = []
for _ in range(num_samples):
    task = np.random.choice(task_types)
    comorbidity_flags = {c: np.random.choice([0, 1], p=[0.85, 0.15]) for c in comorbidities}
    bmi = np.random.uniform(18, 40)
    time_since_requested = np.random.randint(1, 100)  # Hours
    estimated_duration = np.random.uniform(5, 120)  # Minutes

    critical_result = 0
    severity = None
    investigation = None
    request_urgency = None

    if task == "review results":
        severity, investigation, request_urgency = generate_investigation_features()

    priority = assign_priority_with_investigation(task, comorbidity_flags, time_since_requested,
                                                  critical_result, severity, investigation, request_urgency)

    row = [task] + list(comorbidity_flags.values()) + [bmi, time_since_requested, estimated_duration,
                                                       critical_result, severity, investigation, request_urgency, priority]
    data_investigations.append(row)

# Create DataFrame with investigation features
columns_investigations = ["Task Type"] + comorbidities + ["BMI", "Time since task requested",
                        "Estimated task duration", "Critical result flag", "Severity of abnormality",
                        "Investigation Type", "Test Request Urgency", "Priority"]

df_investigations = pd.DataFrame(data_investigations, columns=columns_investigations)

# Show sample of updated dataset
df_investigations.head()


Unnamed: 0,Task Type,epilepsy,historical heart attack / stroke,diabetes,mental health,BMI,Time since task requested,Estimated task duration,Critical result flag,Severity of abnormality,Investigation Type,Test Request Urgency,Priority
0,sick notes,0,0,0,0,23.830768,57,62.085876,0,,,,amber
1,medical reports,1,0,0,0,34.895269,58,31.194509,0,,,,amber
2,review results,0,0,0,1,26.685617,10,106.248519,0,eGFR 30-45,Troponin,Routine GP-initiated screening,red
3,review results,0,0,0,0,35.83607,28,30.42407,0,Mildly raised LTFs,eGFR drop,Hospital-initiated urgent test,red
4,"patient communication (via phone, NHS app, Acc...",0,0,0,1,39.975784,43,25.262357,0,,,,green


In [17]:
# Define prescription-related variables
medication_types = {
    "high": ["Insulin", "Anti-epileptics", "Immunosuppressants"],
    "medium": ["Blood pressure medication", "Statins", "SSRIs"],
    "low": ["Vitamins", "Emollients", "Mild pain relief"]
}

request_types = {
    "high": ["Out-of-hours GP", "Pharmacist", "Phone"],
    "medium": ["Routine request via online", "NHS app"],
    "low": ["Routine scheduled repeats"]
}

# Function to generate prescription-related features
def generate_prescription_features():
    med_risk_level = np.random.choice(["high", "medium", "low"], p=[0.2, 0.4, 0.4])
    medication = np.random.choice(medication_types[med_risk_level])

    days_until_out = np.random.randint(-2, 10)
    polypharmacy = np.random.choice(["Yes", "No"], p=[0.3, 0.7])
    request_type = np.random.choice(["high", "medium", "low"], p=[0.2, 0.5, 0.3])
    request_source = np.random.choice(request_types[request_type])
    monitoring_required = np.random.choice([True, False], p=[0.3, 0.7])

    return medication, days_until_out, polypharmacy, request_source, monitoring_required

# Updated priority function with prescription logic
def assign_priority_with_prescriptions(task, comorbidities, time_since_requested, critical_result,
                                       severity, investigation, request_urgency,
                                       medication, days_until_out, polypharmacy, request_source, monitoring_required):
    if task == "review results":
        if severity in severity_levels["high"] or investigation in investigation_types["high"] or request_urgency in test_request_urgency["high"]:
            return "red"
        if severity in severity_levels["medium"] or investigation in investigation_types["medium"] or request_urgency in test_request_urgency["medium"]:
            return "amber"
        return "green"

    if task == "prescription request":
        if medication in medication_types["high"] or days_until_out <= 1 or monitoring_required:
            return "red"
        if medication in medication_types["medium"] or days_until_out <= 3 or polypharmacy == "Yes" or request_source in request_types["high"]:
            return "amber"
        return "green"

    if time_since_requested > 72:
        return "red"
    elif time_since_requested > 48:
        return "amber"

    return "green"

# Generate dataset with both investigation and prescription features
data_full = []
for _ in range(num_samples):
    task = np.random.choice(task_types)
    comorbidity_flags = {c: np.random.choice([0, 1], p=[0.85, 0.15]) for c in comorbidities}
    bmi = np.random.uniform(18, 40)
    time_since_requested = np.random.randint(1, 100)  # Hours
    estimated_duration = np.random.uniform(5, 120)  # Minutes

    # Initialize variables
    critical_result, severity, investigation, request_urgency = None, None, None, None
    medication, days_until_out, polypharmacy, request_source, monitoring_required = None, None, None, None, None

    if task == "review results":
        severity, investigation, request_urgency = generate_investigation_features()

    if task == "prescription request":
        medication, days_until_out, polypharmacy, request_source, monitoring_required = generate_prescription_features()

    priority = assign_priority_with_prescriptions(task, comorbidity_flags, time_since_requested, critical_result,
                                                  severity, investigation, request_urgency,
                                                  medication, days_until_out, polypharmacy, request_source, monitoring_required)

    row = [task] + list(comorbidity_flags.values()) + [bmi, time_since_requested, estimated_duration,
                                                       critical_result, severity, investigation, request_urgency,
                                                       medication, days_until_out, polypharmacy, request_source, monitoring_required, priority]
    data_full.append(row)

# Create DataFrame with all features
columns_full = ["Task Type"] + comorbidities + ["BMI", "Time since task requested",
                   "Estimated task duration", "Critical result flag", "Severity of abnormality",
                   "Investigation Type", "Test Request Urgency", "Medication Type", "Days Until Out",
                   "Polypharmacy", "Request Type", "Monitoring Required", "Priority"]

df_full = pd.DataFrame(data_full, columns=columns_full)

# Show sample of updated dataset
df_full.head()


Unnamed: 0,Task Type,epilepsy,historical heart attack / stroke,diabetes,mental health,BMI,Time since task requested,Estimated task duration,Critical result flag,Severity of abnormality,Investigation Type,Test Request Urgency,Medication Type,Days Until Out,Polypharmacy,Request Type,Monitoring Required,Priority
0,"patient communication (via phone, NHS app, Acc...",0,0,0,0,29.382403,63,91.772046,,,,,,,,,,amber
1,"patient communication (via phone, NHS app, Acc...",0,0,0,0,31.603774,2,108.596253,,,,,,,,,,green
2,review results,0,0,0,0,29.279697,16,53.674801,,Stable thyroid levels,Chest X-ray,Routine GP-initiated screening,,,,,,amber
3,prescription request,0,1,0,0,26.689129,61,39.165461,,,,,SSRIs,-2.0,No,NHS app,True,red
4,sick notes,0,0,0,0,32.674034,65,38.386869,,,,,,,,,,amber


In [18]:
# Function to assign a priority score (0-100) instead of categorical labels
def assign_priority_score(task, comorbidities, time_since_requested, critical_result,
                          severity, investigation, request_urgency,
                          medication, days_until_out, polypharmacy, request_source, monitoring_required):
    score = 0

    # Increase score based on task type urgency
    if task == "review results":
        if severity in severity_levels["high"] or investigation in investigation_types["high"] or request_urgency in test_request_urgency["high"]:
            score += 80
        elif severity in severity_levels["medium"] or investigation in investigation_types["medium"] or request_urgency in test_request_urgency["medium"]:
            score += 50
        else:
            score += 20

    elif task == "prescription request":
        if medication in medication_types["high"] or days_until_out <= 1 or monitoring_required:
            score += 85
        elif medication in medication_types["medium"] or days_until_out <= 3 or polypharmacy == "Yes" or request_source in request_types["high"]:
            score += 55
        else:
            score += 25

    # Comorbidity influence
    for condition, flag in comorbidities.items():
        if flag:
            score += 5  # Small increase for each comorbidity

    # Increase priority score based on time waiting
    if time_since_requested > 72:
        score += 30
    elif time_since_requested > 48:
        score += 20

    # Cap the score at 100
    return min(score, 100)

# Generate dataset with numeric priority scores
data_full_priority = []
for _ in range(num_samples):
    task = np.random.choice(task_types)
    comorbidity_flags = {c: np.random.choice([0, 1], p=[0.85, 0.15]) for c in comorbidities}
    bmi = np.random.uniform(18, 40)
    time_since_requested = np.random.randint(1, 100)  # Hours
    estimated_duration = np.random.uniform(5, 120)  # Minutes

    # Initialize variables
    critical_result, severity, investigation, request_urgency = None, None, None, None
    medication, days_until_out, polypharmacy, request_source, monitoring_required = None, None, None, None, None

    if task == "review results":
        severity, investigation, request_urgency = generate_investigation_features()

    if task == "prescription request":
        medication, days_until_out, polypharmacy, request_source, monitoring_required = generate_prescription_features()

    priority_score = assign_priority_score(task, comorbidity_flags, time_since_requested, critical_result,
                                           severity, investigation, request_urgency,
                                           medication, days_until_out, polypharmacy, request_source, monitoring_required)

    # Assign category based on score
    if priority_score > 75:
        priority_category = "red"
    elif priority_score > 30:
        priority_category = "amber"
    else:
        priority_category = "green"

    row = [task] + list(comorbidity_flags.values()) + [bmi, time_since_requested, estimated_duration,
                                                       critical_result, severity, investigation, request_urgency,
                                                       medication, days_until_out, polypharmacy, request_source, monitoring_required,
                                                       priority_score, priority_category]
    data_full_priority.append(row)

# Create DataFrame with priority scores and categories
columns_priority = ["Task Type"] + comorbidities + ["BMI", "Time since task requested",
                   "Estimated task duration", "Critical result flag", "Severity of abnormality",
                   "Investigation Type", "Test Request Urgency", "Medication Type", "Days Until Out",
                   "Polypharmacy", "Request Type", "Monitoring Required", "Priority Score", "Priority Category"]

df_priority = pd.DataFrame(data_full_priority, columns=columns_priority)

# Show sample of updated dataset
df_priority.head()


Unnamed: 0,Task Type,epilepsy,historical heart attack / stroke,diabetes,mental health,BMI,Time since task requested,Estimated task duration,Critical result flag,Severity of abnormality,Investigation Type,Test Request Urgency,Medication Type,Days Until Out,Polypharmacy,Request Type,Monitoring Required,Priority Score,Priority Category
0,medical reports,0,0,0,0,27.120207,78,94.875162,,,,,,,,,,30,green
1,sick notes,0,0,0,0,21.353065,21,37.158408,,,,,,,,,,0,green
2,prescription request,0,0,1,1,19.902251,79,93.968618,,,,,Mild pain relief,6.0,No,Routine scheduled repeats,True,100,red
3,sick notes,0,0,1,0,31.164259,14,19.521727,,,,,,,,,,5,green
4,referral letters,0,0,0,0,28.534096,29,27.189134,,,,,,,,,,0,green
