In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import recall_score

# Widen pandas' console rendering so the results table prints on one line
# per row instead of wrapping or eliding columns.
pd.options.display.width = 1000
pd.options.display.max_columns = None

# --- Data loading -----------------------------------------------------------
# Remote CSV copy of the Titanic passenger data (swap in a local path if one
# is available).
url = "https://web.stanford.edu/class/archive/cs/cs109/cs109.1166/stuff/titanic.csv"
titanic_data = pd.read_csv(url)

# Keep the target plus three numeric predictors, and drop any row that has a
# missing value in one of those four columns.
target_column = 'Survived'
feature_columns = ['Pclass', 'Age', 'Fare']
titanic_data = titanic_data[[target_column, *feature_columns]].dropna()

# Feature matrix and label vector.
X = titanic_data[feature_columns]
y = titanic_data[target_column]

# --- Train / evaluate split ---------------------------------------------------
# 70% train / 30% test, with a fixed seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# --- Model --------------------------------------------------------------------
# Logistic regression with the solver's iteration cap set to 200.
# fit() returns the estimator itself, so construction and fitting are chained.
model = LogisticRegression(random_state=42, max_iter=200).fit(X_train, y_train)

# Hard class predictions and the probability assigned to class 1 (survived)
# for every test row. Column 1 of predict_proba is selected for the latter.
predictions = model.predict(X_test)
pred_probs = model.predict_proba(X_test)[:, 1]

# --- Confidence flag ----------------------------------------------------------
# A row is flagged when its class-1 probability is at or above the threshold.
# NOTE(review): the flag is derived from the class-1 probability only, so a
# prediction of class 0 with a probability close to 0 is reported as False
# here as well. That does not affect UAR, which only counts positives.
uncertainty_threshold = 0.63
high_confidence_predictions = np.greater_equal(pred_probs, uncertainty_threshold)

# --- Traditional recall -------------------------------------------------------
traditional_recall = recall_score(y_test, predictions)
print(f"Traditional Recall: {traditional_recall:.2f}")

# --- Uncertainty-Adjusted Recall (UAR) ----------------------------------------
# Numerator: test rows that are truly positive, were predicted correctly, and
# carry the high-confidence flag. Denominator: all truly positive test rows.
is_actual_positive = y_test == 1
is_correct = y_test == predictions
correct_positive_predictions = is_correct & is_actual_positive & high_confidence_predictions
total_actual_positives = is_actual_positive.sum()

# Fall back to 0 when the test set contains no positives, avoiding a
# division by zero.
if total_actual_positives > 0:
    UAR = correct_positive_predictions.sum() / total_actual_positives
else:
    UAR = 0
print(f"Uncertainty-Adjusted Recall (UAR): {UAR:.2f}")

# --- Per-row report -----------------------------------------------------------
# One row per test sample, indexed like y_test, for manual inspection.
report_columns = {
    'True Label': y_test,
    'Predicted Label': predictions,
    'Prediction Probability (Confidence)': pred_probs,
    'High Confidence Prediction': high_confidence_predictions,
    'Correct High Confidence Positive Prediction': correct_positive_predictions,
}
results_df = pd.DataFrame(report_columns)

# Only the first 20 rows are printed to keep the output readable.
print("\nDetailed Prediction Results:")
print(results_df.head(20))

# --- Plain-language summary of both metrics -----------------------------------
print("\nExplanation of Metrics:")
print(f"Traditional Recall considers all positive predictions, regardless of confidence: {traditional_recall:.2f}")
print(f"Uncertainty-Adjusted Recall (UAR) only considers positive predictions made with high confidence (above {uncertainty_threshold} threshold), providing a more reliable measure in uncertain cases: {UAR:.2f}")


Traditional Recall: 0.49
Uncertainty-Adjusted Recall (UAR): 0.33

Detailed Prediction Results:
     True Label  Predicted Label  Prediction Probability (Confidence)  High Confidence Prediction  Correct High Confidence Positive Prediction
296           1                1                             0.555811                       False                                        False
682           0                1                             0.503154                       False                                        False
535           0                0                             0.056933                       False                                        False
644           1                0                             0.473347                       False                                        False
623           0                0                             0.217877                       False                                        False
39            1                0               