In [5]:
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix, f1_score

# Load the data
df = pd.read_csv('perf.csv')
y_true = df['yval']
y_prob = df['ypred']


In [6]:
# Define function to compute metrics at any threshold
def compute_metrics(y_true, y_prob, threshold):
    y_pred = (y_prob >= threshold).astype(int)
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()

    SEN = tp / (tp + fn) if (tp + fn) > 0 else 0  # Sensitivity
    SPE = tn / (tn + fp) if (tn + fp) > 0 else 0  # Specificity
    PPV = tp / (tp + fp) if (tp + fp) > 0 else 0  # Positive Predictive Value
    NPV = tn / (tn + fn) if (tn + fn) > 0 else 0  # Negative Predictive Value
    F1 = f1_score(y_true, y_pred)
    return SEN, SPE, PPV, NPV, F1


In [10]:
# Find the threshold where Sensitivity is closest to 0.98
# Search for the threshold where sensitivity is closest to 0.98
best_diff = 1
best_threshold = None
best_metrics = None

# Use 2000 evenly spaced thresholds between 0 and 1
for thresh in np.linspace(0, 1, 2000):
    SEN, SPE, PPV, NPV, F1 = compute_metrics(y_true, y_prob, thresh)
    diff = abs(SEN - 0.98)
    if diff < best_diff:
        best_diff = diff
        best_threshold = thresh
        best_metrics = (SEN, SPE, PPV, NPV, F1)

# Round results to 4 decimal digits
threshold_1a = round(best_threshold, 4)
SEN, SPE, PPV, NPV, F1 = [round(x, 4) for x in best_metrics]

print(f"Threshold: {threshold_1a}")
print(f"Sensitivity (SEN): {SEN}")
print(f"Specificity (SPE): {SPE}")
print(f"PPV: {PPV}")
print(f"NPV: {NPV}")
print(f"F1 score: {F1}")


Threshold: 0.1051
Sensitivity (SEN): 0.9816
Specificity (SPE): 0.0739
PPV: 0.3875
NPV: 0.8706
F1 score: 0.5556


In [22]:
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix, f1_score

# Load data
df = pd.read_csv('perf.csv')
y_true = df['yval']
y_prob = df['ypred']

# Function to compute metrics at any threshold
def compute_metrics(y_true, y_prob, threshold):
    y_pred = (y_prob >= threshold).astype(int)
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    SEN = tp / (tp + fn) if (tp + fn) > 0 else 0    # Sensitivity
    SPE = tn / (tn + fp) if (tn + fp) > 0 else 0    # Specificity
    PPV = tp / (tp + fp) if (tp + fp) > 0 else 0    # Positive Predictive Value
    NPV = tn / (tn + fn) if (tn + fn) > 0 else 0    # Negative Predictive Value
    F1 = f1_score(y_true, y_pred)
    return SEN, SPE, PPV, NPV, F1

# Loop through 2000 thresholds to find the one where specificity is closest to 0.98
best_diff = 1
best_threshold = None
best_metrics = None

for thresh in np.linspace(0, 1, 2000):  # 2000 evenly spaced thresholds from 0 to 1
    SEN, SPE, PPV, NPV, F1 = compute_metrics(y_true, y_prob, thresh)
    diff = abs(SPE - 0.98)  # Now compare specificity
    if diff < best_diff:
        best_diff = diff
        best_threshold = thresh
        best_metrics = (SEN, SPE, PPV, NPV, F1)

# Round results to 2 decimal digits for reporting
threshold_1b = round(best_threshold, 2)
SEN, SPE, PPV, NPV, F1 = [round(x, 2) for x in best_metrics]

print(f"Threshold: {threshold_1b}")
print(f"Sensitivity (SEN): {SEN}")
print(f"Specificity (SPE): {SPE}")
print(f"PPV: {PPV}")
print(f"NPV: {NPV}")
print(f"F1 score: {F1}")



Threshold: 0.81
Sensitivity (SEN): 0.18
Specificity (SPE): 0.98
PPV: 0.84
NPV: 0.67
F1 score: 0.29


In [23]:
# 1.c
row_c = results_df.sort_values('F1', ascending=False).iloc[0]

final_c = row_c[['thresh', 'SEN', 'SPE', 'PPV', 'NPV', 'F1']].round(3)
print(final_c)


thresh    0.367
SEN       0.622
SPE       0.793
PPV       0.642
NPV       0.779
F1        0.632
Name: 568, dtype: float64


In [24]:
import pandas as pd
import numpy as np

# Load the data
df = pd.read_csv('test_boot.csv')

# The first 500 columns are model probabilities
prob_cols = df.columns[:-1]
label_col = df.columns[-1]

# Calculate mean prediction for each sample across 500 models
p_mean = df[prob_cols].mean(axis=1).values
# True label
y_true = df[label_col].values
# Calculate expected bias error for each sample
bias_error = (p_mean - y_true) ** 2

# Calculate expected variance error for each sample
variance_error = df[prob_cols].var(axis=1).values

# Mean bias and variance error
expected_bias_error = np.round(np.mean(bias_error), 2)
expected_variance_error = np.round(np.mean(variance_error), 2)

print(f"Expected bias error: {expected_bias_error}")
print(f"Expected variance error: {expected_variance_error}")


Expected bias error: 0.22
Expected variance error: 0.01


2D

In [27]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load data
df = pd.read_csv('test_boot.csv')
prob_cols = df.columns[:-1]
label_col = df.columns[-1]
y_true = df[label_col].values

n_bins = 10
bin_edges = np.linspace(0, 1, n_bins + 1)
calib_results = []

# For each model, compute calibration per bin
for col in prob_cols:
    prob = df[col].values
    binids = np.digitize(prob, bin_edges) - 1
    bin_means = []
    frac_positives = []
    for b in range(n_bins):
        idx = binids == b
        if np.any(idx):
            bin_means.append(np.mean(prob[idx]))
            frac_positives.append(np.mean(y_true[idx]))
        else:
            bin_means.append(np.nan)
            frac_positives.append(np.nan)
    calib_results.append((bin_means, frac_positives))

# Convert to arrays
bin_means_arr = np.array([x[0] for x in calib_results])
frac_pos_arr = np.array([x[1] for x in calib_results])

# Take statistics across models
mean_pred_median = np.nanmedian(bin_means_arr, axis=0)
frac_pos_median = np.nanmedian(frac_pos_arr, axis=0)
frac_pos_low = np.nanpercentile(frac_pos_arr, 2.5, axis=0)
frac_pos_high = np.nanpercentile(frac_pos_arr, 97.5, axis=0)

# Plot and save figure
plt.figure(figsize=(6, 6))
plt.plot(mean_pred_median, frac_pos_median, 'o-', label='Model median prediction')
plt.fill_between(mean_pred_median, frac_pos_low, frac_pos_high, alpha=0.3, label='95% CI')
plt.plot([0, 1], [0, 1], 'k--', label='Perfect calibration')
plt.xlabel('Expected probability')
plt.ylabel('Predicted probability')
plt.title('Calibration Curve')
plt.legend()
plt.grid()

# Save as 600 dpi PNG
plt.savefig("calibration_curve.png", dpi=600, bbox_inches='tight')
plt.close()


  mean_pred_median = np.nanmedian(bin_means_arr, axis=0)
  frac_pos_median = np.nanmedian(frac_pos_arr, axis=0)
  return fnb._ureduce(a,


In [29]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load data
df = pd.read_csv('test_boot.csv')
prob_cols = df.columns[:-1]
label_col = df.columns[-1]
y_true = df[label_col].values

n_bins = 10
bin_edges = np.linspace(0, 1, n_bins + 1)
min_samples = 10  # Minimum number of samples in a bin to display

calib_results = []
bin_sample_counts = []

# For each model, compute calibration per bin with sample count
for col in prob_cols:
    prob = df[col].values
    binids = np.digitize(prob, bin_edges) - 1
    bin_means = []
    frac_positives = []
    sample_counts = []
    for b in range(n_bins):
        idx = binids == b
        sample_count = np.sum(idx)
        sample_counts.append(sample_count)
        if sample_count >= min_samples:
            bin_means.append(np.mean(prob[idx]))
            frac_positives.append(np.mean(y_true[idx]))
        else:
            bin_means.append(np.nan)
            frac_positives.append(np.nan)
    calib_results.append((bin_means, frac_positives))
    bin_sample_counts.append(sample_counts)

# Convert to arrays
bin_means_arr = np.array([x[0] for x in calib_results])
frac_pos_arr = np.array([x[1] for x in calib_results])
bin_sample_counts_arr = np.array(bin_sample_counts)

# Only keep bins where at least half the models have enough samples
enough_samples = (np.sum(~np.isnan(frac_pos_arr), axis=0) > (len(prob_cols) // 2))

# Calculate statistics for valid bins only
mean_pred_median = np.nanmedian(bin_means_arr[:, enough_samples], axis=0)
frac_pos_median = np.nanmedian(frac_pos_arr[:, enough_samples], axis=0)
frac_pos_low = np.nanpercentile(frac_pos_arr[:, enough_samples], 2.5, axis=0)
frac_pos_high = np.nanpercentile(frac_pos_arr[:, enough_samples], 97.5, axis=0)

# Bin centers for x-axis
bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2
bin_centers_valid = bin_centers[enough_samples]

# Plot calibration curve with CI, only for valid bins
plt.figure(figsize=(6, 6))
plt.plot(bin_centers_valid, frac_pos_median, 'o-', label='Model median prediction')
plt.fill_between(bin_centers_valid, frac_pos_low, frac_pos_high, alpha=0.3, label='95% CI')
plt.plot([0, 1], [0, 1], 'k--', label='Perfect calibration')
plt.xlabel('Expected probability')
plt.ylabel('Fraction of positives')
plt.title('Calibration Curve (filtered, min 10 samples per bin)')
plt.legend()
plt.grid()
plt.savefig("calibration_curve_filtered.png", dpi=600, bbox_inches='tight')
plt.close()
