## Comparative Analysis of Classification Metrics
1. Comparative Analysis between binary and multi-class datasets with:
    - Accuracy
    - Precision and Recall
    - F1 Score
    - MCC
To compare MCC with the other metric scores on both binary and multi-class datasets, the function calculate_classification_metrics() summarises the values of all metrics (i.e. accuracy, precision, recall, F1, and MCC).

In [None]:
def calculate_classification_metrics(y_true, y_pred, average='binary'):
    """Compute, print and return accuracy, recall, precision, F1 and MCC.

    NOTE(review): ``sm`` is presumably ``sklearn.metrics`` -- confirm against
    the notebook's import cell.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        average: Averaging mode forwarded to the recall, precision and F1
            scorers. The default ``'binary'`` keeps the previous behaviour
            and is only valid for two-class targets; pass ``'macro'``,
            ``'micro'`` or ``'weighted'`` for multi-class data.

    Returns:
        The tuple ``(accuracy, recall, precision, f1, mcc)``.
    """
    # Accuracy and MCC take no averaging argument; only the per-class
    # scores (recall, precision, F1) need one.
    accuracy = sm.accuracy_score(y_true, y_pred)
    recall = sm.recall_score(y_true, y_pred, average=average)
    precision = sm.precision_score(y_true, y_pred, average=average)
    f1 = sm.f1_score(y_true, y_pred, average=average)
    mcc = sm.matthews_corrcoef(y_true, y_pred)

    # Print the results
    print(f"Accuracy: {accuracy:.2f}")
    print(f"Recall: {recall:.2f}")
    print(f"Precision: {precision:.2f}")
    print(f"F1 Score: {f1:.2f}")
    print(f"Matthews Correlation Coefficient (MCC): {mcc:.2f}")

    return accuracy, recall, precision, f1, mcc

In [None]:
# Number of simulated examples (rows) per generated dataset.
n_samples = 10_000
# Number of alternative ground-truth label vectors (columns) per dataset.
n_ground_truths = 5_000

def generate_data(p):
    """Simulate prediction scores and many candidate ground-truth labelings.

    Uses a generator seeded with 42, so repeated calls with the same ``p``
    yield the same data.

    Args:
        p: Exponent applied to the uniform draws; it adjusts class balance.

    Returns:
        ``(y_pred, y_true)`` where ``y_pred`` has shape ``(n_samples,)`` and
        ``y_true`` is an integer 0/1 array of shape
        ``(n_samples, n_ground_truths)``.
    """
    rng = np.random.default_rng(42)
    # Scores are treated as perfectly calibrated probabilities.
    y_pred = rng.random(n_samples) ** p
    # Each column is one possible ground truth: a label is 1 whenever an
    # independent uniform draw falls below the corresponding score.
    draws = rng.random((n_samples, n_ground_truths))
    y_true = (draws < y_pred[:, np.newaxis]).astype(int)
    # Sanity check: every sampled ground truth must contain both classes.
    for column in y_true.T:
        assert len(np.unique(column)) == 2
    return y_pred, y_true

def neg_mcc(y_pred, y_true, t):
    """Return the negated mean MCC of thresholded scores over all ground truths.

    The MCC of two binary vectors equals their Pearson correlation, so it is
    computed with ``np.corrcoef``. The result is negated so that a minimiser
    maximises MCC.

    Args:
        y_pred: 1-D array of prediction scores.
        y_true: 2-D 0/1 array with one candidate ground truth per column and
            one row per entry of ``y_pred``.
        t: Decision threshold; scores strictly greater than ``t`` are
            labelled 1.

    Returns:
        Minus the average, over the columns of ``y_true``, of the correlation
        between that column and the thresholded predictions.
    """
    # The thresholded predictions do not depend on the column, so build
    # them once instead of once per ground truth.
    predicted = (y_pred > t).astype(np.int64)
    # Iterate over the columns actually present in y_true rather than a
    # module-level count, so the function works for any label matrix.
    # NOTE: np.corrcoef yields nan when either vector is constant (e.g. a
    # threshold that puts every sample in one class); that propagates here.
    mcc = [np.corrcoef(truth, predicted)[0, 1] for truth in y_true.T]
    return -np.mean(mcc)

def neg_acc(y_pred, y_true, t):
    """Return the negated accuracy of thresholded scores over all ground truths.

    Args:
        y_pred: 1-D array of prediction scores.
        y_true: 2-D label array with one candidate ground truth per column.
        t: Decision threshold; scores strictly greater than ``t`` are
            labelled 1.

    Returns:
        Minus the fraction of entries of ``y_true`` that match the
        thresholded predictions, so that a minimiser maximises accuracy.
    """
    predicted = (y_pred > t).astype(np.int64)
    # Broadcast the prediction column against every ground-truth column.
    hits = y_true == predicted[:, np.newaxis]
    return -np.mean(hits)

# Sweep the class-balance exponent p. For each setting, search for the
# decision threshold that maximises mean MCC and the one that maximises
# accuracy, then plot both against the mean predicted score.
p_list = [0.25, 0.5, 1.0, 1.5, 2.0]
positive_weights = []
thresholds_mcc = []
thresholds_acc = []
for p in tqdm(p_list):
    y_pred, y_true = generate_data(p)
    # The mean score serves as the "positive weight" of this dataset.
    positive_weights.append(y_pred.mean())
    # Same search for both objectives, MCC first and accuracy second: start
    # at 0.5 and keep the threshold within [0, 1].
    for objective, best_thresholds in ((neg_mcc, thresholds_mcc),
                                       (neg_acc, thresholds_acc)):
        res = minimize(lambda t: objective(y_pred, y_true, t),
                       [0.5], method='Nelder-Mead', bounds=[(0,1)])
        best_thresholds.append(res.x[0])

plt.plot(positive_weights, thresholds_mcc, '-o', label='MCC')
plt.plot(positive_weights, thresholds_acc, '-o', label='Accuracy')
plt.xlabel('positive weight')
plt.ylabel('optimal threshold')
plt.legend()
plt.show()