In [None]:
import numpy as np
from warnings import warn

from aif360.algorithms import Transformer
from aif360.metrics import utils
from aif360.metrics import BinaryLabelDatasetMetric, ClassificationMetric

In [None]:
class RejectOptionClassification(Transformer):

    """Reject option classification is a postprocessing technique that gives
    favorable outcomes to unprivileged groups and unfavorable outcomes to
    privileged groups in a confidence band around the decision boundary with the
    highest uncertainty [10]_.

    References:
        .. [10] F. Kamiran, A. Karim, and X. Zhang, "Decision Theory for
           Discrimination-Aware Classification," IEEE International Conference
           on Data Mining, 2012.
    """

    def __init__(self, unprivileged_groups, privileged_groups,
                 low_class_thresh=0.01, high_class_thresh=0.99,
                 num_class_thresh=100, num_ROC_margin=50,
                 metric_name="Statistical parity difference",
                 metric_ub=0.05, metric_lb=-0.05):
        """
        Args:
            unprivileged_groups (dict or list(dict)): Representation for
                unprivileged group.
            privileged_groups (dict or list(dict)): Representation for
                privileged group.
            low_class_thresh (float): Smallest classification threshold to use
                in the optimization. Should be between 0. and 1.
            high_class_thresh (float): Highest classification threshold to use
                in the optimization. Should be between 0. and 1. and strictly
                greater than `low_class_thresh`.
            num_class_thresh (int): Number of classification thresholds between
                low_class_thresh and high_class_thresh for the optimization
                search. Should be > 0.
            num_ROC_margin (int): Number of relevant ROC margins to be used in
                the optimization search. Should be > 0.
            metric_name (str): Name of the metric to use for the optimization.
                Allowed options are "Statistical parity difference",
                "Average odds difference", "Equal opportunity difference".
            metric_ub (float): Upper bound of constraint on the metric value
            metric_lb (float): Lower bound of constraint on the metric value

        Raises:
            ValueError: If a threshold lies outside [0, 1], if
                `low_class_thresh >= high_class_thresh`, if either grid size
                is smaller than 1, or if `metric_name` is not an allowed
                metric.
        """
        super(RejectOptionClassification, self).__init__(
            unprivileged_groups=unprivileged_groups,
            privileged_groups=privileged_groups,
            low_class_thresh=low_class_thresh, high_class_thresh=high_class_thresh,
            num_class_thresh=num_class_thresh, num_ROC_margin=num_ROC_margin,
            metric_name=metric_name)

        allowed_metrics = ["Statistical parity difference",
                           "Average odds difference",
                           "Equal opportunity difference"]

        self.unprivileged_groups = unprivileged_groups
        self.privileged_groups = privileged_groups

        self.low_class_thresh = low_class_thresh
        self.high_class_thresh = high_class_thresh
        self.num_class_thresh = num_class_thresh
        self.num_ROC_margin = num_ROC_margin
        self.metric_name = metric_name
        self.metric_ub = metric_ub
        self.metric_lb = metric_lb

        # Both are chosen by `fit`; `predict` reads them.
        self.classification_threshold = None
        self.ROC_margin = None

        if ((self.low_class_thresh < 0.0) or (self.low_class_thresh > 1.0) or
                (self.high_class_thresh < 0.0) or (self.high_class_thresh > 1.0) or
                (self.low_class_thresh >= self.high_class_thresh) or
                (self.num_class_thresh < 1) or (self.num_ROC_margin < 1)):

            raise ValueError("Input parameter values out of bounds")

        if metric_name not in allowed_metrics:
            raise ValueError("metric name not in the list of allowed metrics")

    def fit(self, dataset_true, dataset_pred):
        """Estimates the optimal classification threshold and margin for reject
        option classification that optimizes the metric provided.

        Every combination of `num_class_thresh` classification thresholds and
        `num_ROC_margin` margins is evaluated with `predict`. Among the
        combinations whose fairness metric lies within
        [`metric_lb`, `metric_ub`], the one with the highest balanced accuracy
        is kept. If no combination satisfies the bounds, a warning is issued
        and the combination whose metric is closest to the allowed interval is
        kept instead.

        Note:
            `self.classification_threshold` and `self.ROC_margin` are
            overwritten repeatedly during the search and hold the selected
            values once `fit` returns.

        Args:
            dataset_true (BinaryLabelDataset): Dataset containing the true
                `labels`.
            dataset_pred (BinaryLabelDataset): Dataset containing the predicted
                `scores`.

        Returns:
            RejectOptionClassification: Returns self.
        """

        fair_metric_arr = np.zeros(self.num_class_thresh*self.num_ROC_margin)
        balanced_acc_arr = np.zeros_like(fair_metric_arr)
        ROC_margin_arr = np.zeros_like(fair_metric_arr)
        class_thresh_arr = np.zeros_like(fair_metric_arr)

        cnt = 0
        # Iterate through class thresholds
        for class_thresh in np.linspace(self.low_class_thresh,
                                        self.high_class_thresh,
                                        self.num_class_thresh):

            self.classification_threshold = class_thresh
            # The margin may extend at most to the nearer end of [0, 1].
            low_ROC_margin = 0.0
            if class_thresh <= 0.5:
                high_ROC_margin = class_thresh
            else:
                high_ROC_margin = (1.0-class_thresh)

            # Iterate through ROC margins
            for ROC_margin in np.linspace(
                                low_ROC_margin,
                                high_ROC_margin,
                                self.num_ROC_margin):
                self.ROC_margin = ROC_margin

                # Predict using the current threshold and margin
                dataset_transf_pred = self.predict(dataset_pred)

                dataset_transf_metric_pred = BinaryLabelDatasetMetric(
                                             dataset_transf_pred,
                                             unprivileged_groups=self.unprivileged_groups,
                                             privileged_groups=self.privileged_groups)
                classified_transf_metric = ClassificationMetric(
                                             dataset_true,
                                             dataset_transf_pred,
                                             unprivileged_groups=self.unprivileged_groups,
                                             privileged_groups=self.privileged_groups)

                ROC_margin_arr[cnt] = self.ROC_margin
                class_thresh_arr[cnt] = self.classification_threshold

                # Balanced accuracy and fairness metric computations
                balanced_acc_arr[cnt] = 0.5*(classified_transf_metric.true_positive_rate()
                                       +classified_transf_metric.true_negative_rate())
                if self.metric_name == "Statistical parity difference":
                    fair_metric_arr[cnt] = dataset_transf_metric_pred.mean_difference()
                elif self.metric_name == "Average odds difference":
                    fair_metric_arr[cnt] = classified_transf_metric.average_odds_difference()
                elif self.metric_name == "Equal opportunity difference":
                    fair_metric_arr[cnt] = classified_transf_metric.equal_opportunity_difference()

                cnt += 1

        # Candidates whose fairness metric lies inside the requested interval.
        rel_inds = np.logical_and(fair_metric_arr >= self.metric_lb,
                                  fair_metric_arr <= self.metric_ub)
        if rel_inds.any():
            # Most accurate feasible candidate (first one on ties).
            best_ind = np.where(balanced_acc_arr[rel_inds]
                                == np.max(balanced_acc_arr[rel_inds]))[0][0]
        else:
            warn("Unable to satisy fairness constraints")
            rel_inds = np.ones(len(fair_metric_arr), dtype=bool)
            # Distance by which each candidate misses [metric_lb, metric_ub].
            # Taking the minimum of the signed metric instead would prefer the
            # most negative value, i.e. possibly the least fair candidate.
            violation = np.maximum(self.metric_lb - fair_metric_arr,
                                   fair_metric_arr - self.metric_ub)
            best_ind = np.where(violation == np.min(violation))[0][0]

        self.ROC_margin = ROC_margin_arr[rel_inds][best_ind]
        self.classification_threshold = class_thresh_arr[rel_inds][best_ind]

        return self

    def predict(self, dataset):
        """Obtain fair predictions using the ROC method.

        Scores strictly above `self.classification_threshold` receive the
        favorable label, all others the unfavorable label. Inside the critical
        region `(threshold - ROC_margin, threshold + ROC_margin]` the label is
        then overridden: unfavorable for privileged instances, favorable for
        unprivileged instances.

        Args:
            dataset (BinaryLabelDataset): Dataset containing scores that will
                be used to compute predicted labels.

        Returns:
            dataset_pred (BinaryLabelDataset): Output dataset with potentially
            fair predictions obtained using the ROC method.
        """
        # Shallow copy; only `labels` is replaced below.
        dataset_new = dataset.copy(deepcopy=False)

        fav_pred_inds = (dataset.scores > self.classification_threshold)
        unfav_pred_inds = ~fav_pred_inds

        y_pred = np.zeros(dataset.scores.shape)
        y_pred[fav_pred_inds] = dataset.favorable_label
        y_pred[unfav_pred_inds] = dataset.unfavorable_label

        # Indices of critical region around the classification boundary
        crit_region_inds = np.logical_and(
                dataset.scores <= self.classification_threshold+self.ROC_margin,
                dataset.scores > self.classification_threshold-self.ROC_margin)

        # Indices of privileged and unprivileged groups
        cond_priv = utils.compute_boolean_conditioning_vector(
                        dataset.protected_attributes,
                        dataset.protected_attribute_names,
                        self.privileged_groups)
        cond_unpriv = utils.compute_boolean_conditioning_vector(
                        dataset.protected_attributes,
                        dataset.protected_attribute_names,
                        self.unprivileged_groups)

        # New, fairer labels. The group masks are reshaped to a column so they
        # combine element-wise with the critical-region mask
        # (assumes `scores` is an (n, 1) column -- TODO confirm).
        dataset_new.labels = y_pred
        dataset_new.labels[np.logical_and(crit_region_inds,
                            cond_priv.reshape(-1,1))] = dataset.unfavorable_label
        dataset_new.labels[np.logical_and(crit_region_inds,
                            cond_unpriv.reshape(-1,1))] = dataset.favorable_label

        return dataset_new

    def fit_predict(self, dataset_true, dataset_pred):
        """Run `fit` on both datasets, then `predict` on `dataset_pred`."""
        return self.fit(dataset_true, dataset_pred).predict(dataset_pred)



In [None]:
# Function to obtain the pareto frontier
def _get_pareto_frontier(scores, return_mask = True):
    """
    Select the rows of ``scores`` that lie on the Pareto frontier, where larger
    values are better in every column.

    A row is discarded only when some other row is strictly greater in every
    column; rows that tie with another row in at least one column are kept.

    :param scores: An (n_points, n_scores) array
    :param return_mask: True to return a mask, False to return integer indices of efficient points.
    :return: The efficient points.
        If return_mask is True, this will be an (n_points, ) boolean array
        Otherwise it will be a (n_efficient_points, ) integer array of indices.

    adapted from: https://stackoverflow.com/questions/32791911/fast-calculation-of-pareto-front-in-python
    """
    total = scores.shape[0]
    survivors = np.arange(total)   # original row numbers still in play
    remaining = scores             # rows of `scores` matching `survivors`
    cursor = 0                     # position in `remaining` to compare against next

    while cursor < len(remaining):
        # Keep every row that matches or beats the pivot in at least one column.
        keep = (remaining >= remaining[cursor]).any(axis=1)
        survivors = survivors[keep]
        remaining = remaining[keep]
        # Rows dropped before the pivot shift it left; then step past the pivot.
        cursor = keep[:cursor].sum() + 1

    if not return_mask:
        return survivors

    efficient_mask = np.zeros(total, dtype = bool)
    efficient_mask[survivors] = True
    return efficient_mask

In [None]:
import pandas as pd
from aif360.datasets import BinaryLabelDataset
from sklearn.model_selection import train_test_split

In [None]:
# Load the classifier's test-set and validation-set prediction tables.
# Both files are read as ';'-separated CSVs.
test_predictions_df = pd.read_csv('hpc_space/Results/NEW RESULTS/NEW RESULTS/Doc2Vec SVM/Orig/SVMDoc2Vec5_augmentedtest_predictions.csv', sep=';')
validation_predictions_df = pd.read_csv('hpc_space/Results/NEW RESULTS/NEW RESULTS/Doc2Vec SVM/Orig/SVMDoc2Vec_5_val_predictions.csv', sep=';')

In [None]:
# Inspect the validation predictions table right after loading.
print(validation_predictions_df)

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_auc_score

# Baseline report: the classifier's own predictions on the validation split.
val_true = validation_predictions_df['label']
val_pred = validation_predictions_df['final_prediction']

# Overall accuracy (despite the variable name, this is not class-specific)
accuracy_positive = accuracy_score(val_true, val_pred)

# Precision / recall / F1 with label 1 treated as the positive class
precision_positive = precision_score(val_true, val_pred, pos_label=1)
recall_positive = recall_score(val_true, val_pred, pos_label=1)
f1_positive = f1_score(val_true, val_pred, pos_label=1)

# The same three metrics with label 0 treated as the positive class
precision_negative = precision_score(val_true, val_pred, pos_label=0)
recall_negative = recall_score(val_true, val_pred, pos_label=0)
f1_negative = f1_score(val_true, val_pred, pos_label=0)

# AUC, computed from the `final_prediction` column rather than from the
# probability scores
auc = roc_auc_score(val_true, val_pred)

# AUC restricted to males (Geslacht = 1)
val_male = validation_predictions_df[validation_predictions_df['Geslacht'] == 1]
auc_male = roc_auc_score(val_male['label'], val_male['final_prediction'])

# AUC restricted to females (Geslacht = 0)
val_female = validation_predictions_df[validation_predictions_df['Geslacht'] == 0]
auc_female = roc_auc_score(val_female['label'], val_female['final_prediction'])


# Report every metric with four decimals
print(f"Accuracy (Overall): {accuracy_positive:.4f}")
print(f"Precision (Positive): {precision_positive:.4f}")
print(f"Recall (Positive): {recall_positive:.4f}")
print(f"F1 Score (Positive): {f1_positive:.4f}")
print(f"Precision (Negative): {precision_negative:.4f}")
print(f"Recall (Negative): {recall_negative:.4f}")
print(f"F1 Score (Negative): {f1_negative:.4f}")
print(f"AUC: {auc:.4f}")
print(f"AUC (Male): {auc_male:.4f}")
print(f"AUC (Female): {auc_female:.4f}")

In [None]:
from sklearn.metrics import confusion_matrix

# Confusion matrix over all validation rows (rows: true label, columns: prediction)
cm = confusion_matrix(validation_predictions_df['label'], validation_predictions_df['final_prediction'])

# Male rows (Geslacht = 1)
male_indices = validation_predictions_df['Geslacht'] == 1
val_male_rows = validation_predictions_df[male_indices]
cm_male = confusion_matrix(val_male_rows['label'], val_male_rows['final_prediction'])

# TPR = TP / (FN + TP) from the true-1 row; FPR = FP / (TN + FP) from the true-0 row
tpr_male = cm_male[1, 1] / cm_male[1, :2].sum()
fpr_male = cm_male[0, 1] / cm_male[0, :2].sum()

# Female rows (Geslacht = 0)
female_indices = validation_predictions_df['Geslacht'] == 0
val_female_rows = validation_predictions_df[female_indices]
cm_female = confusion_matrix(val_female_rows['label'], val_female_rows['final_prediction'])

tpr_female = cm_female[1, 1] / cm_female[1, :2].sum()
fpr_female = cm_female[0, 1] / cm_female[0, :2].sum()

# Report the per-gender rates
print(f"TPR (Male): {tpr_male:.4f}")
print(f"TPR (Female): {tpr_female:.4f}")
print(f"FPR (Male): {fpr_male:.4f}")
print(f"FPR (Female): {fpr_female:.4f}")

In [None]:
# Inspect the test predictions table.
print(test_predictions_df)

In [None]:
# Inspect the validation predictions table again before building the datasets.
print(validation_predictions_df)

In [None]:
# Ground-truth frame for the validation split: the true label plus the
# protected attribute, copied out of the predictions table.
ground_truth_df = validation_predictions_df[['label', 'Geslacht']].copy()

In [None]:
# Prediction frame for the validation split: the classifier's predicted label
# (stored under 'label'), its score, and the protected attribute.
predictions_df = validation_predictions_df[
    ['final_prediction', 'predicted_probabilities', 'Geslacht']
].rename(columns={
    'final_prediction': 'label',
    'predicted_probabilities': 'predicted_probability',
})

In [None]:
# Inspect the frame that will back the predicted BinaryLabelDataset.
print(predictions_df)

In [None]:
# Values of the protected attribute 'Geslacht' for each group. These match the
# encoding used by the dataset and ROC definitions in this notebook
# (privileged = 1 = male, unprivileged = 0 = female); the two values were
# previously swapped relative to their own comments.
privileged_protected_attributes = [1]  # 1 is male
unprivileged_protected_attributes = [0]  # 0 is female

In [None]:
# Label values handed to BinaryLabelDataset: 1 is favorable, 0 is unfavorable.
favorable_label, unfavorable_label = 1, 0

In [None]:
# Column holding the protected attribute (gender).
protected_attribute_names = ['Geslacht']

In [None]:
# Ground-truth validation dataset: 'label' holds the true labels and
# 'Geslacht' is the protected attribute (0 = unprivileged, 1 = privileged).
dataset_true_validation = BinaryLabelDataset(
    favorable_label=favorable_label,
    unfavorable_label=unfavorable_label,
    df=ground_truth_df,
    label_names=['label'],
    protected_attribute_names=['Geslacht'],
    unprivileged_protected_attributes=[[0]],
    privileged_protected_attributes=[[1]],
)

In [None]:
# Inspect the ground-truth dataset.
print(dataset_true_validation)

In [None]:
# Predicted validation dataset: 'label' holds the classifier's predicted
# labels and 'predicted_probability' supplies the scores that the reject
# option classifier thresholds.
dataset_pred_validation = BinaryLabelDataset(
    favorable_label=favorable_label,
    unfavorable_label=unfavorable_label,
    df=predictions_df,
    label_names=['label'],
    scores_names=['predicted_probability'],
    protected_attribute_names=['Geslacht'],
    unprivileged_protected_attributes=[[0]],
    privileged_protected_attributes=[[1]]
)

In [None]:
# Inspect the predicted dataset.
print(dataset_pred_validation)

In [None]:
# Configure reject option classification to keep the average odds difference
# within [-0.05, 0.05], searching 100 thresholds in [0.01, 0.99] with 50
# margins per threshold.
roc = RejectOptionClassification(
    unprivileged_groups=[{'Geslacht': 0}],  # unprivileged group: Geslacht == 0
    privileged_groups=[{'Geslacht': 1}],    # privileged group: Geslacht == 1
    low_class_thresh=0.01,
    high_class_thresh=0.99,
    num_class_thresh=100,
    num_ROC_margin=50,
    metric_name="Average odds difference",
    metric_ub=0.05,
    metric_lb=-0.05
)

In [None]:
# Fit the threshold and margin on the validation split. The returned
# predictions are not stored here; later cells call roc.predict again.
roc.fit_predict(dataset_true_validation, dataset_pred_validation)

In [None]:
# Re-apply the fitted ROC to the validation predictions and report the
# resulting average odds difference against the ground truth.
roc_validation_pred = roc.predict(dataset_pred_validation)
fairness_metric = ClassificationMetric(
    dataset_true_validation,
    roc_validation_pred,
    unprivileged_groups=[{'Geslacht': 0}],
    privileged_groups=[{'Geslacht': 1}],
)
print("Average Odds Difference:", fairness_metric.average_odds_difference())

In [None]:
# ROC-adjusted predictions for the validation split.
new_predictions = roc.predict(dataset_pred_validation)

In [None]:
# Flatten multi-dimensional arrays
# Flatten the dataset's label and score arrays to 1-D so they can be used
# as DataFrame columns
labels_flat = new_predictions.labels.flatten()
scores_flat = new_predictions.scores.flatten()

In [None]:
# Extract true labels from the original dataset
# Extract the true labels from the ground-truth dataset as a 1-D array
true_labels = dataset_true_validation.labels.flatten()

In [None]:
# Combine the ROC-adjusted labels, the scores, the true labels and the
# protected attribute into one frame for evaluation.
# NOTE(review): the first three columns are positional arrays while 'Geslacht'
# is an indexed Series -- this assumes validation_predictions_df still has its
# original row order and index; confirm if the frame is ever filtered or sorted.
new_predictions_df = pd.DataFrame({
    'predicted_label': labels_flat, 
    'predicted_probabilities': scores_flat,
    'label': true_labels,
    'Geslacht': validation_predictions_df['Geslacht'],
})

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_auc_score

# Same report as for the raw classifier, now on the ROC-adjusted validation labels.
roc_val_true = new_predictions_df['label']
roc_val_pred = new_predictions_df['predicted_label']

# Overall accuracy (despite the variable name, this is not class-specific)
accuracy_positive = accuracy_score(roc_val_true, roc_val_pred)

# Precision / recall / F1 with label 1 treated as the positive class
precision_positive = precision_score(roc_val_true, roc_val_pred, pos_label=1)
recall_positive = recall_score(roc_val_true, roc_val_pred, pos_label=1)
f1_positive = f1_score(roc_val_true, roc_val_pred, pos_label=1)

# The same three metrics with label 0 treated as the positive class
precision_negative = precision_score(roc_val_true, roc_val_pred, pos_label=0)
recall_negative = recall_score(roc_val_true, roc_val_pred, pos_label=0)
f1_negative = f1_score(roc_val_true, roc_val_pred, pos_label=0)

# AUC, computed from the adjusted `predicted_label` column rather than from
# the probability scores
auc = roc_auc_score(roc_val_true, roc_val_pred)

# AUC restricted to males (Geslacht = 1)
roc_val_male = new_predictions_df[new_predictions_df['Geslacht'] == 1]
auc_male = roc_auc_score(roc_val_male['label'], roc_val_male['predicted_label'])

# AUC restricted to females (Geslacht = 0)
roc_val_female = new_predictions_df[new_predictions_df['Geslacht'] == 0]
auc_female = roc_auc_score(roc_val_female['label'], roc_val_female['predicted_label'])


# Report every metric with four decimals
print(f"Accuracy (Overall): {accuracy_positive:.4f}")
print(f"Precision (Positive): {precision_positive:.4f}")
print(f"Recall (Positive): {recall_positive:.4f}")
print(f"F1 Score (Positive): {f1_positive:.4f}")
print(f"Precision (Negative): {precision_negative:.4f}")
print(f"Recall (Negative): {recall_negative:.4f}")
print(f"F1 Score (Negative): {f1_negative:.4f}")
print(f"AUC: {auc:.4f}")
print(f"AUC (Male): {auc_male:.4f}")
print(f"AUC (Female): {auc_female:.4f}")

In [None]:
from sklearn.metrics import confusion_matrix

# Confusion matrix over all ROC-adjusted validation rows
cm = confusion_matrix(new_predictions_df['label'], new_predictions_df['predicted_label'])

# Male rows (Geslacht = 1)
male_indices = new_predictions_df['Geslacht'] == 1
roc_val_male_rows = new_predictions_df[male_indices]
cm_male = confusion_matrix(roc_val_male_rows['label'], roc_val_male_rows['predicted_label'])

# TPR = TP / (FN + TP) from the true-1 row; FPR = FP / (TN + FP) from the true-0 row
tpr_male = cm_male[1, 1] / cm_male[1, :2].sum()
fpr_male = cm_male[0, 1] / cm_male[0, :2].sum()

# Female rows (Geslacht = 0)
female_indices = new_predictions_df['Geslacht'] == 0
roc_val_female_rows = new_predictions_df[female_indices]
cm_female = confusion_matrix(roc_val_female_rows['label'], roc_val_female_rows['predicted_label'])

tpr_female = cm_female[1, 1] / cm_female[1, :2].sum()
fpr_female = cm_female[0, 1] / cm_female[0, :2].sum()

# Report the per-gender rates
print(f"TPR (Male): {tpr_male:.4f}")
print(f"TPR (Female): {tpr_female:.4f}")
print(f"FPR (Male): {fpr_male:.4f}")
print(f"FPR (Female): {fpr_female:.4f}")

In [None]:
# Prediction frame for the test split: the classifier's predicted label
# (stored under 'label'), its score, and the protected attribute.
predictions_test = test_predictions_df[
    ['final_prediction', 'predicted_probabilities', 'Geslacht']
].rename(columns={
    'final_prediction': 'label',
    'predicted_probabilities': 'predicted_probability',
})

In [None]:
# Predicted test dataset, built the same way as the validation one:
# 'label' holds the classifier's predicted labels and
# 'predicted_probability' supplies the scores.
dataset_pred_test = BinaryLabelDataset(
    favorable_label=favorable_label,
    unfavorable_label=unfavorable_label,
    df=predictions_test,
    label_names=['label'],
    protected_attribute_names=['Geslacht'],
    unprivileged_protected_attributes=[[0]],
    privileged_protected_attributes=[[1]],
    scores_names=['predicted_probability'],
)

In [None]:
# Apply the ROC fitted on the validation split to the test predictions.
new_predictions_test = roc.predict(dataset_pred_test)

In [None]:
# Flatten multi-dimensional arrays
# Flatten the test label and score arrays to 1-D so they can be used as
# DataFrame columns
labels_flat_test = new_predictions_test.labels.flatten()
scores_flat_test = new_predictions_test.scores.flatten()

In [None]:
# Combine the ROC-adjusted test labels and scores with the true labels and the
# protected attribute taken from the loaded test table.
# NOTE(review): the first two columns are positional arrays while 'label' and
# 'Geslacht' are indexed Series -- this assumes test_predictions_df still has
# its original row order and index; confirm if the frame is ever filtered.
new_test_predictions_df = pd.DataFrame({
    'predicted_label': labels_flat_test, 
    'predicted_probabilities': scores_flat_test,
    'label': test_predictions_df['label'],
    'Geslacht': test_predictions_df['Geslacht'],
})

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_auc_score

# Metric report for the ROC-adjusted labels on the full test table.
test_true = new_test_predictions_df['label']
test_pred = new_test_predictions_df['predicted_label']

# Overall accuracy (despite the variable name, this is not class-specific)
accuracy_positive = accuracy_score(test_true, test_pred)

# Precision / recall / F1 with label 1 treated as the positive class
precision_positive = precision_score(test_true, test_pred, pos_label=1)
recall_positive = recall_score(test_true, test_pred, pos_label=1)
f1_positive = f1_score(test_true, test_pred, pos_label=1)

# The same three metrics with label 0 treated as the positive class
precision_negative = precision_score(test_true, test_pred, pos_label=0)
recall_negative = recall_score(test_true, test_pred, pos_label=0)
f1_negative = f1_score(test_true, test_pred, pos_label=0)

# AUC, computed from the adjusted `predicted_label` column rather than from
# the probability scores
auc = roc_auc_score(test_true, test_pred)

# AUC restricted to males (Geslacht = 1)
test_male = new_test_predictions_df[new_test_predictions_df['Geslacht'] == 1]
auc_male = roc_auc_score(test_male['label'], test_male['predicted_label'])

# AUC restricted to females (Geslacht = 0)
test_female = new_test_predictions_df[new_test_predictions_df['Geslacht'] == 0]
auc_female = roc_auc_score(test_female['label'], test_female['predicted_label'])


# Report every metric with four decimals
print(f"Accuracy (Overall): {accuracy_positive:.4f}")
print(f"Precision (Positive): {precision_positive:.4f}")
print(f"Recall (Positive): {recall_positive:.4f}")
print(f"F1 Score (Positive): {f1_positive:.4f}")
print(f"Precision (Negative): {precision_negative:.4f}")
print(f"Recall (Negative): {recall_negative:.4f}")
print(f"F1 Score (Negative): {f1_negative:.4f}")
print(f"AUC: {auc:.4f}")
print(f"AUC (Male): {auc_male:.4f}")
print(f"AUC (Female): {auc_female:.4f}")

In [None]:
from sklearn.metrics import confusion_matrix

# Confusion matrix over all ROC-adjusted test rows
cm = confusion_matrix(new_test_predictions_df['label'], new_test_predictions_df['predicted_label'])

# Male rows (Geslacht = 1)
male_indices = new_test_predictions_df['Geslacht'] == 1
test_male_rows = new_test_predictions_df[male_indices]
cm_male = confusion_matrix(test_male_rows['label'], test_male_rows['predicted_label'])

# TPR = TP / (FN + TP) from the true-1 row; FPR = FP / (TN + FP) from the true-0 row
tpr_male = cm_male[1, 1] / cm_male[1, :2].sum()
fpr_male = cm_male[0, 1] / cm_male[0, :2].sum()

# Female rows (Geslacht = 0)
female_indices = new_test_predictions_df['Geslacht'] == 0
test_female_rows = new_test_predictions_df[female_indices]
cm_female = confusion_matrix(test_female_rows['label'], test_female_rows['predicted_label'])

tpr_female = cm_female[1, 1] / cm_female[1, :2].sum()
fpr_female = cm_female[0, 1] / cm_female[0, :2].sum()

# Report the per-gender rates
print(f"TPR (Male): {tpr_male:.4f}")
print(f"TPR (Female): {tpr_female:.4f}")
print(f"FPR (Male): {fpr_male:.4f}")
print(f"FPR (Female): {fpr_female:.4f}")

In [None]:
# Tag each test row with "<Geslacht>_<true label>_<predicted label>".
test_key_parts = [
    new_test_predictions_df[col].astype(str)
    for col in ('Geslacht', 'label', 'predicted_label')
]
new_test_predictions_df['combined'] = test_key_parts[0] + '_' + test_key_parts[1] + '_' + test_key_parts[2]

# Frequency of each gender / truth / prediction combination
combination_counts = new_test_predictions_df['combined'].value_counts()

# Show the frequencies
print("Combined Counts:")
print(combination_counts)

In [None]:
# Split the DataFrame into first_half and second_half
# Split the test predictions into two consecutive halves and count the row
# positions at which their predicted labels differ.
# NOTE(review): presumably row i of the second half corresponds to row i of the
# first half (e.g. an augmented counterpart) -- confirm against how the test
# CSV was assembled.
half_len = len(new_test_predictions_df) // 2

# reset_index returns fresh frames with a 0-based index, so the halves line up
# row-by-row and later column assignments on them never write into a slice of
# new_test_predictions_df.
first_half = new_test_predictions_df.iloc[:half_len].reset_index(drop=True)
second_half = new_test_predictions_df.iloc[half_len:].reset_index(drop=True)

# An odd row count leaves the halves unequal, which makes the element-wise
# comparison below meaningless; fail with an explicit message instead.
if len(first_half) != len(second_half):
    raise ValueError(
        "new_test_predictions_df has an odd number of rows "
        f"({len(new_test_predictions_df)}); cannot compare two equal halves"
    )

# Compare the 'predicted_label' values in the two halves and calculate value counts
mismatch_counts = (first_half['predicted_label'] != second_half['predicted_label']).value_counts()

# Create a DataFrame to display the mismatch counts
mismatches = pd.DataFrame({'Mismatches': mismatch_counts})
print(mismatches)

In [None]:
# Persist the ROC-adjusted test predictions next to the input files.
# No `sep` argument is passed here, unlike the ';'-separated inputs read above.
new_test_predictions_df.to_csv('hpc_space/Results/NEW RESULTS/NEW RESULTS/Doc2Vec SVM/Orig/' + 'ROC_SVM5_predictions.csv')

In [None]:
# Bind a second name to the first-half frame (same object, not a copy).
final_df = first_half

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_auc_score

# Metric report restricted to the first half of the test table.
half_true = first_half['label']
half_pred = first_half['predicted_label']

# Overall accuracy (despite the variable name, this is not class-specific)
accuracy_positive = accuracy_score(half_true, half_pred)

# Precision / recall / F1 with label 1 treated as the positive class
precision_positive = precision_score(half_true, half_pred, pos_label=1)
recall_positive = recall_score(half_true, half_pred, pos_label=1)
f1_positive = f1_score(half_true, half_pred, pos_label=1)

# The same three metrics with label 0 treated as the positive class
precision_negative = precision_score(half_true, half_pred, pos_label=0)
recall_negative = recall_score(half_true, half_pred, pos_label=0)
f1_negative = f1_score(half_true, half_pred, pos_label=0)

# AUC, computed from the adjusted `predicted_label` column rather than from
# the probability scores
auc = roc_auc_score(half_true, half_pred)

# AUC restricted to males (Geslacht = 1)
half_male = first_half[first_half['Geslacht'] == 1]
auc_male = roc_auc_score(half_male['label'], half_male['predicted_label'])

# AUC restricted to females (Geslacht = 0)
half_female = first_half[first_half['Geslacht'] == 0]
auc_female = roc_auc_score(half_female['label'], half_female['predicted_label'])


# Report every metric with four decimals
print(f"Accuracy (Overall): {accuracy_positive:.4f}")
print(f"Precision (Positive): {precision_positive:.4f}")
print(f"Recall (Positive): {recall_positive:.4f}")
print(f"F1 Score (Positive): {f1_positive:.4f}")
print(f"Precision (Negative): {precision_negative:.4f}")
print(f"Recall (Negative): {recall_negative:.4f}")
print(f"F1 Score (Negative): {f1_negative:.4f}")
print(f"AUC: {auc:.4f}")
print(f"AUC (Male): {auc_male:.4f}")
print(f"AUC (Female): {auc_female:.4f}")

In [None]:
from sklearn.metrics import confusion_matrix

# Confusion matrix over all rows of the first half
cm = confusion_matrix(first_half['label'], first_half['predicted_label'])

# Male rows (Geslacht = 1)
male_indices = first_half['Geslacht'] == 1
half_male_rows = first_half[male_indices]
cm_male = confusion_matrix(half_male_rows['label'], half_male_rows['predicted_label'])

# TPR = TP / (FN + TP) from the true-1 row; FPR = FP / (TN + FP) from the true-0 row
tpr_male = cm_male[1, 1] / cm_male[1, :2].sum()
fpr_male = cm_male[0, 1] / cm_male[0, :2].sum()

# Female rows (Geslacht = 0)
female_indices = first_half['Geslacht'] == 0
half_female_rows = first_half[female_indices]
cm_female = confusion_matrix(half_female_rows['label'], half_female_rows['predicted_label'])

tpr_female = cm_female[1, 1] / cm_female[1, :2].sum()
fpr_female = cm_female[0, 1] / cm_female[0, :2].sum()

# Report the per-gender rates
print(f"TPR (Male): {tpr_male:.4f}")
print(f"TPR (Female): {tpr_female:.4f}")
print(f"FPR (Male): {fpr_male:.4f}")
print(f"FPR (Female): {fpr_female:.4f}")

In [None]:
# Tag each first-half row with "<Geslacht>_<true label>_<predicted label>".
half_key_parts = [
    first_half[col].astype(str)
    for col in ('Geslacht', 'label', 'predicted_label')
]
first_half['combined'] = half_key_parts[0] + '_' + half_key_parts[1] + '_' + half_key_parts[2]

# Frequency of each gender / truth / prediction combination
combination_counts = first_half['combined'].value_counts()

# Show the frequencies
print("Combined Counts:")
print(combination_counts)