In [None]:
! pip install lightning

Collecting lightning
  Downloading lightning-2.5.0.post0-py3-none-any.whl.metadata (40 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/40.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.4/40.4 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
Collecting lightning-utilities<2.0,>=0.10.0 (from lightning)
  Downloading lightning_utilities-0.12.0-py3-none-any.whl.metadata (5.6 kB)
Collecting torchmetrics<3.0,>=0.7.0 (from lightning)
  Downloading torchmetrics-1.6.1-py3-none-any.whl.metadata (21 kB)
Collecting pytorch-lightning (from lightning)
  Downloading pytorch_lightning-2.5.0.post0-py3-none-any.whl.metadata (21 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<4.0,>=2.1.0->lightning)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<4.0,>=2.1.0->lightning)
  Downloading nvidia_cuda_runtime_

In [None]:
# @title Imports

import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
import pytorch_lightning as pl

import numpy as np
import pandas as pd

from imblearn.over_sampling import SMOTE
from sklearn.model_selection import GridSearchCV, KFold, train_test_split
from sklearn.metrics import recall_score, make_scorer, confusion_matrix, accuracy_score, confusion_matrix
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression


In [None]:
# @title DriveManager

# --------------------------------------------------
# DriveManager
# --------------------------------------------------
class DriveManager:
    """
    Small helper that remembers a mount point and mounts Google Drive there.

    Purpose:
        - Keep the Drive mount location in one place.
        - Wrap the Colab `drive.mount` call behind a single method.

    Example Usage:
        drive_mgr = DriveManager()
        drive_mgr.mount_drive()
    """

    def __init__(self, mount_path="/content/drive/"):
        """
        Remember where Google Drive should be mounted.

        Args:
            mount_path (str): Directory at which Drive will be mounted.

        Attributes:
            mount_path (str): The mount location given at construction time.
        """
        self.mount_path = mount_path

    def mount_drive(self, force_remount=True):
        """
        Mount Google Drive at `self.mount_path`.

        Args:
            force_remount (bool): Forwarded unchanged to `drive.mount`.
        """
        # Imported at call time, so defining/instantiating this class does not
        # itself require the `google.colab` package to be importable.
        from google.colab import drive as colab_drive

        target = self.mount_path
        colab_drive.mount(target, force_remount=force_remount)

In [None]:
# @title CSVDataset

# --------------------------------------------------
# CSVDataset
# --------------------------------------------------

class CSVDataset(Dataset):
    """
    Class Name: CSVDataset

    Purpose:
        - Custom PyTorch Dataset that exposes features and labels taken from a Pandas DataFrame.
        - Optionally apply SMOTE oversampling on the data to handle imbalance.

    Responsibilities:
        - Select the 11 feature columns and the 'winner' label column from the DataFrame.
        - Min-max normalize the non-binary numeric feature columns.
        - Optionally perform SMOTE oversampling to balance classes.

    Notes:
        - Min/max values are computed from the DataFrame passed to this instance,
          so two datasets built from different frames are scaled independently.

    Example Usage:
        dataset = CSVDataset(train_df, smote=True)
        data_loader = DataLoader(dataset, batch_size=32, shuffle=True)
    """

    def __init__(self, df, smote=True):
        """
        Constructor for CSVDataset.

        Args:
            df (pd.DataFrame): DataFrame containing the feature columns listed below and a 'winner' column.
            smote (bool): If True, applies SMOTE oversampling after normalization.

        Attributes:
            df (pd.DataFrame): The DataFrame that was passed in (not modified).
            x (np.ndarray): Normalized feature matrix (resampled when `smote` is True).
            y (np.ndarray): Label vector (resampled when `smote` is True).
        """
        self.df = df
        x = df[['submission_word_count', 'num_image', 'num_gif', 'num_video', 'video_duration',
                'num_things', 'cad', 'schematic', 'code', 'code_lines', 'link']]

        # Normalize numeric features
        self.x = self.normalize_data(x).values
        self.y = df['winner'].values

        if smote:
            self.smote()

    def smote(self):
        """
        Applies SMOTE oversampling to the dataset to handle class imbalance.

        Replaces `self.x` and `self.y` with the resampled arrays.
        """
        sm = SMOTE(random_state=24)
        self.x, self.y = sm.fit_resample(self.x, self.y)

    def normalize_data(self, df_):
        """
        Min-max normalizes int64/float64 columns to the [0, 1] range, ignoring binary columns.

        A column whose values are all identical has no range to scale by; it is
        mapped to 0.0 instead of dividing by zero (which would fill it with NaN).

        Args:
            df_ (pd.DataFrame): DataFrame of features to normalize. It is not modified.

        Returns:
            pd.DataFrame: A normalized copy of `df_`.
        """
        df = df_.copy()
        for column in df.columns:
            if df[column].dtype not in ['int64', 'float64']:
                continue
            # Skip columns that are purely 0 or 1 (vectorized membership test).
            if df[column].isin([0, 1]).all():
                continue

            min_val = df[column].min()
            span = df[column].max() - min_val
            shifted = df[column] - min_val
            if span != 0:
                df[column] = shifted / span
            else:
                # Constant column: every value equals min_val, so `shifted` is all
                # zeros. Keep it (as float) rather than computing 0 / 0 = NaN.
                df[column] = shifted.astype('float64')
        return df

    def __len__(self):
        """
        Returns the total number of samples.
        """
        return len(self.x)

    def __getitem__(self, idx):
        """
        Returns one sample of data.

        Args:
            idx (int): Index of the sample to retrieve.

        Returns:
            tuple(torch.Tensor, torch.Tensor): (features, label), both as float tensors.
        """
        return torch.tensor(self.x[idx], dtype=torch.float), torch.tensor(self.y[idx], dtype=torch.float)


In [None]:
# @title SimpleMLP, LogisticRegressionModel, OptimizedModelWrapper

# --------------------------------------------------
# SimpleMLP
# --------------------------------------------------
class SimpleMLP(pl.LightningModule):
    """
    Class Name: SimpleMLP

    Purpose:
        - A simple Multilayer Perceptron (MLP) model built with PyTorch Lightning.

    Responsibilities:
        - Define a forward pass through a 3-layer fully-connected neural network.
        - Handle training/validation steps and loss computation using BCE.

    Example Usage:
        model = SimpleMLP(input_size=11, hidden_size=32, output_size=1)
        trainer = pl.Trainer(max_epochs=10)
        trainer.fit(model, train_loader, val_loader)
    """

    def __init__(self, input_size, hidden_size, output_size):
        """
        Constructor for SimpleMLP.

        Args:
            input_size (int): Number of input features.
            hidden_size (int): Number of units in hidden layers.
            output_size (int): Number of output units (1 for binary classification).
        """
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """
        Forward pass of the MLP.

        Args:
            x (torch.Tensor): Input tensor whose last dimension is `input_size`.

        Returns:
            torch.Tensor: Sigmoid output (probability of class 1); the last
            dimension has size `output_size`.
        """
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return torch.sigmoid(x)

    def _bce_loss(self, batch):
        """
        Shared train/validation logic: binary cross-entropy for one batch.

        Args:
            batch (tuple): (x, y) data from the DataLoader.

        Returns:
            torch.Tensor: Scalar BCE loss.
        """
        x, y = batch
        # Drop only the trailing output dimension. A bare squeeze() would also
        # remove the batch dimension when the batch holds a single sample, and
        # binary_cross_entropy would then reject the input/target size mismatch.
        y_hat = self(x).squeeze(-1)
        return F.binary_cross_entropy(y_hat, y.float())

    def training_step(self, batch, batch_idx):
        """
        Training step: computes binary cross-entropy loss.

        Args:
            batch (tuple): (x, y) data from the DataLoader.
            batch_idx (int): Batch index (not used).

        Returns:
            torch.Tensor: Training loss.
        """
        loss = self._bce_loss(batch)
        self.log('train_loss', loss)
        return loss

    def validation_step(self, batch, batch_idx):
        """
        Validation step: computes binary cross-entropy loss and logs it as 'val_loss'.

        Args:
            batch (tuple): (x, y) data from the DataLoader.
            batch_idx (int): Batch index (not used).
        """
        loss = self._bce_loss(batch)
        self.log('val_loss', loss)

    def configure_optimizers(self):
        """
        Configures the Adam optimizer with a fixed learning rate.
        """
        return torch.optim.Adam(self.parameters(), lr=1e-4)


# --------------------------------------------------
# LogisticRegressionModel
# --------------------------------------------------
class LogisticRegressionModel(pl.LightningModule):
    """
    Class Name: LogisticRegressionModel

    Purpose:
        - A logistic regression model wrapped in PyTorch Lightning.

    Responsibilities:
        - Define a single linear layer for logistic regression.
        - Handle training/validation steps and loss computation using BCE.

    Example Usage:
        model = LogisticRegressionModel(input_size=11)
        trainer = pl.Trainer(max_epochs=10)
        trainer.fit(model, train_loader, val_loader)
    """

    def __init__(self, input_size):
        """
        Constructor for LogisticRegressionModel.

        Args:
            input_size (int): Number of input features.
        """
        super().__init__()
        self.linear = nn.Linear(input_size, 1)

    def forward(self, x):
        """
        Forward pass for logistic regression.

        Args:
            x (torch.Tensor): Input tensor whose last dimension is `input_size`.

        Returns:
            torch.Tensor: Sigmoid output with the trailing size-1 dimension removed,
            i.e. one probability per input row.
        """
        logits = self.linear(x)
        probs = torch.sigmoid(logits)
        # squeeze(-1) removes only the trailing output dimension. A bare squeeze()
        # would also drop the batch dimension for a single-sample batch, which
        # makes binary_cross_entropy fail on the input/target size mismatch.
        return probs.squeeze(-1)

    def _bce_loss(self, batch):
        """
        Shared train/validation logic: binary cross-entropy for one batch.

        Args:
            batch (tuple): (x, y) data from the DataLoader.

        Returns:
            torch.Tensor: Scalar BCE loss.
        """
        x, y = batch
        return F.binary_cross_entropy(self(x), y.float())

    def training_step(self, batch, batch_idx):
        """
        Training step: computes binary cross-entropy loss.

        Args:
            batch (tuple): (x, y) data from the DataLoader.
            batch_idx (int): Batch index (not used).

        Returns:
            torch.Tensor: Training loss.
        """
        loss = self._bce_loss(batch)
        self.log('train_loss', loss)
        return loss

    def validation_step(self, batch, batch_idx):
        """
        Validation step: computes binary cross-entropy loss and logs it as 'val_loss'.

        Args:
            batch (tuple): (x, y) data from the DataLoader.
            batch_idx (int): Batch index (not used).
        """
        loss = self._bce_loss(batch)
        self.log('val_loss', loss)

    def configure_optimizers(self):
        """
        Configures the Adam optimizer with a fixed learning rate.
        """
        return torch.optim.Adam(self.parameters(), lr=1e-4)


# --------------------------------------------------
# OptimizedModelWrapper
# --------------------------------------------------
class OptimizedModelWrapper(pl.LightningModule):
    """
    Class Name: OptimizedModelWrapper

    Purpose:
        - A wrapper that trains various scikit-learn style models (RandomForest, XGBoost, SVM,
          LogisticRegression) and exposes them through a LightningModule interface.

    Responsibilities:
        - Select model based on provided model_name.
        - For RandomForest, SVM and LogisticRegression: fit through GridSearchCV (3-fold, recall scoring).
        - For XGBoost: fit directly with a validation eval set (see note below).
        - Provide a forward method to integrate with PyTorch Lightning workflow.

    Notes:
        - Training happens inside the constructor; the fitted estimator is stored in `self.model`.
        - NOTE(review): the XGBoost branch passes an eval set but does not set
          `early_stopping_rounds`, so as far as this code shows no early stopping is
          requested even though the error message mentions it. Confirm the intent.
    """

    def __init__(self, model_name, train_X, train_y, val_X=None, val_y=None):
        """
        Constructor for OptimizedModelWrapper. Fits the selected model immediately.

        Args:
            model_name (str): Name of the model to train (RandomForest, XGBoost, SVM, LogisticRegression).
            train_X (numpy.ndarray): Training features.
            train_y (numpy.ndarray): Training labels.
            val_X (numpy.ndarray, optional): Validation features (required for XGBoost).
            val_y (numpy.ndarray, optional): Validation labels (required for XGBoost).

        Raises:
            ValueError: If `model_name` is unknown, or if XGBoost is requested without validation data.
        """
        super().__init__()
        self.model_name = model_name
        self.train_X = train_X
        self.train_y = train_y
        self.val_X = val_X
        self.val_y = val_y
        self.model = self._get_optimized_model()

    def _get_optimized_model(self):
        """
        Internal method: builds and fits the estimator selected by `self.model_name`.

        Returns:
            The fitted estimator: the XGBClassifier itself for XGBoost, otherwise
            `GridSearchCV.best_estimator_`.

        Raises:
            ValueError: If the model name is not supported, or XGBoost lacks validation data.
        """
        if self.model_name == "RandomForest":
            param_grid = {
                'n_estimators': [100],
                'max_depth': [3],
                'min_samples_split': [10]
            }
            model = RandomForestClassifier()

        elif self.model_name == "XGBoost":
            if self.val_X is None or self.val_y is None:
                raise ValueError("Validation data is required for XGBoost with early stopping.")
            model = XGBClassifier(
                # NOTE(review): `use_label_encoder` is reportedly deprecated/ignored in
                # recent XGBoost releases; kept to preserve behaviour - verify against
                # the installed version.
                use_label_encoder=False,
                eval_metric='logloss',
                max_depth=3,
                n_estimators=50,
                learning_rate=0.05,
                subsample=0.8,
                colsample_bytree=0.8
            )
            # The eval set is only monitored here (metric printed per round because
            # verbose=True); no early-stopping parameter is passed.
            model.fit(
                self.train_X,
                self.train_y,
                eval_set=[(self.val_X, self.val_y)],
                verbose=True  # Set to False if you want to suppress the training output
            )
            # XGBoost skips the grid search below.
            return model

        elif self.model_name == "SVM":
            param_grid = {
                'C': [0.1],
                'kernel': ['linear']
            }
            # probability=True so the fitted SVC exposes predict_proba, which forward() needs.
            model = SVC(probability=True)

        elif self.model_name == "LogisticRegression":
            param_grid = {
                'C': [0.1],
                'penalty': ['l2']
            }
            model = LogisticRegression(max_iter=500)

        else:
            raise ValueError("Invalid model name. Choose from: RandomForest, XGBoost, SVM, LogisticRegression")

        # For non-XGBoost models, perform Grid Search (each grid currently holds a
        # single candidate, so this mainly yields a cross-validated recall estimate).
        scorer = make_scorer(recall_score)
        grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=3, scoring=scorer)
        grid_search.fit(self.train_X, self.train_y)
        print(f"Best Parameters for {self.model_name}: {grid_search.best_params_}")
        print(f"Best Cross-Validation Recall: {grid_search.best_score_}")
        return grid_search.best_estimator_

    def validation_step(self, batch, batch_idx):
        """
        Validation hook for a PyTorch Lightning loop: logs BCE of the fitted
        estimator's probabilities as 'val_loss'.

        Args:
            batch (tuple): (features, labels).
            batch_idx (int): Batch index (not used).
        """
        x, y = batch
        # The wrapped estimator works on NumPy arrays, so everything is moved to CPU.
        x, y = x.cpu(), y.cpu()
        preds = self.forward(x)
        loss = F.binary_cross_entropy(preds, y.float())
        self.log("val_loss", loss)

    def forward(self, x):
        """
        Forward method to integrate with PyTorch Lightning.

        Args:
            x (torch.Tensor): Input features.

        Returns:
            torch.Tensor: float32 CPU tensor with the predicted probability of class 1 per row.
        """
        # detach() first: Tensor.numpy() refuses tensors that require grad.
        x_np = x.detach().cpu().numpy()
        probs = self.model.predict_proba(x_np)[:, 1]
        return torch.tensor(probs, dtype=torch.float32)

    def configure_optimizers(self):
        """
        No optimizer is used here, as training occurs via scikit-learn's methods.
        """
        return None


In [None]:
# @title DiscriminativeModel

# --------------------------------------------------
# DiscriminativeModel
# --------------------------------------------------
class DiscriminativeModel:
    """
    Class Name: DiscriminativeModel

    Purpose:
        - Orchestrate model training (across multiple runs/folds) and evaluate performance.
        - Provide a high-level interface to:
            1. Split data into train/val/test sets.
            2. Train either PyTorch Lightning models (e.g., SimpleMLP) or scikit-learn models
               (wrapped in OptimizedModelWrapper).
            3. Evaluate on a test set using a submission-based recall metric.
            4. (Optionally) Compute and save false negatives, all submissions with model-generated scores,
               and summary metrics (Accuracy, Recall, TN, FP, FN, TP).

    Responsibilities:
        - Manage K-Fold logic, train/val/test splitting, and repeated runs.
        - Contain a method to perform evaluation on a given test set (`submission_evaluation`).
        - Contain a method (`run_evaluation`) that orchestrates multiple runs/folds and then
          aggregates fold-level metrics into a final run-level table.

    Example Usage:
        discriminative_model = DiscriminativeModel(df, output_path="/content/drive/MyDrive/...")
        discriminative_model.run_evaluation(n_runs=5, n_folds=5, model_type="LogisticRegression", dataset_name="28_11.csv")
    """

    def __init__(self, df, output_path):
        """
        Constructor for DiscriminativeModel.

        Args:
            df (pd.DataFrame): The full dataset.
            output_path (str): Where to store evaluation CSVs.
        """
        self.df = df
        self.output_path = output_path

    def submission_evaluation(
        self,
        model,
        test_df,
        run,
        fold,
        dataset_name,
        model_type,
        save_contest_results=False,
        save_false_negatives=False,
        save_all_submissions=False,
        compute_additional_metrics=True
    ):
        """
        Evaluate a trained model on the test set by filtering out various percentages of submissions.

        For every contest in `test_df` the submissions are scored by the model, ranked,
        and for each filter percentage only the top-ranked share is kept (predicted 1);
        at least one submission is always kept. Recall is recorded per contest and per
        filter. Optionally saves per-contest results, false negatives (at 50% filtering)
        and all scored submissions to CSV files under `self.output_path`, and optionally
        computes per-filter metrics (Accuracy, Recall, TN, FP, FN, TP) over all contests.

        Args:
            model: The trained model. For model_type 'MLP' it is called directly on a
                float tensor; otherwise `predict_proba` is used when available, else `predict`.
            test_df (pd.DataFrame): Test data; must contain 'contest_name' plus the columns
                CSVDataset reads. 'prizes_sum' is optional.
            run (int): Current run number (used in output file names).
            fold (int): Current fold index (0-based; stored 1-based in the 'model' column).
            dataset_name (str): Name of the dataset file; its extension is stripped for output file names.
            model_type (str): Type of the model ('MLP', 'LogisticRegression', 'RandomForest', 'XGBoost', 'SVM').
            save_contest_results (bool): If True, append per-contest results to CSV.
            save_false_negatives (bool): If True, append the false negatives at 50% filtering to CSV.
            save_all_submissions (bool): If True, append all submissions with scores to CSV.
            compute_additional_metrics (bool): If True, compute & summarize Accuracy, Recall, TN, FP, FN, TP
                for each filter. The 50% metrics below are computed regardless of this flag.

        Returns:
            (df_results, accuracy, overall_recall, conf_matrix, metrics_summary_df, fold_metrics_data):
                - df_results (pd.DataFrame): Per-contest results with recall at each filter percentage.
                - accuracy (float or None): Overall accuracy at 50% filtering (or None if no data).
                - overall_recall (float or None): Overall recall at 50% filtering (or None if no data).
                - conf_matrix (np.array or None): 2x2 confusion matrix at 50% filtering (or None if no data).
                - metrics_summary_df (pd.DataFrame or None): Additional metrics at each filter (this fold);
                  None when `compute_additional_metrics` is False.
                - fold_metrics_data (dict): Per-filter counts (tn/fp/fn/tp) and single-element lists
                  (accuracy, recall, remaining %) for aggregation across folds; empty when
                  `compute_additional_metrics` is False.
        """
        print(f"----- Testset Evaluation by filtering out varying percentages of submissions for each contest -----")
        filter_percentages = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7]
        p_50 = 0.5
        fold_label = fold + 1
        # splitext instead of slicing off four characters, so names that do not
        # end in a three-letter extension are not truncated.
        dataset_stem = os.path.splitext(dataset_name)[0]

        # Lists/dicts to store results
        per_contest_results = []
        false_negatives = []
        all_submissions = []

        # Predictions/labels per filter percentage across all contests. They are
        # always collected: the 50% fold metrics need them even when the additional
        # per-filter table is not requested.
        metrics_dict = {
            p: {"all_preds": [], "all_labels": []}
            for p in filter_percentages
        }

        # Group the test set by 'contest_name'
        for contest_name, contest_data in test_df.groupby('contest_name'):

            # Extract the 'prizes_sum' information for the contest
            prizes_sum = contest_data['prizes_sum'].iloc[0] if 'prizes_sum' in contest_data.columns else None

            # NOTE(review): features are normalized per contest here (CSVDataset scales
            # with the min/max of the frame it receives) - confirm this matches how the
            # model's training data was scaled.
            dataset = CSVDataset(contest_data, smote=False)
            x = torch.tensor(dataset.x, dtype=torch.float)
            y = torch.tensor(dataset.y, dtype=torch.float)

            total_submissions = len(y)
            if total_submissions == 0:
                continue
            total_winners = int(y.sum().item())

            contest_info = {
                'contest_name': contest_name,
                'prizes_sum': prizes_sum,
                'nr_winners': total_winners,
                'nr_submissions': total_submissions
            }

            # One score per submission, always 1-D (also for single-submission contests).
            scores = self._predict_scores(model, x, model_type)
            _, sorted_indices = torch.sort(scores, descending=True)

            # Store quality scores for all submissions (only needed when they are saved)
            if save_all_submissions:
                for submission, score in zip(contest_data.itertuples(index=False), scores.tolist()):
                    submission_info = submission._asdict()
                    submission_info['quality_score'] = score
                    submission_info['model'] = fold_label
                    all_submissions.append(submission_info)

            # For each filter percentage, compute recall and store predictions
            for percentage in filter_percentages:
                keep_percentage = 1 - percentage
                # Always keep at least one submission per contest.
                top_n = max(int(keep_percentage * total_submissions), 1)

                preds = torch.zeros_like(scores)
                preds[sorted_indices[:top_n]] = 1

                # Per-contest recall
                recall = recall_score(y.cpu().numpy(), preds.cpu().numpy())
                contest_info[f'recall_{int(percentage*100)}%'] = recall

                metrics_dict[percentage]["all_preds"].append(preds)
                metrics_dict[percentage]["all_labels"].append(y)

                # Track false negatives specifically at 50% filtering
                # (only needed when they are saved).
                if save_false_negatives and abs(percentage - p_50) < 1e-9:
                    fn_mask = (y == 1) & (preds == 0)
                    for idx in torch.nonzero(fn_mask).flatten().tolist():
                        fn_info = contest_data.iloc[idx].to_dict()
                        fn_info['quality_score'] = scores[idx].item()
                        fn_info['model'] = fold_label
                        false_negatives.append(fn_info)

            per_contest_results.append(contest_info)

        # -----------------------------
        # Compute metrics at 50% filtering (fold-level)
        # -----------------------------
        if metrics_dict[p_50]["all_preds"]:
            all_preds_50 = self._concat_as_int(metrics_dict[p_50]["all_preds"])
            all_labels_50 = self._concat_as_int(metrics_dict[p_50]["all_labels"])

            accuracy = accuracy_score(all_labels_50, all_preds_50)
            overall_recall = recall_score(all_labels_50, all_preds_50)
            # labels=[0, 1] keeps the matrix 2x2 even if only one class occurs.
            conf_matrix = confusion_matrix(all_labels_50, all_preds_50, labels=[0, 1])

            print("\n----- Overall Evaluation at 50% filtering (FOLD LEVEL) -----")
            print(f"Fold {fold+1} accuracy: {accuracy}")
            print(f"Fold {fold+1} recall: {overall_recall}")
            print(f"Fold {fold+1} Confusion Matrix:\n{conf_matrix}")
            print(f"Fold {fold+1} Remaining % size of Submissions: {all_preds_50.mean() * 100:.2f}%")
        else:
            accuracy = None
            overall_recall = None
            conf_matrix = None

        df_results = pd.DataFrame(per_contest_results)

        # -----------------------------
        # Save per-contest results CSV
        # -----------------------------
        if save_contest_results:
            output_csv = self._append_csv(df_results, f'{model_type}_results_{dataset_stem}_run{run}.csv')
            print(f"\nPer-contest results (fold-level) saved to {output_csv}")

        # -----------------------------
        # Save false negatives if requested
        # -----------------------------
        if save_false_negatives:
            if false_negatives:
                false_negatives_csv = self._append_csv(
                    pd.DataFrame(false_negatives),
                    f'{model_type}_fn_{dataset_stem}_run{run}.csv'
                )
                print(f"False negatives saved to {false_negatives_csv}")
            else:
                print("No false negatives found at 50% filtering in this fold.")

        # -----------------------------
        # Save all submissions if requested
        # -----------------------------
        if save_all_submissions:
            if all_submissions:
                all_submissions_csv = self._append_csv(
                    pd.DataFrame(all_submissions),
                    f'{model_type}_all_submissions_{dataset_stem}_run{run}.csv'
                )
                print(f"All submissions with quality scores saved to {all_submissions_csv}")
            else:
                print("No submissions found to save for this fold.")

        # -----------------------------
        # Compute & return additional metrics across all filter percentages (fold-level)
        # -----------------------------
        metrics_summary_df = None
        fold_metrics_data = {}  # raw counts for tn, fp, fn, tp, plus lists to average.

        if compute_additional_metrics:
            all_metrics_rows = []
            for p in sorted(filter_percentages):
                preds_list = metrics_dict[p]["all_preds"]
                if len(preds_list) == 0:
                    continue

                all_preds_p = self._concat_as_int(preds_list)
                all_labels_p = self._concat_as_int(metrics_dict[p]["all_labels"])

                # labels=[0, 1] guarantees four cells to unpack even when a class is absent.
                tn, fp, fn, tp = (
                    int(v) for v in confusion_matrix(all_labels_p, all_preds_p, labels=[0, 1]).ravel()
                )
                accuracy_p = (tp + tn) / (tp + tn + fp + fn)
                recall_p = tp / (tp + fn) if (tp + fn) > 0 else 0.0
                remain_subs_pct = float(all_preds_p.mean()) * 100

                all_metrics_rows.append({
                    "Filter_Percentage": p,
                    "Remaining_Percentage_Size_of_Submissions": remain_subs_pct,
                    "Accuracy": accuracy_p,
                    "Recall": recall_p,
                    "True_Negatives": tn,
                    "False_Positives": fp,
                    "False_Negatives": fn,
                    "True_Positives": tp
                })

            metrics_summary_df = pd.DataFrame(all_metrics_rows)
            print("\nFold-Level Additional Metrics (All Contests Combined) for each Filter %:")
            print(metrics_summary_df)

            # Prepare the fold_metrics_data to later be aggregated in run_evaluation:
            # counts for TN, FP, FN, TP, and lists for accuracy, recall, remain_subs_pct.
            fold_metrics_data = {
                p: {
                    "tn": 0, "fp": 0, "fn": 0, "tp": 0,
                    "acc_list": [], "recall_list": [], "remain_pct_list": []
                }
                for p in sorted(filter_percentages)
            }

            # Filled from the row dicts rather than DataFrame.iterrows(), which would
            # upcast the integer counts to floats.
            for row in all_metrics_rows:
                bucket = fold_metrics_data[row["Filter_Percentage"]]
                bucket["tn"] += row["True_Negatives"]
                bucket["fp"] += row["False_Positives"]
                bucket["fn"] += row["False_Negatives"]
                bucket["tp"] += row["True_Positives"]
                bucket["acc_list"].append(row["Accuracy"])
                bucket["recall_list"].append(row["Recall"])
                bucket["remain_pct_list"].append(row["Remaining_Percentage_Size_of_Submissions"])

        return (
            df_results,
            accuracy,
            overall_recall,
            conf_matrix,
            metrics_summary_df,
            fold_metrics_data  # raw counts/lists for each filter
        )

    @staticmethod
    def _predict_scores(model, x, model_type):
        """Return one score per row of `x` as a 1-D tensor, computed without gradients."""
        with torch.no_grad():
            if model_type == "MLP":
                # MLP (PyTorch Lightning) forward
                raw = model(x)
            elif hasattr(model, "predict_proba"):
                # sklearn-style estimator: probability of class 1
                raw = torch.tensor(model.predict_proba(x.numpy())[:, 1])
            else:
                raw = torch.tensor(model.predict(x.numpy()))
        # reshape(-1) instead of squeeze(): squeeze() turns a single-row result into
        # a 0-d tensor, which cannot be indexed per submission.
        return raw.reshape(-1)

    @staticmethod
    def _concat_as_int(tensors):
        """Concatenate a list of 1-D 0/1 tensors into one integer NumPy array."""
        return torch.cat(tensors).cpu().numpy().astype(int)

    def _append_csv(self, frame, file_name):
        """Append `frame` to `file_name` under `self.output_path` (header only for a new file); return the path."""
        csv_path = os.path.join(self.output_path, file_name)
        frame.to_csv(csv_path, mode='a', header=not os.path.exists(csv_path), index=False)
        return csv_path

    def run_evaluation(
        self,
        n_runs,
        n_folds,
        model_type,
        dataset_name,
        mlp_hidden_size=32,
        mlp_epochs=20,
        save_contest_results=False,
        save_false_negatives=False,
        save_all_submissions=False,
        compute_additional_metrics=True
    ):
        """
        Run the training & evaluation loop multiple times (n_runs),
        each time with K-Fold splitting (n_folds).

        After each run, we aggregate fold-level metrics to produce a
        run-level summary table of filter percentages vs.
        (avg) Accuracy, (avg) Recall, (avg) Remaining%, (sum) TN/FP/FN/TP.

        Args:
            n_runs (int): Number of times to run the cross-validation process.
            n_folds (int): Number of folds to use in K-Fold.
            model_type (str): Type of model to train ('MLP', 'LogisticRegression', etc.).
            dataset_name (str): Name of the dataset (used for saving results).
            mlp_hidden_size (int): Hidden layer size for MLP (if MLP is used).
            mlp_epochs (int): Number of epochs for MLP training (if MLP is used).
            save_contest_results (bool): If True, save per-contest results to CSV.
            save_false_negatives (bool): If True, saves false negatives to CSV in `submission_evaluation`.
            save_all_submissions (bool): If True, saves all submissions to CSV in `submission_evaluation`.
            compute_additional_metrics (bool): If True, compute & print Accuracy, Recall, and confusion matrix values
                                               for each filter percentage, plus final run-level table.
        """
        from sklearn.model_selection import KFold, train_test_split
        import numpy as np

        # Unique contest names
        contest_names = self.df['contest_name'].unique()

        # For printing final average recall at 50% filter across folds
        recall_per_fold = []

        # Predefine the filter percentages we use
        filter_percentages = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7]

        for run in range(1, n_runs + 1):
            print(f"\n========== STARTING RUN {run}/{n_runs} for {model_type} ==========")

            # We'll accumulate fold-level metrics into run_metrics_dict, so we can produce a final summary
            run_metrics_dict = {
                p: {
                    "tn": 0, "fp": 0, "fn": 0, "tp": 0,
                    "acc_list": [], "recall_list": [], "remain_pct_list": []
                }
                for p in filter_percentages
            }

            kf = KFold(n_splits=n_folds, shuffle=True, random_state=None)
            contest_folds = list(kf.split(contest_names))

            for fold, (train_val_idx, test_idx) in enumerate(contest_folds):
                print(f"\n--- Run {run}, Fold {fold+1}/{n_folds} ---")
                test_contests = contest_names[test_idx]
                train_val_contests = contest_names[train_val_idx]

                # 80/20 split of the 80% (train_val_contests)
                train_contests, val_contests = train_test_split(train_val_contests, test_size=0.2, random_state=None)

                train_df = self.df[self.df['contest_name'].isin(train_contests)]
                val_df = self.df[self.df['contest_name'].isin(val_contests)]
                test_df = self.df[self.df['contest_name'].isin(test_contests)]

                # Prepare DataLoaders for MLP
                train_dataset = CSVDataset(train_df)
                val_dataset = CSVDataset(val_df, smote=False)
                train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
                val_loader = DataLoader(val_dataset, batch_size=32)

                # ---------------------------------
                # Train model
                # ---------------------------------
                if model_type == "MLP":
                    model = SimpleMLP(
                        input_size=11,
                        hidden_size=mlp_hidden_size,
                        output_size=1
                    )
                    trainer = pl.Trainer(
                        max_epochs=mlp_epochs,
                        enable_checkpointing=False,
                        logger=False
                    )
                    trainer.fit(model, train_loader, val_loader)
                    model.eval()

                else:
                    # Sklearn-based model
                    model_wrapper = OptimizedModelWrapper(
                        model_name=model_type,
                        train_X=train_dataset.x,
                        train_y=train_dataset.y,
                        val_X=val_dataset.x,
                        val_y=val_dataset.y
                    )
                    model = model_wrapper.model  # The optimized sklearn model

                # ---------------------------------
                # Evaluate model (fold-level)
                # ---------------------------------
                df_results, accuracy, overall_recall, conf_matrix, fold_metrics_df, fold_metrics_data = self.submission_evaluation(
                    model,
                    test_df,
                    run,
                    fold,
                    dataset_name,
                    model_type,
                    save_contest_results=save_contest_results,
                    save_false_negatives=save_false_negatives,
                    save_all_submissions=save_all_submissions,
                    compute_additional_metrics=compute_additional_metrics
                )

                recall_per_fold.append(overall_recall)

                # ---------------------------------
                # ACCUMULATE fold-level metrics into run_metrics_dict
                # ---------------------------------
                if compute_additional_metrics and fold_metrics_data:
                    for p in fold_metrics_data.keys():
                        run_metrics_dict[p]["tn"] += fold_metrics_data[p]["tn"]
                        run_metrics_dict[p]["fp"] += fold_metrics_data[p]["fp"]
                        run_metrics_dict[p]["fn"] += fold_metrics_data[p]["fn"]
                        run_metrics_dict[p]["tp"] += fold_metrics_data[p]["tp"]

                        run_metrics_dict[p]["acc_list"].extend(fold_metrics_data[p]["acc_list"])
                        run_metrics_dict[p]["recall_list"].extend(fold_metrics_data[p]["recall_list"])
                        run_metrics_dict[p]["remain_pct_list"].extend(fold_metrics_data[p]["remain_pct_list"])

                # Clear GPU cache if using GPU
                torch.cuda.empty_cache()

            # -----------------------------
            # After all folds of this run, build final run-level metrics
            # -----------------------------
            if compute_additional_metrics:
                final_rows = []
                for p in filter_percentages:
                    data_p = run_metrics_dict[p]

                    # Sum of confusion matrix terms across folds
                    tn = data_p["tn"]
                    fp = data_p["fp"]
                    fn = data_p["fn"]
                    tp = data_p["tp"]
                    total = tn + fp + fn + tp

                    # Compute Accuracy & Recall from the overall sums
                    accuracy_from_sums = (tn + tp) / total if total > 0 else 0.0
                    recall_from_sums = tp / (tp + fn) if (tp + fn) > 0 else 0.0

                    # For Remaining% we can still average across folds
                    avg_remain_pct = np.mean(data_p["remain_pct_list"]) if data_p["remain_pct_list"] else 0.0

                    row = {
                        "Filter_Percentage": p,
                        "Remaining_Percentage_Size_of_Submissions": avg_remain_pct,
                        "Accuracy": accuracy_from_sums,
                        "Recall": recall_from_sums,
                        "True_Negatives": tn,
                        "False_Positives": fp,
                        "False_Negatives": fn,
                        "True_Positives": tp
                    }
                    final_rows.append(row)

                run_metrics_summary_df = pd.DataFrame(final_rows)
                print(f"\n========== RUN {run} - FINAL ADDITIONAL METRICS ACROSS ALL FOLDS ==========")
                print(run_metrics_summary_df)

                # Save this run-level summary to CSV
                additional_metrics_csv = os.path.join(
                    self.output_path,
                    f"{model_type}_additional_metrics_{dataset_name[:-4]}_run{run}.csv"
                )
                run_metrics_summary_df.to_csv(additional_metrics_csv, index=False)
                print(f"Run-level Additional Metrics saved to {additional_metrics_csv}")

            # -----------------------------
            # Print average recall for this run (at 50% filter) if desired
            # -----------------------------
            valid_recalls = [r for r in recall_per_fold if r is not None]
            avg_recall_50 = np.mean(valid_recalls) if len(valid_recalls) > 0 else 0.0
            print(f"\nAverage Recall at 50% filter across all folds (RUN {run}): {avg_recall_50:.4f}")

    def aggregate_average_recall_across_runs(self, model_type, dataset_name, n_runs=5):
        """
        Average the run-level "Recall" column per Filter_Percentage over every
        run CSV that exists for the given model and dataset.

        Run CSVs are looked up in ``self.output_path`` under the name
        ``{model_type}_additional_metrics_{dataset_name[:-4]}_run{N}.csv`` for
        N = 1..n_runs. A missing file only produces a warning; the mean is then
        taken over the files that were found.

        Args:
            model_type (str): The model type ("MLP", "RandomForest", "XGBoost", "SVM", "LogisticRegression").
            dataset_name (str): The dataset filename; its last four characters
                (e.g. ".csv") are dropped to build the CSV names.
            n_runs (int): Highest run index to look for (defaults to 5).

        Returns:
            pd.DataFrame or None:
                A DataFrame with two columns: ["Filter_Percentage", "Average_Recall"],
                or None if no CSV files were found.

        Example:
            avg_recall_df = discriminative_model.aggregate_average_recall_across_runs("MLP", "28_11.csv", 5)
        """
        import os
        import pandas as pd

        # Drop the last four characters of the file name (the ".csv" suffix in the example above).
        dataset_stem = dataset_name[:-4]

        run_frames = []
        for run_idx in range(1, n_runs + 1):
            run_csv = os.path.join(
                self.output_path,
                f"{model_type}_additional_metrics_{dataset_stem}_run{run_idx}.csv",
            )
            if not os.path.exists(run_csv):
                print(f"Warning: CSV not found for run {run_idx}: {run_csv}")
                continue
            run_frames.append(pd.read_csv(run_csv))

        # Nothing to aggregate: tell the caller and bail out.
        if len(run_frames) == 0:
            print("No CSV files found for the specified model_type and dataset_name.")
            return None

        # Stack all runs, then take the per-filter mean of the Recall column.
        stacked_runs = pd.concat(run_frames, ignore_index=True)
        mean_recall = stacked_runs.groupby("Filter_Percentage", as_index=False)["Recall"].mean()
        avg_recall_df = mean_recall.rename(columns={"Recall": "Average_Recall"})

        print(f"\n=== Average Recall Across {n_runs} Runs for {model_type} ===")
        print(avg_recall_df)
        return avg_recall_df



In [None]:
# @title ModelComparisonPlotter

import os
import pandas as pd
import matplotlib.pyplot as plt

class ModelComparisonPlotter:
    """
    Class Name: ModelComparisonPlotter

    Purpose:
        - Compare recall performance of multiple models at various filter percentages.
        - Aggregate average recall for each model and plot them on a single chart.

    Responsibilities:
        - Read the run-level CSV files named
          "<model>_additional_metrics_<dataset stem>_run<N>.csv" from `output_path`
          (e.g., "MLP_additional_metrics_28_11_run1.csv").
        - Aggregate average recall from each model across multiple runs.
        - Combine these into a single DataFrame (filter_percentage vs. recall per model).
        - Plot these recalls on a single figure for visual comparison.

    Example Usage:
        plotter = ModelComparisonPlotter(output_path, dataset_name="28_11.csv", n_runs=5)
        model_list = ["MLP", "RandomForest", "XGBoost", "SVM", "LogisticRegression"]
        combined_df = plotter.gather_average_recall_all_models(model_list)
        plotter.save_combined_averages(combined_df, "all_models_avg_recall.csv")
        plotter.plot_model_recall_comparison(combined_df, save_path="model_recall_plot.png")
    """

    def __init__(self, output_path, dataset_name, n_runs=5):
        """
        Constructor for ModelComparisonPlotter.

        Args:
            output_path (str): The directory where the run CSVs are located.
            dataset_name (str): The dataset file name; its last four characters
                (e.g. ".csv") are dropped when building the run-CSV names.
            n_runs (int): Number of runs performed for each model.
        """
        self.output_path = output_path
        self.dataset_name = dataset_name
        self.n_runs = n_runs

    def _load_run_frames(self, model_name):
        """Read every existing run-level CSV of one model; print a warning per missing run."""
        dataset_stem = self.dataset_name[:-4]
        frames = []
        for run in range(1, self.n_runs + 1):
            csv_filename = f"{model_name}_additional_metrics_{dataset_stem}_run{run}.csv"
            csv_path = os.path.join(self.output_path, csv_filename)
            if os.path.exists(csv_path):
                frames.append(pd.read_csv(csv_path))
            else:
                print(f"Warning: CSV not found for run {run}: {csv_path}")
        return frames

    def gather_average_recall_all_models(self, model_types):
        """
        Gathers the average recall for each model across multiple runs,
        then combines them into a single DataFrame with columns [Filter_Percentage, <Model1>, <Model2>, ...].

        Models for which no run CSV exists are reported and left out of the result.

        Args:
            model_types (list of str): A list of model type strings, e.g. ["MLP", "RandomForest", "XGBoost"].

        Returns:
            pd.DataFrame: Combined DataFrame sorted by Filter_Percentage with columns:
                - Filter_Percentage
                - <ModelName1>
                - <ModelName2>
                - ...
            An empty DataFrame is returned when none of the models had data.
        """
        filter_percentage_col = "Filter_Percentage"
        # { model_name: DataFrame[Filter_Percentage, <model_name>] }
        model_recall_frames = {}

        for model_name in model_types:
            run_frames = self._load_run_frames(model_name)
            if not run_frames:
                print(f"No data found for model: {model_name}")
                continue

            # Mean of "Recall" per filter percentage over all runs found;
            # the value column is renamed to the model name so frames can be merged side by side.
            model_recall_frames[model_name] = (
                pd.concat(run_frames, ignore_index=True)
                .groupby(filter_percentage_col, as_index=False)["Recall"]
                .mean()
                .rename(columns={"Recall": model_name})
            )

        if not model_recall_frames:
            print("No models had valid data; returning empty DataFrame.")
            return pd.DataFrame()

        # Outer-merge on Filter_Percentage so a filter level present for only
        # some models is kept (the other models get NaN there).
        all_models_merged = None
        for df_model in model_recall_frames.values():
            if all_models_merged is None:
                all_models_merged = df_model
            else:
                all_models_merged = pd.merge(
                    all_models_merged, df_model, on=filter_percentage_col, how="outer"
                )

        return all_models_merged.sort_values(by=[filter_percentage_col]).reset_index(drop=True)

    def save_combined_averages(self, combined_df, filename="all_models_avg_recall.csv"):
        """
        Saves the combined average recall DataFrame to a CSV inside `output_path`.

        Args:
            combined_df (pd.DataFrame): The DataFrame generated by gather_average_recall_all_models().
            filename (str): Name of the output CSV file.
        """
        output_csv = os.path.join(self.output_path, filename)
        combined_df.to_csv(output_csv, index=False)
        print(f"Combined averages saved to {output_csv}")

    def plot_model_recall_comparison(self, combined_df, save_path=None):
        """
        Plots the average recall comparison at different filter percentages for each model.

        The x-axis is labelled in percent. When the Filter_Percentage values are
        fractions (all <= 1, as written by the run-level CSVs: 0.1 ... 0.7), the
        tick labels are shown as 10 ... 70 so they agree with the "(%)" axis label;
        values above 1 are assumed to be percentages already and are shown as is.

        Args:
            combined_df (pd.DataFrame): DataFrame from gather_average_recall_all_models().
                                         Columns: [Filter_Percentage, MLP, RandomForest, ...].
            save_path (str, optional): If provided, saves the figure to this path.
                                       Otherwise, shows the plot interactively.
        """
        # Check first so an empty frame never needs matplotlib at all.
        if combined_df.empty:
            print("Warning: combined_df is empty. Nothing to plot.")
            return

        import matplotlib.pyplot as plt

        # X-values
        filter_percentages = combined_df["Filter_Percentage"].to_numpy()
        # Every other column is one model's average recall
        model_columns = [c for c in combined_df.columns if c != "Filter_Percentage"]

        fig, ax = plt.subplots(figsize=(8, 6))
        for model_col in model_columns:
            ax.plot(filter_percentages, combined_df[model_col], marker='o', label=model_col)

        ax.set_title("Model Recall Comparison at Different Filtering Percentages")
        ax.set_xlabel("Filtering Percentage (%)")
        ax.set_ylabel("Average Recall")
        ax.set_xticks(filter_percentages)
        if float(filter_percentages.max()) <= 1.0:
            # Data positions stay fractional; only the printed tick text is scaled to percent.
            ax.set_xticklabels([f"{value * 100:g}" for value in filter_percentages])
        ax.set_ylim(0, 1)  # typical recall range
        ax.legend(loc="lower left")
        ax.grid(True)

        if save_path:
            fig.savefig(save_path, dpi=300, bbox_inches='tight')
            plt.close(fig)
            print(f"Plot saved to: {save_path}")
        else:
            plt.show()


In [None]:
# Mount Google Drive (the cell output reports the mount point /content/drive/)
# so the dataset path below is readable.
drive_mgr = DriveManager()
drive_mgr.mount_drive()

# Load the dataset CSV into `df`.
# `dataset_name` is also passed to run_evaluation() later, where it is used
# to build the names of the result CSV files.
input_path = '/content/drive/MyDrive/Master_Thesis/ML_Input/'
dataset_name = 'final_dataset.csv'
full_dataset_path = os.path.join(input_path, dataset_name)
df = pd.read_csv(full_dataset_path)

Mounted at /content/drive/


In [None]:
# Create DiscriminativeModel instance; result CSVs are written under output_path
output_path = '/content/drive/MyDrive/Master_Thesis/MLP_Output/'
discriminative_model = DiscriminativeModel(df, output_path=output_path)

model_type = "MLP"  # choose: MLP, RandomForest, XGBoost, SVM, LogisticRegression
# Each run performs a full K-Fold cross-validation over the contests,
# so amount_runs * amount_folds models are trained in total.
amount_runs = 5
amount_folds = 5

# Run evaluation with desired parameters.
# NOTE: run_evaluation() builds its KFold and train/validation splits with
# random_state=None, so the numbers below differ between executions.
discriminative_model.run_evaluation(
    n_runs=amount_runs,
    n_folds=amount_folds,
    model_type=model_type,
    dataset_name=dataset_name,
    save_contest_results=True,
    save_false_negatives=False,
    save_all_submissions=True,
    compute_additional_metrics=True
)


INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name | Type   | Params | Mode 
----------------------------------------
0 | fc1  | Linear | 384    | train
1 | fc2  | Linear | 1.1 K  | train
2 | fc3  | Linear | 33     | train
----------------------------------------
1.5 K     Trainable params
0         Non-trainable params
1.5 K     Total params
0.006     Total estimated model params size (MB)
3         Modules in train mode
0         Modules in eval mode




--- Run 1, Fold 1/5 ---


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=20` reached.


----- Testset Evaluation by filtering out varying percentages of submissions for each contest -----

----- Overall Evaluation at 50% filtering (FOLD LEVEL) -----
Fold 1 accuracy: 0.5618556701030928
Fold 1 recall: 0.9540229885057471
Fold 1 Confusion Matrix:
[[680 591]
 [  4  83]]
Fold 1 Remaining % size of Submissions: 49.63%

Per-contest results (fold-level) saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_results_final_dataset_run1.csv


INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name | Type   | Params | Mode 
----------------------------------------
0 | fc1  | Linear | 384    | train
1 | fc2  | Linear | 1.1 K  | train
2 | fc3  | Linear | 33     | train
----------------------------------------
1.5 K     Trainable params
0         Non-trainable params
1.5 K     Total params
0.006     Total estimated model params size (MB)
3         Modules in train mode
0         Modules in eval mode


All submissions with quality scores saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_all_submissions_final_dataset_run1.csv

Fold-Level Additional Metrics (All Contests Combined) for each Filter %:
   Filter_Percentage  Remaining_Percentage_Size_of_Submissions  Accuracy  \
0                0.1                                 89.175258  0.172312   
1                0.2                                 79.307806  0.269514   
2                0.3                                 69.219440  0.370398   
3                0.4                                 59.351988  0.466127   
4                0.5                                 49.631811  0.561856   
5                0.6                                 39.322533  0.657585   
6                0.7                                 29.307806  0.744477   

     Recall  True_Negatives  False_Positives  False_Negatives  True_Positives  
0  1.000000             147             1124                0              87  
1  0.988506          

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=20` reached.


----- Testset Evaluation by filtering out varying percentages of submissions for each contest -----

----- Overall Evaluation at 50% filtering (FOLD LEVEL) -----
Fold 2 accuracy: 0.5711878685762426
Fold 2 recall: 0.9438202247191011
Fold 2 Confusion Matrix:
[[594 504]
 [  5  84]]
Fold 2 Remaining % size of Submissions: 49.54%

Per-contest results (fold-level) saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_results_final_dataset_run1.csv


INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name | Type   | Params | Mode 
----------------------------------------
0 | fc1  | Linear | 384    | train
1 | fc2  | Linear | 1.1 K  | train
2 | fc3  | Linear | 33     | train
----------------------------------------
1.5 K     Trainable params
0         Non-trainable params
1.5 K     Total params
0.006     Total estimated model params size (MB)
3         Modules in train mode
0         Modules in eval mode


All submissions with quality scores saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_all_submissions_final_dataset_run1.csv

Fold-Level Additional Metrics (All Contests Combined) for each Filter %:
   Filter_Percentage  Remaining_Percentage_Size_of_Submissions  Accuracy  \
0                0.1                                 89.048020  0.184499   
1                0.2                                 79.106992  0.283909   
2                0.3                                 69.165965  0.383319   
3                0.4                                 59.056445  0.482730   
4                0.5                                 49.536647  0.571188   
5                0.6                                 39.258635  0.667228   
6                0.7                                 28.980623  0.751474   

     Recall  True_Negatives  False_Positives  False_Negatives  True_Positives  
0  1.000000             130              968                0              89  
1  1.000000          

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=20` reached.


----- Testset Evaluation by filtering out varying percentages of submissions for each contest -----

----- Overall Evaluation at 50% filtering (FOLD LEVEL) -----
Fold 3 accuracy: 0.5544117647058824
Fold 3 recall: 0.971830985915493
Fold 3 Confusion Matrix:
[[685 604]
 [  2  69]]
Fold 3 Remaining % size of Submissions: 49.49%

Per-contest results (fold-level) saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_results_final_dataset_run1.csv


INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name | Type   | Params | Mode 
----------------------------------------
0 | fc1  | Linear | 384    | train
1 | fc2  | Linear | 1.1 K  | train
2 | fc3  | Linear | 33     | train
----------------------------------------
1.5 K     Trainable params
0         Non-trainable params
1.5 K     Total params
0.006     Total estimated model params size (MB)
3         Modules in train mode
0         Modules in eval mode


All submissions with quality scores saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_all_submissions_final_dataset_run1.csv

Fold-Level Additional Metrics (All Contests Combined) for each Filter %:
   Filter_Percentage  Remaining_Percentage_Size_of_Submissions  Accuracy  \
0                0.1                                 89.338235  0.158824   
1                0.2                                 79.338235  0.258824   
2                0.3                                 69.338235  0.357353   
3                0.4                                 59.264706  0.458088   
4                0.5                                 49.485294  0.554412   
5                0.6                                 39.411765  0.646324   
6                0.7                                 29.191176  0.741176   

     Recall  True_Negatives  False_Positives  False_Negatives  True_Positives  
0  1.000000             145             1144                0              71  
1  1.000000          

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=20` reached.


----- Testset Evaluation by filtering out varying percentages of submissions for each contest -----

----- Overall Evaluation at 50% filtering (FOLD LEVEL) -----
Fold 4 accuracy: 0.5535987748851455
Fold 4 recall: 0.9230769230769231
Fold 4 Confusion Matrix:
[[651 577]
 [  6  72]]
Fold 4 Remaining % size of Submissions: 49.69%

Per-contest results (fold-level) saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_results_final_dataset_run1.csv


INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name | Type   | Params | Mode 
----------------------------------------
0 | fc1  | Linear | 384    | train
1 | fc2  | Linear | 1.1 K  | train
2 | fc3  | Linear | 33     | train
----------------------------------------
1.5 K     Trainable params
0         Non-trainable params
1.5 K     Total params
0.006     Total estimated model params size (MB)
3         Modules in train mode
0         Modules in eval mode


All submissions with quality scores saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_all_submissions_final_dataset_run1.csv

Fold-Level Additional Metrics (All Contests Combined) for each Filter %:
   Filter_Percentage  Remaining_Percentage_Size_of_Submissions  Accuracy  \
0                0.1                                 89.203675  0.166156   
1                0.2                                 79.326187  0.264931   
2                0.3                                 69.065850  0.362940   
3                0.4                                 59.188361  0.460184   
4                0.5                                 49.693721  0.553599   
5                0.6                                 39.356815  0.643185   
6                0.7                                 29.326187  0.731240   

     Recall  True_Negatives  False_Positives  False_Negatives  True_Positives  
0  0.987179             140             1088                1              77  
1  0.987179          

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=20` reached.


----- Testset Evaluation by filtering out varying percentages of submissions for each contest -----

----- Overall Evaluation at 50% filtering (FOLD LEVEL) -----
Fold 5 accuracy: 0.550191570881226
Fold 5 recall: 0.9838709677419355
Fold 5 Confusion Matrix:
[[657 586]
 [  1  61]]
Fold 5 Remaining % size of Submissions: 49.58%

Per-contest results (fold-level) saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_results_final_dataset_run1.csv


INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name | Type   | Params | Mode 
----------------------------------------
0 | fc1  | Linear | 384    | train
1 | fc2  | Linear | 1.1 K  | train
2 | fc3  | Linear | 33     | train
----------------------------------------
1.5 K     Trainable params
0         Non-trainable params
1.5 K     Total params
0.006     Total estimated model params size (MB)
3         Modules in train mode
0         Modules in eval mode


All submissions with quality scores saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_all_submissions_final_dataset_run1.csv

Fold-Level Additional Metrics (All Contests Combined) for each Filter %:
   Filter_Percentage  Remaining_Percentage_Size_of_Submissions  Accuracy  \
0                0.1                                 89.195402  0.155556   
1                0.2                                 79.233716  0.255172   
2                0.3                                 69.195402  0.355556   
3                0.4                                 59.310345  0.454406   
4                0.5                                 49.578544  0.550192   
5                0.6                                 39.157088  0.649808   
6                0.7                                 29.272031  0.736398   

     Recall  True_Negatives  False_Positives  False_Negatives  True_Positives  
0  1.000000             141             1102                0              62  
1  1.000000          

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=20` reached.


----- Testset Evaluation by filtering out varying percentages of submissions for each contest -----

----- Overall Evaluation at 50% filtering (FOLD LEVEL) -----
Fold 1 accuracy: 0.5693979933110368
Fold 1 recall: 0.9753086419753086
Fold 1 Confusion Matrix:
[[602 513]
 [  2  79]]
Fold 1 Remaining % size of Submissions: 49.50%

Per-contest results (fold-level) saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_results_final_dataset_run2.csv


INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name | Type   | Params | Mode 
----------------------------------------
0 | fc1  | Linear | 384    | train
1 | fc2  | Linear | 1.1 K  | train
2 | fc3  | Linear | 33     | train
----------------------------------------
1.5 K     Trainable params
0         Non-trainable params
1.5 K     Total params
0.006     Total estimated model params size (MB)
3         Modules in train mode
0         Modules in eval mode


All submissions with quality scores saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_all_submissions_final_dataset_run2.csv

Fold-Level Additional Metrics (All Contests Combined) for each Filter %:
   Filter_Percentage  Remaining_Percentage_Size_of_Submissions  Accuracy  \
0                0.1                                 89.046823  0.177258   
1                0.2                                 79.180602  0.275920   
2                0.3                                 69.063545  0.377090   
3                0.4                                 59.197324  0.474080   
4                0.5                                 49.498328  0.569398   
5                0.6                                 39.046823  0.660535   
6                0.7                                 29.180602  0.739130   

     Recall  True_Negatives  False_Positives  False_Negatives  True_Positives  
0  1.000000             131              984                0              81  
1  1.000000          

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=20` reached.


----- Testset Evaluation by filtering out varying percentages of submissions for each contest -----

----- Overall Evaluation at 50% filtering (FOLD LEVEL) -----
Fold 2 accuracy: 0.5424354243542435
Fold 2 recall: 0.90625
Fold 2 Confusion Matrix:
[[677 614]
 [  6  58]]
Fold 2 Remaining % size of Submissions: 49.59%

Per-contest results (fold-level) saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_results_final_dataset_run2.csv


INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name | Type   | Params | Mode 
----------------------------------------
0 | fc1  | Linear | 384    | train
1 | fc2  | Linear | 1.1 K  | train
2 | fc3  | Linear | 33     | train
----------------------------------------
1.5 K     Trainable params
0         Non-trainable params
1.5 K     Total params
0.006     Total estimated model params size (MB)
3         Modules in train mode
0         Modules in eval mode


All submissions with quality scores saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_all_submissions_final_dataset_run2.csv

Fold-Level Additional Metrics (All Contests Combined) for each Filter %:
   Filter_Percentage  Remaining_Percentage_Size_of_Submissions  Accuracy  \
0                0.1                                 89.151292  0.155720   
1                0.2                                 79.261993  0.254613   
2                0.3                                 69.077491  0.353506   
3                0.4                                 59.188192  0.449446   
4                0.5                                 49.594096  0.542435   
5                0.6                                 39.335793  0.637638   
6                0.7                                 29.298893  0.733579   

     Recall  True_Negatives  False_Positives  False_Negatives  True_Positives  
0  1.000000             147             1144                0              64  
1  1.000000          

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=20` reached.


----- Testset Evaluation by filtering out varying percentages of submissions for each contest -----

----- Overall Evaluation at 50% filtering (FOLD LEVEL) -----
Fold 3 accuracy: 0.5516749821810406
Fold 3 recall: 0.9594594594594594
Fold 3 Confusion Matrix:
[[703 626]
 [  3  71]]
Fold 3 Remaining % size of Submissions: 49.68%

Per-contest results (fold-level) saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_results_final_dataset_run2.csv


INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name | Type   | Params | Mode 
----------------------------------------
0 | fc1  | Linear | 384    | train
1 | fc2  | Linear | 1.1 K  | train
2 | fc3  | Linear | 33     | train
----------------------------------------
1.5 K     Trainable params
0         Non-trainable params
1.5 K     Total params
0.006     Total estimated model params size (MB)
3         Modules in train mode
0         Modules in eval mode


All submissions with quality scores saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_all_submissions_final_dataset_run2.csv

Fold-Level Additional Metrics (All Contests Combined) for each Filter %:
   Filter_Percentage  Remaining_Percentage_Size_of_Submissions  Accuracy  \
0                0.1                                 89.379900  0.158945   
1                0.2                                 79.401283  0.257306   
2                0.3                                 69.351390  0.357805   
3                0.4                                 59.372773  0.456165   
4                0.5                                 49.679259  0.551675   
5                0.6                                 39.558090  0.644334   
6                0.7                                 29.294369  0.735567   

     Recall  True_Negatives  False_Positives  False_Negatives  True_Positives  
0  1.000000             149             1180                0              74  
1  0.986486          

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=20` reached.


----- Testset Evaluation by filtering out varying percentages of submissions for each contest -----

----- Overall Evaluation at 50% filtering (FOLD LEVEL) -----
Fold 4 accuracy: 0.5601888276947286
Fold 4 recall: 0.95
Fold 4 Confusion Matrix:
[[636 555]
 [  4  76]]
Fold 4 Remaining % size of Submissions: 49.65%

Per-contest results (fold-level) saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_results_final_dataset_run2.csv


INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name | Type   | Params | Mode 
----------------------------------------
0 | fc1  | Linear | 384    | train
1 | fc2  | Linear | 1.1 K  | train
2 | fc3  | Linear | 33     | train
----------------------------------------
1.5 K     Trainable params
0         Non-trainable params
1.5 K     Total params
0.006     Total estimated model params size (MB)
3         Modules in train mode
0         Modules in eval mode


All submissions with quality scores saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_all_submissions_final_dataset_run2.csv

Fold-Level Additional Metrics (All Contests Combined) for each Filter %:
   Filter_Percentage  Remaining_Percentage_Size_of_Submissions  Accuracy  \
0                0.1                                 89.221086  0.170732   
1                0.2                                 79.228954  0.270653   
2                0.3                                 69.315500  0.369788   
3                0.4                                 59.244689  0.468922   
4                0.5                                 49.645948  0.560189   
5                0.6                                 39.339103  0.655389   
6                0.7                                 29.189614  0.738002   

   Recall  True_Negatives  False_Positives  False_Negatives  True_Positives  
0  1.0000             137             1054                0              80  
1  1.0000             264

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=20` reached.


----- Testset Evaluation by filtering out varying percentages of submissions for each contest -----

----- Overall Evaluation at 50% filtering (FOLD LEVEL) -----
Fold 5 accuracy: 0.5701006971340047
Fold 5 recall: 0.9772727272727273
Fold 5 Confusion Matrix:
[[650 553]
 [  2  86]]
Fold 5 Remaining % size of Submissions: 49.50%

Per-contest results (fold-level) saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_results_final_dataset_run2.csv


INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name | Type   | Params | Mode 
----------------------------------------
0 | fc1  | Linear | 384    | train
1 | fc2  | Linear | 1.1 K  | train
2 | fc3  | Linear | 33     | train
----------------------------------------
1.5 K     Trainable params
0         Non-trainable params
1.5 K     Total params
0.006     Total estimated model params size (MB)
3         Modules in train mode
0         Modules in eval mode


All submissions with quality scores saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_all_submissions_final_dataset_run2.csv

Fold-Level Additional Metrics (All Contests Combined) for each Filter %:
   Filter_Percentage  Remaining_Percentage_Size_of_Submissions  Accuracy  \
0                0.1                                 89.155693  0.176607   
1                0.2                                 79.240899  0.272657   
2                0.3                                 69.171185  0.373354   
3                0.4                                 59.178931  0.473277   
4                0.5                                 49.496514  0.570101   
5                0.6                                 39.194423  0.657630   
6                0.7                                 29.124710  0.742835   

     Recall  True_Negatives  False_Positives  False_Negatives  True_Positives  
0  1.000000             140             1063                0              88  
1  0.977273          

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=20` reached.


----- Testset Evaluation by filtering out varying percentages of submissions for each contest -----

----- Overall Evaluation at 50% filtering (FOLD LEVEL) -----
Fold 1 accuracy: 0.5567086730911787
Fold 1 recall: 0.95
Fold 1 Confusion Matrix:
[[675 594]
 [  4  76]]
Fold 1 Remaining % size of Submissions: 49.67%

Per-contest results (fold-level) saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_results_final_dataset_run3.csv


INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name | Type   | Params | Mode 
----------------------------------------
0 | fc1  | Linear | 384    | train
1 | fc2  | Linear | 1.1 K  | train
2 | fc3  | Linear | 33     | train
----------------------------------------
1.5 K     Trainable params
0         Non-trainable params
1.5 K     Total params
0.006     Total estimated model params size (MB)
3         Modules in train mode
0         Modules in eval mode


All submissions with quality scores saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_all_submissions_final_dataset_run3.csv

Fold-Level Additional Metrics (All Contests Combined) for each Filter %:
   Filter_Percentage  Remaining_Percentage_Size_of_Submissions  Accuracy  \
0                0.1                                 89.177168  0.167532   
1                0.2                                 79.318013  0.266123   
2                0.3                                 69.162342  0.366197   
3                0.4                                 59.303188  0.463306   
4                0.5                                 49.666420  0.556709   
5                0.6                                 39.214233  0.653818   
6                0.7                                 29.280949  0.742772   

   Recall  True_Negatives  False_Positives  False_Negatives  True_Positives  
0  1.0000             146             1123                0              80  
1  1.0000             279

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=20` reached.


----- Testset Evaluation by filtering out varying percentages of submissions for each contest -----

----- Overall Evaluation at 50% filtering (FOLD LEVEL) -----
Fold 2 accuracy: 0.5677257525083612
Fold 2 recall: 0.987012987012987
Fold 2 Confusion Matrix:
[[603 516]
 [  1  76]]
Fold 2 Remaining % size of Submissions: 49.50%

Per-contest results (fold-level) saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_results_final_dataset_run3.csv


INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name | Type   | Params | Mode 
----------------------------------------
0 | fc1  | Linear | 384    | train
1 | fc2  | Linear | 1.1 K  | train
2 | fc3  | Linear | 33     | train
----------------------------------------
1.5 K     Trainable params
0         Non-trainable params
1.5 K     Total params
0.006     Total estimated model params size (MB)
3         Modules in train mode
0         Modules in eval mode


All submissions with quality scores saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_all_submissions_final_dataset_run3.csv

Fold-Level Additional Metrics (All Contests Combined) for each Filter %:
   Filter_Percentage  Remaining_Percentage_Size_of_Submissions  Accuracy  \
0                0.1                                 89.046823  0.173913   
1                0.2                                 79.096990  0.273411   
2                0.3                                 68.896321  0.375418   
3                0.4                                 59.113712  0.473244   
4                0.5                                 49.498328  0.567726   
5                0.6                                 39.214047  0.658863   
6                0.7                                 29.180602  0.734114   

     Recall  True_Negatives  False_Positives  False_Negatives  True_Positives  
0  1.000000             131              988                0              77  
1  1.000000          

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=20` reached.


----- Testset Evaluation by filtering out varying percentages of submissions for each contest -----

----- Overall Evaluation at 50% filtering (FOLD LEVEL) -----
Fold 3 accuracy: 0.5688568856885688
Fold 3 recall: 0.961038961038961
Fold 3 Confusion Matrix:
[[558 476]
 [  3  74]]
Fold 3 Remaining % size of Submissions: 49.50%

Per-contest results (fold-level) saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_results_final_dataset_run3.csv


INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name | Type   | Params | Mode 
----------------------------------------
0 | fc1  | Linear | 384    | train
1 | fc2  | Linear | 1.1 K  | train
2 | fc3  | Linear | 33     | train
----------------------------------------
1.5 K     Trainable params
0         Non-trainable params
1.5 K     Total params
0.006     Total estimated model params size (MB)
3         Modules in train mode
0         Modules in eval mode


All submissions with quality scores saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_all_submissions_final_dataset_run3.csv

Fold-Level Additional Metrics (All Contests Combined) for each Filter %:
   Filter_Percentage  Remaining_Percentage_Size_of_Submissions  Accuracy  \
0                0.1                                 88.928893  0.178218   
1                0.2                                 79.117912  0.276328   
2                0.3                                 69.216922  0.373537   
3                0.4                                 59.045905  0.473447   
4                0.5                                 49.504950  0.568857   
5                0.6                                 39.243924  0.662466   
6                0.7                                 28.802880  0.739874   

     Recall  True_Negatives  False_Positives  False_Negatives  True_Positives  
0  0.987013             122              912                1              76  
1  0.987013          

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=20` reached.


----- Testset Evaluation by filtering out varying percentages of submissions for each contest -----

----- Overall Evaluation at 50% filtering (FOLD LEVEL) -----
Fold 4 accuracy: 0.5520833333333334
Fold 4 recall: 0.9404761904761905
Fold 4 Confusion Matrix:
[[769 683]
 [  5  79]]
Fold 4 Remaining % size of Submissions: 49.61%

Per-contest results (fold-level) saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_results_final_dataset_run3.csv


INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name | Type   | Params | Mode 
----------------------------------------
0 | fc1  | Linear | 384    | train
1 | fc2  | Linear | 1.1 K  | train
2 | fc3  | Linear | 33     | train
----------------------------------------
1.5 K     Trainable params
0         Non-trainable params
1.5 K     Total params
0.006     Total estimated model params size (MB)
3         Modules in train mode
0         Modules in eval mode


All submissions with quality scores saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_all_submissions_final_dataset_run3.csv

Fold-Level Additional Metrics (All Contests Combined) for each Filter %:
   Filter_Percentage  Remaining_Percentage_Size_of_Submissions  Accuracy  \
0                0.1                                 89.388021  0.160807   
1                0.2                                 79.427083  0.260417   
2                0.3                                 69.270833  0.359375   
3                0.4                                 59.375000  0.457031   
4                0.5                                 49.609375  0.552083   
5                0.6                                 39.453125  0.644531   
6                0.7                                 29.492188  0.733724   

     Recall  True_Negatives  False_Positives  False_Negatives  True_Positives  
0  1.000000             163             1289                0              84  
1  1.000000          

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=20` reached.


----- Testset Evaluation by filtering out varying percentages of submissions for each contest -----

----- Overall Evaluation at 50% filtering (FOLD LEVEL) -----
Fold 5 accuracy: 0.5498489425981873
Fold 5 recall: 0.9420289855072463
Fold 5 Confusion Matrix:
[[663 592]
 [  4  65]]
Fold 5 Remaining % size of Submissions: 49.62%

Per-contest results (fold-level) saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_results_final_dataset_run3.csv


INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name | Type   | Params | Mode 
----------------------------------------
0 | fc1  | Linear | 384    | train
1 | fc2  | Linear | 1.1 K  | train
2 | fc3  | Linear | 33     | train
----------------------------------------
1.5 K     Trainable params
0         Non-trainable params
1.5 K     Total params
0.006     Total estimated model params size (MB)
3         Modules in train mode
0         Modules in eval mode


All submissions with quality scores saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_all_submissions_final_dataset_run3.csv

Fold-Level Additional Metrics (All Contests Combined) for each Filter %:
   Filter_Percentage  Remaining_Percentage_Size_of_Submissions  Accuracy  \
0                0.1                                 89.350453  0.158610   
1                0.2                                 79.305136  0.257553   
2                0.3                                 69.410876  0.356495   
3                0.4                                 59.290030  0.456193   
4                0.5                                 49.622356  0.549849   
5                0.6                                 39.350453  0.645015   
6                0.7                                 29.229607  0.741692   

     Recall  True_Negatives  False_Positives  False_Negatives  True_Positives  
0  1.000000             141             1114                0              69  
1  0.985507          

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=20` reached.


----- Testset Evaluation by filtering out varying percentages of submissions for each contest -----

----- Overall Evaluation at 50% filtering (FOLD LEVEL) -----
Fold 1 accuracy: 0.5444776119402985
Fold 1 recall: 0.9605263157894737
Fold 1 Confusion Matrix:
[[839 760]
 [  3  73]]
Fold 1 Remaining % size of Submissions: 49.73%

Per-contest results (fold-level) saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_results_final_dataset_run4.csv


INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name | Type   | Params | Mode 
----------------------------------------
0 | fc1  | Linear | 384    | train
1 | fc2  | Linear | 1.1 K  | train
2 | fc3  | Linear | 33     | train
----------------------------------------
1.5 K     Trainable params
0         Non-trainable params
1.5 K     Total params
0.006     Total estimated model params size (MB)
3         Modules in train mode
0         Modules in eval mode


All submissions with quality scores saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_all_submissions_final_dataset_run4.csv

Fold-Level Additional Metrics (All Contests Combined) for each Filter %:
   Filter_Percentage  Remaining_Percentage_Size_of_Submissions  Accuracy  \
0                0.1                                 89.313433  0.152239   
1                0.2                                 79.402985  0.251343   
2                0.3                                 69.313433  0.351045   
3                0.4                                 59.402985  0.448955   
4                0.5                                 49.731343  0.544478   
5                0.6                                 39.402985  0.644179   
6                0.7                                 29.373134  0.739701   

     Recall  True_Negatives  False_Positives  False_Negatives  True_Positives  
0  1.000000             179             1420                0              76  
1  1.000000          

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=20` reached.


----- Testset Evaluation by filtering out varying percentages of submissions for each contest -----

----- Overall Evaluation at 50% filtering (FOLD LEVEL) -----
Fold 2 accuracy: 0.5743362831858407
Fold 2 recall: 0.9148936170212766
Fold 2 Confusion Matrix:
[[563 473]
 [  8  86]]
Fold 2 Remaining % size of Submissions: 49.47%

Per-contest results (fold-level) saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_results_final_dataset_run4.csv


INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name | Type   | Params | Mode 
----------------------------------------
0 | fc1  | Linear | 384    | train
1 | fc2  | Linear | 1.1 K  | train
2 | fc3  | Linear | 33     | train
----------------------------------------
1.5 K     Trainable params
0         Non-trainable params
1.5 K     Total params
0.006     Total estimated model params size (MB)
3         Modules in train mode
0         Modules in eval mode


All submissions with quality scores saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_all_submissions_final_dataset_run4.csv

Fold-Level Additional Metrics (All Contests Combined) for each Filter %:
   Filter_Percentage  Remaining_Percentage_Size_of_Submissions  Accuracy  \
0                0.1                                 89.203540  0.189381   
1                0.2                                 79.203540  0.287611   
2                0.3                                 68.938053  0.388496   
3                0.4                                 59.115044  0.484956   
4                0.5                                 49.469027  0.574336   
5                0.6                                 39.203540  0.669912   
6                0.7                                 29.380531  0.748673   

     Recall  True_Negatives  False_Positives  False_Negatives  True_Positives  
0  0.989362             121              915                1              93  
1  0.978723          

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=20` reached.


----- Testset Evaluation by filtering out varying percentages of submissions for each contest -----

----- Overall Evaluation at 50% filtering (FOLD LEVEL) -----
Fold 3 accuracy: 0.5644654088050315
Fold 3 recall: 0.9230769230769231
Fold 3 Confusion Matrix:
[[634 547]
 [  7  84]]
Fold 3 Remaining % size of Submissions: 49.61%

Per-contest results (fold-level) saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_results_final_dataset_run4.csv


INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name | Type   | Params | Mode 
----------------------------------------
0 | fc1  | Linear | 384    | train
1 | fc2  | Linear | 1.1 K  | train
2 | fc3  | Linear | 33     | train
----------------------------------------
1.5 K     Trainable params
0         Non-trainable params
1.5 K     Total params
0.006     Total estimated model params size (MB)
3         Modules in train mode
0         Modules in eval mode


All submissions with quality scores saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_all_submissions_final_dataset_run4.csv

Fold-Level Additional Metrics (All Contests Combined) for each Filter %:
   Filter_Percentage  Remaining_Percentage_Size_of_Submissions  Accuracy  \
0                0.1                                 89.150943  0.178459   
1                0.2                                 79.166667  0.278302   
2                0.3                                 69.261006  0.377358   
3                0.4                                 59.198113  0.474843   
4                0.5                                 49.606918  0.564465   
5                0.6                                 39.229560  0.658805   
6                0.7                                 29.088050  0.735063   

     Recall  True_Negatives  False_Positives  False_Negatives  True_Positives  
0  0.989011             137             1044                1              90  
1  0.989011          

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=20` reached.


----- Testset Evaluation by filtering out varying percentages of submissions for each contest -----

----- Overall Evaluation at 50% filtering (FOLD LEVEL) -----
Fold 4 accuracy: 0.5468337730870713
Fold 4 recall: 0.9846153846153847
Fold 4 Confusion Matrix:
[[765 686]
 [  1  64]]
Fold 4 Remaining % size of Submissions: 49.47%

Per-contest results (fold-level) saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_results_final_dataset_run4.csv


INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name | Type   | Params | Mode 
----------------------------------------
0 | fc1  | Linear | 384    | train
1 | fc2  | Linear | 1.1 K  | train
2 | fc3  | Linear | 33     | train
----------------------------------------
1.5 K     Trainable params
0         Non-trainable params
1.5 K     Total params
0.006     Total estimated model params size (MB)
3         Modules in train mode
0         Modules in eval mode


All submissions with quality scores saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_all_submissions_final_dataset_run4.csv

Fold-Level Additional Metrics (All Contests Combined) for each Filter %:
   Filter_Percentage  Remaining_Percentage_Size_of_Submissions  Accuracy  \
0                0.1                                 89.248021  0.150396   
1                0.2                                 79.419525  0.248681   
2                0.3                                 69.393140  0.347625   
3                0.4                                 59.300792  0.448549   
4                0.5                                 49.472296  0.546834   
5                0.6                                 39.511873  0.642480   
6                0.7                                 29.221636  0.733509   

     Recall  True_Negatives  False_Positives  False_Negatives  True_Positives  
0  1.000000             163             1288                0              65  
1  1.000000          

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=20` reached.


----- Testset Evaluation by filtering out varying percentages of submissions for each contest -----

----- Overall Evaluation at 50% filtering (FOLD LEVEL) -----
Fold 5 accuracy: 0.5633802816901409
Fold 5 recall: 0.9508196721311475
Fold 5 Confusion Matrix:
[[462 400]
 [  3  58]]
Fold 5 Remaining % size of Submissions: 49.62%

Per-contest results (fold-level) saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_results_final_dataset_run4.csv


INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name | Type   | Params | Mode 
----------------------------------------
0 | fc1  | Linear | 384    | train
1 | fc2  | Linear | 1.1 K  | train
2 | fc3  | Linear | 33     | train
----------------------------------------
1.5 K     Trainable params
0         Non-trainable params
1.5 K     Total params
0.006     Total estimated model params size (MB)
3         Modules in train mode
0         Modules in eval mode


All submissions with quality scores saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_all_submissions_final_dataset_run4.csv

Fold-Level Additional Metrics (All Contests Combined) for each Filter %:
   Filter_Percentage  Remaining_Percentage_Size_of_Submissions  Accuracy  \
0                0.1                                 88.949079  0.176598   
1                0.2                                 78.981582  0.276273   
2                0.3                                 68.905742  0.374865   
3                0.4                                 59.046587  0.473456   
4                0.5                                 49.620802  0.563380   
5                0.6                                 39.003250  0.660888   
6                0.7                                 28.927411  0.750813   

     Recall  True_Negatives  False_Positives  False_Negatives  True_Positives  
0  1.000000             102              760                0              61  
1  1.000000          

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=20` reached.


----- Testset Evaluation by filtering out varying percentages of submissions for each contest -----

----- Overall Evaluation at 50% filtering (FOLD LEVEL) -----
Fold 1 accuracy: 0.5643397813288478
Fold 1 recall: 0.9733333333333334
Fold 1 Confusion Matrix:
[[598 516]
 [  2  73]]
Fold 1 Remaining % size of Submissions: 49.54%

Per-contest results (fold-level) saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_results_final_dataset_run5.csv


INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name | Type   | Params | Mode 
----------------------------------------
0 | fc1  | Linear | 384    | train
1 | fc2  | Linear | 1.1 K  | train
2 | fc3  | Linear | 33     | train
----------------------------------------
1.5 K     Trainable params
0         Non-trainable params
1.5 K     Total params
0.006     Total estimated model params size (MB)
3         Modules in train mode
0         Modules in eval mode


All submissions with quality scores saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_all_submissions_final_dataset_run5.csv

Fold-Level Additional Metrics (All Contests Combined) for each Filter %:
   Filter_Percentage  Remaining_Percentage_Size_of_Submissions  Accuracy  \
0                0.1                                 88.982338  0.171573   
1                0.2                                 79.058032  0.270816   
2                0.3                                 68.965517  0.370059   
3                0.4                                 59.125315  0.468461   
4                0.5                                 49.537426  0.564340   
5                0.6                                 39.024390  0.661060   
6                0.7                                 29.100084  0.750210   

     Recall  True_Negatives  False_Positives  False_Negatives  True_Positives  
0  0.986667             130              984                1              74  
1  0.986667          

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=20` reached.


----- Testset Evaluation by filtering out varying percentages of submissions for each contest -----

----- Overall Evaluation at 50% filtering (FOLD LEVEL) -----
Fold 2 accuracy: 0.5497076023391813
Fold 2 recall: 0.9655172413793104
Fold 2 Confusion Matrix:
[[856 767]
 [  3  84]]
Fold 2 Remaining % size of Submissions: 49.77%

Per-contest results (fold-level) saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_results_final_dataset_run5.csv


INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name | Type   | Params | Mode 
----------------------------------------
0 | fc1  | Linear | 384    | train
1 | fc2  | Linear | 1.1 K  | train
2 | fc3  | Linear | 33     | train
----------------------------------------
1.5 K     Trainable params
0         Non-trainable params
1.5 K     Total params
0.006     Total estimated model params size (MB)
3         Modules in train mode
0         Modules in eval mode


All submissions with quality scores saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_all_submissions_final_dataset_run5.csv

Fold-Level Additional Metrics (All Contests Combined) for each Filter %:
   Filter_Percentage  Remaining_Percentage_Size_of_Submissions  Accuracy  \
0                0.1                                 89.532164  0.155556   
1                0.2                                 79.590643  0.252632   
2                0.3                                 69.415205  0.354386   
3                0.4                                 59.473684  0.453801   
4                0.5                                 49.766082  0.549708   
5                0.6                                 39.532164  0.645029   
6                0.7                                 29.473684  0.732749   

     Recall  True_Negatives  False_Positives  False_Negatives  True_Positives  
0  1.000000             179             1444                0              87  
1  0.977011          

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=20` reached.


----- Testset Evaluation by filtering out varying percentages of submissions for each contest -----

----- Overall Evaluation at 50% filtering (FOLD LEVEL) -----
Fold 3 accuracy: 0.5584532374100719
Fold 3 recall: 0.927536231884058
Fold 3 Confusion Matrix:
[[557 486]
 [  5  64]]
Fold 3 Remaining % size of Submissions: 49.46%

Per-contest results (fold-level) saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_results_final_dataset_run5.csv


INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name | Type   | Params | Mode 
----------------------------------------
0 | fc1  | Linear | 384    | train
1 | fc2  | Linear | 1.1 K  | train
2 | fc3  | Linear | 33     | train
----------------------------------------
1.5 K     Trainable params
0         Non-trainable params
1.5 K     Total params
0.006     Total estimated model params size (MB)
3         Modules in train mode
0         Modules in eval mode


All submissions with quality scores saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_all_submissions_final_dataset_run5.csv

Fold-Level Additional Metrics (All Contests Combined) for each Filter %:
   Filter_Percentage  Remaining_Percentage_Size_of_Submissions  Accuracy  \
0                0.1                                 89.118705  0.170863   
1                0.2                                 79.226619  0.269784   
2                0.3                                 69.064748  0.371403   
3                0.4                                 59.172662  0.464928   
4                0.5                                 49.460432  0.558453   
5                0.6                                 39.298561  0.652878   
6                0.7                                 29.226619  0.739209   

     Recall  True_Negatives  False_Positives  False_Negatives  True_Positives  
0  1.000000             121              922                0              69  
1  1.000000          

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=20` reached.


----- Testset Evaluation by filtering out varying percentages of submissions for each contest -----

----- Overall Evaluation at 50% filtering (FOLD LEVEL) -----
Fold 4 accuracy: 0.5561918396564066
Fold 4 recall: 0.9493670886075949
Fold 4 Confusion Matrix:
[[702 616]
 [  4  75]]
Fold 4 Remaining % size of Submissions: 49.46%

Per-contest results (fold-level) saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_results_final_dataset_run5.csv


INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name | Type   | Params | Mode 
----------------------------------------
0 | fc1  | Linear | 384    | train
1 | fc2  | Linear | 1.1 K  | train
2 | fc3  | Linear | 33     | train
----------------------------------------
1.5 K     Trainable params
0         Non-trainable params
1.5 K     Total params
0.006     Total estimated model params size (MB)
3         Modules in train mode
0         Modules in eval mode


All submissions with quality scores saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_all_submissions_final_dataset_run5.csv

Fold-Level Additional Metrics (All Contests Combined) for each Filter %:
   Filter_Percentage  Remaining_Percentage_Size_of_Submissions  Accuracy  \
0                0.1                                 89.191124  0.164639   
1                0.2                                 79.241231  0.264137   
2                0.3                                 69.362921  0.361489   
3                0.4                                 59.269864  0.460988   
4                0.5                                 49.463135  0.556192   
5                0.6                                 39.370079  0.647101   
6                0.7                                 29.205440  0.732999   

     Recall  True_Negatives  False_Positives  False_Negatives  True_Positives  
0  1.000000             151             1167                0              79  
1  1.000000          

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=20` reached.


----- Testset Evaluation by filtering out varying percentages of submissions for each contest -----

----- Overall Evaluation at 50% filtering (FOLD LEVEL) -----
Fold 5 accuracy: 0.5658844765342961
Fold 5 recall: 0.948051948051948
Fold 5 Confusion Matrix:
[[554 477]
 [  4  73]]
Fold 5 Remaining % size of Submissions: 49.64%

Per-contest results (fold-level) saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_results_final_dataset_run5.csv
All submissions with quality scores saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/MLP_all_submissions_final_dataset_run5.csv

Fold-Level Additional Metrics (All Contests Combined) for each Filter %:
   Filter_Percentage  Remaining_Percentage_Size_of_Submissions  Accuracy  \
0                0.1                                 88.989170  0.179603   
1                0.2                                 79.061372  0.278881   
2                0.3                                 69.043321  0.377256   
3                0.4              

In [None]:
# Cross-model comparison: collect the average recall of every model and
# plot the models against each other.

# Single source of truth for the output directory. It was previously spelled
# out twice (plotter output path and figure path), which made it easy for the
# two locations to drift apart when the path is edited.
COMPARISON_OUTPUT_DIR = "/content/drive/MyDrive/Master_Thesis/MLP_Output/"

plotter = ModelComparisonPlotter(
    output_path=COMPARISON_OUTPUT_DIR,
    dataset_name="28_11.csv",
    n_runs=5,  # presumably the number of repeated runs per model -- TODO confirm
)

# Names of the models whose results are gathered into one table.
models_to_compare = ["MLP", "RandomForest", "XGBoost", "SVM", "LogisticRegression"]
combined_df = plotter.gather_average_recall_all_models(models_to_compare)

# Only a bare file name is passed here; where it is written is decided by
# the plotter (the recorded run saved it under the output directory).
plotter.save_combined_averages(combined_df, "all_models_avg_recall.csv")

# The figure is stored next to the CSV outputs; the joined path is identical
# to the previously hard-coded literal.
plot_path = os.path.join(COMPARISON_OUTPUT_DIR, "model_recall_comparison.png")
plotter.plot_model_recall_comparison(combined_df, save_path=plot_path)


Combined averages saved to /content/drive/MyDrive/Master_Thesis/MLP_Output/all_models_avg_recall.csv
Plot saved to: /content/drive/MyDrive/Master_Thesis/MLP_Output/model_recall_comparison.png
