# 3W Real Dataset Cleaning & Preparation

### Imports & Configuration

In [1]:
import pandas as pd
# Notebook-wide pandas display settings: show every row and column, never
# truncate cell text, and render floats with two decimal places.
pd.options.display.max_rows = None
pd.options.display.max_columns = None
pd.options.display.max_colwidth = None
pd.options.display.float_format = '{:.2f}'.format
from IPython.display import display
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import os
import sys
from pyspark.sql import SparkSession, Window, DataFrame
from pyspark.sql import functions as F
from pyspark.sql.functions import col, when, count, mean, isnan, expr, lit, countDistinct, round, lag, unix_timestamp, lead, explode, collect_list, struct, rand, row_number
from pyspark.sql.types import IntegerType, StringType, StructType, StructField, ArrayType, MapType
import pyspark.pandas as ps
from pyspark.ml.feature import VectorAssembler, StandardScaler
from pyspark.ml.functions import vector_to_array
from sklearn.model_selection import train_test_split
from functools import reduce
import json
from tqdm import tqdm



In [2]:
# Base directory paths
# Every output folder used by this notebook is a direct child of BASE_DIR.
BASE_DIR = "../Cleaning & Preparation/"
(
    STAGED_DATA_PATH,
    RESULTS_PATH,
    CLEANED_DATA_PATH,
    TRAIN_TEST_DATA_PATH,
    SCALED_DATA_PATH,
) = [
    os.path.join(BASE_DIR, subfolder)
    for subfolder in (
        "Staged Cleaning Data",
        "Results",
        "Clean Data",
        "Train Test Data",
        "Train Test (Scaled) Data",
    )
]

In [3]:
### Initialize Spark Session
# Build (or reuse) a local Spark session that uses all available cores.
# Any failure is reported and then re-raised so later cells do not run
# against a missing session.
try:
    spark = (
        SparkSession.builder
        .appName("3W Real Dataset")
        .master("local[*]")
        .config("spark.driver.memory", "12g")
        .config("spark.executor.memory", "6g")
        .config("spark.sql.shuffle.partitions", "16")
        .config("spark.default.parallelism", "16")
        .config("spark.sql.parquet.filterPushdown", "true")
        .config("spark.sql.parquet.enableVectorizedReader", "true")
        .getOrCreate()
    )
except Exception as init_error:
    print(f"Error initializing SparkSession: {init_error}")
    raise

24/11/27 08:02:39 WARN Utils: Your hostname, Ghazis-MacBook-Pro.local resolves to a loopback address: 127.0.0.1; using 166.87.130.137 instead (on interface en0)
24/11/27 08:02:39 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
24/11/27 08:02:40 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


In [4]:
### Load Reference Table
REFERENCE_TABLE_PATH = "../EDA/Results/reference_table_real.csv"

# Read the reference CSV (first line is the header) and discard the two
# statistics columns, keeping the remaining descriptive columns.
reference_table_missing_values_real = (
    spark.read
    .csv(REFERENCE_TABLE_PATH, header=True)
    .drop("Unique Values (Real)", "Missing Values (%) (Real)")
)

# Preview up to 100 rows, grouped by value type, without truncating cell text.
reference_table_missing_values_real.sort("Value Type").show(n=100, truncate=False)

                                                                                

+-------------+------------------------------------------------------------+--------------------+-----------+
|Tag          |Name                                                        |Unit                |Value Type |
+-------------+------------------------------------------------------------+--------------------+-----------+
|ESTADO-DHSV  |State of the DHSV (downhole safety valve)                   |[0, 0.5, 1]         |Categorical|
|ESTADO-M1    |State of the PMV (production master valve)                  |[0, 0.5, 1]         |Categorical|
|ESTADO-PXO   |State of the PXO (pig-crossover) valve                      |[0, 0.5, 1]         |Categorical|
|ESTADO-M2    |State of the AMV (annulus master valve)                     |[0, 0.5, 1]         |Categorical|
|ESTADO-SDV-GL|State of the gas lift SDV (shutdown valve)                  |[0, 0.5, 1]         |Categorical|
|ESTADO-SDV-P |State of the production SDV (shutdown valve)                |[0, 0.5, 1]         |Categorical|
|ESTADO-W1

In [5]:
### Define Features
# Bring the reference table to the driver once and derive all three feature
# lists locally, instead of launching a separate filter + collect Spark job
# for each list. Row order (and therefore list order) follows the table.
_reference_rows = reference_table_missing_values_real.select("Tag", "Value Type").collect()

# All tags typed "Continuous" or "Categorical", excluding the "DataType" tag.
feature_names = [
    row["Tag"] for row in _reference_rows
    if row["Value Type"] in ("Continuous", "Categorical") and row["Tag"] != "DataType"
]
# Tags typed "Continuous".
features_continuous = [
    row["Tag"] for row in _reference_rows
    if row["Value Type"] == "Continuous"
]
# Tags typed "Categorical", excluding the "DataType" tag.
features_categorical = [
    row["Tag"] for row in _reference_rows
    if row["Value Type"] == "Categorical" and row["Tag"] != "DataType"
]

In [6]:
### Load Original 3W (Real) Dataset
# Location of the Parquet data holding the real instances.
DATA_PATH = "../Data/real_instances.parquet"
# Lazily bind the Parquet data to a Spark DataFrame.
spark_df_real = spark.read.format("parquet").load(DATA_PATH)

In [7]:
### Fill Missing Values by Instance
def clean_missing_values(input_df: DataFrame, reference_table: DataFrame, output_path: str, incomplete_dirs: set = None):
    """
    Forward/backward fill missing values one instance at a time and write each
    cleaned instance to its own Parquet output.

    For every distinct (Instance, label) pair in ``input_df`` the function:
      1. selects the rows of that instance and orders them by ``timestamp``;
      2. drops every column that is null on all rows of the instance;
      3. converts the result to pandas and applies ``ffill().bfill()`` to each
         "Continuous"/"Categorical" column that is still present;
      4. converts back to Spark (using the notebook-level ``spark`` session) and
         writes, in overwrite mode, to
         ``<output_path>/Label_<label>/Instance_<instance>.parquet``.

    An instance that raises is reported and skipped so the remaining instances
    are still processed; a summary of skipped instances is printed at the end.

    Parameters:
        input_df (DataFrame): Input Spark DataFrame containing the raw data.
            Must expose "Instance", "label" and "timestamp" columns.
        reference_table (DataFrame): Reference table with "Tag" and "Value Type"
            columns. Tags typed "Empty" are dropped from ``input_df``; tags typed
            "Continuous" or "Categorical" (except "DataType") are filled.
        output_path (str): Directory path to save cleaned instances.
        incomplete_dirs (set): Optional. Output paths (exactly as built by
            ``os.path.join(output_path, "Label_<label>", "Instance_<instance>.parquet")``)
            that need reprocessing. When non-empty, only instances whose output
            path is in the set are processed; when ``None`` or empty, every
            instance is processed.

    Returns:
        None
    """
    # Collect the (small) reference table once and classify tags on the driver.
    reference_rows = reference_table.select("Tag", "Value Type").distinct().collect()
    relevant_columns = [
        row["Tag"] for row in reference_rows
        if row["Value Type"] in ("Continuous", "Categorical") and row["Tag"] != "DataType"
    ]
    empty_columns = [row["Tag"] for row in reference_rows if row["Value Type"] == "Empty"]
    input_df = input_df.drop(*empty_columns)

    failed_instances = []
    unique_instances = input_df.select("Instance", "label").distinct().collect()
    for row in tqdm(unique_instances, desc="Cleaning Instances"):
        instance = row["Instance"]
        label = row["label"]

        label_dir = os.path.join(output_path, f"Label_{label}")
        output_file = os.path.join(label_dir, f"Instance_{instance}.parquet")
        # Resume mode: when a non-empty set is supplied, skip everything not listed in it.
        if incomplete_dirs and output_file not in incomplete_dirs:
            continue

        try:
            instance_rows = input_df.filter(col("Instance") == instance)

            # One aggregation returns the non-null count of every column, replacing
            # one Spark count() job per column. F.count(column) ignores nulls, so a
            # count of 0 means the column is null on every row of this instance.
            column_names = instance_rows.columns
            non_null_counts = instance_rows.agg(
                *[F.count(col(name)) for name in column_names]
            ).first()
            all_null_columns = [
                name for name, non_null in zip(column_names, non_null_counts) if non_null == 0
            ]

            # Sort before dropping so the ordering still applies even if
            # "timestamp" itself is among the dropped columns.
            instance_df = instance_rows.orderBy("timestamp")
            if all_null_columns:
                instance_df = instance_df.drop(*all_null_columns)

            instance_pdf = instance_df.toPandas()
            for column in relevant_columns:
                if column in instance_pdf.columns:
                    # Forward fill first; backward fill then covers leading gaps.
                    instance_pdf[column] = instance_pdf[column].ffill().bfill()

            cleaned_sdf = spark.createDataFrame(instance_pdf)
            os.makedirs(label_dir, exist_ok=True)
            cleaned_sdf.write.mode("overwrite").parquet(output_file)
            print(f"Processed: Label - {label}, Instance - {instance}")

        except Exception as e:
            # Best effort: report the failure, remember it, and move on.
            print(f"Error processing instance: Label - {label}, Instance - {instance}, Error: {type(e).__name__}: {e}")
            failed_instances.append((label, instance))

    if failed_instances:
        print(f"Instances skipped due to errors ({len(failed_instances)}): {failed_instances}")
    print(f"Cleaning process completed. Data exported to: {output_path}")

# Clean Missing Values
# Run the per-instance fill over the real dataset; output goes to the
# "Stage 1" folder under the staged-data directory.
clean_missing_values(
    input_df=spark_df_real,
    reference_table=reference_table_missing_values_real,
    output_path=os.path.join(STAGED_DATA_PATH, "Stage 1"),
)

Cleaning Instances:   0%|          | 1/1119 [00:06<2:05:48,  6.75s/it]          

Processed: Label - 2, Instance - 1095


Cleaning Instances:   0%|          | 2/1119 [00:13<2:01:31,  6.53s/it]

Processed: Label - 3, Instance - 1058


Cleaning Instances:   0%|          | 3/1119 [00:19<1:57:49,  6.33s/it]

Processed: Label - 3, Instance - 1079


24/11/27 00:11:59 WARN SparkStringUtils: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.
Cleaning Instances:   0%|          | 4/1119 [00:23<1:44:19,  5.61s/it]

Processed: Label - 5, Instance - 1110


Cleaning Instances:   0%|          | 5/1119 [00:29<1:45:22,  5.68s/it]

Processed: Label - 3, Instance - 1070


Cleaning Instances:   1%|          | 6/1119 [00:33<1:34:26,  5.09s/it]

Processed: Label - 2, Instance - 1089


Cleaning Instances:   1%|          | 7/1119 [00:39<1:37:18,  5.25s/it]

Processed: Label - 3, Instance - 1057


Cleaning Instances:   1%|          | 8/1119 [00:44<1:40:13,  5.41s/it]

Processed: Label - 3, Instance - 1063


Cleaning Instances:   1%|          | 9/1119 [00:49<1:36:59,  5.24s/it]

Processed: Label - 2, Instance - 1088


Cleaning Instances:   1%|          | 10/1119 [00:58<1:55:14,  6.24s/it]

Processed: Label - 5, Instance - 1114


Cleaning Instances:   1%|          | 11/1119 [01:04<1:55:55,  6.28s/it]

Processed: Label - 5, Instance - 1115


Cleaning Instances:   1%|          | 12/1119 [01:10<1:51:39,  6.05s/it]

Processed: Label - 3, Instance - 1055


Cleaning Instances:   1%|          | 13/1119 [01:15<1:48:07,  5.87s/it]

Processed: Label - 3, Instance - 1066


Cleaning Instances:   1%|▏         | 14/1119 [01:21<1:46:13,  5.77s/it]

Processed: Label - 3, Instance - 1068


Cleaning Instances:   1%|▏         | 15/1119 [01:26<1:44:38,  5.69s/it]

Processed: Label - 3, Instance - 1082


Cleaning Instances:   1%|▏         | 16/1119 [01:30<1:36:56,  5.27s/it]

Processed: Label - 2, Instance - 1096


Cleaning Instances:   2%|▏         | 17/1119 [01:35<1:31:06,  4.96s/it]

Processed: Label - 5, Instance - 1118


Cleaning Instances:   2%|▏         | 18/1119 [01:40<1:34:03,  5.13s/it]

Processed: Label - 3, Instance - 1064


Cleaning Instances:   2%|▏         | 19/1119 [01:45<1:34:34,  5.16s/it]

Processed: Label - 3, Instance - 1073


Cleaning Instances:   2%|▏         | 20/1119 [01:51<1:36:09,  5.25s/it]

Processed: Label - 3, Instance - 1083


Cleaning Instances:   2%|▏         | 21/1119 [01:56<1:37:06,  5.31s/it]

Processed: Label - 3, Instance - 1084


Cleaning Instances:   2%|▏         | 22/1119 [02:00<1:30:28,  4.95s/it]

Processed: Label - 2, Instance - 1102


Cleaning Instances:   2%|▏         | 23/1119 [02:05<1:26:23,  4.73s/it]

Processed: Label - 4, Instance - 1052


Cleaning Instances:   2%|▏         | 24/1119 [02:10<1:30:50,  4.98s/it]

Processed: Label - 3, Instance - 1062


Cleaning Instances:   2%|▏         | 25/1119 [02:16<1:33:55,  5.15s/it]

Processed: Label - 3, Instance - 1074


Cleaning Instances:   2%|▏         | 26/1119 [02:21<1:35:42,  5.25s/it]

Processed: Label - 3, Instance - 1067


Cleaning Instances:   2%|▏         | 27/1119 [02:27<1:37:51,  5.38s/it]

Processed: Label - 3, Instance - 1072


Cleaning Instances:   3%|▎         | 28/1119 [02:31<1:29:11,  4.90s/it]

Processed: Label - 2, Instance - 1094


Cleaning Instances:   3%|▎         | 29/1119 [02:34<1:23:22,  4.59s/it]

Processed: Label - 2, Instance - 1099


Cleaning Instances:   3%|▎         | 30/1119 [02:39<1:22:07,  4.52s/it]

Processed: Label - 2, Instance - 1101


Cleaning Instances:   3%|▎         | 31/1119 [02:46<1:35:24,  5.26s/it]

Processed: Label - 5, Instance - 1108


Cleaning Instances:   3%|▎         | 32/1119 [02:52<1:37:40,  5.39s/it]         

Processed: Label - 5, Instance - 1113


Cleaning Instances:   3%|▎         | 33/1119 [02:56<1:30:09,  4.98s/it]

Processed: Label - 2, Instance - 1086


Cleaning Instances:   3%|▎         | 34/1119 [03:00<1:27:47,  4.85s/it]

Processed: Label - 2, Instance - 1092


Cleaning Instances:   3%|▎         | 35/1119 [03:04<1:23:48,  4.64s/it]

Processed: Label - 2, Instance - 1105


Cleaning Instances:   3%|▎         | 36/1119 [03:15<1:56:54,  6.48s/it]         

Processed: Label - 5, Instance - 1116


Cleaning Instances:   3%|▎         | 37/1119 [03:19<1:42:44,  5.70s/it]

Processed: Label - 5, Instance - 1117


Cleaning Instances:   3%|▎         | 38/1119 [03:23<1:36:44,  5.37s/it]

Processed: Label - 4, Instance - 1053


Cleaning Instances:   3%|▎         | 39/1119 [03:29<1:39:08,  5.51s/it]

Processed: Label - 3, Instance - 1056


Cleaning Instances:   4%|▎         | 40/1119 [03:35<1:39:12,  5.52s/it]

Processed: Label - 3, Instance - 1069


Cleaning Instances:   4%|▎         | 41/1119 [03:41<1:40:06,  5.57s/it]

Processed: Label - 3, Instance - 1061


Cleaning Instances:   4%|▍         | 42/1119 [03:46<1:39:23,  5.54s/it]

Processed: Label - 3, Instance - 1075


Cleaning Instances:   4%|▍         | 43/1119 [03:52<1:39:55,  5.57s/it]

Processed: Label - 3, Instance - 1085


Cleaning Instances:   4%|▍         | 44/1119 [03:57<1:37:15,  5.43s/it]

Processed: Label - 2, Instance - 1104


Cleaning Instances:   4%|▍         | 45/1119 [04:03<1:38:53,  5.52s/it]

Processed: Label - 3, Instance - 1080


Cleaning Instances:   4%|▍         | 46/1119 [04:07<1:32:39,  5.18s/it]

Processed: Label - 2, Instance - 1087


Cleaning Instances:   4%|▍         | 47/1119 [04:12<1:30:17,  5.05s/it]

Processed: Label - 2, Instance - 1107


Cleaning Instances:   4%|▍         | 48/1119 [04:18<1:35:24,  5.34s/it]

Processed: Label - 3, Instance - 1059


Cleaning Instances:   4%|▍         | 49/1119 [04:23<1:36:13,  5.40s/it]

Processed: Label - 3, Instance - 1071


Cleaning Instances:   4%|▍         | 50/1119 [04:29<1:36:08,  5.40s/it]

Processed: Label - 3, Instance - 1076


Cleaning Instances:   5%|▍         | 51/1119 [04:33<1:29:36,  5.03s/it]

Processed: Label - 2, Instance - 1091


Cleaning Instances:   5%|▍         | 52/1119 [04:36<1:21:27,  4.58s/it]

Processed: Label - 2, Instance - 1098


Cleaning Instances:   5%|▍         | 53/1119 [04:40<1:15:38,  4.26s/it]

Processed: Label - 2, Instance - 1100


Cleaning Instances:   5%|▍         | 54/1119 [04:48<1:37:07,  5.47s/it]

Processed: Label - 5, Instance - 1111


Cleaning Instances:   5%|▍         | 55/1119 [04:54<1:37:00,  5.47s/it]

Processed: Label - 3, Instance - 1060


Cleaning Instances:   5%|▌         | 56/1119 [04:59<1:35:55,  5.41s/it]

Processed: Label - 3, Instance - 1065


Cleaning Instances:   5%|▌         | 57/1119 [05:04<1:34:30,  5.34s/it]

Processed: Label - 2, Instance - 1106


Cleaning Instances:   5%|▌         | 58/1119 [05:09<1:33:49,  5.31s/it]

Processed: Label - 3, Instance - 1054


Cleaning Instances:   5%|▌         | 59/1119 [05:14<1:33:04,  5.27s/it]

Processed: Label - 3, Instance - 1077


Cleaning Instances:   5%|▌         | 60/1119 [05:20<1:33:32,  5.30s/it]

Processed: Label - 3, Instance - 1081


Cleaning Instances:   5%|▌         | 61/1119 [05:24<1:24:59,  4.82s/it]

Processed: Label - 2, Instance - 1090


Cleaning Instances:   6%|▌         | 62/1119 [05:28<1:20:34,  4.57s/it]

Processed: Label - 2, Instance - 1093


Cleaning Instances:   6%|▌         | 63/1119 [05:32<1:18:37,  4.47s/it]

Processed: Label - 2, Instance - 1097


24/11/27 00:17:29 WARN TaskSetManager: Stage 6734 contains a task of very large size (2196 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:   6%|▌         | 64/1119 [05:53<2:45:25,  9.41s/it]         

Processed: Label - 5, Instance - 1112


Cleaning Instances:   6%|▌         | 65/1119 [05:58<2:24:20,  8.22s/it]

Processed: Label - 3, Instance - 1078


Cleaning Instances:   6%|▌         | 66/1119 [06:02<2:00:11,  6.85s/it]

Processed: Label - 2, Instance - 1103


Cleaning Instances:   6%|▌         | 67/1119 [06:10<2:08:39,  7.34s/it]

Processed: Label - 5, Instance - 1109


Cleaning Instances:   6%|▌         | 68/1119 [06:15<1:54:40,  6.55s/it]

Processed: Label - 6, Instance - 691


Cleaning Instances:   6%|▌         | 69/1119 [06:25<2:13:55,  7.65s/it]

Processed: Label - 1, Instance - 696


24/11/27 00:18:17 WARN TaskSetManager: Stage 7364 contains a task of very large size (1449 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:   6%|▋         | 70/1119 [06:40<2:51:34,  9.81s/it]         

Processed: Label - 7, Instance - 676


24/11/27 00:18:31 WARN TaskSetManager: Stage 7469 contains a task of very large size (1467 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:   6%|▋         | 71/1119 [06:54<3:14:57, 11.16s/it]

Processed: Label - 1, Instance - 693


24/11/27 00:18:43 WARN TaskSetManager: Stage 7574 contains a task of very large size (1089 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:   6%|▋         | 72/1119 [07:06<3:17:51, 11.34s/it]

Processed: Label - 7, Instance - 684


Cleaning Instances:   7%|▋         | 73/1119 [07:10<2:38:16,  9.08s/it]

Processed: Label - 6, Instance - 692


24/11/27 00:18:57 WARN TaskSetManager: Stage 7784 contains a task of very large size (1031 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:   7%|▋         | 74/1119 [07:20<2:44:28,  9.44s/it]         

Processed: Label - 7, Instance - 680


24/11/27 00:19:33 WARN TaskSetManager: Stage 7889 contains a task of very large size (4383 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:   7%|▋         | 75/1119 [07:57<5:05:09, 17.54s/it]         

Processed: Label - 7, Instance - 678


Cleaning Instances:   7%|▋         | 76/1119 [08:01<3:55:03, 13.52s/it]

Processed: Label - 1, Instance - 695


Cleaning Instances:   7%|▋         | 77/1119 [08:05<3:03:51, 10.59s/it]

Processed: Label - 6, Instance - 688


Cleaning Instances:   7%|▋         | 78/1119 [08:13<2:51:09,  9.86s/it]

Processed: Label - 7, Instance - 681


24/11/27 00:20:22 WARN TaskSetManager: Stage 8309 contains a task of very large size (3663 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:   7%|▋         | 79/1119 [08:46<4:50:36, 16.77s/it]         

Processed: Label - 7, Instance - 686


24/11/27 00:21:29 WARN TaskSetManager: Stage 8414 contains a task of very large size (8497 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:   7%|▋         | 80/1119 [09:56<9:28:07, 32.81s/it]         

Processed: Label - 8, Instance - 697


24/11/27 00:21:47 WARN TaskSetManager: Stage 8519 contains a task of very large size (1269 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:   7%|▋         | 81/1119 [10:10<7:51:19, 27.24s/it]         

Processed: Label - 7, Instance - 677


24/11/27 00:22:05 WARN TaskSetManager: Stage 8624 contains a task of very large size (1953 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:   7%|▋         | 82/1119 [10:28<7:01:42, 24.40s/it]         

Processed: Label - 7, Instance - 679


24/11/27 00:22:22 WARN TaskSetManager: Stage 8729 contains a task of very large size (1647 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:   7%|▋         | 83/1119 [10:45<6:25:30, 22.33s/it]         

Processed: Label - 7, Instance - 685


Cleaning Instances:   8%|▊         | 84/1119 [10:49<4:50:02, 16.81s/it]

Processed: Label - 6, Instance - 687


Cleaning Instances:   8%|▊         | 85/1119 [10:53<3:44:07, 13.01s/it]

Processed: Label - 6, Instance - 689


24/11/27 00:24:00 WARN TaskSetManager: Stage 9044 contains a task of very large size (11189 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:   8%|▊         | 86/1119 [12:30<10:55:46, 38.09s/it]        

Processed: Label - 7, Instance - 683


24/11/27 00:24:24 WARN TaskSetManager: Stage 9149 contains a task of very large size (1710 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:   8%|▊         | 87/1119 [12:47<9:07:20, 31.82s/it]         

Processed: Label - 7, Instance - 682


Cleaning Instances:   8%|▊         | 88/1119 [12:57<7:13:53, 25.25s/it]

Processed: Label - 1, Instance - 694


24/11/27 00:24:57 WARN TaskSetManager: Stage 9359 contains a task of very large size (2169 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:   8%|▊         | 89/1119 [13:20<7:01:47, 24.57s/it]         

Processed: Label - 7, Instance - 675


Cleaning Instances:   8%|▊         | 90/1119 [13:24<5:16:01, 18.43s/it]

Processed: Label - 6, Instance - 690


Cleaning Instances:   8%|▊         | 91/1119 [13:29<4:04:00, 14.24s/it]

Processed: Label - 4, Instance - 722


Cleaning Instances:   8%|▊         | 92/1119 [13:33<3:14:42, 11.38s/it]

Processed: Label - 4, Instance - 737


Cleaning Instances:   8%|▊         | 93/1119 [13:38<2:39:25,  9.32s/it]

Processed: Label - 4, Instance - 741


Cleaning Instances:   8%|▊         | 94/1119 [13:42<2:11:56,  7.72s/it]

Processed: Label - 4, Instance - 758


Cleaning Instances:   8%|▊         | 95/1119 [13:47<1:56:42,  6.84s/it]

Processed: Label - 4, Instance - 744


Cleaning Instances:   9%|▊         | 96/1119 [13:51<1:46:05,  6.22s/it]

Processed: Label - 4, Instance - 748


24/11/27 00:26:39 WARN TaskSetManager: Stage 10199 contains a task of very large size (8515 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:   9%|▊         | 97/1119 [15:07<7:38:50, 26.94s/it]         

Processed: Label - 8, Instance - 710


Cleaning Instances:   9%|▉         | 98/1119 [15:13<5:52:44, 20.73s/it]         

Processed: Label - 4, Instance - 716


Cleaning Instances:   9%|▉         | 99/1119 [15:17<4:27:21, 15.73s/it]

Processed: Label - 4, Instance - 754


24/11/27 00:27:32 WARN TaskSetManager: Stage 10514 contains a task of very large size (4455 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:   9%|▉         | 100/1119 [15:55<6:21:39, 22.47s/it]        

Processed: Label - 8, Instance - 706


Cleaning Instances:   9%|▉         | 101/1119 [15:59<4:45:46, 16.84s/it]

Processed: Label - 4, Instance - 728


24/11/27 00:28:34 WARN TaskSetManager: Stage 10724 contains a task of very large size (7543 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:   9%|▉         | 102/1119 [16:58<8:20:52, 29.55s/it]        

Processed: Label - 8, Instance - 707


Cleaning Instances:   9%|▉         | 103/1119 [17:03<6:15:28, 22.17s/it]

Processed: Label - 4, Instance - 714


Cleaning Instances:   9%|▉         | 104/1119 [17:07<4:41:23, 16.63s/it]

Processed: Label - 4, Instance - 721


Cleaning Instances:   9%|▉         | 105/1119 [17:11<3:38:17, 12.92s/it]

Processed: Label - 4, Instance - 727


Cleaning Instances:   9%|▉         | 106/1119 [17:16<2:55:21, 10.39s/it]

Processed: Label - 4, Instance - 759


24/11/27 00:29:12 WARN TaskSetManager: Stage 11249 contains a task of very large size (1953 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:  10%|▉         | 107/1119 [17:35<3:39:54, 13.04s/it]        

Processed: Label - 8, Instance - 708


Cleaning Instances:  10%|▉         | 108/1119 [17:39<2:55:47, 10.43s/it]

Processed: Label - 4, Instance - 712


Cleaning Instances:  10%|▉         | 109/1119 [17:44<2:25:43,  8.66s/it]

Processed: Label - 4, Instance - 743


24/11/27 00:29:51 WARN TaskSetManager: Stage 11564 contains a task of very large size (3411 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:  10%|▉         | 110/1119 [18:14<4:17:14, 15.30s/it]        

Processed: Label - 8, Instance - 709


Cleaning Instances:  10%|▉         | 111/1119 [18:19<3:23:59, 12.14s/it]        

Processed: Label - 4, Instance - 726


Cleaning Instances:  10%|█         | 112/1119 [18:23<2:43:11,  9.72s/it]

Processed: Label - 4, Instance - 750


Cleaning Instances:  10%|█         | 113/1119 [18:27<2:13:31,  7.96s/it]

Processed: Label - 4, Instance - 751


Cleaning Instances:  10%|█         | 114/1119 [18:31<1:52:21,  6.71s/it]

Processed: Label - 4, Instance - 719


Cleaning Instances:  10%|█         | 115/1119 [18:35<1:37:56,  5.85s/it]

Processed: Label - 4, Instance - 738


Cleaning Instances:  10%|█         | 116/1119 [18:39<1:29:03,  5.33s/it]

Processed: Label - 4, Instance - 740


Cleaning Instances:  10%|█         | 117/1119 [18:43<1:23:51,  5.02s/it]

Processed: Label - 4, Instance - 747


Cleaning Instances:  11%|█         | 118/1119 [18:47<1:17:03,  4.62s/it]

Processed: Label - 4, Instance - 724


Cleaning Instances:  11%|█         | 119/1119 [18:51<1:14:53,  4.49s/it]

Processed: Label - 4, Instance - 729


Cleaning Instances:  11%|█         | 120/1119 [18:55<1:11:49,  4.31s/it]

Processed: Label - 4, Instance - 734


Cleaning Instances:  11%|█         | 121/1119 [18:59<1:11:53,  4.32s/it]

Processed: Label - 4, Instance - 753


Cleaning Instances:  11%|█         | 122/1119 [19:04<1:11:53,  4.33s/it]        

Processed: Label - 4, Instance - 755


Cleaning Instances:  11%|█         | 123/1119 [19:07<1:09:06,  4.16s/it]

Processed: Label - 4, Instance - 713


Cleaning Instances:  11%|█         | 124/1119 [19:12<1:10:23,  4.24s/it]

Processed: Label - 4, Instance - 718


Cleaning Instances:  11%|█         | 125/1119 [19:16<1:11:09,  4.29s/it]

Processed: Label - 4, Instance - 732


Cleaning Instances:  11%|█▏        | 126/1119 [19:20<1:08:51,  4.16s/it]

Processed: Label - 4, Instance - 752


Cleaning Instances:  11%|█▏        | 127/1119 [19:24<1:09:04,  4.18s/it]

Processed: Label - 4, Instance - 756


Cleaning Instances:  11%|█▏        | 128/1119 [19:28<1:07:21,  4.08s/it]

Processed: Label - 4, Instance - 745


Cleaning Instances:  12%|█▏        | 129/1119 [19:32<1:05:15,  3.96s/it]

Processed: Label - 4, Instance - 757


Cleaning Instances:  12%|█▏        | 130/1119 [19:36<1:07:18,  4.08s/it]

Processed: Label - 4, Instance - 711


Cleaning Instances:  12%|█▏        | 131/1119 [19:40<1:06:01,  4.01s/it]

Processed: Label - 4, Instance - 715


Cleaning Instances:  12%|█▏        | 132/1119 [19:44<1:06:00,  4.01s/it]

Processed: Label - 4, Instance - 723


Cleaning Instances:  12%|█▏        | 133/1119 [19:48<1:07:41,  4.12s/it]

Processed: Label - 4, Instance - 733


Cleaning Instances:  12%|█▏        | 134/1119 [19:53<1:08:50,  4.19s/it]

Processed: Label - 4, Instance - 736


Cleaning Instances:  12%|█▏        | 135/1119 [19:57<1:07:34,  4.12s/it]

Processed: Label - 4, Instance - 717


Cleaning Instances:  12%|█▏        | 136/1119 [20:01<1:08:03,  4.15s/it]

Processed: Label - 4, Instance - 730


Cleaning Instances:  12%|█▏        | 137/1119 [20:05<1:09:34,  4.25s/it]

Processed: Label - 4, Instance - 735


Cleaning Instances:  12%|█▏        | 138/1119 [20:10<1:10:04,  4.29s/it]

Processed: Label - 4, Instance - 746


Cleaning Instances:  12%|█▏        | 139/1119 [20:14<1:07:25,  4.13s/it]

Processed: Label - 4, Instance - 725


Cleaning Instances:  13%|█▎        | 140/1119 [20:18<1:08:06,  4.17s/it]

Processed: Label - 4, Instance - 731


Cleaning Instances:  13%|█▎        | 141/1119 [20:22<1:06:35,  4.09s/it]

Processed: Label - 4, Instance - 739


Cleaning Instances:  13%|█▎        | 142/1119 [20:26<1:05:56,  4.05s/it]

Processed: Label - 4, Instance - 749


Cleaning Instances:  13%|█▎        | 143/1119 [20:30<1:06:59,  4.12s/it]

Processed: Label - 4, Instance - 760


Cleaning Instances:  13%|█▎        | 144/1119 [20:34<1:07:47,  4.17s/it]

Processed: Label - 4, Instance - 720


Cleaning Instances:  13%|█▎        | 145/1119 [20:38<1:05:41,  4.05s/it]        

Processed: Label - 4, Instance - 742


Cleaning Instances:  13%|█▎        | 146/1119 [20:42<1:07:00,  4.13s/it]

Processed: Label - 0, Instance - 311


Cleaning Instances:  13%|█▎        | 147/1119 [20:47<1:08:40,  4.24s/it]

Processed: Label - 0, Instance - 314


Cleaning Instances:  13%|█▎        | 148/1119 [20:51<1:10:30,  4.36s/it]

Processed: Label - 0, Instance - 328


Cleaning Instances:  13%|█▎        | 149/1119 [20:56<1:13:13,  4.53s/it]        

Processed: Label - 0, Instance - 338


Cleaning Instances:  13%|█▎        | 150/1119 [21:02<1:19:05,  4.90s/it]

Processed: Label - 0, Instance - 405


Cleaning Instances:  13%|█▎        | 151/1119 [21:08<1:21:42,  5.06s/it]

Processed: Label - 0, Instance - 282


Cleaning Instances:  14%|█▎        | 152/1119 [21:12<1:16:32,  4.75s/it]

Processed: Label - 0, Instance - 300


Cleaning Instances:  14%|█▎        | 153/1119 [21:16<1:13:02,  4.54s/it]

Processed: Label - 0, Instance - 306


Cleaning Instances:  14%|█▍        | 154/1119 [21:20<1:13:04,  4.54s/it]

Processed: Label - 0, Instance - 321


Cleaning Instances:  14%|█▍        | 155/1119 [21:24<1:10:59,  4.42s/it]

Processed: Label - 0, Instance - 323


Cleaning Instances:  14%|█▍        | 156/1119 [21:30<1:17:18,  4.82s/it]

Processed: Label - 0, Instance - 349


Cleaning Instances:  14%|█▍        | 157/1119 [21:36<1:20:51,  5.04s/it]

Processed: Label - 0, Instance - 355


Cleaning Instances:  14%|█▍        | 158/1119 [21:40<1:15:32,  4.72s/it]

Processed: Label - 0, Instance - 381


Cleaning Instances:  14%|█▍        | 159/1119 [21:45<1:18:43,  4.92s/it]

Processed: Label - 0, Instance - 385


Cleaning Instances:  14%|█▍        | 160/1119 [21:50<1:17:16,  4.83s/it]

Processed: Label - 0, Instance - 403


Cleaning Instances:  14%|█▍        | 161/1119 [21:54<1:14:17,  4.65s/it]

Processed: Label - 0, Instance - 425


Cleaning Instances:  14%|█▍        | 162/1119 [21:58<1:13:49,  4.63s/it]

Processed: Label - 0, Instance - 428


Cleaning Instances:  15%|█▍        | 163/1119 [22:04<1:18:08,  4.90s/it]

Processed: Label - 0, Instance - 436


Cleaning Instances:  15%|█▍        | 164/1119 [22:08<1:15:28,  4.74s/it]        

Processed: Label - 0, Instance - 297


Cleaning Instances:  15%|█▍        | 165/1119 [22:13<1:13:09,  4.60s/it]

Processed: Label - 0, Instance - 337


Cleaning Instances:  15%|█▍        | 166/1119 [22:17<1:13:26,  4.62s/it]

Processed: Label - 0, Instance - 339


Cleaning Instances:  15%|█▍        | 167/1119 [22:23<1:19:12,  4.99s/it]

Processed: Label - 0, Instance - 346


Cleaning Instances:  15%|█▌        | 168/1119 [22:28<1:18:23,  4.95s/it]

Processed: Label - 0, Instance - 352


Cleaning Instances:  15%|█▌        | 169/1119 [22:32<1:14:43,  4.72s/it]

Processed: Label - 0, Instance - 365


Cleaning Instances:  15%|█▌        | 170/1119 [22:38<1:18:09,  4.94s/it]

Processed: Label - 0, Instance - 371


Cleaning Instances:  15%|█▌        | 171/1119 [22:42<1:14:50,  4.74s/it]

Processed: Label - 0, Instance - 378


Cleaning Instances:  15%|█▌        | 172/1119 [22:46<1:12:46,  4.61s/it]

Processed: Label - 0, Instance - 404


Cleaning Instances:  15%|█▌        | 173/1119 [22:51<1:12:12,  4.58s/it]

Processed: Label - 0, Instance - 418


Cleaning Instances:  16%|█▌        | 174/1119 [22:56<1:16:13,  4.84s/it]

Processed: Label - 0, Instance - 434


Cleaning Instances:  16%|█▌        | 175/1119 [23:00<1:13:06,  4.65s/it]

Processed: Label - 0, Instance - 287


Cleaning Instances:  16%|█▌        | 176/1119 [23:06<1:17:30,  4.93s/it]

Processed: Label - 0, Instance - 289


Cleaning Instances:  16%|█▌        | 177/1119 [23:11<1:18:45,  5.02s/it]

Processed: Label - 0, Instance - 295


Cleaning Instances:  16%|█▌        | 178/1119 [23:17<1:20:18,  5.12s/it]

Processed: Label - 0, Instance - 357


Cleaning Instances:  16%|█▌        | 179/1119 [23:22<1:21:50,  5.22s/it]

Processed: Label - 0, Instance - 377


Cleaning Instances:  16%|█▌        | 180/1119 [23:28<1:23:13,  5.32s/it]

Processed: Label - 0, Instance - 379


Cleaning Instances:  16%|█▌        | 181/1119 [23:31<1:15:52,  4.85s/it]

Processed: Label - 0, Instance - 387


Cleaning Instances:  16%|█▋        | 182/1119 [23:37<1:20:29,  5.15s/it]

Processed: Label - 0, Instance - 397


Cleaning Instances:  16%|█▋        | 183/1119 [23:43<1:21:55,  5.25s/it]

Processed: Label - 0, Instance - 410


Cleaning Instances:  16%|█▋        | 184/1119 [23:47<1:17:13,  4.96s/it]

Processed: Label - 0, Instance - 412


Cleaning Instances:  17%|█▋        | 185/1119 [23:52<1:19:10,  5.09s/it]

Processed: Label - 0, Instance - 413


Cleaning Instances:  17%|█▋        | 186/1119 [23:58<1:21:09,  5.22s/it]

Processed: Label - 0, Instance - 417


Cleaning Instances:  17%|█▋        | 187/1119 [24:02<1:16:02,  4.90s/it]

Processed: Label - 0, Instance - 283


Cleaning Instances:  17%|█▋        | 188/1119 [24:06<1:13:49,  4.76s/it]

Processed: Label - 0, Instance - 334


Cleaning Instances:  17%|█▋        | 189/1119 [24:11<1:11:05,  4.59s/it]

Processed: Label - 0, Instance - 348


Cleaning Instances:  17%|█▋        | 190/1119 [24:14<1:07:26,  4.36s/it]

Processed: Label - 0, Instance - 367


Cleaning Instances:  17%|█▋        | 191/1119 [24:20<1:12:28,  4.69s/it]

Processed: Label - 0, Instance - 376


Cleaning Instances:  17%|█▋        | 192/1119 [24:25<1:13:07,  4.73s/it]

Processed: Label - 0, Instance - 399


Cleaning Instances:  17%|█▋        | 193/1119 [24:30<1:14:17,  4.81s/it]

Processed: Label - 0, Instance - 414


Cleaning Instances:  17%|█▋        | 194/1119 [24:34<1:13:00,  4.74s/it]

Processed: Label - 0, Instance - 416


Cleaning Instances:  17%|█▋        | 195/1119 [24:39<1:13:16,  4.76s/it]

Processed: Label - 0, Instance - 427


Cleaning Instances:  18%|█▊        | 196/1119 [24:43<1:11:06,  4.62s/it]

Processed: Label - 0, Instance - 285


Cleaning Instances:  18%|█▊        | 197/1119 [24:49<1:16:03,  4.95s/it]

Processed: Label - 0, Instance - 298


Cleaning Instances:  18%|█▊        | 198/1119 [24:54<1:14:12,  4.83s/it]

Processed: Label - 0, Instance - 299


Cleaning Instances:  18%|█▊        | 199/1119 [24:59<1:17:40,  5.07s/it]

Processed: Label - 0, Instance - 319


Cleaning Instances:  18%|█▊        | 200/1119 [25:05<1:19:20,  5.18s/it]

Processed: Label - 0, Instance - 336


Cleaning Instances:  18%|█▊        | 201/1119 [25:09<1:16:01,  4.97s/it]

Processed: Label - 0, Instance - 351


Cleaning Instances:  18%|█▊        | 202/1119 [25:15<1:18:08,  5.11s/it]

Processed: Label - 0, Instance - 358


Cleaning Instances:  18%|█▊        | 203/1119 [25:19<1:15:00,  4.91s/it]

Processed: Label - 0, Instance - 360


Cleaning Instances:  18%|█▊        | 204/1119 [25:23<1:12:22,  4.75s/it]

Processed: Label - 0, Instance - 369


Cleaning Instances:  18%|█▊        | 205/1119 [25:29<1:16:12,  5.00s/it]

Processed: Label - 0, Instance - 374


Cleaning Instances:  18%|█▊        | 206/1119 [25:33<1:12:36,  4.77s/it]

Processed: Label - 0, Instance - 409


Cleaning Instances:  18%|█▊        | 207/1119 [25:39<1:16:06,  5.01s/it]

Processed: Label - 0, Instance - 294


Cleaning Instances:  19%|█▊        | 208/1119 [25:43<1:13:20,  4.83s/it]

Processed: Label - 0, Instance - 296


Cleaning Instances:  19%|█▊        | 209/1119 [25:48<1:10:41,  4.66s/it]

Processed: Label - 0, Instance - 309


Cleaning Instances:  19%|█▉        | 210/1119 [25:51<1:06:04,  4.36s/it]

Processed: Label - 0, Instance - 384


Cleaning Instances:  19%|█▉        | 211/1119 [25:57<1:12:06,  4.77s/it]

Processed: Label - 0, Instance - 388


Cleaning Instances:  19%|█▉        | 212/1119 [26:03<1:16:18,  5.05s/it]

Processed: Label - 0, Instance - 389


Cleaning Instances:  19%|█▉        | 213/1119 [26:07<1:13:24,  4.86s/it]

Processed: Label - 0, Instance - 393


Cleaning Instances:  19%|█▉        | 214/1119 [26:13<1:16:19,  5.06s/it]

Processed: Label - 0, Instance - 415


Cleaning Instances:  19%|█▉        | 215/1119 [26:17<1:12:58,  4.84s/it]

Processed: Label - 0, Instance - 423


Cleaning Instances:  19%|█▉        | 216/1119 [26:21<1:10:58,  4.72s/it]

Processed: Label - 0, Instance - 312


Cleaning Instances:  19%|█▉        | 217/1119 [26:27<1:14:08,  4.93s/it]

Processed: Label - 0, Instance - 329


Cleaning Instances:  19%|█▉        | 218/1119 [26:33<1:20:20,  5.35s/it]        

Processed: Label - 0, Instance - 366


Cleaning Instances:  20%|█▉        | 219/1119 [26:39<1:21:14,  5.42s/it]

Processed: Label - 0, Instance - 382


Cleaning Instances:  20%|█▉        | 220/1119 [26:43<1:16:40,  5.12s/it]

Processed: Label - 0, Instance - 394


Cleaning Instances:  20%|█▉        | 221/1119 [26:48<1:13:45,  4.93s/it]

Processed: Label - 0, Instance - 400


Cleaning Instances:  20%|█▉        | 222/1119 [26:53<1:16:09,  5.09s/it]

Processed: Label - 0, Instance - 435


Cleaning Instances:  20%|█▉        | 223/1119 [26:59<1:18:19,  5.24s/it]

Processed: Label - 0, Instance - 281


Cleaning Instances:  20%|██        | 224/1119 [27:03<1:14:51,  5.02s/it]

Processed: Label - 0, Instance - 286


Cleaning Instances:  20%|██        | 225/1119 [27:09<1:16:21,  5.12s/it]

Processed: Label - 0, Instance - 288


Cleaning Instances:  20%|██        | 226/1119 [27:14<1:18:25,  5.27s/it]

Processed: Label - 0, Instance - 316


Cleaning Instances:  20%|██        | 227/1119 [27:18<1:14:09,  4.99s/it]

Processed: Label - 0, Instance - 317


Cleaning Instances:  20%|██        | 228/1119 [27:23<1:11:33,  4.82s/it]

Processed: Label - 0, Instance - 332


Cleaning Instances:  20%|██        | 229/1119 [27:28<1:14:20,  5.01s/it]

Processed: Label - 0, Instance - 347


Cleaning Instances:  21%|██        | 230/1119 [27:33<1:13:45,  4.98s/it]

Processed: Label - 0, Instance - 362


Cleaning Instances:  21%|██        | 231/1119 [27:39<1:15:52,  5.13s/it]

Processed: Label - 0, Instance - 375


Cleaning Instances:  21%|██        | 232/1119 [27:44<1:18:04,  5.28s/it]

Processed: Label - 0, Instance - 380


Cleaning Instances:  21%|██        | 233/1119 [27:49<1:15:05,  5.08s/it]

Processed: Label - 0, Instance - 386


Cleaning Instances:  21%|██        | 234/1119 [27:55<1:17:16,  5.24s/it]

Processed: Label - 0, Instance - 396


Cleaning Instances:  21%|██        | 235/1119 [28:00<1:17:58,  5.29s/it]        

Processed: Label - 0, Instance - 402


Cleaning Instances:  21%|██        | 236/1119 [28:05<1:18:37,  5.34s/it]

Processed: Label - 0, Instance - 406


Cleaning Instances:  21%|██        | 237/1119 [28:09<1:12:42,  4.95s/it]

Processed: Label - 0, Instance - 290


Cleaning Instances:  21%|██▏       | 238/1119 [28:15<1:14:50,  5.10s/it]

Processed: Label - 0, Instance - 308


Cleaning Instances:  21%|██▏       | 239/1119 [28:19<1:11:59,  4.91s/it]

Processed: Label - 0, Instance - 310


Cleaning Instances:  21%|██▏       | 240/1119 [28:24<1:12:25,  4.94s/it]

Processed: Label - 0, Instance - 315


Cleaning Instances:  22%|██▏       | 241/1119 [28:29<1:08:54,  4.71s/it]

Processed: Label - 0, Instance - 327


Cleaning Instances:  22%|██▏       | 242/1119 [28:34<1:11:07,  4.87s/it]

Processed: Label - 0, Instance - 330


Cleaning Instances:  22%|██▏       | 243/1119 [28:38<1:07:57,  4.65s/it]

Processed: Label - 0, Instance - 335


Cleaning Instances:  22%|██▏       | 244/1119 [28:42<1:06:48,  4.58s/it]

Processed: Label - 0, Instance - 340


Cleaning Instances:  22%|██▏       | 245/1119 [28:48<1:11:20,  4.90s/it]

Processed: Label - 0, Instance - 390


Cleaning Instances:  22%|██▏       | 246/1119 [28:53<1:09:43,  4.79s/it]

Processed: Label - 0, Instance - 391


Cleaning Instances:  22%|██▏       | 247/1119 [28:57<1:08:52,  4.74s/it]

Processed: Label - 0, Instance - 398


Cleaning Instances:  22%|██▏       | 248/1119 [29:01<1:05:21,  4.50s/it]

Processed: Label - 0, Instance - 422


Cleaning Instances:  22%|██▏       | 249/1119 [29:07<1:09:14,  4.78s/it]

Processed: Label - 0, Instance - 429


Cleaning Instances:  22%|██▏       | 250/1119 [29:11<1:09:25,  4.79s/it]

Processed: Label - 0, Instance - 292


Cleaning Instances:  22%|██▏       | 251/1119 [29:17<1:12:08,  4.99s/it]

Processed: Label - 0, Instance - 302


Cleaning Instances:  23%|██▎       | 252/1119 [29:21<1:08:44,  4.76s/it]

Processed: Label - 0, Instance - 304


Cleaning Instances:  23%|██▎       | 253/1119 [29:26<1:07:38,  4.69s/it]

Processed: Label - 0, Instance - 318


Cleaning Instances:  23%|██▎       | 254/1119 [29:30<1:05:18,  4.53s/it]

Processed: Label - 0, Instance - 331


Cleaning Instances:  23%|██▎       | 255/1119 [29:34<1:04:34,  4.48s/it]

Processed: Label - 0, Instance - 341


Cleaning Instances:  23%|██▎       | 256/1119 [29:39<1:06:12,  4.60s/it]

Processed: Label - 0, Instance - 343


Cleaning Instances:  23%|██▎       | 257/1119 [29:44<1:07:28,  4.70s/it]

Processed: Label - 0, Instance - 344


Cleaning Instances:  23%|██▎       | 258/1119 [29:48<1:05:26,  4.56s/it]

Processed: Label - 0, Instance - 354


Cleaning Instances:  23%|██▎       | 259/1119 [29:54<1:10:40,  4.93s/it]

Processed: Label - 0, Instance - 359


Cleaning Instances:  23%|██▎       | 260/1119 [29:58<1:06:40,  4.66s/it]

Processed: Label - 0, Instance - 370


Cleaning Instances:  23%|██▎       | 261/1119 [30:03<1:10:22,  4.92s/it]

Processed: Label - 0, Instance - 372


Cleaning Instances:  23%|██▎       | 262/1119 [30:09<1:12:56,  5.11s/it]

Processed: Label - 0, Instance - 419


Cleaning Instances:  24%|██▎       | 263/1119 [30:13<1:10:03,  4.91s/it]

Processed: Label - 0, Instance - 305


Cleaning Instances:  24%|██▎       | 264/1119 [30:18<1:08:21,  4.80s/it]

Processed: Label - 0, Instance - 364


Cleaning Instances:  24%|██▎       | 265/1119 [30:23<1:08:08,  4.79s/it]

Processed: Label - 0, Instance - 408


Cleaning Instances:  24%|██▍       | 266/1119 [30:27<1:05:46,  4.63s/it]

Processed: Label - 0, Instance - 424


Cleaning Instances:  24%|██▍       | 267/1119 [30:32<1:07:01,  4.72s/it]

Processed: Label - 0, Instance - 431


Cleaning Instances:  24%|██▍       | 268/1119 [30:38<1:13:11,  5.16s/it]

Processed: Label - 0, Instance - 432


Cleaning Instances:  24%|██▍       | 269/1119 [30:43<1:10:47,  5.00s/it]

Processed: Label - 0, Instance - 293


Cleaning Instances:  24%|██▍       | 270/1119 [30:48<1:13:02,  5.16s/it]

Processed: Label - 0, Instance - 303


Cleaning Instances:  24%|██▍       | 271/1119 [30:53<1:12:14,  5.11s/it]

Processed: Label - 0, Instance - 320


Cleaning Instances:  24%|██▍       | 272/1119 [30:59<1:15:10,  5.33s/it]

Processed: Label - 0, Instance - 324


Cleaning Instances:  24%|██▍       | 273/1119 [31:04<1:14:32,  5.29s/it]

Processed: Label - 0, Instance - 368


Cleaning Instances:  24%|██▍       | 274/1119 [31:09<1:10:42,  5.02s/it]

Processed: Label - 0, Instance - 392


Cleaning Instances:  25%|██▍       | 275/1119 [31:13<1:08:29,  4.87s/it]

Processed: Label - 0, Instance - 313


Cleaning Instances:  25%|██▍       | 276/1119 [31:18<1:06:44,  4.75s/it]

Processed: Label - 0, Instance - 333


Cleaning Instances:  25%|██▍       | 277/1119 [31:22<1:05:43,  4.68s/it]

Processed: Label - 0, Instance - 342


Cleaning Instances:  25%|██▍       | 278/1119 [31:28<1:09:25,  4.95s/it]

Processed: Label - 0, Instance - 345


Cleaning Instances:  25%|██▍       | 279/1119 [31:33<1:08:31,  4.89s/it]

Processed: Label - 0, Instance - 363


Cleaning Instances:  25%|██▌       | 280/1119 [31:38<1:11:12,  5.09s/it]

Processed: Label - 0, Instance - 395


Cleaning Instances:  25%|██▌       | 281/1119 [31:43<1:08:14,  4.89s/it]

Processed: Label - 0, Instance - 407


Cleaning Instances:  25%|██▌       | 282/1119 [31:46<1:03:33,  4.56s/it]

Processed: Label - 0, Instance - 420


Cleaning Instances:  25%|██▌       | 283/1119 [31:52<1:07:46,  4.86s/it]

Processed: Label - 0, Instance - 426


Cleaning Instances:  25%|██▌       | 284/1119 [31:56<1:05:16,  4.69s/it]

Processed: Label - 0, Instance - 430


Cleaning Instances:  25%|██▌       | 285/1119 [32:02<1:08:25,  4.92s/it]

Processed: Label - 0, Instance - 437


Cleaning Instances:  26%|██▌       | 286/1119 [32:07<1:10:40,  5.09s/it]

Processed: Label - 0, Instance - 350


Cleaning Instances:  26%|██▌       | 287/1119 [32:12<1:07:59,  4.90s/it]

Processed: Label - 0, Instance - 361


Cleaning Instances:  26%|██▌       | 288/1119 [32:16<1:06:02,  4.77s/it]

Processed: Label - 0, Instance - 373


Cleaning Instances:  26%|██▌       | 289/1119 [32:20<1:04:20,  4.65s/it]

Processed: Label - 0, Instance - 383


Cleaning Instances:  26%|██▌       | 290/1119 [32:26<1:07:42,  4.90s/it]

Processed: Label - 0, Instance - 401


Cleaning Instances:  26%|██▌       | 291/1119 [32:31<1:10:10,  5.09s/it]

Processed: Label - 0, Instance - 433


Cleaning Instances:  26%|██▌       | 292/1119 [32:37<1:11:03,  5.16s/it]

Processed: Label - 0, Instance - 284


Cleaning Instances:  26%|██▌       | 293/1119 [32:42<1:12:07,  5.24s/it]

Processed: Label - 0, Instance - 291


Cleaning Instances:  26%|██▋       | 294/1119 [32:47<1:08:42,  5.00s/it]

Processed: Label - 0, Instance - 301


Cleaning Instances:  26%|██▋       | 295/1119 [32:51<1:05:09,  4.74s/it]

Processed: Label - 0, Instance - 307


Cleaning Instances:  26%|██▋       | 296/1119 [32:55<1:04:10,  4.68s/it]

Processed: Label - 0, Instance - 322


Cleaning Instances:  27%|██▋       | 297/1119 [33:01<1:07:37,  4.94s/it]

Processed: Label - 0, Instance - 325


Cleaning Instances:  27%|██▋       | 298/1119 [33:05<1:04:45,  4.73s/it]

Processed: Label - 0, Instance - 326


Cleaning Instances:  27%|██▋       | 299/1119 [33:09<1:02:28,  4.57s/it]

Processed: Label - 0, Instance - 353


Cleaning Instances:  27%|██▋       | 300/1119 [33:14<1:01:48,  4.53s/it]

Processed: Label - 0, Instance - 356


Cleaning Instances:  27%|██▋       | 301/1119 [33:19<1:03:47,  4.68s/it]        

Processed: Label - 0, Instance - 411


Cleaning Instances:  27%|██▋       | 302/1119 [33:23<1:01:30,  4.52s/it]

Processed: Label - 0, Instance - 421


Cleaning Instances:  27%|██▋       | 303/1119 [33:27<1:01:04,  4.49s/it]

Processed: Label - 0, Instance - 540


Cleaning Instances:  27%|██▋       | 304/1119 [33:31<59:03,  4.35s/it]  

Processed: Label - 0, Instance - 596


Cleaning Instances:  27%|██▋       | 305/1119 [33:36<58:12,  4.29s/it]

Processed: Label - 0, Instance - 612


Cleaning Instances:  27%|██▋       | 306/1119 [33:40<58:35,  4.32s/it]

Processed: Label - 0, Instance - 635


Cleaning Instances:  27%|██▋       | 307/1119 [33:45<1:03:18,  4.68s/it]

Processed: Label - 0, Instance - 646


Cleaning Instances:  28%|██▊       | 308/1119 [33:50<1:02:03,  4.59s/it]

Processed: Label - 0, Instance - 650


Cleaning Instances:  28%|██▊       | 309/1119 [33:54<1:00:50,  4.51s/it]

Processed: Label - 0, Instance - 559


Cleaning Instances:  28%|██▊       | 310/1119 [33:58<1:00:03,  4.45s/it]        

Processed: Label - 0, Instance - 566


Cleaning Instances:  28%|██▊       | 311/1119 [34:03<58:56,  4.38s/it]  

Processed: Label - 0, Instance - 578


Cleaning Instances:  28%|██▊       | 312/1119 [34:08<1:03:31,  4.72s/it]

Processed: Label - 0, Instance - 599


Cleaning Instances:  28%|██▊       | 313/1119 [34:13<1:01:52,  4.61s/it]

Processed: Label - 0, Instance - 601


Cleaning Instances:  28%|██▊       | 314/1119 [34:17<1:00:35,  4.52s/it]

Processed: Label - 0, Instance - 606


Cleaning Instances:  28%|██▊       | 315/1119 [34:21<58:54,  4.40s/it]  

Processed: Label - 0, Instance - 608


Cleaning Instances:  28%|██▊       | 316/1119 [34:26<1:03:05,  4.71s/it]

Processed: Label - 0, Instance - 620


Cleaning Instances:  28%|██▊       | 317/1119 [34:31<1:01:48,  4.62s/it]

Processed: Label - 0, Instance - 544


Cleaning Instances:  28%|██▊       | 318/1119 [34:35<1:00:08,  4.51s/it]

Processed: Label - 0, Instance - 561


Cleaning Instances:  29%|██▊       | 319/1119 [34:39<59:12,  4.44s/it]  

Processed: Label - 0, Instance - 569


Cleaning Instances:  29%|██▊       | 320/1119 [34:44<59:09,  4.44s/it]

Processed: Label - 0, Instance - 572


Cleaning Instances:  29%|██▊       | 321/1119 [34:49<1:03:30,  4.78s/it]

Processed: Label - 0, Instance - 585


Cleaning Instances:  29%|██▉       | 322/1119 [34:54<1:02:49,  4.73s/it]

Processed: Label - 0, Instance - 604


Cleaning Instances:  29%|██▉       | 323/1119 [34:59<1:02:33,  4.72s/it]

Processed: Label - 0, Instance - 617


Cleaning Instances:  29%|██▉       | 324/1119 [35:03<59:46,  4.51s/it]  

Processed: Label - 0, Instance - 556


Cleaning Instances:  29%|██▉       | 325/1119 [35:08<1:03:43,  4.82s/it]

Processed: Label - 0, Instance - 576


Cleaning Instances:  29%|██▉       | 326/1119 [35:14<1:06:02,  5.00s/it]

Processed: Label - 0, Instance - 577


Cleaning Instances:  29%|██▉       | 327/1119 [35:18<1:03:16,  4.79s/it]

Processed: Label - 0, Instance - 595


Cleaning Instances:  29%|██▉       | 328/1119 [35:23<1:05:41,  4.98s/it]

Processed: Label - 0, Instance - 603


Cleaning Instances:  29%|██▉       | 329/1119 [35:27<1:01:07,  4.64s/it]

Processed: Label - 0, Instance - 609


Cleaning Instances:  29%|██▉       | 330/1119 [35:32<59:55,  4.56s/it]  

Processed: Label - 0, Instance - 623


Cleaning Instances:  30%|██▉       | 331/1119 [35:37<1:01:33,  4.69s/it]

Processed: Label - 0, Instance - 649


24/11/27 00:47:33 WARN TaskSetManager: Stage 34874 contains a task of very large size (1989 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:  30%|██▉       | 332/1119 [35:56<2:00:03,  9.15s/it]        

Processed: Label - 7, Instance - 651


Cleaning Instances:  30%|██▉       | 333/1119 [36:00<1:40:28,  7.67s/it]

Processed: Label - 0, Instance - 542


Cleaning Instances:  30%|██▉       | 334/1119 [36:05<1:27:28,  6.69s/it]

Processed: Label - 0, Instance - 551


Cleaning Instances:  30%|██▉       | 335/1119 [36:09<1:18:01,  5.97s/it]

Processed: Label - 0, Instance - 564


Cleaning Instances:  30%|███       | 336/1119 [36:14<1:15:53,  5.82s/it]

Processed: Label - 0, Instance - 570


Cleaning Instances:  30%|███       | 337/1119 [36:20<1:14:23,  5.71s/it]

Processed: Label - 0, Instance - 598


Cleaning Instances:  30%|███       | 338/1119 [36:24<1:07:58,  5.22s/it]

Processed: Label - 0, Instance - 605


Cleaning Instances:  30%|███       | 339/1119 [36:28<1:04:50,  4.99s/it]

Processed: Label - 0, Instance - 611


Cleaning Instances:  30%|███       | 340/1119 [36:33<1:02:52,  4.84s/it]

Processed: Label - 0, Instance - 627


Cleaning Instances:  30%|███       | 341/1119 [36:37<1:01:21,  4.73s/it]

Processed: Label - 0, Instance - 634


Cleaning Instances:  31%|███       | 342/1119 [36:42<1:01:36,  4.76s/it]

Processed: Label - 0, Instance - 641


Cleaning Instances:  31%|███       | 343/1119 [36:46<59:09,  4.57s/it]  

Processed: Label - 0, Instance - 642


Cleaning Instances:  31%|███       | 344/1119 [36:51<58:59,  4.57s/it]

Processed: Label - 0, Instance - 549


Cleaning Instances:  31%|███       | 345/1119 [36:55<57:51,  4.48s/it]

Processed: Label - 0, Instance - 583


Cleaning Instances:  31%|███       | 346/1119 [37:00<59:43,  4.64s/it]

Processed: Label - 0, Instance - 591


Cleaning Instances:  31%|███       | 347/1119 [37:05<59:51,  4.65s/it]

Processed: Label - 0, Instance - 602


Cleaning Instances:  31%|███       | 348/1119 [37:11<1:04:13,  5.00s/it]

Processed: Label - 0, Instance - 622


Cleaning Instances:  31%|███       | 349/1119 [37:15<1:01:29,  4.79s/it]

Processed: Label - 0, Instance - 638


Cleaning Instances:  31%|███▏      | 350/1119 [37:21<1:04:46,  5.05s/it]        

Processed: Label - 0, Instance - 546


Cleaning Instances:  31%|███▏      | 351/1119 [37:25<1:02:50,  4.91s/it]

Processed: Label - 0, Instance - 563


Cleaning Instances:  31%|███▏      | 352/1119 [37:31<1:05:57,  5.16s/it]

Processed: Label - 0, Instance - 581


Cleaning Instances:  32%|███▏      | 353/1119 [37:36<1:05:02,  5.09s/it]

Processed: Label - 0, Instance - 592


Cleaning Instances:  32%|███▏      | 354/1119 [37:40<1:02:17,  4.89s/it]

Processed: Label - 0, Instance - 607


Cleaning Instances:  32%|███▏      | 355/1119 [37:44<58:54,  4.63s/it]  

Processed: Label - 0, Instance - 613


Cleaning Instances:  32%|███▏      | 356/1119 [37:48<56:41,  4.46s/it]

Processed: Label - 0, Instance - 615


Cleaning Instances:  32%|███▏      | 357/1119 [37:52<54:48,  4.32s/it]

Processed: Label - 0, Instance - 616


Cleaning Instances:  32%|███▏      | 358/1119 [37:58<59:35,  4.70s/it]

Processed: Label - 0, Instance - 631


24/11/27 00:49:46 WARN TaskSetManager: Stage 37709 contains a task of very large size (1143 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:  32%|███▏      | 359/1119 [38:09<1:24:13,  6.65s/it]

Processed: Label - 7, Instance - 654


Cleaning Instances:  32%|███▏      | 360/1119 [38:15<1:18:59,  6.24s/it]

Processed: Label - 0, Instance - 543


Cleaning Instances:  32%|███▏      | 361/1119 [38:19<1:13:09,  5.79s/it]

Processed: Label - 0, Instance - 550


Cleaning Instances:  32%|███▏      | 362/1119 [38:25<1:11:46,  5.69s/it]

Processed: Label - 0, Instance - 571


Cleaning Instances:  32%|███▏      | 363/1119 [38:29<1:06:59,  5.32s/it]

Processed: Label - 0, Instance - 573


Cleaning Instances:  33%|███▎      | 364/1119 [38:34<1:03:32,  5.05s/it]

Processed: Label - 0, Instance - 593


Cleaning Instances:  33%|███▎      | 365/1119 [38:38<1:00:38,  4.83s/it]

Processed: Label - 0, Instance - 633


Cleaning Instances:  33%|███▎      | 366/1119 [38:44<1:06:10,  5.27s/it]        

Processed: Label - 0, Instance - 636


Cleaning Instances:  33%|███▎      | 367/1119 [38:49<1:03:42,  5.08s/it]

Processed: Label - 0, Instance - 637


Cleaning Instances:  33%|███▎      | 368/1119 [38:54<1:02:06,  4.96s/it]

Processed: Label - 0, Instance - 648


Cleaning Instances:  33%|███▎      | 369/1119 [38:58<1:00:58,  4.88s/it]

Processed: Label - 0, Instance - 547


Cleaning Instances:  33%|███▎      | 370/1119 [39:04<1:02:44,  5.03s/it]        

Processed: Label - 0, Instance - 554


Cleaning Instances:  33%|███▎      | 371/1119 [39:08<1:01:43,  4.95s/it]

Processed: Label - 0, Instance - 575


Cleaning Instances:  33%|███▎      | 372/1119 [39:13<1:00:13,  4.84s/it]

Processed: Label - 0, Instance - 579


Cleaning Instances:  33%|███▎      | 373/1119 [39:17<57:33,  4.63s/it]  

Processed: Label - 0, Instance - 584


Cleaning Instances:  33%|███▎      | 374/1119 [39:22<58:04,  4.68s/it]

Processed: Label - 0, Instance - 587


Cleaning Instances:  34%|███▎      | 375/1119 [39:27<1:01:12,  4.94s/it]

Processed: Label - 0, Instance - 597


Cleaning Instances:  34%|███▎      | 376/1119 [39:32<1:00:20,  4.87s/it]

Processed: Label - 0, Instance - 614


Cleaning Instances:  34%|███▎      | 377/1119 [39:36<58:24,  4.72s/it]  

Processed: Label - 0, Instance - 624


Cleaning Instances:  34%|███▍      | 378/1119 [39:41<57:00,  4.62s/it]

Processed: Label - 0, Instance - 630


Cleaning Instances:  34%|███▍      | 379/1119 [39:46<1:00:21,  4.89s/it]

Processed: Label - 0, Instance - 640


Cleaning Instances:  34%|███▍      | 380/1119 [39:52<1:02:53,  5.11s/it]

Processed: Label - 0, Instance - 647


24/11/27 00:51:47 WARN TaskSetManager: Stage 40019 contains a task of very large size (1809 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:  34%|███▍      | 381/1119 [40:10<1:51:24,  9.06s/it]

Processed: Label - 7, Instance - 655


Cleaning Instances:  34%|███▍      | 382/1119 [40:15<1:34:02,  7.66s/it]

Processed: Label - 0, Instance - 541


Cleaning Instances:  34%|███▍      | 383/1119 [40:20<1:24:12,  6.86s/it]

Processed: Label - 0, Instance - 560


Cleaning Instances:  34%|███▍      | 384/1119 [40:24<1:16:12,  6.22s/it]

Processed: Label - 0, Instance - 621


Cleaning Instances:  34%|███▍      | 385/1119 [40:28<1:08:04,  5.56s/it]

Processed: Label - 0, Instance - 557


Cleaning Instances:  34%|███▍      | 386/1119 [40:33<1:03:04,  5.16s/it]

Processed: Label - 0, Instance - 565


Cleaning Instances:  35%|███▍      | 387/1119 [40:37<1:00:36,  4.97s/it]

Processed: Label - 0, Instance - 574


Cleaning Instances:  35%|███▍      | 388/1119 [40:43<1:02:45,  5.15s/it]

Processed: Label - 0, Instance - 618


Cleaning Instances:  35%|███▍      | 389/1119 [40:48<1:03:57,  5.26s/it]

Processed: Label - 0, Instance - 632


Cleaning Instances:  35%|███▍      | 390/1119 [40:54<1:05:17,  5.37s/it]

Processed: Label - 0, Instance - 645


Cleaning Instances:  35%|███▍      | 391/1119 [40:58<1:01:45,  5.09s/it]

Processed: Label - 0, Instance - 568


Cleaning Instances:  35%|███▌      | 392/1119 [41:03<1:00:44,  5.01s/it]

Processed: Label - 0, Instance - 590


Cleaning Instances:  35%|███▌      | 393/1119 [41:09<1:02:33,  5.17s/it]

Processed: Label - 0, Instance - 600


Cleaning Instances:  35%|███▌      | 394/1119 [41:13<58:00,  4.80s/it]  

Processed: Label - 0, Instance - 626


Cleaning Instances:  35%|███▌      | 395/1119 [41:18<1:00:49,  5.04s/it]

Processed: Label - 0, Instance - 628


Cleaning Instances:  35%|███▌      | 396/1119 [41:23<1:00:40,  5.03s/it]

Processed: Label - 0, Instance - 545


Cleaning Instances:  35%|███▌      | 397/1119 [41:27<56:33,  4.70s/it]  

Processed: Label - 0, Instance - 558


Cleaning Instances:  36%|███▌      | 398/1119 [41:32<55:27,  4.61s/it]

Processed: Label - 0, Instance - 582


Cleaning Instances:  36%|███▌      | 399/1119 [41:37<58:31,  4.88s/it]

Processed: Label - 0, Instance - 619


Cleaning Instances:  36%|███▌      | 400/1119 [41:41<55:52,  4.66s/it]

Processed: Label - 0, Instance - 629


Cleaning Instances:  36%|███▌      | 401/1119 [41:46<54:26,  4.55s/it]

Processed: Label - 0, Instance - 548


Cleaning Instances:  36%|███▌      | 402/1119 [41:51<58:00,  4.85s/it]

Processed: Label - 0, Instance - 555


Cleaning Instances:  36%|███▌      | 403/1119 [41:57<1:00:15,  5.05s/it]

Processed: Label - 0, Instance - 562


Cleaning Instances:  36%|███▌      | 404/1119 [42:01<58:23,  4.90s/it]  

Processed: Label - 0, Instance - 588


Cleaning Instances:  36%|███▌      | 405/1119 [42:07<1:00:30,  5.08s/it]

Processed: Label - 0, Instance - 610


Cleaning Instances:  36%|███▋      | 406/1119 [42:11<58:27,  4.92s/it]  

Processed: Label - 0, Instance - 625


Cleaning Instances:  36%|███▋      | 407/1119 [42:15<53:13,  4.49s/it]

Processed: Label - 0, Instance - 643


24/11/27 00:54:12 WARN TaskSetManager: Stage 42854 contains a task of very large size (1989 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:  36%|███▋      | 408/1119 [42:35<1:49:23,  9.23s/it]        

Processed: Label - 7, Instance - 653


Cleaning Instances:  37%|███▋      | 409/1119 [42:40<1:32:37,  7.83s/it]

Processed: Label - 0, Instance - 552


Cleaning Instances:  37%|███▋      | 410/1119 [42:44<1:19:49,  6.75s/it]

Processed: Label - 0, Instance - 580


Cleaning Instances:  37%|███▋      | 411/1119 [42:48<1:11:02,  6.02s/it]

Processed: Label - 0, Instance - 586


Cleaning Instances:  37%|███▋      | 412/1119 [42:53<1:05:18,  5.54s/it]

Processed: Label - 0, Instance - 644


Cleaning Instances:  37%|███▋      | 413/1119 [42:57<1:00:05,  5.11s/it]

Processed: Label - 0, Instance - 553


Cleaning Instances:  37%|███▋      | 414/1119 [43:02<1:01:01,  5.19s/it]

Processed: Label - 0, Instance - 567


Cleaning Instances:  37%|███▋      | 415/1119 [43:06<57:14,  4.88s/it]  

Processed: Label - 0, Instance - 589


Cleaning Instances:  37%|███▋      | 416/1119 [43:10<55:12,  4.71s/it]

Processed: Label - 0, Instance - 594


Cleaning Instances:  37%|███▋      | 417/1119 [43:15<55:18,  4.73s/it]

Processed: Label - 0, Instance - 639


Cleaning Instances:  37%|███▋      | 418/1119 [43:23<1:04:08,  5.49s/it]

Processed: Label - 7, Instance - 652


24/11/27 00:55:56 WARN TaskSetManager: Stage 44009 contains a task of very large size (7057 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:  37%|███▋      | 419/1119 [44:20<4:06:12, 21.10s/it]        

Processed: Label - 7, Instance - 656


Cleaning Instances:  38%|███▊      | 420/1119 [44:26<3:12:30, 16.52s/it]

Processed: Label - 9, Instance - 35


Cleaning Instances:  38%|███▊      | 421/1119 [44:30<2:29:54, 12.89s/it]

Processed: Label - 0, Instance - 68


Cleaning Instances:  38%|███▊      | 422/1119 [44:36<2:03:15, 10.61s/it]

Processed: Label - 0, Instance - 73


Cleaning Instances:  38%|███▊      | 423/1119 [44:43<1:52:57,  9.74s/it]

Processed: Label - 9, Instance - 22


Cleaning Instances:  38%|███▊      | 424/1119 [44:50<1:42:55,  8.89s/it]

Processed: Label - 9, Instance - 24


Cleaning Instances:  38%|███▊      | 425/1119 [44:54<1:26:45,  7.50s/it]

Processed: Label - 9, Instance - 26


Cleaning Instances:  38%|███▊      | 426/1119 [45:01<1:25:00,  7.36s/it]

Processed: Label - 9, Instance - 38


24/11/27 00:57:02 WARN TaskSetManager: Stage 44849 contains a task of very large size (2770 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:  38%|███▊      | 427/1119 [45:25<2:21:57, 12.31s/it]        

Processed: Label - 9, Instance - 44


24/11/27 00:57:25 WARN TaskSetManager: Stage 44954 contains a task of very large size (2519 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:  38%|███▊      | 428/1119 [45:48<2:56:25, 15.32s/it]        

Processed: Label - 9, Instance - 49


Cleaning Instances:  38%|███▊      | 429/1119 [45:54<2:24:30, 12.57s/it]

Processed: Label - 9, Instance - 53


Cleaning Instances:  38%|███▊      | 430/1119 [45:59<2:00:01, 10.45s/it]

Processed: Label - 0, Instance - 59


Cleaning Instances:  39%|███▊      | 431/1119 [46:04<1:39:18,  8.66s/it]

Processed: Label - 0, Instance - 70


Cleaning Instances:  39%|███▊      | 432/1119 [46:09<1:28:16,  7.71s/it]

Processed: Label - 0, Instance - 76


Cleaning Instances:  39%|███▊      | 433/1119 [46:15<1:21:45,  7.15s/it]

Processed: Label - 9, Instance - 32


Cleaning Instances:  39%|███▉      | 434/1119 [46:20<1:13:34,  6.44s/it]

Processed: Label - 9, Instance - 36


Cleaning Instances:  39%|███▉      | 435/1119 [46:24<1:05:52,  5.78s/it]

Processed: Label - 0, Instance - 75


24/11/27 00:58:16 WARN TaskSetManager: Stage 45794 contains a task of very large size (1515 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:  39%|███▉      | 436/1119 [46:39<1:36:11,  8.45s/it]        

Processed: Label - 9, Instance - 50


Cleaning Instances:  39%|███▉      | 437/1119 [46:43<1:21:59,  7.21s/it]

Processed: Label - 0, Instance - 81


Cleaning Instances:  39%|███▉      | 438/1119 [46:50<1:21:46,  7.20s/it]

Processed: Label - 9, Instance - 28


24/11/27 00:58:42 WARN TaskSetManager: Stage 46109 contains a task of very large size (1515 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:  39%|███▉      | 439/1119 [47:05<1:46:45,  9.42s/it]

Processed: Label - 9, Instance - 43


Cleaning Instances:  39%|███▉      | 440/1119 [47:09<1:27:26,  7.73s/it]

Processed: Label - 9, Instance - 52


Cleaning Instances:  39%|███▉      | 441/1119 [47:13<1:15:15,  6.66s/it]

Processed: Label - 0, Instance - 64


Cleaning Instances:  39%|███▉      | 442/1119 [47:18<1:10:44,  6.27s/it]

Processed: Label - 0, Instance - 74


Cleaning Instances:  40%|███▉      | 443/1119 [47:23<1:05:06,  5.78s/it]

Processed: Label - 9, Instance - 20


Cleaning Instances:  40%|███▉      | 444/1119 [47:30<1:08:13,  6.06s/it]

Processed: Label - 9, Instance - 40


Cleaning Instances:  40%|███▉      | 445/1119 [47:34<1:02:29,  5.56s/it]

Processed: Label - 9, Instance - 25


Cleaning Instances:  40%|███▉      | 446/1119 [47:38<55:59,  4.99s/it]  

Processed: Label - 9, Instance - 30


Cleaning Instances:  40%|███▉      | 447/1119 [47:42<52:15,  4.67s/it]

Processed: Label - 9, Instance - 48


Cleaning Instances:  40%|████      | 448/1119 [47:47<53:11,  4.76s/it]

Processed: Label - 9, Instance - 54


Cleaning Instances:  40%|████      | 449/1119 [47:51<51:41,  4.63s/it]

Processed: Label - 9, Instance - 56


Cleaning Instances:  40%|████      | 450/1119 [47:55<50:48,  4.56s/it]

Processed: Label - 0, Instance - 57


Cleaning Instances:  40%|████      | 451/1119 [48:00<49:55,  4.48s/it]

Processed: Label - 9, Instance - 34


24/11/27 00:59:56 WARN TaskSetManager: Stage 47474 contains a task of very large size (2017 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:  40%|████      | 452/1119 [48:19<1:40:37,  9.05s/it]        

Processed: Label - 9, Instance - 51


Cleaning Instances:  40%|████      | 453/1119 [48:24<1:24:42,  7.63s/it]

Processed: Label - 0, Instance - 65


Cleaning Instances:  41%|████      | 454/1119 [48:28<1:14:09,  6.69s/it]

Processed: Label - 0, Instance - 78


Cleaning Instances:  41%|████      | 455/1119 [48:32<1:05:41,  5.94s/it]

Processed: Label - 0, Instance - 82


Cleaning Instances:  41%|████      | 456/1119 [48:36<59:37,  5.40s/it]          

Processed: Label - 9, Instance - 21


Cleaning Instances:  41%|████      | 457/1119 [48:42<1:00:38,  5.50s/it]

Processed: Label - 0, Instance - 69


Cleaning Instances:  41%|████      | 458/1119 [48:48<1:01:02,  5.54s/it]

Processed: Label - 0, Instance - 60


Cleaning Instances:  41%|████      | 459/1119 [48:54<1:02:56,  5.72s/it]

Processed: Label - 0, Instance - 77


Cleaning Instances:  41%|████      | 460/1119 [49:02<1:11:16,  6.49s/it]

Processed: Label - 9, Instance - 19


Cleaning Instances:  41%|████      | 461/1119 [49:07<1:07:07,  6.12s/it]

Processed: Label - 9, Instance - 23


Cleaning Instances:  41%|████▏     | 462/1119 [49:14<1:07:45,  6.19s/it]

Processed: Label - 9, Instance - 41


24/11/27 01:01:05 WARN TaskSetManager: Stage 48629 contains a task of very large size (1264 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:  41%|████▏     | 463/1119 [49:28<1:35:04,  8.70s/it]        

Processed: Label - 9, Instance - 46


Cleaning Instances:  41%|████▏     | 464/1119 [49:33<1:20:49,  7.40s/it]

Processed: Label - 0, Instance - 67


Cleaning Instances:  42%|████▏     | 465/1119 [49:38<1:14:54,  6.87s/it]

Processed: Label - 0, Instance - 72


Cleaning Instances:  42%|████▏     | 466/1119 [49:43<1:08:45,  6.32s/it]

Processed: Label - 9, Instance - 47


Cleaning Instances:  42%|████▏     | 467/1119 [49:48<1:02:56,  5.79s/it]

Processed: Label - 0, Instance - 62


Cleaning Instances:  42%|████▏     | 468/1119 [49:54<1:01:58,  5.71s/it]

Processed: Label - 9, Instance - 33


Cleaning Instances:  42%|████▏     | 469/1119 [49:57<54:16,  5.01s/it]  

Processed: Label - 9, Instance - 37


Cleaning Instances:  42%|████▏     | 470/1119 [50:02<55:36,  5.14s/it]

Processed: Label - 0, Instance - 63


Cleaning Instances:  42%|████▏     | 471/1119 [50:08<56:37,  5.24s/it]

Processed: Label - 0, Instance - 80


Cleaning Instances:  42%|████▏     | 472/1119 [50:16<1:07:39,  6.27s/it]

Processed: Label - 9, Instance - 31


Cleaning Instances:  42%|████▏     | 473/1119 [50:21<1:00:40,  5.64s/it]

Processed: Label - 9, Instance - 39


Cleaning Instances:  42%|████▏     | 474/1119 [50:24<54:49,  5.10s/it]  

Processed: Label - 9, Instance - 42


Cleaning Instances:  42%|████▏     | 475/1119 [50:32<1:03:25,  5.91s/it]

Processed: Label - 9, Instance - 55


Cleaning Instances:  43%|████▎     | 476/1119 [50:38<1:01:42,  5.76s/it]

Processed: Label - 0, Instance - 61


Cleaning Instances:  43%|████▎     | 477/1119 [50:42<56:45,  5.31s/it]  

Processed: Label - 0, Instance - 66


Cleaning Instances:  43%|████▎     | 478/1119 [50:46<53:47,  5.04s/it]

Processed: Label - 0, Instance - 71


Cleaning Instances:  43%|████▎     | 479/1119 [50:51<51:15,  4.81s/it]

Processed: Label - 0, Instance - 79


Cleaning Instances:  43%|████▎     | 480/1119 [50:55<51:21,  4.82s/it]

Processed: Label - 9, Instance - 27


Cleaning Instances:  43%|████▎     | 481/1119 [51:02<57:05,  5.37s/it]

Processed: Label - 9, Instance - 29


Cleaning Instances:  43%|████▎     | 482/1119 [51:06<53:45,  5.06s/it]

Processed: Label - 9, Instance - 45


Cleaning Instances:  43%|████▎     | 483/1119 [51:11<52:35,  4.96s/it]

Processed: Label - 0, Instance - 58


24/11/27 01:03:12 WARN TaskSetManager: Stage 50834 contains a task of very large size (2439 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:  43%|████▎     | 484/1119 [51:35<1:51:40, 10.55s/it]        

Processed: Label - 8, Instance - 702


24/11/27 01:03:34 WARN TaskSetManager: Stage 50939 contains a task of very large size (2439 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:  43%|████▎     | 485/1119 [51:58<2:32:21, 14.42s/it]        

Processed: Label - 8, Instance - 700


24/11/27 01:03:57 WARN TaskSetManager: Stage 51044 contains a task of very large size (2439 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:  43%|████▎     | 486/1119 [52:20<2:55:10, 16.60s/it]

Processed: Label - 8, Instance - 703


24/11/27 01:04:21 WARN TaskSetManager: Stage 51149 contains a task of very large size (2682 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:  44%|████▎     | 487/1119 [52:44<3:19:04, 18.90s/it]        

Processed: Label - 8, Instance - 699


24/11/27 01:04:59 WARN TaskSetManager: Stage 51254 contains a task of very large size (4626 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:  44%|████▎     | 488/1119 [53:23<4:21:16, 24.84s/it]        

Processed: Label - 8, Instance - 698


24/11/27 01:05:34 WARN TaskSetManager: Stage 51359 contains a task of very large size (3969 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:  44%|████▎     | 489/1119 [53:57<4:51:15, 27.74s/it]        

Processed: Label - 8, Instance - 701


24/11/27 01:07:01 WARN TaskSetManager: Stage 51464 contains a task of very large size (11099 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:  44%|████▍     | 490/1119 [55:27<8:06:28, 46.40s/it]        

Processed: Label - 8, Instance - 704


24/11/27 01:07:21 WARN TaskSetManager: Stage 51569 contains a task of very large size (1548 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:  44%|████▍     | 491/1119 [55:44<6:31:51, 37.44s/it]

Processed: Label - 7, Instance - 664


24/11/27 01:07:56 WARN TaskSetManager: Stage 51674 contains a task of very large size (4140 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:  44%|████▍     | 492/1119 [56:19<6:23:55, 36.74s/it]        

Processed: Label - 7, Instance - 674


24/11/27 01:08:10 WARN TaskSetManager: Stage 51779 contains a task of very large size (1269 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:  44%|████▍     | 493/1119 [56:33<5:11:42, 29.88s/it]

Processed: Label - 7, Instance - 668


24/11/27 01:08:34 WARN TaskSetManager: Stage 51884 contains a task of very large size (2682 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:  44%|████▍     | 494/1119 [56:57<4:53:10, 28.15s/it]        

Processed: Label - 7, Instance - 670


Cleaning Instances:  44%|████▍     | 495/1119 [57:07<3:55:45, 22.67s/it]

Processed: Label - 7, Instance - 669


24/11/27 01:09:54 WARN TaskSetManager: Stage 52094 contains a task of very large size (8272 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:  44%|████▍     | 496/1119 [58:18<6:25:30, 37.13s/it]        

Processed: Label - 7, Instance - 671


Cleaning Instances:  44%|████▍     | 497/1119 [58:27<4:56:56, 28.64s/it]

Processed: Label - 7, Instance - 673


Cleaning Instances:  45%|████▍     | 498/1119 [58:32<3:44:52, 21.73s/it]

Processed: Label - 7, Instance - 667


24/11/27 01:10:56 WARN TaskSetManager: Stage 52409 contains a task of very large size (5842 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:  45%|████▍     | 499/1119 [59:20<5:04:44, 29.49s/it]        

Processed: Label - 7, Instance - 672


24/11/27 01:11:10 WARN TaskSetManager: Stage 52514 contains a task of very large size (1224 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:  45%|████▍     | 500/1119 [59:33<4:14:08, 24.63s/it]        

Processed: Label - 7, Instance - 666


24/11/27 01:11:32 WARN TaskSetManager: Stage 52619 contains a task of very large size (2151 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:  45%|████▍     | 501/1119 [59:55<4:05:35, 23.84s/it]        

Processed: Label - 7, Instance - 665


24/11/27 01:11:49 WARN TaskSetManager: Stage 52724 contains a task of very large size (1710 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:  45%|████▍     | 502/1119 [1:00:12<3:44:30, 21.83s/it]      

Processed: Label - 7, Instance - 661


Cleaning Instances:  45%|████▍     | 503/1119 [1:00:20<3:01:23, 17.67s/it]

Processed: Label - 7, Instance - 657


24/11/27 01:12:35 WARN TaskSetManager: Stage 52934 contains a task of very large size (4329 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:  45%|████▌     | 504/1119 [1:00:58<4:03:25, 23.75s/it]      

Processed: Label - 7, Instance - 658


24/11/27 01:12:50 WARN TaskSetManager: Stage 53039 contains a task of very large size (1269 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:  45%|████▌     | 505/1119 [1:01:13<3:36:52, 21.19s/it]

Processed: Label - 7, Instance - 659


24/11/27 01:13:27 WARN TaskSetManager: Stage 53144 contains a task of very large size (3681 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:  45%|████▌     | 506/1119 [1:01:51<4:25:40, 26.00s/it]      

Processed: Label - 7, Instance - 663


24/11/27 01:13:57 WARN TaskSetManager: Stage 53249 contains a task of very large size (3069 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:  45%|████▌     | 507/1119 [1:02:20<4:36:44, 27.13s/it]      

Processed: Label - 7, Instance - 662


24/11/27 01:14:18 WARN TaskSetManager: Stage 53354 contains a task of very large size (1989 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:  45%|████▌     | 508/1119 [1:02:41<4:15:14, 25.07s/it]

Processed: Label - 7, Instance - 660


Cleaning Instances:  45%|████▌     | 509/1119 [1:02:45<3:11:42, 18.86s/it]

Processed: Label - 4, Instance - 865


Cleaning Instances:  46%|████▌     | 510/1119 [1:02:49<2:26:04, 14.39s/it]

Processed: Label - 4, Instance - 892


Cleaning Instances:  46%|████▌     | 511/1119 [1:02:54<1:56:08, 11.46s/it]

Processed: Label - 4, Instance - 905


Cleaning Instances:  46%|████▌     | 512/1119 [1:02:58<1:33:40,  9.26s/it]

Processed: Label - 4, Instance - 913


Cleaning Instances:  46%|████▌     | 513/1119 [1:03:02<1:17:22,  7.66s/it]

Processed: Label - 4, Instance - 927


Cleaning Instances:  46%|████▌     | 514/1119 [1:03:06<1:06:15,  6.57s/it]

Processed: Label - 4, Instance - 931


Cleaning Instances:  46%|████▌     | 515/1119 [1:03:10<58:25,  5.80s/it]  

Processed: Label - 4, Instance - 947


Cleaning Instances:  46%|████▌     | 516/1119 [1:03:14<53:12,  5.30s/it]

Processed: Label - 4, Instance - 952


Cleaning Instances:  46%|████▌     | 517/1119 [1:03:18<49:05,  4.89s/it]

Processed: Label - 4, Instance - 964


Cleaning Instances:  46%|████▋     | 518/1119 [1:03:22<47:05,  4.70s/it]

Processed: Label - 4, Instance - 994


Cleaning Instances:  46%|████▋     | 519/1119 [1:03:26<44:40,  4.47s/it]

Processed: Label - 4, Instance - 1018


Cleaning Instances:  46%|████▋     | 520/1119 [1:03:30<45:07,  4.52s/it]

Processed: Label - 4, Instance - 1028


Cleaning Instances:  47%|████▋     | 521/1119 [1:03:35<44:55,  4.51s/it]

Processed: Label - 4, Instance - 1029


Cleaning Instances:  47%|████▋     | 522/1119 [1:03:39<43:13,  4.34s/it]        

Processed: Label - 4, Instance - 877


Cleaning Instances:  47%|████▋     | 523/1119 [1:03:43<41:31,  4.18s/it]

Processed: Label - 4, Instance - 880


Cleaning Instances:  47%|████▋     | 524/1119 [1:03:47<41:22,  4.17s/it]

Processed: Label - 4, Instance - 884


Cleaning Instances:  47%|████▋     | 525/1119 [1:03:51<40:22,  4.08s/it]

Processed: Label - 4, Instance - 903


Cleaning Instances:  47%|████▋     | 526/1119 [1:03:55<40:06,  4.06s/it]

Processed: Label - 4, Instance - 925


Cleaning Instances:  47%|████▋     | 527/1119 [1:03:59<41:38,  4.22s/it]

Processed: Label - 4, Instance - 979


Cleaning Instances:  47%|████▋     | 528/1119 [1:04:04<42:29,  4.31s/it]

Processed: Label - 4, Instance - 1012


Cleaning Instances:  47%|████▋     | 529/1119 [1:04:08<43:00,  4.37s/it]

Processed: Label - 4, Instance - 1020


Cleaning Instances:  47%|████▋     | 530/1119 [1:04:13<43:17,  4.41s/it]

Processed: Label - 4, Instance - 1034


Cleaning Instances:  47%|████▋     | 531/1119 [1:04:17<43:21,  4.42s/it]

Processed: Label - 4, Instance - 1037


Cleaning Instances:  48%|████▊     | 532/1119 [1:04:22<44:14,  4.52s/it]

Processed: Label - 4, Instance - 1038


Cleaning Instances:  48%|████▊     | 533/1119 [1:04:27<44:49,  4.59s/it]

Processed: Label - 4, Instance - 864


Cleaning Instances:  48%|████▊     | 534/1119 [1:04:31<44:24,  4.55s/it]

Processed: Label - 4, Instance - 886


Cleaning Instances:  48%|████▊     | 535/1119 [1:04:35<42:51,  4.40s/it]

Processed: Label - 4, Instance - 887


Cleaning Instances:  48%|████▊     | 536/1119 [1:04:40<43:30,  4.48s/it]

Processed: Label - 4, Instance - 897


Cleaning Instances:  48%|████▊     | 537/1119 [1:04:44<43:24,  4.47s/it]

Processed: Label - 4, Instance - 930


Cleaning Instances:  48%|████▊     | 538/1119 [1:04:48<41:35,  4.30s/it]

Processed: Label - 4, Instance - 938


Cleaning Instances:  48%|████▊     | 539/1119 [1:04:53<41:58,  4.34s/it]

Processed: Label - 4, Instance - 939


Cleaning Instances:  48%|████▊     | 540/1119 [1:04:57<41:39,  4.32s/it]

Processed: Label - 4, Instance - 943


Cleaning Instances:  48%|████▊     | 541/1119 [1:05:01<40:46,  4.23s/it]

Processed: Label - 4, Instance - 970


Cleaning Instances:  48%|████▊     | 542/1119 [1:05:05<40:17,  4.19s/it]

Processed: Label - 4, Instance - 981


Cleaning Instances:  49%|████▊     | 543/1119 [1:05:09<39:29,  4.11s/it]

Processed: Label - 4, Instance - 991


Cleaning Instances:  49%|████▊     | 544/1119 [1:05:13<39:21,  4.11s/it]

Processed: Label - 4, Instance - 1004


Cleaning Instances:  49%|████▊     | 545/1119 [1:05:18<39:46,  4.16s/it]

Processed: Label - 4, Instance - 1016


Cleaning Instances:  49%|████▉     | 546/1119 [1:05:22<39:35,  4.15s/it]

Processed: Label - 4, Instance - 1043


Cleaning Instances:  49%|████▉     | 547/1119 [1:05:26<41:03,  4.31s/it]

Processed: Label - 4, Instance - 1050


Cleaning Instances:  49%|████▉     | 548/1119 [1:05:31<41:50,  4.40s/it]

Processed: Label - 4, Instance - 1051


Cleaning Instances:  49%|████▉     | 549/1119 [1:05:35<39:55,  4.20s/it]

Processed: Label - 4, Instance - 857


Cleaning Instances:  49%|████▉     | 550/1119 [1:05:39<39:25,  4.16s/it]

Processed: Label - 4, Instance - 859


Cleaning Instances:  49%|████▉     | 551/1119 [1:05:43<40:44,  4.30s/it]

Processed: Label - 4, Instance - 862


Cleaning Instances:  49%|████▉     | 552/1119 [1:05:47<40:00,  4.23s/it]

Processed: Label - 4, Instance - 889


Cleaning Instances:  49%|████▉     | 553/1119 [1:05:51<39:15,  4.16s/it]

Processed: Label - 4, Instance - 894


Cleaning Instances:  50%|████▉     | 554/1119 [1:05:56<39:07,  4.15s/it]

Processed: Label - 4, Instance - 950


Cleaning Instances:  50%|████▉     | 555/1119 [1:06:00<40:08,  4.27s/it]

Processed: Label - 4, Instance - 984


Cleaning Instances:  50%|████▉     | 556/1119 [1:06:05<40:38,  4.33s/it]

Processed: Label - 4, Instance - 990


Cleaning Instances:  50%|████▉     | 557/1119 [1:06:09<41:19,  4.41s/it]

Processed: Label - 4, Instance - 992


Cleaning Instances:  50%|████▉     | 558/1119 [1:06:13<40:16,  4.31s/it]

Processed: Label - 4, Instance - 996


Cleaning Instances:  50%|████▉     | 559/1119 [1:06:18<40:24,  4.33s/it]

Processed: Label - 4, Instance - 1044


Cleaning Instances:  50%|█████     | 560/1119 [1:06:21<38:58,  4.18s/it]

Processed: Label - 4, Instance - 867


Cleaning Instances:  50%|█████     | 561/1119 [1:06:26<38:33,  4.15s/it]

Processed: Label - 4, Instance - 868


Cleaning Instances:  50%|█████     | 562/1119 [1:06:30<39:30,  4.26s/it]

Processed: Label - 4, Instance - 878


Cleaning Instances:  50%|█████     | 563/1119 [1:06:34<38:57,  4.20s/it]

Processed: Label - 4, Instance - 914


Cleaning Instances:  50%|█████     | 564/1119 [1:06:38<38:21,  4.15s/it]

Processed: Label - 4, Instance - 937


Cleaning Instances:  50%|█████     | 565/1119 [1:06:42<37:23,  4.05s/it]

Processed: Label - 4, Instance - 942


Cleaning Instances:  51%|█████     | 566/1119 [1:06:46<37:18,  4.05s/it]

Processed: Label - 4, Instance - 946


Cleaning Instances:  51%|█████     | 567/1119 [1:06:50<37:22,  4.06s/it]

Processed: Label - 4, Instance - 968


Cleaning Instances:  51%|█████     | 568/1119 [1:06:55<38:50,  4.23s/it]

Processed: Label - 4, Instance - 971


Cleaning Instances:  51%|█████     | 569/1119 [1:06:59<38:35,  4.21s/it]

Processed: Label - 4, Instance - 983


Cleaning Instances:  51%|█████     | 570/1119 [1:07:03<37:54,  4.14s/it]

Processed: Label - 4, Instance - 1000


Cleaning Instances:  51%|█████     | 571/1119 [1:07:07<38:33,  4.22s/it]

Processed: Label - 4, Instance - 1025


Cleaning Instances:  51%|█████     | 572/1119 [1:07:12<39:33,  4.34s/it]

Processed: Label - 4, Instance - 1039


Cleaning Instances:  51%|█████     | 573/1119 [1:07:16<38:42,  4.25s/it]

Processed: Label - 4, Instance - 1042


Cleaning Instances:  51%|█████▏    | 574/1119 [1:07:20<37:47,  4.16s/it]

Processed: Label - 4, Instance - 910


Cleaning Instances:  51%|█████▏    | 575/1119 [1:07:24<37:13,  4.11s/it]

Processed: Label - 4, Instance - 911


Cleaning Instances:  51%|█████▏    | 576/1119 [1:07:28<36:20,  4.02s/it]

Processed: Label - 4, Instance - 920


Cleaning Instances:  52%|█████▏    | 577/1119 [1:07:32<35:57,  3.98s/it]

Processed: Label - 4, Instance - 948


Cleaning Instances:  52%|█████▏    | 578/1119 [1:07:36<36:04,  4.00s/it]

Processed: Label - 4, Instance - 951


Cleaning Instances:  52%|█████▏    | 579/1119 [1:07:40<36:22,  4.04s/it]

Processed: Label - 4, Instance - 957


Cleaning Instances:  52%|█████▏    | 580/1119 [1:07:44<37:40,  4.19s/it]

Processed: Label - 4, Instance - 963


Cleaning Instances:  52%|█████▏    | 581/1119 [1:07:49<37:47,  4.21s/it]

Processed: Label - 4, Instance - 969


Cleaning Instances:  52%|█████▏    | 582/1119 [1:07:53<38:31,  4.30s/it]

Processed: Label - 4, Instance - 1017


Cleaning Instances:  52%|█████▏    | 583/1119 [1:07:57<37:48,  4.23s/it]

Processed: Label - 4, Instance - 870


Cleaning Instances:  52%|█████▏    | 584/1119 [1:08:02<38:54,  4.36s/it]

Processed: Label - 4, Instance - 896


Cleaning Instances:  52%|█████▏    | 585/1119 [1:08:06<38:03,  4.28s/it]

Processed: Label - 4, Instance - 928


Cleaning Instances:  52%|█████▏    | 586/1119 [1:08:10<37:43,  4.25s/it]        

Processed: Label - 4, Instance - 941


Cleaning Instances:  52%|█████▏    | 587/1119 [1:08:14<36:36,  4.13s/it]

Processed: Label - 4, Instance - 953


Cleaning Instances:  53%|█████▎    | 588/1119 [1:08:18<36:12,  4.09s/it]

Processed: Label - 4, Instance - 962


Cleaning Instances:  53%|█████▎    | 589/1119 [1:08:22<35:48,  4.05s/it]

Processed: Label - 4, Instance - 965


Cleaning Instances:  53%|█████▎    | 590/1119 [1:08:26<35:58,  4.08s/it]

Processed: Label - 4, Instance - 977


Cleaning Instances:  53%|█████▎    | 591/1119 [1:08:30<36:43,  4.17s/it]

Processed: Label - 4, Instance - 987


Cleaning Instances:  53%|█████▎    | 592/1119 [1:08:35<36:52,  4.20s/it]

Processed: Label - 4, Instance - 1013


Cleaning Instances:  53%|█████▎    | 593/1119 [1:08:39<36:09,  4.12s/it]

Processed: Label - 4, Instance - 1030


Cleaning Instances:  53%|█████▎    | 594/1119 [1:08:43<37:33,  4.29s/it]

Processed: Label - 4, Instance - 869


Cleaning Instances:  53%|█████▎    | 595/1119 [1:08:48<38:25,  4.40s/it]

Processed: Label - 4, Instance - 876


Cleaning Instances:  53%|█████▎    | 596/1119 [1:08:52<37:17,  4.28s/it]

Processed: Label - 4, Instance - 890


Cleaning Instances:  53%|█████▎    | 597/1119 [1:08:56<37:51,  4.35s/it]

Processed: Label - 4, Instance - 899


Cleaning Instances:  53%|█████▎    | 598/1119 [1:09:00<36:34,  4.21s/it]

Processed: Label - 4, Instance - 932


Cleaning Instances:  54%|█████▎    | 599/1119 [1:09:04<36:14,  4.18s/it]

Processed: Label - 4, Instance - 940


Cleaning Instances:  54%|█████▎    | 600/1119 [1:09:08<35:46,  4.14s/it]

Processed: Label - 4, Instance - 961


Cleaning Instances:  54%|█████▎    | 601/1119 [1:09:13<35:23,  4.10s/it]

Processed: Label - 4, Instance - 1003


Cleaning Instances:  54%|█████▍    | 602/1119 [1:09:16<34:54,  4.05s/it]

Processed: Label - 4, Instance - 1007


Cleaning Instances:  54%|█████▍    | 603/1119 [1:09:20<34:37,  4.03s/it]

Processed: Label - 4, Instance - 1011


Cleaning Instances:  54%|█████▍    | 604/1119 [1:09:25<36:10,  4.21s/it]

Processed: Label - 4, Instance - 1047


Cleaning Instances:  54%|█████▍    | 605/1119 [1:09:30<36:47,  4.30s/it]

Processed: Label - 4, Instance - 858


Cleaning Instances:  54%|█████▍    | 606/1119 [1:09:34<35:55,  4.20s/it]

Processed: Label - 4, Instance - 871


Cleaning Instances:  54%|█████▍    | 607/1119 [1:09:38<35:19,  4.14s/it]        

Processed: Label - 4, Instance - 875


Cleaning Instances:  54%|█████▍    | 608/1119 [1:09:42<35:50,  4.21s/it]

Processed: Label - 4, Instance - 882


Cleaning Instances:  54%|█████▍    | 609/1119 [1:09:46<36:17,  4.27s/it]

Processed: Label - 4, Instance - 902


Cleaning Instances:  55%|█████▍    | 610/1119 [1:09:50<35:23,  4.17s/it]

Processed: Label - 4, Instance - 908


Cleaning Instances:  55%|█████▍    | 611/1119 [1:09:55<36:26,  4.30s/it]

Processed: Label - 4, Instance - 912


Cleaning Instances:  55%|█████▍    | 612/1119 [1:09:59<35:12,  4.17s/it]

Processed: Label - 4, Instance - 917


Cleaning Instances:  55%|█████▍    | 613/1119 [1:10:03<35:43,  4.24s/it]

Processed: Label - 4, Instance - 960


Cleaning Instances:  55%|█████▍    | 614/1119 [1:10:08<36:43,  4.36s/it]

Processed: Label - 4, Instance - 967


Cleaning Instances:  55%|█████▍    | 615/1119 [1:10:12<35:33,  4.23s/it]

Processed: Label - 4, Instance - 975


Cleaning Instances:  55%|█████▌    | 616/1119 [1:10:16<36:32,  4.36s/it]

Processed: Label - 4, Instance - 976


Cleaning Instances:  55%|█████▌    | 617/1119 [1:10:21<37:22,  4.47s/it]

Processed: Label - 4, Instance - 1002


Cleaning Instances:  55%|█████▌    | 618/1119 [1:10:25<36:07,  4.33s/it]

Processed: Label - 4, Instance - 1023


Cleaning Instances:  55%|█████▌    | 619/1119 [1:10:29<34:57,  4.20s/it]

Processed: Label - 4, Instance - 1024


Cleaning Instances:  55%|█████▌    | 620/1119 [1:10:33<35:35,  4.28s/it]

Processed: Label - 4, Instance - 1048


Cleaning Instances:  55%|█████▌    | 621/1119 [1:10:37<34:22,  4.14s/it]

Processed: Label - 4, Instance - 861


Cleaning Instances:  56%|█████▌    | 622/1119 [1:10:42<35:03,  4.23s/it]

Processed: Label - 4, Instance - 879


Cleaning Instances:  56%|█████▌    | 623/1119 [1:10:46<35:18,  4.27s/it]

Processed: Label - 4, Instance - 904


Cleaning Instances:  56%|█████▌    | 624/1119 [1:10:50<34:03,  4.13s/it]

Processed: Label - 4, Instance - 907


Cleaning Instances:  56%|█████▌    | 625/1119 [1:10:54<33:19,  4.05s/it]

Processed: Label - 4, Instance - 933


Cleaning Instances:  56%|█████▌    | 626/1119 [1:10:58<32:55,  4.01s/it]

Processed: Label - 4, Instance - 966


Cleaning Instances:  56%|█████▌    | 627/1119 [1:11:02<33:53,  4.13s/it]

Processed: Label - 4, Instance - 973


Cleaning Instances:  56%|█████▌    | 628/1119 [1:11:06<33:56,  4.15s/it]

Processed: Label - 4, Instance - 974


Cleaning Instances:  56%|█████▌    | 629/1119 [1:11:11<34:38,  4.24s/it]        

Processed: Label - 4, Instance - 986


Cleaning Instances:  56%|█████▋    | 630/1119 [1:11:14<33:03,  4.06s/it]

Processed: Label - 4, Instance - 1005


Cleaning Instances:  56%|█████▋    | 631/1119 [1:11:18<32:28,  3.99s/it]

Processed: Label - 4, Instance - 1021


Cleaning Instances:  56%|█████▋    | 632/1119 [1:11:22<32:35,  4.02s/it]

Processed: Label - 4, Instance - 1022


Cleaning Instances:  57%|█████▋    | 633/1119 [1:11:27<33:17,  4.11s/it]

Processed: Label - 4, Instance - 1027


Cleaning Instances:  57%|█████▋    | 634/1119 [1:11:31<33:57,  4.20s/it]

Processed: Label - 4, Instance - 866


Cleaning Instances:  57%|█████▋    | 635/1119 [1:11:35<32:36,  4.04s/it]

Processed: Label - 4, Instance - 873


Cleaning Instances:  57%|█████▋    | 636/1119 [1:11:39<32:13,  4.00s/it]

Processed: Label - 4, Instance - 874


Cleaning Instances:  57%|█████▋    | 637/1119 [1:11:45<37:38,  4.69s/it]        

Processed: Label - 4, Instance - 885


Cleaning Instances:  57%|█████▋    | 638/1119 [1:11:49<37:05,  4.63s/it]

Processed: Label - 4, Instance - 893


Cleaning Instances:  57%|█████▋    | 639/1119 [1:11:53<35:31,  4.44s/it]

Processed: Label - 4, Instance - 895


Cleaning Instances:  57%|█████▋    | 640/1119 [1:11:57<33:48,  4.23s/it]

Processed: Label - 4, Instance - 900


Cleaning Instances:  57%|█████▋    | 641/1119 [1:12:01<32:27,  4.07s/it]

Processed: Label - 4, Instance - 918


Cleaning Instances:  57%|█████▋    | 642/1119 [1:12:05<32:52,  4.13s/it]

Processed: Label - 4, Instance - 922


Cleaning Instances:  57%|█████▋    | 643/1119 [1:12:09<32:20,  4.08s/it]

Processed: Label - 4, Instance - 924


Cleaning Instances:  58%|█████▊    | 644/1119 [1:12:13<33:00,  4.17s/it]

Processed: Label - 4, Instance - 935


Cleaning Instances:  58%|█████▊    | 645/1119 [1:12:18<33:13,  4.21s/it]

Processed: Label - 4, Instance - 956


Cleaning Instances:  58%|█████▊    | 646/1119 [1:12:21<31:47,  4.03s/it]

Processed: Label - 4, Instance - 978


Cleaning Instances:  58%|█████▊    | 647/1119 [1:12:26<32:38,  4.15s/it]

Processed: Label - 4, Instance - 1006


Cleaning Instances:  58%|█████▊    | 648/1119 [1:12:29<31:31,  4.02s/it]

Processed: Label - 4, Instance - 1033


Cleaning Instances:  58%|█████▊    | 649/1119 [1:12:33<30:53,  3.94s/it]

Processed: Label - 4, Instance - 1035


Cleaning Instances:  58%|█████▊    | 650/1119 [1:12:37<30:39,  3.92s/it]

Processed: Label - 4, Instance - 1049


Cleaning Instances:  58%|█████▊    | 651/1119 [1:12:41<29:45,  3.81s/it]

Processed: Label - 4, Instance - 872


Cleaning Instances:  58%|█████▊    | 652/1119 [1:12:45<30:34,  3.93s/it]

Processed: Label - 4, Instance - 921


Cleaning Instances:  58%|█████▊    | 653/1119 [1:12:49<30:35,  3.94s/it]

Processed: Label - 4, Instance - 934


Cleaning Instances:  58%|█████▊    | 654/1119 [1:12:53<31:47,  4.10s/it]

Processed: Label - 4, Instance - 936


Cleaning Instances:  59%|█████▊    | 655/1119 [1:12:57<31:06,  4.02s/it]

Processed: Label - 4, Instance - 944


Cleaning Instances:  59%|█████▊    | 656/1119 [1:13:01<30:35,  3.96s/it]        

Processed: Label - 4, Instance - 949


Cleaning Instances:  59%|█████▊    | 657/1119 [1:13:05<29:55,  3.89s/it]

Processed: Label - 4, Instance - 954


Cleaning Instances:  59%|█████▉    | 658/1119 [1:13:09<30:03,  3.91s/it]

Processed: Label - 4, Instance - 989


Cleaning Instances:  59%|█████▉    | 659/1119 [1:13:13<30:05,  3.92s/it]

Processed: Label - 4, Instance - 999


Cleaning Instances:  59%|█████▉    | 660/1119 [1:13:16<29:50,  3.90s/it]

Processed: Label - 4, Instance - 1008


Cleaning Instances:  59%|█████▉    | 661/1119 [1:13:21<30:40,  4.02s/it]

Processed: Label - 4, Instance - 909


Cleaning Instances:  59%|█████▉    | 662/1119 [1:13:25<31:29,  4.13s/it]

Processed: Label - 4, Instance - 916


Cleaning Instances:  59%|█████▉    | 663/1119 [1:13:29<30:29,  4.01s/it]

Processed: Label - 4, Instance - 919


Cleaning Instances:  59%|█████▉    | 664/1119 [1:13:33<29:51,  3.94s/it]

Processed: Label - 4, Instance - 923


Cleaning Instances:  59%|█████▉    | 665/1119 [1:13:37<30:39,  4.05s/it]

Processed: Label - 4, Instance - 945


Cleaning Instances:  60%|█████▉    | 666/1119 [1:13:41<30:04,  3.98s/it]

Processed: Label - 4, Instance - 972


Cleaning Instances:  60%|█████▉    | 667/1119 [1:13:45<30:18,  4.02s/it]

Processed: Label - 4, Instance - 980


Cleaning Instances:  60%|█████▉    | 668/1119 [1:13:49<31:06,  4.14s/it]

Processed: Label - 4, Instance - 993


Cleaning Instances:  60%|█████▉    | 669/1119 [1:13:54<31:23,  4.19s/it]

Processed: Label - 4, Instance - 997


Cleaning Instances:  60%|█████▉    | 670/1119 [1:13:58<30:44,  4.11s/it]

Processed: Label - 4, Instance - 1001


Cleaning Instances:  60%|█████▉    | 671/1119 [1:14:01<30:12,  4.04s/it]

Processed: Label - 4, Instance - 1009


Cleaning Instances:  60%|██████    | 672/1119 [1:14:05<29:25,  3.95s/it]

Processed: Label - 4, Instance - 1036


Cleaning Instances:  60%|██████    | 673/1119 [1:14:09<29:55,  4.03s/it]

Processed: Label - 4, Instance - 860


Cleaning Instances:  60%|██████    | 674/1119 [1:14:14<30:33,  4.12s/it]

Processed: Label - 4, Instance - 883


Cleaning Instances:  60%|██████    | 675/1119 [1:14:18<29:52,  4.04s/it]

Processed: Label - 4, Instance - 888


Cleaning Instances:  60%|██████    | 676/1119 [1:14:21<29:19,  3.97s/it]

Processed: Label - 4, Instance - 898


Cleaning Instances:  61%|██████    | 677/1119 [1:14:25<29:06,  3.95s/it]

Processed: Label - 4, Instance - 901


Cleaning Instances:  61%|██████    | 678/1119 [1:14:29<29:38,  4.03s/it]

Processed: Label - 4, Instance - 915


Cleaning Instances:  61%|██████    | 679/1119 [1:14:33<29:04,  3.96s/it]

Processed: Label - 4, Instance - 926


Cleaning Instances:  61%|██████    | 680/1119 [1:14:38<30:14,  4.13s/it]

Processed: Label - 4, Instance - 929


Cleaning Instances:  61%|██████    | 681/1119 [1:14:42<29:14,  4.01s/it]

Processed: Label - 4, Instance - 998


Cleaning Instances:  61%|██████    | 682/1119 [1:14:45<28:49,  3.96s/it]

Processed: Label - 4, Instance - 1010


Cleaning Instances:  61%|██████    | 683/1119 [1:14:50<29:45,  4.10s/it]        

Processed: Label - 4, Instance - 1015


Cleaning Instances:  61%|██████    | 684/1119 [1:14:54<28:58,  4.00s/it]

Processed: Label - 4, Instance - 1041


Cleaning Instances:  61%|██████    | 685/1119 [1:14:58<28:58,  4.01s/it]

Processed: Label - 4, Instance - 1045


Cleaning Instances:  61%|██████▏   | 686/1119 [1:15:02<29:58,  4.15s/it]

Processed: Label - 4, Instance - 1046


Cleaning Instances:  61%|██████▏   | 687/1119 [1:15:06<28:58,  4.02s/it]

Processed: Label - 4, Instance - 863


Cleaning Instances:  61%|██████▏   | 688/1119 [1:15:10<28:37,  3.99s/it]

Processed: Label - 4, Instance - 881


Cleaning Instances:  62%|██████▏   | 689/1119 [1:15:13<28:04,  3.92s/it]

Processed: Label - 4, Instance - 906


Cleaning Instances:  62%|██████▏   | 690/1119 [1:15:18<28:53,  4.04s/it]

Processed: Label - 4, Instance - 958


Cleaning Instances:  62%|██████▏   | 691/1119 [1:15:22<28:50,  4.04s/it]

Processed: Label - 4, Instance - 1019


Cleaning Instances:  62%|██████▏   | 692/1119 [1:15:26<29:57,  4.21s/it]

Processed: Label - 4, Instance - 1026


Cleaning Instances:  62%|██████▏   | 693/1119 [1:15:30<29:05,  4.10s/it]

Processed: Label - 4, Instance - 1031


Cleaning Instances:  62%|██████▏   | 694/1119 [1:15:34<28:13,  3.99s/it]

Processed: Label - 4, Instance - 1032


Cleaning Instances:  62%|██████▏   | 695/1119 [1:15:38<27:39,  3.91s/it]

Processed: Label - 4, Instance - 1040


Cleaning Instances:  62%|██████▏   | 696/1119 [1:15:43<29:37,  4.20s/it]

Processed: Label - 4, Instance - 891


Cleaning Instances:  62%|██████▏   | 697/1119 [1:15:47<30:02,  4.27s/it]

Processed: Label - 4, Instance - 955


Cleaning Instances:  62%|██████▏   | 698/1119 [1:15:51<29:32,  4.21s/it]

Processed: Label - 4, Instance - 959


Cleaning Instances:  62%|██████▏   | 699/1119 [1:15:55<29:23,  4.20s/it]

Processed: Label - 4, Instance - 982


Cleaning Instances:  63%|██████▎   | 700/1119 [1:16:00<30:05,  4.31s/it]

Processed: Label - 4, Instance - 985


Cleaning Instances:  63%|██████▎   | 701/1119 [1:16:04<29:40,  4.26s/it]

Processed: Label - 4, Instance - 988


Cleaning Instances:  63%|██████▎   | 702/1119 [1:16:08<29:24,  4.23s/it]

Processed: Label - 4, Instance - 995


Cleaning Instances:  63%|██████▎   | 703/1119 [1:16:12<28:44,  4.15s/it]

Processed: Label - 4, Instance - 1014


Cleaning Instances:  63%|██████▎   | 704/1119 [1:16:18<32:23,  4.68s/it]

Processed: Label - 0, Instance - 187


Cleaning Instances:  63%|██████▎   | 705/1119 [1:16:22<31:26,  4.56s/it]

Processed: Label - 0, Instance - 194


Cleaning Instances:  63%|██████▎   | 706/1119 [1:16:28<34:12,  4.97s/it]

Processed: Label - 0, Instance - 196


Cleaning Instances:  63%|██████▎   | 707/1119 [1:16:34<36:39,  5.34s/it]

Processed: Label - 0, Instance - 243


Cleaning Instances:  63%|██████▎   | 708/1119 [1:16:39<35:12,  5.14s/it]

Processed: Label - 0, Instance - 247


Cleaning Instances:  63%|██████▎   | 709/1119 [1:16:44<34:07,  4.99s/it]

Processed: Label - 0, Instance - 250


Cleaning Instances:  63%|██████▎   | 710/1119 [1:16:49<33:49,  4.96s/it]

Processed: Label - 0, Instance - 252


Cleaning Instances:  64%|██████▎   | 711/1119 [1:16:55<35:46,  5.26s/it]

Processed: Label - 0, Instance - 263


Cleaning Instances:  64%|██████▎   | 712/1119 [1:16:59<34:00,  5.01s/it]

Processed: Label - 0, Instance - 270


Cleaning Instances:  64%|██████▎   | 713/1119 [1:17:04<33:21,  4.93s/it]

Processed: Label - 0, Instance - 191


Cleaning Instances:  64%|██████▍   | 714/1119 [1:17:09<33:24,  4.95s/it]

Processed: Label - 0, Instance - 193


Cleaning Instances:  64%|██████▍   | 715/1119 [1:17:13<32:49,  4.88s/it]

Processed: Label - 0, Instance - 210


Cleaning Instances:  64%|██████▍   | 716/1119 [1:17:19<34:54,  5.20s/it]

Processed: Label - 0, Instance - 216


Cleaning Instances:  64%|██████▍   | 717/1119 [1:17:25<36:08,  5.39s/it]

Processed: Label - 0, Instance - 223


Cleaning Instances:  64%|██████▍   | 718/1119 [1:17:30<33:47,  5.06s/it]

Processed: Label - 0, Instance - 235


Cleaning Instances:  64%|██████▍   | 719/1119 [1:17:34<32:42,  4.91s/it]

Processed: Label - 0, Instance - 244


Cleaning Instances:  64%|██████▍   | 720/1119 [1:17:39<32:09,  4.84s/it]

Processed: Label - 0, Instance - 260


Cleaning Instances:  64%|██████▍   | 721/1119 [1:17:44<32:06,  4.84s/it]

Processed: Label - 0, Instance - 204


Cleaning Instances:  65%|██████▍   | 722/1119 [1:17:49<32:24,  4.90s/it]

Processed: Label - 0, Instance - 217


Cleaning Instances:  65%|██████▍   | 723/1119 [1:17:53<31:58,  4.85s/it]

Processed: Label - 0, Instance - 218


Cleaning Instances:  65%|██████▍   | 724/1119 [1:17:58<30:54,  4.70s/it]

Processed: Label - 0, Instance - 231


Cleaning Instances:  65%|██████▍   | 725/1119 [1:18:04<33:14,  5.06s/it]

Processed: Label - 0, Instance - 266


Cleaning Instances:  65%|██████▍   | 726/1119 [1:18:09<34:23,  5.25s/it]

Processed: Label - 0, Instance - 184


Cleaning Instances:  65%|██████▍   | 727/1119 [1:18:15<35:21,  5.41s/it]

Processed: Label - 0, Instance - 209


Cleaning Instances:  65%|██████▌   | 728/1119 [1:18:21<36:00,  5.53s/it]

Processed: Label - 0, Instance - 214


Cleaning Instances:  65%|██████▌   | 729/1119 [1:18:26<34:16,  5.27s/it]

Processed: Label - 0, Instance - 230


24/11/27 01:30:22 WARN TaskSetManager: Stage 76664 contains a task of very large size (1945 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:  65%|██████▌   | 730/1119 [1:18:45<1:02:31,  9.64s/it]      

Processed: Label - 0, Instance - 236


Cleaning Instances:  65%|██████▌   | 731/1119 [1:18:51<54:56,  8.50s/it]  

Processed: Label - 0, Instance - 242


Cleaning Instances:  65%|██████▌   | 732/1119 [1:18:56<47:42,  7.40s/it]

Processed: Label - 0, Instance - 258


Cleaning Instances:  66%|██████▌   | 733/1119 [1:19:01<42:12,  6.56s/it]

Processed: Label - 0, Instance - 265


Cleaning Instances:  66%|██████▌   | 734/1119 [1:19:05<38:07,  5.94s/it]

Processed: Label - 0, Instance - 268


Cleaning Instances:  66%|██████▌   | 735/1119 [1:19:10<35:33,  5.56s/it]

Processed: Label - 0, Instance - 278


Cleaning Instances:  66%|██████▌   | 736/1119 [1:19:15<33:52,  5.31s/it]

Processed: Label - 0, Instance - 201


Cleaning Instances:  66%|██████▌   | 737/1119 [1:19:19<32:49,  5.16s/it]

Processed: Label - 0, Instance - 205


Cleaning Instances:  66%|██████▌   | 738/1119 [1:19:25<34:00,  5.36s/it]

Processed: Label - 0, Instance - 219


Cleaning Instances:  66%|██████▌   | 739/1119 [1:19:31<34:43,  5.48s/it]

Processed: Label - 0, Instance - 240


Cleaning Instances:  66%|██████▌   | 740/1119 [1:19:37<35:50,  5.67s/it]

Processed: Label - 0, Instance - 256


Cleaning Instances:  66%|██████▌   | 741/1119 [1:19:48<45:07,  7.16s/it]        

Processed: Label - 0, Instance - 261


Cleaning Instances:  66%|██████▋   | 742/1119 [1:19:53<42:19,  6.74s/it]

Processed: Label - 0, Instance - 264


Cleaning Instances:  66%|██████▋   | 743/1119 [1:19:59<40:39,  6.49s/it]

Processed: Label - 0, Instance - 272


Cleaning Instances:  66%|██████▋   | 744/1119 [1:20:04<36:20,  5.81s/it]

Processed: Label - 0, Instance - 280


Cleaning Instances:  67%|██████▋   | 745/1119 [1:20:10<38:00,  6.10s/it]

Processed: Label - 0, Instance - 185


Cleaning Instances:  67%|██████▋   | 746/1119 [1:20:16<36:17,  5.84s/it]

Processed: Label - 0, Instance - 188


Cleaning Instances:  67%|██████▋   | 747/1119 [1:20:21<35:27,  5.72s/it]

Processed: Label - 0, Instance - 197


Cleaning Instances:  67%|██████▋   | 748/1119 [1:20:26<33:46,  5.46s/it]

Processed: Label - 0, Instance - 200


Cleaning Instances:  67%|██████▋   | 749/1119 [1:20:31<33:31,  5.44s/it]

Processed: Label - 0, Instance - 262


Cleaning Instances:  67%|██████▋   | 750/1119 [1:20:37<34:02,  5.53s/it]

Processed: Label - 0, Instance - 267


Cleaning Instances:  67%|██████▋   | 751/1119 [1:20:42<33:12,  5.41s/it]

Processed: Label - 0, Instance - 269


Cleaning Instances:  67%|██████▋   | 752/1119 [1:20:48<33:57,  5.55s/it]

Processed: Label - 0, Instance - 190


Cleaning Instances:  67%|██████▋   | 753/1119 [1:20:54<34:28,  5.65s/it]        

Processed: Label - 0, Instance - 198


Cleaning Instances:  67%|██████▋   | 754/1119 [1:21:00<34:40,  5.70s/it]

Processed: Label - 0, Instance - 202


Cleaning Instances:  67%|██████▋   | 755/1119 [1:21:05<33:16,  5.49s/it]

Processed: Label - 0, Instance - 224


Cleaning Instances:  68%|██████▊   | 756/1119 [1:21:11<34:14,  5.66s/it]

Processed: Label - 0, Instance - 253


Cleaning Instances:  68%|██████▊   | 757/1119 [1:21:16<32:30,  5.39s/it]

Processed: Label - 0, Instance - 208


Cleaning Instances:  68%|██████▊   | 758/1119 [1:21:20<31:19,  5.21s/it]

Processed: Label - 0, Instance - 211


Cleaning Instances:  68%|██████▊   | 759/1119 [1:21:25<30:20,  5.06s/it]

Processed: Label - 0, Instance - 228


Cleaning Instances:  68%|██████▊   | 760/1119 [1:21:30<30:04,  5.03s/it]

Processed: Label - 0, Instance - 239


Cleaning Instances:  68%|██████▊   | 761/1119 [1:21:35<30:13,  5.06s/it]

Processed: Label - 0, Instance - 203


Cleaning Instances:  68%|██████▊   | 762/1119 [1:21:40<29:44,  5.00s/it]

Processed: Label - 0, Instance - 213


Cleaning Instances:  68%|██████▊   | 763/1119 [1:21:47<33:29,  5.64s/it]        

Processed: Label - 0, Instance - 241


Cleaning Instances:  68%|██████▊   | 764/1119 [1:21:53<33:56,  5.74s/it]

Processed: Label - 0, Instance - 195


Cleaning Instances:  68%|██████▊   | 765/1119 [1:21:58<32:11,  5.45s/it]

Processed: Label - 0, Instance - 215


Cleaning Instances:  68%|██████▊   | 766/1119 [1:22:04<33:26,  5.68s/it]

Processed: Label - 0, Instance - 220


Cleaning Instances:  69%|██████▊   | 767/1119 [1:22:08<30:42,  5.23s/it]

Processed: Label - 0, Instance - 229


Cleaning Instances:  69%|██████▊   | 768/1119 [1:22:13<29:40,  5.07s/it]

Processed: Label - 0, Instance - 233


Cleaning Instances:  69%|██████▊   | 769/1119 [1:22:19<31:25,  5.39s/it]

Processed: Label - 0, Instance - 237


Cleaning Instances:  69%|██████▉   | 770/1119 [1:22:25<32:29,  5.59s/it]

Processed: Label - 0, Instance - 251


Cleaning Instances:  69%|██████▉   | 771/1119 [1:22:31<32:53,  5.67s/it]

Processed: Label - 0, Instance - 254


Cleaning Instances:  69%|██████▉   | 772/1119 [1:22:36<31:18,  5.41s/it]

Processed: Label - 0, Instance - 273


Cleaning Instances:  69%|██████▉   | 773/1119 [1:22:40<29:32,  5.12s/it]

Processed: Label - 0, Instance - 207


Cleaning Instances:  69%|██████▉   | 774/1119 [1:22:45<28:17,  4.92s/it]

Processed: Label - 0, Instance - 225


Cleaning Instances:  69%|██████▉   | 775/1119 [1:22:49<27:48,  4.85s/it]

Processed: Label - 0, Instance - 245


Cleaning Instances:  69%|██████▉   | 776/1119 [1:22:55<29:26,  5.15s/it]

Processed: Label - 0, Instance - 279


Cleaning Instances:  69%|██████▉   | 777/1119 [1:23:01<30:49,  5.41s/it]

Processed: Label - 0, Instance - 206


Cleaning Instances:  70%|██████▉   | 778/1119 [1:23:07<31:09,  5.48s/it]

Processed: Label - 0, Instance - 222


Cleaning Instances:  70%|██████▉   | 779/1119 [1:23:13<31:57,  5.64s/it]

Processed: Label - 0, Instance - 227


Cleaning Instances:  70%|██████▉   | 780/1119 [1:23:17<29:52,  5.29s/it]

Processed: Label - 0, Instance - 232


Cleaning Instances:  70%|██████▉   | 781/1119 [1:23:23<30:38,  5.44s/it]

Processed: Label - 0, Instance - 234


Cleaning Instances:  70%|██████▉   | 782/1119 [1:23:28<29:32,  5.26s/it]

Processed: Label - 0, Instance - 257


Cleaning Instances:  70%|██████▉   | 783/1119 [1:23:33<28:25,  5.08s/it]

Processed: Label - 0, Instance - 276


Cleaning Instances:  70%|███████   | 784/1119 [1:23:38<27:56,  5.00s/it]

Processed: Label - 0, Instance - 186


Cleaning Instances:  70%|███████   | 785/1119 [1:23:44<29:49,  5.36s/it]

Processed: Label - 0, Instance - 199


Cleaning Instances:  70%|███████   | 786/1119 [1:23:48<28:34,  5.15s/it]

Processed: Label - 0, Instance - 238


Cleaning Instances:  70%|███████   | 787/1119 [1:23:53<27:45,  5.02s/it]

Processed: Label - 0, Instance - 255


Cleaning Instances:  70%|███████   | 788/1119 [1:23:58<27:35,  5.00s/it]

Processed: Label - 0, Instance - 274


Cleaning Instances:  71%|███████   | 789/1119 [1:24:03<27:01,  4.91s/it]

Processed: Label - 0, Instance - 275


Cleaning Instances:  71%|███████   | 790/1119 [1:24:08<26:40,  4.86s/it]

Processed: Label - 0, Instance - 192


Cleaning Instances:  71%|███████   | 791/1119 [1:24:13<27:15,  4.99s/it]

Processed: Label - 0, Instance - 221


Cleaning Instances:  71%|███████   | 792/1119 [1:24:17<26:44,  4.91s/it]

Processed: Label - 0, Instance - 246


Cleaning Instances:  71%|███████   | 793/1119 [1:24:22<26:42,  4.92s/it]

Processed: Label - 0, Instance - 248


Cleaning Instances:  71%|███████   | 794/1119 [1:24:28<27:47,  5.13s/it]

Processed: Label - 0, Instance - 249


Cleaning Instances:  71%|███████   | 795/1119 [1:24:34<29:05,  5.39s/it]

Processed: Label - 0, Instance - 259


Cleaning Instances:  71%|███████   | 796/1119 [1:24:39<28:02,  5.21s/it]

Processed: Label - 0, Instance - 271


Cleaning Instances:  71%|███████   | 797/1119 [1:24:45<29:10,  5.44s/it]

Processed: Label - 0, Instance - 277


Cleaning Instances:  71%|███████▏  | 798/1119 [1:24:50<28:24,  5.31s/it]

Processed: Label - 0, Instance - 189


Cleaning Instances:  71%|███████▏  | 799/1119 [1:24:55<27:18,  5.12s/it]

Processed: Label - 0, Instance - 212


Cleaning Instances:  71%|███████▏  | 800/1119 [1:25:00<27:58,  5.26s/it]

Processed: Label - 0, Instance - 226


Cleaning Instances:  72%|███████▏  | 801/1119 [1:25:06<28:40,  5.41s/it]

Processed: Label - 0, Instance - 134


Cleaning Instances:  72%|███████▏  | 802/1119 [1:25:12<29:14,  5.54s/it]

Processed: Label - 0, Instance - 135


Cleaning Instances:  72%|███████▏  | 803/1119 [1:25:16<27:45,  5.27s/it]

Processed: Label - 0, Instance - 141


Cleaning Instances:  72%|███████▏  | 804/1119 [1:25:21<26:56,  5.13s/it]

Processed: Label - 0, Instance - 158


Cleaning Instances:  72%|███████▏  | 805/1119 [1:25:26<25:51,  4.94s/it]

Processed: Label - 0, Instance - 86


Cleaning Instances:  72%|███████▏  | 806/1119 [1:25:31<27:06,  5.20s/it]

Processed: Label - 0, Instance - 113


Cleaning Instances:  72%|███████▏  | 807/1119 [1:25:36<25:38,  4.93s/it]

Processed: Label - 0, Instance - 126


Cleaning Instances:  72%|███████▏  | 808/1119 [1:25:42<27:07,  5.23s/it]

Processed: Label - 0, Instance - 160


Cleaning Instances:  72%|███████▏  | 809/1119 [1:25:46<26:14,  5.08s/it]

Processed: Label - 0, Instance - 94


Cleaning Instances:  72%|███████▏  | 810/1119 [1:25:52<27:24,  5.32s/it]

Processed: Label - 0, Instance - 137


Cleaning Instances:  72%|███████▏  | 811/1119 [1:25:58<27:17,  5.32s/it]

Processed: Label - 0, Instance - 88


Cleaning Instances:  73%|███████▎  | 812/1119 [1:26:02<26:17,  5.14s/it]

Processed: Label - 0, Instance - 107


Cleaning Instances:  73%|███████▎  | 813/1119 [1:26:07<25:10,  4.94s/it]

Processed: Label - 0, Instance - 155


Cleaning Instances:  73%|███████▎  | 814/1119 [1:26:12<25:11,  4.96s/it]

Processed: Label - 0, Instance - 165


Cleaning Instances:  73%|███████▎  | 815/1119 [1:26:17<24:50,  4.90s/it]

Processed: Label - 0, Instance - 167


Cleaning Instances:  73%|███████▎  | 816/1119 [1:26:23<26:31,  5.25s/it]

Processed: Label - 0, Instance - 179


Cleaning Instances:  73%|███████▎  | 817/1119 [1:26:27<25:31,  5.07s/it]

Processed: Label - 0, Instance - 90


Cleaning Instances:  73%|███████▎  | 818/1119 [1:26:32<24:44,  4.93s/it]

Processed: Label - 0, Instance - 97


Cleaning Instances:  73%|███████▎  | 819/1119 [1:26:36<23:56,  4.79s/it]

Processed: Label - 0, Instance - 112


Cleaning Instances:  73%|███████▎  | 820/1119 [1:26:42<25:45,  5.17s/it]

Processed: Label - 0, Instance - 143


Cleaning Instances:  73%|███████▎  | 821/1119 [1:26:48<27:00,  5.44s/it]

Processed: Label - 0, Instance - 157


Cleaning Instances:  73%|███████▎  | 822/1119 [1:26:54<27:44,  5.61s/it]

Processed: Label - 0, Instance - 168


Cleaning Instances:  74%|███████▎  | 823/1119 [1:27:01<28:26,  5.76s/it]

Processed: Label - 0, Instance - 181


Cleaning Instances:  74%|███████▎  | 824/1119 [1:27:05<26:31,  5.39s/it]

Processed: Label - 0, Instance - 100


Cleaning Instances:  74%|███████▎  | 825/1119 [1:27:10<25:05,  5.12s/it]

Processed: Label - 0, Instance - 102


Cleaning Instances:  74%|███████▍  | 826/1119 [1:27:14<24:25,  5.00s/it]

Processed: Label - 0, Instance - 124


Cleaning Instances:  74%|███████▍  | 827/1119 [1:27:20<25:43,  5.29s/it]

Processed: Label - 0, Instance - 127


Cleaning Instances:  74%|███████▍  | 828/1119 [1:27:28<28:39,  5.91s/it]        

Processed: Label - 0, Instance - 140


Cleaning Instances:  74%|███████▍  | 829/1119 [1:27:32<26:50,  5.55s/it]

Processed: Label - 0, Instance - 156


Cleaning Instances:  74%|███████▍  | 830/1119 [1:27:37<25:20,  5.26s/it]

Processed: Label - 0, Instance - 164


Cleaning Instances:  74%|███████▍  | 831/1119 [1:27:43<26:19,  5.48s/it]        

Processed: Label - 0, Instance - 175


Cleaning Instances:  74%|███████▍  | 832/1119 [1:27:49<26:36,  5.56s/it]

Processed: Label - 0, Instance - 123


Cleaning Instances:  74%|███████▍  | 833/1119 [1:27:54<25:26,  5.34s/it]        

Processed: Label - 0, Instance - 136


Cleaning Instances:  75%|███████▍  | 834/1119 [1:27:58<24:14,  5.10s/it]

Processed: Label - 0, Instance - 172


Cleaning Instances:  75%|███████▍  | 835/1119 [1:28:04<25:22,  5.36s/it]

Processed: Label - 0, Instance - 87


Cleaning Instances:  75%|███████▍  | 836/1119 [1:28:09<24:05,  5.11s/it]

Processed: Label - 0, Instance - 104


Cleaning Instances:  75%|███████▍  | 837/1119 [1:28:14<23:56,  5.09s/it]

Processed: Label - 0, Instance - 131


Cleaning Instances:  75%|███████▍  | 838/1119 [1:28:19<24:56,  5.33s/it]

Processed: Label - 0, Instance - 142


Cleaning Instances:  75%|███████▍  | 839/1119 [1:28:26<26:14,  5.62s/it]

Processed: Label - 0, Instance - 149


Cleaning Instances:  75%|███████▌  | 840/1119 [1:28:30<24:44,  5.32s/it]

Processed: Label - 0, Instance - 152


Cleaning Instances:  75%|███████▌  | 841/1119 [1:28:35<23:43,  5.12s/it]

Processed: Label - 0, Instance - 159


Cleaning Instances:  75%|███████▌  | 842/1119 [1:28:41<24:48,  5.38s/it]

Processed: Label - 0, Instance - 176


Cleaning Instances:  75%|███████▌  | 843/1119 [1:28:46<23:41,  5.15s/it]

Processed: Label - 0, Instance - 85


Cleaning Instances:  75%|███████▌  | 844/1119 [1:28:51<23:14,  5.07s/it]

Processed: Label - 0, Instance - 89


Cleaning Instances:  76%|███████▌  | 845/1119 [1:28:57<24:41,  5.41s/it]

Processed: Label - 0, Instance - 116


Cleaning Instances:  76%|███████▌  | 846/1119 [1:29:01<23:38,  5.20s/it]

Processed: Label - 0, Instance - 162


Cleaning Instances:  76%|███████▌  | 847/1119 [1:29:05<21:53,  4.83s/it]

Processed: Label - 0, Instance - 163


Cleaning Instances:  76%|███████▌  | 848/1119 [1:29:10<21:58,  4.86s/it]

Processed: Label - 0, Instance - 174


Cleaning Instances:  76%|███████▌  | 849/1119 [1:29:16<23:23,  5.20s/it]

Processed: Label - 0, Instance - 96


Cleaning Instances:  76%|███████▌  | 850/1119 [1:29:21<22:18,  4.98s/it]

Processed: Label - 0, Instance - 101


Cleaning Instances:  76%|███████▌  | 851/1119 [1:29:25<21:40,  4.85s/it]

Processed: Label - 0, Instance - 106


Cleaning Instances:  76%|███████▌  | 852/1119 [1:29:31<23:07,  5.20s/it]

Processed: Label - 0, Instance - 111


Cleaning Instances:  76%|███████▌  | 853/1119 [1:29:37<23:49,  5.37s/it]

Processed: Label - 0, Instance - 133


Cleaning Instances:  76%|███████▋  | 854/1119 [1:29:41<22:15,  5.04s/it]

Processed: Label - 0, Instance - 146


Cleaning Instances:  76%|███████▋  | 855/1119 [1:29:46<21:35,  4.91s/it]

Processed: Label - 0, Instance - 91


Cleaning Instances:  76%|███████▋  | 856/1119 [1:29:51<22:04,  5.04s/it]

Processed: Label - 0, Instance - 115


Cleaning Instances:  77%|███████▋  | 857/1119 [1:29:58<23:31,  5.39s/it]

Processed: Label - 0, Instance - 122


Cleaning Instances:  77%|███████▋  | 858/1119 [1:30:04<24:30,  5.63s/it]

Processed: Label - 0, Instance - 139


Cleaning Instances:  77%|███████▋  | 859/1119 [1:30:08<23:14,  5.36s/it]

Processed: Label - 0, Instance - 153


Cleaning Instances:  77%|███████▋  | 860/1119 [1:30:13<22:26,  5.20s/it]

Processed: Label - 0, Instance - 166


Cleaning Instances:  77%|███████▋  | 861/1119 [1:30:18<22:16,  5.18s/it]

Processed: Label - 0, Instance - 92


Cleaning Instances:  77%|███████▋  | 862/1119 [1:30:23<21:33,  5.03s/it]

Processed: Label - 0, Instance - 108


Cleaning Instances:  77%|███████▋  | 863/1119 [1:30:27<20:30,  4.81s/it]

Processed: Label - 0, Instance - 109


Cleaning Instances:  77%|███████▋  | 864/1119 [1:30:32<20:09,  4.74s/it]

Processed: Label - 0, Instance - 132


Cleaning Instances:  77%|███████▋  | 865/1119 [1:30:37<20:07,  4.75s/it]

Processed: Label - 0, Instance - 138


Cleaning Instances:  77%|███████▋  | 866/1119 [1:30:41<19:44,  4.68s/it]

Processed: Label - 0, Instance - 145


Cleaning Instances:  77%|███████▋  | 867/1119 [1:30:46<19:54,  4.74s/it]

Processed: Label - 0, Instance - 154


Cleaning Instances:  78%|███████▊  | 868/1119 [1:30:52<21:13,  5.07s/it]

Processed: Label - 0, Instance - 161


Cleaning Instances:  78%|███████▊  | 869/1119 [1:30:58<22:23,  5.37s/it]

Processed: Label - 0, Instance - 99


Cleaning Instances:  78%|███████▊  | 870/1119 [1:31:04<23:08,  5.58s/it]

Processed: Label - 0, Instance - 110


Cleaning Instances:  78%|███████▊  | 871/1119 [1:31:10<23:24,  5.66s/it]

Processed: Label - 0, Instance - 118


Cleaning Instances:  78%|███████▊  | 872/1119 [1:31:15<22:46,  5.53s/it]

Processed: Label - 0, Instance - 120


Cleaning Instances:  78%|███████▊  | 873/1119 [1:31:21<23:27,  5.72s/it]

Processed: Label - 0, Instance - 121


Cleaning Instances:  78%|███████▊  | 874/1119 [1:31:27<23:48,  5.83s/it]

Processed: Label - 0, Instance - 128


Cleaning Instances:  78%|███████▊  | 875/1119 [1:31:32<22:06,  5.44s/it]

Processed: Label - 0, Instance - 147


Cleaning Instances:  78%|███████▊  | 876/1119 [1:31:38<22:49,  5.64s/it]

Processed: Label - 0, Instance - 173


Cleaning Instances:  78%|███████▊  | 877/1119 [1:31:43<21:35,  5.35s/it]

Processed: Label - 0, Instance - 178


Cleaning Instances:  78%|███████▊  | 878/1119 [1:31:49<22:13,  5.53s/it]

Processed: Label - 0, Instance - 180


Cleaning Instances:  79%|███████▊  | 879/1119 [1:31:54<21:11,  5.30s/it]

Processed: Label - 0, Instance - 98


Cleaning Instances:  79%|███████▊  | 880/1119 [1:31:58<20:23,  5.12s/it]

Processed: Label - 0, Instance - 114


Cleaning Instances:  79%|███████▊  | 881/1119 [1:32:03<19:37,  4.95s/it]

Processed: Label - 0, Instance - 117


Cleaning Instances:  79%|███████▉  | 882/1119 [1:32:07<19:14,  4.87s/it]

Processed: Label - 0, Instance - 125


Cleaning Instances:  79%|███████▉  | 883/1119 [1:32:14<20:35,  5.24s/it]

Processed: Label - 0, Instance - 130


Cleaning Instances:  79%|███████▉  | 884/1119 [1:32:18<19:43,  5.04s/it]

Processed: Label - 0, Instance - 148


Cleaning Instances:  79%|███████▉  | 885/1119 [1:32:24<20:45,  5.32s/it]

Processed: Label - 0, Instance - 150


Cleaning Instances:  79%|███████▉  | 886/1119 [1:32:29<20:16,  5.22s/it]

Processed: Label - 0, Instance - 169


Cleaning Instances:  79%|███████▉  | 887/1119 [1:32:35<21:10,  5.48s/it]

Processed: Label - 0, Instance - 171


Cleaning Instances:  79%|███████▉  | 888/1119 [1:32:40<20:03,  5.21s/it]

Processed: Label - 0, Instance - 182


Cleaning Instances:  79%|███████▉  | 889/1119 [1:32:46<20:40,  5.39s/it]

Processed: Label - 0, Instance - 83


Cleaning Instances:  80%|███████▉  | 890/1119 [1:32:52<21:27,  5.62s/it]

Processed: Label - 0, Instance - 93


Cleaning Instances:  80%|███████▉  | 891/1119 [1:32:58<21:41,  5.71s/it]

Processed: Label - 0, Instance - 95


Cleaning Instances:  80%|███████▉  | 892/1119 [1:33:04<21:54,  5.79s/it]

Processed: Label - 0, Instance - 119


Cleaning Instances:  80%|███████▉  | 893/1119 [1:33:09<21:50,  5.80s/it]

Processed: Label - 0, Instance - 144


Cleaning Instances:  80%|███████▉  | 894/1119 [1:33:15<21:58,  5.86s/it]

Processed: Label - 0, Instance - 170


Cleaning Instances:  80%|███████▉  | 895/1119 [1:33:21<22:00,  5.89s/it]

Processed: Label - 0, Instance - 183


Cleaning Instances:  80%|████████  | 896/1119 [1:33:26<20:51,  5.61s/it]

Processed: Label - 0, Instance - 84


Cleaning Instances:  80%|████████  | 897/1119 [1:33:32<20:27,  5.53s/it]

Processed: Label - 0, Instance - 103


Cleaning Instances:  80%|████████  | 898/1119 [1:33:38<20:48,  5.65s/it]

Processed: Label - 0, Instance - 105


Cleaning Instances:  80%|████████  | 899/1119 [1:33:42<19:29,  5.31s/it]

Processed: Label - 0, Instance - 129


Cleaning Instances:  80%|████████  | 900/1119 [1:33:48<20:09,  5.52s/it]

Processed: Label - 0, Instance - 151


Cleaning Instances:  81%|████████  | 901/1119 [1:33:53<18:51,  5.19s/it]

Processed: Label - 0, Instance - 177


Cleaning Instances:  81%|████████  | 902/1119 [1:33:58<18:34,  5.14s/it]

Processed: Label - 0, Instance - 448


Cleaning Instances:  81%|████████  | 903/1119 [1:34:02<17:46,  4.94s/it]

Processed: Label - 0, Instance - 454


Cleaning Instances:  81%|████████  | 904/1119 [1:34:08<18:35,  5.19s/it]

Processed: Label - 0, Instance - 455


Cleaning Instances:  81%|████████  | 905/1119 [1:34:12<17:16,  4.85s/it]

Processed: Label - 0, Instance - 466


Cleaning Instances:  81%|████████  | 906/1119 [1:34:16<16:49,  4.74s/it]

Processed: Label - 0, Instance - 494


Cleaning Instances:  81%|████████  | 907/1119 [1:34:22<18:00,  5.10s/it]

Processed: Label - 0, Instance - 504


Cleaning Instances:  81%|████████  | 908/1119 [1:34:27<17:45,  5.05s/it]

Processed: Label - 0, Instance - 528


Cleaning Instances:  81%|████████  | 909/1119 [1:34:32<17:13,  4.92s/it]

Processed: Label - 0, Instance - 444


Cleaning Instances:  81%|████████▏ | 910/1119 [1:34:36<16:12,  4.65s/it]

Processed: Label - 0, Instance - 447


Cleaning Instances:  81%|████████▏ | 911/1119 [1:34:40<16:04,  4.64s/it]

Processed: Label - 0, Instance - 451


Cleaning Instances:  82%|████████▏ | 912/1119 [1:34:46<17:21,  5.03s/it]

Processed: Label - 0, Instance - 464


Cleaning Instances:  82%|████████▏ | 913/1119 [1:34:53<18:28,  5.38s/it]

Processed: Label - 0, Instance - 470


Cleaning Instances:  82%|████████▏ | 914/1119 [1:34:59<19:10,  5.61s/it]

Processed: Label - 0, Instance - 481


Cleaning Instances:  82%|████████▏ | 915/1119 [1:35:05<19:28,  5.73s/it]

Processed: Label - 0, Instance - 507


Cleaning Instances:  82%|████████▏ | 916/1119 [1:35:11<19:39,  5.81s/it]

Processed: Label - 0, Instance - 516


Cleaning Instances:  82%|████████▏ | 917/1119 [1:35:17<19:42,  5.85s/it]

Processed: Label - 0, Instance - 530


Cleaning Instances:  82%|████████▏ | 918/1119 [1:35:23<19:48,  5.91s/it]

Processed: Label - 0, Instance - 531


Cleaning Instances:  82%|████████▏ | 919/1119 [1:35:27<18:17,  5.49s/it]

Processed: Label - 0, Instance - 534


Cleaning Instances:  82%|████████▏ | 920/1119 [1:35:34<19:51,  5.99s/it]        

Processed: Label - 0, Instance - 442


Cleaning Instances:  82%|████████▏ | 921/1119 [1:35:39<18:37,  5.64s/it]

Processed: Label - 0, Instance - 456


Cleaning Instances:  82%|████████▏ | 922/1119 [1:35:44<17:49,  5.43s/it]

Processed: Label - 0, Instance - 457


Cleaning Instances:  82%|████████▏ | 923/1119 [1:35:49<17:05,  5.23s/it]

Processed: Label - 0, Instance - 475


Cleaning Instances:  83%|████████▎ | 924/1119 [1:35:55<17:50,  5.49s/it]

Processed: Label - 0, Instance - 476


Cleaning Instances:  83%|████████▎ | 925/1119 [1:36:01<17:51,  5.52s/it]

Processed: Label - 0, Instance - 478


Cleaning Instances:  83%|████████▎ | 926/1119 [1:36:05<16:58,  5.28s/it]

Processed: Label - 0, Instance - 488


Cleaning Instances:  83%|████████▎ | 927/1119 [1:36:10<16:21,  5.11s/it]

Processed: Label - 0, Instance - 510


Cleaning Instances:  83%|████████▎ | 928/1119 [1:36:15<16:14,  5.10s/it]

Processed: Label - 0, Instance - 452


Cleaning Instances:  83%|████████▎ | 929/1119 [1:36:21<16:54,  5.34s/it]

Processed: Label - 0, Instance - 477


Cleaning Instances:  83%|████████▎ | 930/1119 [1:36:26<16:21,  5.20s/it]

Processed: Label - 0, Instance - 505


Cleaning Instances:  83%|████████▎ | 931/1119 [1:36:31<15:48,  5.05s/it]

Processed: Label - 0, Instance - 462


Cleaning Instances:  83%|████████▎ | 932/1119 [1:36:35<15:24,  4.94s/it]

Processed: Label - 0, Instance - 465


Cleaning Instances:  83%|████████▎ | 933/1119 [1:36:40<15:01,  4.85s/it]

Processed: Label - 0, Instance - 469


Cleaning Instances:  83%|████████▎ | 934/1119 [1:36:44<14:36,  4.74s/it]

Processed: Label - 0, Instance - 483


Cleaning Instances:  84%|████████▎ | 935/1119 [1:36:50<15:36,  5.09s/it]

Processed: Label - 0, Instance - 486


Cleaning Instances:  84%|████████▎ | 936/1119 [1:36:56<16:25,  5.38s/it]

Processed: Label - 0, Instance - 489


Cleaning Instances:  84%|████████▎ | 937/1119 [1:37:01<15:28,  5.10s/it]

Processed: Label - 0, Instance - 491


Cleaning Instances:  84%|████████▍ | 938/1119 [1:37:06<15:02,  4.99s/it]

Processed: Label - 0, Instance - 512


Cleaning Instances:  84%|████████▍ | 939/1119 [1:37:12<15:57,  5.32s/it]

Processed: Label - 0, Instance - 525


Cleaning Instances:  84%|████████▍ | 940/1119 [1:37:16<15:12,  5.10s/it]

Processed: Label - 0, Instance - 539


Cleaning Instances:  84%|████████▍ | 941/1119 [1:37:22<15:58,  5.38s/it]

Processed: Label - 0, Instance - 467


Cleaning Instances:  84%|████████▍ | 942/1119 [1:37:27<15:27,  5.24s/it]

Processed: Label - 0, Instance - 506


Cleaning Instances:  84%|████████▍ | 943/1119 [1:37:33<15:59,  5.45s/it]        

Processed: Label - 0, Instance - 511


Cleaning Instances:  84%|████████▍ | 944/1119 [1:37:37<14:50,  5.09s/it]

Processed: Label - 0, Instance - 536


Cleaning Instances:  84%|████████▍ | 945/1119 [1:37:42<14:18,  4.93s/it]

Processed: Label - 0, Instance - 537


Cleaning Instances:  85%|████████▍ | 946/1119 [1:37:46<13:42,  4.75s/it]

Processed: Label - 0, Instance - 453


Cleaning Instances:  85%|████████▍ | 947/1119 [1:37:51<13:48,  4.82s/it]

Processed: Label - 0, Instance - 468


Cleaning Instances:  85%|████████▍ | 948/1119 [1:37:57<14:39,  5.15s/it]

Processed: Label - 0, Instance - 474


Cleaning Instances:  85%|████████▍ | 949/1119 [1:38:02<13:59,  4.94s/it]

Processed: Label - 0, Instance - 519


Cleaning Instances:  85%|████████▍ | 950/1119 [1:38:06<13:31,  4.80s/it]

Processed: Label - 0, Instance - 532


Cleaning Instances:  85%|████████▍ | 951/1119 [1:38:11<13:24,  4.79s/it]

Processed: Label - 0, Instance - 439


Cleaning Instances:  85%|████████▌ | 952/1119 [1:38:15<13:05,  4.70s/it]

Processed: Label - 0, Instance - 445


Cleaning Instances:  85%|████████▌ | 953/1119 [1:38:21<14:01,  5.07s/it]

Processed: Label - 0, Instance - 535


Cleaning Instances:  85%|████████▌ | 954/1119 [1:38:26<13:40,  4.97s/it]

Processed: Label - 0, Instance - 459


Cleaning Instances:  85%|████████▌ | 955/1119 [1:38:32<14:09,  5.18s/it]

Processed: Label - 0, Instance - 463


Cleaning Instances:  85%|████████▌ | 956/1119 [1:38:38<14:43,  5.42s/it]

Processed: Label - 0, Instance - 509


Cleaning Instances:  86%|████████▌ | 957/1119 [1:38:44<15:01,  5.56s/it]

Processed: Label - 0, Instance - 513


Cleaning Instances:  86%|████████▌ | 958/1119 [1:38:49<14:36,  5.44s/it]

Processed: Label - 0, Instance - 527


Cleaning Instances:  86%|████████▌ | 959/1119 [1:38:54<13:59,  5.25s/it]

Processed: Label - 0, Instance - 529


Cleaning Instances:  86%|████████▌ | 960/1119 [1:38:58<13:39,  5.15s/it]

Processed: Label - 0, Instance - 533


Cleaning Instances:  86%|████████▌ | 961/1119 [1:39:05<14:15,  5.41s/it]

Processed: Label - 0, Instance - 440


Cleaning Instances:  86%|████████▌ | 962/1119 [1:39:09<13:27,  5.14s/it]

Processed: Label - 0, Instance - 471


Cleaning Instances:  86%|████████▌ | 963/1119 [1:39:14<13:08,  5.06s/it]

Processed: Label - 0, Instance - 480


Cleaning Instances:  86%|████████▌ | 964/1119 [1:39:20<13:35,  5.26s/it]

Processed: Label - 0, Instance - 487


Cleaning Instances:  86%|████████▌ | 965/1119 [1:39:24<12:53,  5.02s/it]

Processed: Label - 0, Instance - 498


Cleaning Instances:  86%|████████▋ | 966/1119 [1:39:30<13:28,  5.28s/it]

Processed: Label - 0, Instance - 514


Cleaning Instances:  86%|████████▋ | 967/1119 [1:39:35<12:52,  5.08s/it]

Processed: Label - 0, Instance - 515


Cleaning Instances:  87%|████████▋ | 968/1119 [1:39:40<12:44,  5.07s/it]

Processed: Label - 0, Instance - 522


Cleaning Instances:  87%|████████▋ | 969/1119 [1:39:44<12:16,  4.91s/it]

Processed: Label - 0, Instance - 538


Cleaning Instances:  87%|████████▋ | 970/1119 [1:39:48<11:44,  4.73s/it]

Processed: Label - 0, Instance - 443


Cleaning Instances:  87%|████████▋ | 971/1119 [1:39:54<12:34,  5.10s/it]

Processed: Label - 0, Instance - 446


Cleaning Instances:  87%|████████▋ | 972/1119 [1:40:01<13:16,  5.42s/it]

Processed: Label - 0, Instance - 458


Cleaning Instances:  87%|████████▋ | 973/1119 [1:40:07<14:13,  5.85s/it]        

Processed: Label - 0, Instance - 461


Cleaning Instances:  87%|████████▋ | 974/1119 [1:40:14<14:31,  6.01s/it]

Processed: Label - 0, Instance - 500


Cleaning Instances:  87%|████████▋ | 975/1119 [1:40:19<14:04,  5.86s/it]

Processed: Label - 0, Instance - 521


Cleaning Instances:  87%|████████▋ | 976/1119 [1:40:26<14:36,  6.13s/it]

Processed: Label - 0, Instance - 484


Cleaning Instances:  87%|████████▋ | 977/1119 [1:40:31<13:52,  5.86s/it]

Processed: Label - 0, Instance - 495


Cleaning Instances:  87%|████████▋ | 978/1119 [1:40:36<13:04,  5.56s/it]

Processed: Label - 0, Instance - 503


Cleaning Instances:  87%|████████▋ | 979/1119 [1:40:41<12:31,  5.37s/it]

Processed: Label - 0, Instance - 523


Cleaning Instances:  88%|████████▊ | 980/1119 [1:40:46<12:07,  5.23s/it]

Processed: Label - 0, Instance - 524


Cleaning Instances:  88%|████████▊ | 981/1119 [1:40:51<11:37,  5.05s/it]

Processed: Label - 0, Instance - 438


Cleaning Instances:  88%|████████▊ | 982/1119 [1:40:57<12:06,  5.30s/it]

Processed: Label - 0, Instance - 441


Cleaning Instances:  88%|████████▊ | 983/1119 [1:41:02<12:23,  5.46s/it]

Processed: Label - 0, Instance - 449


Cleaning Instances:  88%|████████▊ | 984/1119 [1:41:07<11:32,  5.13s/it]

Processed: Label - 0, Instance - 482


Cleaning Instances:  88%|████████▊ | 985/1119 [1:41:11<11:01,  4.94s/it]

Processed: Label - 0, Instance - 485


Cleaning Instances:  88%|████████▊ | 986/1119 [1:41:17<11:30,  5.19s/it]

Processed: Label - 0, Instance - 497


Cleaning Instances:  88%|████████▊ | 987/1119 [1:41:21<10:56,  4.97s/it]

Processed: Label - 0, Instance - 501


Cleaning Instances:  88%|████████▊ | 988/1119 [1:41:26<10:41,  4.90s/it]

Processed: Label - 0, Instance - 450


Cleaning Instances:  88%|████████▊ | 989/1119 [1:41:31<10:46,  4.97s/it]

Processed: Label - 0, Instance - 460


Cleaning Instances:  88%|████████▊ | 990/1119 [1:41:36<10:29,  4.88s/it]

Processed: Label - 0, Instance - 502


Cleaning Instances:  89%|████████▊ | 991/1119 [1:41:42<11:03,  5.19s/it]

Processed: Label - 0, Instance - 508


Cleaning Instances:  89%|████████▊ | 992/1119 [1:41:49<11:55,  5.64s/it]        

Processed: Label - 0, Instance - 518


Cleaning Instances:  89%|████████▊ | 993/1119 [1:41:53<11:17,  5.38s/it]

Processed: Label - 0, Instance - 526


Cleaning Instances:  89%|████████▉ | 994/1119 [1:41:59<11:36,  5.57s/it]

Processed: Label - 0, Instance - 499


Cleaning Instances:  89%|████████▉ | 995/1119 [1:42:06<11:51,  5.74s/it]

Processed: Label - 0, Instance - 472


Cleaning Instances:  89%|████████▉ | 996/1119 [1:42:11<11:53,  5.80s/it]

Processed: Label - 0, Instance - 473


Cleaning Instances:  89%|████████▉ | 997/1119 [1:42:18<11:57,  5.88s/it]

Processed: Label - 0, Instance - 479


Cleaning Instances:  89%|████████▉ | 998/1119 [1:42:23<11:50,  5.87s/it]

Processed: Label - 0, Instance - 490


Cleaning Instances:  89%|████████▉ | 999/1119 [1:42:29<11:46,  5.88s/it]

Processed: Label - 0, Instance - 492


Cleaning Instances:  89%|████████▉ | 1000/1119 [1:42:35<11:39,  5.87s/it]

Processed: Label - 0, Instance - 493


Cleaning Instances:  89%|████████▉ | 1001/1119 [1:42:40<10:56,  5.56s/it]

Processed: Label - 0, Instance - 496


Cleaning Instances:  90%|████████▉ | 1002/1119 [1:42:44<10:12,  5.24s/it]

Processed: Label - 0, Instance - 517


Cleaning Instances:  90%|████████▉ | 1003/1119 [1:42:49<09:56,  5.14s/it]

Processed: Label - 0, Instance - 520


Cleaning Instances:  90%|████████▉ | 1004/1119 [1:42:58<11:34,  6.04s/it]

Processed: Label - 9, Instance - 1


24/11/27 01:55:01 WARN TaskSetManager: Stage 105539 contains a task of very large size (2770 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:  90%|████████▉ | 1005/1119 [1:43:24<23:21, 12.29s/it]       

Processed: Label - 9, Instance - 2


Cleaning Instances:  90%|████████▉ | 1006/1119 [1:43:32<20:17, 10.77s/it]

Processed: Label - 9, Instance - 5


Cleaning Instances:  90%|████████▉ | 1007/1119 [1:43:40<18:57, 10.16s/it]

Processed: Label - 9, Instance - 8


24/11/27 01:55:28 WARN TaskSetManager: Stage 105854 contains a task of very large size (1012 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:  90%|█████████ | 1008/1119 [1:43:51<18:59, 10.26s/it]

Processed: Label - 9, Instance - 18


Cleaning Instances:  90%|█████████ | 1009/1119 [1:43:56<16:12,  8.84s/it]

Processed: Label - 9, Instance - 10


Cleaning Instances:  90%|█████████ | 1010/1119 [1:44:00<13:28,  7.41s/it]

Processed: Label - 9, Instance - 17


Cleaning Instances:  90%|█████████ | 1011/1119 [1:44:08<13:14,  7.36s/it]

Processed: Label - 9, Instance - 6


Cleaning Instances:  90%|█████████ | 1012/1119 [1:44:17<14:15,  8.00s/it]

Processed: Label - 9, Instance - 9


24/11/27 01:56:05 WARN TaskSetManager: Stage 106379 contains a task of very large size (1039 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:  91%|█████████ | 1013/1119 [1:44:28<15:29,  8.76s/it]

Processed: Label - 9, Instance - 3


Cleaning Instances:  91%|█████████ | 1014/1119 [1:44:33<13:39,  7.80s/it]

Processed: Label - 9, Instance - 7


Cleaning Instances:  91%|█████████ | 1015/1119 [1:44:38<11:55,  6.88s/it]

Processed: Label - 9, Instance - 14


Cleaning Instances:  91%|█████████ | 1016/1119 [1:44:43<10:39,  6.21s/it]

Processed: Label - 9, Instance - 11


Cleaning Instances:  91%|█████████ | 1017/1119 [1:44:48<10:17,  6.05s/it]

Processed: Label - 9, Instance - 15


Cleaning Instances:  91%|█████████ | 1018/1119 [1:44:52<09:11,  5.46s/it]

Processed: Label - 9, Instance - 12


Cleaning Instances:  91%|█████████ | 1019/1119 [1:45:03<11:52,  7.13s/it]       

Processed: Label - 9, Instance - 0


Cleaning Instances:  91%|█████████ | 1020/1119 [1:45:12<12:36,  7.64s/it]

Processed: Label - 9, Instance - 13


Cleaning Instances:  91%|█████████ | 1021/1119 [1:45:17<10:58,  6.72s/it]

Processed: Label - 9, Instance - 16


Cleaning Instances:  91%|█████████▏| 1022/1119 [1:45:24<11:04,  6.85s/it]

Processed: Label - 9, Instance - 4


24/11/27 01:57:33 WARN TaskSetManager: Stage 107429 contains a task of very large size (3654 KiB). The maximum recommended task size is 1000 KiB.
Cleaning Instances:  91%|█████████▏| 1023/1119 [1:45:58<23:56, 14.96s/it]       

Processed: Label - 8, Instance - 705


Cleaning Instances:  92%|█████████▏| 1024/1119 [1:46:02<18:42, 11.81s/it]

Processed: Label - 4, Instance - 774


Cleaning Instances:  92%|█████████▏| 1025/1119 [1:46:06<14:47,  9.44s/it]

Processed: Label - 4, Instance - 782


Cleaning Instances:  92%|█████████▏| 1026/1119 [1:46:10<12:07,  7.82s/it]

Processed: Label - 4, Instance - 797


Cleaning Instances:  92%|█████████▏| 1027/1119 [1:46:14<10:16,  6.70s/it]

Processed: Label - 4, Instance - 813


Cleaning Instances:  92%|█████████▏| 1028/1119 [1:46:19<09:08,  6.03s/it]

Processed: Label - 4, Instance - 821


Cleaning Instances:  92%|█████████▏| 1029/1119 [1:46:23<08:21,  5.57s/it]

Processed: Label - 4, Instance - 827


Cleaning Instances:  92%|█████████▏| 1030/1119 [1:46:28<07:48,  5.27s/it]

Processed: Label - 4, Instance - 839


Cleaning Instances:  92%|█████████▏| 1031/1119 [1:46:32<07:10,  4.90s/it]

Processed: Label - 4, Instance - 844


Cleaning Instances:  92%|█████████▏| 1032/1119 [1:46:36<06:41,  4.61s/it]

Processed: Label - 4, Instance - 761


Cleaning Instances:  92%|█████████▏| 1033/1119 [1:46:40<06:22,  4.45s/it]

Processed: Label - 4, Instance - 778


Cleaning Instances:  92%|█████████▏| 1034/1119 [1:46:45<06:31,  4.61s/it]

Processed: Label - 4, Instance - 790


Cleaning Instances:  92%|█████████▏| 1035/1119 [1:46:49<06:12,  4.44s/it]

Processed: Label - 4, Instance - 802


Cleaning Instances:  93%|█████████▎| 1036/1119 [1:46:53<05:58,  4.32s/it]

Processed: Label - 4, Instance - 785


Cleaning Instances:  93%|█████████▎| 1037/1119 [1:46:58<05:59,  4.38s/it]

Processed: Label - 4, Instance - 799


Cleaning Instances:  93%|█████████▎| 1038/1119 [1:47:02<06:03,  4.49s/it]

Processed: Label - 4, Instance - 810


Cleaning Instances:  93%|█████████▎| 1039/1119 [1:47:06<05:43,  4.29s/it]

Processed: Label - 4, Instance - 811


Cleaning Instances:  93%|█████████▎| 1040/1119 [1:47:10<05:31,  4.20s/it]

Processed: Label - 4, Instance - 812


Cleaning Instances:  93%|█████████▎| 1041/1119 [1:47:15<05:41,  4.38s/it]

Processed: Label - 4, Instance - 829


Cleaning Instances:  93%|█████████▎| 1042/1119 [1:47:19<05:41,  4.43s/it]

Processed: Label - 4, Instance - 841


Cleaning Instances:  93%|█████████▎| 1043/1119 [1:47:24<05:40,  4.48s/it]

Processed: Label - 4, Instance - 767


Cleaning Instances:  93%|█████████▎| 1044/1119 [1:47:29<05:39,  4.53s/it]

Processed: Label - 4, Instance - 816


Cleaning Instances:  93%|█████████▎| 1045/1119 [1:47:33<05:38,  4.57s/it]

Processed: Label - 4, Instance - 823


Cleaning Instances:  93%|█████████▎| 1046/1119 [1:47:37<05:18,  4.37s/it]

Processed: Label - 4, Instance - 832


Cleaning Instances:  94%|█████████▎| 1047/1119 [1:47:41<05:07,  4.27s/it]

Processed: Label - 4, Instance - 835


Cleaning Instances:  94%|█████████▎| 1048/1119 [1:47:46<05:09,  4.36s/it]

Processed: Label - 4, Instance - 847


Cleaning Instances:  94%|█████████▎| 1049/1119 [1:47:50<05:08,  4.41s/it]

Processed: Label - 4, Instance - 848


Cleaning Instances:  94%|█████████▍| 1050/1119 [1:47:54<04:52,  4.24s/it]

Processed: Label - 4, Instance - 803


Cleaning Instances:  94%|█████████▍| 1051/1119 [1:47:58<04:41,  4.14s/it]

Processed: Label - 4, Instance - 804


Cleaning Instances:  94%|█████████▍| 1052/1119 [1:48:02<04:31,  4.05s/it]

Processed: Label - 4, Instance - 836


Cleaning Instances:  94%|█████████▍| 1053/1119 [1:48:07<04:39,  4.24s/it]

Processed: Label - 4, Instance - 840


Cleaning Instances:  94%|█████████▍| 1054/1119 [1:48:12<05:00,  4.62s/it]

Processed: Label - 4, Instance - 843


Cleaning Instances:  94%|█████████▍| 1055/1119 [1:48:17<05:06,  4.79s/it]

Processed: Label - 4, Instance - 775


Cleaning Instances:  94%|█████████▍| 1056/1119 [1:48:22<05:01,  4.78s/it]

Processed: Label - 4, Instance - 777


Cleaning Instances:  94%|█████████▍| 1057/1119 [1:48:26<04:46,  4.62s/it]

Processed: Label - 4, Instance - 779


Cleaning Instances:  95%|█████████▍| 1058/1119 [1:48:31<04:39,  4.58s/it]

Processed: Label - 4, Instance - 783


Cleaning Instances:  95%|█████████▍| 1059/1119 [1:48:36<04:46,  4.78s/it]

Processed: Label - 4, Instance - 784


Cleaning Instances:  95%|█████████▍| 1060/1119 [1:48:41<04:36,  4.69s/it]

Processed: Label - 4, Instance - 796


Cleaning Instances:  95%|█████████▍| 1061/1119 [1:48:45<04:26,  4.60s/it]

Processed: Label - 4, Instance - 806


Cleaning Instances:  95%|█████████▍| 1062/1119 [1:48:50<04:23,  4.63s/it]

Processed: Label - 4, Instance - 825


Cleaning Instances:  95%|█████████▍| 1063/1119 [1:48:55<04:26,  4.75s/it]

Processed: Label - 4, Instance - 851


Cleaning Instances:  95%|█████████▌| 1064/1119 [1:49:00<04:22,  4.77s/it]

Processed: Label - 4, Instance - 762


Cleaning Instances:  95%|█████████▌| 1065/1119 [1:49:04<04:19,  4.80s/it]

Processed: Label - 4, Instance - 787


Cleaning Instances:  95%|█████████▌| 1066/1119 [1:49:09<04:17,  4.86s/it]

Processed: Label - 4, Instance - 792


Cleaning Instances:  95%|█████████▌| 1067/1119 [1:49:14<04:13,  4.88s/it]

Processed: Label - 4, Instance - 805


Cleaning Instances:  95%|█████████▌| 1068/1119 [1:49:18<03:54,  4.59s/it]

Processed: Label - 4, Instance - 824


Cleaning Instances:  96%|█████████▌| 1069/1119 [1:49:22<03:39,  4.39s/it]

Processed: Label - 4, Instance - 853


Cleaning Instances:  96%|█████████▌| 1070/1119 [1:49:27<03:40,  4.49s/it]

Processed: Label - 4, Instance - 771


Cleaning Instances:  96%|█████████▌| 1071/1119 [1:49:31<03:33,  4.46s/it]

Processed: Label - 4, Instance - 780


Cleaning Instances:  96%|█████████▌| 1072/1119 [1:49:36<03:27,  4.41s/it]

Processed: Label - 4, Instance - 793


Cleaning Instances:  96%|█████████▌| 1073/1119 [1:49:40<03:22,  4.41s/it]

Processed: Label - 4, Instance - 834


Cleaning Instances:  96%|█████████▌| 1074/1119 [1:49:44<03:18,  4.40s/it]

Processed: Label - 4, Instance - 845


Cleaning Instances:  96%|█████████▌| 1075/1119 [1:49:48<03:09,  4.31s/it]

Processed: Label - 4, Instance - 854


Cleaning Instances:  96%|█████████▌| 1076/1119 [1:49:53<03:03,  4.26s/it]

Processed: Label - 4, Instance - 765


Cleaning Instances:  96%|█████████▌| 1077/1119 [1:49:57<02:58,  4.25s/it]

Processed: Label - 4, Instance - 769


Cleaning Instances:  96%|█████████▋| 1078/1119 [1:50:01<02:52,  4.21s/it]

Processed: Label - 4, Instance - 772


Cleaning Instances:  96%|█████████▋| 1079/1119 [1:50:05<02:51,  4.30s/it]

Processed: Label - 4, Instance - 818


Cleaning Instances:  97%|█████████▋| 1080/1119 [1:50:09<02:44,  4.22s/it]

Processed: Label - 4, Instance - 820


Cleaning Instances:  97%|█████████▋| 1081/1119 [1:50:14<02:44,  4.33s/it]

Processed: Label - 4, Instance - 846


Cleaning Instances:  97%|█████████▋| 1082/1119 [1:50:18<02:36,  4.23s/it]

Processed: Label - 4, Instance - 794


Cleaning Instances:  97%|█████████▋| 1083/1119 [1:50:22<02:30,  4.19s/it]

Processed: Label - 4, Instance - 801


Cleaning Instances:  97%|█████████▋| 1084/1119 [1:50:27<02:31,  4.32s/it]

Processed: Label - 4, Instance - 814


Cleaning Instances:  97%|█████████▋| 1085/1119 [1:50:31<02:27,  4.35s/it]       

Processed: Label - 4, Instance - 830


Cleaning Instances:  97%|█████████▋| 1086/1119 [1:50:35<02:21,  4.30s/it]

Processed: Label - 4, Instance - 849


Cleaning Instances:  97%|█████████▋| 1087/1119 [1:50:40<02:17,  4.30s/it]

Processed: Label - 4, Instance - 788


Cleaning Instances:  97%|█████████▋| 1088/1119 [1:50:44<02:14,  4.35s/it]

Processed: Label - 4, Instance - 791


Cleaning Instances:  97%|█████████▋| 1089/1119 [1:50:48<02:09,  4.33s/it]

Processed: Label - 4, Instance - 798


Cleaning Instances:  97%|█████████▋| 1090/1119 [1:50:52<02:00,  4.14s/it]

Processed: Label - 4, Instance - 822


Cleaning Instances:  97%|█████████▋| 1091/1119 [1:50:56<01:55,  4.14s/it]

Processed: Label - 4, Instance - 826


Cleaning Instances:  98%|█████████▊| 1092/1119 [1:51:00<01:50,  4.09s/it]

Processed: Label - 4, Instance - 856


Cleaning Instances:  98%|█████████▊| 1093/1119 [1:51:05<01:50,  4.27s/it]

Processed: Label - 4, Instance - 773


Cleaning Instances:  98%|█████████▊| 1094/1119 [1:51:09<01:44,  4.18s/it]

Processed: Label - 4, Instance - 781


Cleaning Instances:  98%|█████████▊| 1095/1119 [1:51:13<01:37,  4.08s/it]

Processed: Label - 4, Instance - 789


Cleaning Instances:  98%|█████████▊| 1096/1119 [1:51:17<01:33,  4.09s/it]

Processed: Label - 4, Instance - 815


Cleaning Instances:  98%|█████████▊| 1097/1119 [1:51:21<01:29,  4.05s/it]

Processed: Label - 4, Instance - 817


Cleaning Instances:  98%|█████████▊| 1098/1119 [1:51:26<01:29,  4.26s/it]

Processed: Label - 4, Instance - 819


Cleaning Instances:  98%|█████████▊| 1099/1119 [1:51:30<01:26,  4.33s/it]

Processed: Label - 4, Instance - 850


Cleaning Instances:  98%|█████████▊| 1100/1119 [1:51:35<01:23,  4.41s/it]

Processed: Label - 4, Instance - 800


Cleaning Instances:  98%|█████████▊| 1101/1119 [1:51:39<01:20,  4.45s/it]

Processed: Label - 4, Instance - 808


Cleaning Instances:  98%|█████████▊| 1102/1119 [1:51:43<01:13,  4.33s/it]

Processed: Label - 4, Instance - 837


Cleaning Instances:  99%|█████████▊| 1103/1119 [1:51:48<01:11,  4.49s/it]

Processed: Label - 4, Instance - 766


Cleaning Instances:  99%|█████████▊| 1104/1119 [1:51:53<01:07,  4.50s/it]

Processed: Label - 4, Instance - 768


Cleaning Instances:  99%|█████████▊| 1105/1119 [1:51:57<01:00,  4.35s/it]

Processed: Label - 4, Instance - 770


Cleaning Instances:  99%|█████████▉| 1106/1119 [1:52:00<00:54,  4.19s/it]

Processed: Label - 4, Instance - 776


Cleaning Instances:  99%|█████████▉| 1107/1119 [1:52:04<00:49,  4.10s/it]

Processed: Label - 4, Instance - 828


Cleaning Instances:  99%|█████████▉| 1108/1119 [1:52:09<00:47,  4.29s/it]

Processed: Label - 4, Instance - 831


Cleaning Instances:  99%|█████████▉| 1109/1119 [1:52:13<00:42,  4.21s/it]

Processed: Label - 4, Instance - 852


Cleaning Instances:  99%|█████████▉| 1110/1119 [1:52:18<00:38,  4.31s/it]

Processed: Label - 4, Instance - 763


Cleaning Instances:  99%|█████████▉| 1111/1119 [1:52:22<00:34,  4.36s/it]

Processed: Label - 4, Instance - 764


Cleaning Instances:  99%|█████████▉| 1112/1119 [1:52:27<00:30,  4.40s/it]

Processed: Label - 4, Instance - 838


Cleaning Instances:  99%|█████████▉| 1113/1119 [1:52:31<00:25,  4.27s/it]

Processed: Label - 4, Instance - 842


Cleaning Instances: 100%|█████████▉| 1114/1119 [1:52:35<00:21,  4.36s/it]

Processed: Label - 4, Instance - 786


Cleaning Instances: 100%|█████████▉| 1115/1119 [1:52:39<00:17,  4.27s/it]

Processed: Label - 4, Instance - 795


Cleaning Instances: 100%|█████████▉| 1116/1119 [1:52:44<00:12,  4.33s/it]

Processed: Label - 4, Instance - 807


Cleaning Instances: 100%|█████████▉| 1117/1119 [1:52:47<00:08,  4.17s/it]

Processed: Label - 4, Instance - 809


Cleaning Instances: 100%|█████████▉| 1118/1119 [1:52:52<00:04,  4.15s/it]

Processed: Label - 4, Instance - 833


Cleaning Instances: 100%|██████████| 1119/1119 [1:52:56<00:00,  6.06s/it]

Processed: Label - 4, Instance - 855
Cleaning process completed. Data exported to: ../Cleaning & Preparation/Staged Cleaning Data/Stage 1





In [8]:
### Transform Class Column to Binary Classification
def transform_class_column_in_cleaned_data(base_path, output_path):
    """
    Transforms the 'class' column into a binary classification column ('target').

    Walks ``base_path`` expecting one sub-directory per label, each holding
    entries whose names end in ``.parquet``. Every such entry is read with
    the notebook's global ``spark`` session, a ``target`` column is appended,
    and the result is written to the same relative location under
    ``output_path`` (existing output is overwritten).

    Mapping applied to ``class``:
        * 3, 4, 101, 102, 105, 106, 107, 108, 109  -> target = 1
        * any other value, including null          -> target = 0

    Parameters:
        base_path (str): Directory containing the cleaned instances.
        output_path (str): Directory path to save transformed instances.

    Returns:
        None

    Notes:
        A failure on one instance is reported and the loop moves on to the
        next one; a summary of all failed instances is printed at the end so
        partial failures are not lost in the per-instance log output.
    """
    # Class codes that map to the positive target. Everything else maps to 0.
    positive_classes = (3, 4, 101, 102, 105, 106, 107, 108, 109)

    # Paths of instances that raised during read/transform/write.
    failed_instances = []

    # os.listdir returns entries in arbitrary order; sort for reproducible runs.
    for label_dir in tqdm(sorted(os.listdir(base_path)), desc="Transforming Class Column"):
        label_path = os.path.join(base_path, label_dir)
        if not os.path.isdir(label_path):
            continue

        for instance_dir in sorted(os.listdir(label_path)):
            instance_path = os.path.join(label_path, instance_dir)
            if not instance_path.endswith(".parquet"):
                continue

            try:
                instance_df = spark.read.parquet(instance_path)
                if "class" not in instance_df.columns:
                    print(f"Skipping instance {instance_dir}: 'class' column not found.")
                    continue

                # A null `class` makes isin() evaluate to NULL, which `when`
                # treats as "not matched", so nulls fall through to 0 here.
                # (Listing None inside isin() would not match nulls either.)
                updated_df = instance_df.withColumn(
                    "target",
                    when(col("class").isin(*positive_classes), 1).otherwise(lit(0))
                )

                output_label_dir = os.path.join(output_path, label_dir)
                os.makedirs(output_label_dir, exist_ok=True)
                output_file = os.path.join(output_label_dir, instance_dir)
                updated_df.write.mode("overwrite").parquet(output_file)
                print(f"Processed: Label - {label_dir}, Instance - {instance_dir}")

            except Exception as e:
                # Best-effort batch job: record the failure and keep going.
                print(f"Error processing {instance_path}: {e}")
                failed_instances.append(instance_path)
                continue

    if failed_instances:
        print(f"{len(failed_instances)} instance(s) failed and were not written:")
        for failed_path in failed_instances:
            print(f"  - {failed_path}")

    print(f"Processed data saved to: {output_path}")

# Add the binary 'target' column: read instances from Stage 1, write them to Stage 2
transform_class_column_in_cleaned_data(
    base_path=os.path.join(STAGED_DATA_PATH, "Stage 1"),
    output_path=os.path.join(STAGED_DATA_PATH, "Stage 2"),
)

Transforming Class Column:   0%|          | 0/10 [00:00<?, ?it/s]

Processed: Label - Label_9, Instance - Instance_11.parquet
Processed: Label - Label_9, Instance - Instance_5.parquet
Processed: Label - Label_9, Instance - Instance_18.parquet
Processed: Label - Label_9, Instance - Instance_45.parquet
Processed: Label - Label_9, Instance - Instance_55.parquet
Processed: Label - Label_9, Instance - Instance_27.parquet
Processed: Label - Label_9, Instance - Instance_37.parquet
Processed: Label - Label_9, Instance - Instance_19.parquet
Processed: Label - Label_9, Instance - Instance_54.parquet
Processed: Label - Label_9, Instance - Instance_44.parquet
Processed: Label - Label_9, Instance - Instance_36.parquet
Processed: Label - Label_9, Instance - Instance_26.parquet


                                                                                

Processed: Label - Label_9, Instance - Instance_10.parquet
Processed: Label - Label_9, Instance - Instance_4.parquet
Processed: Label - Label_9, Instance - Instance_24.parquet
Processed: Label - Label_9, Instance - Instance_34.parquet
Processed: Label - Label_9, Instance - Instance_46.parquet
Processed: Label - Label_9, Instance - Instance_56.parquet
Processed: Label - Label_9, Instance - Instance_6.parquet
Processed: Label - Label_9, Instance - Instance_12.parquet
Processed: Label - Label_9, Instance - Instance_7.parquet
Processed: Label - Label_9, Instance - Instance_13.parquet
Processed: Label - Label_9, Instance - Instance_35.parquet
Processed: Label - Label_9, Instance - Instance_25.parquet
Processed: Label - Label_9, Instance - Instance_47.parquet
Processed: Label - Label_9, Instance - Instance_20.parquet
Processed: Label - Label_9, Instance - Instance_30.parquet
Processed: Label - Label_9, Instance - Instance_42.parquet
Processed: Label - Label_9, Instance - Instance_52.parquet


                                                                                

Processed: Label - Label_9, Instance - Instance_2.parquet
Processed: Label - Label_9, Instance - Instance_16.parquet
Processed: Label - Label_9, Instance - Instance_28.parquet
Processed: Label - Label_9, Instance - Instance_38.parquet
Processed: Label - Label_9, Instance - Instance_3.parquet
Processed: Label - Label_9, Instance - Instance_17.parquet
Processed: Label - Label_9, Instance - Instance_31.parquet
Processed: Label - Label_9, Instance - Instance_21.parquet
Processed: Label - Label_9, Instance - Instance_53.parquet
Processed: Label - Label_9, Instance - Instance_43.parquet
Processed: Label - Label_9, Instance - Instance_48.parquet
Processed: Label - Label_9, Instance - Instance_15.parquet
Processed: Label - Label_9, Instance - Instance_1.parquet
Processed: Label - Label_9, Instance - Instance_41.parquet
Processed: Label - Label_9, Instance - Instance_51.parquet
Processed: Label - Label_9, Instance - Instance_23.parquet
Processed: Label - Label_9, Instance - Instance_33.parquet


Transforming Class Column:  10%|█         | 1/10 [00:37<05:39, 37.70s/it]

Processed: Label - Label_9, Instance - Instance_0.parquet
Processed: Label - Label_7, Instance - Instance_653.parquet
Processed: Label - Label_7, Instance - Instance_675.parquet
Processed: Label - Label_7, Instance - Instance_665.parquet
Processed: Label - Label_7, Instance - Instance_681.parquet


                                                                                

Processed: Label - Label_7, Instance - Instance_664.parquet
Processed: Label - Label_7, Instance - Instance_674.parquet
Processed: Label - Label_7, Instance - Instance_680.parquet
Processed: Label - Label_7, Instance - Instance_652.parquet
Processed: Label - Label_7, Instance - Instance_676.parquet
Processed: Label - Label_7, Instance - Instance_666.parquet
Processed: Label - Label_7, Instance - Instance_682.parquet
Processed: Label - Label_7, Instance - Instance_659.parquet
Processed: Label - Label_7, Instance - Instance_651.parquet
Processed: Label - Label_7, Instance - Instance_667.parquet
Processed: Label - Label_7, Instance - Instance_677.parquet


                                                                                

Processed: Label - Label_7, Instance - Instance_683.parquet


                                                                                

Processed: Label - Label_7, Instance - Instance_658.parquet
Processed: Label - Label_7, Instance - Instance_686.parquet
Processed: Label - Label_7, Instance - Instance_672.parquet


                                                                                

Processed: Label - Label_7, Instance - Instance_662.parquet
Processed: Label - Label_7, Instance - Instance_654.parquet
Processed: Label - Label_7, Instance - Instance_655.parquet


                                                                                

Processed: Label - Label_7, Instance - Instance_663.parquet


                                                                                

Processed: Label - Label_7, Instance - Instance_673.parquet
Processed: Label - Label_7, Instance - Instance_657.parquet
Processed: Label - Label_7, Instance - Instance_668.parquet


                                                                                

Processed: Label - Label_7, Instance - Instance_678.parquet
Processed: Label - Label_7, Instance - Instance_685.parquet


                                                                                

Processed: Label - Label_7, Instance - Instance_671.parquet


                                                                                

Processed: Label - Label_7, Instance - Instance_661.parquet
Processed: Label - Label_7, Instance - Instance_684.parquet
Processed: Label - Label_7, Instance - Instance_660.parquet
Processed: Label - Label_7, Instance - Instance_670.parquet


                                                                                

Processed: Label - Label_7, Instance - Instance_656.parquet
Processed: Label - Label_7, Instance - Instance_679.parquet


Transforming Class Column:  20%|██        | 2/10 [01:06<04:21, 32.73s/it]

Processed: Label - Label_7, Instance - Instance_669.parquet
Processed: Label - Label_0, Instance - Instance_245.parquet
Processed: Label - Label_0, Instance - Instance_255.parquet
Processed: Label - Label_0, Instance - Instance_410.parquet
Processed: Label - Label_0, Instance - Instance_568.parquet
Processed: Label - Label_0, Instance - Instance_400.parquet
Processed: Label - Label_0, Instance - Instance_578.parquet
Processed: Label - Label_0, Instance - Instance_123.parquet
Processed: Label - Label_0, Instance - Instance_133.parquet
Processed: Label - Label_0, Instance - Instance_384.parquet
Processed: Label - Label_0, Instance - Instance_394.parquet
Processed: Label - Label_0, Instance - Instance_141.parquet
Processed: Label - Label_0, Instance - Instance_151.parquet
Processed: Label - Label_0, Instance - Instance_63.parquet
Processed: Label - Label_0, Instance - Instance_73.parquet
Processed: Label - Label_0, Instance - Instance_472.parquet
Processed: Label - Label_0, Instance - Ins

                                                                                

Processed: Label - Label_0, Instance - Instance_454.parquet
Processed: Label - Label_0, Instance - Instance_444.parquet
Processed: Label - Label_0, Instance - Instance_201.parquet
Processed: Label - Label_0, Instance - Instance_379.parquet
Processed: Label - Label_0, Instance - Instance_211.parquet
Processed: Label - Label_0, Instance - Instance_369.parquet
Processed: Label - Label_0, Instance - Instance_167.parquet
Processed: Label - Label_0, Instance - Instance_177.parquet
Processed: Label - Label_0, Instance - Instance_595.parquet
Processed: Label - Label_0, Instance - Instance_585.parquet
Processed: Label - Label_0, Instance - Instance_628.parquet
Processed: Label - Label_0, Instance - Instance_638.parquet
Processed: Label - Label_0, Instance - Instance_105.parquet
Processed: Label - Label_0, Instance - Instance_115.parquet
Processed: Label - Label_0, Instance - Instance_263.parquet
Processed: Label - Label_0, Instance - Instance_273.parquet
Processed: Label - Label_0, Instance - I

                                                                                

Processed: Label - Label_0, Instance - Instance_570.parquet
Processed: Label - Label_0, Instance - Instance_408.parquet
Processed: Label - Label_0, Instance - Instance_335.parquet
Processed: Label - Label_0, Instance - Instance_325.parquet
Processed: Label - Label_0, Instance - Instance_606.parquet
Processed: Label - Label_0, Instance - Instance_616.parquet
Processed: Label - Label_0, Instance - Instance_584.parquet
Processed: Label - Label_0, Instance - Instance_594.parquet
Processed: Label - Label_0, Instance - Instance_368.parquet
Processed: Label - Label_0, Instance - Instance_210.parquet
Processed: Label - Label_0, Instance - Instance_378.parquet
Processed: Label - Label_0, Instance - Instance_200.parquet
Processed: Label - Label_0, Instance - Instance_445.parquet
Processed: Label - Label_0, Instance - Instance_455.parquet
Processed: Label - Label_0, Instance - Instance_176.parquet
Processed: Label - Label_0, Instance - Instance_166.parquet
Processed: Label - Label_0, Instance - I

                                                                                

Processed: Label - Label_0, Instance - Instance_642.parquet
Processed: Label - Label_0, Instance - Instance_180.parquet
Processed: Label - Label_0, Instance - Instance_190.parquet
Processed: Label - Label_0, Instance - Instance_614.parquet
Processed: Label - Label_0, Instance - Instance_604.parquet
Processed: Label - Label_0, Instance - Instance_139.parquet
Processed: Label - Label_0, Instance - Instance_129.parquet
Processed: Label - Label_0, Instance - Instance_327.parquet
Processed: Label - Label_0, Instance - Instance_337.parquet
Processed: Label - Label_0, Instance - Instance_572.parquet


                                                                                

Processed: Label - Label_0, Instance - Instance_562.parquet
Processed: Label - Label_0, Instance - Instance_510.parquet
Processed: Label - Label_0, Instance - Instance_468.parquet
Processed: Label - Label_0, Instance - Instance_500.parquet
Processed: Label - Label_0, Instance - Instance_478.parquet
Processed: Label - Label_0, Instance - Instance_345.parquet
Processed: Label - Label_0, Instance - Instance_355.parquet
Processed: Label - Label_0, Instance - Instance_79.parquet
Processed: Label - Label_0, Instance - Instance_69.parquet


                                                                                

Processed: Label - Label_0, Instance - Instance_284.parquet
Processed: Label - Label_0, Instance - Instance_294.parquet
Processed: Label - Label_0, Instance - Instance_318.parquet
Processed: Label - Label_0, Instance - Instance_260.parquet
Processed: Label - Label_0, Instance - Instance_308.parquet
Processed: Label - Label_0, Instance - Instance_270.parquet
Processed: Label - Label_0, Instance - Instance_435.parquet
Processed: Label - Label_0, Instance - Instance_425.parquet
Processed: Label - Label_0, Instance - Instance_106.parquet
Processed: Label - Label_0, Instance - Instance_116.parquet
Processed: Label - Label_0, Instance - Instance_596.parquet
Processed: Label - Label_0, Instance - Instance_586.parquet
Processed: Label - Label_0, Instance - Instance_164.parquet
Processed: Label - Label_0, Instance - Instance_174.parquet
Processed: Label - Label_0, Instance - Instance_649.parquet
Processed: Label - Label_0, Instance - Instance_457.parquet
Processed: Label - Label_0, Instance - I

                                                                                

Processed: Label - Label_0, Instance - Instance_212.parquet
Processed: Label - Label_0, Instance - Instance_509.parquet
Processed: Label - Label_0, Instance - Instance_471.parquet
Processed: Label - Label_0, Instance - Instance_519.parquet
Processed: Label - Label_0, Instance - Instance_461.parquet
Processed: Label - Label_0, Instance - Instance_224.parquet
Processed: Label - Label_0, Instance - Instance_234.parquet
Processed: Label - Label_0, Instance - Instance_142.parquet
Processed: Label - Label_0, Instance - Instance_152.parquet
Processed: Label - Label_0, Instance - Instance_60.parquet
Processed: Label - Label_0, Instance - Instance_70.parquet
Processed: Label - Label_0, Instance - Instance_387.parquet


                                                                                

Processed: Label - Label_0, Instance - Instance_397.parquet
Processed: Label - Label_0, Instance - Instance_199.parquet
Processed: Label - Label_0, Instance - Instance_189.parquet
Processed: Label - Label_0, Instance - Instance_120.parquet
Processed: Label - Label_0, Instance - Instance_130.parquet
Processed: Label - Label_0, Instance - Instance_246.parquet
Processed: Label - Label_0, Instance - Instance_256.parquet
Processed: Label - Label_0, Instance - Instance_413.parquet
Processed: Label - Label_0, Instance - Instance_403.parquet
Processed: Label - Label_0, Instance - Instance_650.parquet


                                                                                

Processed: Label - Label_0, Instance - Instance_640.parquet
Processed: Label - Label_0, Instance - Instance_536.parquet
Processed: Label - Label_0, Instance - Instance_526.parquet
Processed: Label - Label_0, Instance - Instance_363.parquet
Processed: Label - Label_0, Instance - Instance_373.parquet
Processed: Label - Label_0, Instance - Instance_301.parquet
Processed: Label - Label_0, Instance - Instance_279.parquet
Processed: Label - Label_0, Instance - Instance_311.parquet
Processed: Label - Label_0, Instance - Instance_269.parquet
Processed: Label - Label_0, Instance - Instance_554.parquet


                                                                                

Processed: Label - Label_0, Instance - Instance_544.parquet
Processed: Label - Label_0, Instance - Instance_632.parquet
Processed: Label - Label_0, Instance - Instance_622.parquet
Processed: Label - Label_0, Instance - Instance_84.parquet
Processed: Label - Label_0, Instance - Instance_94.parquet
Processed: Label - Label_0, Instance - Instance_495.parquet
Processed: Label - Label_0, Instance - Instance_485.parquet
Processed: Label - Label_0, Instance - Instance_235.parquet
Processed: Label - Label_0, Instance - Instance_225.parquet
Processed: Label - Label_0, Instance - Instance_460.parquet


                                                                                

Processed: Label - Label_0, Instance - Instance_518.parquet
Processed: Label - Label_0, Instance - Instance_470.parquet
Processed: Label - Label_0, Instance - Instance_508.parquet
Processed: Label - Label_0, Instance - Instance_71.parquet
Processed: Label - Label_0, Instance - Instance_61.parquet
Processed: Label - Label_0, Instance - Instance_153.parquet
Processed: Label - Label_0, Instance - Instance_143.parquet
Processed: Label - Label_0, Instance - Instance_131.parquet
Processed: Label - Label_0, Instance - Instance_121.parquet


                                                                                

Processed: Label - Label_0, Instance - Instance_402.parquet
Processed: Label - Label_0, Instance - Instance_412.parquet
Processed: Label - Label_0, Instance - Instance_257.parquet
Processed: Label - Label_0, Instance - Instance_247.parquet
Processed: Label - Label_0, Instance - Instance_396.parquet
Processed: Label - Label_0, Instance - Instance_386.parquet
Processed: Label - Label_0, Instance - Instance_188.parquet
Processed: Label - Label_0, Instance - Instance_198.parquet
Processed: Label - Label_0, Instance - Instance_641.parquet
Processed: Label - Label_0, Instance - Instance_372.parquet
Processed: Label - Label_0, Instance - Instance_362.parquet
Processed: Label - Label_0, Instance - Instance_527.parquet
Processed: Label - Label_0, Instance - Instance_537.parquet
Processed: Label - Label_0, Instance - Instance_95.parquet
Processed: Label - Label_0, Instance - Instance_85.parquet
Processed: Label - Label_0, Instance - Instance_484.parquet
Processed: Label - Label_0, Instance - Ins

                                                                                

Processed: Label - Label_0, Instance - Instance_310.parquet
Processed: Label - Label_0, Instance - Instance_278.parquet
Processed: Label - Label_0, Instance - Instance_300.parquet
Processed: Label - Label_0, Instance - Instance_623.parquet
Processed: Label - Label_0, Instance - Instance_633.parquet
Processed: Label - Label_0, Instance - Instance_128.parquet
Processed: Label - Label_0, Instance - Instance_138.parquet
Processed: Label - Label_0, Instance - Instance_605.parquet
Processed: Label - Label_0, Instance - Instance_615.parquet
Processed: Label - Label_0, Instance - Instance_563.parquet
Processed: Label - Label_0, Instance - Instance_573.parquet
Processed: Label - Label_0, Instance - Instance_336.parquet
Processed: Label - Label_0, Instance - Instance_326.parquet
Processed: Label - Label_0, Instance - Instance_191.parquet
Processed: Label - Label_0, Instance - Instance_181.parquet
Processed: Label - Label_0, Instance - Instance_295.parquet
Processed: Label - Label_0, Instance - I

                                                                                

Processed: Label - Label_0, Instance - Instance_469.parquet
Processed: Label - Label_0, Instance - Instance_511.parquet
Processed: Label - Label_0, Instance - Instance_68.parquet
Processed: Label - Label_0, Instance - Instance_78.parquet
Processed: Label - Label_0, Instance - Instance_424.parquet
Processed: Label - Label_0, Instance - Instance_434.parquet
Processed: Label - Label_0, Instance - Instance_271.parquet
Processed: Label - Label_0, Instance - Instance_309.parquet
Processed: Label - Label_0, Instance - Instance_261.parquet
Processed: Label - Label_0, Instance - Instance_319.parquet
Processed: Label - Label_0, Instance - Instance_117.parquet
Processed: Label - Label_0, Instance - Instance_107.parquet
Processed: Label - Label_0, Instance - Instance_57.parquet
Processed: Label - Label_0, Instance - Instance_648.parquet
Processed: Label - Label_0, Instance - Instance_175.parquet
Processed: Label - Label_0, Instance - Instance_165.parquet
Processed: Label - Label_0, Instance - Inst

                                                                                

Processed: Label - Label_0, Instance - Instance_366.parquet
Processed: Label - Label_0, Instance - Instance_168.parquet
Processed: Label - Label_0, Instance - Instance_178.parquet
Processed: Label - Label_0, Instance - Instance_645.parquet
Processed: Label - Label_0, Instance - Instance_627.parquet
Processed: Label - Label_0, Instance - Instance_637.parquet
Processed: Label - Label_0, Instance - Instance_314.parquet
Processed: Label - Label_0, Instance - Instance_304.parquet
Processed: Label - Label_0, Instance - Instance_541.parquet
Processed: Label - Label_0, Instance - Instance_439.parquet
Processed: Label - Label_0, Instance - Instance_551.parquet
Processed: Label - Label_0, Instance - Instance_429.parquet
Processed: Label - Label_0, Instance - Instance_480.parquet
Processed: Label - Label_0, Instance - Instance_490.parquet
Processed: Label - Label_0, Instance - Instance_91.parquet
Processed: Label - Label_0, Instance - Instance_81.parquet
Processed: Label - Label_0, Instance - Ins

                                                                                

Processed: Label - Label_0, Instance - Instance_552.parquet
Processed: Label - Label_0, Instance - Instance_624.parquet
Processed: Label - Label_0, Instance - Instance_634.parquet
Processed: Label - Label_0, Instance - Instance_109.parquet
Processed: Label - Label_0, Instance - Instance_119.parquet
Processed: Label - Label_0, Instance - Instance_646.parquet
Processed: Label - Label_0, Instance - Instance_59.parquet
Processed: Label - Label_0, Instance - Instance_458.parquet
Processed: Label - Label_0, Instance - Instance_520.parquet
Processed: Label - Label_0, Instance - Instance_448.parquet
Processed: Label - Label_0, Instance - Instance_530.parquet
Processed: Label - Label_0, Instance - Instance_375.parquet
Processed: Label - Label_0, Instance - Instance_365.parquet
Processed: Label - Label_0, Instance - Instance_599.parquet
Processed: Label - Label_0, Instance - Instance_589.parquet
Processed: Label - Label_0, Instance - Instance_136.parquet
Processed: Label - Label_0, Instance - In

Transforming Class Column:  30%|███       | 3/10 [07:13<21:37, 185.33s/it]

Processed: Label - Label_0, Instance - Instance_66.parquet
Processed: Label - Label_1, Instance - Instance_693.parquet
Processed: Label - Label_1, Instance - Instance_696.parquet
Processed: Label - Label_1, Instance - Instance_695.parquet


Transforming Class Column:  40%|████      | 4/10 [07:16<11:18, 113.11s/it]

Processed: Label - Label_1, Instance - Instance_694.parquet
Processed: Label - Label_6, Instance - Instance_688.parquet
Processed: Label - Label_6, Instance - Instance_691.parquet
Processed: Label - Label_6, Instance - Instance_690.parquet
Processed: Label - Label_6, Instance - Instance_689.parquet
Processed: Label - Label_6, Instance - Instance_692.parquet


Transforming Class Column:  50%|█████     | 5/10 [07:19<06:07, 73.54s/it] 

Processed: Label - Label_6, Instance - Instance_687.parquet


                                                                                

Processed: Label - Label_8, Instance - Instance_704.parquet
Processed: Label - Label_8, Instance - Instance_698.parquet
Processed: Label - Label_8, Instance - Instance_705.parquet
Processed: Label - Label_8, Instance - Instance_699.parquet


                                                                                

Processed: Label - Label_8, Instance - Instance_707.parquet


                                                                                

Processed: Label - Label_8, Instance - Instance_706.parquet


                                                                                

Processed: Label - Label_8, Instance - Instance_703.parquet
Processed: Label - Label_8, Instance - Instance_702.parquet


                                                                                

Processed: Label - Label_8, Instance - Instance_697.parquet


                                                                                

Processed: Label - Label_8, Instance - Instance_710.parquet
Processed: Label - Label_8, Instance - Instance_700.parquet
Processed: Label - Label_8, Instance - Instance_709.parquet
Processed: Label - Label_8, Instance - Instance_708.parquet


Transforming Class Column:  60%|██████    | 6/10 [07:31<03:30, 52.68s/it]       

Processed: Label - Label_8, Instance - Instance_701.parquet
Processed: Label - Label_3, Instance - Instance_1085.parquet
Processed: Label - Label_3, Instance - Instance_1071.parquet
Processed: Label - Label_3, Instance - Instance_1061.parquet
Processed: Label - Label_3, Instance - Instance_1057.parquet
Processed: Label - Label_3, Instance - Instance_1068.parquet
Processed: Label - Label_3, Instance - Instance_1078.parquet
Processed: Label - Label_3, Instance - Instance_1056.parquet
Processed: Label - Label_3, Instance - Instance_1079.parquet
Processed: Label - Label_3, Instance - Instance_1069.parquet
Processed: Label - Label_3, Instance - Instance_1084.parquet
Processed: Label - Label_3, Instance - Instance_1060.parquet
Processed: Label - Label_3, Instance - Instance_1070.parquet
Processed: Label - Label_3, Instance - Instance_1054.parquet
Processed: Label - Label_3, Instance - Instance_1072.parquet
Processed: Label - Label_3, Instance - Instance_1062.parquet
Processed: Label - Label_

Transforming Class Column:  70%|███████   | 7/10 [07:50<02:05, 41.67s/it]

Processed: Label - Label_3, Instance - Instance_1080.parquet
Processed: Label - Label_4, Instance - Instance_776.parquet
Processed: Label - Label_4, Instance - Instance_766.parquet
Processed: Label - Label_4, Instance - Instance_803.parquet
Processed: Label - Label_4, Instance - Instance_813.parquet
Processed: Label - Label_4, Instance - Instance_861.parquet
Processed: Label - Label_4, Instance - Instance_919.parquet
Processed: Label - Label_4, Instance - Instance_871.parquet
Processed: Label - Label_4, Instance - Instance_909.parquet
Processed: Label - Label_4, Instance - Instance_714.parquet
Processed: Label - Label_4, Instance - Instance_944.parquet
Processed: Label - Label_4, Instance - Instance_954.parquet
Processed: Label - Label_4, Instance - Instance_749.parquet
Processed: Label - Label_4, Instance - Instance_759.parquet
Processed: Label - Label_4, Instance - Instance_885.parquet
Processed: Label - Label_4, Instance - Instance_895.parquet
Processed: Label - Label_4, Instance - 

                                                                                

Processed: Label - Label_4, Instance - Instance_760.parquet
Processed: Label - Label_4, Instance - Instance_770.parquet
Processed: Label - Label_4, Instance - Instance_1048.parquet
Processed: Label - Label_4, Instance - Instance_746.parquet
Processed: Label - Label_4, Instance - Instance_756.parquet
Processed: Label - Label_4, Instance - Instance_833.parquet
Processed: Label - Label_4, Instance - Instance_823.parquet
Processed: Label - Label_4, Instance - Instance_929.parquet
Processed: Label - Label_4, Instance - Instance_851.parquet
Processed: Label - Label_4, Instance - Instance_939.parquet
Processed: Label - Label_4, Instance - Instance_841.parquet
Processed: Label - Label_4, Instance - Instance_990.parquet
Processed: Label - Label_4, Instance - Instance_980.parquet
Processed: Label - Label_4, Instance - Instance_724.parquet
Processed: Label - Label_4, Instance - Instance_734.parquet
Processed: Label - Label_4, Instance - Instance_974.parquet
Processed: Label - Label_4, Instance - 

                                                                                

Processed: Label - Label_4, Instance - Instance_976.parquet
Processed: Label - Label_4, Instance - Instance_1053.parquet
Processed: Label - Label_4, Instance - Instance_1043.parquet
Processed: Label - Label_4, Instance - Instance_993.parquet
Processed: Label - Label_4, Instance - Instance_983.parquet
Processed: Label - Label_4, Instance - Instance_727.parquet
Processed: Label - Label_4, Instance - Instance_737.parquet
Processed: Label - Label_4, Instance - Instance_852.parquet
Processed: Label - Label_4, Instance - Instance_842.parquet
Processed: Label - Label_4, Instance - Instance_830.parquet
Processed: Label - Label_4, Instance - Instance_948.parquet
Processed: Label - Label_4, Instance - Instance_820.parquet
Processed: Label - Label_4, Instance - Instance_958.parquet
Processed: Label - Label_4, Instance - Instance_745.parquet
Processed: Label - Label_4, Instance - Instance_755.parquet
Processed: Label - Label_4, Instance - Instance_889.parquet
Processed: Label - Label_4, Instance -

Transforming Class Column:  80%|████████  | 8/10 [11:04<03:00, 90.00s/it]

Processed: Label - Label_4, Instance - Instance_1029.parquet
Processed: Label - Label_5, Instance - Instance_1109.parquet
Processed: Label - Label_5, Instance - Instance_1110.parquet
Processed: Label - Label_5, Instance - Instance_1111.parquet
Processed: Label - Label_5, Instance - Instance_1118.parquet
Processed: Label - Label_5, Instance - Instance_1108.parquet
Processed: Label - Label_5, Instance - Instance_1113.parquet
Processed: Label - Label_5, Instance - Instance_1112.parquet
Processed: Label - Label_5, Instance - Instance_1117.parquet
Processed: Label - Label_5, Instance - Instance_1116.parquet
Processed: Label - Label_5, Instance - Instance_1114.parquet


Transforming Class Column:  90%|█████████ | 9/10 [11:10<01:03, 63.92s/it]

Processed: Label - Label_5, Instance - Instance_1115.parquet
Processed: Label - Label_2, Instance - Instance_1095.parquet
Processed: Label - Label_2, Instance - Instance_1100.parquet


                                                                                

Processed: Label - Label_2, Instance - Instance_1101.parquet
Processed: Label - Label_2, Instance - Instance_1094.parquet
Processed: Label - Label_2, Instance - Instance_1103.parquet
Processed: Label - Label_2, Instance - Instance_1096.parquet
Processed: Label - Label_2, Instance - Instance_1086.parquet
Processed: Label - Label_2, Instance - Instance_1087.parquet
Processed: Label - Label_2, Instance - Instance_1097.parquet
Processed: Label - Label_2, Instance - Instance_1102.parquet
Processed: Label - Label_2, Instance - Instance_1107.parquet
Processed: Label - Label_2, Instance - Instance_1092.parquet
Processed: Label - Label_2, Instance - Instance_1093.parquet
Processed: Label - Label_2, Instance - Instance_1106.parquet
Processed: Label - Label_2, Instance - Instance_1091.parquet
Processed: Label - Label_2, Instance - Instance_1104.parquet
Processed: Label - Label_2, Instance - Instance_1088.parquet
Processed: Label - Label_2, Instance - Instance_1098.parquet
Processed: Label - Label

Transforming Class Column: 100%|██████████| 10/10 [11:24<00:00, 68.45s/it]

Processed: Label - Label_2, Instance - Instance_1090.parquet
Processed data saved to: ../Cleaning & Preparation/Staged Cleaning Data/Stage 2





In [9]:
### Combine Partitioned Data
def combine_partitioned_data_with_aligned_schema(base_path: str, original_df: DataFrame, output_file: str):
    """
    Combines partitioned Parquet files into a single dataset while aligning schemas.

    Walks ``base_path/<label_dir>/<instance>.parquet``, pads every instance with
    null columns for anything missing from the target column list, and unions the
    instances by column name. Columns whose Spark type is still ``void`` after
    the union are dropped before the result is written to ``output_file``.

    The target column list is ``original_df``'s schema order followed by
    ``target``. Using an ordered list (rather than a ``set``) keeps the saved
    column order identical from one kernel session to the next.

    Relies on the notebook-level ``spark`` session.

    Parameters:
        base_path (str): Directory containing partitioned Parquet files.
        original_df (DataFrame): Original DataFrame to infer the schema.
        output_file (str): Path to save the combined dataset.

    Returns:
        DataFrame: Combined Spark DataFrame, or None when no Parquet input was
        found or an unexpected error occurred (the error is printed).
    """
    try:
        # Ordered target columns: original schema first, then 'target'.
        ordered_columns = [field.name for field in original_df.schema.fields]
        if "target" not in ordered_columns:
            ordered_columns.append("target")

        combined_df = None
        print(f"Traversing base path: {base_path}")

        # Sorted listings make the processing (and log) order reproducible.
        for label_dir in sorted(os.listdir(base_path)):
            label_path = os.path.join(base_path, label_dir)
            if not os.path.isdir(label_path):
                continue

            for instance_file in sorted(os.listdir(label_path)):
                instance_path = os.path.join(label_path, instance_file)
                if not instance_path.endswith(".parquet"):
                    continue

                try:
                    instance_df = spark.read.parquet(instance_path)
                    print(f"Reading instance file: {instance_path}")

                    # Align schema: add missing columns as untyped nulls so that
                    # columns absent from every instance stay 'void' and are
                    # dropped below.
                    present = set(instance_df.columns)
                    for col_name in ordered_columns:
                        if col_name not in present:
                            instance_df = instance_df.withColumn(col_name, lit(None))

                    instance_df = instance_df.select(*ordered_columns)

                    # Union by name so correctness never depends on column position.
                    # Kept inside the try so an incompatible file is skipped, not fatal.
                    if combined_df is None:
                        combined_df = instance_df
                    else:
                        combined_df = combined_df.unionByName(instance_df)

                except Exception as e:
                    print(f"Error reading {instance_path}: {type(e).__name__}: {e}")
                    continue

        if combined_df is not None:
            # Drop VOID columns (null in every instance; Parquet cannot store them)
            void_columns = [col_name for col_name, dtype in combined_df.dtypes if dtype == "void"]
            if void_columns:
                print(f"Dropping VOID columns: {void_columns}")
                combined_df = combined_df.drop(*void_columns)

            # Save the combined dataset
            print(f"Saving combined dataset to: {output_file}")
            parent_dir = os.path.dirname(output_file)
            if parent_dir:  # a bare filename has no parent directory to create
                os.makedirs(parent_dir, exist_ok=True)
            combined_df.write.mode("overwrite").parquet(output_file)
            print(f"Combined dataset saved successfully at: {output_file}")

        else:
            print("No data to combine. Ensure the directory structure is correct and contains Parquet files.")

        return combined_df

    except Exception as e:
        print(f"Error combining partitioned data: {type(e).__name__}: {e}")
        return None

# Merge every Stage 2 instance file into the single Stage 3 dataset
combined_df_real = combine_partitioned_data_with_aligned_schema(
    base_path=os.path.join(STAGED_DATA_PATH, "Stage 2"),
    original_df=spark_df_real,
    output_file=os.path.join(STAGED_DATA_PATH, "Stage 3", "real_instances_clean.parquet"),
)

Traversing base path: ../Cleaning & Preparation/Staged Cleaning Data/Stage 2
Reading instance file: ../Cleaning & Preparation/Staged Cleaning Data/Stage 2/Label_9/Instance_11.parquet
Reading instance file: ../Cleaning & Preparation/Staged Cleaning Data/Stage 2/Label_9/Instance_5.parquet
Reading instance file: ../Cleaning & Preparation/Staged Cleaning Data/Stage 2/Label_9/Instance_18.parquet
Reading instance file: ../Cleaning & Preparation/Staged Cleaning Data/Stage 2/Label_9/Instance_45.parquet
Reading instance file: ../Cleaning & Preparation/Staged Cleaning Data/Stage 2/Label_9/Instance_55.parquet
Reading instance file: ../Cleaning & Preparation/Staged Cleaning Data/Stage 2/Label_9/Instance_27.parquet
Reading instance file: ../Cleaning & Preparation/Staged Cleaning Data/Stage 2/Label_9/Instance_37.parquet
Reading instance file: ../Cleaning & Preparation/Staged Cleaning Data/Stage 2/Label_9/Instance_19.parquet
Reading instance file: ../Cleaning & Preparation/Staged Cleaning Data/Stage 

24/11/27 02:44:35 WARN DAGScheduler: Broadcasting large task binary with size 19.6 MiB
                                                                                

Combined dataset saved successfully at: ../Cleaning & Preparation/Staged Cleaning Data/Stage 3/real_instances_clean.parquet


In [10]:
# Reload the combined, per-instance-cleaned Stage 3 dataset from disk
df_clean = spark.read.format("parquet").load(
    os.path.join(STAGED_DATA_PATH, "Stage 3", "real_instances_clean.parquet")
)

24/11/27 02:58:24 WARN SharedInMemoryCache: Evicting cached table partition metadata from memory due to size constraints (spark.sql.hive.filesourcePartitionFileCacheSize = 262144000 bytes). This may impact query planning performance.


In [11]:
# Add Missingness Indicators
def create_missingness_columns(df, feature_names):
    """
    Adds missingness indicator columns for each feature in the dataset.

    For every feature ``f`` an integer column ``f_missing`` is produced that is
    1 where ``f`` is null and 0 otherwise. All indicators are added in a single
    projection; chaining one ``withColumn`` per feature grows the query plan
    with every call.

    Parameters:
        df (DataFrame): Input Spark DataFrame.
        feature_names (list): List of feature columns to process.

    Returns:
        DataFrame: Spark DataFrame with missingness indicator columns added.
        The input is returned unchanged when ``feature_names`` is empty.
    """
    # One expression per indicator name; the dict keeps first-seen order and
    # collapses repeated features, matching repeated withColumn replacement.
    indicators = {
        f"{feature}_missing": when(col(feature).isNull(), lit(1)).otherwise(lit(0))
        for feature in feature_names
    }
    if not indicators:
        return df

    # Spark resolves column names case-insensitively by default, so compare
    # lower-cased names when deciding whether a plain append is safe.
    lowered_names = [name.lower() for name in indicators]
    existing_names = {name.lower() for name in df.columns}
    append_is_safe = (
        len(set(lowered_names)) == len(lowered_names)
        and existing_names.isdisjoint(lowered_names)
    )

    if not append_is_safe:
        # An indicator name already exists (e.g. the cell was re-run on its own
        # output): replace columns in place exactly as before.
        for name, expression in indicators.items():
            df = df.withColumn(name, expression)
        return df

    return df.select("*", *[expression.alias(name) for name, expression in indicators.items()])

# Flag missing values first, so the information survives the imputation below
df_with_missingness = create_missingness_columns(
    df=df_clean,
    feature_names=feature_names,
)

In [12]:
# Impute Continuous Features
def impute_continuous_features(df, features_continuous, strategy="mean"):
    """
    Imputes missing values in continuous features using the specified strategy.

    The fill value of every feature is computed in one pass over the data (a
    single aggregation for "mean", a single multi-column ``approxQuantile``
    call for "median") instead of one or two Spark jobs per feature. Features
    for which no fill value exists (for example, all values null) are left
    untouched.

    Parameters:
        df (DataFrame): Input Spark DataFrame.
        features_continuous (list): List of continuous feature columns to impute.
        strategy (str): Imputation strategy ("mean" or "median").

    Returns:
        DataFrame: Spark DataFrame with imputed continuous features.

    Raises:
        ValueError: If ``strategy`` is neither "mean" nor "median" and there is
            at least one feature to impute.
    """
    # Distinct features in first-seen order; nothing to do for an empty list.
    features = list(dict.fromkeys(features_continuous))
    if not features:
        return df

    if strategy == "mean":
        # A global aggregation always yields exactly one row; a column without
        # any non-null value produces None.
        stats_row = df.agg(
            *[mean(col(feature)).alias(f"stat_{idx}") for idx, feature in enumerate(features)]
        ).first()
        fill_values = dict(zip(features, stats_row))
    elif strategy == "median":
        # With a list of columns approxQuantile returns one list per column,
        # which is empty when the column holds no usable values.
        quantiles = df.approxQuantile(features, [0.5], 0.01)
        fill_values = {
            feature: (values[0] if values else None)
            for feature, values in zip(features, quantiles)
        }
    else:
        raise ValueError("Unsupported strategy. Use 'mean' or 'median'.")

    # Replace in place so each feature keeps its position in the schema.
    for feature in features:
        imputed_value = fill_values[feature]
        if imputed_value is not None:
            df = df.withColumn(
                feature, when(col(feature).isNull(), lit(imputed_value)).otherwise(col(feature))
            )
    return df

# Fill nulls in the continuous features with each column's mean
df_imputed_continuous = impute_continuous_features(
    df=df_with_missingness,
    features_continuous=features_continuous,
    strategy="mean",
)

                                                                                

In [13]:
# Impute Categorical Features
def impute_categorical_features(df, features_categorical):
    """
    Imputes missing values in categorical features using the mode (most frequent value).

    A feature that has no non-null value at all is skipped, since there is no
    mode to fill with. When several values share the highest frequency, the
    smallest one (by Spark's ordering of the column) is chosen so the result is
    the same on every run.

    Parameters:
        df (DataFrame): Input Spark DataFrame.
        features_categorical (list): List of categorical feature columns to impute.

    Returns:
        DataFrame: Spark DataFrame with imputed categorical features.
    """
    for feature in features_categorical:
        top_row = (
            df.filter(col(feature).isNotNull())
              .groupBy(feature)
              .agg(count("*").alias("freq"))
              # Secondary sort key makes the tie-break deterministic.
              .orderBy(col("freq").desc(), col(feature).asc())
              .select(feature)
              .first()
        )
        # first() returns None when the feature has only nulls; indexing it
        # directly would raise TypeError before any guard could run.
        if top_row is None:
            continue

        mode_value = top_row[0]
        df = df.withColumn(
            feature, when(col(feature).isNull(), lit(mode_value)).otherwise(col(feature))
        )
    return df

# Fill nulls in the categorical features with each column's most frequent value
df_clean = impute_categorical_features(
    df=df_imputed_continuous,
    features_categorical=features_categorical,
)

                                                                                

In [14]:
# Persist the fully imputed dataset, replacing any earlier copy
cleaned_dataset_path = os.path.join(CLEANED_DATA_PATH, "cleaned_data.parquet")
df_clean.write.mode("overwrite").parquet(cleaned_dataset_path)

                                                                                

In [8]:
### Data Splitting into Train and Test Sets
def split_data(df, train_ratio=0.8, seed=42):
    """
    Splits the DataFrame into train and test sets based on 'Instance', ensuring no overlap.

    Instances are ordered within each label by a hash of (Instance, seed) and
    assigned by position:
      * labels with more than 4 instances: the first ``train_ratio`` share goes
        to train, the remainder to test;
      * labels with 4 or fewer instances: every instance except the last goes
        to train, the last one to test.

    The ordering key is a pure function of the instance id and the seed.
    ``rand(seed)`` depends on how rows are laid out across partitions, which
    Spark does not guarantee to be stable after ``distinct()``; because the
    train and test filters each re-evaluate the instance table, a layout
    dependent key could assign an instance differently in the two branches.

    Parameters:
        df (DataFrame): Input Spark DataFrame with 'Instance' and 'label' columns.
        train_ratio (float): Share of instances per label assigned to train.
        seed (int): Seed mixed into the ordering hash; change it to obtain a
            different (still reproducible) split.

    Returns:
        tuple: (train_df, test_df) Spark DataFrames.
    """
    # Get distinct instances
    instances_df = df.select("Instance", "label").distinct()

    # Deterministic pseudo-random ordering key for each instance
    instances_df = instances_df.withColumn(
        "shuffle_key", F.xxhash64(col("Instance"), lit(seed))
    )

    # Position of each instance within its label; 'Instance' breaks hash ties
    ordered_within_label = Window.partitionBy("label").orderBy("shuffle_key", "Instance")
    whole_label = Window.partitionBy("label")
    instances_df = (
        instances_df
        .withColumn("row_number", row_number().over(ordered_within_label))
        .withColumn("total_instances", F.count("*").over(whole_label))
    )

    # Define train and test membership (small labels keep one instance for test)
    instances_df = instances_df.withColumn(
        "split",
        when(
            (col("total_instances") > 4) & (col("row_number") <= col("total_instances") * train_ratio),
            "train"
        ).when(
            (col("total_instances") > 4),
            "test"
        ).when(
            (col("total_instances") <= 4) & (col("row_number") < col("total_instances")),
            "train"
        ).otherwise("test")
    )

    # Extract train and test instances
    train_instances = instances_df.filter(col("split") == "train").select("Instance")
    test_instances = instances_df.filter(col("split") == "test").select("Instance")

    # Join back to the main DataFrame
    train_df = df.join(train_instances, on="Instance", how="inner")
    test_df = df.join(test_instances, on="Instance", how="inner")

    return train_df, test_df

# Reload the cleaned dataset from disk, then split it by instance
df_clean = spark.read.format("parquet").load(
    os.path.join(CLEANED_DATA_PATH, "cleaned_data.parquet")
)
train_df, test_df = split_data(df=df_clean)

In [9]:
# Write both splits to disk, replacing any earlier copies
train_data_path = os.path.join(TRAIN_TEST_DATA_PATH, "train_data.parquet")
test_data_path = os.path.join(TRAIN_TEST_DATA_PATH, "test_data.parquet")

train_df.write.mode("overwrite").parquet(train_data_path)
test_df.write.mode("overwrite").parquet(test_data_path)

24/11/27 08:03:42 WARN SparkStringUtils: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.
                                                                                

In [10]:
### Verify Splits
def verify_splits(train_df, test_df):
    """
    Prints a sanity report for a train/test split.

    Reports how many 'Instance' values occur in both frames, then shows the
    number of distinct instances per 'label' for each frame.
    """
    # Instances present on both sides of the split
    shared_instances = (
        train_df.select("Instance").distinct()
        .intersect(test_df.select("Instance").distinct())
    )
    shared_count = shared_instances.count()
    if shared_count <= 0:
        print("No overlap in Instances between train and test sets.")
    else:
        print(f"Overlap detected in Instances: {shared_count}")

    # Distinct instances per label, train first and then test
    for heading, frame in (
        ("Train set counts per label:", train_df),
        ("Test set counts per label:", test_df),
    ):
        print(heading)
        per_label = frame.groupBy("label").agg(countDistinct("Instance").alias("instance_count"))
        per_label.show()

# Check the splits exactly as they were written to disk
train_df = spark.read.format("parquet").load(train_data_path)
test_df = spark.read.format("parquet").load(test_data_path)
verify_splits(train_df=train_df, test_df=test_df)

                                                                                

No overlap in Instances between train and test sets.
Train set counts per label:
+-----+--------------+
|label|instance_count|
+-----+--------------+
|    9|            45|
|    3|            25|
|    4|           274|
|    6|             4|
|    2|            17|
|    7|            28|
|    8|            11|
|    5|             8|
|    0|           475|
|    1|             3|
+-----+--------------+

Test set counts per label:
+-----+--------------+
|label|instance_count|
+-----+--------------+
|    9|            12|
|    3|             7|
|    4|            69|
|    6|             2|
|    2|             5|
|    7|             8|
|    5|             3|
|    8|             3|
|    0|           119|
|    1|             1|
+-----+--------------+



In [11]:
### Apply Standard Scaler to Train and Test Data
def scale_features(train_df, test_df, feature_names):
    """
    Standardizes the feature columns of the train and test sets.

    A StandardScaler (centering and unit-variance scaling) is fitted on the
    training data only and then applied to both sets, so no test statistics
    enter the scaling. The scaled values replace the original feature columns.

    The output columns are the non-feature columns of ``train_df`` (in
    ``train_df``'s order) followed by ``feature_names``. They are derived from
    the arguments rather than from a notebook-level DataFrame, so the function
    does not depend on what other cells have defined.

    Parameters:
        train_df (DataFrame): Training Spark DataFrame.
        test_df (DataFrame): Test Spark DataFrame with the same columns.
        feature_names (list): Feature columns to scale.

    Returns:
        tuple: (train_final_scaled, test_final_scaled) Spark DataFrames.
    """
    # Assemble features
    # NOTE(review): handleInvalid="skip" makes the assembler filter out rows it
    # considers invalid - confirm that silently losing rows here is intended.
    assembler = VectorAssembler(inputCols=feature_names, outputCol="features_vector", handleInvalid="skip")
    scaler = StandardScaler(inputCol="features_vector", outputCol="scaled_features", withStd=True, withMean=True)

    # Fit scaler on training data only
    train_vectorized = assembler.transform(train_df)
    scaler_model = scaler.fit(train_vectorized)

    # Non-feature columns are carried through unchanged
    feature_set = set(feature_names)
    passthrough_columns = [col_name for col_name in train_df.columns if col_name not in feature_set]

    def _scale_and_project(vectorized_df):
        """Apply the fitted scaler and unpack the scaled vector into the feature columns."""
        scaled_df = scaler_model.transform(vectorized_df).withColumn(
            "scaled_features_array", vector_to_array(col("scaled_features"))
        )
        # One projection yields the final layout; no per-feature withColumn chain.
        scaled_columns = [
            col("scaled_features_array")[idx].alias(feature)
            for idx, feature in enumerate(feature_names)
        ]
        return scaled_df.select(*passthrough_columns, *scaled_columns)

    train_final_scaled = _scale_and_project(train_vectorized)
    test_final_scaled = _scale_and_project(assembler.transform(test_df))

    return train_final_scaled, test_final_scaled

# Standardize the features using statistics from the training split
train_final_scaled, test_final_scaled = scale_features(
    train_df=train_df,
    test_df=test_df,
    feature_names=feature_names,
)

                                                                                

In [12]:
### Save Scaled Data
# Write the standardized splits to disk, replacing any earlier copies
scaled_train_path = os.path.join(SCALED_DATA_PATH, "scaled_train_data.parquet")
scaled_test_path = os.path.join(SCALED_DATA_PATH, "scaled_test_data.parquet")

train_final_scaled.write.parquet(scaled_train_path, mode="overwrite")
test_final_scaled.write.parquet(scaled_test_path, mode="overwrite")

                                                                                

In [13]:
### End Spark Session
# Shut down the SparkSession created in the initialization cell. Run this last:
# re-run the initialization cell before using `spark` again.
spark.stop()