In [1]:
import pandas as pd

# Baca file CSV dengan delimiter titik koma
dataset = 'ai_inacbg.csv'
df_data = pd.read_csv(dataset, sep=';', na_values=['NULL', ''], engine='python')

# Tampilkan beberapa baris pertama
print(df_data.head())

   ID              Tanggal       RegID                SEPID    INACBG  ICD10  \
0   1  2023-09-01 02:07:14  P230900003  0901R0030923V000001  Q-5-42-0    NaN   
1   2  2023-09-01 02:07:14  P230900003  0901R0030923V000001  Q-5-42-0    NaN   
2   3  2023-09-01 09:52:27  P230900494  0901R0030923V000428  Q-5-44-0    N30   
3   4  2023-09-01 09:52:27  P230900494  0901R0030923V000428  Q-5-44-0  N30.9   
4   5  2023-09-01 13:34:31  P230900609  0901R0030923V001192  Q-5-44-0    C11   

     ICD9                      INACBG_Desc                         ICD10_Desc  \
0  8907.0    PENYAKIT AKUT KECIL LAIN-LAIN                                NaN   
1  9922.0    PENYAKIT AKUT KECIL LAIN-LAIN                                NaN   
2  8907.0  PENYAKIT KRONIS KECIL LAIN-LAIN                           Cystitis   
3     NaN  PENYAKIT KRONIS KECIL LAIN-LAIN              Cystitis, unspecified   
4  8907.0  PENYAKIT KRONIS KECIL LAIN-LAIN  Malignant neoplasm of nasopharynx   

                                

## Clean Data

In [2]:
#mencopy data agar data dapat dibersihkan dengan baik
df_cleaned = df_data.copy() 

In [3]:
import numpy as np

df_cleaned.replace('NULL', np.nan, inplace=True)

In [4]:
# Menghapus Kolom INACBG yang merupakan NULL
df_cleaned = df_cleaned.dropna(subset=['INACBG'])

# Menghapus Kolom ICD10 dan ICD9 yang merupakan NULL
df_cleaned = df_cleaned.dropna(subset=['ICD9', 'ICD10'], how='all')

# Membersihkan data string: menghapus spasi dan mengubah ke huruf besar jika bertipe string
for col in df_cleaned.select_dtypes(include=['object']):
    df_cleaned[col] = df_cleaned[col].str.strip().str.upper()

In [5]:
# Melihat jumlah baris awal
baris_awal = len(df_data)
print(f"JUMLAH ROW/DATA AWAL: {baris_awal}")

# Menghitung dan menghapus baris dengan nilai null di kolom INACBG
jumlah_baris_INACBG = len(df_data[df_data['INACBG'].isna()])
df_cleaned = df_data.dropna(subset=['INACBG'])
print(f"Baris yang terhapus dari INACBG yang NULL =  {jumlah_baris_INACBG}")

# Menghitung dan menghapus baris dengan nilai null di kedua kolom ICD9 dan ICD10
jumlah_baris_ICD = len(df_cleaned[
    df_cleaned['ICD9'].isna() & df_cleaned['ICD10'].isna()
])
df_cleaned = df_cleaned.dropna(subset=['ICD9', 'ICD10'], how='all')
print(f"Baris yang terhapus dari IC9 dan ICD10 yang NULL =  {jumlah_baris_ICD}")

# Melihat jumlah baris akhir
print(f"JUMLAH BARIS SETELAH PEMBERSIHAN: {len(df_cleaned)}")

JUMLAH ROW/DATA AWAL: 278785
Baris yang terhapus dari INACBG yang NULL =  77440
Baris yang terhapus dari IC9 dan ICD10 yang NULL =  81
JUMLAH BARIS SETELAH PEMBERSIHAN: 201264


In [6]:
# Hapus duplikat, tetapi pertahankan satu baris untuk setiap grup duplikat
df_cleaned = df_cleaned.drop_duplicates(subset=['ICD9', 'ICD10', 'INACBG'], keep='first')

# Verifikasi jumlah kode INACBG tetap 587
print("\nJumlah kode unik INACBG setelah penghapusan duplikat:", df_cleaned['INACBG'].nunique())

# Hitung jumlah baris yang tersisa
print("\nJumlah baris yang tersisa setelah penghapusan duplikat:", len(df_cleaned))


Jumlah kode unik INACBG setelah penghapusan duplikat: 587

Jumlah baris yang tersisa setelah penghapusan duplikat: 27501


In [7]:
# Hitung jumlah baris untuk masing-masing kode INACBG
jumlah_baris_per_inacbg = df_cleaned.groupby('INACBG').size()

# Ubah menjadi DataFrame untuk mempermudah pengurutan
jumlah_baris_per_inacbg_df = jumlah_baris_per_inacbg.reset_index(name='Jumlah Baris')

# Urutkan dari yang terbanyak ke sedikit
jumlah_baris_per_inacbg_sorted = jumlah_baris_per_inacbg_df.sort_values(by='Jumlah Baris', ascending=False)

# Tampilkan hasil yang sudah diurutkan
print("\nJumlah baris untuk masing-masing kode INACBG (dari terbanyak ke sedikit):")
print(jumlah_baris_per_inacbg_sorted)

# Hitung total jumlah baris
total_baris = jumlah_baris_per_inacbg_sorted['Jumlah Baris'].sum()

# Tampilkan total jumlah baris
print("\nTotal jumlah baris setelah penghapusan duplikat:", total_baris)


Jumlah baris untuk masing-masing kode INACBG (dari terbanyak ke sedikit):
       INACBG  Jumlah Baris
482  Q-5-44-0          6810
374  M-3-16-0          1189
165  H-3-12-0           987
580  Z-3-27-0           758
570  Z-3-16-0           657
..        ...           ...
506  U-2-25-0             2
40   C-3-17-0             2
39   C-3-16-0             2
38   C-3-15-0             2
37   C-3-14-0             1

[587 rows x 2 columns]

Total jumlah baris setelah penghapusan duplikat: 27501


In [8]:
# Tampilkan Top 10 kode INACBG
top_10_inacbg = jumlah_baris_per_inacbg_sorted.head(10)
print("Top 10 kode INACBG berdasarkan jumlah baris:")
print(top_10_inacbg)

# Tampilkan Bottom 10 kode INACBG
bottom_10_inacbg = jumlah_baris_per_inacbg_sorted.tail(10)
print("\nBottom 10 kode INACBG berdasarkan jumlah baris:")
print(bottom_10_inacbg)

Top 10 kode INACBG berdasarkan jumlah baris:
       INACBG  Jumlah Baris
482  Q-5-44-0          6810
374  M-3-16-0          1189
165  H-3-12-0           987
580  Z-3-27-0           758
570  Z-3-16-0           657
576  Z-3-23-0           653
568  Z-3-12-0           617
234  J-3-16-0           407
573  Z-3-19-0           399
97   F-5-10-0           374

Bottom 10 kode INACBG berdasarkan jumlah baris:
       INACBG  Jumlah Baris
462  Q-5-22-0             2
505  U-2-21-0             2
72   E-2-21-0             2
535  V-3-12-0             2
510  U-3-12-0             2
506  U-2-25-0             2
40   C-3-17-0             2
39   C-3-16-0             2
38   C-3-15-0             2
37   C-3-14-0             1


In [9]:
# Jumlah nilai yang tidak null untuk kolom ICD9, ICD10, dan INACBG
icd9_not_null = df_cleaned['ICD9'].notnull().sum()
icd10_not_null = df_cleaned['ICD10'].notnull().sum()
inacbg_not_null = df_cleaned['INACBG'].notnull().sum()

# Tampilkan hasil
print(f"Jumlah nilai yang tidak null pada ICD9: {icd9_not_null}")
print(f"Jumlah nilai yang tidak null pada ICD10: {icd10_not_null}")
print(f"Jumlah nilai yang tidak null pada INACBG: {inacbg_not_null}")

Jumlah nilai yang tidak null pada ICD9: 24462
Jumlah nilai yang tidak null pada ICD10: 22712
Jumlah nilai yang tidak null pada INACBG: 27501


In [10]:
# Jumlah nilai unik untuk kolom ICD9, ICD10, dan INACBG
icd9_unique = df_cleaned['ICD9'].nunique()
icd10_unique = df_cleaned['ICD10'].nunique()
inacbg_unique = df_cleaned['INACBG'].nunique()

# Tampilkan hasil
print(f"Jumlah nilai unik pada ICD9: {icd9_unique}")
print(f"Jumlah nilai unik pada ICD10: {icd10_unique}")
print(f"Jumlah nilai unik pada INACBG: {inacbg_unique}")

Jumlah nilai unik pada ICD9: 902
Jumlah nilai unik pada ICD10: 4410
Jumlah nilai unik pada INACBG: 587


In [11]:
# Menampilkan Top 10 ICD10 dan ICD9 Codes
top_10_icd10 = df_cleaned['ICD10'].value_counts().head(10)
top_10_icd9 = df_cleaned['ICD9'].value_counts().head(10)

# Menampilkan hasil
print("Top 10 ICD10 Codes:")
print(top_10_icd10)

print("\nTop 10 ICD9 Codes:")
print(top_10_icd9)

Top 10 ICD10 Codes:
ICD10
I10      282
E88.0    251
J18.9    215
D63.0    185
D64.9    176
I11.9    147
D63.8    145
C50.9    145
D65      135
E87.1    133
Name: count, dtype: int64

Top 10 ICD9 Codes:
ICD9
8907.0    6589
9059.0    2633
9918.0    1572
8749.0    1315
9921.0     568
8952.0     519
9904.0     394
9357.0     384
8876.0     269
8872.0     251
Name: count, dtype: int64


In [12]:
print(df_cleaned.info())

<class 'pandas.core.frame.DataFrame'>
Index: 27501 entries, 0 to 278771
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   ID           27501 non-null  int64  
 1   Tanggal      27501 non-null  object 
 2   RegID        27501 non-null  object 
 3   SEPID        27501 non-null  object 
 4   INACBG       27501 non-null  object 
 5   ICD10        22712 non-null  object 
 6   ICD9         24462 non-null  float64
 7   INACBG_Desc  27501 non-null  object 
 8   ICD10_Desc   22711 non-null  object 
 9   ICD9_Desc    24462 non-null  object 
dtypes: float64(1), int64(1), object(8)
memory usage: 2.3+ MB
None


In [13]:
# Ubah kolom tertentu ke string
df_cleaned['INACBG'] = df_cleaned['INACBG'].astype('string')
df_cleaned['ICD10'] = df_cleaned['ICD10'].astype('string')
df_cleaned['ICD9'] = df_cleaned['ICD9'].astype('string')

In [14]:
print(df_cleaned.info())

<class 'pandas.core.frame.DataFrame'>
Index: 27501 entries, 0 to 278771
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   ID           27501 non-null  int64 
 1   Tanggal      27501 non-null  object
 2   RegID        27501 non-null  object
 3   SEPID        27501 non-null  object
 4   INACBG       27501 non-null  string
 5   ICD10        22712 non-null  string
 6   ICD9         24462 non-null  string
 7   INACBG_Desc  27501 non-null  object
 8   ICD10_Desc   22711 non-null  object
 9   ICD9_Desc    24462 non-null  object
dtypes: int64(1), object(6), string(3)
memory usage: 2.3+ MB
None


In [15]:
# Membuat salinan dari df_cleaned
df_processed = df_cleaned.copy()

## Train

In [30]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, StratifiedKFold, GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
import xgboost as xgb

def balance_data(df_processed, target_column='INACBG', min_samples=20):
    # Hitung jumlah baris untuk setiap kategori target
    class_counts = df_processed[target_column].value_counts()
    
    # Identifikasi kelas dengan sampel sedikit
    low_sample_classes = class_counts[class_counts < min_samples].index
    
    # Buat list untuk menyimpan data yang akan digabungkan
    balanced_data_list = [df_processed]
    
    # Duplikasi data untuk kelas dengan sampel sedikit
    for low_class in low_sample_classes:
        low_class_data = df_processed[df_processed[target_column] == low_class]
        
    # Duplikasi data untuk kelas dengan sampel sedikit
    for low_class in low_sample_classes:
        low_class_data = df_processed[df_processed[target_column] == low_class]
        
        # Duplikasi data untuk mencapai jumlah minimum (min_samples)
        num_duplicates = min_samples - len(low_class_data)
        duplicated_data = pd.concat([low_class_data] * (num_duplicates // len(low_class_data)) + [low_class_data.head(num_duplicates % len(low_class_data))])
        
        balanced_data_list.append(duplicated_data)
    
    # Gabungkan dataset
    balanced_df = pd.concat(balanced_data_list, ignore_index=True)
    
    return balanced_df
    
def prepare_data(df, target_column='INACBG'):
    """Prepare and encode data, returning train-test split and encoders."""
    # Balance data first
    balanced_df = balance_data(df, target_column)
    
    # Encode categorical features and target
    encoders = {}
    encoded_columns = ['ICD9', 'ICD10']
    
    for col in encoded_columns:
        le = LabelEncoder()
        balanced_df[f'{col}_encoded'] = le.fit_transform(balanced_df[col].astype(str))
        encoders[col] = le
    
    # Encode target column
    le_target = LabelEncoder()
    balanced_df['target_encoded'] = le_target.fit_transform(balanced_df[target_column].astype(str))
    encoders['target'] = le_target
    
    # Select features and target
    X = balanced_df[[f'{col}_encoded' for col in encoded_columns]]
    y = balanced_df['target_encoded']
    
    # Stratified split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )
    
    return X_train, X_test, y_train, y_test, encoders

def calculate_average_report(reports, num_splits):
    """Aggregate classification reports from all folds."""
    avg_report = reports[0]
    for report in reports[1:]:
        for label in report:
            if label != 'accuracy':
                avg_report[label]['precision'] += report[label].get('precision', 0)
                avg_report[label]['recall'] += report[label].get('recall', 0)
                avg_report[label]['f1-score'] += report[label].get('f1-score', 0)

    for label in avg_report:
        if label != 'accuracy':
            avg_report[label]['precision'] /= num_splits
            avg_report[label]['recall'] /= num_splits
            avg_report[label]['f1-score'] /= num_splits

    return avg_report

def train_and_evaluate_xgboost(df, target_column='INACBG', num_splits=5, tune_hyperparameters=False):
    """Train and evaluate XGBoost using Stratified K-Fold."""
    # Data preparation
    X_train, X_test, y_train, y_test, encoders = prepare_data(df, target_column)
    
    # Stratified K-Fold
    strat_kfold = StratifiedKFold(n_splits=num_splits, shuffle=True, random_state=42)
    accuracy_scores = []
    reports = []

    # Debugging: Print unique classes and their counts
    unique_classes = np.unique(y_train)
    print("Unique classes in training data:", unique_classes)
    for cls in unique_classes:
        print(f"Class {cls} count: {np.sum(y_train == cls)}")

    # Loop through each fold
    for fold, (train_idx, val_idx) in enumerate(strat_kfold.split(X_train, y_train)):
        print(f"\nProcessing Fold {fold + 1}/{num_splits}...")
        
        # Split fold data
        X_train_fold, X_val_fold = X_train.iloc[train_idx], X_train.iloc[val_idx]
        y_train_fold, y_val_fold = y_train.iloc[train_idx], y_train.iloc[val_idx]
        
        # Print fold sizes
        print(f"Train fold size: {X_train_fold.shape}, Validation fold size: {X_val_fold.shape}")
        print(f"Train labels size: {y_train_fold.shape}, Validation labels size: {y_val_fold.shape}")
        
        # Debugging: Check class distribution in this fold
        print("Validation fold class distribution:")
        unique_val_classes = np.unique(y_val_fold)
        for cls in unique_val_classes:
            print(f"Class {cls} count: {np.sum(y_val_fold == cls)}")
        
        # Build model
        if tune_hyperparameters:
            model = advanced_hyperparameter_tuning(X_train_fold, y_train_fold, num_classes=len(unique_classes))
        else:
            model = xgb.XGBClassifier(
                objective='multi:softmax',
                num_class=len(unique_classes),
                use_label_encoder=False,
                eval_metric='mlogloss'
            )

        # Before training
        X_train_fold = X_train_fold.values
        y_train_fold = y_train_fold.values
        X_val_fold = X_val_fold.values

        print(f"X_Train Fold: {X_train_fold}")
        print(f"Y_Train Fold: {y_train_fold}")
        print(f"X_Val Fold: {X_val_fold}")
        
        # Train model
        model.fit(X_train_fold, y_train_fold)
        
        # Predict
        y_pred = model.predict(X_val_fold)
        
        # Debugging: Decode and print predictions
        decoded_y_pred = encoders['target'].inverse_transform(y_pred)
        decoded_y_val = encoders['target'].inverse_transform(y_val_fold)
        print("Decoded validation labels:", np.unique(decoded_y_val))
        print("Decoded predicted labels:", np.unique(decoded_y_pred))
        
        # Calculate accuracy
        accuracy = accuracy_score(y_val_fold, y_pred)
        accuracy_scores.append(accuracy)
        
        # Classification report
        report = classification_report(y_val_fold, y_pred, output_dict=True)
        reports.append(report)
    
    # Average accuracy
    avg_accuracy = np.mean(accuracy_scores)
    print(f"\nAverage accuracy: {avg_accuracy:.4f}")
    
    # Calculate and print average classification report
    return avg_accuracy, reports


## Multiply by 20 if counts <20

In [34]:
balanced_df = balance_data(df_processed, target_column='INACBG', min_samples=20)
balanced_df['INACBG'].value_counts()

INACBG
Q-5-44-0     6810
M-3-16-0     1189
H-3-12-0      987
Z-3-27-0      758
Z-3-16-0      657
             ... 
M-1-07-I       20
P-8-12-I       20
Q-5-29-0       20
K-1-10-II      20
U-3-12-0       20
Name: count, Length: 587, dtype: Int64

In [35]:
balanced_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31440 entries, 0 to 31439
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   ID           31440 non-null  int64 
 1   Tanggal      31440 non-null  object
 2   RegID        31440 non-null  object
 3   SEPID        31440 non-null  object
 4   INACBG       31440 non-null  string
 5   ICD10        24363 non-null  string
 6   ICD9         28345 non-null  string
 7   INACBG_Desc  31440 non-null  object
 8   ICD10_Desc   24362 non-null  object
 9   ICD9_Desc    28345 non-null  object
dtypes: int64(1), object(6), string(3)
memory usage: 2.4+ MB


In [32]:
# Assuming 'df' is your DataFrame
result = train_and_evaluate_xgboost(balanced_df)

# If you want to see detailed results
avg_accuracy, reports = result
print(f"Average Accuracy: {avg_accuracy}")

Unique classes in training data: [  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35
  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53
  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71
  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89
  90  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107
 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197
 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215
 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233
 234 235 236 237 2

Parameters: { "use_label_encoder" } are not used.



Decoded validation labels: ['A-4-10-II' 'A-4-10-III' 'A-4-11-I' 'A-4-11-III' 'A-4-13-I' 'A-4-13-II'
 'A-4-13-III' 'A-4-14-I' 'A-4-14-II' 'A-4-14-III' 'A-4-15-II' 'A-4-15-III'
 'B-1-10-III' 'B-1-11-I' 'B-1-11-II' 'B-1-12-I' 'B-1-12-II' 'B-1-13-I'
 'B-1-13-III' 'B-1-14-I' 'B-1-14-II' 'B-1-14-III' 'B-3-12-0' 'B-4-11-I'
 'B-4-11-II' 'B-4-11-III' 'B-4-12-II' 'B-4-13-I' 'B-4-13-II' 'B-4-13-III'
 'B-4-14-I' 'B-4-14-II' 'B-4-14-III' 'C-3-10-0' 'C-3-11-0' 'C-3-12-0'
 'C-3-13-0' 'C-3-14-0' 'C-3-15-0' 'C-3-16-0' 'C-3-17-0' 'C-3-18-0'
 'C-3-19-0' 'C-3-21-0' 'C-3-23-0' 'C-4-10-I' 'C-4-10-II' 'C-4-10-III'
 'C-4-11-I' 'C-4-11-II' 'C-4-11-III' 'C-4-12-I' 'C-4-12-II' 'C-4-12-III'
 'C-4-13-I' 'C-4-13-II' 'C-4-13-III' 'C-4-14-I' 'C-4-14-II' 'D-1-11-II'
 'D-1-20-I' 'D-3-10-0' 'D-4-11-I' 'D-4-11-II' 'D-4-13-I' 'D-4-13-II'
 'D-4-13-III' 'D-4-14-I' 'D-4-14-II' 'E-1-01-I' 'E-1-20-I' 'E-1-20-II'
 'E-2-21-0' 'E-3-10-0' 'E-4-10-I' 'E-4-10-II' 'E-4-10-III' 'E-4-11-I'
 'E-4-11-II' 'E-4-11-III' 'E-4-12-I' 'E-4-12-I

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Parameters: { "use_label_encoder" } are not used.



Decoded validation labels: ['A-4-10-II' 'A-4-10-III' 'A-4-11-I' 'A-4-11-III' 'A-4-13-I' 'A-4-13-II'
 'A-4-13-III' 'A-4-14-I' 'A-4-14-II' 'A-4-14-III' 'A-4-15-II' 'A-4-15-III'
 'B-1-10-III' 'B-1-11-I' 'B-1-11-II' 'B-1-12-I' 'B-1-12-II' 'B-1-13-I'
 'B-1-13-III' 'B-1-14-I' 'B-1-14-II' 'B-1-14-III' 'B-3-12-0' 'B-4-11-I'
 'B-4-11-II' 'B-4-11-III' 'B-4-12-II' 'B-4-13-I' 'B-4-13-II' 'B-4-13-III'
 'B-4-14-I' 'B-4-14-II' 'B-4-14-III' 'C-3-10-0' 'C-3-11-0' 'C-3-12-0'
 'C-3-13-0' 'C-3-14-0' 'C-3-15-0' 'C-3-16-0' 'C-3-17-0' 'C-3-18-0'
 'C-3-19-0' 'C-3-21-0' 'C-3-23-0' 'C-4-10-I' 'C-4-10-II' 'C-4-10-III'
 'C-4-11-I' 'C-4-11-II' 'C-4-11-III' 'C-4-12-I' 'C-4-12-II' 'C-4-12-III'
 'C-4-13-I' 'C-4-13-II' 'C-4-13-III' 'C-4-14-I' 'C-4-14-II' 'D-1-11-II'
 'D-1-20-I' 'D-3-10-0' 'D-4-11-I' 'D-4-11-II' 'D-4-13-I' 'D-4-13-II'
 'D-4-13-III' 'D-4-14-I' 'D-4-14-II' 'E-1-01-I' 'E-1-20-I' 'E-1-20-II'
 'E-2-21-0' 'E-3-10-0' 'E-4-10-I' 'E-4-10-II' 'E-4-10-III' 'E-4-11-I'
 'E-4-11-II' 'E-4-11-III' 'E-4-12-I' 'E-4-12-I

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Parameters: { "use_label_encoder" } are not used.



Decoded validation labels: ['A-4-10-II' 'A-4-10-III' 'A-4-11-I' 'A-4-11-III' 'A-4-13-I' 'A-4-13-II'
 'A-4-13-III' 'A-4-14-I' 'A-4-14-II' 'A-4-14-III' 'A-4-15-II' 'A-4-15-III'
 'B-1-10-III' 'B-1-11-I' 'B-1-11-II' 'B-1-12-I' 'B-1-12-II' 'B-1-13-I'
 'B-1-13-III' 'B-1-14-I' 'B-1-14-II' 'B-1-14-III' 'B-3-12-0' 'B-4-11-I'
 'B-4-11-II' 'B-4-11-III' 'B-4-12-II' 'B-4-13-I' 'B-4-13-II' 'B-4-13-III'
 'B-4-14-I' 'B-4-14-II' 'B-4-14-III' 'C-3-10-0' 'C-3-11-0' 'C-3-12-0'
 'C-3-13-0' 'C-3-14-0' 'C-3-15-0' 'C-3-16-0' 'C-3-17-0' 'C-3-18-0'
 'C-3-19-0' 'C-3-21-0' 'C-3-23-0' 'C-4-10-I' 'C-4-10-II' 'C-4-10-III'
 'C-4-11-I' 'C-4-11-II' 'C-4-11-III' 'C-4-12-I' 'C-4-12-II' 'C-4-12-III'
 'C-4-13-I' 'C-4-13-II' 'C-4-13-III' 'C-4-14-I' 'C-4-14-II' 'D-1-11-II'
 'D-1-20-I' 'D-3-10-0' 'D-4-11-I' 'D-4-11-II' 'D-4-13-I' 'D-4-13-II'
 'D-4-13-III' 'D-4-14-I' 'D-4-14-II' 'E-1-01-I' 'E-1-20-I' 'E-1-20-II'
 'E-2-21-0' 'E-3-10-0' 'E-4-10-I' 'E-4-10-II' 'E-4-10-III' 'E-4-11-I'
 'E-4-11-II' 'E-4-11-III' 'E-4-12-I' 'E-4-12-I

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Parameters: { "use_label_encoder" } are not used.



Decoded validation labels: ['A-4-10-II' 'A-4-10-III' 'A-4-11-I' 'A-4-11-III' 'A-4-13-I' 'A-4-13-II'
 'A-4-13-III' 'A-4-14-I' 'A-4-14-II' 'A-4-14-III' 'A-4-15-II' 'A-4-15-III'
 'B-1-10-III' 'B-1-11-I' 'B-1-11-II' 'B-1-12-I' 'B-1-12-II' 'B-1-13-I'
 'B-1-13-III' 'B-1-14-I' 'B-1-14-II' 'B-1-14-III' 'B-3-12-0' 'B-4-11-I'
 'B-4-11-II' 'B-4-11-III' 'B-4-12-II' 'B-4-13-I' 'B-4-13-II' 'B-4-13-III'
 'B-4-14-I' 'B-4-14-II' 'B-4-14-III' 'C-3-10-0' 'C-3-11-0' 'C-3-12-0'
 'C-3-13-0' 'C-3-14-0' 'C-3-15-0' 'C-3-16-0' 'C-3-17-0' 'C-3-18-0'
 'C-3-19-0' 'C-3-21-0' 'C-3-23-0' 'C-4-10-I' 'C-4-10-II' 'C-4-10-III'
 'C-4-11-I' 'C-4-11-II' 'C-4-11-III' 'C-4-12-I' 'C-4-12-II' 'C-4-12-III'
 'C-4-13-I' 'C-4-13-II' 'C-4-13-III' 'C-4-14-I' 'C-4-14-II' 'D-1-11-II'
 'D-1-20-I' 'D-3-10-0' 'D-4-11-I' 'D-4-11-II' 'D-4-13-I' 'D-4-13-II'
 'D-4-13-III' 'D-4-14-I' 'D-4-14-II' 'E-1-01-I' 'E-1-20-I' 'E-1-20-II'
 'E-2-21-0' 'E-3-10-0' 'E-4-10-I' 'E-4-10-II' 'E-4-10-III' 'E-4-11-I'
 'E-4-11-II' 'E-4-11-III' 'E-4-12-I' 'E-4-12-I

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Parameters: { "use_label_encoder" } are not used.



Decoded validation labels: ['A-4-10-II' 'A-4-10-III' 'A-4-11-I' 'A-4-11-III' 'A-4-13-I' 'A-4-13-II'
 'A-4-13-III' 'A-4-14-I' 'A-4-14-II' 'A-4-14-III' 'A-4-15-II' 'A-4-15-III'
 'B-1-10-III' 'B-1-11-I' 'B-1-11-II' 'B-1-12-I' 'B-1-12-II' 'B-1-13-I'
 'B-1-13-III' 'B-1-14-I' 'B-1-14-II' 'B-1-14-III' 'B-3-12-0' 'B-4-11-I'
 'B-4-11-II' 'B-4-11-III' 'B-4-12-II' 'B-4-13-I' 'B-4-13-II' 'B-4-13-III'
 'B-4-14-I' 'B-4-14-II' 'B-4-14-III' 'C-3-10-0' 'C-3-11-0' 'C-3-12-0'
 'C-3-13-0' 'C-3-14-0' 'C-3-15-0' 'C-3-16-0' 'C-3-17-0' 'C-3-18-0'
 'C-3-19-0' 'C-3-21-0' 'C-3-23-0' 'C-4-10-I' 'C-4-10-II' 'C-4-10-III'
 'C-4-11-I' 'C-4-11-II' 'C-4-11-III' 'C-4-12-I' 'C-4-12-II' 'C-4-12-III'
 'C-4-13-I' 'C-4-13-II' 'C-4-13-III' 'C-4-14-I' 'C-4-14-II' 'D-1-11-II'
 'D-1-20-I' 'D-3-10-0' 'D-4-11-I' 'D-4-11-II' 'D-4-13-I' 'D-4-13-II'
 'D-4-13-III' 'D-4-14-I' 'D-4-14-II' 'E-1-01-I' 'E-1-20-I' 'E-1-20-II'
 'E-2-21-0' 'E-3-10-0' 'E-4-10-I' 'E-4-10-II' 'E-4-10-III' 'E-4-11-I'
 'E-4-11-II' 'E-4-11-III' 'E-4-12-I' 'E-4-12-I

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
