# Baseline models training

This notebook trains and validates XGBoost models without any prior experiments on the data. We check 4 different prediction cases:
- All cancer types together, multiclass classification (stages: I, II, III, IV)
- All cancer types together, binary classification (stages: I-III vs IV)
- Only the most numerous types of cancer (each one separately), multiclass classification (stages: I, II, III, IV)
- Only the most numerous types of cancer (each one separately), binary classification (stages: I-III vs IV)

Outcomes can be treated as baseline results.

In [21]:
import os
import optuna
import numpy as np
import pandas as pd
import plotly.figure_factory as ff
from functools import partial
from xgboost.sklearn import XGBClassifier
from sklearn.metrics import f1_score, accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import StratifiedKFold

import warnings
warnings.filterwarnings('ignore')

Define random seed to allow reproducibility

In [2]:
# Single seed used for NumPy's global RNG and, through xgb_config, for every XGBoost model.
random_state = 42
np.random.seed(random_state)

Load training data

In [3]:
# Directory holding the cleaned, semicolon-separated tables.
data_path = '../data/cleaned'
train_csv = os.path.join(data_path, 'train.csv')

train = pd.read_csv(train_csv, sep=';')
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1009 entries, 0 to 1008
Columns: 4398 entries, ID to ENSG00000272168
dtypes: float64(4394), object(4)
memory usage: 33.9+ MB


## All cancer types together, multiclass classification (stages: I, II, III, IV)

Extract cancer types and stages to perform cross validation in a stratified way

Remove unnecessary columns: ID, Group, Sex and Age

In [52]:
X_train = train.copy(deep=True)

# One stratum per (cancer type, stage) pair, e.g. "Ovarian_cancer_III".
# Built with vectorised string ops: a row-wise apply(axis=1) has to materialise
# every ~4400-column row as a Series just to read these two fields.
# Assumes Group and Stage are string columns without missing values.
stratification_groups = (
    X_train['Group'].str.replace(' ', '_', regex=False) + '_' + X_train['Stage']
)
# Keep only the expression features and the Stage label.
X_train = X_train.loc[:, ~X_train.columns.isin(['ID', 'Sex', 'Age', 'Group'])]

Encode labels

In [53]:
labels_encoding = {
    'I': 0,
    'II': 1,
    'III': 2,
    'IV': 3
}
# Plain column assignment replaces the column, so Stage becomes a real integer
# column. `X_train.loc[:, 'Stage'] = ...` instead writes into the existing
# object-dtype column in place (pandas >= 2), leaving the labels as objects.
X_train['Stage'] = X_train['Stage'].map(labels_encoding)
# Unmapped values (e.g. when this cell is run twice) would silently become NaN.
assert X_train['Stage'].notna().all(), 'Stage contains values missing from labels_encoding'

X_train['Stage'].value_counts()

Stage
3    397
0    389
2    122
1    101
Name: count, dtype: int64

Define functions for stratified cross-validation: one for hyperparameter optimization (returns the per-fold scores, which the objective then averages) and a second for validating the model with the best parameters (returns out-of-fold predictions)

In [54]:
def stratified_cross_val_score(
        estimator,
        df,
        stratification_groups,
        label_col,
        scoring,
        n_splits=5
):
    """Score ``estimator`` with stratified k-fold cross-validation.

    Parameters
    ----------
    estimator
        Model exposing ``fit(X=..., y=..., verbose=...)`` and ``predict(X)``.
        It is refitted on every fold.
    df
        DataFrame holding the feature columns plus the ``label_col`` column.
    stratification_groups
        One value per row of ``df``; folds are stratified on these values
        (they do not have to be the labels themselves).
    label_col
        Name of the target column in ``df``.
    scoring
        Callable ``scoring(y_true, y_pred)`` returning a number.
    n_splits
        Number of folds.

    Returns
    -------
    numpy.ndarray
        Array of length ``n_splits`` with one score per fold.
    """
    # The feature/target split does not depend on the fold, so do it once
    # instead of re-copying the whole frame inside the loop.
    y = df.loc[:, label_col]
    X = df.drop(columns=[label_col])

    scores = np.zeros(n_splits)
    skf = StratifiedKFold(n_splits=n_splits)

    for i, (train_index, test_index) in enumerate(skf.split(df, stratification_groups)):
        estimator.fit(X=X.iloc[train_index], y=y.iloc[train_index], verbose=False)

        y_true = y.iloc[test_index]
        y_pred = estimator.predict(X.iloc[test_index])
        scores[i] = scoring(list(y_true), y_pred)

    return scores


def stratified_cross_val_predict(
        estimator,
        df,
        stratification_groups,
        label_col,
        n_splits=5
):
    """Return out-of-fold predictions from stratified k-fold cross-validation.

    Parameters
    ----------
    estimator
        Model exposing ``fit(X=..., y=..., verbose=...)`` and ``predict(X)``.
        It is refitted on every fold.
    df
        DataFrame holding the feature columns plus the ``label_col`` column.
    stratification_groups
        One value per row of ``df``; folds are stratified on these values.
    label_col
        Name of the target column in ``df``.
    n_splits
        Number of folds.

    Returns
    -------
    numpy.ndarray
        Float array of length ``len(df)``; position ``i`` holds the prediction
        made for row ``i`` by the model fitted on the folds not containing it.
    """
    # The feature/target split does not depend on the fold, so do it once
    # instead of re-copying the whole frame inside the loop.
    y = df.loc[:, label_col]
    X = df.drop(columns=[label_col])

    y_pred = np.zeros(len(df))
    skf = StratifiedKFold(n_splits=n_splits)

    for train_index, test_index in skf.split(df, stratification_groups):
        estimator.fit(X=X.iloc[train_index], y=y.iloc[train_index], verbose=False)
        y_pred[test_index] = estimator.predict(X.iloc[test_index])

    return y_pred

Define objective function for hyperparameters optimization using Optuna

In [55]:
def objective(trial: optuna.Trial, X_train, stratification_groups, xgb_config, scoring):
    """Optuna objective: mean 4-fold stratified CV score of one sampled XGBoost configuration.

    ``trial`` supplies the sampled hyperparameters, ``X_train`` is the frame with
    features plus the 'Stage' label column, ``stratification_groups`` holds one
    stratum per row, ``xgb_config`` carries the fixed XGBoost settings and
    ``scoring`` is a ``scoring(y_true, y_pred)`` callable. Returns the mean of
    the per-fold scores.
    """
    search_space = dict(
        n_estimators=trial.suggest_int('n_estimators', 20, 150),
        max_depth=trial.suggest_int('max_depth', 2, 8),
        learning_rate=trial.suggest_float('learning_rate', 0.001, 0.3),
        subsample=trial.suggest_float('subsample', 0.6, 1.0),
        colsample_bytree=trial.suggest_float('colsample_bytree', 0.6, 1.0),
        gamma=trial.suggest_float('gamma', 0.0, 1.0),
    )
    # Fixed settings are merged last, so they win over a sampled key of the same name.
    model = XGBClassifier(**{**search_space, **xgb_config})

    fold_scores = stratified_cross_val_score(
        estimator=model,
        df=X_train,
        stratification_groups=stratification_groups,
        label_col='Stage',
        scoring=scoring,
        n_splits=4
    )
    return fold_scores.mean()

Since the classes are pretty imbalanced we use the weighted-average F1 metric for optimization

In [56]:
xgb_config = {
    'objective': 'multi:softmax',
    'random_state': random_state,
    'num_class': 4,
}

scoring_func = partial(f1_score, average='weighted')
objective_func = partial(
    objective,
    X_train=X_train,
    stratification_groups=stratification_groups,
    xgb_config=xgb_config,
    scoring=scoring_func
)

# Seed the sampler explicitly: Optuna's sampler has its own RNG, so without a
# seed every run explores different hyperparameters and the search cannot be
# reproduced. TPE is Optuna's default sampler, so the algorithm is unchanged.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=random_state)
)
study.optimize(objective_func, n_trials=100)

[32m[I 2023-04-11 16:50:03,478][0m A new study created in memory with name: no-name-4c9ce7c7-350f-4d28-8b7b-f2aab9627747[0m
[32m[I 2023-04-11 16:51:27,459][0m Trial 0 finished with value: 0.5483591780676107 and parameters: {'n_estimators': 123, 'max_depth': 8, 'learning_rate': 0.06245555730523676, 'subsample': 0.716466204223274, 'colsample_bytree': 0.6472562756915899, 'gamma': 0.8692166924491291}. Best is trial 0 with value: 0.5483591780676107.[0m
[32m[I 2023-04-11 16:51:43,945][0m Trial 1 finished with value: 0.5593143482728364 and parameters: {'n_estimators': 27, 'max_depth': 4, 'learning_rate': 0.25987235890312715, 'subsample': 0.862399229350953, 'colsample_bytree': 0.6608163451663462, 'gamma': 0.7580556974045628}. Best is trial 1 with value: 0.5593143482728364.[0m
[32m[I 2023-04-11 16:52:29,639][0m Trial 2 finished with value: 0.5492532482133116 and parameters: {'n_estimators': 126, 'max_depth': 3, 'learning_rate': 0.2601377334003289, 'subsample': 0.6061721702084488, 'co

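Extract the best hyperparameters found and perform cross-validation on the whole training set. Note that these are the same samples the hyperparameter search was scored on, so the metrics below are optimistic estimates.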

In [57]:
# Tuned values first, fixed XGBoost settings on top of them.
best_params = dict(study.best_params)
best_params.update(xgb_config)

best_xgb = XGBClassifier(**best_params)
cv_kwargs = dict(
    df=X_train,
    stratification_groups=stratification_groups,
    label_col='Stage',
    n_splits=5
)
# Out-of-fold predictions: each row is predicted by a model fitted without it.
y_pred = stratified_cross_val_predict(estimator=best_xgb, **cv_kwargs)
y_true = list(X_train['Stage'].values)

Define function for plotting confusion matrix

In [58]:
def plot_confusion_matrix(cm, labels, title_dict):
    """Show a confusion matrix as an annotated Plotly heatmap.

    ``cm`` is the matrix with true classes in rows and predicted classes in
    columns, ``labels`` is the list of class names in matrix order and
    ``title_dict`` is passed to the layout as the figure title.
    """
    # Rows and y labels are reversed together, so each row keeps its own label.
    flipped_rows = cm[::-1]
    flipped_labels = labels[::-1]

    heatmap = ff.create_annotated_heatmap(
        z=flipped_rows,
        x=labels,
        y=flipped_labels,
        colorscale='ice',
        reversescale=True
    )
    heatmap.update_layout(
        yaxis_title='True label',
        xaxis_title='Predicted label',
        yaxis=dict(tickfont=dict(size=10)),
        xaxis=dict(tickfont=dict(size=10)),
        title=title_dict,
        paper_bgcolor='rgba(0,0,0,0)'
    )
    heatmap.show()

In [59]:
# Iterating a dict yields its keys in insertion order, which here is label-code order.
labels = list(labels_encoding)

report = classification_report(
    y_true,
    y_pred.astype(int),
    target_names=labels
)
print(report)

              precision    recall  f1-score   support

           I       0.65      0.78      0.71       389
          II       0.31      0.04      0.07       101
         III       0.25      0.04      0.07       122
          IV       0.61      0.79      0.69       397

    accuracy                           0.62      1009
   macro avg       0.46      0.41      0.39      1009
weighted avg       0.55      0.62      0.56      1009



In [60]:
# The CV helper returns float predictions; cast them once for all metrics.
y_pred_int = y_pred.astype(int)

accuracy = accuracy_score(y_true, y_pred_int)
f1 = f1_score(y_true, y_pred_int, average='weighted')
cm = confusion_matrix(y_true, y_pred_int)

title_dict = dict(
    text=f'Accuracy = {accuracy*100:.2f}, Weighted F1 = {f1:.2f}',
    x=0.5,
    y=0.05
)

plot_confusion_matrix(cm, labels, title_dict)

## All cancer types together, binary classification (stages: I-III vs IV)

Map the first three stages to a single common class, "I-III"

In [27]:
X_train = train.copy(deep=True)

# Stages I, II and III collapse into one class; stage IV stays on its own.
mapping = {**dict.fromkeys(('I', 'II', 'III'), 'I-III'), 'IV': 'IV'}
binary_stage = X_train['Stage'].map(mapping)
X_train.loc[:, 'Stage'] = binary_stage

In [28]:
# One stratum per (cancer type, binary stage) pair, e.g. "NSCLC_IV".
# Vectorised string ops replace the row-wise apply(axis=1), which has to build
# a Series for every ~4400-column row just to read two fields.
# Assumes Group and Stage are string columns without missing values.
stratification_groups = (
    X_train['Group'].str.replace(' ', '_', regex=False) + '_' + X_train['Stage']
)
# The mask is built from X_train's own columns (it used to come from `train`,
# which only worked because both frames happen to share the same columns).
X_train = X_train.loc[:, ~X_train.columns.isin(['ID', 'Sex', 'Age', 'Group'])]

Encode labels

In [29]:
labels_encoding = {
    'I-III': 0,
    'IV': 1
}
# Plain column assignment replaces the column, so Stage becomes a real integer
# column. `X_train.loc[:, 'Stage'] = ...` instead writes into the existing
# object-dtype column in place (pandas >= 2), leaving the labels as objects.
X_train['Stage'] = X_train['Stage'].map(labels_encoding)
# Unmapped values (e.g. when this cell is run twice) would silently become NaN.
assert X_train['Stage'].notna().all(), 'Stage contains values missing from labels_encoding'

X_train['Stage'].value_counts()

Stage
0    612
1    397
Name: count, dtype: int64

Run hyperparameters optimization

In [30]:
xgb_config = {
    'objective': 'binary:logistic',
    'random_state': random_state
}

# Plain f1_score scores the positive class, which is label 1 (stage IV) here.
objective_func = partial(
    objective,
    X_train=X_train,
    stratification_groups=stratification_groups,
    xgb_config=xgb_config,
    scoring=f1_score
)

# Seed the sampler explicitly: Optuna's sampler has its own RNG, so without a
# seed every run explores different hyperparameters and the search cannot be
# reproduced. TPE is Optuna's default sampler, so the algorithm is unchanged.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=random_state)
)
study.optimize(objective_func, n_trials=100)

[32m[I 2023-04-11 15:07:47,935][0m A new study created in memory with name: no-name-7564e606-7466-4958-be9a-12c056e47667[0m
[32m[I 2023-04-11 15:07:57,193][0m Trial 0 finished with value: 0.6357480737115248 and parameters: {'n_estimators': 38, 'max_depth': 4, 'learning_rate': 0.17803153795692553, 'subsample': 0.6461181194345617, 'colsample_bytree': 0.9776737012625218, 'gamma': 0.34556273385546654}. Best is trial 0 with value: 0.6357480737115248.[0m
[32m[I 2023-04-11 15:08:47,288][0m Trial 1 finished with value: 0.66690789517865 and parameters: {'n_estimators': 150, 'max_depth': 7, 'learning_rate': 0.01970671441550036, 'subsample': 0.8076617186462105, 'colsample_bytree': 0.9694142770399612, 'gamma': 0.12267699270396781}. Best is trial 1 with value: 0.66690789517865.[0m
[32m[I 2023-04-11 15:08:57,408][0m Trial 2 finished with value: 0.6616067637997919 and parameters: {'n_estimators': 35, 'max_depth': 5, 'learning_rate': 0.11229335049027983, 'subsample': 0.7125073940594522, 'co

Extract the best of the found hyperparameters and perform cross validation on the whole training set

In [31]:
# Tuned values first, fixed XGBoost settings on top of them.
best_params = dict(study.best_params)
best_params.update(xgb_config)

best_xgb = XGBClassifier(**best_params)
cv_kwargs = dict(
    df=X_train,
    stratification_groups=stratification_groups,
    label_col='Stage',
    n_splits=5
)
# Out-of-fold predictions: each row is predicted by a model fitted without it.
y_pred = stratified_cross_val_predict(estimator=best_xgb, **cv_kwargs)
y_true = list(X_train['Stage'].values)

In [32]:
# Iterating a dict yields its keys in insertion order, which here is label-code order.
labels = list(labels_encoding)

report = classification_report(
    y_true,
    y_pred.astype(int),
    target_names=labels
)
print(report)

              precision    recall  f1-score   support

       I-III       0.77      0.84      0.80       612
          IV       0.71      0.62      0.66       397

    accuracy                           0.75      1009
   macro avg       0.74      0.73      0.73      1009
weighted avg       0.75      0.75      0.75      1009



In [33]:
# The CV helper returns float predictions; cast them once for all metrics.
y_pred_int = y_pred.astype(int)

accuracy = accuracy_score(y_true, y_pred_int)
f1 = f1_score(y_true, y_pred_int)
cm = confusion_matrix(y_true, y_pred_int)

title_dict = dict(
    text=f'Accuracy = {accuracy*100:.2f}, F1 = {f1:.2f}',
    x=0.5,
    y=0.05
)

plot_confusion_matrix(cm, labels, title_dict)

## Only the most numerous types of cancer (each one separately), multiclass classification (stages: I, II, III, IV)

In [61]:
# Stage names in order; each stage's code is its position in the tuple.
labels_encoding = {stage: code for code, stage in enumerate(('I', 'II', 'III', 'IV'))}

# Fixed XGBoost settings shared by every per-cancer multiclass model.
xgb_config = dict(
    objective='multi:softmax',
    random_state=random_state,
    num_class=4,
)

scoring_func = partial(f1_score, average='weighted')

Get only the most numerous cancer types (with more than 100 cases)

In [62]:
cancer_type_counts = train['Group'].value_counts()
# Keep the cancer types with strictly more than 100 samples.
is_numerous = cancer_type_counts > 100
numerous_cancer_types = list(cancer_type_counts.index[is_numerous])
numerous_cancer_types

['NSCLC', 'Ovarian cancer', 'Glioma']

For each cancer type a separate model will be created, optimized and tested

### NSCLC

In [63]:
X_train = train.loc[train['Group'] == 'NSCLC']

# Only one cancer type here, so the stage alone defines the strata.
stratification_groups = X_train.loc[:, 'Stage']
# .copy() makes X_train an independent frame, so encoding the labels below is
# not a write into a slice of `train` (a SettingWithCopy situation that the
# global warnings filter would hide).
X_train = X_train.loc[:, ~X_train.columns.isin(['ID', 'Sex', 'Age', 'Group'])].copy()

# Column assignment (not `.loc[:, 'Stage'] = ...`) so Stage becomes an integer
# column instead of an object column holding ints.
X_train['Stage'] = X_train['Stage'].map(labels_encoding)
X_train['Stage'].value_counts()

Stage
3    264
2     37
0     30
1     10
Name: count, dtype: int64

In [64]:
objective_func = partial(
    objective,
    X_train=X_train,
    stratification_groups=stratification_groups,
    xgb_config=xgb_config,
    scoring=scoring_func
)

# Seed the sampler explicitly: Optuna's sampler has its own RNG, so without a
# seed every run explores different hyperparameters and the search cannot be
# reproduced. TPE is Optuna's default sampler, so the algorithm is unchanged.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=random_state)
)
study.optimize(objective_func, n_trials=100)

[32m[I 2023-04-11 18:09:54,243][0m A new study created in memory with name: no-name-4aa05c2d-33f4-48a5-8c7d-4d66df1e6353[0m
[32m[I 2023-04-11 18:10:11,357][0m Trial 0 finished with value: 0.7022934665929529 and parameters: {'n_estimators': 69, 'max_depth': 6, 'learning_rate': 0.19782952909622356, 'subsample': 0.9312130857855134, 'colsample_bytree': 0.7890137455250394, 'gamma': 0.8040736304701734}. Best is trial 0 with value: 0.7022934665929529.[0m
[32m[I 2023-04-11 18:10:37,470][0m Trial 1 finished with value: 0.6992216463610572 and parameters: {'n_estimators': 103, 'max_depth': 7, 'learning_rate': 0.05292700911301404, 'subsample': 0.6347081861755921, 'colsample_bytree': 0.9524198948974147, 'gamma': 0.14277278866286114}. Best is trial 0 with value: 0.7022934665929529.[0m
[32m[I 2023-04-11 18:11:05,018][0m Trial 2 finished with value: 0.7083641224336558 and parameters: {'n_estimators': 147, 'max_depth': 7, 'learning_rate': 0.1717029327114464, 'subsample': 0.7826043723222649, 

In [68]:
# Tuned values first, fixed XGBoost settings on top of them.
best_params = dict(study.best_params)
best_params.update(xgb_config)

best_xgb = XGBClassifier(**best_params)
cv_kwargs = dict(
    df=X_train,
    stratification_groups=stratification_groups,
    label_col='Stage',
    n_splits=5
)
# Out-of-fold predictions: each row is predicted by a model fitted without it.
y_pred = stratified_cross_val_predict(estimator=best_xgb, **cv_kwargs)
y_true = list(X_train['Stage'].values)

In [69]:
# Iterating a dict yields its keys in insertion order, which here is label-code order.
labels = list(labels_encoding)

report = classification_report(
    y_true,
    y_pred.astype(int),
    target_names=labels
)
print(report)

              precision    recall  f1-score   support

           I       0.67      0.07      0.12        30
          II       0.50      0.10      0.17        10
         III       0.14      0.03      0.05        37
          IV       0.80      0.99      0.88       264

    accuracy                           0.78       341
   macro avg       0.53      0.30      0.30       341
weighted avg       0.71      0.78      0.70       341



In [70]:
# The CV helper returns float predictions; cast them once for all metrics.
y_pred_int = y_pred.astype(int)

accuracy = accuracy_score(y_true, y_pred_int)
f1 = f1_score(y_true, y_pred_int, average='weighted')
cm = confusion_matrix(y_true, y_pred_int)

title_dict = dict(
    text=f'Accuracy = {accuracy*100:.2f}, Weighted F1 = {f1:.2f}',
    x=0.5,
    y=0.05
)

plot_confusion_matrix(cm, labels, title_dict)

### Ovarian cancer

In [71]:
X_train = train.loc[train['Group'] == 'Ovarian cancer']

# Only one cancer type here, so the stage alone defines the strata.
stratification_groups = X_train.loc[:, 'Stage']
# .copy() makes X_train an independent frame, so encoding the labels below is
# not a write into a slice of `train` (a SettingWithCopy situation that the
# global warnings filter would hide).
X_train = X_train.loc[:, ~X_train.columns.isin(['ID', 'Sex', 'Age', 'Group'])].copy()

# Column assignment (not `.loc[:, 'Stage'] = ...`) so Stage becomes an integer
# column instead of an object column holding ints.
X_train['Stage'] = X_train['Stage'].map(labels_encoding)
X_train['Stage'].value_counts()

Stage
2    34
0    30
3    28
1    14
Name: count, dtype: int64

In [72]:
objective_func = partial(
    objective,
    X_train=X_train,
    stratification_groups=stratification_groups,
    xgb_config=xgb_config,
    scoring=scoring_func
)

# Seed the sampler explicitly: Optuna's sampler has its own RNG, so without a
# seed every run explores different hyperparameters and the search cannot be
# reproduced. TPE is Optuna's default sampler, so the algorithm is unchanged.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=random_state)
)
study.optimize(objective_func, n_trials=100)

[32m[I 2023-04-11 19:33:13,876][0m A new study created in memory with name: no-name-dd2d871b-b449-4407-8e0b-171c1fa0fabf[0m
[32m[I 2023-04-11 19:33:26,083][0m Trial 0 finished with value: 0.4000656020392862 and parameters: {'n_estimators': 120, 'max_depth': 6, 'learning_rate': 0.12430011199909297, 'subsample': 0.9054546974253824, 'colsample_bytree': 0.7272592615897554, 'gamma': 0.2740596458499803}. Best is trial 0 with value: 0.4000656020392862.[0m
[32m[I 2023-04-11 19:33:37,189][0m Trial 1 finished with value: 0.4321775947936939 and parameters: {'n_estimators': 115, 'max_depth': 5, 'learning_rate': 0.23010474836264355, 'subsample': 0.7178095390972489, 'colsample_bytree': 0.6303174875882364, 'gamma': 0.8510478526675063}. Best is trial 1 with value: 0.4321775947936939.[0m
[32m[I 2023-04-11 19:33:51,636][0m Trial 2 finished with value: 0.441376921061899 and parameters: {'n_estimators': 140, 'max_depth': 2, 'learning_rate': 0.014131636184841905, 'subsample': 0.7637332432400548,

In [73]:
# Tuned values first, fixed XGBoost settings on top of them.
best_params = dict(study.best_params)
best_params.update(xgb_config)

best_xgb = XGBClassifier(**best_params)
cv_kwargs = dict(
    df=X_train,
    stratification_groups=stratification_groups,
    label_col='Stage',
    n_splits=5
)
# Out-of-fold predictions: each row is predicted by a model fitted without it.
y_pred = stratified_cross_val_predict(estimator=best_xgb, **cv_kwargs)
y_true = list(X_train['Stage'].values)

In [74]:
# Iterating a dict yields its keys in insertion order, which here is label-code order.
labels = list(labels_encoding)

report = classification_report(
    y_true,
    y_pred.astype(int),
    target_names=labels
)
print(report)

              precision    recall  f1-score   support

           I       0.38      0.50      0.43        30
          II       0.00      0.00      0.00        14
         III       0.39      0.44      0.42        34
          IV       0.85      0.79      0.81        28

    accuracy                           0.49       106
   macro avg       0.40      0.43      0.42       106
weighted avg       0.46      0.49      0.47       106



In [75]:
# The CV helper returns float predictions; cast them once for all metrics.
y_pred_int = y_pred.astype(int)

accuracy = accuracy_score(y_true, y_pred_int)
f1 = f1_score(y_true, y_pred_int, average='weighted')
cm = confusion_matrix(y_true, y_pred_int)

title_dict = dict(
    text=f'Accuracy = {accuracy*100:.2f}, Weighted F1 = {f1:.2f}',
    x=0.5,
    y=0.05
)

plot_confusion_matrix(cm, labels, title_dict)

### Glioma

This cancer type has only stage I samples, so there is nothing to classify and no model is trained for it

In [78]:
# Select the Glioma rows and look at how their stages are distributed.
is_glioma = train['Group'] == 'Glioma'
X_train = train.loc[is_glioma]

X_train['Stage'].value_counts()

Stage
I    102
Name: count, dtype: int64

## Only the most numerous types of cancer (each one separately), binary classification (stages: I-III vs IV)

In [80]:
# Stages I, II and III collapse into one class; stage IV stays on its own.
mapping = {**dict.fromkeys(('I', 'II', 'III'), 'I-III'), 'IV': 'IV'}

# Binary label codes: 0 = stages I-III, 1 = stage IV.
labels_encoding = {name: code for code, name in enumerate(('I-III', 'IV'))}

# Fixed XGBoost settings shared by every per-cancer binary model.
xgb_config = dict(
    objective='binary:logistic',
    random_state=random_state
)

### NSCLC

In [81]:
X_train = train.loc[train['Group'] == 'NSCLC']

# Strata are the original four stages (taken before the binary mapping), so
# folds stay balanced on the fine-grained stage as well.
stratification_groups = X_train.loc[:, 'Stage']
# .copy() makes X_train an independent frame, so encoding the labels below is
# not a write into a slice of `train` (a SettingWithCopy situation that the
# global warnings filter would hide).
X_train = X_train.loc[:, ~X_train.columns.isin(['ID', 'Sex', 'Age', 'Group'])].copy()

# Column assignment (not `.loc[:, 'Stage'] = ...`) so Stage becomes an integer
# column instead of an object column holding ints.
X_train['Stage'] = X_train['Stage'].map(mapping).map(labels_encoding)
X_train['Stage'].value_counts()

Stage
1    264
0     77
Name: count, dtype: int64

In [82]:
# Plain f1_score scores the positive class, which is label 1 (stage IV) here.
objective_func = partial(
    objective,
    X_train=X_train,
    stratification_groups=stratification_groups,
    xgb_config=xgb_config,
    scoring=f1_score
)

# Seed the sampler explicitly: Optuna's sampler has its own RNG, so without a
# seed every run explores different hyperparameters and the search cannot be
# reproduced. TPE is Optuna's default sampler, so the algorithm is unchanged.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=random_state)
)
study.optimize(objective_func, n_trials=100)

[32m[I 2023-04-11 20:32:45,659][0m A new study created in memory with name: no-name-a90d0d66-87ef-4dc6-aa09-1183e51cf360[0m
[32m[I 2023-04-11 20:32:56,076][0m Trial 0 finished with value: 0.8889972959121896 and parameters: {'n_estimators': 107, 'max_depth': 3, 'learning_rate': 0.19348809671441597, 'subsample': 0.7380715877768046, 'colsample_bytree': 0.8055865562031641, 'gamma': 0.8825594141597728}. Best is trial 0 with value: 0.8889972959121896.[0m
[32m[I 2023-04-11 20:32:59,390][0m Trial 1 finished with value: 0.8972578639391514 and parameters: {'n_estimators': 21, 'max_depth': 5, 'learning_rate': 0.18734485483066007, 'subsample': 0.7727836364134337, 'colsample_bytree': 0.6224646039504536, 'gamma': 0.3854204674318571}. Best is trial 1 with value: 0.8972578639391514.[0m
[32m[I 2023-04-11 20:33:12,979][0m Trial 2 finished with value: 0.8950676167135692 and parameters: {'n_estimators': 141, 'max_depth': 5, 'learning_rate': 0.26655875082825514, 'subsample': 0.8307506718209435, 

In [83]:
# Tuned values first, fixed XGBoost settings on top of them.
best_params = dict(study.best_params)
best_params.update(xgb_config)

best_xgb = XGBClassifier(**best_params)
cv_kwargs = dict(
    df=X_train,
    stratification_groups=stratification_groups,
    label_col='Stage',
    n_splits=5
)
# Out-of-fold predictions: each row is predicted by a model fitted without it.
y_pred = stratified_cross_val_predict(estimator=best_xgb, **cv_kwargs)
y_true = list(X_train['Stage'].values)

In [84]:
# Iterating a dict yields its keys in insertion order, which here is label-code order.
labels = list(labels_encoding)

report = classification_report(
    y_true,
    y_pred.astype(int),
    target_names=labels
)
print(report)

              precision    recall  f1-score   support

       I-III       0.84      0.27      0.41        77
          IV       0.82      0.98      0.90       264

    accuracy                           0.82       341
   macro avg       0.83      0.63      0.65       341
weighted avg       0.83      0.82      0.79       341



In [85]:
# The CV helper returns float predictions; cast them once for all metrics.
y_pred_int = y_pred.astype(int)

accuracy = accuracy_score(y_true, y_pred_int)
f1 = f1_score(y_true, y_pred_int)
cm = confusion_matrix(y_true, y_pred_int)

title_dict = dict(
    text=f'Accuracy = {accuracy*100:.2f}, F1 = {f1:.2f}',
    x=0.5,
    y=0.05
)

plot_confusion_matrix(cm, labels, title_dict)

### Ovarian cancer

In [86]:
X_train = train.loc[train['Group'] == 'Ovarian cancer']

# Strata are the original four stages (taken before the binary mapping), so
# folds stay balanced on the fine-grained stage as well.
stratification_groups = X_train.loc[:, 'Stage']
# .copy() makes X_train an independent frame, so encoding the labels below is
# not a write into a slice of `train` (a SettingWithCopy situation that the
# global warnings filter would hide).
X_train = X_train.loc[:, ~X_train.columns.isin(['ID', 'Sex', 'Age', 'Group'])].copy()

# Column assignment (not `.loc[:, 'Stage'] = ...`) so Stage becomes an integer
# column instead of an object column holding ints.
X_train['Stage'] = X_train['Stage'].map(mapping).map(labels_encoding)
X_train['Stage'].value_counts()

Stage
0    78
1    28
Name: count, dtype: int64

In [87]:
# Plain f1_score scores the positive class, which is label 1 (stage IV) here.
objective_func = partial(
    objective,
    X_train=X_train,
    stratification_groups=stratification_groups,
    xgb_config=xgb_config,
    scoring=f1_score
)

# Seed the sampler explicitly: Optuna's sampler has its own RNG, so without a
# seed every run explores different hyperparameters and the search cannot be
# reproduced. TPE is Optuna's default sampler, so the algorithm is unchanged.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=random_state)
)
study.optimize(objective_func, n_trials=100)

[32m[I 2023-04-11 20:52:09,193][0m A new study created in memory with name: no-name-02631223-fe03-4529-acad-b9a41d1f2ec3[0m
[32m[I 2023-04-11 20:52:16,963][0m Trial 0 finished with value: 0.7205128205128205 and parameters: {'n_estimators': 99, 'max_depth': 5, 'learning_rate': 0.23398440482701321, 'subsample': 0.9165288931886627, 'colsample_bytree': 0.6000065504192383, 'gamma': 0.7189339517765465}. Best is trial 0 with value: 0.7205128205128205.[0m
[32m[I 2023-04-11 20:52:24,444][0m Trial 1 finished with value: 0.664957264957265 and parameters: {'n_estimators': 91, 'max_depth': 4, 'learning_rate': 0.2321216596265116, 'subsample': 0.8757242056497063, 'colsample_bytree': 0.7652855316503839, 'gamma': 0.5623622415278761}. Best is trial 0 with value: 0.7205128205128205.[0m
[32m[I 2023-04-11 20:52:31,712][0m Trial 2 finished with value: 0.7773310023310023 and parameters: {'n_estimators': 91, 'max_depth': 6, 'learning_rate': 0.1677982760305576, 'subsample': 0.9309454154680619, 'cols

In [88]:
# Tuned values first, fixed XGBoost settings on top of them.
best_params = dict(study.best_params)
best_params.update(xgb_config)

best_xgb = XGBClassifier(**best_params)
cv_kwargs = dict(
    df=X_train,
    stratification_groups=stratification_groups,
    label_col='Stage',
    n_splits=5
)
# Out-of-fold predictions: each row is predicted by a model fitted without it.
y_pred = stratified_cross_val_predict(estimator=best_xgb, **cv_kwargs)
y_true = list(X_train['Stage'].values)

In [89]:
# Iterating a dict yields its keys in insertion order, which here is label-code order.
labels = list(labels_encoding)

report = classification_report(
    y_true,
    y_pred.astype(int),
    target_names=labels
)
print(report)

              precision    recall  f1-score   support

       I-III       0.91      0.95      0.93        78
          IV       0.84      0.75      0.79        28

    accuracy                           0.90       106
   macro avg       0.88      0.85      0.86       106
weighted avg       0.89      0.90      0.89       106



In [90]:
# The CV helper returns float predictions; cast them once for all metrics.
y_pred_int = y_pred.astype(int)

accuracy = accuracy_score(y_true, y_pred_int)
f1 = f1_score(y_true, y_pred_int)
cm = confusion_matrix(y_true, y_pred_int)

title_dict = dict(
    text=f'Accuracy = {accuracy*100:.2f}, F1 = {f1:.2f}',
    x=0.5,
    y=0.05
)

plot_confusion_matrix(cm, labels, title_dict)