# Imports

In [1]:
import pandas
import numpy

from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import BernoulliNB, GaussianNB, MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, HistGradientBoostingClassifier
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, balanced_accuracy_score, recall_score, precision_score, f1_score, roc_auc_score, average_precision_score, log_loss, brier_score_loss, matthews_corrcoef

# Load Data

In [52]:
path = '/content/drive/MyDrive/Colab Notebooks/QMIND_2023-2024/'
file_name = 'Hospital_Inpatient_Discharges__SPARCS_De-Identified___2019_20240203.csv'
df = pandas.read_csv(path + file_name, nrows=10000)
display(df)

Unnamed: 0,Hospital Service Area,Hospital County,Operating Certificate Number,Permanent Facility Id,Facility Name,Age Group,Zip Code - 3 digits,Gender,Race,Ethnicity,...,APR Severity of Illness Description,APR Risk of Mortality,APR Medical Surgical Description,Payment Typology 1,Payment Typology 2,Payment Typology 3,Birth Weight,Emergency Department Indicator,Total Charges,Total Costs
0,Hudson Valley,Westchester,5902001.0,1045.0,White Plains Hospital Center,30 to 49,106,M,Other Race,Not Span/Hispanic,...,Moderate,Minor,Medical,Private Health Insurance,,,,True,26507.00,4773.11
1,Hudson Valley,Westchester,5902001.0,1045.0,White Plains Hospital Center,70 or Older,105,F,Black/African American,Not Span/Hispanic,...,Moderate,Moderate,Medical,Medicare,,,,True,20693.00,5631.30
2,Hudson Valley,Westchester,5902001.0,1045.0,White Plains Hospital Center,50 to 69,105,F,Other Race,Not Span/Hispanic,...,Extreme,Extreme,Surgical,Medicare,Blue Cross/Blue Shield,,,False,138252.00,29848.69
3,Hudson Valley,Westchester,5902001.0,1045.0,White Plains Hospital Center,50 to 69,106,F,Black/African American,Not Span/Hispanic,...,Moderate,Minor,Medical,Medicare,Medicaid,,,True,11799.00,2595.72
4,Hudson Valley,Westchester,5902001.0,1045.0,White Plains Hospital Center,70 or Older,105,F,Other Race,Not Span/Hispanic,...,Moderate,Moderate,Medical,Medicare,Private Health Insurance,,,True,52711.00,11355.62
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,New York City,Kings,7001002.0,1286.0,Brookdale Hospital Medical Center,0 to 17,112,M,Black/African American,Not Span/Hispanic,...,Moderate,Minor,Medical,Medicaid,Medicaid,,,False,9239.36,5691.19
9996,New York City,Kings,7001002.0,1286.0,Brookdale Hospital Medical Center,0 to 17,112,M,Black/African American,Spanish/Hispanic,...,Moderate,Minor,Medical,Medicaid,,,,False,18037.28,11560.27
9997,New York City,Richmond,7004003.0,1740.0,Staten Island University Hosp-North,30 to 49,103,F,Other Race,Spanish/Hispanic,...,Moderate,Minor,Medical,Medicaid,Medicaid,,,True,7788.46,2085.42
9998,New York City,Richmond,7004003.0,1737.0,Staten Island University Hospital Prince's Bay,50 to 69,103,M,Black/African American,Not Span/Hispanic,...,Moderate,Minor,Medical,Medicaid,,,,False,215554.33,135783.46


# Data Exploration

In [53]:
df.columns

Index(['Hospital Service Area', 'Hospital County',
       'Operating Certificate Number', 'Permanent Facility Id',
       'Facility Name', 'Age Group', 'Zip Code - 3 digits', 'Gender', 'Race',
       'Ethnicity', 'Length of Stay', 'Type of Admission',
       'Patient Disposition', 'Discharge Year', 'CCSR Diagnosis Code',
       'CCSR Diagnosis Description', 'CCSR Procedure Code',
       'CCSR Procedure Description', 'APR DRG Code', 'APR DRG Description',
       'APR MDC Code', 'APR MDC Description', 'APR Severity of Illness Code',
       'APR Severity of Illness Description', 'APR Risk of Mortality',
       'APR Medical Surgical Description', 'Payment Typology 1',
       'Payment Typology 2', 'Payment Typology 3', 'Birth Weight',
       'Emergency Department Indicator', 'Total Charges', 'Total Costs'],
      dtype='object')

In [54]:
columns_to_keep = ['Age Group', 'Gender', 'Race', 'Ethnicity', 'Length of Stay',
                   'Type of Admission', 'APR MDC Description', 'APR Severity of Illness Description',
                   'APR Risk of Mortality', 'APR Medical Surgical Description']
df = df[columns_to_keep]
display(df)

Unnamed: 0,Age Group,Gender,Race,Ethnicity,Length of Stay,Type of Admission,APR MDC Description,APR Severity of Illness Description,APR Risk of Mortality,APR Medical Surgical Description
0,30 to 49,M,Other Race,Not Span/Hispanic,2,Emergency,INFECTIOUS AND PARASITIC DISEASES (SYSTEMIC OR...,Moderate,Minor,Medical
1,70 or Older,F,Black/African American,Not Span/Hispanic,3,Emergency,DISEASES AND DISORDERS OF THE KIDNEY AND URINA...,Moderate,Moderate,Medical
2,50 to 69,F,Other Race,Not Span/Hispanic,7,Elective,DISEASES AND DISORDERS OF THE KIDNEY AND URINA...,Extreme,Extreme,Surgical
3,50 to 69,F,Black/African American,Not Span/Hispanic,1,Emergency,DISEASES AND DISORDERS OF THE RESPIRATORY SYSTEM,Moderate,Minor,Medical
4,70 or Older,F,Other Race,Not Span/Hispanic,3,Emergency,"INJURIES, POISONINGS AND TOXIC EFFECTS OF DRUGS",Moderate,Moderate,Medical
...,...,...,...,...,...,...,...,...,...,...
9995,0 to 17,M,Black/African American,Not Span/Hispanic,5,Emergency,MENTAL DISEASES AND DISORDERS,Moderate,Minor,Medical
9996,0 to 17,M,Black/African American,Spanish/Hispanic,14,Emergency,MENTAL DISEASES AND DISORDERS,Moderate,Minor,Medical
9997,30 to 49,F,Other Race,Spanish/Hispanic,1,Emergency,DISEASES AND DISORDERS OF THE DIGESTIVE SYSTEM,Moderate,Minor,Medical
9998,50 to 69,M,Black/African American,Not Span/Hispanic,68,Elective,MENTAL DISEASES AND DISORDERS,Moderate,Minor,Medical


In [55]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 10 columns):
 #   Column                               Non-Null Count  Dtype 
---  ------                               --------------  ----- 
 0   Age Group                            10000 non-null  object
 1   Gender                               10000 non-null  object
 2   Race                                 10000 non-null  object
 3   Ethnicity                            10000 non-null  object
 4   Length of Stay                       10000 non-null  object
 5   Type of Admission                    10000 non-null  object
 6   APR MDC Description                  10000 non-null  object
 7   APR Severity of Illness Description  9986 non-null   object
 8   APR Risk of Mortality                9986 non-null   object
 9   APR Medical Surgical Description     10000 non-null  object
dtypes: object(10)
memory usage: 781.4+ KB


In [56]:
df.describe(include='all')

Unnamed: 0,Age Group,Gender,Race,Ethnicity,Length of Stay,Type of Admission,APR MDC Description,APR Severity of Illness Description,APR Risk of Mortality,APR Medical Surgical Description
count,10000,10000,10000,10000,10000,10000,10000,9986,9986,10000
unique,5,2,4,4,79,6,26,4,4,3
top,50 to 69,F,White,Not Span/Hispanic,2,Emergency,DISEASES AND DISORDERS OF THE CIRCULATORY SYSTEM,Moderate,Minor,Medical
freq,2952,5487,5449,8170,2308,6794,1278,3737,5338,7302


In [57]:
for column in df.columns:
    print(column)
    print(df[column].value_counts(dropna=False))
    print()

Age Group
50 to 69       2952
70 or Older    2905
30 to 49       2061
0 to 17        1102
18 to 29        980
Name: Age Group, dtype: int64

Gender
F    5487
M    4513
Name: Gender, dtype: int64

Race
White                     5449
Other Race                2672
Black/African American    1795
Multi-racial                84
Name: Race, dtype: int64

Ethnicity
Not Span/Hispanic    8170
Spanish/Hispanic     1347
Unknown               458
Multi-ethnic           25
Name: Ethnicity, dtype: int64

Length of Stay
2      2308
3      1735
1      1667
4      1036
5       664
       ... 
103       1
67        1
66        1
69        1
54        1
Name: Length of Stay, Length: 79, dtype: int64

Type of Admission
Emergency        6794
Elective         2016
Newborn           585
Urgent            578
Trauma             26
Not Available       1
Name: Type of Admission, dtype: int64

APR MDC Description
DISEASES AND DISORDERS OF THE CIRCULATORY SYSTEM                                            1278
PRE

# Data Preparation/Processing

In [58]:
# Drop rows with NA/NULL values
df = df.dropna().reset_index(drop=True)
print(df.shape)

(9986, 10)


In [59]:
# Drop rows with infrequent values
df = df[~df['Type of Admission'].isin(['Not Available', 'Trauma'])]
df = df[df['Race'] != 'Multi-racial']
df = df[df['Ethnicity'] != 'Multi-ethnic']
df = df[~df['APR MDC Description'].isin(['MYELOPROLIFERATIVE DISEASES AND DISORDERS, AND POORLY DIFFERENTIATED NEOPLASM', 'DISEASES AND DISORDERS OF THE FEMALE REPRODUCTIVE SYSTEM', 'DISEASES AND DISORDERS OF THE MALE REPRODUCTIVE SYSTEM', 'HUMAN IMMUNODEFICIENCY VIRUS INFECTIONS', 'MULTIPLE SIGNIFICANT TRAUMA', 'DISEASES AND DISORDERS OF THE EYE', 'PRE MDC', 'BURNS'])]
df = df[df['APR Medical Surgical Description'] != 'Not Applicable']
print(df.shape)

(9561, 10)


In [60]:
pandas.set_option('display.max_rows', 100)
print(df['Length of Stay'].value_counts())
pandas.reset_option('display.max_rows')

2        2225
3        1669
1        1568
4         996
5         635
6         452
7         394
8         265
9         206
10        151
11        112
12        105
14         97
13         91
15         61
17         57
16         48
18         45
20         41
21         39
19         38
22         27
24         20
28         18
25         16
23         14
27         10
30         10
31         10
32          9
35          9
26          9
29          8
33          7
37          6
34          5
44          5
36          5
42          5
38          4
48          4
40          4
39          4
49          4
120 +       4
51          3
57          3
43          3
111         2
45          2
58          2
50          2
46          2
52          2
59          2
47          2
68          2
93          1
60          1
88          1
107         1
55          1
53          1
41          1
92          1
94          1
56          1
64          1
75          1
80          1
85          1
99    

In [61]:
# Combine length of stay into multi-class categories (1 day, 2 days, 3 days, 4 days, 5 days or more)
df['Length of Stay - Multi-class'] = numpy.where(df['Length of Stay'] == '1', 1, df['Length of Stay'])
df['Length of Stay - Multi-class'] = numpy.where(df['Length of Stay - Multi-class'] == '2', 2, df['Length of Stay - Multi-class'])
df['Length of Stay - Multi-class'] = numpy.where(df['Length of Stay - Multi-class'] == '3', 3, df['Length of Stay - Multi-class'])
df['Length of Stay - Multi-class'] = numpy.where(df['Length of Stay - Multi-class'] == '4', 4, df['Length of Stay - Multi-class'])
df['Length of Stay - Multi-class'] = numpy.where(df['Length of Stay - Multi-class'].isin(numpy.setdiff1d(numpy.unique(df['Length of Stay']), ['1', '2', '3', '4'])), 5, df['Length of Stay - Multi-class'])
df['Length of Stay - Multi-class'] = df['Length of Stay - Multi-class'].astype(int)

In [62]:
df['Length of Stay - Multi-class'].value_counts()

5    3103
2    2225
3    1669
1    1568
4     996
Name: Length of Stay - Multi-class, dtype: int64

In [63]:
# Combine length of stay into binary categories (1-3 days, 4 days or more)
df['Length of Stay - Binary'] = numpy.where(df['Length of Stay'].isin(['1', '2', '3']), 0, df['Length of Stay'])
df['Length of Stay - Binary'] = numpy.where(df['Length of Stay - Binary'].isin(numpy.setdiff1d(numpy.unique(df['Length of Stay']), ['1', '2', '3'])), 1, df['Length of Stay - Binary'])
df['Length of Stay - Binary'] = df['Length of Stay - Binary'].astype(int)

In [64]:
df['Length of Stay - Binary'].value_counts()

0    5462
1    4099
Name: Length of Stay - Binary, dtype: int64

In [65]:
# Split into training and testing subsets
X = df.drop(columns=['Length of Stay', 'Length of Stay - Multi-class', 'Length of Stay - Binary'])
y_multiclass = df['Length of Stay - Multi-class']
y_binary = df['Length of Stay - Binary']
X_train, X_test, y_multiclass_train, y_multiclass_test, y_binary_train, y_binary_test = train_test_split(X, y_multiclass, y_binary, test_size=0.2, random_state=0, stratify=y_multiclass)
print('Training:', len(y_multiclass_train))
print('Testing:', len(y_multiclass_test))

Training: 7648
Testing: 1913


In [66]:
# Min-max scale numeric features
# from sklearn.preprocessing import MinMaxScaler
# scaler = MinMaxScaler()
# numeric_features = ['numeric_feature_name_1', 'numeric_feature_name_2']
# X_train[numeric_features] = scaler.fit_transform(X_train[numeric_features])
# X_test[numeric_features] = scaler.transform(X_test[numeric_features])

In [67]:
# One-hot encode categorical features

for column in X_train.columns:
    for value in numpy.unique(X_train[column]):
        X_train[column + ' - ' + value] = numpy.where(X_train[column] == value, 1, 0)

for column in X_test.columns:
    for value in numpy.unique(X_test[column]):
        X_test[column + ' - ' + value] = numpy.where(X_test[column] == value, 1, 0)

# Drop original columns
X_train = X_train.drop(columns=['Age Group', 'Gender', 'Race', 'Ethnicity', 'Type of Admission', 'APR MDC Description', 'APR Severity of Illness Description', 'APR Risk of Mortality', 'APR Medical Surgical Description'])
X_test = X_test.drop(columns=['Age Group', 'Gender', 'Race', 'Ethnicity', 'Type of Admission', 'APR MDC Description', 'APR Severity of Illness Description', 'APR Risk of Mortality', 'APR Medical Surgical Description'])

# Training (Multiclass)

In [68]:
def testing_and_evaluation(model):
    # Testing
    classifications = model.predict(X_test)
    probabilities = model.predict_proba(X_test)

    # Evaluation
    print(confusion_matrix(y_multiclass_test, classifications))
    print()
    print(classification_report(y_multiclass_test, classifications, target_names=['1 day', '2 days', '3 days', '4 days', '5 days or more']))
    print('Accuracy:         ', round(accuracy_score(y_multiclass_test, classifications), 4))
    print('Balanced accuracy:', round(balanced_accuracy_score(y_multiclass_test, classifications), 4))
    print('Recall:           ', round(recall_score(y_multiclass_test, classifications, average='macro'), 4))
    print('Precision:        ', round(precision_score(y_multiclass_test, classifications, average='macro'), 4))
    print('F1:               ', round(f1_score(y_multiclass_test, classifications, average='macro'), 4))
    print('MCC:              ', round(matthews_corrcoef(y_multiclass_test, classifications), 4))
    print('AU_ROC:           ', round(roc_auc_score(y_multiclass_test, probabilities, multi_class='ovr'), 4))
    print('Log loss:         ', round(log_loss(y_multiclass_test, probabilities), 4))

In [69]:
models = {'BernoulliNB': BernoulliNB(), 'GaussianNB': GaussianNB(), 'MultinomialNB': MultinomialNB(),
          'LogisticRegression': LogisticRegression(), 'MLPClassifier': MLPClassifier(), 'SVC': SVC(probability=True),
          'KNeighborsClassifier': KNeighborsClassifier(), 'DecisionTreeClassifier': DecisionTreeClassifier(),
          'RandomForestClassifier': RandomForestClassifier(), 'GradientBoostingClassifier': GradientBoostingClassifier(),
          'HistGradientBoostingClassifier': HistGradientBoostingClassifier()}

for model_name, model in models.items():
    print(model_name)

    # Training
    model = model.fit(X_train, y_multiclass_train)

    # Testing and evaluation
    testing_and_evaluation(model)

    print()

BernoulliNB
[[142  72  14   1  85]
 [122 177  17   1 128]
 [ 66 124  16   5 123]
 [ 37  43   9   0 110]
 [ 77  84  23   0 437]]

                precision    recall  f1-score   support

         1 day       0.32      0.45      0.37       314
        2 days       0.35      0.40      0.37       445
        3 days       0.20      0.05      0.08       334
        4 days       0.00      0.00      0.00       199
5 days or more       0.49      0.70      0.58       621

      accuracy                           0.40      1913
     macro avg       0.27      0.32      0.28      1913
  weighted avg       0.33      0.40      0.35      1913

Accuracy:          0.4036
Balanced accuracy: 0.3203
Recall:            0.3203
Precision:         0.2743
F1:                0.2816
MCC:               0.2058
AU_ROC:            0.6508
Log loss:          1.7974

GaussianNB
[[157  49  11  38  59]
 [148 145  12  58  82]
 [ 91 106  10  42  85]
 [ 46  40   7  32  74]
 [ 85  36  19  96 385]]

                precision  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                precision    recall  f1-score   support

         1 day       0.32      0.45      0.38       314
        2 days       0.36      0.40      0.38       445
        3 days       0.15      0.02      0.04       334
        4 days       0.00      0.00      0.00       199
5 days or more       0.49      0.73      0.58       621

      accuracy                           0.41      1913
     macro avg       0.26      0.32      0.27      1913
  weighted avg       0.32      0.41      0.35      1913

Accuracy:          0.4072
Balanced accuracy: 0.3201
Recall:            0.3201
Precision:         0.2637
F1:                0.2749
MCC:               0.2088
AU_ROC:            0.6565
Log loss:          1.5879

LogisticRegression


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


[[142  72   6   0  94]
 [119 178  12   0 136]
 [ 60 146  19   0 109]
 [ 23  51  10   0 115]
 [ 47  69  15   0 490]]

                precision    recall  f1-score   support

         1 day       0.36      0.45      0.40       314
        2 days       0.34      0.40      0.37       445
        3 days       0.31      0.06      0.10       334
        4 days       0.00      0.00      0.00       199
5 days or more       0.52      0.79      0.63       621

      accuracy                           0.43      1913
     macro avg       0.31      0.34      0.30      1913
  weighted avg       0.36      0.43      0.37      1913

Accuracy:          0.4334
Balanced accuracy: 0.3396
Recall:            0.3396
Precision:         0.3067
F1:                0.2991
MCC:               0.2434
AU_ROC:            0.7029
Log loss:          1.3486

MLPClassifier




[[122  85  25   6  76]
 [ 71 207  51  12 104]
 [ 41 122  64  11  96]
 [ 32  44  32   9  82]
 [ 44  69  52  19 437]]

                precision    recall  f1-score   support

         1 day       0.39      0.39      0.39       314
        2 days       0.39      0.47      0.43       445
        3 days       0.29      0.19      0.23       334
        4 days       0.16      0.05      0.07       199
5 days or more       0.55      0.70      0.62       621

      accuracy                           0.44      1913
     macro avg       0.36      0.36      0.35      1913
  weighted avg       0.40      0.44      0.41      1913

Accuracy:          0.4386
Balanced accuracy: 0.3589
Recall:            0.3589
Precision:         0.3559
F1:                0.3468
MCC:               0.2557
AU_ROC:            0.6867
Log loss:          1.445

SVC
[[158  61   4   0  91]
 [111 195  10   0 129]
 [ 55 133  30   0 116]
 [ 36  41  14   0 108]
 [ 48  56  13   1 503]]

                precision    recall  f1-score  

# Training (Binary)

In [70]:
def testing_and_evaluation(model):
    # Testing
    classifications = model.predict(X_test)
    probabilities = model.predict_proba(X_test)

    # Evaluation
    print(confusion_matrix(y_binary_test, classifications))
    print()
    print(classification_report(y_binary_test, classifications, target_names=['1 or 2 days', '3 days or more']))
    print('Accuracy:         ', round(accuracy_score(y_binary_test, classifications), 4))
    print('Balanced accuracy:', round(balanced_accuracy_score(y_binary_test, classifications), 4))
    print('Recall:           ', round(recall_score(y_binary_test, classifications), 4))
    print('Precision:        ', round(precision_score(y_binary_test, classifications), 4))
    print('F1:               ', round(f1_score(y_binary_test, classifications), 4))
    print('MCC:              ', round(matthews_corrcoef(y_binary_test, classifications), 4))
    print('AU_ROC:           ', round(roc_auc_score(y_binary_test, probabilities[:,1]), 4))
    print('Log loss:         ', round(log_loss(y_binary_test, probabilities[:,1]), 4))

In [71]:
models = {'BernoulliNB': BernoulliNB(), 'GaussianNB': GaussianNB(), 'MultinomialNB': MultinomialNB(),
          'LogisticRegression': LogisticRegression(), 'MLPClassifier': MLPClassifier(), 'SVC': SVC(probability=True),
          'KNeighborsClassifier': KNeighborsClassifier(), 'DecisionTreeClassifier': DecisionTreeClassifier(),
          'RandomForestClassifier': RandomForestClassifier(), 'GradientBoostingClassifier': GradientBoostingClassifier(),
          'HistGradientBoostingClassifier': HistGradientBoostingClassifier()}

for model_name, model in models.items():
    print(model_name)

    # Training
    model = model.fit(X_train, y_binary_train)

    # Testing and evaluation
    testing_and_evaluation(model)

    print()

BernoulliNB
[[775 318]
 [290 530]]

                precision    recall  f1-score   support

   1 or 2 days       0.73      0.71      0.72      1093
3 days or more       0.62      0.65      0.64       820

      accuracy                           0.68      1913
     macro avg       0.68      0.68      0.68      1913
  weighted avg       0.68      0.68      0.68      1913

Accuracy:          0.6822
Balanced accuracy: 0.6777
Recall:            0.6463
Precision:         0.625
F1:                0.6355
MCC:               0.354
AU_ROC:            0.7353
Log loss:          0.9106

GaussianNB
[[724 369]
 [227 593]]

                precision    recall  f1-score   support

   1 or 2 days       0.76      0.66      0.71      1093
3 days or more       0.62      0.72      0.67       820

      accuracy                           0.69      1913
     macro avg       0.69      0.69      0.69      1913
  weighted avg       0.70      0.69      0.69      1913

Accuracy:          0.6884
Balanced accuracy:



[[878 215]
 [356 464]]

                precision    recall  f1-score   support

   1 or 2 days       0.71      0.80      0.75      1093
3 days or more       0.68      0.57      0.62       820

      accuracy                           0.70      1913
     macro avg       0.70      0.68      0.69      1913
  weighted avg       0.70      0.70      0.70      1913

Accuracy:          0.7015
Balanced accuracy: 0.6846
Recall:            0.5659
Precision:         0.6834
F1:                0.6191
MCC:               0.3818
AU_ROC:            0.7627
Log loss:          0.6286

SVC
[[869 224]
 [315 505]]

                precision    recall  f1-score   support

   1 or 2 days       0.73      0.80      0.76      1093
3 days or more       0.69      0.62      0.65       820

      accuracy                           0.72      1913
     macro avg       0.71      0.71      0.71      1913
  weighted avg       0.72      0.72      0.72      1913

Accuracy:          0.7182
Balanced accuracy: 0.7055
Recall:  