In [1]:
import warnings

import h2o
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from h2o.estimators.gbm import H2OGradientBoostingEstimator
from sklearn.model_selection import train_test_split

### Overview
[Reference](https://github.com/NhanDoV/Lectures_notes-teaching-in-VN-/blob/master/h2o_AutoML/classification/clf-tradml-vs-automl.ipynb)

In [2]:
# Silence library warnings so the rendered tables below stay readable.
warnings.filterwarnings("ignore")

# Keep the untouched dataset under its own name so this cell can be re-run safely.
titanic_raw = sns.load_dataset("titanic")

# The median is computed on the raw data, i.e. before any row is dropped.
median_age = int(titanic_raw["age"].median())

# Cleaning pipeline (each step returns a new frame, nothing is mutated in place):
#   1. fill missing ages with the (integer) median age
#   2. turn the 0/1 response into the labels 'dead' / 'alived'
#   3. drop passengers without an embarkation town
#      (`subset` is passed as a list; a bare string is only accepted by newer pandas)
#   4. store `alone` as text and `age` as whole years
df = (
    titanic_raw
    .fillna({"age": median_age})
    .assign(survived=lambda frame: frame["survived"].replace({0: "dead", 1: "alived"}))
    .dropna(axis=0, subset=["embark_town"])
    .assign(
        alone=lambda frame: frame["alone"].astype(str),
        age=lambda frame: frame["age"].astype(int),
    )
)

# Mean and max of a few numeric columns, broken down by ticket class and outcome.
for col in ["fare", "age", "parch"]:
    print(f"Survived aggregation on class & {col}")
    display(pd.pivot_table(df, index="class", columns="survived", values=col, aggfunc=["mean", "max"]))

Survived aggregation on class & fare


Unnamed: 0_level_0,mean,mean,max,max
survived,alived,dead,alived,dead
class,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
First,95.840984,64.684007,512.3292,263.0
Second,22.0557,19.412328,65.0,73.5
Third,13.694887,13.669364,56.4958,69.55


Survived aggregation on class & age


Unnamed: 0_level_0,mean,mean,max,max
survived,alived,dead,alived,dead
class,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
First,34.373134,40.55,80,71
Second,25.965517,33.134021,62,70
Third,22.731092,26.932796,63,74


Survived aggregation on class & parch


Unnamed: 0_level_0,mean,mean,max,max
survived,alived,dead,alived,dead
class,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
First,0.395522,0.3,2,4
Second,0.643678,0.14433,3,2
Third,0.420168,0.384409,5,6


### Feature engineering
In this section, we check whether binning the numerical variables (`age`, `fare`) into groups has any effect on the response `survived`.

In [3]:
# Cut each continuous feature into 5 equal-width intervals and store the result
# as a new column on `df`.
for binned_name, numeric_name in (("age_group", "age"), ("faregroup", "fare")):
    df[binned_name] = pd.cut(df[numeric_name], bins=5)

# One count table per categorical / binned feature: rows are the outcome,
# columns are the feature levels. `alive` only serves as the column being counted.
breakdown_features = ['class', 'sex', 'embark_town', 'alone', 'age_group', 'faregroup']
for feature in breakdown_features:
    outcome_counts = pd.pivot_table(
        df,
        index='survived',
        columns=feature,
        values='alive',
        aggfunc='count',
    )
    display(outcome_counts)

class,First,Second,Third
survived,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
alived,134,87,119
dead,80,97,372


sex,female,male
survived,Unnamed: 1_level_1,Unnamed: 2_level_1
alived,231,109
dead,81,468


embark_town,Cherbourg,Queenstown,Southampton
survived,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
alived,93,30,217
dead,75,47,427


alone,False,True
survived,Unnamed: 1_level_1,Unnamed: 2_level_1
alived,179,161
dead,175,374


age_group,"(-0.08, 16.0]","(16.0, 32.0]","(32.0, 48.0]","(48.0, 64.0]","(64.0, 80.0]"
survived,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
alived,55,181,74,29,1
dead,45,344,111,39,10


faregroup,"(-0.512, 102.466]","(102.466, 204.932]","(204.932, 307.398]","(307.398, 409.863]","(409.863, 512.329]"
survived,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
alived,301,25,11,0,3
dead,535,8,6,0,0


There is no clear difference between the bin groups with respect to the final outcome, hence the binned features (`age_group` and `faregroup`) are not necessary here.

In [4]:
# Columns that are not used as model features (the response is re-attached through `y`).
non_feature_columns = ["survived", "alive", "pclass", "age_group", "faregroup", "parch",
                       "who", "sibsp", "adult_male", "deck", "embarked"]
# The split is stratified jointly on these three columns, which is why `sex` and
# `embark_town` travel with the response in `y` instead of staying in `X`.
stratify_columns = ['sex', 'embark_town', 'survived']

y = df[stratify_columns]
X = df.drop(columns=non_feature_columns)

X_train, X_valid, y_train, y_valid = train_test_split(
    X.drop(columns=['sex', 'embark_town']),
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

# Glue features and (sex, embark_town, survived) back together, one frame per split,
# with a fresh 0..n-1 index.
df_train, df_valid = (
    pd.concat(split_parts, axis=1).reset_index(drop=True)
    for split_parts in ([X_train, y_train], [X_valid, y_valid])
)
df_train.head()

Unnamed: 0,age,fare,class,alone,sex,embark_town,survived
0,20,7.8542,Third,True,male,Southampton,dead
1,28,25.4667,Third,False,female,Southampton,dead
2,21,7.8542,Third,True,male,Southampton,dead
3,28,7.3125,Third,True,male,Southampton,dead
4,36,13.0,Second,True,female,Southampton,alived


Initialize `h2o`

In [5]:
# Connect to an H2O instance; as the log below shows, none was running at
# localhost:54321, so a local H2O server is started and connected to.
h2o.init()

Checking whether there is an H2O instance running at http://localhost:54321..... not found.
Attempting to start a local H2O server...
  Java Version: openjdk version "11.0.21" 2023-10-17; OpenJDK Runtime Environment (build 11.0.21+9-post-Ubuntu-0ubuntu120.04); OpenJDK 64-Bit Server VM (build 11.0.21+9-post-Ubuntu-0ubuntu120.04, mixed mode, sharing)
  Starting server from /opt/conda/lib/python3.10/site-packages/h2o/backend/bin/h2o.jar
  Ice root: /tmp/tmpex4e4p8c
  JVM stdout: /tmp/tmpex4e4p8c/h2o_unknownUser_started_from_python.out
  JVM stderr: /tmp/tmpex4e4p8c/h2o_unknownUser_started_from_python.err
  Server is running at http://127.0.0.1:54321
Connecting to H2O server at http://127.0.0.1:54321 ... successful.


0,1
H2O_cluster_uptime:,03 secs
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.44.0.3
H2O_cluster_version_age:,2 months and 12 days
H2O_cluster_name:,H2O_from_python_unknownUser_cpdqd7
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,7.500 Gb
H2O_cluster_total_cores:,4
H2O_cluster_allowed_cores:,4


Convert the pandas train/validation DataFrames to H2O frames

In [6]:
# Upload both pandas splits to H2O (training split first, then validation split).
h2o_train_df, h2o_valid_df = (
    h2o.H2OFrame(pandas_split) for pandas_split in (df_train, df_valid)
)
h2o_train_df

Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%


age,fare,class,alone,sex,embark_town,survived
20,7.8542,Third,True,male,Southampton,dead
28,25.4667,Third,False,female,Southampton,dead
21,7.8542,Third,True,male,Southampton,dead
28,7.3125,Third,True,male,Southampton,dead
36,13.0,Second,True,female,Southampton,alived
31,26.25,Second,False,male,Southampton,dead
49,56.9292,First,False,male,Cherbourg,alived
23,7.2292,Third,True,male,Cherbourg,dead
25,30.0,Second,False,female,Southampton,alived
7,29.125,Third,False,male,Queenstown,dead


Based on the results from [Section 2.2.4 (clf-tradml-vs-automl.ipynb)](https://github.com/NhanDoV/Lectures_notes-teaching-in-VN-/blob/master/h2o_AutoML/classification/clf-tradml-vs-automl.ipynb), we will now focus on the GBM model.

In [7]:
# Model inputs: six predictors and the 'dead' / 'alived' response column.
predictors = ["class", "alone", "age", "fare", "sex", "embark_town"]
response = "survived"

# GBM with early stopping on AUC.
# - `ntrees` is only an upper bound; training may end earlier through early stopping.
# - Early stopping is driven by `stopping_rounds` / `stopping_tolerance`; when a
#   validation frame is supplied the metric is presumably evaluated on it
#   (see the H2O docs to confirm).
# - No cross-validation is configured (no `nfolds` / fold column), so no
#   cross-validation options are passed.
my_model = H2OGradientBoostingEstimator(
    model_id="first_model",
    ntrees=100,
    stopping_metric="auc",
    stopping_rounds=3,
    stopping_tolerance=0.0005,
    seed=1234,
)

# `train` is the last expression so the model summary is rendered as the cell output.
my_model.train(
    x=predictors,
    y=response,
    training_frame=h2o_train_df,
    validation_frame=h2o_valid_df,
)

gbm Model Build progress: |██████████████████████████████████████████████████████| (done) 100%


Unnamed: 0,number_of_trees,number_of_internal_trees,model_size_in_bytes,min_depth,max_depth,mean_depth,min_leaves,max_leaves,mean_leaves
,6.0,6.0,1704.0,5.0,5.0,5.0,16.0,19.0,18.0

Unnamed: 0,alived,dead,Error,Rate
alived,176.0,62.0,0.2605,(62.0/238.0)
dead,38.0,346.0,0.099,(38.0/384.0)
Total,214.0,408.0,0.1608,(100.0/622.0)

metric,threshold,value,idx
max f1,0.5962821,0.8737374,28.0
max f2,0.4377871,0.9346022,44.0
max f0point5,0.6414353,0.8708791,22.0
max accuracy,0.6052235,0.840836,27.0
max precision,0.7593211,1.0,0.0
max recall,0.3538742,1.0,47.0
max specificity,0.7593211,1.0,0.0
max absolute_mcc,0.6052235,0.6618563,27.0
max min_per_class_accuracy,0.6414353,0.8235294,22.0
max mean_per_class_accuracy,0.6052235,0.8295584,27.0

group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain,kolmogorov_smirnov
1,0.0176849,0.7593211,1.6197917,1.6197917,1.0,0.7593211,1.0,0.7593211,0.0286458,0.0286458,61.9791667,61.9791667,0.0286458
2,0.0498392,0.75848,1.6197917,1.6197917,1.0,0.75848,1.0,0.7587785,0.0520833,0.0807292,61.9791667,61.9791667,0.0807292
3,0.0675241,0.7572008,1.6197917,1.6197917,1.0,0.7572008,1.0,0.7583653,0.0286458,0.109375,61.9791667,61.9791667,0.109375
4,0.2106109,0.7485399,1.5287921,1.5579676,0.9438202,0.7491341,0.9618321,0.7520937,0.21875,0.328125,52.8792135,55.7967557,0.3071166
5,0.4517685,0.7380668,1.4038194,1.4756821,0.8666667,0.7382839,0.911032,0.7447219,0.3385417,0.6666667,40.3819444,47.5682088,0.5616246
6,0.5064309,0.703117,1.2386642,1.4500992,0.7647059,0.7246177,0.8952381,0.742552,0.0677083,0.734375,23.8664216,45.0099206,0.5957195
7,0.6141479,0.6056603,1.2087998,1.4077771,0.7462687,0.6552063,0.8691099,0.7272322,0.1302083,0.8645833,20.8799751,40.7777051,0.6544993
8,0.6993569,0.5324663,0.7334906,1.3256226,0.4528302,0.583749,0.8183908,0.7097503,0.0625,0.9270833,-26.6509434,32.5622605,0.5951506
9,0.8247588,0.4377871,0.5606971,1.2093181,0.3461538,0.4800586,0.7465887,0.6748264,0.0703125,0.9973958,-43.9302885,20.9318145,0.4511773
10,0.9003215,0.339642,0.0344637,1.1107143,0.0212766,0.3589755,0.6857143,0.6483175,0.0026042,1.0,-96.5536348,11.0714286,0.2605042

Unnamed: 0,alived,dead,Error,Rate
alived,62.0,40.0,0.3922,(40.0/102.0)
dead,8.0,157.0,0.0485,(8.0/165.0)
Total,70.0,197.0,0.1798,(48.0/267.0)

metric,threshold,value,idx
max f1,0.4985526,0.8674033,29.0
max f2,0.481864,0.916955,32.0
max f0point5,0.7299864,0.8658009,7.0
max accuracy,0.4985526,0.8202247,29.0
max precision,0.7299864,0.9090909,7.0
max recall,0.328924,1.0,39.0
max specificity,0.7593211,0.9803922,0.0
max absolute_mcc,0.4985526,0.6179453,29.0
max min_per_class_accuracy,0.681724,0.7878788,14.0
max mean_per_class_accuracy,0.6832448,0.8124777,13.0

group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain,kolmogorov_smirnov
1,0.0262172,0.7593211,1.1558442,1.1558442,0.7142857,0.7593211,0.7142857,0.7593211,0.030303,0.030303,15.5844156,15.5844156,0.0106952
2,0.0599251,0.75848,1.4383838,1.3147727,0.8888889,0.75848,0.8125,0.758848,0.0484848,0.0787879,43.8383838,31.4772727,0.0493761
3,0.1123596,0.7529467,1.2714286,1.2945455,0.7857143,0.7547699,0.8,0.7569449,0.0666667,0.1454545,27.1428571,29.4545455,0.086631
4,0.2247191,0.7485399,1.4563636,1.3754545,0.9,0.7485399,0.85,0.7527424,0.1636364,0.3090909,45.6363636,37.5454545,0.2208556
5,0.4906367,0.7380668,1.5498079,1.4699514,0.9577465,0.7384124,0.9083969,0.7449757,0.4121212,0.7212121,54.9807939,46.9951423,0.6035651
6,0.505618,0.7259158,0.8090909,1.4503704,0.5,0.7269335,0.8962963,0.7444412,0.0121212,0.7333333,-19.0909091,45.037037,0.5960784
7,0.6217228,0.6056603,0.9395894,1.3549836,0.5806452,0.654814,0.8373494,0.7277036,0.1090909,0.8424242,-6.0410557,35.4983571,0.5777184
8,0.7041199,0.5354621,0.8826446,1.2997099,0.5454545,0.5772707,0.8031915,0.7100997,0.0727273,0.9151515,-11.7355372,29.9709865,0.5524064
9,0.7977528,0.4359577,0.5178182,1.2079385,0.32,0.4887835,0.7464789,0.6841236,0.0484848,0.9636364,-48.2181818,20.793854,0.4342246
10,1.0,0.328924,0.179798,1.0,0.1111111,0.3437013,0.6179775,0.6152742,0.0363636,1.0,-82.020202,0.0,0.0

Unnamed: 0,timestamp,duration,number_of_trees,training_rmse,training_logloss,training_auc,training_pr_auc,training_lift,training_classification_error,validation_rmse,validation_logloss,validation_auc,validation_pr_auc,validation_lift,validation_classification_error
,2024-03-04 05:09:29,0.118 sec,0.0,0.4860307,0.6653402,0.5,0.6173633,1.0,0.3826367,0.4858824,0.6650463,0.5,0.6179775,1.0,0.3820225
,2024-03-04 05:09:29,0.579 sec,1.0,0.4626731,0.6188916,0.8787476,0.8864817,1.5040923,0.1672026,0.463574,0.6206331,0.859893,0.8609257,1.2136364,0.1910112
,2024-03-04 05:09:29,0.693 sec,2.0,0.443059,0.5818319,0.8843279,0.9051476,1.6197917,0.1672026,0.4453847,0.5861418,0.8527035,0.8498517,1.3147727,0.1910112
,2024-03-04 05:09:29,0.791 sec,3.0,0.4266041,0.551629,0.8878841,0.9100405,1.6197917,0.1672026,0.4309694,0.5594554,0.8482769,0.8504499,1.4383838,0.1910112
,2024-03-04 05:09:29,0.876 sec,4.0,0.4131466,0.5272436,0.8925727,0.9166584,1.6197917,0.1672026,0.4185094,0.5366015,0.8478907,0.8486635,1.4383838,0.1797753
,2024-03-04 05:09:30,0.965 sec,5.0,0.4018519,0.5067443,0.8960303,0.9204838,1.6197917,0.1736334,0.4079617,0.5171055,0.8525253,0.8531941,1.4383838,0.1797753
,2024-03-04 05:09:30,1.022 sec,6.0,0.3918757,0.4884905,0.8997834,0.9248738,1.6197917,0.1607717,0.3994825,0.5012978,0.8516637,0.8432878,1.1558442,0.1797753

variable,relative_importance,scaled_importance,percentage
sex,164.6757507,1.0,0.6056568
class,55.4726906,0.3368601,0.2040216
age,23.607954,0.1433602,0.0868271
fare,20.3601379,0.1236377,0.074882
embark_town,6.6806231,0.0405683,0.0245705
alone,1.0989764,0.0066736,0.0040419


In [8]:
# Echo the identifier that was assigned through `model_id=` when the estimator was created.
my_model.model_id

'first_model'

### 2. What happens if we don't use `train_test_split` from `sklearn`?

In [9]:
# split into train and validation sets
train, valid = h2o.H2OFrame(df.drop(columns=["alive", "pclass", "age_group", "faregroup", 
                                             "parch", "who", "sibsp", "adult_male", 
                                             "deck", "embarked"])).split_frame(ratios = [.7], 
                                                                               seed = 1234)
train

Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%


survived,sex,age,fare,class,embark_town,alone
alived,female,38,71.2833,First,Cherbourg,False
alived,female,26,7.925,Third,Southampton,True
dead,male,35,8.05,Third,Southampton,True
dead,male,28,8.4583,Third,Queenstown,True
dead,male,54,51.8625,First,Southampton,True
dead,male,2,21.075,Third,Southampton,False
alived,female,27,11.1333,Third,Southampton,False
alived,female,4,16.7,Third,Southampton,False
alived,female,58,26.55,First,Southampton,True
dead,male,20,8.05,Third,Southampton,True


In [10]:
# Same GBM configuration as the first model, trained on the frames produced by
# H2O's `split_frame` instead of sklearn's stratified split.
#
# The model gets its OWN `model_id`: reusing "first_model" would store this model
# under the same key on the H2O cluster and replace the earlier one, so the two
# runs could no longer be compared side by side.
# No cross-validation is configured (no `nfolds` / fold column), so no
# cross-validation options are passed.
predictors = ["class", "alone", "age", "fare", "sex", "embark_town"]
response = "survived"

my_model = H2OGradientBoostingEstimator(
    model_id="split_frame_model",
    ntrees=100,                  # upper bound; early stopping may end training sooner
    stopping_metric="auc",
    stopping_rounds=3,
    stopping_tolerance=0.0005,
    seed=1234,
)

# `train` is the last expression so the model summary is rendered as the cell output.
my_model.train(
    x=predictors,
    y=response,
    training_frame=train,
    validation_frame=valid,
)

gbm Model Build progress: |██████████████████████████████████████████████████████| (done) 100%


Unnamed: 0,number_of_trees,number_of_internal_trees,model_size_in_bytes,min_depth,max_depth,mean_depth,min_leaves,max_leaves,mean_leaves
,15.0,15.0,4382.0,5.0,5.0,5.0,17.0,22.0,18.6

Unnamed: 0,alived,dead,Error,Rate
alived,175.0,72.0,0.2915,(72.0/247.0)
dead,17.0,370.0,0.0439,(17.0/387.0)
Total,192.0,442.0,0.1404,(89.0/634.0)

metric,threshold,value,idx
max f1,0.4685871,0.8926417,132.0
max f2,0.2979146,0.9304603,159.0
max f0point5,0.6246537,0.8933263,96.0
max accuracy,0.6246537,0.8643533,96.0
max precision,0.8883713,1.0,0.0
max recall,0.2214928,1.0,176.0
max specificity,0.8883713,1.0,0.0
max absolute_mcc,0.6246537,0.7167103,96.0
max min_per_class_accuracy,0.6731696,0.8502024,88.0
max mean_per_class_accuracy,0.6246537,0.8603291,96.0

group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain,kolmogorov_smirnov
1,0.011041,0.8677438,1.6382429,1.6382429,1.0,0.8779861,1.0,0.8779861,0.0180879,0.0180879,63.8242894,63.8242894,0.0180879
2,0.0283912,0.8555027,1.6382429,1.6382429,1.0,0.8578724,1.0,0.8656944,0.0284238,0.0465116,63.8242894,63.8242894,0.0465116
3,0.0299685,0.8536908,1.6382429,1.6382429,1.0,0.8550286,1.0,0.865133,0.002584,0.0490956,63.8242894,63.8242894,0.0490956
4,0.0410095,0.8478981,1.6382429,1.6382429,1.0,0.8523212,1.0,0.8616837,0.0180879,0.0671835,63.8242894,63.8242894,0.0671835
5,0.0536278,0.8424619,1.6382429,1.6382429,1.0,0.8446374,1.0,0.8576728,0.0206718,0.0878553,63.8242894,63.8242894,0.0878553
6,0.1088328,0.8362759,1.591436,1.6145002,0.9714286,0.8384814,0.9855072,0.847938,0.0878553,0.1757106,59.1435954,61.4500243,0.171662
7,0.2003155,0.8308558,1.5535062,1.5866447,0.9482759,0.8309641,0.9685039,0.8401861,0.1421189,0.3178295,55.3506193,58.6644693,0.3016351
8,0.2003155,0.8302014,0.0,1.5866447,0.0,0.0,0.9685039,0.8401861,0.0,0.3178295,-100.0,58.6644693,0.3016351
9,0.3217666,0.8270264,1.44676,1.5338451,0.8831169,0.8279458,0.9362745,0.835566,0.1757106,0.4935401,44.6759958,53.3845063,0.4409085
10,0.4116719,0.8145329,1.4945374,1.5252606,0.9122807,0.8183657,0.9310345,0.8318096,0.1343669,0.627907,49.4537377,52.5260626,0.5550325

Unnamed: 0,alived,dead,Error,Rate
alived,58.0,35.0,0.3763,(35.0/93.0)
dead,8.0,154.0,0.0494,(8.0/162.0)
Total,66.0,189.0,0.1686,(43.0/255.0)

metric,threshold,value,idx
max f1,0.4079468,0.8774929,69.0
max f2,0.2570878,0.9349593,85.0
max f0point5,0.6965612,0.8787466,39.0
max accuracy,0.4632575,0.8313725,66.0
max precision,0.8555027,1.0,0.0
max recall,0.1717245,1.0,101.0
max specificity,0.8555027,1.0,0.0
max absolute_mcc,0.6504232,0.6334803,48.0
max min_per_class_accuracy,0.6757887,0.8064516,43.0
max mean_per_class_accuracy,0.6965612,0.8228793,39.0

group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain,kolmogorov_smirnov
1,0.0117647,0.8546581,1.5740741,1.5740741,1.0,0.8555027,1.0,0.8555027,0.0185185,0.0185185,57.4074074,57.4074074,0.0185185
2,0.0235294,0.8520561,1.0493827,1.3117284,0.6666667,0.8532494,0.8333333,0.854376,0.0123457,0.0308642,4.9382716,31.1728395,0.0201115
3,0.0392157,0.8475783,1.5740741,1.4166667,1.0,0.8484793,0.9,0.8520174,0.0246914,0.0555556,57.4074074,41.6666667,0.0448029
4,0.0431373,0.8448949,1.5740741,1.4309764,1.0,0.8460474,0.9090909,0.8514746,0.0061728,0.0617284,57.4074074,43.0976431,0.0509757
5,0.0509804,0.8371806,1.5740741,1.4529915,1.0,0.8388444,0.9230769,0.8495315,0.0123457,0.0740741,57.4074074,45.2991453,0.0633214
6,0.172549,0.8308558,1.4217443,1.4309764,0.9032258,0.832282,0.9090909,0.8373785,0.1728395,0.2469136,42.1744325,43.0976431,0.2039028
7,0.2078431,0.8287813,1.5740741,1.455276,1.0,0.8287813,0.9245283,0.8359186,0.0555556,0.3024691,57.4074074,45.5276031,0.2594584
8,0.3137255,0.8270265,1.515775,1.4756944,0.962963,0.827068,0.9375,0.8329315,0.1604938,0.462963,51.5775034,47.5694444,0.4091995
9,0.4196078,0.814533,1.457476,1.4710973,0.9259259,0.8180597,0.9345794,0.8291788,0.154321,0.617284,45.7475995,47.1097265,0.5420151
10,0.5019608,0.7345244,1.2742504,1.4388021,0.8095238,0.7868364,0.9140625,0.822232,0.1049383,0.7222222,27.4250441,43.8802083,0.6039427

Unnamed: 0,timestamp,duration,number_of_trees,training_rmse,training_logloss,training_auc,training_pr_auc,training_lift,training_classification_error,validation_rmse,validation_logloss,validation_auc,validation_pr_auc,validation_lift,validation_classification_error
,2024-03-04 05:09:30,0.005 sec,0.0,0.4876573,0.6685643,0.5,0.6104101,1.0,0.3895899,0.4819904,0.6573905,0.5,0.6352941,1.0,0.3647059
,2024-03-04 05:09:31,0.069 sec,1.0,0.4647996,0.62315,0.8770884,0.8935769,1.5962367,0.1735016,0.4611465,0.616341,0.8622727,0.8809333,1.4166667,0.1921569
,2024-03-04 05:09:31,0.112 sec,2.0,0.4449598,0.5856263,0.8855151,0.9047166,1.6382429,0.1656151,0.4439221,0.5838048,0.8645958,0.8905519,1.5740741,0.1803922
,2024-03-04 05:09:31,0.154 sec,3.0,0.4288814,0.5560348,0.8856511,0.9047348,1.6382429,0.1656151,0.4293886,0.5569714,0.8653591,0.8907784,1.5740741,0.1803922
,2024-03-04 05:09:31,0.200 sec,4.0,0.4153796,0.5314913,0.8843643,0.9040196,1.6382429,0.1656151,0.417138,0.5345524,0.866222,0.8913738,1.5740741,0.1803922
,2024-03-04 05:09:31,0.243 sec,5.0,0.4032292,0.5095691,0.8943445,0.911302,1.6382429,0.1656151,0.4055482,0.5135366,0.8715651,0.8985601,1.5740741,0.1764706
,2024-03-04 05:09:31,0.280 sec,6.0,0.3920525,0.489523,0.8978805,0.9151126,1.6382429,0.1451104,0.3977462,0.4990855,0.8707686,0.8928193,1.5740741,0.1764706
,2024-03-04 05:09:31,0.316 sec,7.0,0.3835335,0.4739088,0.8964211,0.9122706,1.6382429,0.1419558,0.3886586,0.4823267,0.8826497,0.9066495,1.5740741,0.1764706
,2024-03-04 05:09:31,0.351 sec,8.0,0.3753999,0.4590906,0.8978334,0.9140405,1.6382429,0.1388013,0.3842176,0.4732962,0.8757135,0.8959403,1.5740741,0.1764706
,2024-03-04 05:09:31,0.385 sec,9.0,0.368014,0.4450796,0.9058783,0.9250784,1.6382429,0.1388013,0.3778325,0.4610205,0.8767755,0.8990635,1.5740741,0.1764706

variable,relative_importance,scaled_importance,percentage
sex,228.382431,1.0,0.5599137
fare,71.5016937,0.3130788,0.1752971
class,62.4219093,0.2733219,0.1530367
age,39.2425575,0.1718283,0.096209
embark_town,6.2637715,0.0274267,0.0153566
alone,0.0762307,0.0003338,0.0001869
