In [1]:
!pip install --use-deprecated=legacy-resolver pycaret[full] --quiet
import numpy as np
import pandas as pd

[0m

In [2]:
!pip install -U jinja2 --quiet

[0m

In [3]:
import pandas as pd
from pprint import pprint

In [2]:
import xarray as xr
import json
from typing import Union
from pathlib import Path
import glob



def to_path(p: Union[str, Path]) -> Path:
    return p if isinstance(p, Path) else Path(p)

def find_records(path: str):
    search_path: str = f"{path}/**/signals/"
    all_paths = list(map(lambda x: str(to_path(x).parent), glob.glob(search_path, recursive=True)))
    return all_paths

class RecordReader():
    def __init__(self, path: Union[str, Path]):
        self.path = to_path(path)

    def load_signal(self, sig_name):
        return xr.open_zarr(self.path / "signals" / sig_name / "dataset")

    def load_signal_meta(self, sig_name):
        with open(self.path / "signals" / sig_name / "meta.json", "r") as meta:
            return json.load(meta)
    
    def load_metadata(self):
        with open(self.path / "meta.json", "r") as meta:
            return json.load(meta)

    def load_crf_metadata(self):
        with open(self.path / "crf.json", "r") as meta:
            return json.load(meta)

In [3]:
records = find_records(("./"))
records

['Dataset1/MMC-HFrEF/KINO/Sub-EY/MMC-HFrEF_EY_kinoV1_20190107-153210_gwd5ar2K2tvvIo',
 'Dataset1/MMC-HFrEF/KINO/Sub-MT/MMC-HFrEF_MT_kinoV1_20191210-161556_gwX8ymbEkuvvIo',
 'Dataset1/MMC-HFrEF/KINO/Sub-WZ/MMC-HFrEF_WZ_kinoV1_20190930-145749_1WdAWm9mNfOGux',
 'Dataset1/MMC-HFrEF/KINO/Sub-YI/MMC-HFrEF_YI_kinoV1_20190909-150010_gwkwwq1lvIvvIo',
 'Dataset1/MMC-HFrEF/KINO/Sub-MV/MMC-HFrEF_MV_kinoV1_20190103-124526_gwdW4mA7nivvIo',
 'Dataset1/MMC-HFrEF/KINO/Sub-RK/MMC-HFrEF_RK_kinoV1_20190506-142648_gwo4YOgXnIvvIo',
 'Dataset1/MMC-HFrEF/KINO/Sub-YT/MMC-HFrEF_YT_kinoV1_20190603-145424_gwokar1wzTvvIo',
 'Dataset1/MMC-HFrEF/KINO/Sub-UF/MMC-HFrEF_UF_kinoV1_20190624-145537_gwR5aOzoqIvvIo',
 'Dataset1/MMC-HFrEF/KINO/Sub-PU/MMC-HFrEF_PU_kinoV1_20190211-154927_gwd4wQLYnsvvIo',
 'Dataset1/MMC-HFrEF/KINO/Sub-QR/MMC-HFrEF_QR_kinoV1_20190812-145831_gwRaDplewcvvIo',
 'Dataset1/MMC-HFrEF/KINO/Sub-WJ/MMC-HFrEF_WJ_kinoV1_20190520-145741_gwoa9Rdn8ivvIo',
 'Dataset1/MMC-HFrEF/KINO/Sub-WF/MMC-HFrEF_WF_kinoV1_2

In [6]:
len(records)

891

In [7]:
data = {}
for r in records:
    reader = RecordReader(r)
    metadata = reader.load_metadata()
    scg_metadata = reader.load_signal_meta('scg-k')
    rsp_metadata = reader.load_signal_meta('rsp')
    crf_data = reader.load_crf_metadata()
    
    value = {
            'age': metadata['subject']['age']['value'],
            'sex' : metadata['subject']['sex'],
            'weight': metadata['subject']['weight']['value'],
            'height' : metadata['subject']['height']['value'],
            'subject_id' : crf_data['subject_id'],
            'study_id' : crf_data['study_id'],
            'hf_type' : crf_data['hf_type'],
            'sample_rate_scgk' : scg_metadata['sample_rate'],
            'nrg_lin_scgk' : reader.load_signal("scg-k").nrg.sel(motion="lin").to_pandas(),
            'nrg_rot_scgk' : reader.load_signal("scg-k").nrg.sel(motion="rot").to_pandas(),
            'pwr_lin_scgk': reader.load_signal("scg-k").pwr.sel(motion="lin").to_pandas(),
            'pwr_rot_scgk': reader.load_signal("scg-k").pwr.sel(motion="rot").to_pandas(),
            'sample_rate_rsp' : rsp_metadata['sample_rate'],
            'rsp': reader.load_signal("rsp").signal.to_pandas()
            }
    data[metadata['id']] = value

In [8]:
df = pd.DataFrame.from_dict(data, orient='index')

In [9]:
df.size

12474

In [10]:
import numpy as np
from scipy.stats import skew, kurtosis
import pywt

def calculate_features(ts):
    mean = np.mean(ts)
    std = np.std(ts)
    median = np.median(ts)
    minimum = np.min(ts)
    maximum = np.max(ts)
    skewness = skew(ts)
    kurt = kurtosis(ts)
    rms = np.sqrt(np.mean(np.square(ts)))
    zero_crossings = np.sum(np.diff(np.sign(ts)) != 0)
    
    # Wavelet transformation
    wavelet = 'db4'
    coeffs = pywt.wavedec(ts, wavelet, level=4)
    
    # Calculate wavelet features
    wavelet_mean = np.mean(np.concatenate(coeffs))
    wavelet_std = np.std(np.concatenate(coeffs))
    wavelet_energy = np.sum(np.square(np.concatenate(coeffs)))
    
    return [mean, std, median, minimum, maximum, skewness, kurt, rms, zero_crossings, wavelet_mean, wavelet_std, wavelet_energy]

# Compute time series features for each subject
for subject_id, value in data.items():
    for feature in ['nrg_lin_scgk', 'nrg_rot_scgk', 'pwr_lin_scgk', 'pwr_rot_scgk', 'rsp']:
        ts = value[feature].values
        features = calculate_features(ts)
        
        # Store the computed features
        value[f"{feature}_mean"] = features[0]
        value[f"{feature}_std"] = features[1]
        value[f"{feature}_median"] = features[2]
        value[f"{feature}_min"] = features[3]
        value[f"{feature}_max"] = features[4]
        value[f"{feature}_skew"] = features[5]
        value[f"{feature}_kurt"] = features[6]
        value[f"{feature}_rms"] = features[7]
        value[f"{feature}_zero_crossings"] = features[8]
        value[f"{feature}_wavelet_mean"] = features[9]
        value[f"{feature}_wavelet_std"] = features[10]
        value[f"{feature}_wavelet_energy"] = features[11]

# Convert the dictionary to a DataFrame
df = pd.DataFrame.from_dict(data, orient='index')

In [11]:
df.dtypes

age                   float64
sex                    object
weight                float64
height                float64
subject_id             object
                       ...   
rsp_rms               float64
rsp_zero_crossings      int64
rsp_wavelet_mean      float64
rsp_wavelet_std       float64
rsp_wavelet_energy    float64
Length: 74, dtype: object

In [12]:
# Checking target
df.hf_type.value_counts()

NoHF       477
UNKNOWN    282
HFrEF       85
HFpEF       27
HFmEF       20
Name: hf_type, dtype: int64

In [13]:
# Dropping the hf_type = UNKNOWN
df = df[df["hf_type"]!="UNKNOWN"]

In [14]:
# Check counts again
df.hf_type.value_counts()

NoHF     477
HFrEF     85
HFpEF     27
HFmEF     20
Name: hf_type, dtype: int64

In [15]:
from sklearn.preprocessing import LabelEncoder
# Encode the hf_type column as integer labels
encoder = LabelEncoder()
df['hf_type'] = encoder.fit_transform(df['hf_type'])

In [16]:
# Creating a function to code HFpEF and HFmEF into one category, NoHF second category, and HFrEF third
def convert(df):
    if df["hf_type"]==3:
        return 0
    elif df["hf_type"]==2:
        return 2
    else:
        return 1

In [17]:
df["hf_type"] = df.apply(lambda df: convert(df), axis=1)

In [18]:
from scipy.signal import periodogram
from scipy.stats import entropy

def spectral_entropy(pxx):
    psd_norm = pxx / np.sum(pxx)
    return entropy(psd_norm)

# Initialize a list to store the feature data
feature_data = []

# Iterate through the data dictionary
for key, value in data.items():
    # Calculate the periodogram for each time series
    freq_nrg_lin, pxx_nrg_lin = periodogram(value['nrg_lin_scgk'])
    freq_nrg_rot, pxx_nrg_rot = periodogram(value['nrg_rot_scgk'])
    freq_pwr_lin, pxx_pwr_lin = periodogram(value['pwr_lin_scgk'])
    freq_pwr_rot, pxx_pwr_rot = periodogram(value['pwr_rot_scgk'])
    freq_rsp, pxx_rsp = periodogram(value['rsp'])

    # Calculate the frequency-domain features for each time series
    features = {
        'record_id': key,
        'nrg_lin_mean_freq': np.mean(freq_nrg_lin),
        'nrg_lin_median_freq': np.median(freq_nrg_lin),
        'nrg_lin_peak_freq': freq_nrg_lin[np.argmax(pxx_nrg_lin)],
        'nrg_lin_spectral_entropy': spectral_entropy(pxx_nrg_lin),
        'nrg_rot_mean_freq': np.mean(freq_nrg_rot),
        'nrg_rot_median_freq': np.median(freq_nrg_rot),
        'nrg_rot_peak_freq': freq_nrg_rot[np.argmax(pxx_nrg_rot)],
        'nrg_rot_spectral_entropy': spectral_entropy(pxx_nrg_rot),
        'pwr_lin_mean_freq': np.mean(freq_pwr_lin),
        'pwr_lin_median_freq': np.median(freq_pwr_lin),
        'pwr_lin_peak_freq': freq_pwr_lin[np.argmax(pxx_pwr_lin)],
        'pwr_lin_spectral_entropy': spectral_entropy(pxx_pwr_lin),
        'pwr_rot_mean_freq': np.mean(freq_pwr_rot),
        'pwr_rot_median_freq': np.median(freq_pwr_rot),
        'pwr_rot_peak_freq': freq_pwr_rot[np.argmax(pxx_pwr_rot)],
        'pwr_rot_spectral_entropy': spectral_entropy(pxx_pwr_rot),
        'rsp_mean_freq': np.mean(freq_rsp),
        'rsp_median_freq': np.median(freq_rsp),
        'rsp_peak_freq': freq_rsp[np.argmax(pxx_rsp)],
        'rsp_spectral_entropy': spectral_entropy(pxx_rsp)
    }
    
    # Add the features to the feature_data list
    feature_data.append(features)

# Convert the feature_data list into a DataFrame
features_df = pd.DataFrame(feature_data)

In [19]:
features_df.head()

Unnamed: 0,record_id,nrg_lin_mean_freq,nrg_lin_median_freq,nrg_lin_peak_freq,nrg_lin_spectral_entropy,nrg_rot_mean_freq,nrg_rot_median_freq,nrg_rot_peak_freq,nrg_rot_spectral_entropy,pwr_lin_mean_freq,...,pwr_lin_peak_freq,pwr_lin_spectral_entropy,pwr_rot_mean_freq,pwr_rot_median_freq,pwr_rot_peak_freq,pwr_rot_spectral_entropy,rsp_mean_freq,rsp_median_freq,rsp_peak_freq,rsp_spectral_entropy
0,MMC-HFrEF_EY_kinoV1_20190107-153210_gwd5ar2K2t...,0.249979,0.249979,0.015665,6.260469,0.249979,0.249979,0.007666,6.634803,0.249979,...,0.068078,6.445916,0.249979,0.249979,0.08316,7.068857,0.249979,0.249979,0.002,2.482062
1,MMC-HFrEF_MT_kinoV1_20191210-161556_gwX8ymbEku...,0.249979,0.249979,0.00425,6.283518,0.249979,0.249979,8.3e-05,6.887951,0.249979,...,0.093409,7.049933,0.249979,0.249979,0.249979,7.651671,0.249979,0.249979,0.00075,2.47031
2,MMC-HFrEF_WZ_kinoV1_20190930-145749_1WdAWm9mNf...,0.249979,0.249979,0.000333,6.286389,0.249979,0.249979,0.000333,6.532926,0.249979,...,0.073161,6.59802,0.249979,0.249979,0.100492,7.028616,0.249979,0.249979,0.00075,2.971475
3,MMC-HFrEF_YI_kinoV1_20190909-150010_gwkwwq1lvI...,0.249979,0.249979,0.00525,5.876797,0.249979,0.249979,0.00525,5.585985,0.249979,...,0.057662,6.085939,0.249979,0.249979,0.249979,7.209044,0.249979,0.249979,0.001833,1.623179
4,MMC-HFrEF_MV_kinoV1_20190103-124526_gwdW4mA7ni...,0.249979,0.249979,0.005,5.689868,0.249979,0.249979,0.005,6.377936,0.249979,...,0.054245,6.333234,0.249979,0.249979,0.100825,7.166193,0.249979,0.249979,0.001417,2.004515


In [20]:
# Set the index of features_df to be the record_id
features_df.set_index('record_id', inplace=True)

In [21]:

# Join the main DataFrame with the features_df
combined_df = pd.merge(df, features_df, left_index=True, right_index=True)

In [22]:
combined_df.head()

Unnamed: 0,age,sex,weight,height,subject_id,study_id,hf_type,sample_rate_scgk,nrg_lin_scgk,nrg_rot_scgk,...,pwr_lin_peak_freq,pwr_lin_spectral_entropy,pwr_rot_mean_freq,pwr_rot_median_freq,pwr_rot_peak_freq,pwr_rot_spectral_entropy,rsp_mean_freq,rsp_median_freq,rsp_peak_freq,rsp_spectral_entropy
MMC-HFrEF_EY_kinoV1_20190107-153210_gwd5ar2K2tvvIo,70.0,Male,106.0,1.87,EY,MMC-HFrEF,2,200,time 0 8.143398e-41 5 2.974699e-...,time 0 3.429861e-19 5 2.323705e-...,...,0.068078,6.445916,0.249979,0.249979,0.08316,7.068857,0.249979,0.249979,0.002,2.482062
MMC-HFrEF_MT_kinoV1_20191210-161556_gwX8ymbEkuvvIo,70.0,Male,77.0,1.76,MT,MMC-HFrEF,2,200,time 0 4.827842e-41 5 3.143857e-...,time 0 3.722609e-19 5 1.496852e-...,...,0.093409,7.049933,0.249979,0.249979,0.249979,7.651671,0.249979,0.249979,0.00075,2.47031
MMC-HFrEF_WZ_kinoV1_20190930-145749_1WdAWm9mNfOGux,72.0,Female,60.0,1.66,WZ,MMC-HFrEF,2,200,time 0 1.008940e-39 5 4.981065e-...,time 0 1.613923e-19 5 7.237731e-...,...,0.073161,6.59802,0.249979,0.249979,0.100492,7.028616,0.249979,0.249979,0.00075,2.971475
MMC-HFrEF_YI_kinoV1_20190909-150010_gwkwwq1lvIvvIo,67.0,Male,78.0,1.75,YI,MMC-HFrEF,2,200,time 0 1.621356e-41 5 3.310214e-...,time 0 1.505611e-19 5 6.988511e-...,...,0.057662,6.085939,0.249979,0.249979,0.249979,7.209044,0.249979,0.249979,0.001833,1.623179
MMC-HFrEF_MV_kinoV1_20190103-124526_gwdW4mA7nivvIo,68.0,Male,106.0,1.84,MV,MMC-HFrEF,2,200,time 0 4.274874e-42 5 3.747583e-...,time 0 2.533075e-19 5 1.138278e-...,...,0.054245,6.333234,0.249979,0.249979,0.100825,7.166193,0.249979,0.249979,0.001417,2.004515


In [23]:
combined_df.columns

Index(['age', 'sex', 'weight', 'height', 'subject_id', 'study_id', 'hf_type',
       'sample_rate_scgk', 'nrg_lin_scgk', 'nrg_rot_scgk', 'pwr_lin_scgk',
       'pwr_rot_scgk', 'sample_rate_rsp', 'rsp', 'nrg_lin_scgk_mean',
       'nrg_lin_scgk_std', 'nrg_lin_scgk_median', 'nrg_lin_scgk_min',
       'nrg_lin_scgk_max', 'nrg_lin_scgk_skew', 'nrg_lin_scgk_kurt',
       'nrg_lin_scgk_rms', 'nrg_lin_scgk_zero_crossings',
       'nrg_lin_scgk_wavelet_mean', 'nrg_lin_scgk_wavelet_std',
       'nrg_lin_scgk_wavelet_energy', 'nrg_rot_scgk_mean', 'nrg_rot_scgk_std',
       'nrg_rot_scgk_median', 'nrg_rot_scgk_min', 'nrg_rot_scgk_max',
       'nrg_rot_scgk_skew', 'nrg_rot_scgk_kurt', 'nrg_rot_scgk_rms',
       'nrg_rot_scgk_zero_crossings', 'nrg_rot_scgk_wavelet_mean',
       'nrg_rot_scgk_wavelet_std', 'nrg_rot_scgk_wavelet_energy',
       'pwr_lin_scgk_mean', 'pwr_lin_scgk_std', 'pwr_lin_scgk_median',
       'pwr_lin_scgk_min', 'pwr_lin_scgk_max', 'pwr_lin_scgk_skew',
       'pwr_lin_scgk_kurt'

In [24]:
print(combined_df.dtypes.head(50))

age                            float64
sex                             object
weight                         float64
height                         float64
subject_id                      object
study_id                        object
hf_type                          int64
sample_rate_scgk                 int64
nrg_lin_scgk                    object
nrg_rot_scgk                    object
pwr_lin_scgk                    object
pwr_rot_scgk                    object
sample_rate_rsp                  int64
rsp                             object
nrg_lin_scgk_mean              float64
nrg_lin_scgk_std               float64
nrg_lin_scgk_median            float64
nrg_lin_scgk_min               float64
nrg_lin_scgk_max               float64
nrg_lin_scgk_skew              float64
nrg_lin_scgk_kurt              float64
nrg_lin_scgk_rms               float64
nrg_lin_scgk_zero_crossings      int64
nrg_lin_scgk_wavelet_mean      float64
nrg_lin_scgk_wavelet_std       float64
nrg_lin_scgk_wavelet_ener

In [25]:
combined_df.isna().sum()

age                         0
sex                         0
weight                      0
height                      0
subject_id                  0
                           ..
pwr_rot_spectral_entropy    0
rsp_mean_freq               0
rsp_median_freq             0
rsp_peak_freq               0
rsp_spectral_entropy        0
Length: 94, dtype: int64

<h1>Exploration with PyCaret</h1>

In [26]:
#Take very small sample size from the df to conserve memory
sample = combined_df.sample(n=500)

In [27]:
sample.head()

Unnamed: 0,age,sex,weight,height,subject_id,study_id,hf_type,sample_rate_scgk,nrg_lin_scgk,nrg_rot_scgk,...,pwr_lin_peak_freq,pwr_lin_spectral_entropy,pwr_rot_mean_freq,pwr_rot_median_freq,pwr_rot_peak_freq,pwr_rot_spectral_entropy,rsp_mean_freq,rsp_median_freq,rsp_peak_freq,rsp_spectral_entropy
K-ICB_K_BDC9_20190404-121138_BKeqleWVoCVETk_6s,24.0,Male,76.4,1.76,K,K-ICB,0,200,time 0 2.478831e-39 5 2.036584e-...,time 0 8.648976e-19 5 2.094088e-...,...,0.049767,6.746926,0.25,0.25,0.138713,7.121075,0.25,0.25,0.000834,1.235167
SLEEP-SIMUL_WM_Centrale_20181109-154812_xKpBMkwmGhlws9,30.0,Male,74.0,1.86,WM,SLEEP-SIMUL,0,200,time 0 9.652153e-41 5 3.038379e-...,time 0 3.891642e-18 5 1.463988e-...,...,0.055579,6.892983,0.249979,0.249979,0.129406,7.223772,0.249979,0.249979,0.000583,3.025851
KINO-HFrEF-Brug_PS_2_20201030-154608_d1o8doNnKU0nf5,42.0,Male,100.0,1.73,PS,KINO-HFrEF-Brug,0,200,time 0 2.524292e-41 5 4.986101e-...,time 0 1.153393e-19 5 3.963267e-...,...,0.130489,7.479317,0.249979,0.249979,0.173652,7.217252,0.249979,0.249979,0.000833,2.231802
Heartemis-ERASME_SZO_2_20210701-131931_MAqpLR24YfEESb,23.0,Female,60.0,1.63,SZO,Heartemis-ERASME,0,250,time 0 1.895214e-37 4 5.522241e-...,time 0 4.783146e-17 4 3.455225e-...,...,6.7e-05,7.099755,0.249983,0.249983,6.7e-05,7.709559,0.249983,0.249983,0.001534,2.241385
K-ICB_H_HDT56_20190606-121516_gwoKv8WrwHA8fy_6s,48.0,Female,64.9,1.62,H,K-ICB,0,200,time 0 1.077873e-40 5 7.518803e-...,time 0 2.073912e-18 5 1.889777e-...,...,0.063104,6.692921,0.25,0.25,0.070273,6.988844,0.25,0.25,0.000834,1.116215


In [28]:
#Change dataset values to Category
sample = sample.astype({'hf_type': 'category'})

In [29]:
print(sample['hf_type'].unique())


[0, 1, 2]
Categories (3, int64): [0, 1, 2]


In [30]:
sample.head()

Unnamed: 0,age,sex,weight,height,subject_id,study_id,hf_type,sample_rate_scgk,nrg_lin_scgk,nrg_rot_scgk,...,pwr_lin_peak_freq,pwr_lin_spectral_entropy,pwr_rot_mean_freq,pwr_rot_median_freq,pwr_rot_peak_freq,pwr_rot_spectral_entropy,rsp_mean_freq,rsp_median_freq,rsp_peak_freq,rsp_spectral_entropy
K-ICB_K_BDC9_20190404-121138_BKeqleWVoCVETk_6s,24.0,Male,76.4,1.76,K,K-ICB,0,200,time 0 2.478831e-39 5 2.036584e-...,time 0 8.648976e-19 5 2.094088e-...,...,0.049767,6.746926,0.25,0.25,0.138713,7.121075,0.25,0.25,0.000834,1.235167
SLEEP-SIMUL_WM_Centrale_20181109-154812_xKpBMkwmGhlws9,30.0,Male,74.0,1.86,WM,SLEEP-SIMUL,0,200,time 0 9.652153e-41 5 3.038379e-...,time 0 3.891642e-18 5 1.463988e-...,...,0.055579,6.892983,0.249979,0.249979,0.129406,7.223772,0.249979,0.249979,0.000583,3.025851
KINO-HFrEF-Brug_PS_2_20201030-154608_d1o8doNnKU0nf5,42.0,Male,100.0,1.73,PS,KINO-HFrEF-Brug,0,200,time 0 2.524292e-41 5 4.986101e-...,time 0 1.153393e-19 5 3.963267e-...,...,0.130489,7.479317,0.249979,0.249979,0.173652,7.217252,0.249979,0.249979,0.000833,2.231802
Heartemis-ERASME_SZO_2_20210701-131931_MAqpLR24YfEESb,23.0,Female,60.0,1.63,SZO,Heartemis-ERASME,0,250,time 0 1.895214e-37 4 5.522241e-...,time 0 4.783146e-17 4 3.455225e-...,...,6.7e-05,7.099755,0.249983,0.249983,6.7e-05,7.709559,0.249983,0.249983,0.001534,2.241385
K-ICB_H_HDT56_20190606-121516_gwoKv8WrwHA8fy_6s,48.0,Female,64.9,1.62,H,K-ICB,0,200,time 0 1.077873e-40 5 7.518803e-...,time 0 2.073912e-18 5 1.889777e-...,...,0.063104,6.692921,0.25,0.25,0.070273,6.988844,0.25,0.25,0.000834,1.116215


In [31]:
import pandas as pd
from pycaret.classification import *

#Convert non-numeric columns to strings
for col in sample.select_dtypes(exclude='number').columns:
    sample[col] = sample[col].astype(str)

#Drop rows with missing target variable
sample.dropna(subset=['hf_type'], inplace=True)

# Check size of dataframe before passing to setup
if len(sample) > 0:
    # Adjust train and test sizes based on size of dataframe
    train_size = min(0.8, len(sample)/10)
    test_size = 1 - train_size
    
    #Pass the categorical column to PyCaret's setup() function
    s = setup(data=sample, target='hf_type', session_id=123, train_size=train_size)
else:
    print("Error: dataframe has no rows.")













Unnamed: 0,Description,Value
0,Session id,123
1,Target,hf_type
2,Target type,Multiclass
3,Target mapping,"0: 0, 1: 1, 2: 2"
4,Original data shape,"(500, 94)"
5,Transformed data shape,"(500, 102)"
6,Transformed train set shape,"(400, 102)"
7,Transformed test set shape,"(100, 102)"
8,Ordinal features,1
9,Numeric features,85


In [32]:
#Compare all models
best_model = compare_models()


Processing:   0%|          | 0/69 [00:00<?, ?it/s]

In [None]:
#Create Ensemble Model
ensemble_model = ensemble_model(best_model)

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9,0.9232,0.9,0.9183,0.9046,0.7333,0.7413
1,0.925,0.9804,0.925,0.9375,0.9161,0.7959,0.8015
2,0.875,0.9492,0.875,0.8739,0.8555,0.5902,0.6245
3,0.95,0.9549,0.95,0.953,0.9458,0.8529,0.8636
4,0.825,0.9506,0.825,0.8368,0.7795,0.3651,0.4394
5,0.85,0.9511,0.85,0.8525,0.8497,0.598,0.599
6,0.85,0.9656,0.85,0.7838,0.8154,0.5294,0.5485
7,0.875,0.9806,0.875,0.8795,0.8716,0.6324,0.6402
8,0.9,0.9755,0.9,0.9087,0.891,0.7158,0.721
9,0.925,0.9864,0.925,0.9133,0.9177,0.788,0.7904


In [34]:
#Evaluate Model
#Click on tabs to view metrics
eval = evaluate_model(ensemble_model)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Pipeline Plot', 'pipelin…

In [35]:
#Predict on Model
predict = predict_model(ensemble_model)

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,str,0.82,0.9256,0,0,0,0.54,0.5407


In [36]:
#Finalize Model
final = finalize_model(ensemble_model)

In [37]:
#Save Model
saved_model = save_model(final, 'final_model')

Transformation Pipeline and Model Successfully Saved


In [None]:
#Allows for the dashboarding of the metrics
#numpy may need to be downgraded to run
#uncomment the line below to run

# dashboard(final)
