In [1]:
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import numpy as np
from skimpy import skim

import torch.nn as nn



2023-04-27 09:03:42.936197: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-04-27 09:03:45.361599: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-04-27 09:03:45.361643: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2023-04-27 09:03:50.895554: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2023-

In [2]:
#Check torch version
import torch
torch.__version__

'1.12.1+cu102'

In [3]:
import xarray as xr
import json
from typing import Union
from pathlib import Path
import glob



def to_path(p: Union[str, Path]) -> Path:
    return p if isinstance(p, Path) else Path(p)

def find_records(path: str):
    search_path: str = f"{path}/**/signals/"
    all_paths = list(map(lambda x: str(to_path(x).parent), glob.glob(search_path, recursive=True)))
    return all_paths

class RecordReader():
    def __init__(self, path: Union[str, Path]):
        self.path = to_path(path)

    def load_signal(self, sig_name):
        return xr.open_zarr(self.path / "signals" / sig_name / "dataset")

    def load_signal_meta(self, sig_name):
        with open(self.path / "signals" / sig_name / "meta.json", "r") as meta:
            return json.load(meta)
    
    def load_metadata(self):
        with open(self.path / "meta.json", "r") as meta:
            return json.load(meta)

    def load_crf_metadata(self):
        with open(self.path / "crf.json", "r") as meta:
            return json.load(meta)

In [4]:
records = find_records(("./"))
print(records, flush=True)

['Dataset1/MMC-HFrEF/KINO/Sub-EY/MMC-HFrEF_EY_kinoV1_20190107-153210_gwd5ar2K2tvvIo', 'Dataset1/MMC-HFrEF/KINO/Sub-MT/MMC-HFrEF_MT_kinoV1_20191210-161556_gwX8ymbEkuvvIo', 'Dataset1/MMC-HFrEF/KINO/Sub-WZ/MMC-HFrEF_WZ_kinoV1_20190930-145749_1WdAWm9mNfOGux', 'Dataset1/MMC-HFrEF/KINO/Sub-YI/MMC-HFrEF_YI_kinoV1_20190909-150010_gwkwwq1lvIvvIo', 'Dataset1/MMC-HFrEF/KINO/Sub-MV/MMC-HFrEF_MV_kinoV1_20190103-124526_gwdW4mA7nivvIo', 'Dataset1/MMC-HFrEF/KINO/Sub-RK/MMC-HFrEF_RK_kinoV1_20190506-142648_gwo4YOgXnIvvIo', 'Dataset1/MMC-HFrEF/KINO/Sub-YT/MMC-HFrEF_YT_kinoV1_20190603-145424_gwokar1wzTvvIo', 'Dataset1/MMC-HFrEF/KINO/Sub-UF/MMC-HFrEF_UF_kinoV1_20190624-145537_gwR5aOzoqIvvIo', 'Dataset1/MMC-HFrEF/KINO/Sub-PU/MMC-HFrEF_PU_kinoV1_20190211-154927_gwd4wQLYnsvvIo', 'Dataset1/MMC-HFrEF/KINO/Sub-QR/MMC-HFrEF_QR_kinoV1_20190812-145831_gwRaDplewcvvIo', 'Dataset1/MMC-HFrEF/KINO/Sub-WJ/MMC-HFrEF_WJ_kinoV1_20190520-145741_gwoa9Rdn8ivvIo', 'Dataset1/MMC-HFrEF/KINO/Sub-WF/MMC-HFrEF_WF_kinoV1_20190311-152

In [5]:
data = {}
for r in records:
    reader = RecordReader(r)
    metadata = reader.load_metadata()
    scg_metadata = reader.load_signal_meta('scg-k')
    rsp_metadata = reader.load_signal_meta('rsp')
    crf_data = reader.load_crf_metadata()
    
    value = {
            'age': metadata['subject']['age']['value'],
            'sex' : metadata['subject']['sex'],
            'weight': metadata['subject']['weight']['value'],
            'height' : metadata['subject']['height']['value'],
            'subject_id' : crf_data['subject_id'],
            'study_id' : crf_data['study_id'],
            'hf_type' : crf_data['hf_type'],
            'sample_rate_scgk' : scg_metadata['sample_rate'],
            'nrg_lin_scgk' : reader.load_signal("scg-k").nrg.sel(motion="lin").to_pandas(),
            'nrg_rot_scgk' : reader.load_signal("scg-k").nrg.sel(motion="rot").to_pandas(),
            'pwr_lin_scgk': reader.load_signal("scg-k").pwr.sel(motion="lin").to_pandas(),
            'pwr_rot_scgk': reader.load_signal("scg-k").pwr.sel(motion="rot").to_pandas(),
            'sample_rate_rsp' : rsp_metadata['sample_rate'],
            'rsp': reader.load_signal("rsp").signal.to_pandas()
            }
    data[metadata['id']] = value

In [6]:
df = pd.DataFrame.from_dict(data, orient='index')

In [7]:
import numpy as np
from scipy.stats import skew, kurtosis
import pywt

def calculate_features(ts):
    mean = np.mean(ts)
    std = np.std(ts)
    median = np.median(ts)
    minimum = np.min(ts)
    maximum = np.max(ts)
    skewness = skew(ts)
    kurt = kurtosis(ts)
    rms = np.sqrt(np.mean(np.square(ts)))
    zero_crossings = np.sum(np.diff(np.sign(ts)) != 0)
    
    # Wavelet transformation
    wavelet = 'db4'
    coeffs = pywt.wavedec(ts, wavelet, level=4)
    
    # Calculate wavelet features
    wavelet_mean = np.mean(np.concatenate(coeffs))
    wavelet_std = np.std(np.concatenate(coeffs))
    wavelet_energy = np.sum(np.square(np.concatenate(coeffs)))
    
    return [mean, std, median, minimum, maximum, skewness, kurt, rms, zero_crossings, wavelet_mean, wavelet_std, wavelet_energy]

# Compute time series features for each subject
for subject_id, value in data.items():
    for feature in ['nrg_lin_scgk', 'nrg_rot_scgk', 'pwr_lin_scgk', 'pwr_rot_scgk', 'rsp']:
        ts = value[feature].values
        features = calculate_features(ts)
        
        # Store the computed features
        value[f"{feature}_mean"] = features[0]
        value[f"{feature}_std"] = features[1]
        value[f"{feature}_median"] = features[2]
        value[f"{feature}_min"] = features[3]
        value[f"{feature}_max"] = features[4]
        value[f"{feature}_skew"] = features[5]
        value[f"{feature}_kurt"] = features[6]
        value[f"{feature}_rms"] = features[7]
        value[f"{feature}_zero_crossings"] = features[8]
        value[f"{feature}_wavelet_mean"] = features[9]
        value[f"{feature}_wavelet_std"] = features[10]
        value[f"{feature}_wavelet_energy"] = features[11]

# Convert the dictionary to a DataFrame
df = pd.DataFrame.from_dict(data, orient='index')

In [8]:
df.dtypes

age                   float64
sex                    object
weight                float64
height                float64
subject_id             object
                       ...   
rsp_rms               float64
rsp_zero_crossings      int64
rsp_wavelet_mean      float64
rsp_wavelet_std       float64
rsp_wavelet_energy    float64
Length: 74, dtype: object

In [9]:
# Checking target
df.hf_type.value_counts()

NoHF       477
UNKNOWN    282
HFrEF       85
HFpEF       27
HFmEF       20
Name: hf_type, dtype: int64

In [10]:
# Dropping the hf_type = UNKNOWN
df = df[df["hf_type"]!="UNKNOWN"]

In [11]:
# Check counts again
df.hf_type.value_counts()

NoHF     477
HFrEF     85
HFpEF     27
HFmEF     20
Name: hf_type, dtype: int64

In [12]:
from sklearn.preprocessing import LabelEncoder
# Encode the hf_type column as integer labels
encoder = LabelEncoder()
df['hf_type'] = encoder.fit_transform(df['hf_type'])

In [13]:
# Creating a function to code HFpEF and HFmEF into one category, NoHF second category, and HFrEF third
def convert(df):
    if df["hf_type"]==3:
        return 0
    elif df["hf_type"]==2:
        return 2
    else:
        return 1

In [14]:
df["hf_type"] = df.apply(lambda df: convert(df), axis=1)

In [15]:
from scipy.signal import periodogram
from scipy.stats import entropy

def spectral_entropy(pxx):
    psd_norm = pxx / np.sum(pxx)
    return entropy(psd_norm)

# Initialize a list to store the feature data
feature_data = []

# Iterate through the data dictionary
for key, value in data.items():
    # Calculate the periodogram for each time series
    freq_nrg_lin, pxx_nrg_lin = periodogram(value['nrg_lin_scgk'])
    freq_nrg_rot, pxx_nrg_rot = periodogram(value['nrg_rot_scgk'])
    freq_pwr_lin, pxx_pwr_lin = periodogram(value['pwr_lin_scgk'])
    freq_pwr_rot, pxx_pwr_rot = periodogram(value['pwr_rot_scgk'])
    freq_rsp, pxx_rsp = periodogram(value['rsp'])

    # Calculate the frequency-domain features for each time series
    features = {
        'record_id': key,
        'nrg_lin_mean_freq': np.mean(freq_nrg_lin),
        'nrg_lin_median_freq': np.median(freq_nrg_lin),
        'nrg_lin_peak_freq': freq_nrg_lin[np.argmax(pxx_nrg_lin)],
        'nrg_lin_spectral_entropy': spectral_entropy(pxx_nrg_lin),
        'nrg_rot_mean_freq': np.mean(freq_nrg_rot),
        'nrg_rot_median_freq': np.median(freq_nrg_rot),
        'nrg_rot_peak_freq': freq_nrg_rot[np.argmax(pxx_nrg_rot)],
        'nrg_rot_spectral_entropy': spectral_entropy(pxx_nrg_rot),
        'pwr_lin_mean_freq': np.mean(freq_pwr_lin),
        'pwr_lin_median_freq': np.median(freq_pwr_lin),
        'pwr_lin_peak_freq': freq_pwr_lin[np.argmax(pxx_pwr_lin)],
        'pwr_lin_spectral_entropy': spectral_entropy(pxx_pwr_lin),
        'pwr_rot_mean_freq': np.mean(freq_pwr_rot),
        'pwr_rot_median_freq': np.median(freq_pwr_rot),
        'pwr_rot_peak_freq': freq_pwr_rot[np.argmax(pxx_pwr_rot)],
        'pwr_rot_spectral_entropy': spectral_entropy(pxx_pwr_rot),
        'rsp_mean_freq': np.mean(freq_rsp),
        'rsp_median_freq': np.median(freq_rsp),
        'rsp_peak_freq': freq_rsp[np.argmax(pxx_rsp)],
        'rsp_spectral_entropy': spectral_entropy(pxx_rsp)
    }
    
    # Add the features to the feature_data list
    feature_data.append(features)

# Convert the feature_data list into a DataFrame
features_df = pd.DataFrame(feature_data)

In [16]:
features_df.head()

Unnamed: 0,record_id,nrg_lin_mean_freq,nrg_lin_median_freq,nrg_lin_peak_freq,nrg_lin_spectral_entropy,nrg_rot_mean_freq,nrg_rot_median_freq,nrg_rot_peak_freq,nrg_rot_spectral_entropy,pwr_lin_mean_freq,...,pwr_lin_peak_freq,pwr_lin_spectral_entropy,pwr_rot_mean_freq,pwr_rot_median_freq,pwr_rot_peak_freq,pwr_rot_spectral_entropy,rsp_mean_freq,rsp_median_freq,rsp_peak_freq,rsp_spectral_entropy
0,MMC-HFrEF_EY_kinoV1_20190107-153210_gwd5ar2K2t...,0.249979,0.249979,0.015665,6.260469,0.249979,0.249979,0.007666,6.634803,0.249979,...,0.068078,6.445916,0.249979,0.249979,0.08316,7.068857,0.249979,0.249979,0.002,2.482062
1,MMC-HFrEF_MT_kinoV1_20191210-161556_gwX8ymbEku...,0.249979,0.249979,0.00425,6.283518,0.249979,0.249979,8.3e-05,6.887951,0.249979,...,0.093409,7.049933,0.249979,0.249979,0.249979,7.651671,0.249979,0.249979,0.00075,2.47031
2,MMC-HFrEF_WZ_kinoV1_20190930-145749_1WdAWm9mNf...,0.249979,0.249979,0.000333,6.286389,0.249979,0.249979,0.000333,6.532926,0.249979,...,0.073161,6.59802,0.249979,0.249979,0.100492,7.028616,0.249979,0.249979,0.00075,2.971475
3,MMC-HFrEF_YI_kinoV1_20190909-150010_gwkwwq1lvI...,0.249979,0.249979,0.00525,5.876797,0.249979,0.249979,0.00525,5.585985,0.249979,...,0.057662,6.085939,0.249979,0.249979,0.249979,7.209044,0.249979,0.249979,0.001833,1.623179
4,MMC-HFrEF_MV_kinoV1_20190103-124526_gwdW4mA7ni...,0.249979,0.249979,0.005,5.689868,0.249979,0.249979,0.005,6.377936,0.249979,...,0.054245,6.333234,0.249979,0.249979,0.100825,7.166193,0.249979,0.249979,0.001417,2.004515


In [17]:
# Set the index of features_df to be the record_id
features_df.set_index('record_id', inplace=True)

In [18]:
# Join the main DataFrame with the features_df
combined_df = pd.merge(df, features_df, left_index=True, right_index=True)

In [19]:
combined_df.head()

Unnamed: 0,age,sex,weight,height,subject_id,study_id,hf_type,sample_rate_scgk,nrg_lin_scgk,nrg_rot_scgk,...,pwr_lin_peak_freq,pwr_lin_spectral_entropy,pwr_rot_mean_freq,pwr_rot_median_freq,pwr_rot_peak_freq,pwr_rot_spectral_entropy,rsp_mean_freq,rsp_median_freq,rsp_peak_freq,rsp_spectral_entropy
MMC-HFrEF_EY_kinoV1_20190107-153210_gwd5ar2K2tvvIo,70.0,Male,106.0,1.87,EY,MMC-HFrEF,2,200,time 0 8.143398e-41 5 2.974699e-...,time 0 3.429861e-19 5 2.323705e-...,...,0.068078,6.445916,0.249979,0.249979,0.08316,7.068857,0.249979,0.249979,0.002,2.482062
MMC-HFrEF_MT_kinoV1_20191210-161556_gwX8ymbEkuvvIo,70.0,Male,77.0,1.76,MT,MMC-HFrEF,2,200,time 0 4.827842e-41 5 3.143857e-...,time 0 3.722609e-19 5 1.496852e-...,...,0.093409,7.049933,0.249979,0.249979,0.249979,7.651671,0.249979,0.249979,0.00075,2.47031
MMC-HFrEF_WZ_kinoV1_20190930-145749_1WdAWm9mNfOGux,72.0,Female,60.0,1.66,WZ,MMC-HFrEF,2,200,time 0 1.008940e-39 5 4.981065e-...,time 0 1.613923e-19 5 7.237731e-...,...,0.073161,6.59802,0.249979,0.249979,0.100492,7.028616,0.249979,0.249979,0.00075,2.971475
MMC-HFrEF_YI_kinoV1_20190909-150010_gwkwwq1lvIvvIo,67.0,Male,78.0,1.75,YI,MMC-HFrEF,2,200,time 0 1.621356e-41 5 3.310214e-...,time 0 1.505611e-19 5 6.988511e-...,...,0.057662,6.085939,0.249979,0.249979,0.249979,7.209044,0.249979,0.249979,0.001833,1.623179
MMC-HFrEF_MV_kinoV1_20190103-124526_gwdW4mA7nivvIo,68.0,Male,106.0,1.84,MV,MMC-HFrEF,2,200,time 0 4.274874e-42 5 3.747583e-...,time 0 2.533075e-19 5 1.138278e-...,...,0.054245,6.333234,0.249979,0.249979,0.100825,7.166193,0.249979,0.249979,0.001417,2.004515


In [20]:
combined_df.columns

Index(['age', 'sex', 'weight', 'height', 'subject_id', 'study_id', 'hf_type',
       'sample_rate_scgk', 'nrg_lin_scgk', 'nrg_rot_scgk', 'pwr_lin_scgk',
       'pwr_rot_scgk', 'sample_rate_rsp', 'rsp', 'nrg_lin_scgk_mean',
       'nrg_lin_scgk_std', 'nrg_lin_scgk_median', 'nrg_lin_scgk_min',
       'nrg_lin_scgk_max', 'nrg_lin_scgk_skew', 'nrg_lin_scgk_kurt',
       'nrg_lin_scgk_rms', 'nrg_lin_scgk_zero_crossings',
       'nrg_lin_scgk_wavelet_mean', 'nrg_lin_scgk_wavelet_std',
       'nrg_lin_scgk_wavelet_energy', 'nrg_rot_scgk_mean', 'nrg_rot_scgk_std',
       'nrg_rot_scgk_median', 'nrg_rot_scgk_min', 'nrg_rot_scgk_max',
       'nrg_rot_scgk_skew', 'nrg_rot_scgk_kurt', 'nrg_rot_scgk_rms',
       'nrg_rot_scgk_zero_crossings', 'nrg_rot_scgk_wavelet_mean',
       'nrg_rot_scgk_wavelet_std', 'nrg_rot_scgk_wavelet_energy',
       'pwr_lin_scgk_mean', 'pwr_lin_scgk_std', 'pwr_lin_scgk_median',
       'pwr_lin_scgk_min', 'pwr_lin_scgk_max', 'pwr_lin_scgk_skew',
       'pwr_lin_scgk_kurt'

In [21]:
print(combined_df.dtypes.head(50))

age                            float64
sex                             object
weight                         float64
height                         float64
subject_id                      object
study_id                        object
hf_type                          int64
sample_rate_scgk                 int64
nrg_lin_scgk                    object
nrg_rot_scgk                    object
pwr_lin_scgk                    object
pwr_rot_scgk                    object
sample_rate_rsp                  int64
rsp                             object
nrg_lin_scgk_mean              float64
nrg_lin_scgk_std               float64
nrg_lin_scgk_median            float64
nrg_lin_scgk_min               float64
nrg_lin_scgk_max               float64
nrg_lin_scgk_skew              float64
nrg_lin_scgk_kurt              float64
nrg_lin_scgk_rms               float64
nrg_lin_scgk_zero_crossings      int64
nrg_lin_scgk_wavelet_mean      float64
nrg_lin_scgk_wavelet_std       float64
nrg_lin_scgk_wavelet_ener

In [22]:
combined_df.isna().sum()

age                         0
sex                         0
weight                      0
height                      0
subject_id                  0
                           ..
pwr_rot_spectral_entropy    0
rsp_mean_freq               0
rsp_median_freq             0
rsp_peak_freq               0
rsp_spectral_entropy        0
Length: 94, dtype: int64

In [23]:
combined_df.shape

(609, 94)

In [24]:
#prepare data for modeling
combined_df.drop(columns=['nrg_lin_scgk', 'nrg_rot_scgk', 'pwr_lin_scgk', 'pwr_rot_scgk', 'rsp'], inplace=True)
skim(combined_df)

In [25]:
#separate each signal
nrg_signal = combined_df[['nrg_lin_mean_freq', 'nrg_lin_median_freq', 'nrg_lin_peak_freq', 'nrg_lin_spectral_entropy', 'nrg_rot_mean_freq', 'nrg_rot_median_freq', 'nrg_rot_peak_freq', 'nrg_rot_spectral_entropy']]
rot_signal = combined_df[['pwr_lin_mean_freq', 'pwr_lin_median_freq', 'pwr_lin_peak_freq', 'pwr_lin_spectral_entropy', 'pwr_rot_mean_freq', 'pwr_rot_median_freq', 'pwr_rot_peak_freq', 'pwr_rot_spectral_entropy']]
pwr_signal = combined_df[['rsp_mean_freq', 'rsp_median_freq', 'rsp_peak_freq', 'rsp_spectral_entropy']]

In [26]:
print(nrg_signal.shape)
print(rot_signal.shape)
print(pwr_signal.shape)

(609, 8)
(609, 8)
(609, 4)


In [27]:
nrg_signal.columns

Index(['nrg_lin_mean_freq', 'nrg_lin_median_freq', 'nrg_lin_peak_freq',
       'nrg_lin_spectral_entropy', 'nrg_rot_mean_freq', 'nrg_rot_median_freq',
       'nrg_rot_peak_freq', 'nrg_rot_spectral_entropy'],
      dtype='object')

<h1>Pytorch Neural Network with Skortch Integration w/PyCaret Tuning</h1>

In [28]:
#PyTorch Neural Network

import torch.nn as nn
class Net(nn.Module):
    def __init__(self, num_inputs=12, num_units_d1=200, num_units_d2=100):
        super(Net, self).__init__()
        self.dense0 = nn.Linear(num_inputs, num_units_d1)
        self.nonlin = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        self.dense1 = nn.Linear(num_units_d1, num_units_d2)
        self.output = nn.Linear(num_units_d2, 2)
        self.softmax = nn.Softmax(dim=-1)
    def forward(self, X, **kwargs):
        X = self.nonlin(self.dense0(X))
        X = self.dropout(X)
        X = self.nonlin(self.dense1(X))
        X = self.softmax(self.output(X))
        return X

In [29]:
#Skortch Neural Network
from skorch import NeuralNetClassifier
net = NeuralNetClassifier(
    module=Net,
    max_epochs=30,
    lr=0.1,
    batch_size=32,
    train_split=None
)

In [30]:
#TabNet Classifier
!pip install pytorch-tabnet --quiet

import torch
import numpy as np
from skorch import NeuralNetClassifier
from skorch.callbacks import EarlyStopping
from pytorch_tabnet.tab_model import TabNetClassifier
from torch.optim.lr_scheduler import ReduceLROnPlateau


# Define the TabNet model
tabnet = TabNetClassifier(
    n_d=8, n_a=8, n_steps=3, gamma=1.3, lambda_sparse=0.001,
    optimizer_fn=torch.optim.Adam,
    optimizer_params=dict(lr=2e-2, weight_decay=1e-5),
    mask_type='entmax',
    scheduler_params=dict(mode='min', patience=5, min_lr=1e-5, factor=0.5),
    scheduler_fn=torch.optim.lr_scheduler.ReduceLROnPlateau,
)


# Define the Skorch neural network classifier
neural = NeuralNetClassifier(
    tabnet,
    max_epochs=100,
    batch_size=256,
    optimizer=torch.optim.Adam,
    optimizer__lr=1e-2,
    optimizer__weight_decay=1e-5,
    criterion=torch.nn.CrossEntropyLoss,
    callbacks=[EarlyStopping(patience=10)],
    train_split=None,
    callbacks__scheduler=ReduceLROnPlateau,
    callbacks__scheduler__mode='min',
    callbacks__scheduler__patience=5,
    callbacks__scheduler__factor=0.5,
)





[0m



In [31]:
#1D CNN
import numpy as np
from sklearn.datasets import make_classification
from skorch import NeuralNetClassifier
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

# Define the 1D CNN model
class CNN1D(nn.Module):
    def __init__(self, input_size, num_classes):
        super(CNN1D, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=input_size, out_channels=32, kernel_size=3)
        self.pool1 = nn.MaxPool1d(kernel_size=2)
        self.conv2 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=3)
        self.pool2 = nn.MaxPool1d(kernel_size=2)
        self.conv3 = nn.Conv1d(in_channels=64, out_channels=128, kernel_size=3)
        self.pool3 = nn.MaxPool1d(kernel_size=2)
        self.fc1 = nn.Linear(in_features=128 * 12, out_features=512)
        self.fc2 = nn.Linear(in_features=512, out_features=num_classes)
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool1(x)
        x = F.relu(self.conv2(x))
        x = self.pool2(x)
        x = F.relu(self.conv3(x))
        x = self.pool3(x)
        x = x.view(-1, 128 * 12)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

# Set the hyperparameters
input_size = 10 # Number of input features
num_classes = 2 # Number of output classes
learning_rate = 0.001
batch_size = 32
num_epochs = 100

# Define the PyTorch neural network classifier
One_CNN = NeuralNetClassifier(
    CNN1D(input_size, num_classes),
    criterion=nn.CrossEntropyLoss,
    optimizer=optim.Adam,
    lr=learning_rate,
    batch_size=batch_size,
    max_epochs=num_epochs,
    device='cuda' if torch.cuda.is_available() else 'cpu',
    callbacks=[
        ('early_stop', EarlyStopping(monitor='valid_loss', patience=5))
    ]
)




In [32]:
import numpy as np
from sklearn.datasets import make_classification
from skorch import NeuralNetClassifier
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

# Define the 2D CNN model
class CNN2D(nn.Module):
    def __init__(self, input_shape, num_classes):
        super(CNN2D, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=input_shape[0], out_channels=32, kernel_size=3)
        self.pool1 = nn.MaxPool2d(kernel_size=2)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
        self.pool2 = nn.MaxPool2d(kernel_size=2)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3)
        self.pool3 = nn.MaxPool2d(kernel_size=2)
        self.fc1 = nn.Linear(in_features=128 * 12 * 12, out_features=512)
        self.fc2 = nn.Linear(in_features=512, out_features=num_classes)
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool1(x)
        x = F.relu(self.conv2(x))
        x = self.pool2(x)
        x = F.relu(self.conv3(x))
        x = self.pool3(x)
        x = x.view(-1, 128 * 12 * 12)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

# Set the hyperparameters
input_shape = (3, 32, 32) # Input shape of each image
num_classes = 10 # Number of output classes
learning_rate = 0.001
batch_size = 32
num_epochs = 100

# Define the PyTorch neural network classifier
Two_CNN = NeuralNetClassifier(
    CNN2D(input_shape, num_classes),
    criterion=nn.CrossEntropyLoss,
    optimizer=optim.Adam,
    lr=learning_rate,
    batch_size=batch_size,
    max_epochs=num_epochs,
    device='cuda' if torch.cuda.is_available() else 'cpu',
    callbacks=[
        ('early_stop', EarlyStopping(monitor='valid_loss', patience=5))
    ]
)



<h3>DataFrameTransformer</h3>




In [33]:
#Import SKlearn Pipeline
from sklearn.pipeline import Pipeline

In [34]:
#Dataframe Transformer

from skorch.helper import DataFrameTransformer
nn_pipe = Pipeline(
    [
        ("transform", DataFrameTransformer()),
        ("net", net),
        ("tabnet", neural),
        ("One_CNN", One_CNN),
        ("Two_CNN", Two_CNN)
    ]
)

In [35]:
print(nn_pipe)

Pipeline(steps=[('transform', DataFrameTransformer()),
                ('net',
                 <class 'skorch.classifier.NeuralNetClassifier'>[uninitialized](
  module=<class '__main__.Net'>,
)),
                ('tabnet',
                 <class 'skorch.classifier.NeuralNetClassifier'>[uninitialized](
  module=TabNetClassifier(n_d=8, n_a=8, n_steps=3, gamma=1.3, cat_idxs=[], cat_dims=[], cat_emb_dim=1, n_independent=2, n_shared=2, eps...
    (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
    (pool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (fc1): Linear(in_features=18432, out_features=512, bias=True)
    (fc2): Linear(in_features=512, out_features=10, bias=True)
    (dropout): Dropout(p=0.5, inplace=False)
  ),
))])


In [36]:
#Take a smaller sample to save memory
sample = combined_df.sample(n=50)

In [37]:
from pycaret.classification import *
target = "hf_type"
clf1 = setup(data = sample, 
            target = target,
            train_size = 0.8,
            fold = 5,
            session_id = 123,
            log_experiment = True, 
            experiment_name = 'electrical_grid_1', 
            )

Unnamed: 0,Description,Value
0,Session id,123
1,Target,hf_type
2,Target type,Multiclass
3,Original data shape,"(50, 89)"
4,Transformed data shape,"(50, 96)"
5,Transformed train set shape,"(40, 96)"
6,Transformed test set shape,"(10, 96)"
7,Ordinal features,1
8,Numeric features,85
9,Categorical features,3


COMET INFO: No Comet API Key was found, creating an OfflineExperiment. Set up your API Key to get the full Comet experience https://www.comet.com/docs/python-sdk/advanced/#python-configuration
COMET INFO: Using '/home/brandon/Omdena/Omdena HeartKinetics/HeartKinetics - Project/.cometml-runs' path as offline directory. Pass 'offline_directory' parameter into constructor or set the 'COMET_OFFLINE_DIRECTORY' environment variable to manually choose where to store offline experiment archives.


In [51]:
best = compare_models()

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)


COMET INFO: No Comet API Key was found, creating an OfflineExperiment. Set up your API Key to get the full Comet experience https://www.comet.com/docs/python-sdk/advanced/#python-configuration
COMET INFO: Using '/home/brandon/Omdena/Omdena HeartKinetics/HeartKinetics - Project/.cometml-runs' path as offline directory. Pass 'offline_directory' parameter into constructor or set the 'COMET_OFFLINE_DIRECTORY' environment variable to manually choose where to store offline experiment archives.
COMET INFO: ---------------------------------------------------------------------------------------
COMET INFO: Comet.ml OfflineExperiment Summary
COMET INFO: ---------------------------------------------------------------------------------------
COMET INFO:   Data:
COMET INFO:     display_summary_level : 1
COMET INFO:     url                   : [OfflineExperiment will get URL after upload]
COMET INFO:   Metrics:
COMET INFO:     AUC      : 0.5
COMET INFO:     Accuracy : 0.725
COMET INFO:     F1       

In [None]:
models = [best, nn_pipe]

In [None]:
#Compare Best NN Model
best_nn = compare_models(include=[nn_pipe, best], sort="F1")

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)


COMET INFO: No Comet API Key was found, creating an OfflineExperiment. Set up your API Key to get the full Comet experience https://www.comet.com/docs/python-sdk/advanced/#python-configuration
COMET INFO: Using '/home/brandon/Omdena/Omdena HeartKinetics/HeartKinetics - Project/.cometml-runs' path as offline directory. Pass 'offline_directory' parameter into constructor or set the 'COMET_OFFLINE_DIRECTORY' environment variable to manually choose where to store offline experiment archives.
COMET INFO: ---------------------------------------------------------------------------------------
COMET INFO: Comet.ml OfflineExperiment Summary
COMET INFO: ---------------------------------------------------------------------------------------
COMET INFO:   Data:
COMET INFO:     display_summary_level : 1
COMET INFO:     url                   : [OfflineExperiment will get URL after upload]
COMET INFO:   Metrics:
COMET INFO:     AUC      : 0.5
COMET INFO:     Accuracy : 0.725
COMET INFO:     F1       

In [None]:
!pip install -U skorch


[0m

In [None]:
#!pip install pytorch_tabular --quiet
#
#from skorch.classifier import NeuralNetClassifier
#from sklearn.pipeline import Pipeline
#from skorch.callbacks import EarlyStopping
#from pytorch_tabular import TabularModel
#from pytorch_tabular.models import TabNetModelConfig
#from pytorch_tabular.config import DataConfig, OptimizerConfig, TrainerConfig
#from skorch import NeuralNetClassifier
#
#
#
## load data
#data = sample
#
## define pipeline
#numeric_features = data.select_dtypes(include=['float64', 'int64']).columns.tolist()
#
#preprocessor = Pipeline(steps=[('transform', DataFrameTransformer(float_dtype='float32',
#                                                                  int_dtype='int32',
#                                                                  treat_int_as_categorical=False))])
#
#net = NeuralNetClassifier(module=nn_pipe,
#                          module__input_units=len(numeric_features),
#                          module__output_units=2,
#                          max_epochs=10,
#                          lr=0.1,
#                          optimizer=torch.optim.Adam,
#                          criterion=torch.nn.CrossEntropyLoss,
#                          device='cuda' if torch.cuda.is_available() else 'cpu')
#
#pipeline = Pipeline(steps=[('preprocessor', preprocessor),
#                           ('net', net)])
#
## create model using create_model()
#model = create_model(pipeline, verbose=False)
#
## evaluate model
#evaluate_model(model)
#
#
#

In [None]:
sample.columns

Index(['age', 'sex', 'weight', 'height', 'subject_id', 'study_id', 'hf_type',
       'sample_rate_scgk', 'sample_rate_rsp', 'nrg_lin_scgk_mean',
       'nrg_lin_scgk_std', 'nrg_lin_scgk_median', 'nrg_lin_scgk_min',
       'nrg_lin_scgk_max', 'nrg_lin_scgk_skew', 'nrg_lin_scgk_kurt',
       'nrg_lin_scgk_rms', 'nrg_lin_scgk_zero_crossings',
       'nrg_lin_scgk_wavelet_mean', 'nrg_lin_scgk_wavelet_std',
       'nrg_lin_scgk_wavelet_energy', 'nrg_rot_scgk_mean', 'nrg_rot_scgk_std',
       'nrg_rot_scgk_median', 'nrg_rot_scgk_min', 'nrg_rot_scgk_max',
       'nrg_rot_scgk_skew', 'nrg_rot_scgk_kurt', 'nrg_rot_scgk_rms',
       'nrg_rot_scgk_zero_crossings', 'nrg_rot_scgk_wavelet_mean',
       'nrg_rot_scgk_wavelet_std', 'nrg_rot_scgk_wavelet_energy',
       'pwr_lin_scgk_mean', 'pwr_lin_scgk_std', 'pwr_lin_scgk_median',
       'pwr_lin_scgk_min', 'pwr_lin_scgk_max', 'pwr_lin_scgk_skew',
       'pwr_lin_scgk_kurt', 'pwr_lin_scgk_rms', 'pwr_lin_scgk_zero_crossings',
       'pwr_lin_scgk_wave

In [None]:
sample.head()

Unnamed: 0,age,sex,weight,height,subject_id,study_id,hf_type,sample_rate_scgk,sample_rate_rsp,nrg_lin_scgk_mean,...,pwr_lin_peak_freq,pwr_lin_spectral_entropy,pwr_rot_mean_freq,pwr_rot_median_freq,pwr_rot_peak_freq,pwr_rot_spectral_entropy,rsp_mean_freq,rsp_median_freq,rsp_peak_freq,rsp_spectral_entropy
HAP-Test_MC_0_20220214-123731_nWY689kvoUKSD,69.0,Female,68.0,1.6,MC,HAP-Test,0,250,250,2.914829e-07,...,9.8e-05,6.723479,0.25,0.25,9.8e-05,7.227367,0.25,0.25,0.000978,1.206507
K-ICB_U_BDC10_20190910-092528_aBOB0aOW8tERH2_6s,29.0,Male,71.4,1.74,U,K-ICB,0,200,200,8.597045e-08,...,0.135128,6.807209,0.25,0.25,0.135128,6.852696,0.25,0.25,0.001501,2.238054
HAP-Test_TQ_1_20220214-090448_nWY6qKMdlhKSD,45.0,Male,95.0,1.75,TQ,HAP-Test,0,250,250,5.278546e-07,...,0.000196,6.739383,0.25,0.25,0.000196,7.145468,0.25,0.25,0.000392,2.844196
HAP-Test_MD_0_20220209-095223_nWYPWgGnpiKSD,70.0,Female,70.0,1.62,MD,HAP-Test,0,250,250,2.938018e-07,...,9.8e-05,6.717198,0.25,0.25,9.8e-05,7.142014,0.25,0.25,0.000979,2.397772
KINO-HFrEF-Brug_YE_kinoV1_20181214-140454_LXG25VlgGUWrCP,65.0,Male,74.0,1.73,YE,KINO-HFrEF-Brug,0,200,200,3.619068e-08,...,0.04508,6.474318,0.249979,0.249979,0.249979,7.269163,0.249979,0.249979,0.00175,2.525728


In [None]:
best = compare_models(n_select=3)
best_model_nn = compare_models(include=[nn_pipe, best], sort="F1")

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)


COMET INFO: No Comet API Key was found, creating an OfflineExperiment. Set up your API Key to get the full Comet experience https://www.comet.com/docs/python-sdk/advanced/#python-configuration
COMET INFO: No Comet API Key was found, creating an OfflineExperiment. Set up your API Key to get the full Comet experience https://www.comet.com/docs/python-sdk/advanced/#python-configuration
COMET INFO: Using '/home/brandon/Omdena/Omdena HeartKinetics/HeartKinetics - Project/.cometml-runs' path as offline directory. Pass 'offline_directory' parameter into constructor or set the 'COMET_OFFLINE_DIRECTORY' environment variable to manually choose where to store offline experiment archives.
COMET INFO: ---------------------------------------------------------------------------------------
COMET INFO: Comet.ml OfflineExperiment Summary
COMET INFO: ---------------------------------------------------------------------------------------
COMET INFO:   Data:
COMET INFO:     display_summary_level : 1
COMET

ValueError: Estimator [LGBMClassifier(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
               importance_type='split', learning_rate=0.1, max_depth=-1,
               min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,
               n_estimators=100, n_jobs=-1, num_leaves=31, objective=None,
               random_state=123, reg_alpha=0.0, reg_lambda=0.0, silent='warn',
               subsample=1.0, subsample_for_bin=200000, subsample_freq=0), DummyClassifier(constant=None, random_state=123, strategy='prior'), KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=-1, n_neighbors=5, p=2,
                     weights='uniform')] does not have the required fit() method.

In [None]:
#test models

if best_model_nn:
    prediction = predict_model(best_model_nn[0], data=sample)
else:
    print("No models found that include both nn_pipe and best.")



NameError: name 'best_model_nn' is not defined

In [None]:
#Plot AUC
plot_model(best_model_nn, plot = 'auc')

NameError: name 'best_model_nn' is not defined

In [None]:
#Plot Confusion Matrix
plot_model(best_model_nn, plot = 'confusion_matrix')

In [None]:
#Plot Feature Importance
plot_model(best_model_nn, plot = 'feature')

In [None]:
#Plot Precision Recall Curve
plot_model(best_model_nn, plot = 'pr')

In [None]:
#Plot Validation Curve
plot_model(best_model_nn, plot = 'vc')

In [None]:
#Plot Classification Report
plot_model(best_model_nn, plot = 'class_report')

In [None]:
#Plot Dimensions
plot_model(best_model_nn, plot = 'dimension')

In [None]:
!mlflow ui