In [62]:
# import essential libraries
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.model_selection import train_test_split, cross_validate, GridSearchCV
import warnings
warnings.filterwarnings('ignore')

In [63]:

# read in csv 
df = pd.read_csv('training_11_features.csv')
df = df.dropna()
# df = df[df['ritmi'] != 0]
df = df.reset_index(drop=True)
print(df.shape)
df.head()

(1803, 11)


Unnamed: 0,ritmi,age,sex,height,weight,heart_axis,validated_by,second_opinion,validated_by_human,pacemaker,strat_fold
0,2,29.0,1,164.0,56.0,0,0.0,0,1,0,1
1,0,59.0,0,156.0,75.0,0,0.0,0,1,0,9
2,2,84.0,1,152.0,51.0,0,0.0,0,1,0,7
3,0,79.0,0,172.0,66.0,0,0.0,0,1,0,5
4,1,67.0,0,178.0,73.0,4,0.0,0,1,0,5


In [64]:
# convert all columns' types to int64
df['age'] = df['age'].astype('int64')
df['height'] = df['height'].astype('int64')
df['weight'] = df['weight'].astype('int64')
df['validated_by'] = df['validated_by'].astype('int64')

# get info for columns
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1803 entries, 0 to 1802
Data columns (total 11 columns):
 #   Column              Non-Null Count  Dtype
---  ------              --------------  -----
 0   ritmi               1803 non-null   int64
 1   age                 1803 non-null   int64
 2   sex                 1803 non-null   int64
 3   height              1803 non-null   int64
 4   weight              1803 non-null   int64
 5   heart_axis          1803 non-null   int64
 6   validated_by        1803 non-null   int64
 7   second_opinion      1803 non-null   int64
 8   validated_by_human  1803 non-null   int64
 9   pacemaker           1803 non-null   int64
 10  strat_fold          1803 non-null   int64
dtypes: int64(11)
memory usage: 155.1 KB


In [65]:
# train-test split
X = df.drop(columns='ritmi')
y = df['ritmi']
X_train, X_test, y_train, y_test=train_test_split(X, y, test_size = 0.25, random_state = 246)

In [66]:
#Random Forest

In [67]:
# Plug in appropriate max_depth and random_state parameters
rf = RandomForestClassifier()
rf_param_grid = {'n_estimators': [600], 'criterion': ['entropy'], 'max_depth': [60]} #0.4615443314230772

rf_cv= GridSearchCV(rf,rf_param_grid,cv=7,n_jobs=-1)

rf_cv.fit(X_train,y_train)

print("Best Score:" + str(rf_cv.best_score_))
print("Best Parameters: " + str(rf_cv.best_params_))

Best Score:0.46596640899829833
Best Parameters: {'criterion': 'entropy', 'max_depth': 60, 'n_estimators': 600}


In [68]:
y_pred = rf_cv.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.47      0.47      0.47       162
           1       0.46      0.43      0.44       117
           2       0.46      0.48      0.47       172

    accuracy                           0.46       451
   macro avg       0.46      0.46      0.46       451
weighted avg       0.46      0.46      0.46       451



In [69]:
# K Neighbors Classifier

In [70]:
clfl2 = KNeighborsClassifier()
parameters = {'n_neighbors': [150], 'weights': ['distance'], 'metric': ['euclidean']} #0.4889326226595599

fitmodel = GridSearchCV(clfl2, param_grid=parameters, cv=5, refit=True, scoring="accuracy", n_jobs=-1, verbose=3)
fitmodel.fit(X_train, y_train)
print(fitmodel.best_estimator_, fitmodel.best_params_, fitmodel.best_score_)

Fitting 5 folds for each of 1 candidates, totalling 5 fits
KNeighborsClassifier(metric='euclidean', n_neighbors=150, weights='distance') {'metric': 'euclidean', 'n_neighbors': 150, 'weights': 'distance'} 0.4881918819188192


In [71]:
y_pred = fitmodel.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.47      0.40      0.43       162
           1       0.49      0.47      0.48       117
           2       0.43      0.50      0.46       172

    accuracy                           0.46       451
   macro avg       0.46      0.46      0.46       451
weighted avg       0.46      0.46      0.46       451



In [92]:
#Convo1D
!pip install  --default-timeout=100 tensorflow



In [148]:
from keras.layers import Conv1D, MaxPooling1D, Dense, Dropout, Input, Flatten, SeparableConv1D
from tensorflow.keras.layers import BatchNormalization
from keras.models import Model
from sklearn.metrics import accuracy_score
from keras.optimizers import Adam
import math
from keras.callbacks import LearningRateScheduler, ModelCheckpoint

In [149]:
def build_model():
    input_img = Input(shape=(feature, depth), name='ImageInput')
    x = Conv1D(32, 3, activation='relu', padding='same', name='Conv1_1')(input_img)
    x = Conv1D(32, 3, activation='relu', padding='same', name='Conv1_2')(x)
    x = MaxPooling1D(2, name='pool1')(x)
    
    x = SeparableConv1D(32, 3, activation='relu', padding='same', name='Conv2_1')(x)
    x = SeparableConv1D(32, 3, activation='relu', padding='same', name='Conv2_2')(x)
    x = MaxPooling1D(2, name='pool2')(x)
    
    x = SeparableConv1D(64, 3, activation='relu', padding='same', name='Conv3_1')(x)
    x = BatchNormalization(name='bn1')(x)
    x = SeparableConv1D(64, 3, activation='relu', padding='same', name='Conv3_2')(x)
    x = BatchNormalization(name='bn2')(x)
    
    x = SeparableConv1D(64, 3, activation='relu', padding='same', name='Conv3_3')(x)
    x = MaxPooling1D(2, name='pool3')(x)
    
    x = Flatten(name='flatten')(x)
    x = Dense(128, activation='relu', name='fc1')(x)
    x = Dropout(0.6, name='dropout1')(x)
    x = Dense(128, activation='relu', name='fc2')(x)
    x = Dropout(0.5, name='dropout2')(x)
    x = Dense(5, activation='softmax', name='fc3')(x)
    
    model = Model(inputs=input_img, outputs=x)
    return model


In [150]:
print("X_train", X_train.shape)
print("y_train", y_train.shape)
print("X_test", X_test.shape)
print("y_test", y_test.shape)

X_train (1352, 10)
y_train (1352,)
X_test (451, 10)
y_test (451,)


In [151]:
n_obs ,feature = X_train.shape
depth = 1

In [152]:
model =  build_model()
model.summary()

Model: "model_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 ImageInput (InputLayer)     [(None, 10, 1)]           0         
                                                                 
 Conv1_1 (Conv1D)            (None, 10, 32)            128       
                                                                 
 Conv1_2 (Conv1D)            (None, 10, 32)            3104      
                                                                 
 pool1 (MaxPooling1D)        (None, 5, 32)             0         
                                                                 
 Conv2_1 (SeparableConv1D)   (None, 5, 32)             1152      
                                                                 
 Conv2_2 (SeparableConv1D)   (None, 5, 32)             1152      
                                                                 
 pool2 (MaxPooling1D)        (None, 2, 32)             0   

In [153]:
adam = Adam(lr = 0.001, beta_1 = 0.9, beta_2 = 0.999)
model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])

In [154]:
def exp_decay(epoch):
    initial_lrate = 0.001
    k = 0.75
    t = n_obs//(10000 * batch_size)  # every epoch we do n_obs/batch_size iteration
    lrate = initial_lrate * math.exp(-k*t)
    return lrate

lrate = LearningRateScheduler(exp_decay)

In [155]:
batch_size = 500
history = model.fit(X_train, y_train, 
                    epochs=75, 
                    batch_size=batch_size, 
                    verbose=2, 
                    validation_data=(X_test, y_test), 
                    callbacks=[lrate])

Epoch 1/75


ValueError: in user code:

    File "/home/tabeedhassan/.local/lib/python3.8/site-packages/keras/engine/training.py", line 1160, in train_function  *
        return step_function(self, iterator)
    File "/home/tabeedhassan/.local/lib/python3.8/site-packages/keras/engine/training.py", line 1146, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/home/tabeedhassan/.local/lib/python3.8/site-packages/keras/engine/training.py", line 1135, in run_step  **
        outputs = model.train_step(data)
    File "/home/tabeedhassan/.local/lib/python3.8/site-packages/keras/engine/training.py", line 994, in train_step
        loss = self.compute_loss(x, y, y_pred, sample_weight)
    File "/home/tabeedhassan/.local/lib/python3.8/site-packages/keras/engine/training.py", line 1052, in compute_loss
        return self.compiled_loss(
    File "/home/tabeedhassan/.local/lib/python3.8/site-packages/keras/engine/compile_utils.py", line 265, in __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "/home/tabeedhassan/.local/lib/python3.8/site-packages/keras/losses.py", line 152, in __call__
        losses = call_fn(y_true, y_pred)
    File "/home/tabeedhassan/.local/lib/python3.8/site-packages/keras/losses.py", line 272, in call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "/home/tabeedhassan/.local/lib/python3.8/site-packages/keras/losses.py", line 1990, in categorical_crossentropy
        return backend.categorical_crossentropy(
    File "/home/tabeedhassan/.local/lib/python3.8/site-packages/keras/backend.py", line 5529, in categorical_crossentropy
        target.shape.assert_is_compatible_with(output.shape)

    ValueError: Shapes (None, 1) and (None, 5) are incompatible
