## Import Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedKFold
from sklearn.utils import class_weight
from sklearn.metrics import classification_report

## Load Data

In [2]:
def create_pd(train_path,test_path):
    train=pd.read_csv(train_path)
    test=pd.read_csv(test_path)
    train.columns=[x for x in range(206)]
    test.columns=[x for x in range(206)]
    return pd.concat([train,test], axis=0, join='inner').sort_index()

In [3]:
mit_train_path="final_train_data.csv"
mit_test_path="final_test_data.csv"

In [4]:
mit= create_pd(mit_train_path,mit_test_path)
mit.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,196,197,198,199,200,201,202,203,204,205
0,0.99123,0.943533,0.764677,0.618571,0.379632,0.190822,0.040237,0.025995,0.031709,0.065524,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
0,1.0,0.811455,0.139592,0.081024,0.223213,0.168008,0.155161,0.131744,0.126489,0.115919,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
1,0.954146,0.910473,0.89311,0.76286,0.519833,0.348941,0.288343,0.234051,0.194448,0.155896,...,0.33727,0.340058,0.340473,0.340887,0.357433,0.346974,0.363124,0.36474,0.371849,2
1,0.971482,0.928969,0.572933,0.178457,0.122962,0.13236,0.094392,0.089575,0.030481,0.040499,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
2,1.0,0.959149,0.701378,0.231778,0.0,0.080698,0.128376,0.187448,0.280826,0.328261,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2


## create K-fold column

In [5]:
## For improving model prediction
## Split the dataset into the number of k folds. Start off with using your k-1 fold as the test dataset
## and the remaining folds as the training dataset. Train the model on the training dataset and validate
## it on the test dataset
def create_k_folds_column(df):
    df.loc[:,'kfold']=-1
    df=df.sample(frac=1).reset_index(drop=Tr
    y=df.loc[:,205].values
    kf=StratifiedKFold(n_splits=5)
    for fold,(target,index) in enumerate(kf.split(X=df,y=y)):
        df.loc[index,'kfold']=fold
    return df
mit= create_k_folds_column(mit)
    

In [6]:
mit.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,197,198,199,200,201,202,203,204,205,kfold
0,0.950297,0.832587,0.228129,0.0,0.173115,0.228129,0.218281,0.290551,0.285844,0.257275,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0
1,0.977252,0.926091,0.850117,0.776015,0.557504,0.268522,0.024103,0.0,0.003468,0.036005,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
2,1.0,0.930098,0.67929,0.397877,0.286402,0.229338,0.204175,0.165586,0.161234,0.161234,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
3,1.0,0.869081,0.459845,0.113726,0.122114,0.09254,0.062348,0.040389,0.027051,0.027051,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
4,0.97311,0.769387,0.0,0.106622,0.258245,0.275278,0.328471,0.312058,0.328471,0.344699,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0


In [7]:
mit.loc[:,205].astype('int').value_counts()

0    64327
3    17912
2    14199
1     3562
Name: 205, dtype: int64

## Defining Model

In [8]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, Activation, Flatten, Convolution1D, Dropout,MaxPooling1D,GlobalAveragePooling1D
from tensorflow.keras import Model, layers,Sequential,regularizers
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping,ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import LearningRateScheduler

In [9]:
## CNN
## 1D CNN can perform activity recognition task from accelerometer data and classifiation
def make_model(X_train):
    model= Sequential()
    model.add(Convolution1D(32,5,activation='relu',input_shape=(205,1)))
    model.add(Convolution1D(64,5,activation='relu'))         
    model.add(MaxPooling1D(3))
    model.add(Convolution1D(128, 3, activation='relu'))
    model.add(Convolution1D(256, 3, activation='relu'))
    model.add(GlobalAveragePooling1D())
    model.add(Dropout(0.3))
    model.add(Flatten())
    model.add(Dense(1024,activation='relu'))
    model.add(Dropout(0.3))
    model.add(Dense(256,activation='relu'))
    model.add(Dropout(0.3))
    model.add(Dense(32,activation='relu'))
    model.add(Dropout(0.3))
    model.add(Dense(4,activation='softmax'))
    model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy',tf.keras.metrics.AUC(name='auc')])
    return model



In [10]:
def training_data(train,valid):
    X_train=np.asarray(train.iloc[:,:205].values)
    y_train=train.iloc[:,205].values
    X_valid=np.asarray(valid.iloc[:,:205].values)
    y_valid=valid.iloc[:,205].values
    X_train=tf.expand_dims(X_train, axis=2)
    X_valid=tf.expand_dims(X_valid, axis=2)
    y_train=to_categorical(y_train)
    y_valid=to_categorical(y_valid)
    return X_train,y_train,X_valid,y_valid

**Fitting the model**

In [11]:
## As epochs increases accuracy increases
Epochs=1
Batch_size=64
my_callbacks = [EarlyStopping(patience=3,monitor='val_loss', mode='min',restore_best_weights=True),
               ReduceLROnPlateau(monitor='val_loss', factor=0.1,patience=2, min_lr=0.00001, mode='auto')]
dict_acc={}
dict_acc2={}

In [12]:
def run_train(fold):
    train=mit[mit["kfold"]!=fold].reset_index(drop=True)
    valid=mit[mit["kfold"]==fold].reset_index(drop=True)
    X_train,y_train,X_valid,y_valid=training_data(train,valid)
    model=make_model(X_train)
    history = model.fit(X_train,y_train,validation_split=0.1,batch_size=Batch_size,epochs=Epochs,callbacks=my_callbacks)
    model.save(f'model{fold}.h5')
    results = model.evaluate(X_valid, y_valid)
    print("Test Accuracy: {:.2f}%".format(results[1] * 100))
    print("     Test AUC: {:.4f}".format(results[2]))
    dict_acc[f"{i}"]= "Test Accuracy: {:.2f}%".format(results[1] * 100) 
    
    

## Epochs Running

In [13]:
## as fold range increases the accuracy increses 
for i in range(1):
    print(f"{i}-fold trained",sep="/n")
    run_train(i)
    print("_______________________________",sep='/n')
    print("_______________________________",sep='/n')

0-fold trained
Test Accuracy: 92.32%
     Test AUC: 0.9857
_______________________________
_______________________________


In [14]:
print(dict_acc)

{'0': 'Test Accuracy: 92.32%'}


In [15]:
# model trained with K-fold 0 give us best accuracy
mit_model=tf.keras.models.load_model('model0.h5')

In [16]:
mit_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 201, 32)           192       
                                                                 
 conv1d_1 (Conv1D)           (None, 197, 64)           10304     
                                                                 
 max_pooling1d (MaxPooling1D  (None, 65, 64)           0         
 )                                                               
                                                                 
 conv1d_2 (Conv1D)           (None, 63, 128)           24704     
                                                                 
 conv1d_3 (Conv1D)           (None, 61, 256)           98560     
                                                                 
 global_average_pooling1d (G  (None, 256)              0         
 lobalAveragePooling1D)                                 

## prediction

In [17]:

df11=pd.read_csv("train.csv")
# test split data
df11.heartbeat_signals = (df11.heartbeat_signals).str.split(',')
# output to nested list
df11.heartbeat_signals.tolist()
df3=df11.heartbeat_signals.tolist()
data1=pd.DataFrame(df3)
labels=df11["label"]
final_train_data=(pd.concat([data1,labels],axis = 1))
final_train_data=final_train_data.drop("label",axis=1)
final_train_data=final_train_data.head(20000)


In [18]:
# converting object datatype to float
def coerce_to_float(val):
    try:
       return float(val)
    except ValueError:
       return val

final_train_data=final_train_data.applymap(lambda x: coerce_to_float(x))

In [19]:
final_train_data=final_train_data.values.reshape((final_train_data.shape[0],final_train_data.shape[1], 1))

In [20]:
## this is train data
train_pred=mit_model.predict(final_train_data)
# label result
train_pred



array([[8.8047248e-01, 1.0186024e-02, 5.9715334e-02, 4.9626172e-02],
       [9.5900935e-01, 2.8519407e-02, 1.0304433e-02, 2.1667378e-03],
       [7.0132214e-01, 4.6170715e-02, 1.9761342e-01, 5.4893736e-02],
       ...,
       [1.0463420e-02, 5.0883752e-04, 7.0565206e-01, 2.8337577e-01],
       [9.8777699e-01, 1.2130957e-02, 8.9225949e-05, 2.8027196e-06],
       [9.3150276e-01, 3.0423803e-02, 2.8989187e-02, 9.0842573e-03]],
      dtype=float32)

In [21]:
df2=pd.read_csv("test.csv")
# test split data
df2.heartbeat_signals = (df2.heartbeat_signals).str.split(',')
# output to nested list
df2.heartbeat_signals.tolist()
df3=df2.heartbeat_signals.tolist()
data1=pd.DataFrame(df3)
labels=df2["label"]
final_test_data=(pd.concat([data1,labels],axis = 1))
final_test_data=final_test_data.drop("label",axis=1)

In [22]:
# converting object datatype to float
def coerce_to_float(val):
    try:
       return float(val)
    except ValueError:
       return val

final_test_data=final_test_data.applymap(lambda x: coerce_to_float(x))

In [23]:
final_test_data=final_test_data.values.reshape((final_test_data.shape[0],final_test_data.shape[1], 1))

In [24]:
test_pred=mit_model.predict(final_test_data)



In [25]:
# results
test_pred

array([[9.6487468e-01, 3.4420632e-02, 6.6738256e-04, 3.7298305e-05],
       [2.8410248e-02, 5.7000900e-03, 9.4905704e-01, 1.6832611e-02],
       [8.7926543e-01, 2.8252151e-02, 6.7040108e-02, 2.5442388e-02],
       ...,
       [1.1546554e-05, 5.1972097e-11, 1.4595146e-04, 9.9984252e-01],
       [2.8341966e-02, 4.3376442e-03, 9.4681984e-01, 2.0500537e-02],
       [9.3120146e-01, 6.7796096e-02, 9.6501346e-04, 3.7458460e-05]],
      dtype=float32)

In [26]:
## Mean square error
import sklearn
from sklearn.metrics import mean_absolute_error
sklearn.metrics.mean_absolute_error(test_pred,train_pred, sample_weight=None, multioutput='uniform_average')#raw_values

0.25763243

## showing the predicted labels

In [27]:
Xnew=final_test_data[0:3]

In [28]:
ynew = mit_model.predict(Xnew)
# show the inputs and predicted outputs
for i in range(len(Xnew)):
 print("X=%s, Predicted=%s" % (Xnew[i], ynew[i]))

X=[[1.        ]
 [0.81145466]
 [0.13959212]
 [0.0810245 ]
 [0.22321331]
 [0.1680078 ]
 [0.15516092]
 [0.13174426]
 [0.12648853]
 [0.11591929]
 [0.11856887]
 [0.11193578]
 [0.10527205]
 [0.11060549]
 [0.10527205]
 [0.11856887]
 [0.11060549]
 [0.12121359]
 [0.12121359]
 [0.13698091]
 [0.13959212]
 [0.15773947]
 [0.16031341]
 [0.16288277]
 [0.18327456]
 [0.19838145]
 [0.22074927]
 [0.24520231]
 [0.27875487]
 [0.29289905]
 [0.3184753 ]
 [0.32995263]
 [0.35263699]
 [0.3537619 ]
 [0.35488594]
 [0.37053111]
 [0.35937333]
 [0.35488594]
 [0.32537264]
 [0.30458086]
 [0.26207613]
 [0.24520231]
 [0.21580853]
 [0.20088396]
 [0.19712857]
 [0.19336338]
 [0.1958746 ]
 [0.18327456]
 [0.18832779]
 [0.18832779]
 [0.19084778]
 [0.18832779]
 [0.20088396]
 [0.20088396]
 [0.21085081]
 [0.20836554]
 [0.20587599]
 [0.21085081]
 [0.20338213]
 [0.20836554]
 [0.18832779]
 [0.19336338]
 [0.19084778]
 [0.18327456]
 [0.17820357]
 [0.18074129]
 [0.17566138]
 [0.17056351]
 [0.16544756]
 [0.16031341]
 [0.16544756]
 [0.

In [29]:
##"""Training labels
#Xnew1=final_train_data[0:3]
#ynew = mit_model.predict(Xnew1)
# show the inputs and predicted outputs
#for i in range(len(Xnew1)):
 print("X=%s, Predicted=%s" % (Xnew1[i], ynew[i]))"""

SyntaxError: invalid syntax (Temp/ipykernel_2252/4100324783.py, line 1)