# Deep Learning Term Project for ECG (DIL700)

## Imports

In [3]:
import numpy as np
import pandas as pd
import seaborn as sns
%matplotlib widget
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
from keras.utils.np_utils import to_categorical
from sklearn.utils import class_weight
import warnings
import os
from sklearn.decomposition import KernelPCA
from sklearn.manifold import TSNE
from general_functions import *


from sklearn.preprocessing import OneHotEncoder
warnings.filterwarnings('ignore')

## Loading Dataset

Collect all the data from the csv files

In [4]:
# Build one DataFrame per split from the CSV files under each directory.
# combine_csv_files_to_df is presumably provided by the star import from
# general_functions -- confirm against that module.
train_df, test_df = map(
    combine_csv_files_to_df,
    ('./Dataset_mitbih/Training/', './Dataset_mitbih/Testing/'),
)

In [5]:
# Preview the combined training frame (notebook rich display of the last expression).
train_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,178,179,180,181,182,183,184,185,186,187
0,0.977941,0.926471,0.681373,0.245098,0.154412,0.191176,0.151961,0.085784,0.058824,0.049020,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.960114,0.863248,0.461538,0.196581,0.094017,0.125356,0.099715,0.088319,0.074074,0.082621,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1.000000,0.659459,0.186486,0.070270,0.070270,0.059459,0.056757,0.043243,0.054054,0.045946,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.925414,0.665746,0.541436,0.276243,0.196133,0.077348,0.071823,0.060773,0.066298,0.058011,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.967136,1.000000,0.830986,0.586854,0.356808,0.248826,0.145540,0.089202,0.117371,0.150235,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87549,0.807018,0.494737,0.536842,0.529825,0.491228,0.484211,0.456140,0.396491,0.284211,0.136842,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0
87550,0.718333,0.605000,0.486667,0.361667,0.231667,0.120000,0.051667,0.001667,0.000000,0.013333,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0
87551,0.906122,0.624490,0.595918,0.575510,0.530612,0.481633,0.444898,0.387755,0.322449,0.191837,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0
87552,0.858228,0.645570,0.845570,0.248101,0.167089,0.131646,0.121519,0.121519,0.118987,0.103797,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0


In [6]:
# Preview the combined testing frame (notebook rich display of the last expression).
test_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,178,179,180,181,182,183,184,185,186,187
0,1.000000,0.758264,0.111570,0.000000,0.080579,0.078512,0.066116,0.049587,0.047521,0.035124,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.908425,0.783883,0.531136,0.362637,0.366300,0.344322,0.333333,0.307692,0.296703,0.300366,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.730088,0.212389,0.000000,0.119469,0.101770,0.101770,0.110619,0.123894,0.115044,0.132743,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1.000000,0.910417,0.681250,0.472917,0.229167,0.068750,0.000000,0.004167,0.014583,0.054167,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.570470,0.399329,0.238255,0.147651,0.000000,0.003356,0.040268,0.080537,0.070470,0.090604,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21887,0.928736,0.871264,0.804598,0.742529,0.650575,0.535632,0.394253,0.250575,0.140230,0.102299,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0
21888,0.802691,0.692078,0.587444,0.446936,0.318386,0.189836,0.118087,0.077728,0.112108,0.152466,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0
21889,1.000000,0.967359,0.620178,0.347181,0.139466,0.089021,0.103858,0.100890,0.106825,0.100890,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0
21890,0.984127,0.567460,0.607143,0.583333,0.607143,0.575397,0.575397,0.488095,0.392857,0.238095,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0


In [7]:
# List the distinct values in column 187, which the cells below treat as the class label.
print(train_df[187].unique())

[0. 1. 2. 3. 4.]


In [8]:
# Human-readable class names, indexed by the integer class value stored in column 187.
labels = [
    'Normal',
    'Atrial premature',
    'Premature ventricular contraction',
    'Fusion of ventricular and normal',
    'Paced',
]

In [9]:
# Row count per class in the training frame; position in `labels` is the class value.
print('Number of samples in data:')
for class_id, class_name in enumerate(labels):
    print(class_name, ':', len(train_df[train_df[187] == class_id]))

Number of samples in data:
Normal : 72471
Artial Premature : 2223
Premature ventricular contraction : 5788
Fusion of ventricular and normal : 641
Paced : 6431


In [10]:
# Row count per class in the testing frame; position in `labels` is the class value.
print('Number of samples in data:')
for class_id, class_name in enumerate(labels):
    print(class_name, ':', len(test_df[test_df[187] == class_id]))

Number of samples in data:
Normal : 18118
Artial Premature : 556
Premature ventricular contraction : 1448
Fusion of ventricular and normal : 162
Paced : 1608


## Create features and labels for dataset

Split the training and testing dataframes into features and labels.
The labels are also one-hot encoded, and the encoded columns are renamed to the class codes N, S, V, F and Q.

In [12]:
# Short class codes used as column names of the one-hot label frames
# (keys are the class values found in the last column).
class_codes = {0: "N", 1: "S", 2: "V", 3: "F", 4: "Q"}

# The last column is the class label; every column before it is used as a feature.
train_X = train_df.iloc[:, :-1]
train_y = train_df.iloc[:, -1]
# get_dummies' `columns=` argument only applies to DataFrame input, so it is
# omitted for these label Series.
train_y_nn = pd.get_dummies(train_y).rename(columns=class_codes)

test_X = test_df.iloc[:, :-1]
test_y = test_df.iloc[:, -1]
test_y_nn = pd.get_dummies(test_y).rename(columns=class_codes)

In [13]:
# Preview the training features (every column of train_df except the last).
train_X

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,177,178,179,180,181,182,183,184,185,186
0,0.977941,0.926471,0.681373,0.245098,0.154412,0.191176,0.151961,0.085784,0.058824,0.049020,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.960114,0.863248,0.461538,0.196581,0.094017,0.125356,0.099715,0.088319,0.074074,0.082621,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1.000000,0.659459,0.186486,0.070270,0.070270,0.059459,0.056757,0.043243,0.054054,0.045946,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.925414,0.665746,0.541436,0.276243,0.196133,0.077348,0.071823,0.060773,0.066298,0.058011,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.967136,1.000000,0.830986,0.586854,0.356808,0.248826,0.145540,0.089202,0.117371,0.150235,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87549,0.807018,0.494737,0.536842,0.529825,0.491228,0.484211,0.456140,0.396491,0.284211,0.136842,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
87550,0.718333,0.605000,0.486667,0.361667,0.231667,0.120000,0.051667,0.001667,0.000000,0.013333,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
87551,0.906122,0.624490,0.595918,0.575510,0.530612,0.481633,0.444898,0.387755,0.322449,0.191837,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
87552,0.858228,0.645570,0.845570,0.248101,0.167089,0.131646,0.121519,0.121519,0.118987,0.103797,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [14]:
# Preview the raw training labels (last column of train_df).
train_y

0        0.0
1        0.0
2        0.0
3        0.0
4        0.0
        ... 
87549    4.0
87550    4.0
87551    4.0
87552    4.0
87553    4.0
Name: 187, Length: 87554, dtype: float64

In [18]:
# Preview the one-hot encoded training labels (columns renamed to N/S/V/F/Q).
train_y_nn

Unnamed: 0,N,S,V,F,Q
0,1,0,0,0,0
1,1,0,0,0,0
2,1,0,0,0,0
3,1,0,0,0,0
4,1,0,0,0,0
...,...,...,...,...,...
87549,0,0,0,0,1
87550,0,0,0,0,1
87551,0,0,0,0,1
87552,0,0,0,0,1


In [15]:
# Preview the testing features (every column of test_df except the last).
test_X

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,177,178,179,180,181,182,183,184,185,186
0,1.000000,0.758264,0.111570,0.000000,0.080579,0.078512,0.066116,0.049587,0.047521,0.035124,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.908425,0.783883,0.531136,0.362637,0.366300,0.344322,0.333333,0.307692,0.296703,0.300366,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.730088,0.212389,0.000000,0.119469,0.101770,0.101770,0.110619,0.123894,0.115044,0.132743,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1.000000,0.910417,0.681250,0.472917,0.229167,0.068750,0.000000,0.004167,0.014583,0.054167,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.570470,0.399329,0.238255,0.147651,0.000000,0.003356,0.040268,0.080537,0.070470,0.090604,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21887,0.928736,0.871264,0.804598,0.742529,0.650575,0.535632,0.394253,0.250575,0.140230,0.102299,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
21888,0.802691,0.692078,0.587444,0.446936,0.318386,0.189836,0.118087,0.077728,0.112108,0.152466,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
21889,1.000000,0.967359,0.620178,0.347181,0.139466,0.089021,0.103858,0.100890,0.106825,0.100890,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
21890,0.984127,0.567460,0.607143,0.583333,0.607143,0.575397,0.575397,0.488095,0.392857,0.238095,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [16]:
# Preview the raw testing labels (last column of test_df).
test_y

0        0.0
1        0.0
2        0.0
3        0.0
4        0.0
        ... 
21887    4.0
21888    4.0
21889    4.0
21890    4.0
21891    4.0
Name: 187, Length: 21892, dtype: float64

In [17]:
# Preview the one-hot encoded testing labels (columns renamed to N/S/V/F/Q).
test_y_nn

Unnamed: 0,N,S,V,F,Q
0,1,0,0,0,0
1,1,0,0,0,0
2,1,0,0,0,0
3,1,0,0,0,0
4,1,0,0,0,0
...,...,...,...,...,...
21887,0,0,0,0,1
21888,0,0,0,0,1
21889,0,0,0,0,1
21890,0,0,0,0,1


## Visualization

In [None]:
# First training row without its label: positions 0..186 are plotted, column 187 is the class.
ecg_example = train_df.iloc[0, :187]

# Use a dedicated figure so the trace is not drawn into whichever figure
# happens to be current (the widget backend keeps earlier figures alive).
fig, ax = plt.subplots()
ax.plot(ecg_example)
ax.set_title('Example ECG beat (training row 0)')
ax.set_xlabel('Sample index')
ax.set_ylabel('Amplitude')
plt.show()

In [None]:
# Store the class label as int (it is read from the CSV files as float).
train_df[187] = train_df[187].astype(int)

# Count rows per class and order the counts by class value, so that position k
# always corresponds to labels[k] when the counts are plotted against `labels`.
equilibre = train_df[187].value_counts().sort_index()
print(equilibre)

In [None]:
# Donut chart of the class balance: a pie of the per-class counts with a white
# disc drawn over its centre.
p = plt.figure(figsize=(20, 10))
donut_ax = p.gca()
donut_ax.pie(
    equilibre,
    labels=labels,
    colors=['red', 'green', 'blue', 'skyblue', 'orange'],
    autopct='%1.1f%%',
)
my_circle = plt.Circle((0, 0), 0.7, color='white')
donut_ax.add_artist(my_circle)
plt.show()

In [None]:
def plot_ecg_examples(df, label, n_examples=5, random_state=42):
    """Return a reproducible random sample of the rows belonging to one class.

    Despite its name, this function does not plot anything; it only selects
    the rows that callers plot afterwards.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose column ``187`` holds the class label.
    label : int or float
        Class value to keep.
    n_examples : int, default 5
        Number of rows to draw (without replacement).
    random_state : int, default 42
        Seed forwarded to ``DataFrame.sample``. The default reproduces the
        seed that used to be hard-coded here.

    Returns
    -------
    pandas.DataFrame
        ``n_examples`` rows of ``df`` (all columns, original index) for which
        ``df[187] == label``.

    Raises
    ------
    ValueError
        Raised by ``DataFrame.sample`` when the class has fewer than
        ``n_examples`` rows.
    """
    filtered_df = df[df[187] == label]
    return filtered_df.sample(n=n_examples, random_state=random_state)


In [None]:
# Five random beats for each of the five classes (seed fixed inside plot_ecg_examples).
samples = [plot_ecg_examples(train_df, class_id, 5) for class_id in range(5)]

fig, axs = plt.subplots(3, 2, figsize=(20, 7))
titles = ['Normal', 'Atrial premature', 'Premature ventricular contraction', 'Fusion of ventricular and normal', 'Paced']
for ax, class_samples, title in zip(axs.flat, samples, titles):
    # Drop only the label column (the last one); transposing gives one line per beat.
    ax.plot(class_samples.values[:, :-1].transpose())
    ax.set_title(title)

# The 3x2 grid has six axes but there are only five classes: hide the spare one.
for spare_ax in axs.flat[len(samples):]:
    spare_ax.set_visible(False)

fig.tight_layout()
plt.show()

# Heatmaps per class

In [None]:
def plot_hist(class_number,size,min_,bins):
    """Show a 2-D histogram (sample index vs. amplitude) for one class of ``train_df``.

    Reads the module-level ``train_df``, keeps the rows whose column ``187``
    equals ``class_number`` and the columns ``min_ .. size - 1``, then bins
    every (column index, value) pair with ``plt.hist2d``.

    Parameters
    ----------
    class_number : int
        Class value to select in column ``187``.
    size : int
        Exclusive upper bound of the column window.
    min_ : int
        Inclusive lower bound of the column window.
    bins : int
        Number of bins used along both axes.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``; nothing is returned.
    """
    img=train_df.loc[train_df[187]==class_number].values
    img=img[:,min_:size]
    img_flatten=img.flatten()

    # x coordinate of every flattened value: the window's column indices repeated
    # once per row. np.tile builds this in a single allocation (instead of growing
    # an array row by row) and yields an empty array when the class has no rows,
    # keeping x and y the same length.
    final1=np.tile(np.arange(min_,size), img.shape[0])
    print(len(final1))
    print(len(img_flatten))
    plt.hist2d(final1,img_flatten, bins=(bins,bins),cmap=plt.cm.jet)
    plt.show()

## Heatmap of class 0 (Normal)

In [None]:
# Density of class-0 beats over sample indices 5..69, using 65x65 bins.
# plot_hist shows the figure itself and has no return statement, so hist_class0 is None.
fig = plt.figure()
hist_class0 = plot_hist(0,70,5,65)

In [None]:
# Overlay five random class-0 beats on the sample window [5, 70).
fig, axs = plt.subplots(1, 1, figsize=(20, 7))
class0 = plot_ecg_examples(train_df, 0, n_examples=5)
# A single call draws all five beats (one line per column after the transpose);
# only the label column is dropped before slicing the window.
axs.plot(np.arange(5, 70), class0.values[:, :-1].transpose()[5:70])
axs.set_title('Normal: 5 random beats')
axs.set_xlabel('Sample index')
axs.set_ylabel('Amplitude')
fig.tight_layout()
plt.show()

## Heatmap of class 1 (Atrial premature)

In [None]:
# Density of class-1 beats over sample indices 5..49, using 45x45 bins.
# plot_hist shows the figure itself and has no return statement, so hist_class1 is None.
fig = plt.figure()
hist_class1 = plot_hist(1,50,5,45)

In [None]:
# Overlay five random class-1 beats on the sample window [5, 70).
fig, axs = plt.subplots(1, 1, figsize=(20, 7))
class1 = plot_ecg_examples(train_df, 1, n_examples=5)
# A single call draws all five beats (one line per column after the transpose);
# only the label column is dropped before slicing the window.
axs.plot(np.arange(5, 70), class1.values[:, :-1].transpose()[5:70])
axs.set_title('Atrial premature: 5 random beats')
axs.set_xlabel('Sample index')
axs.set_ylabel('Amplitude')
fig.tight_layout()
plt.show()

## Heatmap of class 2 (Premature ventricular contraction)

In [None]:
# Density of class-2 beats over sample indices 5..49, using 45x45 bins.
# plot_hist shows the figure itself and has no return statement, so hist_class2 is None.
fig = plt.figure()
hist_class2 = plot_hist(2,50,5,45)

In [None]:
# Overlay five random class-2 beats on the sample window [5, 70).
fig, axs = plt.subplots(1, 1, figsize=(20, 7))
class2 = plot_ecg_examples(train_df, 2, n_examples=5)
# A single call draws all five beats (one line per column after the transpose);
# only the label column is dropped before slicing the window.
axs.plot(np.arange(5, 70), class2.values[:, :-1].transpose()[5:70])
axs.set_title('Premature ventricular contraction: 5 random beats')
axs.set_xlabel('Sample index')
axs.set_ylabel('Amplitude')
fig.tight_layout()
plt.show()

## Heatmap of class 3 (Fusion of ventricular and normal)

In [None]:
# Density of class-3 beats over sample indices 5..59, using 55x55 bins.
# plot_hist shows the figure itself and has no return statement, so hist_class3 is None.
fig = plt.figure()
hist_class3 = plot_hist(3,60,5,55)

In [None]:
# Overlay five random class-3 beats on the sample window [5, 70).
fig, axs = plt.subplots(1, 1, figsize=(20, 7))
class3 = plot_ecg_examples(train_df, 3, n_examples=5)
# A single call draws all five beats (one line per column after the transpose);
# only the label column is dropped before slicing the window.
axs.plot(np.arange(5, 70), class3.values[:, :-1].transpose()[5:70])
axs.set_title('Fusion of ventricular and normal: 5 random beats')
axs.set_xlabel('Sample index')
axs.set_ylabel('Amplitude')
fig.tight_layout()
plt.show()

## Heatmap of class 4 (Paced)

In [None]:
# Density of class-4 beats over sample indices 5..69, using 65x65 bins.
# plot_hist shows the figure itself and has no return statement, so hist_class4 is None.
fig = plt.figure()
hist_class4 = plot_hist(4,70,5,65)

In [None]:
# Overlay five random class-4 beats on the sample window [5, 70).
fig, axs = plt.subplots(1, 1, figsize=(20, 7))
class4 = plot_ecg_examples(train_df, 4, n_examples=5)
# A single call draws all five beats (one line per column after the transpose);
# only the label column is dropped before slicing the window.
axs.plot(np.arange(5, 70), class4.values[:, :-1].transpose()[5:70])
axs.set_title('Paced: 5 random beats')
axs.set_xlabel('Sample index')
axs.set_ylabel('Amplitude')
fig.tight_layout()
plt.show()

## PCA

In [None]:
from sklearn.decomposition import KernelPCA

In [None]:
def get_random_data_from_class(df, class_number, n_examples=5):
    """Draw ``n_examples`` rows of one class from ``df`` with a fixed seed.

    Rows are selected where column ``187`` equals ``class_number`` and then
    sampled without replacement using ``random_state=42``, so repeated calls
    on the same frame return the same rows.
    """
    class_mask = df[187] == class_number
    return df.loc[class_mask].sample(n=n_examples, random_state=42)

In [None]:
def pairplot_pca(np_result_pca, input_data):
    """Draw a seaborn pairplot of projected components, coloured by class.

    Parameters
    ----------
    np_result_pca : array-like
        Projected data, one row per sample and one column per component.
    input_data : pandas.DataFrame
        Frame the projection was computed from; its column ``187`` supplies
        the class of each row, matched to ``np_result_pca`` by position.
    """
    components = pd.DataFrame(np_result_pca)
    # Re-index the labels positionally so they line up with the component rows.
    class_column = pd.DataFrame(input_data[187]).set_index(components.index)
    combined = pd.concat([components, class_column], axis=1)

    # The label is the last column of the combined frame and drives the hue.
    sns.pairplot(data=combined, hue=combined.columns[-1], palette='Set1')

In [None]:
def plot_3D_pca(np_result_pca, input_data, n_classes):
    """Show a 3-D scatter of the first three projected components per class.

    Parameters
    ----------
    np_result_pca : numpy.ndarray
        Projected data with at least three columns; row ``k`` corresponds to
        row ``k`` of ``input_data`` by position.
    input_data : pandas.DataFrame
        Frame the projection was computed from; column ``187`` holds the class.
    n_classes : int
        Classes ``0 .. n_classes - 1`` are drawn, one colour each. Colours
        repeat when there are more classes than entries in the palette.
    """
    # Plain array so the boolean mask below is positional, not index-aligned.
    y = np.asarray(input_data[187])
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    colors = ['r', 'g', 'b', 'c', 'm']

    for i in range(n_classes):
        in_class = y == i
        ax.scatter(
            np_result_pca[in_class, 0],
            np_result_pca[in_class, 1],
            np_result_pca[in_class, 2],
            c=colors[i % len(colors)],  # wrap around instead of raising IndexError
            label=f"Class {i}",
        )

    ax.set_xlabel('PCA Component 1')
    ax.set_ylabel('PCA Component 2')
    ax.set_zlabel('PCA Component 3')
    ax.legend()
    plt.show()


In [None]:
# 100 seeded random rows from each of the five classes of the training frame.
pca_data_0, pca_data_1, pca_data_2, pca_data_3, pca_data_4 = (
    get_random_data_from_class(train_df, class_id, n_examples=100)
    for class_id in range(5)
)

# Stack the per-class samples into a single frame (500 rows, label column kept).
pca_data = pd.concat([pca_data_0, pca_data_1, pca_data_2, pca_data_3, pca_data_4])

## Linear PCA

In [None]:
# Find the first 3 principal components for both the datasets
linear_pca = KernelPCA(kernel="linear", n_components=5, random_state=0)

np_linear_pca = linear_pca.fit_transform(pca_data[pca_data.columns[:-1]])

In [None]:
# 3-D scatter of the first three linear-PCA components, one colour per class (5 classes).
plot_3D_pca(np_linear_pca, pca_data, 5)

In [None]:
# Pairwise scatter plots of all linear-PCA components, coloured by class label.
pairplot_pca(np_linear_pca, pca_data)

## Sigmoid PCA

In [None]:
# Find the first 3 principal components for both the datasets
sigmoid_pca = KernelPCA(kernel="sigmoid", n_components=3, random_state=0)

np_sigmoid_pca = sigmoid_pca.fit_transform(pca_data[pca_data.columns[:-1]])

In [None]:
# 3-D scatter of the three sigmoid-kernel components, one colour per class (5 classes).
plot_3D_pca(np_sigmoid_pca, pca_data, 5)

In [None]:
# Pairwise scatter plots of the sigmoid-kernel components, coloured by class label.
pairplot_pca(np_sigmoid_pca, pca_data)

## RBF PCA

In [None]:
# Find the first 3 principal components for both the datasets
rbf_pca = KernelPCA(kernel="rbf", n_components=3, random_state=0)

np_rbf_pca = rbf_pca.fit_transform(pca_data[pca_data.columns[:-1]])

In [None]:
# 3-D scatter of the RBF-kernel projection (np_rbf_pca), one colour per class.
plot_3D_pca(np_rbf_pca, pca_data, 5)

In [None]:
# Pairwise scatter plots of the RBF-kernel projection (np_rbf_pca), coloured by class label.
pairplot_pca(np_rbf_pca, pca_data)

## Poly PCA

In [None]:
# Find the first 3 principal components for both the datasets
poly_pca = KernelPCA(kernel="poly", n_components=3, random_state=0, degree=3)

np_poly_pca = poly_pca.fit_transform(pca_data[pca_data.columns[:-1]])

## TSNE

Exploratory t-SNE embedding with 4 components, fitted on 100 instances from each class (the 3D plot below shows only the first 3 components).

In [None]:
from sklearn.manifold import TSNE

# 100 seeded random rows from each of the five classes of the training frame.
tsne_data_0, tsne_data_1, tsne_data_2, tsne_data_3, tsne_data_4 = (
    get_random_data_from_class(train_df, class_id, n_examples=100)
    for class_id in range(5)
)
# Stack the per-class samples row-wise into one frame.
tsne_data = pd.concat(
    [tsne_data_0, tsne_data_1, tsne_data_2, tsne_data_3, tsne_data_4],
    axis=0,
)

# Four output dimensions call for method='exact'; scikit-learn's default
# Barnes-Hut method only supports fewer than 4 components.
tsne = TSNE(n_components=4, verbose=1, random_state=123, method='exact')
# Embed the feature columns only (the last column is the class label).
np_tsne = tsne.fit_transform(tsne_data.values[:, :-1])


In [None]:
# 3-D scatter of the first three t-SNE dimensions, one colour per class.
# The helper labels its axes 'PCA Component N' regardless of the embedding.
plot_3D_pca(np_tsne, tsne_data, 5)

In [None]:
# Pairwise scatter plots of the t-SNE dimensions, coloured by the labels of
# the frame the embedding was actually fitted on (tsne_data).
pairplot_pca(np_tsne, tsne_data)

## Training some ML Models

### SVM

In [None]:
from sklearn.svm import SVC

# Fit a polynomial-kernel SVM (C=1) on the training features against column 187.
svm = SVC(C=1, kernel='poly').fit(train_X, train_df[187])

# Mean accuracy on the held-out test frame.
accuracy = svm.score(test_X, test_df[187])
print(f'SVM classifier accuracy: {accuracy:.2f}')

In [None]:
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support

# Test-set predictions of the fitted SVM, tabulated against the true labels
# (rows are true classes, columns are predicted classes).
y_pred_svm = svm.predict(test_X)
cm = confusion_matrix(y_true=test_df[187], y_pred=y_pred_svm)

# Render the counts as an annotated square heatmap.
plt.figure(figsize=(5, 5))
cm_ax = sns.heatmap(cm, annot=True, fmt='g', cmap='Blues', square=True)
cm_ax.set_xlabel('Predicted label')
cm_ax.set_ylabel('True label')
plt.show()

In [None]:
# Per-class precision, recall and F1 of the SVM on the test frame; rows are named from `labels`.
print(classification_report(test_df[187], y_pred_svm, target_names=labels))

## Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Fit a 100-tree forest with unlimited depth and a fixed seed on the training
# features against column 187.
rfc = RandomForestClassifier(
    n_estimators=100,
    max_depth=None,
    random_state=42,
).fit(train_X, train_df[187])

# Mean accuracy on the held-out test frame.
accuracy = rfc.score(test_X, test_df[187])
print(f'Random forest classifier accuracy: {accuracy:.2f}')

In [None]:
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support

# Test-set predictions of the fitted forest, tabulated against the true labels
# (rows are true classes, columns are predicted classes).
y_pred_rfc = rfc.predict(test_X)
cm = confusion_matrix(y_true=test_df[187], y_pred=y_pred_rfc)

# Render the counts as an annotated square heatmap.
plt.figure(figsize=(5, 5))
cm_ax = sns.heatmap(cm, annot=True, fmt='g', cmap='Blues', square=True)
cm_ax.set_xlabel('Predicted label')
cm_ax.set_ylabel('True label')
plt.show()

In [None]:
# Per-class precision, recall and F1 of the random forest on the test frame; rows are named from `labels`.
print(classification_report(test_df[187], y_pred_rfc, target_names=labels))

## CNN

In [None]:
from keras.layers import Dense, Convolution1D, MaxPool1D, Flatten, Dropout
from keras.layers import Input
from keras.models import Model
from keras.layers import BatchNormalization
import keras
from keras.callbacks import EarlyStopping, ModelCheckpoint

# Hold out a stratified slice of the training data for early stopping and
# checkpoint selection, so the test set is only used for the final evaluation.
cnn_train_X, cnn_val_X, cnn_train_y, cnn_val_y = train_test_split(
    train_X, train_y, test_size=0.1, stratify=train_y, random_state=42)

# Each beat is fed as a sequence with one channel: (number of feature columns, 1).
im_shape=(train_X.shape[1],1)
inputs_cnn=Input(shape=(im_shape), name='inputs_cnn')

# Three Conv1D -> BatchNorm -> MaxPool stages with 64 filters each.
conv1_1=Convolution1D(64, (6), activation='relu')(inputs_cnn)
conv1_1=BatchNormalization()(conv1_1)
pool1=MaxPool1D(pool_size=(3), strides=(2), padding="same")(conv1_1)
conv2_1=Convolution1D(64, (3), activation='relu')(pool1)
conv2_1=BatchNormalization()(conv2_1)
pool2=MaxPool1D(pool_size=(2), strides=(2), padding="same")(conv2_1)
conv3_1=Convolution1D(64, (3), activation='relu')(pool2)
conv3_1=BatchNormalization()(conv3_1)
pool3=MaxPool1D(pool_size=(2), strides=(2), padding="same")(conv3_1)

# Dense classification head ending in a 5-way softmax.
flatten=Flatten()(pool3)
dense_end1 = Dense(64, activation='relu')(flatten)
dense_end2 = Dense(32, activation='relu')(dense_end1)
main_output = Dense(5, activation='softmax', name='main_output')(dense_end2)


model = Model(inputs= inputs_cnn, outputs=main_output)
# The targets are integer class ids (train_y / test_y), not one-hot rows, so the
# sparse variant of the cross-entropy loss is required.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy',metrics = ['accuracy'])


# Stop after 8 epochs without validation-loss improvement and keep the best weights on disk.
callbacks = [EarlyStopping(monitor='val_loss', patience=8),
            ModelCheckpoint(filepath='best_model.h5', monitor='val_loss', save_best_only=True)]

history=model.fit(cnn_train_X, cnn_train_y,epochs=40,callbacks=callbacks, batch_size=32,
                  validation_data=(cnn_val_X,cnn_val_y))
# Restore the checkpoint with the lowest validation loss.
model.load_weights('best_model.h5')



In [None]:
# Evaluate on the test split. `scores` holds the loss followed by the compiled
# metrics, so index 1 is the accuracy requested in model.compile.
scores = model.evaluate(test_X, test_y, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))


In [None]:
# Predict test set labels
y_pred = model.predict(test_X)
y_pred_cnn = np.argmax(y_pred, axis=1)

# Calculate confusion matrix
cm = confusion_matrix(test_df[187], y_pred_cnn)

# Plot confusion matrix
plt.figure(figsize=(5,5))
sns.heatmap(cm, annot=True, fmt='g', cmap='Blues', square=True)
plt.xlabel('Predicted label')
plt.ylabel('True label')
plt.show()

In [None]:
# Per-class precision, recall and F1 of the CNN on the test frame; rows are named from `labels`.
print(classification_report(test_df[187], y_pred_cnn, target_names=labels))

## Bagging/Bootstrapping (sampling with replacement)

In [None]:
from sklearn.utils import resample

# Class 0 is cut down to 2000 rows, drawn without replacement (seed 42).
df_0 = train_df[train_df[187] == 0].sample(n=2000, random_state=42)

# Classes 1-4 are each bootstrapped (drawn with replacement) to 2000 rows,
# using seeds 123..126 respectively.
df_1, df_2, df_3, df_4 = (
    train_df[train_df[187] == class_id] for class_id in (1, 2, 3, 4)
)
df_1_upsample, df_2_upsample, df_3_upsample, df_4_upsample = (
    resample(class_df, replace=True, n_samples=2000, random_state=seed)
    for class_df, seed in zip((df_1, df_2, df_3, df_4), (123, 124, 125, 126))
)

# NOTE: train_df is rebound to the balanced frame here; train_X / train_y
# derived from the original frame earlier are not refreshed by this cell.
train_df = pd.concat(
    [df_0, df_1_upsample, df_2_upsample, df_3_upsample, df_4_upsample]
)