In [None]:
import pandas as pd
from io import StringIO
import sklearn
import re
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import xml.etree.ElementTree as ET
import os
import ecg_plot
import matplotlib.pyplot as plt
import time

In [None]:
def get_waves_from_file(filepath):
    """Read the median-sample waveforms of one ECG XML file and rescale them.

    Args:
        filepath: path of the XML file handed to ``pd.read_xml``.

    Returns:
        The output of ``normalize_waveform`` (one scaled array per waveform
        row), or an empty list when the file could not be read. Read errors
        are printed, not raised.
    """
    # Nodes that contain the waveform data of the ECG strip for each lead.
    waveform_xpath = '//RestingECGMeasurements/MedianSamples/WaveformData'
    try:
        frame = pd.read_xml(filepath, xpath=waveform_xpath)
    except Exception as read_error:
        # ValueError is an Exception subclass, so one handler covers both
        # of the (identical) failure paths.
        print('file_name', filepath, 'error', read_error)
        return []
    return normalize_waveform(get_waveform(frame))

def get_waveform(df, column='WaveformData'):
    """Parse comma-separated integer strings into lists of ints.

    Args:
        df: mapping/DataFrame whose ``column`` entry is an iterable of strings.
        column: name of the entry holding the raw waveform strings.

    Returns:
        A list with one ``list[int]`` per input string.
    """
    whitespace = re.compile(r"\s+")
    parsed = []
    for raw in df[column]:
        # Samples may be wrapped over several lines; drop all whitespace first.
        compact = whitespace.sub("", raw)
        parsed.append(list(map(int, compact.split(','))))
    return parsed

def normalize_waveform(waves):
    """Rescale every wave independently to the range [-1, 1].

    Args:
        waves: iterable of 1-D numeric sequences.

    Returns:
        A list of arrays of shape (len(wave), 1), one per input wave.
    """
    scaler = MinMaxScaler((-1, 1))
    scaled = []
    for wave in waves:
        # MinMaxScaler works column-wise, so each wave becomes a single column.
        column = np.array(wave).reshape(-1, 1)
        scaled.append(scaler.fit_transform(column))
    return scaled

def get_raw_diagnosis(filepath):
    """Return the diagnosis statements of an ECG XML file joined by '#'.

    For every ``Interpretation`` element returned by
    ``root.findall('Interpretation')`` the children of its first ``Diagnosis``
    element are collected in document order.

    Args:
        filepath: path of the XML file to parse.

    Returns:
        The statement texts joined with '#', or '' when there are none.

    Raises:
        Whatever ``xml.etree.ElementTree.parse`` raises for an unreadable or
        malformed file.
    """
    root = ET.parse(filepath).getroot()
    statements = []
    for interpretation in root.findall('Interpretation'):
        diagnosis_node = interpretation.find('Diagnosis')
        # Identity test: `find` returns None when nothing matches.
        if diagnosis_node is not None:
            # An empty child element has text None; treat it as an empty
            # statement instead of failing on `None + '#'`.
            statements.extend(child.text or '' for child in diagnosis_node)
    return '#'.join(statements)
    
def get_dataset(data_dir='/groups/umcg-endocrinology/tmp02/projects/ukb-55495/data/metaData/v5/xml_T3/',
                normal_label='Normal ECG'):
    """Build the sample list and binary labels from a directory of ECG XML files.

    Files are visited in sorted name order. A file whose waveforms cannot be
    read (``get_waves_from_file`` returns nothing) is skipped and counted.

    Args:
        data_dir: directory containing the XML files; defaults to the
            original hard-coded location.
        normal_label: substring that marks a diagnosis as normal.

    Returns:
        (X, data_labels): ``X`` is a list of flattened waveform arrays and
        ``data_labels`` a parallel list with 0 for normal and 1 otherwise.
    """
    skipped_files = 0
    X, data_labels = [], []
    # `data_dir` / `entry` avoid shadowing the builtins `dir` and `file`.
    for entry in sorted(os.listdir(data_dir)):
        file_name = os.path.join(data_dir, entry)

        # get waves (X)
        normalized_12_waves = np.array(get_waves_from_file(file_name)).ravel()
        if len(normalized_12_waves) == 0:
            skipped_files += 1
            continue

        # get labels (y): everything after the last '#' is dropped before
        # looking for the normal label (whole string is kept if there is no '#').
        diagnosis = get_raw_diagnosis(file_name)
        new_diagnosis = diagnosis.rsplit('#', 1)[0]

        X.append(normalized_12_waves)
        data_labels.append(0 if normal_label in new_diagnosis else 1)
    print("skipped", skipped_files, "files")
    return X, data_labels

In [None]:
import random

def calculate_class_ratio(labels):
    """Count both classes and return the integer ratio abnormal / normal.

    Args:
        labels: iterable of labels; a label equal to 0 counts as normal,
            anything else as abnormal.

    Returns:
        ``abnormals // normals``, or 0 when there is no normal sample
        (previously a ZeroDivisionError).
    """
    normals, abnormals = 0, 0
    for label in labels:
        if label == 0:
            normals += 1
        else:
            abnormals += 1

    # Guard the division: with no normal samples there is nothing to replicate.
    ratio = abnormals // normals if normals else 0
    print("normals:", normals, "abnormals:", abnormals)
    # The value is abnormals per normal; the old label stated the opposite.
    print("abnormals_to_normals_ratio:", ratio)
    return ratio

def solve_imbalance_problem(X, labels):
    """Oversample class 0 by plain replication and shuffle the result.

    Every sample labelled 0 is appended ``ratio`` extra times, where
    ``ratio`` comes from ``calculate_class_ratio(labels)``. Class counts are
    printed before and after.

    Args:
        X: sequence of 1-D sample arrays.
        labels: sequence of labels aligned with ``X``.

    Returns:
        (samples, labels) as NumPy arrays in the shuffled order.
    """
    # Glue each label to the end of its sample so both get shuffled together.
    rows = [np.append(sample, labels[idx]) for idx, sample in enumerate(X)]

    ratio = calculate_class_ratio(labels)

    # `ratio` additional copies of every class-0 row, in original order.
    duplicates = [row for row in rows if row[-1] == 0 for _ in range(ratio)]
    rows.extend(duplicates)
    random.shuffle(rows)

    # separate waves and labels again
    updated_labels = [row[-1] for row in rows]
    samples = [row[:-1] for row in rows]

    calculate_class_ratio(updated_labels)

    return np.array(samples), np.array(updated_labels)

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
import tensorflow as tf
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

def reverse_one_hot(predictions):
    """Turn per-class score rows into class indices.

    Args:
        predictions: iterable of score vectors (one per sample).

    Returns:
        A list holding the argmax index of every row.
    """
    return [np.argmax(np.array(scores)) for scores in predictions]

# (rows, cols) grid a flattened sample is reshaped into before the CNN.
# NOTE(review): learn_the_model declares its own `image_shape` parameter with
# the same default, so this module-level value is not read by the functions
# visible in this notebook.
image_shape = (24, 300)
# image_shape = (12, 600)
# image_shape = (1, 600*12) # does not work

def learn_the_model(training_set_X, training_set_y, testing_set_X, testing_set_y, image_shape = (24, 300), num_classes = 2):
    """Train a small 2-D CNN and return its class scores for test and train data.

    Args:
        training_set_X, testing_set_X: arrays reshaped to
            (n_samples, image_shape[0], image_shape[1], 1).
        training_set_y, testing_set_y: integer class labels (one-hot encoded here).
        image_shape: (rows, cols) of the single-channel input grid.
        num_classes: number of output classes.

    Returns:
        (predictions_test, predictions_train): softmax outputs of the fitted model.
        The result of ``model.evaluate`` on the test set is printed.
    """
    rows, cols = image_shape[0], image_shape[1]
    batch = 150

    test_labels = tf.keras.utils.to_categorical(testing_set_y, num_classes)
    train_labels = tf.keras.utils.to_categorical(training_set_y, num_classes)

    # Add the trailing channel axis expected by Conv2D.
    train_images = training_set_X.reshape(training_set_X.shape[0], rows, cols, 1)
    test_images = testing_set_X.reshape(testing_set_X.shape[0], rows, cols, 1)

    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=(rows, cols, 1)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax'),
    ])
    model.compile(
        loss=tf.keras.losses.categorical_crossentropy,
        optimizer=tf.keras.optimizers.Adadelta(),
        metrics=['accuracy', 'mae', 'mse'],
    )

    print("model will be trained with {} and be tested with {} sample".format(train_images.shape[0], test_images.shape[0]))
    print("Fitting model to the training data...")
    model.fit(train_images, train_labels, batch_size=batch, epochs=20, verbose=1, validation_data=None)

    predictions_test = model.predict(test_images, batch_size=batch, verbose=1)
    predictions_train = model.predict(train_images, batch_size=batch, verbose=1)
    print(model.evaluate(test_images, test_labels, batch_size=batch, verbose=1))
    return predictions_test, predictions_train

In [None]:
# precision_recall_fscore_support(testing_set_y, reverse_one_hot(predictions_replicate))
import seaborn as sns

def depict_confusion_matrix(cf_matrix, title=''):
    """Draw a confusion matrix as an annotated heatmap.

    Each cell shows its count and its share of all samples. For a 2x2 matrix
    the cells are additionally named True Neg / False Pos / False Neg /
    True Pos in row-major order, as before.

    Args:
        cf_matrix: square count matrix (array-like). Any size is accepted;
            previously only 2x2 worked.
        title: plot title; optional so that one-argument calls no longer fail.
    """
    cf_matrix = np.asarray(cf_matrix)
    flat = cf_matrix.flatten()
    total = np.sum(cf_matrix)
    # Avoid 0/0 (NaN percentages) for an all-zero matrix.
    fractions = flat / total if total else np.zeros(flat.shape)

    group_counts = ['{0:0.0f}'.format(value) for value in flat]
    group_percentages = ['{0:.2%}'.format(value) for value in fractions]

    if cf_matrix.shape == (2, 2):
        group_names = ['True Neg','False Pos','False Neg','True Pos']
        labels = [f'{v1}\n{v2}\n{v3}' for v1, v2, v3 in
                  zip(group_names,group_counts,group_percentages)]
    else:
        # The TN/FP/FN/TP names only make sense for the binary case.
        labels = [f'{v2}\n{v3}' for v2, v3 in
                  zip(group_counts,group_percentages)]
    labels = np.asarray(labels).reshape(cf_matrix.shape)
    sns.heatmap(cf_matrix, annot=labels, fmt='', cmap='Blues')
    plt.title(title)
    plt.show()

In [None]:
# Load every readable ECG file; X holds the flattened waveforms, y the labels.
X, y = get_dataset() # 0 - normal ECG, 1 - abnormal ECG
X, y = np.array(X), np.array(y)   # 500 - number of samples per second
print(X.shape, y.shape)          # 2439 patients, 7200 = 12 leads * 600 measurements in time (600 - number of samples of one lead)

In [None]:
# Index separating the first 75% of the samples (training) from the rest
# (testing). The data is split in its loaded order; nothing is shuffled here.
training_proportion = int(0.75 * len(X))

training_set_X, testing_set_X = X[:training_proportion], X[training_proportion:]
training_set_y, testing_set_y = y[:training_proportion], y[training_proportion:]

In [None]:
from collections import Counter
from sklearn.metrics import precision_recall_fscore_support

# Oversampling by replicating class-0 instances (see solve_imbalance_problem).
X_train_replicate, y_train_replicate = solve_imbalance_problem(training_set_X, training_set_y)

print('Original dataset shape:', Counter(training_set_y))
print('Resampled dataset shape:', Counter(y_train_replicate))

# Train on the replicated training set, evaluate on the untouched test set.
predictions_replicate_test, predictions_replicate_train = learn_the_model(X_train_replicate, y_train_replicate, testing_set_X, testing_set_y)
print("Evaluation accuracy score (test) = ", accuracy_score(testing_set_y, reverse_one_hot(predictions_replicate_test)))
print("Evaluation accuracy score (train) = ", accuracy_score(y_train_replicate, reverse_one_hot(predictions_replicate_train)))

# print(precision_recall_fscore_support(testing_set_y, reverse_one_hot(predictions_replicate)))

cf_matrix_test = confusion_matrix(testing_set_y, reverse_one_hot(predictions_replicate_test))
cf_matrix_train = confusion_matrix(y_train_replicate, reverse_one_hot(predictions_replicate_train))

depict_confusion_matrix(cf_matrix_test, 'Testing set')
depict_confusion_matrix(cf_matrix_train, 'Training set')

# Numbers noted from an earlier run (presumably the test accuracy): 0.43278688524590164
# NOTE(review): if the matrix below is sklearn confusion_matrix output, its layout
# is [[TN FP], [FN TP]] - the same order depict_confusion_matrix uses for its labels.
# Confusion matrix =  [[119 (TN)  18 (FP)]
#  [328 (FN) 145 (TP)]]

In [None]:
from imblearn.over_sampling import SMOTE
from collections import Counter
import warnings
# Silence FutureWarning messages for the rest of the session.
warnings.filterwarnings(action='ignore', category=FutureWarning)

# Oversampling technique - creates synthetic samples.
# SMOTE works by selecting examples that are close in the feature space, drawing a line between the examples
# in the feature space and drawing a new sample at a point along that line.
sm = SMOTE(random_state=40, n_jobs=8, sampling_strategy='not majority')
# Only the training split is resampled; the test split stays untouched.
X_train_resampled, y_train_resampled = sm.fit_resample(training_set_X, training_set_y)

print('Original dataset shape:', Counter(training_set_y))
print('Resampled dataset shape:', Counter(y_train_resampled))

In [None]:
# Train on the SMOTE-resampled training set and evaluate on the original test split.
predictions_SMOTE_test, predictions_SMOTE_train = learn_the_model(X_train_resampled, y_train_resampled, testing_set_X, testing_set_y)

print("Evaluation accuracy score (test) = ", accuracy_score(testing_set_y, reverse_one_hot(predictions_SMOTE_test)))
print("Evaluation accuracy score (train) = ", accuracy_score(y_train_resampled, reverse_one_hot(predictions_SMOTE_train)))

cf_matrix_test = confusion_matrix(testing_set_y, reverse_one_hot(predictions_SMOTE_test))
depict_confusion_matrix(cf_matrix_test, 'Testing set')

cf_matrix_train = confusion_matrix(y_train_resampled, reverse_one_hot(predictions_SMOTE_train))
depict_confusion_matrix(cf_matrix_train, 'Training set')

# Value noted from an earlier run (presumably the test accuracy): 0.49

In [None]:
from imblearn.over_sampling import ADASYN
from collections import Counter

# Oversampling technique - adaptive synthetic sampling (ADASYN):
# generate more synthetic examples in regions of the feature space where the density of minority examples is low,
# and fewer or none where the density is high.
sm = ADASYN(random_state=40, n_jobs=8, sampling_strategy='not majority')
# Only the training split is resampled; the test split stays untouched.
X_train_resampled_ADASYN, y_train_resampled_ADASYN = sm.fit_resample(training_set_X, training_set_y)

print('Original dataset shape:', Counter(training_set_y))
print('Resampled dataset shape:', Counter(y_train_resampled_ADASYN))

In [None]:
# Train on the ADASYN-resampled training set and evaluate on the original test
# split. This call must run: the lines below read predictions_ADASYN_test /
# predictions_ADASYN_train, which are defined nowhere else.
predictions_ADASYN_test, predictions_ADASYN_train = learn_the_model(X_train_resampled_ADASYN, y_train_resampled_ADASYN, testing_set_X, testing_set_y)
print("Evaluation accuracy score (test) = ", accuracy_score(testing_set_y, reverse_one_hot(predictions_ADASYN_test)))
print("Evaluation accuracy score (train) = ", accuracy_score(y_train_resampled_ADASYN, reverse_one_hot(predictions_ADASYN_train)))

cf_matrix = confusion_matrix(testing_set_y, reverse_one_hot(predictions_ADASYN_test))
depict_confusion_matrix(cf_matrix, 'Testing set')

cf_matrix = confusion_matrix(y_train_resampled_ADASYN, reverse_one_hot(predictions_ADASYN_train))
depict_confusion_matrix(cf_matrix, 'Training set')

# Evaluation accuracy score =  0.578688524590164

In [None]:
from sklearn.cluster import MiniBatchKMeans
from imblearn.under_sampling import ClusterCentroids

# Undersampling technique: ClusterCentroids with a MiniBatchKMeans estimator,
# applied to the training split only.
cc = ClusterCentroids(
    estimator=MiniBatchKMeans(n_init=1, random_state=0), random_state=42, sampling_strategy='not minority'
)
X_train_resampled_CC, y_train_resampled_CC = cc.fit_resample(training_set_X, training_set_y)
print('Original dataset shape:', Counter(training_set_y))
print('Resampled dataset shape:', Counter(y_train_resampled_CC))
# Numbers noted from an earlier run (presumably of the evaluation cell that follows):
# Evaluation accuracy score =  0.3885245901639344
# Confusion matrix =  [[112  25]
#  [348 125]]

In [None]:
# learn_the_model returns (test_predictions, train_predictions); unpack the
# pair so that only the test predictions are scored against testing_set_y.
predictions_CC, predictions_CC_train = learn_the_model(X_train_resampled_CC, y_train_resampled_CC, testing_set_X, testing_set_y)
print("Evaluation accuracy score = ", accuracy_score(testing_set_y, reverse_one_hot(predictions_CC)))

cf_matrix = confusion_matrix(testing_set_y, reverse_one_hot(predictions_CC))
# depict_confusion_matrix is declared with a title argument; supply it.
depict_confusion_matrix(cf_matrix, 'Testing set')

In [None]:
from scipy import signal 
import matplotlib as mpl 
import matplotlib.pyplot as plt 
%matplotlib inline 

# Plot the spectrogram of the first training sample (log colour scale).
fig, ax = plt.subplots()  
# f: frequency bins, t: segment times, Sxx: spectrogram values (as returned by scipy).
f, t, Sxx = signal.spectrogram(training_set_X[0], fs=500)
print(Sxx.shape, training_set_X[0].shape)
# NOTE(review): LogNorm needs strictly positive limits - assumes Sxx.min() > 0.
pc = ax.pcolormesh(t, f, Sxx, norm=mpl.colors.LogNorm(vmin=Sxx.min(), vmax=Sxx.max()), cmap='inferno') 
ax.set_ylabel('Frequency') 
ax.set_xlabel('Time')  
fig.colorbar(pc) 

In [None]:
def create_spectograms(data, sampling_frequency):
    """Compute one spectrogram per wave with ``scipy.signal.spectrogram``.

    Args:
        data: iterable of 1-D signals.
        sampling_frequency: value passed as ``fs``.

    Returns:
        Array stacking the third return value (``Sxx``) of every call.
    """
    power_maps = []
    for wave in data:
        # Frequencies and segment times are not needed, only Sxx.
        _freqs, _times, sxx = signal.spectrogram(wave, fs=sampling_frequency)
        power_maps.append(sxx)
    return np.array(power_maps)
    

In [None]:
# Turn the 1-D waves into 2-D spectrograms first, then oversample the
# (flattened) spectrograms with ADASYN and train the CNN on them.

# NOTE(review): fs=5 here, while the plotting cell uses fs=500 and the data is
# described as 500 samples per second - confirm which value is intended.
X_train_specs = create_spectograms(training_set_X, 5)
X_test_specs = create_spectograms(testing_set_X, 5)

sm = ADASYN(random_state=40, n_jobs=8, sampling_strategy='not majority')
print(X_train_specs.shape)
# Remember the per-sample spectrogram shape and derive every dimension from
# the data instead of hard-coding the sample count and 129*32.
spec_shape = X_train_specs.shape[1:]
X_train_specs = np.array(X_train_specs).reshape(X_train_specs.shape[0], -1)
print(X_train_specs.shape)

X_train_resampled_ADASYN, y_train_resampled_ADASYN = sm.fit_resample(X_train_specs, training_set_y)

print(X_train_resampled_ADASYN.shape)
# Restore the 2-D spectrogram layout after resampling.
X_train_resampled_ADASYN = np.array(X_train_resampled_ADASYN).reshape(X_train_resampled_ADASYN.shape[0], *spec_shape)
print(X_train_resampled_ADASYN.shape)

print('Original dataset shape:', Counter(training_set_y))
print('Resampled dataset shape:', Counter(y_train_resampled_ADASYN))

predictions_specs_test, predictions_specs_train = learn_the_model(X_train_resampled_ADASYN, y_train_resampled_ADASYN, X_test_specs, testing_set_y, image_shape=spec_shape)

print("Evaluation accuracy score (test) = ", accuracy_score(testing_set_y, reverse_one_hot(predictions_specs_test)))
print("Evaluation accuracy score (train)= ", accuracy_score(y_train_resampled_ADASYN, reverse_one_hot(predictions_specs_train)))


cf_matrix = confusion_matrix(testing_set_y, reverse_one_hot(predictions_specs_test))
depict_confusion_matrix(cf_matrix, 'Testing set')

cf_matrix = confusion_matrix(y_train_resampled_ADASYN, reverse_one_hot(predictions_specs_train))
depict_confusion_matrix(cf_matrix, 'Training set')



In [None]:
import csv
import numpy as np

# takes 2 minutes to read all three datasets
def read_data_from_file(filename):
    """Read a comma-separated file of unquoted numbers into a NumPy array.

    ``csv.QUOTE_NONNUMERIC`` makes the reader convert every unquoted field
    to ``float``.

    Args:
        filename: path of the CSV file.

    Returns:
        ``np.array`` built from the list of parsed rows.
    """
    with open(filename, newline='') as handle:
        rows = list(csv.reader(handle, delimiter=',', quoting = csv.QUOTE_NONNUMERIC))
    return np.array(rows)

# Pre-extracted dataset: one row of wave samples per recording.
waves = read_data_from_file('waves_full.csv')
print(waves.shape, waves[0])

# The label files are read as a 2-D array; only the first row is used.
labels2classes = read_data_from_file('labels_full_2_classes.csv')[0]
print(labels2classes.shape)

labels3classes = read_data_from_file('labels_full_3_classes.csv')[0]
# Report the 3-class labels here (the 2-class shape was printed twice before).
print(labels3classes.shape)

In [None]:
from collections import Counter

# create frequency map
def get_raw_diagnosis(filepath):
    """Return the diagnosis statements of an ECG XML file joined by '#'.

    NOTE(review): this re-defines a function of the same name that already
    exists earlier in the notebook; consider keeping a single definition.

    For every ``Interpretation`` element returned by
    ``root.findall('Interpretation')`` the children of its first ``Diagnosis``
    element are collected in document order.

    Args:
        filepath: path of the XML file to parse.

    Returns:
        The statement texts joined with '#', or '' when there are none.

    Raises:
        Whatever ``xml.etree.ElementTree.parse`` raises for an unreadable or
        malformed file.
    """
    root = ET.parse(filepath).getroot()
    statements = []
    for interpretation in root.findall('Interpretation'):
        diagnosis_node = interpretation.find('Diagnosis')
        # Identity test: `find` returns None when nothing matches.
        if diagnosis_node is not None:
            # An empty child element has text None; treat it as an empty
            # statement instead of failing on `None + '#'`.
            statements.extend(child.text or '' for child in diagnosis_node)
    return '#'.join(statements)

def get_diagnosis_by_label(dirs=None):
    """Collect the shortened diagnosis string of every XML file in ``dirs``.

    The shortened form is the raw diagnosis with everything after its last
    '#' removed (the whole string when it contains no '#'). Files that fail
    to parse are reported with ``print`` and skipped.

    Args:
        dirs: iterable of directories to scan; defaults to the original
            hard-coded xml_T3 and xml_T2 locations.

    Returns:
        (unique_labels, count, labels): the set of distinct strings, the
        number of collected strings, and the full list with duplicates.
    """
    if dirs is None:
        dirs = ['/groups/umcg-endocrinology/tmp02/projects/ukb-55495/data/metaData/v5/xml_T3/', "/groups/umcg-endocrinology/tmp02/projects/ukb-55495/data/metaData/v5/xml_T2/"]
    short_label = []
    # `directory` / `entry` avoid shadowing the builtins `dir` and `file`.
    for directory in dirs:
        for entry in os.listdir(directory):
            file_name = os.path.join(directory, entry)
            try:
                d = get_raw_diagnosis(file_name)
            except Exception as e:
                # Best effort: report the unreadable file and carry on.
                print('error', e, 'in', file_name)
                continue
            short_label.append(d.rsplit('#', 1)[0])
    return set(short_label), len(short_label), short_label
    
diags_normal, len_origin, origin = get_diagnosis_by_label()
# Write "diagnosis, frequency" rows, most frequent first.
# newline='' is what the csv module requires for files handed to csv.writer;
# without it extra blank rows can appear on some platforms.
with open('diagnosis_frequencies.csv', 'w', newline='') as f:
    mywriter = csv.writer(f, delimiter=',')
    mywriter.writerow(['Diagnosis', 'Frequency'])
    # At most the 200000 most common diagnoses are written.
    for diagnosis_text, frequency in Counter(origin).most_common(200000):
        mywriter.writerow([diagnosis_text, frequency])

In [None]:
from sklearn.cluster import KMeans

# Elbow method: fit KMeans for 1..10 clusters and record the inertia of each fit.
cluster_counts = range(1, 11)
inertias = []
for n_clusters in cluster_counts:
    kmeans = KMeans(n_clusters=n_clusters)
    kmeans.fit(waves)
    # The stray trailing `inertias` token on this line was a syntax error.
    inertias.append(kmeans.inertia_)

plt.plot(cluster_counts, inertias, marker='o')
plt.title('Elbow method')
plt.xlabel('Number of clusters')
plt.ylabel('Inertia')
plt.show()

In [None]:
from sklearn.model_selection import train_test_split
from collections import Counter

# calculate_class_ratio(labels2classes)
# Stratified 75/25 split of the full dataset with the 2-class labels.
# NOTE(review): no random_state is passed, so the split differs between runs.
X_train, X_test, y_train, y_test = train_test_split(waves, labels2classes, train_size=0.75, stratify=labels2classes)

# The second and third lines report the train and test splits (nothing has
# been resampled yet, despite the wording of the printed labels).
print('Original dataset shape (full):', Counter(labels2classes))
print('Resampled dataset shape (full):', Counter(y_train))
print('Resampled dataset shape (full):', Counter(y_test))

In [None]:
# calculate_class_ratio(labels3classes)
from sklearn.model_selection import train_test_split
from collections import Counter

# Stratified 75/25 split of the full dataset with the 3-class labels.
# NOTE(review): no random_state is passed, so this split is independent of the
# 2-class split of the same `waves` array (X_test3 is not the same rows as X_test).
X_train3, X_test3, y_train3, y_test3 = train_test_split(waves, labels3classes, train_size=0.75, stratify=labels3classes)

# The second and third lines report the train and test splits (nothing has
# been resampled yet, despite the wording of the printed labels).
print('Original dataset shape (full):', Counter(labels3classes))
print('Resampled dataset shape (full):', Counter(y_train3))
print('Resampled dataset shape (full):', Counter(y_test3))

In [None]:
from sklearn.cluster import MiniBatchKMeans
from imblearn.under_sampling import ClusterCentroids
from collections import Counter

# Undersampling technique - 2 classes: ClusterCentroids on the training split,
# with the wall-clock time of the resampling reported in minutes.

cc = ClusterCentroids(
    estimator=MiniBatchKMeans(n_init=3, random_state=0), random_state=42, sampling_strategy='not minority'
)
st = time.time()
X_resampled_train, y_resampled_train = cc.fit_resample(X_train, y_train)
elapsed_time = time.time() - st
print('Undersampling time (full):', elapsed_time/60, 'minutes')

print('Original dataset shape (full):', Counter(y_train))
print('Resampled dataset shape (full):', Counter(y_resampled_train))

In [None]:
from sklearn.cluster import MiniBatchKMeans
from imblearn.under_sampling import ClusterCentroids
from collections import Counter

# Undersampling technique - 3 classes: same ClusterCentroids setup as the
# 2-class cell, applied to the 3-class training split.

cc = ClusterCentroids(
    estimator=MiniBatchKMeans(n_init=3, random_state=0), random_state=42, sampling_strategy='not minority'
)
st = time.time()
X_resampled_train3, y_resampled_train3 = cc.fit_resample(X_train3, y_train3)
elapsed_time = time.time() - st
print('Undersampling time (full):', elapsed_time/60, 'minutes')

print('Original dataset shape (full):', Counter(y_train3))
print('Resampled dataset shape (full):', Counter(y_resampled_train3))

In [None]:
from sklearn.utils import shuffle

def learn_the_model_experiment(training_set_X, training_set_y, testing_set_X, testing_set_y, image_shape = (24, 300), num_classes = 2):
    """Train the lighter experimental CNN and return its class scores.

    Compared with ``learn_the_model`` this variant uses 16/32 convolution
    filters, a 64-unit dense layer, no dropout after pooling, batch size 100
    and 150 epochs.

    Args:
        training_set_X, testing_set_X: arrays reshaped to
            (n_samples, image_shape[0], image_shape[1], 1).
        training_set_y, testing_set_y: integer class labels (one-hot encoded here).
        image_shape: (rows, cols) of the single-channel input grid.
        num_classes: number of output classes.

    Returns:
        (predictions_test, predictions_train): softmax outputs of the fitted model.
        The result of ``model.evaluate`` on the test set is printed.
    """
    batch_size = 100
    rows, cols = image_shape[0], image_shape[1]

    test_labels = tf.keras.utils.to_categorical(testing_set_y, num_classes)
    train_labels = tf.keras.utils.to_categorical(training_set_y, num_classes)

    # Add the trailing channel axis expected by Conv2D.
    train_images = training_set_X.reshape(training_set_X.shape[0], rows, cols, 1)
    test_images = testing_set_X.reshape(testing_set_X.shape[0], rows, cols, 1)

    model = Sequential([
        Conv2D(16, (3, 3), activation='relu', input_shape=(rows, cols, 1)),
        Conv2D(32, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Flatten(),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax'),
    ])
    model.compile(
        loss=tf.keras.losses.categorical_crossentropy,
        optimizer=tf.keras.optimizers.Adadelta(),
        metrics=['accuracy', 'mae', 'mse'],
    )

    print("model will be trained with {} and be tested with {} sample".format(train_images.shape[0], test_images.shape[0]))
    print("Fitting model to the training data...")
    model.fit(train_images, train_labels, batch_size=batch_size, epochs=150, verbose=1, validation_data=None)

    predictions_test = model.predict(test_images, batch_size=batch_size, verbose=1)
    predictions_train = model.predict(train_images, batch_size=batch_size, verbose=1)
    print(model.evaluate(test_images, test_labels, batch_size=batch_size, verbose=1))
    return predictions_test, predictions_train

def learn_and_test(X_resampled_train3, y_resampled_train3, X_test3, y_test3, num_classes=2):
    """Shuffle the training data, train the experimental CNN and report results.

    Prints the training time, test/train accuracy and both confusion
    matrices (drawn as heatmaps for 2 classes, printed otherwise).

    Args:
        X_resampled_train3, y_resampled_train3: training samples and labels.
        X_test3, y_test3: test samples and labels.
        num_classes: number of classes the model should predict.

    Returns:
        (test_predictions, train_predictions) as returned by
        ``learn_the_model_experiment``; train predictions follow the
        shuffled training order.
    """
    X_resampled_train3, y_resampled_train3 = shuffle(X_resampled_train3, y_resampled_train3)

    started = time.time()
    predictions_full_CC_test3, predictions_full_CC_train3 = learn_the_model_experiment(
        X_resampled_train3, y_resampled_train3, X_test3, y_test3, num_classes=num_classes)
    print('Training model time (full):', (time.time() - started)/60, 'minutes')

    predicted_test = reverse_one_hot(predictions_full_CC_test3)
    predicted_train = reverse_one_hot(predictions_full_CC_train3)

    print("Evaluation accuracy score (full, test) = ", accuracy_score(y_test3, predicted_test))
    print("Evaluation accuracy score (full, train) = ", accuracy_score(y_resampled_train3, predicted_train))

    # Test matrix first, then train matrix.
    reports = (
        (y_test3, predicted_test, 'Testing set (full)'),
        (y_resampled_train3, predicted_train, 'Training set (full)'),
    )
    for truth, predicted, plot_title in reports:
        matrix = confusion_matrix(truth, predicted)
        if num_classes == 2:
            depict_confusion_matrix(matrix, plot_title)
        else:
            print(matrix)
    return predictions_full_CC_test3, predictions_full_CC_train3

In [None]:
from sklearn.cluster import MiniBatchKMeans
from imblearn.under_sampling import ClusterCentroids
from collections import Counter

# 2-class model trained on the ClusterCentroids-undersampled training split.
pred_test, pred_train = learn_and_test(X_resampled_train, y_resampled_train, X_test, y_test, num_classes=2)

In [None]:
# Re-draw the test confusion matrix of the 2-class undersampled run.
cf_matrix3 = confusion_matrix(y_test, reverse_one_hot(pred_test))
depict_confusion_matrix(cf_matrix3, 'Testing set (full)')

# NOTE(review): learn_and_test shuffles its training data internally, so
# pred_train presumably follows the shuffled order while y_resampled_train
# is unshuffled here - verify that this comparison is meaningful.
cf_matrix4 = confusion_matrix(y_resampled_train, reverse_one_hot(pred_train))
depict_confusion_matrix(cf_matrix4, 'Training set (full)')

In [None]:
from sklearn.cluster import MiniBatchKMeans
from imblearn.under_sampling import ClusterCentroids
from collections import Counter
from sklearn.utils import shuffle

# 3-class model trained on the undersampled 3-class training split. It must be
# evaluated on the 3-class test split (X_test3 / y_test3): X_test / y_test come
# from the separate 2-class split and carry 2-class labels.
pred_test3, pred_train3 = learn_and_test(X_resampled_train3, y_resampled_train3, X_test3, y_test3, num_classes=3)


In [None]:
# Class counts of the undersampled 3-class training labels (the printed text
# says "Original", but the variable is the resampled set).
print('Original dataset shape (full):', Counter(y_resampled_train3))


In [None]:
# Plot the first 100 resampled 3-class labels against their index.
# NOTE(review): this rebinds the notebook-level names `x` and `y`; `y`
# previously held the labels returned by get_dataset().
y = y_resampled_train3[:100]
# x now has exactly one coordinate per plotted label (it was built from a
# 3000-element slice before and never used in the plot).
x = np.arange(0, len(y))

# plotting the points
plt.plot(x, y)

# function to show the plot
plt.show()

In [None]:
# Show the first ten x / y values used for the label plot.
print(x[:10], y[:10])

In [None]:
from imblearn.over_sampling import ADASYN
from collections import Counter
import time

# Oversampling technique - adaptive synthetic sampling (ADASYN), 2 classes, full dataset:
# generate more synthetic examples in regions of the feature space where the density of minority examples is low,
# and fewer or none where the density is high.
st = time.time()
sm = ADASYN(random_state=40, n_jobs=8, sampling_strategy='not majority')
# NOTE: rebinds X_train_resampled_ADASYN / y_train_resampled_ADASYN, which
# earlier cells also assign.
X_train_resampled_ADASYN, y_train_resampled_ADASYN = sm.fit_resample(X_train, y_train)

elapsed_time = time.time() - st
print('ADASYN time (full):', elapsed_time/60, 'minutes')

print('Original dataset shape:', Counter(y_train))
print('Resampled dataset shape:', Counter(y_train_resampled_ADASYN))

In [None]:
from imblearn.over_sampling import ADASYN
from collections import Counter

# ADASYN for the 3-class training split, applied twice with
# sampling_strategy='minority'.
# NOTE(review): presumably each pass oversamples only the class that is
# smallest at that moment, so two passes are used for three classes -
# confirm against the imbalanced-learn documentation.
st = time.time()
sm = ADASYN(random_state=40, n_jobs=8, sampling_strategy='minority')
X_train_resampled_ADASYN3, y_train_resampled_ADASYN3 = sm.fit_resample(X_train3, y_train3)
# Second pass runs on the output of the first one.
X_train_resampled_ADASYN3, y_train_resampled_ADASYN3 = sm.fit_resample(X_train_resampled_ADASYN3, y_train_resampled_ADASYN3)

elapsed_time = time.time() - st
print('ADASYN time (full):', elapsed_time/60, 'minutes')

print('Original dataset shape:', Counter(y_train3))
print('Resampled dataset shape:', Counter(y_train_resampled_ADASYN3))

In [None]:
# 2-class model trained on the ADASYN-oversampled training split.
pred_test_ad2, pred_train_ad2 = learn_and_test(X_train_resampled_ADASYN, y_train_resampled_ADASYN, X_test, y_test, num_classes=2)

In [None]:
# 3-class model trained on the ADASYN-oversampled 3-class training split.
pred_test_ad3, pred_train_ad3 = learn_and_test(X_train_resampled_ADASYN3, y_train_resampled_ADASYN3, X_test3, y_test3, num_classes=3)

In [None]:
# Hard-coded 3x3 matrices.
# NOTE(review): presumably confusion matrices copied from the printed output
# of earlier 3-class runs; which run produced which matrix is not recorded
# here - confirm before relying on them.
mat = [[ 990, 838, 1048],
 [1511, 4255, 2369],
 [0,0, 0]]
mat2 = [[1178, 1100, 1134],
 [ 414, 2306,  692],
 [ 586,  938, 1888]]

mat3 = [[2456, 1190, 1806],
 [ 727, 2906,  789],
 [ 279,  240,  618]]
mat4 = [[ 7321,  3651,  5382],
 [ 3061, 10102,  3764],
 [ 3972,  3434, 10208]]

In [None]:
# Heatmap of one of the hard-coded 3x3 matrices (mat4), annotated with its values.
# plt.figure(figsize=(5,4))
sns.heatmap(mat4, annot=True, fmt='g')
plt.title('Confusion Matrix (Training data)')
# Axis label typo fixed ('Actal' -> 'Actual').
plt.ylabel('Actual Values')
plt.xlabel('Predicted Values')
plt.show()