# Transfer Learning and Clustering

This is the code implementation for the project "Detecting Cardiac Arrhythmia Using Poincaré plot of Heart Rate Variability (HRV) and Machine Learning".

In [1]:

# =================================================================
# Required packages
# =================================================================

import pandas as pd
import numpy as np
import os
import pickle

# for loading/processing the images  
from keras.preprocessing.image import load_img 
from keras.applications.vgg16 import preprocess_input 

# models 
from keras.applications.vgg16 import VGG16 
from keras.models import Model

# clustering and dimension reduction
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA


In [2]:

# Image folders to process; each name is substituted into the base path by the
# feature-extraction cell.
subpath_list = [
    'rr_plots',
    'hmrr_plots',
]

# Empty 179-row, 0-column matrix that the feature-extraction cell fills with
# feature columns (179 is presumably the number of recordings - TODO confirm).
X_both = np.empty((179, 0))

In [None]:

# Build the network once: it does not depend on the folder being processed, so
# re-creating it (and re-loading its weights) for every folder is wasted work.
# Using layers[-2] as the output drops the final layer and exposes the
# penultimate layer's activations as the feature vector.
model = VGG16()
model = Model(inputs=model.inputs, outputs=model.layers[-2].output)


def extract_features(file, model):
    """Run one image through `model` and return the resulting features.

    Parameters
    ----------
    file : str or path-like
        Path of the image to load; it is resized to 224x224 on load.
    model : keras Model
        Network used as the feature extractor.

    Returns
    -------
    numpy.ndarray
        Output of `model.predict` for a batch holding the single image
        (leading batch axis of length 1).
    """
    img = load_img(file, target_size=(224, 224))
    img = np.array(img)
    # Add the batch axis expected by the network: (1, height, width, channels).
    reshaped_img = img.reshape(1, 224, 224, 3)
    imgx = preprocess_input(reshaped_img)
    # `use_multiprocessing` only ever applied to generator/Sequence inputs and
    # is no longer accepted by Keras 3, so it is not passed for array input.
    features = model.predict(imgx)
    return features


# PCA-reduced features, one (n_images, 40) array per folder. Collected locally
# and concatenated at the end so that re-running this cell rebuilds X_both
# instead of appending duplicate columns to the previous result.
pca_blocks = []

for subpath in subpath_list:

    path = r"C:\Users\%s"%subpath
    # Retained so the working directory ends up where the original cell left
    # it; image loading below uses full paths and does not rely on it.
    os.chdir(path)

    # os.scandir yields entries in arbitrary order. Sort the names so that row
    # i refers to the same position in every folder; the column-wise
    # concatenation below assumes the folders hold matching recordings with
    # matching names - TODO confirm.
    with os.scandir(path) as files:
        signals = sorted(
            entry.name for entry in files if entry.name.endswith('.jpeg')
        )

    data = {}
    p = r"C:\Users\%s\all_output.pkl"%subpath

    for signal in signals:
        image_path = os.path.join(path, signal)
        try:
            data[signal] = extract_features(image_path, model)
        except Exception as exc:
            # Best effort: report the unreadable image, checkpoint what has
            # been extracted so far, and continue with the next file.
            print("Skipping %s: %r" % (image_path, exc))
            with open(p, 'wb') as checkpoint:
                pickle.dump(data, checkpoint)

    if not data:
        raise ValueError("No image features could be extracted from %s" % path)

    # File names in the same order as the rows of `feat`; the clustering cell
    # uses the value left by the last folder.
    filenames = np.array(list(data.keys()))

    feat = np.array(list(data.values()))
    print(feat.shape)

    # Drop the per-image batch axis: (n_images, 1, n_features) -> (n_images, n_features).
    feat = feat.reshape(-1, feat.shape[2])
    print(feat.shape)

    # random_state pins the randomized SVD solver scikit-learn may select for
    # wide inputs, so repeated runs give the same projection.
    pca = PCA(n_components=40, random_state=0)
    pca.fit(feat)
    vari = pca.explained_variance_ratio_

    X = pca.transform(feat)

    if pca_blocks and X.shape[0] != pca_blocks[0].shape[0]:
        raise ValueError(
            "%s produced %d feature rows but earlier folders produced %d; "
            "the folders must contain the same recordings"
            % (path, X.shape[0], pca_blocks[0].shape[0])
        )
    pca_blocks.append(X)

# One row per image, with the 40 PCA components of each folder side by side.
if pca_blocks:
    X_both = np.concatenate(pca_blocks, axis=1)


In [None]:

# K-means starts from random centroids. Fixing random_state (and n_init, whose
# default differs between scikit-learn versions) keeps the assignment of
# images to cluster 0 / cluster 1 stable across runs.
kmeans = KMeans(n_clusters=2, n_init=10, random_state=0)
kmeans.fit(X_both)

clustering_labels = kmeans.labels_
print(clustering_labels)

# zip() would silently drop trailing items, hiding a mismatch between the
# feature rows and the file names they are supposed to describe.
if len(filenames) != len(clustering_labels):
    raise ValueError(
        "Got %d file names for %d clustered rows"
        % (len(filenames), len(clustering_labels))
    )

# holds the cluster id and the images { id: [images] }
groups = {}
for file, cluster in zip(filenames, clustering_labels):
    # int() turns the NumPy label into a plain Python key.
    groups.setdefault(int(cluster), []).append(file)

# view the file names (presumably one per patient) in each cluster;
# an empty cluster yields an empty list instead of a KeyError
group0 = groups.get(0, [])
group1 = groups.get(1, [])


In [None]:

# Show the file names that K-means assigned to cluster 0.
print(group0)


In [None]:

# Show the file names that K-means assigned to cluster 1.
print(group1)
