# Task 4: Transfer Learning using ResNet

In the following, we implement a transfer learning approach for our ResNet model. We first trained the model on data from the [MIT-BIH Arrhythmia Database](https://physionet.org/content/mitdb/1.0.0/) and then retrained some of its layers on the [PTB Diagnostic ECG Database](https://physionet.org/physiobank/database/ptbdb/).

We investigate three different transfer learning approaches. For each of them, the hyperparameters are tuned with a grid search. The final model is then trained on all available training data, and the classes of the test set are predicted.

Further information can be found in the corresponding section of the report.

In [None]:
import numpy as np
import pandas as pd
import json
from keras.callbacks import ModelCheckpoint, EarlyStopping, LearningRateScheduler
from keras import losses, activations, models
from tensorflow.keras import optimizers
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import f1_score, accuracy_score, roc_auc_score, average_precision_score
from TransferResNet_Models import get_ResNet_frozenBase
from tensorflow import keras
from keras.layers import Dense, Input, Dropout, Conv1D, ReLU, add, MaxPool1D, Flatten, BatchNormalization
from keras import Model

import sys
sys.path.append("../")
from models import *

In [None]:
# Load the two PTBDB CSV files (normal / abnormal) and stack them into one frame.
df_1, df_2 = (
    pd.read_csv(csv_path, header=None)
    for csv_path in ("../input/ptbdb_normal.csv", "../input/ptbdb_abnormal.csv")
)
df = pd.concat([df_1, df_2])

# Column 187 is used as the class label, columns 0..186 as the model input.
# The split is stratified on the label so both parts keep the class ratio.
df_train, df_test = train_test_split(
    df, stratify=df[187], test_size=0.2, random_state=1337
)

feature_columns = list(range(187))

# A trailing axis of size 1 is appended to the inputs for the Conv1D layers.
X = np.expand_dims(np.array(df_train[feature_columns].values), axis=-1)
Y = np.array(df_train[187].values).astype(np.int8)

X_test = np.expand_dims(np.array(df_test[feature_columns].values), axis=-1)
Y_test = np.array(df_test[187].values).astype(np.int8)

## Approach 1: Frozen Base Model

The residual blocks are kept non-trainable and retain the weights trained on the MIT-BIH dataset. Only the fully connected layer is trained on the PTBDB data.

### Hyperparameter tuning

In [None]:
def _build_optimizer(opt, mode, factor):
    """Return a new, unused Keras optimizer for one grid-search configuration.

    Args:
        opt: Optimizer name, one of "Adam", "SGD" or "rmsprop".
        mode: Learning-rate mode. "const" uses ``factor`` as a fixed learning
            rate, "exponential" uses it as the decay rate of an
            ExponentialDecay schedule, and any other value uses it as the
            power of a PolynomialDecay schedule.
        factor: Numeric value interpreted according to ``mode``.

    Raises:
        ValueError: If ``opt`` is not a supported optimizer name.
    """
    if mode == "const":
        lr = factor
    elif mode == "exponential":
        lr = optimizers.schedules.ExponentialDecay(
            initial_learning_rate=0.005, decay_steps=1000, decay_rate=factor
        )
    else:
        lr = optimizers.schedules.PolynomialDecay(
            initial_learning_rate=0.01, decay_steps=1000, power=factor
        )

    if opt == "Adam":
        return optimizers.Adam(learning_rate=lr)
    if opt == "SGD":
        return optimizers.SGD(learning_rate=lr)
    if opt == "rmsprop":
        return optimizers.RMSprop(learning_rate=lr)
    raise ValueError(f"Unknown optimizer name: {opt!r}")


# Maps "<optimizer>_<mode>_<factor>" to the per-fold [loss, acc] results.
grid_search_results = {}

opts = ["Adam", "rmsprop"]
learning_rates = [("const", 0.0001), ("const", 0.0005), ("const", 0.001)]

for opt in opts:
    for mode, factor in learning_rates:
        config_key = f"{opt}_{mode}_{factor}"
        grid_search_results[config_key] = []

        folds = KFold(n_splits=5, shuffle=True, random_state=123).split(X, Y)
        for fold, (train, val) in enumerate(folds):
            print(f"FOLD: {fold} OPT {opt} MODDE {mode} FACTOR {factor}")

            # Rebuild the pretrained MIT-BIH network from scratch for every fold.
            resnet_model = ResNetStandard(5, 0.1, optimizer=optimizers.RMSprop(0.001)).model_builder()
            resnet_model.load_weights("Results/ResNet_MITBIH.h5")

            # Freeze layers 0 .. len-5 (inclusive); the last four layers of the
            # pretrained model are left trainable.
            for i, layer in enumerate(resnet_model.layers):
                layer.trainable = False
                if i >= len(resnet_model.layers) - 5:
                    break

            # Replace the pretrained output layer by a single sigmoid unit that
            # is fed from the second-to-last layer.
            output = Dense(1, activation=activations.sigmoid)(
                resnet_model.layers[len(resnet_model.layers) - 2].output
            )
            transfered_model = Model(inputs=resnet_model.input, outputs=output)

            # A fresh optimizer per fold: an optimizer instance keeps state tied
            # to the model it was first used with, so sharing one across folds
            # would carry that state into the next model.
            optimizer = _build_optimizer(opt, mode, factor)
            transfered_model.compile(loss=losses.binary_crossentropy, optimizer=optimizer, metrics=['acc'])

            # train model
            file_path_frozen_base = "Results/Transfer_ResNetFrozenBase_Hyperparam.h5"
            checkpoint_frozen_base = ModelCheckpoint(
                file_path_frozen_base, monitor='val_acc', verbose=1, save_best_only=True, mode='max'
            )
            early_frozen_base = EarlyStopping(monitor='val_acc', patience=7)
            callbacks_list_frozen_base = [checkpoint_frozen_base, early_frozen_base]

            transfered_model.fit(
                X[train], Y[train],
                epochs=100, batch_size=128, verbose=1,
                callbacks=callbacks_list_frozen_base,
                validation_data=(X[val], Y[val]),
            )

            # Scored with the weights held in memory after fit(); the checkpoint
            # file is not reloaded here.
            grid_search_results[config_key].append(transfered_model.evaluate(X[val], Y[val]))

            # Persist after every fold so partial results survive an interruption.
            with open("Results/FrozenBase_Hyperparam.json", "w") as outfile:
                json.dump(grid_search_results, outfile)

In [None]:
# Summarise the frozen-base grid search: average the per-fold [loss, acc]
# pairs of every configuration and show the configuration with the best
# mean validation accuracy.
with open('Results/FrozenBase_Hyperparam.json') as f:
    grid_search_results = json.load(f)

means = []
for k, fold_scores in grid_search_results.items():
    if not fold_scores:
        # Configuration was registered but no fold finished; nothing to average.
        continue
    mean_loss = sum(elt[0] for elt in fold_scores) / len(fold_scores)
    mean_acc = sum(elt[1] for elt in fold_scores) / len(fold_scores)
    means.append((k, mean_loss, mean_acc))

# Rank by mean accuracy (tuple index 2), best first. Sorting the bare tuples
# would order them by configuration name instead.
means.sort(key=lambda entry: entry[2], reverse=True)
means[0]

Based on the grid search the optimal parameters for the model are: 

optimizer: **Adam**

learning rate: **constant of 0.001**

These parameters obtained performances of (average loss,  average accuracy): 

**(0.07962191924452781, 0.9737127780914306)**

### Train Final Model

In [None]:
# Rebuild the network used for MIT-BIH and restore its pretrained weights.
resnet_model = ResNetStandard(5, 0.1, optimizer=optimizers.RMSprop(0.001)).model_builder()
resnet_model.load_weights("Results/ResNet_MITBIH.h5")

# Freeze everything except the last four layers of the pretrained model
# (at least the first layer is always frozen).
pretrained_layers = resnet_model.layers
for layer in pretrained_layers[:max(len(pretrained_layers) - 4, 1)]:
    layer.trainable = False

# New binary head: one sigmoid unit attached to the second-to-last layer.
penultimate_layer = resnet_model.layers[len(resnet_model.layers) - 2]
output = Dense(1, activation=activations.sigmoid)(penultimate_layer.output)
transfered_model = Model(inputs=resnet_model.input, outputs=output)
transfered_model.compile(
    optimizer=optimizers.Adam(0.001),
    loss=losses.binary_crossentropy,
    metrics=['acc'],
)

# Stop after 10 epochs without val_acc improvement; keep the best weights on disk.
file_path_frozen_base = "Results/Transfer_ResNetFrozenBase.h5"
early_frozen_base = EarlyStopping(monitor='val_acc', patience=10)
checkpoint_frozen_base = ModelCheckpoint(
    file_path_frozen_base,
    monitor='val_acc',
    mode='max',
    save_best_only=True,
    verbose=1,
)
callbacks_list_frozen_base = [checkpoint_frozen_base, early_frozen_base]

transfered_model.fit(
    X,
    Y,
    batch_size=128,
    epochs=200,
    validation_split=0.1,
    callbacks=callbacks_list_frozen_base,
    verbose=1,
)

In [None]:
# Evaluate the frozen-base model on the held-out test split using the
# checkpointed weights.
transfered_model.load_weights("Results/Transfer_ResNetFrozenBase.h5")

# Keep the raw sigmoid outputs: the ranking metrics (AUROC / AUPRC) need
# continuous scores, while F1 and accuracy need hard 0/1 labels.
prob_test = transfered_model.predict(X_test)
pred_test = (prob_test > 0.5).astype(np.int8)

f1 = f1_score(Y_test, pred_test)

print("Test f1 score : %s "% f1)

acc = accuracy_score(Y_test, pred_test)

print("Test accuracy : %s "% acc)

# Computed on the scores, not on the thresholded labels: with hard labels the
# curve collapses to a single operating point. Values may therefore differ
# from numbers that were obtained with thresholded predictions.
auroc = roc_auc_score(Y_test, prob_test.ravel())

print("Test AUROC : %s "% auroc)

auprc = average_precision_score(Y_test, prob_test.ravel())

print("Test AUPRC : %s "% auprc)

**Performances:**

Test f1 score : 0.9742979533555449 

Test accuracy : 0.9628993473033322 

Test AUROC : 0.9541607322004235 

Test AUPRC : 0.9681505678692455

## Approach 2: Retrain whole model

The last layer of the model is changed so it matches the PTBDB data. Then all weights are retrained with PTBDB data.

### Hyperparameter tuning

In [None]:
def _build_optimizer(opt, mode, factor):
    """Return a new, unused Keras optimizer for one grid-search configuration.

    Args:
        opt: Optimizer name, one of "Adam", "SGD" or "rmsprop".
        mode: Learning-rate mode. "const" uses ``factor`` as a fixed learning
            rate, "exponential" uses it as the decay rate of an
            ExponentialDecay schedule, and any other value uses it as the
            power of a PolynomialDecay schedule.
        factor: Numeric value interpreted according to ``mode``.

    Raises:
        ValueError: If ``opt`` is not a supported optimizer name.
    """
    if mode == "const":
        lr = factor
    elif mode == "exponential":
        lr = optimizers.schedules.ExponentialDecay(
            initial_learning_rate=0.005, decay_steps=1000, decay_rate=factor
        )
    else:
        lr = optimizers.schedules.PolynomialDecay(
            initial_learning_rate=0.01, decay_steps=1000, power=factor
        )

    if opt == "Adam":
        return optimizers.Adam(learning_rate=lr)
    if opt == "SGD":
        return optimizers.SGD(learning_rate=lr)
    if opt == "rmsprop":
        return optimizers.RMSprop(learning_rate=lr)
    raise ValueError(f"Unknown optimizer name: {opt!r}")


# Maps "<optimizer>_<mode>_<factor>" to the per-fold [loss, acc] results.
grid_search_results = {}

opts = ["Adam", "rmsprop"]
learning_rates = [("const", 0.0001), ("const", 0.0005), ("const", 0.001)]

for opt in opts:
    for mode, factor in learning_rates:
        config_key = f"{opt}_{mode}_{factor}"
        grid_search_results[config_key] = []

        folds = KFold(n_splits=5, shuffle=True, random_state=123).split(X, Y)
        for fold, (train, val) in enumerate(folds):
            print(f"FOLD: {fold} OPT {opt} MODDE {mode} FACTOR {factor}")

            # Rebuild the pretrained MIT-BIH network from scratch for every fold.
            resnet_model = ResNetStandard(5, 0.1, optimizer=optimizers.RMSprop(0.001)).model_builder()
            resnet_model.load_weights("Results/ResNet_MITBIH.h5")

            # Replace the pretrained output layer by a single sigmoid unit that
            # is fed from the second-to-last layer. No layer is frozen here.
            output = Dense(1, activation=activations.sigmoid)(
                resnet_model.layers[len(resnet_model.layers) - 2].output
            )
            transfered_model = Model(inputs=resnet_model.input, outputs=output)

            # A fresh optimizer per fold: an optimizer instance keeps state tied
            # to the model it was first used with, so sharing one across folds
            # would carry that state into the next model.
            optimizer = _build_optimizer(opt, mode, factor)
            transfered_model.compile(loss=losses.binary_crossentropy, optimizer=optimizer, metrics=['acc'])

            # train model
            file_path_fullretrain = "Results/Transfer_ResNetFullRetrain_Hyperparam.h5"
            checkpoint_fullretrain = ModelCheckpoint(
                file_path_fullretrain, monitor='val_acc', verbose=1, save_best_only=True, mode='max'
            )
            early_fullretrain = EarlyStopping(monitor='val_acc', patience=7)
            callbacks_list_fullretrain = [checkpoint_fullretrain, early_fullretrain]

            # NOTE(review): epochs=1 makes the EarlyStopping patience of 7
            # ineffective and differs from the 100 epochs used in the
            # frozen-base grid search -- possibly a debugging leftover; confirm
            # before re-running.
            transfered_model.fit(
                X[train], Y[train],
                epochs=1, batch_size=128, verbose=1,
                callbacks=callbacks_list_fullretrain,
                validation_data=(X[val], Y[val]),
            )

            # Scored with the weights held in memory after fit(); the checkpoint
            # file is not reloaded here.
            grid_search_results[config_key].append(transfered_model.evaluate(X[val], Y[val]))

            # Persist after every fold so partial results survive an interruption.
            with open("Results/FullRetrain_Hyperparam.json", "w") as outfile:
                json.dump(grid_search_results, outfile)

In [None]:
# Summarise the full-retrain grid search: average the per-fold [loss, acc]
# pairs of every configuration and list them from best to worst mean
# validation accuracy.
with open('Results/FullRetrain_Hyperparam.json') as f:
    grid_search_results = json.load(f)

means = []
for k, fold_scores in grid_search_results.items():
    if not fold_scores:
        # Configuration was registered but no fold finished; nothing to average.
        continue
    mean_loss = sum(elt[0] for elt in fold_scores) / len(fold_scores)
    mean_acc = sum(elt[1] for elt in fold_scores) / len(fold_scores)
    means.append((k, mean_loss, mean_acc))

# Rank by mean accuracy (tuple index 2), best first. Sorting the bare tuples
# would order them by configuration name instead.
means.sort(key=lambda entry: entry[2], reverse=True)
means

Based on the grid search the optimal parameters for the model are: 

optimizer: **Adam**

learning rate: **constant of 0.001**

These parameters obtained performances of (average loss,  average accuracy): 

**(0.06563280522823334, 0.9768921613693238)**

### Train Final Model

In [None]:
# Final model for approach 2: start from the MIT-BIH weights, swap the output
# layer for a binary head and retrain every layer on the PTBDB training data.
resnet_model = ResNetStandard(5,0.1,optimizer=optimizers.RMSprop(0.001)).model_builder()
resnet_model.load_weights("Results/ResNet_MITBIH.h5")

# Replace the pretrained output layer by a single sigmoid unit that is fed
# from the second-to-last layer.
output = Dense(1, activation=activations.sigmoid)(resnet_model.layers[len(resnet_model.layers)-2].output)
transfered_model = Model(inputs=resnet_model.input, outputs=output)
transfered_model.compile(loss=losses.binary_crossentropy, optimizer=optimizers.Adam(0.001), metrics=['acc'])

# train model
# The checkpoint path must be the file the evaluation cell loads
# ("Results/Transfer_ResNet_FullRetrain.h5"); writing to a differently named
# file would make the evaluation use stale or missing weights.
file_path_frozen_base = "Results/Transfer_ResNet_FullRetrain.h5"
checkpoint_frozen_base = ModelCheckpoint(file_path_frozen_base, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
early_frozen_base = EarlyStopping(monitor='val_acc', patience=10)
callbacks_list_frozen_base = [checkpoint_frozen_base, early_frozen_base]

transfered_model.fit(X, Y, epochs=200, batch_size=128, verbose=1, callbacks=callbacks_list_frozen_base, validation_split=0.1)

In [None]:
# Evaluate the fully retrained model on the held-out test split using the
# checkpointed weights.
transfered_model.load_weights("Results/Transfer_ResNet_FullRetrain.h5")

# Keep the raw sigmoid outputs: the ranking metrics (AUROC / AUPRC) need
# continuous scores, while F1 and accuracy need hard 0/1 labels.
prob_test = transfered_model.predict(X_test)
pred_test = (prob_test > 0.5).astype(np.int8)

f1 = f1_score(Y_test, pred_test)

print("Test f1 score : %s "% f1)

acc = accuracy_score(Y_test, pred_test)

print("Test accuracy : %s "% acc)

# Computed on the scores, not on the thresholded labels: with hard labels the
# curve collapses to a single operating point. Values may therefore differ
# from numbers that were obtained with thresholded predictions.
auroc = roc_auc_score(Y_test, prob_test.ravel())

print("Test AUROC : %s "% auroc)

auprc = average_precision_score(Y_test, prob_test.ravel())

print("Test AUPRC : %s "% auprc)

**Performances**:

Test f1 score : 0.9825389334591789

Test accuracy : 0.9745791824115424

Test AUROC : 0.9618680896056379 

Test AUPRC : 0.9723153858829998 

## Approach 3: first frozen base model, then re-training whole model.

The last layer of the model is changed so it matches the PTBDB data. Then the fully connected layer is retrained with the PTBDB data. After convergence, all weights are retrained.

In [None]:
# Approach 3, exploratory run: train the new output head first, then unfreeze
# every layer and continue training the whole network.
# NOTE(review): presumably get_ResNet_frozenBase returns the ResNet with its
# base layers already non-trainable -- confirm in TransferResNet_Models.
frozen_model = get_ResNet_frozenBase(5, 0.1)
frozen_model.load_weights("Results/ResNet_MITBIH_PTBDB_params.h5")
number_layers = len(frozen_model.layers)

# Attach a single sigmoid unit to the second-to-last layer; the loaded model's
# own last layer is not part of new_model.
output = Dense(1, activation=activations.sigmoid)(frozen_model.layers[number_layers-2].output)
new_model = Model(inputs=frozen_model.input, outputs=output)
new_model.compile(loss=losses.binary_crossentropy, optimizer=optimizers.RMSprop(0.001), metrics=['acc'])
new_model.summary()

# Phase 1: train with the trainable flags as delivered by the loaded model.
# The best weights by val_acc are checkpointed; training stops after 7 epochs
# without val_acc improvement.
file_path_frozen_base = f"Results/Transfer_ResNet_FrozenBaseRetrainWhole.h5"
checkpoint_frozen_base = ModelCheckpoint(file_path_frozen_base, monitor='val_acc', verbose=1, save_best_only=True, mode='max')    
early_frozen_base = EarlyStopping(monitor='val_acc', patience=7)
callbacks_list_frozen_base = [checkpoint_frozen_base, early_frozen_base] 

new_model.fit(X, Y, epochs=200, batch_size=128, verbose=2, callbacks=callbacks_list_frozen_base, validation_split=0.1)

# Phase 2: make every layer trainable again.
for layer in new_model.layers:
    layer.trainable = True

# Recompile so the changed trainable flags take effect; phase 2 uses a lower
# learning rate (0.0005 instead of 0.001).
new_model.compile(loss=losses.binary_crossentropy, optimizer=optimizers.RMSprop(0.0005), metrics=['acc'])

# The same callback objects and checkpoint path are reused for phase 2.
# NOTE(review): the checkpoint callback may still hold its best val_acc from
# phase 1 -- confirm whether that carry-over is intended.
new_model.fit(X, Y, epochs=200, batch_size=128, verbose=2, callbacks=callbacks_list_frozen_base, validation_split=0.1)

### Hyperparameter tuning

In [None]:
def _build_optimizer(opt, mode, factor):
    """Return a new, unused Keras optimizer for one grid-search configuration.

    Args:
        opt: Optimizer name, one of "Adam", "SGD" or "rmsprop".
        mode: Learning-rate mode. "const" uses ``factor`` as a fixed learning
            rate, "exponential" uses it as the decay rate of an
            ExponentialDecay schedule, and any other value uses it as the
            power of a PolynomialDecay schedule.
        factor: Numeric value interpreted according to ``mode``.

    Raises:
        ValueError: If ``opt`` is not a supported optimizer name.
    """
    if mode == "const":
        lr = factor
    elif mode == "exponential":
        lr = optimizers.schedules.ExponentialDecay(
            initial_learning_rate=0.005, decay_steps=1000, decay_rate=factor
        )
    else:
        lr = optimizers.schedules.PolynomialDecay(
            initial_learning_rate=0.01, decay_steps=1000, power=factor
        )

    if opt == "Adam":
        return optimizers.Adam(learning_rate=lr)
    if opt == "SGD":
        return optimizers.SGD(learning_rate=lr)
    if opt == "rmsprop":
        return optimizers.RMSprop(learning_rate=lr)
    raise ValueError(f"Unknown optimizer name: {opt!r}")


# Maps "<optimizer>_<mode>_<factor>_<mode2>_<factor2>" to the per-fold
# [loss, acc] results. (mode, factor) configures phase 1 (frozen base),
# (mode2, factor2) configures phase 2 (all layers trainable).
grid_search_results = {}

# Only RMSprop is searched in this run; the full list is kept for reference.
#opts = ["Adam","rmsprop"]
opts = ["rmsprop"]
learning_rates = [("const", 0.0001), ("const", 0.0005), ("const", 0.001)]

for opt in opts:
    for mode, factor in learning_rates:
        for mode2, factor2 in learning_rates:
            config_key = f"{opt}_{mode}_{factor}_{mode2}_{factor2}"
            grid_search_results[config_key] = []

            folds = KFold(n_splits=5, shuffle=True, random_state=123).split(X, Y)
            for fold, (train, val) in enumerate(folds):
                print(f"FOLD: {fold} OPT {opt} MODDE {mode} FACTOR {factor}")

                # Rebuild the pretrained MIT-BIH network from scratch for every fold.
                resnet_model = ResNetStandard(5, 0.1, optimizer=optimizers.RMSprop(0.001)).model_builder()
                resnet_model.load_weights("Results/ResNet_MITBIH.h5")

                # Freeze layers 0 .. len-5 (inclusive); the last four layers of
                # the pretrained model are left trainable.
                for i, layer in enumerate(resnet_model.layers):
                    layer.trainable = False
                    if i >= len(resnet_model.layers) - 5:
                        break

                # Replace the pretrained output layer by a single sigmoid unit
                # that is fed from the second-to-last layer.
                output = Dense(1, activation=activations.sigmoid)(
                    resnet_model.layers[len(resnet_model.layers) - 2].output
                )
                transfered_model = Model(inputs=resnet_model.input, outputs=output)

                # Fresh optimizers per fold so no optimizer state is shared
                # between the models of different folds.
                optimizer = _build_optimizer(opt, mode, factor)
                transfered_model.compile(loss=losses.binary_crossentropy, optimizer=optimizer, metrics=['acc'])

                # train model
                file_path_frozen_retrain = "Results/Transfer_FrozenBaseRetrainWhole_Hyperparam.h5"
                checkpoint_frozen_retrain = ModelCheckpoint(
                    file_path_frozen_retrain, monitor='val_acc', verbose=1, save_best_only=True, mode='max'
                )
                early_frozen_retrain = EarlyStopping(monitor='val_acc', patience=7)
                callbacks_list_frozen_retrain = [checkpoint_frozen_retrain, early_frozen_retrain]

                # Phase 1. Train on the training fold only: fitting on all of
                # X, Y would include the validation fold and inflate the
                # cross-validation score.
                transfered_model.fit(
                    X[train], Y[train],
                    epochs=200, batch_size=128, verbose=1,
                    callbacks=callbacks_list_frozen_retrain,
                    validation_data=(X[val], Y[val]),
                )

                # Phase 2: unfreeze everything and recompile so the changed
                # trainable flags take effect.
                for layer in transfered_model.layers:
                    layer.trainable = True

                optimizer2 = _build_optimizer(opt, mode2, factor2)
                transfered_model.compile(loss=losses.binary_crossentropy, optimizer=optimizer2, metrics=['acc'])
                transfered_model.fit(
                    X[train], Y[train],
                    epochs=200, batch_size=128, verbose=1,
                    callbacks=callbacks_list_frozen_retrain,
                    validation_data=(X[val], Y[val]),
                )

                grid_search_results[config_key].append(
                    transfered_model.evaluate(X[val], Y[val], verbose=0)
                )

                # Persist after every fold so partial results survive an
                # interruption.
                # NOTE(review): this writes the *_RMSPROP.json file, whereas
                # the summary cell reads FrozenBaseRetrainWhole_Hyperparam.json
                # -- confirm which file is meant to be analysed.
                with open("Results/FrozenBaseRetrainWhole_Hyperparam_RMSPROP.json", "w") as outfile:
                    json.dump(grid_search_results, outfile)

In [None]:
# Summarise the two-phase grid search: average the per-fold [loss, acc] pairs
# of every configuration and list them from best to worst mean validation
# accuracy.
with open('Results/FrozenBaseRetrainWhole_Hyperparam.json') as f:
    grid_search_results = json.load(f)

means = []
for k, fold_scores in grid_search_results.items():
    if not fold_scores:
        # Configuration was registered but no fold finished; nothing to average.
        continue
    mean_loss = sum(elt[0] for elt in fold_scores) / len(fold_scores)
    mean_acc = sum(elt[1] for elt in fold_scores) / len(fold_scores)
    means.append((k, mean_loss, mean_acc))

# Rank by mean accuracy (tuple index 2), best first. Sorting the bare tuples
# would order them by configuration name instead.
means.sort(key=lambda entry: entry[2], reverse=True)
means

Based on the grid search the optimal parameters for the model are: 

optimizer1: **Adam**       learning rate1: **constant of 0.0005**

optimizer2: **Adam**       learning rate2: **constant of 0.0005**

These parameters obtained performances of (average loss,  average accuracy): 

**(0.00659539841581136, 0.9987114071846008)**

('rmsprop_const_0.001_const_0.0005',
  0.006532190646976232,
  0.9986255288124084),

### Train Final Model

In [None]:
# Final model for approach 3: train the new head on top of the frozen
# MIT-BIH base first, then unfreeze every layer and continue training.
resnet_model = ResNetStandard(5,0.1,optimizer=optimizers.RMSprop(0.001)).model_builder()
resnet_model.load_weights("Results/ResNet_MITBIH.h5")

# Freeze layers 0 .. len-5 (inclusive); the last four layers of the
# pretrained model are left trainable.
for i, layer in enumerate(resnet_model.layers):
    layer.trainable = False
    if i >= len(resnet_model.layers) - 5:
        break

# Replace the pretrained output layer by a single sigmoid unit that is fed
# from the second-to-last layer.
output = Dense(1, activation=activations.sigmoid)(resnet_model.layers[len(resnet_model.layers)-2].output)
transfered_model = Model(inputs=resnet_model.input, outputs=output)
transfered_model.compile(loss=losses.binary_crossentropy, optimizer=optimizers.Adam(0.0005), metrics=['acc'])

# train model
file_path_frozen_retrain = "Results/Transfer_ResNet_FrozenBaseRetrainWhole.h5"
checkpoint_frozen_retrain = ModelCheckpoint(file_path_frozen_retrain, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
early_frozen_retrain = EarlyStopping(monitor='val_acc', patience=10)
callbacks_list_frozen_retrain = [checkpoint_frozen_retrain, early_frozen_retrain]

# Phase 1. Validate on a held-out 10% split, like phase 2 below. The previous
# validation_data=(X[val], Y[val]) relied on a fold index left over from the
# grid-search cell and validated on samples that were also trained on.
transfered_model.fit(X, Y, epochs=200, batch_size=128, verbose=1, callbacks=callbacks_list_frozen_retrain, validation_split=0.1)

# Phase 2: unfreeze everything and recompile so the changed trainable flags
# take effect.
for layer in transfered_model.layers:
    layer.trainable = True

transfered_model.compile(loss=losses.binary_crossentropy, optimizer=optimizers.Adam(0.0005), metrics=['acc'])
transfered_model.fit(X, Y, epochs=200, batch_size=128, verbose=1, callbacks=callbacks_list_frozen_retrain, validation_split=0.1)

In [None]:
# Evaluate the two-phase model on the held-out test split using the
# checkpointed weights.
transfered_model.load_weights("Results/Transfer_ResNet_FrozenBaseRetrainWhole.h5")

# Keep the raw sigmoid outputs: the ranking metrics (AUROC / AUPRC) need
# continuous scores, while F1 and accuracy need hard 0/1 labels.
prob_test = transfered_model.predict(X_test)
pred_test = (prob_test > 0.5).astype(np.int8)

f1 = f1_score(Y_test, pred_test)

print("Test f1 score : %s "% f1)

acc = accuracy_score(Y_test, pred_test)

print("Test accuracy : %s "% acc)

# Computed on the scores, not on the thresholded labels: with hard labels the
# curve collapses to a single operating point. Values may therefore differ
# from numbers that were obtained with thresholded predictions.
auroc = roc_auc_score(Y_test, prob_test.ravel())

print("Test AUROC : %s "% auroc)

auprc = average_precision_score(Y_test, prob_test.ravel())

print("Test AUPRC : %s "% auprc)

**Performances:**

Test f1 score : 0.9961977186311787 

Test accuracy : 0.9945036070079011 

Test AUROC : 0.9923923181054244 

Test AUPRC : 0.9944719387044904 