In [None]:
#imports
import os

from google.colab import drive
import numpy as np
import matplotlib as plt
from matplotlib import pyplot
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from keras.models import load_model
from sklearn.utils import shuffle

In [None]:
#loading data from drive
# Mount Google Drive inside the Colab runtime, then make the project folder the working directory.
drive.mount('/content/drive')
os.chdir('/content/drive/My Drive/ICASSP-2021/')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Toggles that may need to be changed every round are marked with `###`.

In [None]:
#directory that holds folders with each test set
#each test set folder will contain numpy arrays with prepared images for each patient
# The relative `source` / `source2` folders below are later opened relative to the working
# directory, so this chdir determines where they are looked up.
os.chdir('/content/drive/My Drive/ICASSP-2021/extra-test-arrays/')

#test set 1: Change source and name here
source = 'covid/'      ### subfolder with test set images
name = 'covid4.csv'      ### name of predictions file to save

#test set 2: change name and source
source2 = 'normal/'   ### subfolder with test set images
# NOTE(review): this rebinds `name`, discarding the test-set-1 value assigned above;
# once this cell has run, `name` holds only the test-set-2 filename.
name = 'normal4.csv'    ### name of predictions file to save

In [None]:

quarter = 4     ### the quarter of the test data to get predictions and new data from
# String forms of the current and previous quarter; they are used as suffixes when building
# the model / data / recall file names further down.
this_q = str(quarter)
previous_q = str(quarter - 1)
# NOTE: old_data_dest and new_data_dest currently point at the same folder.
old_data_dest = '/content/drive/My Drive/ICASSP-2021/extra-online-data/'      ### location of data used to train most recent model
new_data_dest = '/content/drive/My Drive/ICASSP-2021/extra-online-data/'      ### location for updated data to be saved
preds_dest = '/content/drive/My Drive/ICASSP-2021/extra-results/'      ### location for predictions files to be saved

In [None]:

def extract_avgs(folder, model, quarter):
    """Run `model` on one quarter of the arrays stored in `folder`.

    The directory listing is split into four consecutive slices; `quarter`
    (1-based) selects which slice is processed. Every selected file is loaded
    with `np.load`, passed through `pre_p`, and fed to `model.predict`.

    Parameters
    ----------
    folder : str
        Directory containing the `.npy` files (one per patient, according to
        the notebook's configuration comments).
    model : object
        Anything exposing `predict(batch)` that returns one row per input
        item with at least two scores (`row[0]`, `row[1]`).
    quarter : int
        Which quarter of the listing to process (1..4).

    Returns
    -------
    avgs : list
        Per-file mean of the predictions over axis 0.
    names : list of str
        File names, in processing order.
    pos_list : list of list of int
        Per file, the row indices whose score at index 1 exceeds 0.9.
    neg_list : list of list of int
        Per file, the row indices whose score at index 0 exceeds 0.9.
    lens : list of int
        Number of items in each loaded file.

    Notes
    -----
    The listing is read once and the files are opened through
    `os.path.join(folder, ...)`; the working directory is never changed, so
    an exception cannot leave the notebook in the wrong directory. The order
    returned by `os.listdir` is kept as-is (not sorted) so that the selected
    quarter matches any other code that lists the same folder.
    """
    entries = os.listdir(folder)
    start = round(len(entries) * 0.25 * (quarter - 1))
    end = round(len(entries) * 0.25 * quarter)

    avgs = []
    names = []
    pos_list = []
    neg_list = []
    lens = []
    for entry in entries[start:end]:
        slices = np.load(os.path.join(folder, entry))
        lens.append(len(slices))
        names.append(entry)
        probs = model.predict(pre_p(slices))
        # Indices of the confidently classified rows for each of the two outputs.
        pos_list.append([idx for idx, row in enumerate(probs) if row[1] > 0.9])
        neg_list.append([idx for idx, row in enumerate(probs) if row[0] > 0.9])
        # average the guesses
        avgs.append(np.mean(probs, axis=0))

    return avgs, names, pos_list, neg_list, lens

def pre_p(np_array):
    """Scale an array by its own maximum and repeat it three times along the last axis.

    Parameters
    ----------
    np_array : array-like
        Input values. Presumably a stack of single-channel images that the
        models expect as 3-channel input -- TODO confirm against the models.

    Returns
    -------
    numpy.ndarray
        `np_array / max(np_array)` concatenated with itself three times on
        axis -1, so the last dimension is three times as long.

    Notes
    -----
    No resizing is performed here. An input whose maximum is 0 is returned
    unscaled rather than divided by zero, which would fill the result with NaN.
    """
    max_value = np.max(np_array)
    # Guard the 0/0 case: keep the values unchanged when there is nothing to scale by.
    scale = max_value if max_value != 0 else 1
    scaled = np.true_divide(np_array, scale)
    return np.concatenate((scaled, scaled, scaled), axis=-1)


In [None]:
def get_healthy_preds(X_val, pos_list, lens, h_recalls):
    """Turn averaged two-score predictions into 0/1 decisions for the first stage.

    A row gets 0 when its first score, weighted by `h_recalls[1] / h_recalls[0]`,
    is strictly greater than five times its second score; otherwise it gets 1.
    (`get_final_preds` reports a 0 from this stage as "Normal".)

    `pos_list` and `lens` are accepted but not used.
    """
    ratio = h_recalls[1] / h_recalls[0]
    return [0 if (ratio * row[0]) > (5 * row[1]) else 1 for row in X_val]

In [None]:
def get_covCap_preds(X_val, cap_list, cov_list, pos_list, c_recalls):
    """Turn averaged two-score predictions into 0/1 decisions for the second stage.

    A row gets 0 when its first score, weighted by `c_recalls[1] / c_recalls[0]`,
    is strictly greater than its second score; otherwise it gets 1.

    As a diagnostic, for every entry of `cov_list` the function also prints
    how many indices of `cov_list[i]` ("0: ") and of `cap_list[i]` ("1: ")
    appear in `pos_list[i]`, followed by a blank line. These counts do not
    influence the returned predictions.
    """
    ratio = c_recalls[1] / c_recalls[0]
    preds = [0 if (ratio * row[0]) > row[1] else 1 for row in X_val]

    for patient in range(len(cov_list)):
        count0 = sum(1 for idx in cov_list[patient] if idx in pos_list[patient])
        count1 = sum(1 for idx in cap_list[patient] if idx in pos_list[patient])
        print('0: ' + str(count0))
        print('1: ' + str(count1))
        print()
    return preds

In [None]:
def get_final_preds(healthy_preds, covCap_preds):
    """Merge the two stage decisions into one class name per patient.

    A first-stage 0 yields "Normal". Otherwise the second-stage decision is
    consulted: 0 yields "COVID-19", anything else yields "CAP".
    """
    labels = []
    for idx, healthy_flag in enumerate(healthy_preds):
        if healthy_flag == 0:
            labels.append("Normal")
        elif covCap_preds[idx] == 0:
            labels.append("COVID-19")
        else:
            labels.append("CAP")
    return labels

In [None]:
def check_and_combine(final_preds, names1, names2):
    """Build the predictions table after checking both name lists agree.

    The two name lists are compared position by position; at the first
    mismatch a warning is printed and the comparison stops. The table is
    built from `names1` and `final_preds` regardless of the outcome.

    Returns a DataFrame with the columns 'Patient' and 'Class'.
    """
    for position, first_name in enumerate(names1):
        if first_name != names2[position]:
            print("error: names out of order")
            break
    return pd.DataFrame(data={'Patient': names1, 'Class': final_preds})

In [None]:
def save_to_csv(df, dest_folder, name):
    """Write `df` as CSV (without the index column) to `dest_folder + name`.

    The two path pieces are joined by plain string concatenation, so
    `dest_folder` must already end with a path separator.
    """
    target_path = dest_folder + name
    df.to_csv(target_path, index=False)

In [None]:
def get_results(name, dest_folder, source_folder, healthy_model, covCap_model, quarter, h_recalls, c_recalls):
    """Classify one quarter of a test-set folder with both models and save the result.

    The folder is processed twice with `extract_avgs`: once with
    `healthy_model` and once with `covCap_model`. The averaged scores are
    converted to decisions by `get_healthy_preds` / `get_covCap_preds`,
    merged by `get_final_preds`, tabulated by `check_and_combine`, and
    written to `dest_folder + name` by `save_to_csv`.

    Returns
    -------
    tuple
        `(final_preds, pos_list, neg_list, cap_list, cov_list)` where
        `pos_list` / `neg_list` are the index lists from the first pass and
        `cap_list` / `cov_list` are the index lists from the second pass
        (score at index 1 > 0.9 and score at index 0 > 0.9 respectively).
    """
    # First pass: healthy-vs-sick model.
    first_pass = extract_avgs(source_folder, healthy_model, quarter)
    healthy_avgs, healthy_names, pos_list, neg_list, lens = first_pass
    healthy_preds = get_healthy_preds(healthy_avgs, pos_list, lens, h_recalls)

    # Second pass: COVID-vs-CAP model over the same files.
    second_pass = extract_avgs(source_folder, covCap_model, quarter)
    covCap_avgs, covCap_names, cap_list, cov_list, lens = second_pass
    covCap_preds = get_covCap_preds(covCap_avgs, cap_list, cov_list, pos_list, c_recalls)

    final_preds = get_final_preds(healthy_preds, covCap_preds)
    table = check_and_combine(final_preds, healthy_names, covCap_names)
    save_to_csv(table, dest_folder, name)

    return final_preds, pos_list, neg_list, cap_list, cov_list

In [None]:
def save_new_data(old_data, new_data1, new_data2, name, dest_folder):
    """Append two new arrays to `old_data` along axis 0 and save the result.

    The combined array is written with `np.save` to `dest_folder + name`
    (plain string concatenation, so `dest_folder` must end with a separator).
    """
    combined = old_data
    for addition in (new_data1, new_data2):
        combined = np.concatenate((combined, addition), axis=0)
    np.save(dest_folder + name, combined)

def saving_pre_p(np_array):
    """Resize a batch of images to 224 x 224 with bilinear interpolation.

    Unlike `pre_p`, this applies no scaling and no channel replication;
    the input is only passed through a Keras `Resizing` layer.

    Parameters
    ----------
    np_array : array-like
        Image batch accepted by the Keras `Resizing` layer.

    Returns
    -------
    The output of the `Resizing` layer (a TensorFlow tensor, not a NumPy
    array).

    Notes
    -----
    The layer is looked up as `tf.keras.layers.Resizing` first and only falls
    back to the older `tf.keras.layers.experimental.preprocessing.Resizing`
    location when the former is missing, so the function works on both older
    and newer TensorFlow releases.
    """
    H = 224
    W = 224
    layers = tf.keras.layers
    resizing_cls = getattr(layers, "Resizing", None)
    if resizing_cls is None:
        # Older TensorFlow releases only expose the layer under `experimental`.
        resizing_cls = layers.experimental.preprocessing.Resizing
    resizer = resizing_cls(H, W, interpolation="bilinear")
    return resizer(np_array)

def _indices_to_keep(label, pos, neg, cap, cov):
    """Return the item indices to keep for one patient, or None to keep everything."""
    if label == "Normal":
        return list(neg)
    if label == "COVID-19":
        return [idx for idx in cov if idx in pos]
    if label == "CAP":
        return [idx for idx in cap if idx in pos]
    return None


def extract_data(folder, quarter, pos_list, neg_list, results, cap_list, cov_list):
    """Collect the confidently classified items of one quarter as new train/val data.

    The directory listing of `folder` is split into four consecutive slices
    and `quarter` (1-based) selects one. For each file in that slice only the
    items matching the predicted class are kept:

    * "Normal"   -> the indices in `neg_list`
    * "COVID-19" -> the indices in `cov_list` that are also in `pos_list`
    * "CAP"      -> the indices in `cap_list` that are also in `pos_list`
    * any other label -> the whole file

    The first 70 % of the patients in the slice (by position) feed the
    training set, the rest the validation set.

    Parameters
    ----------
    folder : str
        Directory containing the `.npy` files.
    quarter : int
        Which quarter of the listing to process (1..4).
    pos_list, neg_list, cap_list, cov_list : list of list of int
        Per-patient index lists, aligned with the files of this quarter.
    results : list of str
        Predicted class name per patient, aligned with the files of this quarter.

    Returns
    -------
    train_data, val_data : numpy.ndarray
        Kept items concatenated along axis 0. A split that received nothing
        is returned as a zero-length array (with the item shape of the loaded
        files when at least one file was read) instead of raising.
    train_lens, val_lens : list of int
        Number of kept items for EVERY patient of the split, including 0 for
        patients that contributed nothing. Keeping the zero entries makes
        `len(train_lens) + len(val_lens)` equal the number of patients, so
        position `i` in the concatenated lengths refers to `results[i]` --
        which is what `get_new_labels` relies on when it assigns labels.

    Notes
    -----
    The listing is read once, files are opened through `os.path.join`, and the
    working directory is never changed. The order returned by `os.listdir`
    is kept as-is (not sorted) so the selection matches other code that lists
    the same folder.
    """
    entries = os.listdir(folder)
    start = round(len(entries) * 0.25 * (quarter - 1))
    end = round(len(entries) * 0.25 * quarter)
    round_total = end - start
    train_total = int(round_total * 0.7)

    train_chunks = []
    val_chunks = []
    train_lens = []
    val_lens = []
    empty_like = None
    for total, entry in enumerate(entries[start:end]):
        print(entry)
        print(results[total])
        loaded = np.load(os.path.join(folder, entry))
        print("first len: " + str(len(loaded)))
        if empty_like is None:
            # Zero-length array with the item shape/dtype of the data, used for empty splits.
            empty_like = loaded[:0]

        keep = _indices_to_keep(
            results[total], pos_list[total], neg_list[total], cap_list[total], cov_list[total]
        )
        if keep is not None:
            loaded = loaded[np.asarray(keep, dtype=np.intp)]
            print("after len: " + str(len(loaded)))

        #next_numpy = saving_pre_p(next_numpy)
        if total < train_total:
            lens_bucket, chunk_bucket = train_lens, train_chunks
        else:
            lens_bucket, chunk_bucket = val_lens, val_chunks
        # Always record the length (even 0) so the lengths stay aligned with `results`.
        lens_bucket.append(len(loaded))
        if len(loaded) > 0:
            chunk_bucket.append(loaded)

    if empty_like is None:
        empty_like = np.empty((0,))
    train_data = np.concatenate(train_chunks, axis=0) if train_chunks else empty_like
    val_data = np.concatenate(val_chunks, axis=0) if val_chunks else empty_like
    return train_data, val_data, train_lens, val_lens

def get_new_labels(results, train_lengths, val_lengths):
    """Expand per-patient class names into one numeric label per kept item.

    Position `i` of `train_lengths` followed by `val_lengths` is paired with
    `results[i]`; that patient's label is repeated as many times as its
    length. Labels are encoded as Normal -> 0, COVID-19 -> 1, CAP -> 2; any
    other class name contributes no labels.

    Returns `(new_train_labels, new_val_labels)` as NumPy arrays.
    """
    label_codes = {"Normal": 0, "COVID-19": 1, "CAP": 2}
    n_train = len(train_lengths)
    per_patient = list(train_lengths) + list(val_lengths)

    train_labels = []
    val_labels = []
    for position, count in enumerate(per_patient):
        if count <= 0:
            continue
        code = label_codes.get(results[position])
        if code is None:
            continue
        bucket = train_labels if position < n_train else val_labels
        bucket.extend([code] * count)

    return np.array(train_labels), np.array(val_labels)

In [None]:
# Load the COVID-vs-CAP model saved for the previous quarter (file suffix `previous_q`).
covCap_model = load_model('/content/drive/My Drive/ICASSP-2021/extra-online-data/cov-cap-extra' + previous_q + '.hdf5')    

In [None]:
# Load the healthy-vs-sick model saved for the previous quarter (file suffix `previous_q`).
healthy_model = load_model('/content/drive/My Drive/ICASSP-2021/extra-online-data/healthy-sick-extra' + previous_q + '.hdf5')    

In [None]:
# Load the train/val arrays and their label arrays saved for the previous quarter;
# the new data extracted below is appended to these.
old_train_data = np.load(old_data_dest + 'extra_train' + previous_q + '.npy') 
old_val_data = np.load(old_data_dest + 'extra_val' + previous_q + '.npy') 
old_train_labels = np.load(old_data_dest + 'extra_train_labels' + previous_q + '.npy') 
old_val_labels = np.load(old_data_dest + 'extra_val_labels' + previous_q + '.npy') 

In [None]:
# Recall arrays saved for the previous quarter; element [1] / element [0] is used as the
# weighting factor in get_healthy_preds (h_recalls) and get_covCap_preds (c_recalls).
h_recalls = np.load('/content/drive/My Drive/ICASSP-2021/online-code/recalls/extra-online-data/h_recalls' + previous_q + '.npy')  
c_recalls = np.load('/content/drive/My Drive/ICASSP-2021/online-code/recalls/extra-online-data/c_recalls' + previous_q + '.npy')  

In [None]:
# `name` is assigned once per test set in the configuration cell, so by the time this cell
# runs it only holds the last value. Passing it to both calls would write both prediction
# tables to the same CSV, the second overwriting the first. Build one file name per test set
# from its source folder and the quarter instead (e.g. 'covid/' and quarter 4 -> 'covid4.csv').
preds_name = source.strip('/') + this_q + '.csv'
preds_name2 = source2.strip('/') + this_q + '.csv'

results, pos_list, neg_list, cap_list, cov_list = get_results(preds_name, preds_dest, source, healthy_model, covCap_model, quarter, h_recalls, c_recalls)

results2, pos_list2, neg_list2, cap_list2, cov_list2 = get_results(preds_name2, preds_dest, source2, healthy_model, covCap_model, quarter, h_recalls, c_recalls)

0: 28
1: 0

0: 19
1: 1

0: 6
1: 1

0: 6
1: 0

0: 32
1: 0

0: 4
1: 23

0: 9
1: 0

0: 9
1: 1

0: 11
1: 0

0: 14
1: 0

0: 38
1: 0

0: 40
1: 0

0: 2
1: 0

0: 19
1: 0

0: 15
1: 0

0: 7
1: 0

0: 2
1: 11

0: 36
1: 0

0: 29
1: 0

0: 7
1: 0

0: 10
1: 0

0: 15
1: 0

0: 4
1: 0

0: 4
1: 0

0: 5
1: 0

0: 7
1: 0

0: 0
1: 26

0: 6
1: 0

0: 6
1: 0

0: 5
1: 0

0: 5
1: 0

0: 4
1: 0

0: 11
1: 0

0: 8
1: 0

0: 10
1: 0

0: 2
1: 0

0: 3
1: 0

0: 6
1: 0

0: 4
1: 2

0: 5
1: 0

0: 4
1: 0

0: 8
1: 1

0: 2
1: 0

0: 7
1: 0

0: 12
1: 0

0: 4
1: 0

0: 13
1: 0

0: 0
1: 0

0: 0
1: 0

0: 5
1: 1

0: 5
1: 0



In [None]:
#os.chdir('..')

In [None]:
# Collect the confidently classified images of this quarter from each test set,
# split into train / val parts, together with the per-patient image counts.
train_data, val_data, train_lens, val_lens = extract_data(source, quarter, pos_list, neg_list, results, cap_list, cov_list)
train_data2, val_data2, train_lens2, val_lens2 = extract_data(source2, quarter, pos_list2, neg_list2, results2, cap_list2, cov_list2)

patient76.npy
COVID-19
first len: 45
after len: 28
patient41.npy
COVID-19
first len: 46
after len: 19
patient10.npy
COVID-19
first len: 31
after len: 6
patient28.npy
CAP
first len: 43
after len: 0
patient82.npy
COVID-19
first len: 33
after len: 32
patient24.npy
CAP
first len: 40
after len: 23
patient100.npy
COVID-19
first len: 45
after len: 9
patient1.npy
COVID-19
first len: 34
after len: 9
patient61.npy
COVID-19
first len: 25
after len: 11
patient59.npy
COVID-19
first len: 43
after len: 14
patient85.npy
COVID-19
first len: 38
after len: 38
patient7.npy
COVID-19
first len: 48
after len: 40
patient5.npy
COVID-19
first len: 44
after len: 2
patient74.npy
COVID-19
first len: 46
after len: 19
patient122.npy
COVID-19
first len: 37
after len: 15
patient75.npy
CAP
first len: 48
after len: 0
patient15.npy
CAP
first len: 29
after len: 11
patient117.npy
COVID-19
first len: 38
after len: 36
patient97.npy
COVID-19
first len: 41
after len: 29
patient119.npy
COVID-19
first len: 33
after len: 7
patien

In [None]:
# Expand the per-patient predictions into one numeric label per extracted image
# (Normal -> 0, COVID-19 -> 1, CAP -> 2) for each test set.
new_train_labels, new_val_labels = get_new_labels(results, train_lens, val_lens)
new_train_labels2, new_val_labels2 = get_new_labels(results2, train_lens2, val_lens2)

In [None]:
#(old_data, new_data1, new_data2, name, dest_folder)
# Append the data/labels from both test sets to the previous quarter's arrays and save
# them under this quarter's suffix (`this_q`) in `new_data_dest`.
save_new_data(old_train_data, train_data, train_data2, 'extra_train' + this_q + '.npy', new_data_dest) 
save_new_data(old_val_data, val_data, val_data2, 'extra_val' + this_q + '.npy', new_data_dest) 
save_new_data(old_train_labels, new_train_labels, new_train_labels2, 'extra_train_labels' + this_q + '.npy', new_data_dest) 
save_new_data(old_val_labels, new_val_labels, new_val_labels2, 'extra_val_labels' + this_q + '.npy', new_data_dest) 

In [None]:
# Size check: rows in the previous training array, rows extracted from test set 1
# (train, then val), and the number of per-patient length entries in each split.
print(len(old_train_data))
print(len(train_data))
print(len(val_data))
print(len(val_lens))
print(len(train_lens))

9274
265
116
8
14


In [None]:
# Number of test-set-1 patients that received a prediction in this round.
len(results)

24