In [10]:
import matplotlib.pyplot as plt
import os
import numpy as np
from ipywidgets import interact, fixed
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

# The relative paths used below are resolved from the RADIANT folder. When the
# current folder has a different name, step up one level before continuing.
current_folder_name = os.path.basename(os.getcwd())
if not current_folder_name == "RADIANT":
    print("Changing directory to RADIANT")
    os.chdir("..")

# Echo the resulting working directory so the reader can verify it.
print(os.getcwd())

/home/dylan/RADIANT


In [11]:
# Root folder of the results; its entries are listed as the experiments below.
# Keep the trailing slash: later cells build paths by plain string
# concatenation (e.g. `direc + experiment_name`).
direc = "MachineLearning/results/"

# direc = "older/EPOCH_125/CLIPPING/FIRST/" #? 30 epoch is good at .00025l 8lr
# direc = "older/EPOCH_125/CLIPPING/FIRST.LOFAR.NVSS/" #? 30 epoch is good at .00025l 8lr
# direc = "older/EPOCH_125/CLIPPING/LOFAR/" #? Would argue the same as above ^^^ (The higher Test of 89 is overfitting)
# direc = "older/EPOCH_125/CLIPPING/NVSS/" #? 30 epoch is good at .00025l 8lr

# direc = "older/EPOCH_125/NO_CLIPPING/FIRST/" #? 50 epoch is good at .00025l 8lr
# direc = "older/EPOCH_125/NO_CLIPPING/FIRST.LOFAR.NVSS/" #? Eish... too much overfitting everywhere according to the ratios
# direc = "older/EPOCH_125/NO_CLIPPING/LOFAR/" #? 44 epoch is good at .00025l 8lr
# direc = "older/EPOCH_125/NO_CLIPPING/NVSS/" #? 33-ish epoch is good at .00025l 8lr or 6lr

In [12]:
def _first_history_file(root, experiments):
    """Return ``(file_name, epoch_count)`` for the first usable fold history.

    Walks ``experiments`` in order and returns the alphabetically first
    ``loss_fold*`` file of the first entry that has one, together with the
    length of its ``'accuracy'`` list. Entries whose name starts with ``"-"``
    (skipped by ``show_aclos_image`` as well) and entries that are not
    directories are ignored.

    Raises:
        FileNotFoundError: if no entry contains a ``loss_fold*`` file.
    """
    for experiment in experiments:
        experiment_dir = os.path.join(root, experiment)
        if experiment.startswith("-") or not os.path.isdir(experiment_dir):
            continue
        histories = sorted(f for f in os.listdir(experiment_dir) if f.startswith("loss_fold"))
        if histories:
            # NOTE(review): allow_pickle=True unpickles arbitrary objects;
            # only load result files from a trusted source.
            history = np.load(os.path.join(experiment_dir, histories[0]), allow_pickle=True).item()
            return histories[0], len(history['accuracy'])
    raise FileNotFoundError(f"No 'loss_fold*' history file found in any experiment under {root!r}")


# Every entry under `direc` is offered in the experiment dropdown (sorted for a stable order).
experiment_options = sorted(os.listdir(direc))

# `max_epochs` is the upper bound of the Epoch_Cut slider. It is read from the
# first experiment that actually has a history file instead of blindly using
# experiment_options[0], which raised IndexError when that entry had none.
loss_file, max_epochs = _first_history_file(direc, experiment_options)

def _plot_mean_band(axis, epochs, curves, label, color):
    """Draw each fold faintly plus the across-fold mean line and a +/-1 std band.

    ``curves`` is a 2-D array with one row per fold. Returns the mean curve.
    """
    mean = curves.mean(axis=0)
    std = curves.std(axis=0)
    axis.plot(epochs, mean, label=label, color=color)
    for fold_curve in curves:
        axis.plot(epochs, fold_curve, alpha=0.1, color=color)
    axis.fill_between(epochs, mean - std, mean + std, alpha=0.2, color=color)
    return mean


def show_aclos_image(Experiment, Zoom, do_cut, Epoch_Cut = 30):
    """Plot accuracy, loss and train/validation ratio curves for one experiment.

    Loads every ``loss_fold*`` file found in ``direc + Experiment`` (each is
    expected to hold a dict with the keys ``'accuracy'``, ``'val_accuracy'``,
    ``'loss'`` and ``'val_loss'``) and draws three panels: accuracy, loss, and
    the training/validation ratios. Each panel shows the individual folds, the
    mean over folds and a +/-1 standard-deviation band.

    Args:
        Experiment: Name of the experiment folder inside ``direc``. Names
            starting with ``"-"`` are ignored and nothing is drawn.
        Zoom: If true, use the tighter hard-coded y-limits.
        do_cut: If true, mark ``Epoch_Cut`` with magenta guide lines.
        Epoch_Cut: 1-based epoch to mark. Values outside the available range
            are clamped, because the slider bound may come from a different
            experiment with more epochs.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    if Experiment.startswith("-"):
        return
    directory = direc + Experiment

    # NOTE(review): allow_pickle=True unpickles arbitrary objects; only point
    # this at result files from a trusted source.
    losses = [
        np.load(os.path.join(directory, filename), allow_pickle=True).item()
        for filename in sorted(os.listdir(directory))
        if filename.startswith("loss_fold")
    ]
    if not losses:
        print(f"No loss_fold* files found in {directory}")
        return

    history_keys = ('accuracy', 'val_accuracy', 'loss', 'val_loss')
    # Use the number of epochs common to every fold so the stacked arrays are
    # rectangular even if some folds recorded fewer epochs.
    EPOCHS = min(len(fold[key]) for fold in losses for key in history_keys)
    if EPOCHS == 0:
        print(f"The loss_fold* files in {directory} contain no epochs")
        return
    epochs = range(1, EPOCHS + 1)

    def stacked(key):
        # One row per fold, truncated to the common epoch count.
        return np.array([np.asarray(fold[key], dtype=float)[:EPOCHS] for fold in losses])

    def safe_ratio(numerator, denominator):
        # Element-wise division; epochs with a zero denominator become NaN
        # (shown as a gap) instead of raising ZeroDivisionError.
        ratio = np.full_like(numerator, np.nan)
        np.divide(numerator, denominator, out=ratio, where=denominator != 0)
        return ratio

    blue = '#1f77b4'
    orange = '#ff7f0e'
    green = '#2ca02c'
    purple = '#9467bd'

    train_acc = stacked('accuracy')
    val_acc = stacked('val_accuracy')
    train_loss = stacked('loss')
    val_loss = stacked('val_loss')

    fig, ax = plt.subplots(1, 3, figsize=(20, 5))

    # Panel 0: accuracy.
    _plot_mean_band(ax[0], epochs, train_acc, 'Training Accuracy', blue)
    val_acc_mean = _plot_mean_band(ax[0], epochs, val_acc, 'Validation Accuracy', orange)
    ax[0].set_xlabel('Epochs')
    ax[0].set_ylabel('Accuracy')
    ax[0].set_title('Training and Validation Accuracy')
    ax[0].legend()

    # Panel 1: loss.
    _plot_mean_band(ax[1], epochs, train_loss, 'Training Loss', blue)
    _plot_mean_band(ax[1], epochs, val_loss, 'Validation Loss', orange)
    ax[1].set_xlabel('Epochs')
    ax[1].set_ylabel('Loss')
    ax[1].set_title("Training Validation Loss")
    ax[1].legend()

    # Panel 2: per-fold training/validation ratios; the dashed line marks 1.
    ratios_loss = safe_ratio(train_loss, val_loss)
    ratios_acc = safe_ratio(train_acc, val_acc)
    ratios_loss_mean = _plot_mean_band(ax[2], epochs, ratios_loss, 'Training/Validation Loss Ratio', green)
    _plot_mean_band(ax[2], epochs, ratios_acc, 'Training/Validation Accuracy Ratio', purple)
    ax[2].set_xlabel('Epochs')
    ax[2].set_ylabel('Ratio')
    ax[2].set_title('Training/Validation Ratios')
    ax[2].legend()
    ax[2].axhline(y=1, color='black', linestyle='--')

    if Zoom:
        ax[0].set_ylim(.8,.91)
        ax[1].set_ylim(0,10)
        ax[2].set_ylim(0,1.0)
    else:
        ax[0].set_ylim(0,1)
        ax[2].set_ylim(0,2.5)

    if do_cut:
        # Clamp to the epochs that exist for this experiment.
        cut = min(max(int(Epoch_Cut), 1), EPOCHS)

        ax[0].axvline(x=cut, color='magenta', linestyle='--')
        ax[0].axhline(y=val_acc_mean[cut - 1], color='magenta', linestyle='--')

        ax[1].axvline(x=cut, color='magenta', linestyle='--')

        ax[2].axvline(x=cut, color='magenta', linestyle='--')
        ax[2].axhline(y=ratios_loss_mean[cut - 1], color='magenta', linestyle='--')

    fig.suptitle(directory)
    plt.show()

# Interactive viewer: experiment dropdown, zoom / cut checkboxes and an epoch
# slider running from 1 to max_epochs.
interact(
    show_aclos_image,
    Experiment=experiment_options,
    do_cut=False,
    Epoch_Cut=(1, max_epochs),
    Zoom=False,
)

interactive(children=(Dropdown(description='Experiment', options=('FIRST.LOFAR.NVSS_same_3000a_30e_0.000251l_0…

<function __main__.show_aclos_image(Experiment, Zoom, do_cut, Epoch_Cut=30)>

In [13]:
# Shared loss object that show_scatter_image uses to score saved predictions
# against their integer-encoded labels. Built with default arguments.
# NOTE(review): Keras defaults to from_logits=False, i.e. the saved y_pred
# arrays are treated as probabilities — confirm that matches how they were written.
scc = SparseCategoricalCrossentropy()

def show_scatter_image(dataset, show_matricies, get_data):
    """Summarise accuracy and loss of every experiment folder under ``direc``.

    For each experiment folder the last two underscore-separated tokens of its
    name, minus their final character, are parsed as floats (learning rate and
    regularization). Every ``y_pred_<dataset>*`` file in the folder is scored
    against its ground truth, and the per-file accuracy, loss and normalised
    confusion matrix are averaged.

    Entries whose name starts with ``"-"`` (also skipped by
    ``show_aclos_image``), entries that are not directories, and folders with
    no prediction file for ``dataset`` are skipped.

    Args:
        dataset: ``"test"``, ``"val"`` or ``"train"``; selects which prediction
            and ground-truth files are read.
        show_matricies: ``"Grid"`` (annotated scatter), ``"Contour"``
            (interpolated contour maps) or ``"Confusion Matrix"`` (one matrix
            per experiment). Any other value, e.g. ``None``, draws nothing.
        get_data: If true, return the collected values.

    Returns:
        ``(accuracies, losses, coords)`` when ``get_data`` is true, where
        ``coords`` is a list of ``(learning_rate, regularization)`` tuples;
        otherwise ``None``. ``None`` is also returned, after printing
        ``"No data found"``, when no experiment produced any data.
    """
    losses = []
    accuracies = []
    coords = []
    survey = None

    for folder in sorted(os.listdir(direc)):
        folder_path = os.path.join(direc, folder)
        if folder.startswith("-") or not os.path.isdir(folder_path):
            continue

        details = folder.split("_")
        lr = float(details[-2][:-1])
        reg = float(details[-1][:-1])
        survey = details[0]

        # NOTE(review): allow_pickle=True unpickles arbitrary objects; only
        # load result files from a trusted source.
        lookup = np.load(os.path.join(folder_path, "lookup.npy"), allow_pickle=True).item()
        # Invert label -> index once per folder instead of rebuilding two lists
        # per sample. setdefault keeps the first label for a duplicated index,
        # like list.index did.
        inverse_lookup = {}
        for label, index in lookup.items():
            inverse_lookup.setdefault(index, label)

        fold_losses = []
        fold_accuracies = []
        fold_cms = []
        labels = None

        for filename in sorted(os.listdir(folder_path)):
            if not filename.startswith("y_pred_" + dataset):
                continue
            y_pred_soft = np.load(os.path.join(folder_path, filename), allow_pickle=True)

            if dataset == "test":
                truth_name = "y_test.npy"
            else:
                # The fourth underscore-separated token of the prediction file
                # name selects the matching ground-truth file.
                truth_name = "y_" + dataset + "_fold_" + filename.split("_")[3]
            y_true_hard = np.load(os.path.join(folder_path, truth_name), allow_pickle=True)

            labels = np.unique(y_true_hard)

            y_pred_hard = np.array([inverse_lookup[np.argmax(row)] for row in y_pred_soft])
            y_true_hard_encoded = np.array([lookup[x] for x in y_true_hard])

            fold_losses.append(float(scc(y_true_hard_encoded, y_pred_soft)))
            fold_accuracies.append(accuracy_score(y_true_hard, y_pred_hard))
            fold_cms.append(confusion_matrix(y_true_hard, y_pred_hard, normalize='true', labels=labels))

        if not fold_losses:
            # Nothing to average: skip the folder rather than recording NaN,
            # which crashed the Grid annotations.
            continue

        coords.append((lr, reg))
        losses.append(np.mean(fold_losses))
        accuracies.append(np.mean(fold_accuracies))
        cm = np.mean(fold_cms, axis=0)

        if show_matricies == "Confusion Matrix":
            plt.imshow(cm, cmap='viridis')
            for i in range(cm.shape[0]):
                for j in range(cm.shape[1]):
                    # Dark text on bright cells, light text on dark cells.
                    colour = 'black' if cm[i, j] > 0.5 else 'white'
                    plt.text(j, i, f"{round(cm[i, j],2)}", ha='center', va='center', color=colour)
            plt.title(f"{folder} : {round(accuracies[-1],2)}")
            plt.xlabel("Predicted")
            plt.ylabel("True")
            plt.xticks(range(len(labels)), labels)
            plt.yticks(range(len(labels)), labels)
            plt.colorbar()
            plt.show()

    if not coords:
        print("No data found")
        return

    if show_matricies == "Grid":
        fig, ax = plt.subplots(1,2, figsize=(20,6))
        im = ax[0].scatter(*zip(*coords), c=accuracies, cmap='viridis', s=170)
        fig.colorbar(im, ax=ax[0])
        ax[0].set_xlabel("Learning Rate")
        ax[0].set_ylabel("Regularization")
        ax[0].set_title(f"{survey} Accuracy")
        ax[0].ticklabel_format(style='scientific', axis='x', scilimits=(0,0))

        im = ax[1].scatter(*zip(*coords), c=losses, cmap='viridis', s=170)
        fig.colorbar(im, ax=ax[1])
        ax[1].set_xlabel("Learning Rate")
        ax[1].set_ylabel("Regularization")
        ax[1].set_title(f"{survey} Loss")
        ax[1].ticklabel_format(style='scientific', axis='x', scilimits=(0,0))

        for (x, y), ac, lo in zip(coords, accuracies, losses):
            # Round after scaling: int(round(ac, 2) * 100) truncated values
            # such as 0.57 * 100 = 56.999... down to 56.
            ax[0].annotate(f"{int(round(float(ac) * 100))}", (x, y), ha='center', va='center')
            ax[1].annotate(f"{str(round(lo,2))}", (x, y), ha='center', va='center')
        plt.show()

    if show_matricies == "Contour":
        from scipy.interpolate import griddata

        # Separate array so the returned `coords` stays a list of tuples.
        points = np.array(coords)
        learning_rate = points[:, 0]
        regularization = points[:, 1]
        min_lr, max_lr = learning_rate.min(), learning_rate.max()
        min_reg, max_reg = regularization.min(), regularization.max()
        number_of_points = 500j  # complex step: np.mgrid reads it as a point count
        grid_x, grid_y = np.mgrid[min_lr:max_lr:number_of_points, min_reg:max_reg:number_of_points]

        grid_z_ac = griddata(points, accuracies, (grid_x, grid_y), method='cubic')
        grid_z_lo = griddata(points, losses, (grid_x, grid_y), method='cubic')

        fig, ax = plt.subplots(1,2, figsize=(20,6))
        contour = ax[0].contourf(grid_x, grid_y, grid_z_ac, levels=50, cmap='viridis')
        fig.colorbar(contour, ax=ax[0])
        ax[0].scatter(learning_rate, regularization, c=accuracies, edgecolors='w', linewidths=1)
        # Use the parsed survey name instead of a hard-coded 'LOFAR'.
        ax[0].set_title(f'{survey} Accuracy Contour Map')
        ax[0].set_xlabel('Learning Rate')
        ax[0].set_ylabel('Regularization')

        contour = ax[1].contourf(grid_x, grid_y, grid_z_lo, levels=50, cmap='viridis')
        fig.colorbar(contour, ax=ax[1])
        ax[1].scatter(learning_rate, regularization, c=losses, edgecolors='w', linewidths=1)
        ax[1].set_title(f'{survey} Loss Contour Map')
        ax[1].set_xlabel('Learning Rate')
        ax[1].set_ylabel('Regularization')
        plt.show()

    if get_data:
        return accuracies, losses, coords
    
# Interactive summary: choose the dataset and the kind of view. `get_data` is
# pinned to False with `fixed`, so it gets no widget.
interact(
    show_scatter_image,
    dataset=["test", "val", "train"],
    show_matricies=["Grid", "Contour", "Confusion Matrix"],
    get_data=fixed(False),
)

interactive(children=(Dropdown(description='dataset', options=('test', 'val', 'train'), value='test'), Dropdow…

<function __main__.show_scatter_image(dataset, show_matricies, get_data)>

In [14]:
def show_ratios(numerator, denominator, invert_ac=False, invert_lo=False):
    """Scatter accuracy ratio against loss ratio for two datasets and rank the experiments.

    Collects per-experiment accuracy and loss for ``numerator`` and
    ``denominator`` through ``show_scatter_image`` and plots one point per
    experiment at (accuracy ratio, loss ratio). It then prints the experiments
    ordered by Euclidean distance of that point from (1, 1).

    Args:
        numerator: Dataset name used on top of both ratios (``"train"``,
            ``"val"`` or ``"test"``).
        denominator: Dataset name used at the bottom of both ratios.
        invert_ac: If true, swap numerator and denominator for the accuracy ratio.
        invert_lo: If true, swap numerator and denominator for the loss ratio.

    Raises:
        ValueError: if the two datasets yield different (learning rate,
            regularization) coordinates.
    """
    num_result = show_scatter_image(numerator, get_data=True, show_matricies=None)
    den_result = show_scatter_image(denominator, get_data=True, show_matricies=None)
    if num_result is None or den_result is None:
        # show_scatter_image has already printed "No data found"; unpacking
        # None would raise TypeError.
        return
    num_ac, num_lo, num_coords = num_result
    den_ac, den_lo, den_coords = den_result

    if num_coords != den_coords:
        raise ValueError("Coords need to match. Something went wrong.")

    # Axis labels track which dataset ends up on top of each ratio.
    numerator_ac = numerator
    numerator_lo = numerator
    denominator_ac = denominator
    denominator_lo = denominator

    if invert_ac:
        den_ac, num_ac = num_ac, den_ac
        numerator_ac, denominator_ac = denominator_ac, numerator_ac
    if invert_lo:
        den_lo, num_lo = num_lo, den_lo
        numerator_lo, denominator_lo = denominator_lo, numerator_lo

    ratios_ac = [top / bottom for top, bottom in zip(num_ac, den_ac)]
    ratios_lo = [top / bottom for top, bottom in zip(num_lo, den_lo)]

    ratio_points = list(zip(ratios_ac, ratios_lo))
    distances = [np.sqrt((1-ac)**2 + (1-lo)**2) for ac, lo in ratio_points]

    plt.scatter(*zip(*ratio_points), s=170, marker='.')
    plt.scatter(1, 1, color='red', s=170, marker='.')
    plt.vlines(1, colors='#AAA5', linestyles='--', ymin=min(ratios_lo), ymax=max(ratios_lo))
    plt.hlines(1, colors='#AAA5', linestyles='--', xmin=min(ratios_ac), xmax=max(ratios_ac))
    plt.xlabel(f"Accuracy ({numerator_ac}/{denominator_ac})")
    plt.ylabel(f"Loss ({numerator_lo}/{denominator_lo})")
    # os.listdir order is arbitrary, so take the first sorted entry that is not
    # a "-" separator to keep the title stable between runs.
    survey = next(
        (entry.split("_")[0] for entry in sorted(os.listdir(direc)) if not entry.startswith("-")),
        "",
    )
    plt.title(f"{survey} Ratios")
    plt.show()

    # Rank experiments: the closest to (1, 1) comes first.
    args = np.argsort(distances)
    for number, i in enumerate(args):
        print(f"{number+1:2d} | Distance: {distances[i]:.2f}  A_Ratio: {ratios_ac[i]:.2f}  L_Ratio: {ratios_lo[i]:.2f}  ( {num_coords[i][0]:.2e} lr, {num_coords[i][1]:.2e} reg )  Accuracy: {num_ac[i]:.2f}/{den_ac[i]:.2f}    ")


# Interactive ratio view: pick which dataset is the numerator and which the
# denominator of the accuracy and loss ratios.
interact(
    show_ratios,
    numerator=["train", "val", "test"],
    denominator=["val", "train", "test"],
)

interactive(children=(Dropdown(description='numerator', options=('train', 'val', 'test'), value='train'), Drop…

<function __main__.show_ratios(numerator, denominator, invert_ac=False, invert_lo=False)>