In [138]:
import tensorflow
import keras
from keras.models import model_from_json
import matplotlib.pyplot as plt
import pandas as pd
import os
import idlsave
import numpy as np
from tqdm import tqdm_notebook as tqdm
import re
from sklearn.metrics import roc_curve, auc
from scipy import interp
from itertools import cycle
import glob

In [2]:
# Rebuild the network architecture from its JSON description; weights are
# loaded separately for each checkpoint that gets evaluated.
# The context manager closes model.json as soon as it has been read instead of
# leaving the handle open for the lifetime of the kernel.
with open('model.json', 'rb') as model_file:
    model = model_from_json(model_file.read())

In [116]:
# Filesystem layout used by the rest of the notebook.
# NOTE(review): root_path is an absolute path on one machine; it has to be
# edited before the notebook can run anywhere else.
root_path = '/Users/localhost/Desktop/Projects/Working/CFA/visioneering-deeplearning/experiments/planet_experiments/local_cache/original_k2_data/'
# Directory that get_n_samples lists per campaign (data_path + campaign name).
data_path = root_path+'curves/'
# Directory whose entries are each loaded as a weights file by the evaluation loop.
weights_path = 'weights/'
# CSV read into `labels` below.
label_path = root_path+'labels/joined_labels.csv'
# Path of the validation-campaign CSV; this name is rebound to a DataFrame below.
validation_campaigns = root_path+'Validation_Campaigns.csv'
# Not referenced by any other code visible in this notebook.
example_weights = 'weights/lstm_reattempt_weights.05-0.77.hdf5'
# Prefix prepended to the PNG file names written by plot_lc.
roc_curves = 'out/'

# NOTE(review): the CSV is read and then immediately replaced by a one-row
# frame whose column 0 holds only 'c0', so get_n_samples (which iterates
# validation_campaigns[0]) sees that single entry. Looks like a debugging
# override -- confirm which of the two assignments is intended.
validation_campaigns = pd.read_csv(validation_campaigns)
validation_campaigns = pd.DataFrame({0: ['c0']})
labels = pd.read_csv(label_path)
# Drop the 'junk' column; raises KeyError if the CSV has no such column.
del labels['junk']
# Matches the run of digits sitting between 'ep' and 'search' in a file path.
# Because of the '*' quantifier it can also match an empty string.
id_from_filepath = re.compile('(?<=ep)[0-9]*(?=search)')

In [117]:
def plot_roc(tpr,fpr,roc_auc,label, saving_path):
    """Plot one ROC curve and save it as ``<saving_path>/roc_curves/<label>.png``.

    Parameters
    ----------
    tpr, fpr : sequences of numbers
        True-positive and false-positive rates. Note the argument order:
        ``tpr`` comes first, but ``fpr`` is drawn on the x axis.
    roc_auc : float
        Area under the curve; shown in the legend with two decimals.
    label : str
        File name (without extension) of the saved figure.
    saving_path : str
        Directory under which the ``roc_curves`` folder is created if missing.
        It may be given with or without a trailing path separator.
    """
    # Build the directory once and reuse it for the file name, so the folder
    # that is created is always the folder that is written to.
    out_dir = os.path.join(saving_path, 'roc_curves')
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    fig = plt.figure()
    plt.plot(fpr, tpr, color='darkorange',
             lw=2, label='ROC curve (area = %0.2f)' % roc_auc)
    # Diagonal reference line.
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic example')
    plt.legend(loc="lower right")
    plt.savefig(os.path.join(out_dir, label + '.png'))
    # Release the figure; pyplot keeps every open figure alive otherwise.
    plt.close(fig)

In [125]:
def get_n_samples(n):
    """Yield validation light curves and their one-hot labels in batches.

    For every campaign listed in ``validation_campaigns[0]`` the files under
    ``data_path + campaign`` are read in sorted order and grouped into chunks
    of at most ``n`` files.

    Parameters
    ----------
    n : int
        Maximum number of files per batch; must be at least 1.

    Yields
    ------
    (X, y) : tuple of numpy arrays
        ``X`` stacks one ``(size, 1)`` column per light curve that was read
        successfully; ``y`` holds the matching ``[label == 'C', label != 'C']``
        pairs. Batches in which every file failed are not yielded.

    Raises
    ------
    ValueError
        If ``n`` is smaller than 1.

    Files that cannot be read, are shorter than ``size`` samples, or have no
    matching label are skipped (best effort); the number skipped is printed
    once the generator is exhausted.
    """
    if n < 1:
        raise ValueError('n must be at least 1, got %r' % (n,))

    size = 1360  # number of leading flux samples kept from every light curve
    errors = 0
    total_read = 0
    for campaign in validation_campaigns[0]:
        campaign_dir = data_path + campaign
        # Sorted so that batch contents are reproducible between runs.
        files = [campaign_dir + '/' + name for name in sorted(os.listdir(campaign_dir))]
        # Plain slicing copes with campaigns holding fewer than n files, where
        # asking np.array_split for int(len(files)/n) == 0 sections would raise.
        for start in range(0, len(files), n):
            X = []
            y = []
            for f in files[start:start + n]:
                try:
                    x = idlsave.read(f, verbose=False).k.f[0][:size]
                    if len(x) != size:
                        raise ValueError('light curve has fewer than %d samples' % size)
                    p_id = id_from_filepath.findall(f)[0]
                    label = labels[labels['planet_id'] == int(p_id)]['label'].tolist()[0].strip()
                    sample = x.reshape((-1, 1))
                except Exception:
                    # Deliberate best effort: count the failure and move on.
                    errors += 1
                    continue
                # Append only after everything succeeded so X and y stay aligned.
                X.append(sample)
                y.append([label == 'C', label != 'C'])
            if not X:
                continue
            total_read += len(X)
            yield np.array(X), np.array(y)
    # Single-argument print works as a statement and as a function call.
    print('READ %d light curves (%d files skipped)' % (total_read, errors))

In [132]:
def get_roc(predicted, actual):
    """Build ROC curves and their areas for both classes plus the micro-average.

    ``predicted`` and ``actual`` are indexed as ``[:, 0]`` and ``[:, 1]`` for
    the per-class curves and flattened with ``ravel()`` for the micro-average.

    Returns ``(tpr, fpr, roc_auc)`` -- note that ``tpr`` comes first. Each is
    a dict keyed by ``0``, ``1`` and ``"micro"``.
    """
    fpr, tpr, roc_auc = {}, {}, {}

    # One (key, ground truth, scores) triple per curve: the two classes first,
    # then every entry pooled together for the micro-average.
    curve_inputs = [(idx, actual[:, idx], predicted[:, idx]) for idx in range(2)]
    curve_inputs.append(("micro", actual.ravel(), predicted.ravel()))

    for key, truth, scores in curve_inputs:
        false_pos, true_pos, _ = roc_curve(truth, scores)
        fpr[key] = false_pos
        tpr[key] = true_pos
        roc_auc[key] = auc(false_pos, true_pos)

    return tpr, fpr, roc_auc

In [134]:
def plot_lc(tpr, fpr, roc_auc, title):
    """Plot per-class, micro- and macro-average ROC curves and save the figure.

    Parameters
    ----------
    tpr, fpr, roc_auc : dict
        Keyed by class id plus ``"micro"`` (the layout ``get_roc`` returns).
        All three are modified in place: a ``"macro"`` entry is added to each.
    title : str
        Figure title; also the name of the PNG written under ``roc_curves``.

    Raises
    ------
    ValueError
        If ``fpr`` holds no per-class entries to average.
    """
    # Take the class ids from the data itself; this function has no
    # `n_classes` of its own to rely on.
    class_ids = sorted(key for key in fpr if key not in ("micro", "macro"))
    if not class_ids:
        raise ValueError('fpr contains no per-class ROC curves to average')
    n_classes = len(class_ids)

    # Macro-average: pool every false-positive rate seen in any class ...
    all_fpr = np.unique(np.concatenate([fpr[i] for i in class_ids]))

    # ... interpolate each class curve onto that common grid ...
    mean_tpr = np.zeros_like(all_fpr, dtype=float)
    for i in class_ids:
        mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])

    # ... and average the curves before computing the area.
    mean_tpr /= n_classes

    fpr["macro"] = all_fpr
    tpr["macro"] = mean_tpr
    roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

    # Plot all ROC curves
    fig = plt.figure()
    plt.plot(fpr["micro"], tpr["micro"],
             label='micro-average ROC curve (area = {0:0.2f})'
                   ''.format(roc_auc["micro"]),
             color='deeppink', linestyle=':', linewidth=4)

    plt.plot(fpr["macro"], tpr["macro"],
             label='macro-average ROC curve (area = {0:0.2f})'
                   ''.format(roc_auc["macro"]),
             color='navy', linestyle=':', linewidth=4)

    colors = cycle(['aqua', 'darkorange', 'cornflowerblue'])
    for i, color in zip(class_ids, colors):
        plt.plot(fpr[i], tpr[i], color=color, lw=2,
                 label='ROC curve of class {0} (area = {1:0.2f})'
                 ''.format(i, roc_auc[i]))

    # Diagonal reference line.
    plt.plot([0, 1], [0, 1], 'k--', lw=2)
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(title)
    plt.legend(loc="lower right")

    # Create the output directory if it does not exist yet.
    if roc_curves and not os.path.isdir(roc_curves):
        os.makedirs(roc_curves)
    fig.savefig(os.path.join(roc_curves, title + '.png'))
    # This runs once per weights file; close the figure so they do not pile up.
    plt.close(fig)

In [None]:
# Score every weights file in `weights_path` on the validation light curves
# and write one ROC figure per file (named after the weights file).
batch_size = 1000
# Rough batch count for the inner progress bar. It counts files across every
# campaign directory, so it is only an estimate of the batches actually read.
n_batches = int(len(glob.glob(data_path+'*/*'))/batch_size)
# Sorted for a reproducible order; hidden entries (e.g. .DS_Store) and
# sub-directories are skipped because they are not weights files.
weight_files = sorted(
    name for name in os.listdir(weights_path)
    if not name.startswith('.') and os.path.isfile(os.path.join(weights_path, name))
)

for f in tqdm(weight_files):
    model.load_weights(os.path.join(weights_path, f))
    predicted = []
    actual = []
    # Predict batch by batch, collecting scores and ground truth side by side.
    for X, y in tqdm(get_n_samples(batch_size), total=n_batches):
        predictions = model.predict(X)
        predicted.extend(predictions)
        actual.extend(y)
    tpr, fpr, roc_auc = get_roc(np.array(predicted), np.array(actual))
    plot_lc(tpr, fpr, roc_auc, f)

A Jupyter Widget

A Jupyter Widget