In [None]:
import tensorflow as tf
import numpy as np

import sklearn.metrics as metrics

import matplotlib.pyplot as plt

import os

import data as dat
from model import Model

import colours as c

from io import BytesIO
import base64

# Working directory of the notebook and its parent directory; the report/ and
# trained/ paths used further down are built from `dir_main`.
dir = os.getcwd()
dir_main = os.path.split(os.path.abspath(dir))[0]

In [None]:
# Scorer from the project-local `data` module. This notebook reads its SMILES
# vocabulary, evaluation set (getEvaluations), class names and class count.
scorer = dat.Scorer()
# Build the model over the scorer's SMILES vocabulary with dropout=False
# (presumably because the network is only evaluated here — confirm against model.py).
m = Model(scorer.smiles_vocabulary, dropout=False)

In [None]:
def replaceTemplate(template, replace, figure):
    """Embed `figure` into `template` as an inline PNG image.

    template -- HTML text containing the placeholder.
    replace  -- placeholder substring; every occurrence is substituted.
    figure   -- object exposing savefig(file, format=...), e.g. a matplotlib figure.

    Returns a new string in which `replace` is swapped for an
    <img class="plot"> tag whose src is a base64 data URI of the rendered PNG.
    """
    png_buffer = BytesIO()
    figure.savefig(png_buffer, format="png")

    # encodebytes yields base64 text broken into lines; it is embedded as-is.
    encoded = base64.encodebytes(png_buffer.getvalue()).decode()
    img_tag = '<img class="plot" src="data:image/png;base64,{}"/> '.format(encoded)

    return template.replace(replace, img_tag)

# Load the HTML report template. The context manager closes the file handle
# as soon as the text has been read.
with open(dir_main + "/report/template.html") as template_file:
    template = template_file.read()

# Name of the trained model: used as the checkpoint name under trained/ and
# substituted for the {model_name} placeholder in the report.
model_name = "Solanaceae-N"

In [None]:
with tf.Session() as session:

    # Restore the trained variables for `model_name` into this session.
    saver = tf.train.Saver()
    saver.restore(session, dir_main + "/trained/" + model_name)

    # Value of the model's global step; written into the report later on.
    iteration = session.run(m.global_step)

    print("Model restored.")

    # Score the data iteratively
    smiles, labels, weights, smiles_raw = scorer.getEvaluations()

    # Per-class number of samples that carry a usable label (label != 0.5).
    total = [0] * scorer.classes_amount

    # Per-class confusion-matrix counters.
    false_negative = [0] * scorer.classes_amount
    false_positive = [0] * scorer.classes_amount

    true_negative = [0] * scorer.classes_amount
    true_positive = [0] * scorer.classes_amount

    # Raw model outputs and per-sample weights, kept for the ROC cell.
    outputs = list()
    weight_list = list()

    for smile, label, weight, smile_raw in zip(smiles, labels, weights, smiles_raw):

        # Run the model on a single sample (batch of one).
        output = session.run([m.output], feed_dict={m.batch_size: 1, m.input: np.expand_dims(smile, 0)})

        outputs.append(output[0])
        # Rounded predictions, one entry per class.
        output = list(np.round(output)[0,0])

        # Store the weights of THIS sample. Appending weights[0] here would
        # fill the list with copies of the first sample's weights.
        weight_list.append(weight)

        for i in range(len(label)):

            # A label of 0.5 is skipped and not counted for its class.
            if label[i] == 0.5:
                continue

            total[i] += 1

            predicted = output[i]
            correct = label[i] == predicted

            # Only predictions of exactly 1 or 0 are tallied; any other value
            # (e.g. NaN) increments `total` but none of the four counters.
            if predicted == 1:
                if correct:
                    true_positive[i] += 1
                else:
                    false_positive[i] += 1

            elif predicted == 0:
                if correct:
                    true_negative[i] += 1
                else:
                    false_negative[i] += 1

        # Running tally, printed after every sample.
        print("Total: {}".format(total))

    # Convert the counters to arrays so later cells can use element-wise arithmetic.
    true_positive = np.array(true_positive)
    true_negative = np.array(true_negative)
    false_positive = np.array(false_positive)
    false_negative = np.array(false_negative)
    total = np.array(total)
    # Previously recorded values (their meaning is not stated here; verify before relying on them):
    #[88.17733990147784, 97.77777777777777, 98.62306368330465, 92.61363636363636, 89.53488372093024, 96.661101836394, 94.70198675496688, 82.67148014440433, 93.88083735909822, 96.55172413793103, 88.95027624309392, 93.4959349593496]

In [None]:
# Generate graphs

# Each outcome count as a percentage of the scored samples of its class.
tp_per, tn_per = true_positive/total * 100, true_negative/total * 100
fp_per, fn_per = false_positive/total * 100, false_negative/total * 100

# Computes the FP/TP rates at standard cutoff
fp_rate = false_positive/(false_positive + true_negative)
tp_rate = true_positive/(true_positive + false_negative)

# --- Chart 1: correct predictions -------------------------------------------
# The blue true-positive bar is drawn over the red (true negative + true
# positive) bar, so the visible red part corresponds to the true negatives.
figure = plt.figure(figsize=(18, 9))
correct_per = tn_per + tp_per

one = plt.barh(scorer.classes, correct_per, color="red")
two = plt.barh(scorer.classes, tp_per, color="blue")
plt.legend([one, two], ["True negative", "True positive"], loc=4)

# Print the value (two decimals) at the end of every bar.
for values, colour in ((correct_per, "red"), (tp_per, "blue")):
    for i, v in enumerate(values):
        plt.text(v, i - .1, str(round(v*100)/100), color=colour, fontweight="bold")

plt.xlabel("Percentage [numbers rounded] / %")
plt.title("Correct predictions", fontweight="bold")

template = replaceTemplate(template, "{correct_graph}", figure)

# --- Chart 2: incorrect predictions -----------------------------------------
# Same layering: yellow false positives over green (false negative + false positive).
figure = plt.figure(figsize=(18, 9))
wrong_per = fn_per + fp_per

one = plt.barh(scorer.classes, wrong_per, color="green")
two = plt.barh(scorer.classes, fp_per, color="yellow")

for values, colour in ((wrong_per, "green"), (fp_per, "yellow")):
    for i, v in enumerate(values):
        plt.text(v + .1, i - .1, str(round(v*100)/100), color=colour, fontweight="bold")

plt.legend([one, two], ["False negative", "False positive"], loc=4)
plt.xlabel("Percentage [numbers rounded] / %")
plt.title("Incorrect predictions", fontweight="bold")

template = replaceTemplate(template, "{wrong_graph}", figure)

In [None]:
# Exchange the first two axes so the arrays can be indexed per class
# (createRoc reads swap_labels[index] / swap_outs[index] with a class index).
swap_labels = np.asarray(labels).swapaxes(0, 1)
swap_outs = np.squeeze(outputs).swapaxes(0, 1)

# Weight scores are already computed up to this point, there is no need for further integration
swap_weights = np.squeeze(weight_list).swapaxes(0, 1)

def createRoc (index, figure):
    """Plot the ROC curve of one class into `figure` and return its AUC.

    index  -- class index; selects the row of `swap_labels` / `swap_outs`, the
              class name in `scorer.classes`, and subplot slot index + 1 of a
              6 x 2 grid.
    figure -- matplotlib figure that receives the subplot.

    Returns the area under the ROC curve as computed by `metrics.auc`.
    """
    # np.floor turns a label of 0.5 into 0, so the samples that the
    # confusion-matrix cell skips are treated as negatives here.
    # NOTE(review): confirm this is intended rather than masking them out.
    fpr, tpr, _ = metrics.roc_curve(y_true=np.floor(swap_labels[index]), y_score=swap_outs[index], pos_label=1)
    auc_score = metrics.auc(fpr, tpr)

    # Draw on the Axes returned by add_subplot instead of pyplot's implicit
    # "current axes", so the curve always lands on `figure` even when another
    # figure is the current one.
    axes = figure.add_subplot(6, 2, (index + 1))

    curve = axes.plot(fpr, tpr, color="blue")
    # Diagonal reference line from (0, 0) to (1, 1).
    axes.plot([0, 1], [0, 1], 'k--', lw=2)

    axes.set_ylabel("True positive rate")
    axes.set_xlabel("False positive rate")
    axes.set_ylim(0, 1.03)
    axes.set_xlim(0, 1)

    axes.legend(curve, ["Area under curve = {}".format(auc_score)], loc=4)

    axes.set_title("ROC curve for {}".format(scorer.classes[index]), fontweight="bold")

    return auc_score
    
# One ROC subplot per class on a single tall figure; keep each class's AUC
# for the summary table.
figure = plt.figure(figsize=(20, 20*3))
auc_scores = [createRoc(class_index, figure) for class_index in range(scorer.classes_amount)]

plt.show()
template = replaceTemplate(template, "{roc_metric}", figure)
    

In [None]:
from time import gmtime, strftime

# Header fields of the report, substituted in this order: placeholder -> text.
header_fields = (
    ("{model_name}", model_name),
    ("{time}", strftime("%Y-%m-%d %H:%M:%S", gmtime()) + " UTC"),
    ("{iterations_trained}", str(iteration)),
    ("{compounds_tested}", str(scorer.smiles_length)),
)

for placeholder, text in header_fields:
    template = template.replace(placeholder, text)

# fill values
#
# Hand-written sample of a table row, kept for reference only. Its column set
# is not the same as the rows generated further down in this cell.
#
#       <tr>
#         <th>AhR</th>
#         <th>{ahr_accounted}</th>
#         <th>&nbsp</th>
#         <th>{ahr_auc}</th>
#         <th>&nbsp</th>
#         <th>{ahr_accuracy}</th>
#         <th>{ahr_precision}</th>
#         <th>{ahr_fdr}</th>
#         <th>{ahr_for}</th>
#       </tr>

# Accumulator for the generated HTML table rows.
trex = ""

def sf4 (x):
    """Round `x` to three decimal places.

    Despite the name, this is a fixed three-decimal rounding
    (round(x * 1000) / 1000), not a significant-figure rounding.

    NaN and +/-infinity are returned unchanged: the ratios passed in can have a
    zero denominator, and round() cannot convert such values to an integer.
    """
    if x != x or abs(x) == float("inf"):
        return x
    return round(x * 1000)/1000

# Append one HTML table row per class. The format string is kept exactly as
# written: the whitespace after each line continuation is part of the emitted HTML.
for i in range(len(scorer.classes)):
        trex += "<tr><th>{}</th><th>{}</th><th>{}</th><th>&nbsp</th><th>{}\
        </th><th>&nbsp</th><th>{}</th><th>{}</th>\
        <th>{}</th><th>{}</th></tr>".format(scorer.classes[i],                                              # class name
                                            total[i],                                                       # scored samples
                                            false_negative[i] + true_positive[i],                           # FN + TP
                                            sf4(auc_scores[i]),                                             # ROC AUC
                                            sf4((true_positive[i] + true_negative[i])/total[i]),            # accuracy
                                            # TP / (TP + FN). NOTE(review): the sample row earlier in this
                                            # cell calls this column "precision"; confirm the intended metric.
                                            sf4(true_positive[i]/(true_positive[i] + false_negative[i])),
                                            sf4(false_positive[i]/(false_positive[i] + true_positive[i])),  # FP / (FP + TP)
                                            sf4(false_negative[i]/(false_negative[i] + true_negative[i])))  # FN / (FN + TN)

template = template.replace("{table1_values}", trex)

# Write the finished report; the context manager flushes and closes the file.
with open(dir_main + "/report/report.html", "w+") as report_file:
    report_file.write(template)

In [None]:
# Sanity check: the four outcome counters per class next to `total`. They are
# expected to match whenever every rounded model output was exactly 0 or 1.
outcome_sum = true_positive + false_negative + true_negative + false_positive
print(outcome_sum)
print(total)

In [None]:
smiles, labels, weights, smiles_raw = scorer.getEvaluations()

# Per class: how many evaluation labels are exactly 1.
accounts = [
    sum(1 for label in labels if label[i] == 1)
    for i in range(scorer.classes_amount)
]

# Shown next to the totals and the FN + TP counts from the evaluation cell.
print(accounts)
print(total)

print(false_negative + true_positive)

print(scorer.classes)