In [1]:
# Helpers
import numpy as np

# DB 
import psycopg2
from django.conf import settings

# Evaluation
from sklearn.metrics import roc_curve, auc, f1_score

# Plot
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Open a PostgreSQL connection with the credentials of Django's "default"
# database entry. Each psycopg2 keyword is read from its upper-case settings key.
connection = psycopg2.connect(
    **{
        keyword: settings.DATABASES["default"][settings_key]
        for keyword, settings_key in (
            ("user", "USER"),
            ("password", "PASSWORD"),
            ("host", "HOST"),
            ("port", "PORT"),
            ("database", "NAME"),
        )
    }
)
# Autocommit mode: statements run through this connection are not wrapped in
# an explicit transaction that would need a later commit().
connection.autocommit = True
cursor = connection.cursor()

In [3]:
%%time
# Select the stored matrices for every experiment row whose method is 'nmf'
# and whose attempts_train equals 1.
query = """select experiment_id, sp, sp_test, sp_hat, sk_hat, q_matrix_hat, attempts_train
from EDM2020_2020_02_19 where method='nmf' and attempts_train = 1"""
cursor.execute(query)
# Fetch only the first row here; the remaining rows are pulled one at a time
# by the `while row:` loop in the next cell.
row = cursor.fetchone()

CPU times: user 2.48 s, sys: 1.49 s, total: 3.97 s
Wall time: 3min 16s


In [4]:
%%time
# Walk over every row returned by the SELECT in the previous cell, compute the
# train-set metrics for that experiment, and render one UPDATE statement per
# experiment. The statements are only collected in `update_queries` here; a
# later cell executes them.
#
# Reads:  `row` (first fetched row) and `cursor` (open result set).
# Writes: `update_queries` (list of rendered statements) and `total` (row count).
update_queries = []
total = 0
while row:
    exp_id, sp, sp_test, sp_hat, sk_hat, q_matrix_hat, att = row
    sp = np.asarray(sp)
    sp_test = np.asarray(sp_test)          # not used further in this cell
    sp_hat = np.asarray(sp_hat)
    sk_hat = np.asarray(sk_hat)            # not used further in this cell
    q_matrix_hat = np.asarray(q_matrix_hat)  # not used further in this cell

    # Get train predicted values
    # 0 is when there aren't any attempts, so only entries with sp > 0 count.
    attempted = sp > 0
    y = sp[attempted]            # boolean indexing returns a copy, safe to edit
    y_pred = sp_hat[attempted]
    rmse = np.sqrt(np.mean(np.power(y - y_pred, 2)))

    update = {
        "train_rmse": rmse
    }

    # If just first attempt, we can binarize predictions
    if att == 1:
        # Remap the 0.1 entries to 0 so the labels are binary for roc_curve
        # (presumably 0.1 encodes an attempted-but-wrong answer — TODO confirm).
        y[y == 0.1] = 0

        # AUC plus the threshold that maximises Youden's J (tpr - fpr).
        fpr, tpr, thresholds = roc_curve(y, y_pred, pos_label=1)
        auc_score = auc(fpr, tpr)
        J_stats = tpr - fpr
        J_opt_thresholds = thresholds[np.argmax(J_stats)]

        # roc_curve evaluates each threshold as `score >= threshold`, so the
        # binarisation must use >= as well. With a strict > the sample that
        # produced the chosen threshold would be labelled negative and the
        # metrics below would not match the selected operating point.
        y_pred_binary = np.where(y_pred >= J_opt_thresholds, 1, 0)

        # Calculate accuracy and F1
        acc = np.logical_not(np.logical_xor(y, y_pred_binary)).sum() / len(y)
        f1 = f1_score(y, y_pred_binary)

        update.update({
            "train_auc": auc_score,
            "train_acc": acc,
            "auc_threshold": J_opt_thresholds,
            "train_1": int(np.count_nonzero(y == 1)),
            "train_0": int(np.count_nonzero(y == 0)),
            "train_pred_1": int(np.count_nonzero(y_pred_binary == 1)),
            "train_pred_0": int(np.count_nonzero(y_pred_binary == 0)),
            "train_f1": f1
        })

    # Write PSQL query. Column names come from the literal keys above; values
    # are bound through placeholders. The space before WHERE is required:
    # without it the last value is fused to the keyword ("0.5where ...").
    assignments = ", ".join(key + " = %s" for key in update)
    update_query = (
        "UPDATE EDM2020_2020_02_19 SET " + assignments
        + " WHERE experiment_id = %s"
    )
    query_values = tuple(update.values()) + (exp_id,)

    # mogrify only renders the statement; it does not execute it, so the
    # SELECT result set on this cursor is still available to fetchone below.
    query = cursor.mogrify(update_query, query_values)
    update_queries.append(query)
    total += 1

    # fetchone() returns None once the result set is exhausted, which ends the
    # loop; a ProgrammingError (no result set to fetch from) also ends it.
    try:
        row = cursor.fetchone()
    except psycopg2.ProgrammingError:
        row = False

  'precision', 'predicted', average, warn_for)


CPU times: user 41.7 s, sys: 0 ns, total: 41.7 s
Wall time: 41.7 s


In [None]:
# Send every rendered UPDATE statement collected in `update_queries` to the
# database, one execute() call per statement. The connection was put in
# autocommit mode when it was created, so no explicit commit() is issued here.
for query in update_queries:
    cursor.execute(query)