In [30]:
# Helpers
import numpy as np

# DB 
import psycopg2
from django.conf import settings

# Evaluation
from sklearn.metrics import roc_curve, auc, f1_score
from scipy import special

# Plot
import matplotlib.pyplot as plt
import seaborn as sns

In [31]:
# Open a database connection using the credentials of Django's "default" database entry.
connection = psycopg2.connect(
    **{
        keyword: settings.DATABASES["default"][setting]
        for keyword, setting in (
            ("user", "USER"),
            ("password", "PASSWORD"),
            ("host", "HOST"),
            ("port", "PORT"),
            ("database", "NAME"),
        )
    }
)
# Autocommit mode: statements run through this connection are not grouped
# into an explicit transaction by the notebook.
connection.autocommit = True
cursor = connection.cursor()

In [32]:
%%time
# Select every stored experiment whose method is 'fdtf', together with the
# columns that the evaluation loop unpacks (same order as listed here).
query = """select experiment_id, sp_test, sp_hat, x, x_test, sk_hat, q_matrix_hat, mu, auc_threshold
from EDM2020_2020_02_19 where method='fdtf'"""
cursor.execute(query)
# Only the first record is read here; the evaluation loop fetches the remaining
# ones from the same cursor, one at a time.
row = cursor.fetchone()

CPU times: user 20.3 s, sys: 21.8 s, total: 42 s
Wall time: 16min 14s


In [33]:
%%time
# Score every fetched experiment on its test data and prepare one UPDATE
# statement per experiment. This cell expects `cursor` to still hold the SELECT
# result set and `row` to hold its first record.
update_queries = []  # rendered UPDATE statements, executed by a later cell
total = 0            # number of experiment rows processed
error_count = 0      # initialised for bookkeeping; never incremented in this cell
while row:
    # Unpack in the SELECT's column order. `X` is not used below.
    exp_id, sp_test, sp_hat, X, X_test, sk_hat, q_matrix_hat, mu, auc_threshold = row
    sp_test = np.asarray(sp_test)
    sp_hat = np.asarray(sp_hat)
    sk_hat = np.asarray(sk_hat)
    X_test = np.asarray(X_test)
    q_matrix_hat = np.asarray(q_matrix_hat)

    # Zero-filled copies of the fitted estimates whose last axis has the length of
    # the test tensor; only the attempts present in both tensors are copied over.
    # NOTE(review): axis 0 is indexed as "student" and axis 2 as "attempt" below;
    # the meaning of axis 1 is not visible here - confirm against the fitting code.
    attempts = min(sp_test.shape[2], sp_hat.shape[2])
    sp_hat_test = np.zeros((sp_hat.shape[0], sp_hat.shape[1], sp_test.shape[2]))
    sp_hat_test[:, :, :attempts] = sp_hat[:, :, :attempts]
    sk_hat_test = np.zeros((sk_hat.shape[0], sk_hat.shape[1], sp_test.shape[2]))
    sk_hat_test[:, :, :attempts] = sk_hat[:, :, :attempts]

    # Calculate SK and SP for the next test attempts
    for attempt in range(1, attempts):
        # np.where on the 2-D slice returns a (row_indices, column_indices) tuple.
        # Only the row indices identify students, so take element [0]; iterating
        # over the tuple would also use the column indices as student indices.
        students = np.unique(np.where(X_test[:, :, attempt] == 1)[0])
        previous_sk = sk_hat_test[students, :, attempt - 1]
        activation = 1 + np.exp(-mu * np.dot(X_test[students, :, attempt], q_matrix_hat.T))
        sk_hat_test[students, :, attempt] = (2 * previous_sk) + 2 * (1 - previous_sk) / activation - 1
        sp_hat_test[students, :, attempt] = np.dot(sk_hat_test[students, :, attempt], q_matrix_hat)

    # Get test predicted values: only the entries flagged with 1 in X_test are scored
    observed = np.where(X_test == 1)
    y = sp_test[observed]
    y_pred = sp_hat_test[observed]

    # Binarize prediction
    y_pred_binary = np.where(y_pred > auc_threshold, 1, 0)

    # Calculate accuracy, RMSE, NLL
    acc = np.logical_not(np.logical_xor(y, y_pred_binary)).sum() / len(y)
    # Root of the *mean* squared error, computed the same way as rmse_1 / rmse_0.
    # (Taking the square root before averaging yields the mean absolute error.)
    rmse = np.sqrt(np.mean(np.power(y - y_pred, 2)))
    y_1 = y[np.where(y == 1)]
    y_pred_1 = y_pred[np.where(y == 1)]
    y_0 = y[np.where(y == 0)]
    y_pred_0 = y_pred[np.where(y == 0)]
    rmse_1 = np.sqrt(np.mean(np.power(y_1 - y_pred_1, 2)))
    rmse_0 = np.sqrt(np.mean(np.power(y_0 - y_pred_0, 2)))
    f1 = f1_score(y, y_pred_binary)
    # Per-sample terms -xlogy(y, p) - xlogy(1 - y, 1 - p), averaged further below.
    nll = -special.xlogy(y, y_pred) - special.xlogy(1 - y, 1 - y_pred)

    # Alternative NLL: pick the predicted value of the true class for each sample
    # and average the negative masked logs (np.ma.log masks invalid entries).
    y_pred_all = np.zeros((y.shape[0], 2))
    y_pred_all[:, 0] = 1 - y_pred
    y_pred_all[:, 1] = y_pred
    nll_special = (-np.ma.log(y_pred_all[range(y.shape[0]), y.astype(int)])).mean()

    # Substitute inf by max NLL: zero the infinite entries first so that np.max
    # sees only the remaining values, then write that maximum back.
    idx = np.where(np.isinf(nll))
    nll[idx] = 0
    max_nll = np.max(nll)
    nll[idx] = max_nll
    nll = np.mean(nll)

    update = {
        "test_acc": acc,
        "test_rmse": rmse,
        "test_1":  np.where(y == 1)[0].shape[0],
        "test_0":  np.where(y == 0)[0].shape[0],
        "test_pred_1": np.where(y_pred_binary == 1)[0].shape[0],
        "test_pred_0": np.where(y_pred_binary == 0)[0].shape[0],
        "test_f1": f1,
        "test_nll": nll,
        "test_nll_special": nll_special,
        "test_nll_masked": idx[0].shape[0],  # number of infinite NLL terms replaced above
        "test_rmse_1": rmse_1,
        "test_rmse_0": rmse_0
    }

    # Write PSQL query: one "column= %s" placeholder per metric, values bound by mogrify.
    update_list = [key + "= %s" for key in update]
    query_values = tuple(update.values()) + (exp_id,)
    # The leading space before "where" keeps the last rendered value separated
    # from the keyword.
    update_query = "UPDATE EDM2020_2020_02_19 SET " + ", ".join(update_list) + " where experiment_id = %s"
    # mogrify only renders the statement; nothing is executed here, so the
    # SELECT result set on the cursor stays available for the next fetch.
    query = cursor.mogrify(update_query, query_values)
    update_queries.append(query)
    total += 1
    try:
        row = cursor.fetchone()
    except psycopg2.ProgrammingError:
        # Treated as "nothing left to read": stop the loop instead of failing.
        row = False

CPU times: user 5min 30s, sys: 22.7 s, total: 5min 53s
Wall time: 5min 53s


In [36]:
%%time
# Run the prepared UPDATE statements one after another, in the order in which
# they were appended to update_queries.
for statement in update_queries:
    cursor.execute(statement)

CPU times: user 15.6 ms, sys: 46.9 ms, total: 62.5 ms
Wall time: 1min 58s


In [35]:
# Display the number of experiment rows processed by the evaluation loop.
total

763