In [5]:
# Helpers
import pickle
import numpy as np
import time
import pandas as pd
import matplotlib.pyplot as plt

# DB 
import psycopg2
from django.conf import settings

# Learning
from spkit import pfa

# Evaluation
from sklearn.metrics import roc_curve, auc, f1_score, confusion_matrix

In [6]:
folder = "data/edm2020"
N_RUNS = 10

### Organizing data

In [108]:
def add_to_list(data, attempts=None):
    cleaned_data = []
    for item in data:
        s_idx, q_idx, a_idx, outcome = item
        if attempts and a_idx >= attempts:
            continue
        cleaned_data.append([outcome, s_idx, q_idx])
    return cleaned_data
    
def transform_data(data):
    student_performance = add_to_list(data['train_set'],50)
    student_performance_test = add_to_list(data['test_set'])
    return student_performance, student_performance_test

In [175]:
def run(*args):
    dataset, student_performance, student_performance_test, q_matrix, students = args
    
    # Run BKT
    start = time.time()
    model = bkt.BKT()
    model.fit(student_performance, q_matrix)
    end = time.time()
    
    data_arr = np.asarray(student_performance)
    data_arr_test = np.asarray(student_performance_test)
    rmse = 0
    n = 0
    score_df = []
    learning_state = {}
    y = []
    y_pred = []
    y_test = []
    y_pred_test = []
    
    # Predict for each student
    for student in range(len(students)):
        # Train
        student_idx = np.where(data_arr[:,1] == student)
        outcome = data_arr[student_idx, 0].astype('int64')
        y += outcome.tolist()[0]
        question_id = data_arr[student_idx, 2].astype('int64')
        data_student = np.vstack((outcome, question_id)).T.astype('int64').tolist()
        predicted = model.predict_proba(data_student, q_matrix)
        y_pred += predicted[:,1].tolist()
        s_ll, s_aic, s_bic, s_rmse, s_acc = model.score()
        try:
            fpr, tpr, _ = roc_curve(outcome.flatten(), predicted[:,1], pos_label=1)
            auc_score = auc(fpr, tpr)
        except ValueError:
            print("auc error found")
            auc_score = 0
        score_df.append({"student": student,
                         "AIC": s_aic,
                         "BIC": s_bic,
                         "RMSE": s_rmse,
                         "Acc": s_acc,
                         "n questions": model.n_questions,
                         "LL": s_ll,
                         "correct_outcome": np.where(outcome == 1)[1].shape[0],
                         "incorrect_outcome": np.where(outcome == 0)[1].shape[0],
                         "AUC": auc_score})
        learning_state[student] = model.get_learning_state()
        
        # Test
        student_idx = np.where(data_arr_test[:,1] == student)
        if student_idx[0].size > 0:
            outcome_test = data_arr_test[student_idx, 0].astype('int64')
            y_test += outcome_test.tolist()[0]
            question_id = data_arr_test[student_idx, 2].astype('int64')
            data_student_test = np.vstack((outcome_test, question_id)).T.astype('int64').tolist()
            predicted_test = model.predict_proba(data_student_test, q_matrix)
            y_pred_test += predicted_test[:,1].tolist()
            s_ll, s_aic, s_bic, s_rmse, s_acc = model.score()
#         try:
#             fpr, tpr, _ = roc_curve(outcome.flatten(), predicted[:,1], pos_label=1)
#             auc_score = auc(fpr, tpr)
#         except ValueError:
#             print("auc error found")
#             auc_score = 0
#         score_df.append({"student": student,
#                          "AIC": s_aic,
#                          "BIC": s_bic,
#                          "RMSE": s_rmse,
#                          "Acc": s_acc,
#                          "n questions": model.n_questions,
#                          "LL": s_ll,
#                          "correct_outcome": np.where(outcome == 1)[1].shape[0],
#                          "incorrect_outcome": np.where(outcome == 0)[1].shape[0],
#                          "AUC": auc_score})
#         learning_state[student] = model.get_learning_state()
        
    # Calculate train metrics
    score = pd.DataFrame.from_dict(score_df)
    rmse_train = np.sqrt((score['RMSE']**2*score['n questions']).sum()/score['n questions'].sum())
    y = np.asarray(y)
    y_pred = np.asarray(y_pred)
    train_rmse = np.sqrt(np.mean(np.power(y - y_pred, 2)))
    print("Train")
    print(len(y))
    print(rmse_train)
    acc_train = (score['Acc']*score['n questions']).sum()/score['n questions'].sum()
    print(acc_train)
    y_test = np.asarray(y_test)
    y_pred_test = np.asarray(y_pred_test)
    test_rmse = np.sqrt(np.mean(np.power(y_test - y_pred_test, 2)))
    print("Test")
    print(len(y_test))
    print(test_rmse)
    
    n_questions = score['n questions'].sum()
    p = y_test.sum()/n_questions
    estimated_outcome = np.random.binomial(1, p, n_questions)
    random_rmse = np.sqrt(np.mean(np.power(y - estimated_outcome, 2)))
    acc = (estimated_outcome == y).sum()/score['n questions'].sum()
    print("random")
    print(p)
    print(random_rmse)
    
#     
    
#     try:
#         fpr, tpr, thresholds = roc_curve(y, y_pred, pos_label=1)
#         auc_score = auc(fpr, tpr)
#     except ValueError:
#         auc_score = 0
#         tpr = fpr = [0]
#         acc = 0
#         f1 = 0
#         y_pred_binary = [0]
#         J_opt_thresholds = 0
#     else:
#         J_stats = tpr - fpr
#         J_opt_thresholds = thresholds[np.argmax(J_stats)]
#         print('Youden\'s optimum threshold: %.2f' % J_opt_thresholds)
#         y_pred_binary = np.where(y_pred > J_opt_thresholds, 1, 0)
#         acc = (y_pred_binary == y).sum()/score['n questions'].sum()
#         f1 = f1_score(y, y_pred_binary)
#         plt.figure()
#         lw = 2
#         plt.plot(fpr, tpr, color='darkorange',
#                  lw=lw, label='ROC curve (area = %0.2f)' % auc_score)
#         plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
#         plt.xlim([0.0, 1.0])
#         plt.ylim([0.0, 1.05])
#         plt.xlabel('False Positive Rate')
#         plt.ylabel('True Positive Rate')
#         plt.title('Receiver operating characteristic example')
#         plt.legend(loc="lower right")
#         plt.show()
#         print(print(confusion_matrix(y,y_pred_binary)))

    row = {
        "dataset": "run_%d" % dataset,
        "q_matrix": q_matrix,
        "sp": student_performance,
        "sp_test": student_performance_test,
        "sp_hat": y_pred,
        "sk_hat": learning_state,
        "q_matrix_hat": q_matrix,
        "concepts": 14,
        "method": "bkt",
        "seconds": end-start,
#         "train_auc": auc_score,
#         "train_acc": acc,
        "train_rmse": rmse_train,
        "test_rmse": test_rmse,
        "random_rmse": random_rmse
#         "auc_threshold": J_opt_thresholds,
#         "train_1":  np.where(y == 1)[0].shape[0],
#         "train_0":  np.where(y == 0)[0].shape[0],
#         "train_pred_1": np.where(y_pred_binary == 1)[0].shape[0],
#         "train_pred_0": np.where(y_pred_binary == 0)[0].shape[0],
#         "train_f1": f1,
#         "train_nll": nll,
#         "train_nll_special": nll_special,
#         "train_nll_masked": idx[0].shape[0],
#         "train_rmse_1": rmse_1,
#         "train_rmse_0": rmse_0
    }
    return row

    # Write PSQL query
#     insert_query_base = "INSERT INTO EDM2020_2020_02_19 "
#     column_value = []
#     insert_format = []
#     query_values = []
#     for col in row.keys():
#         if isinstance(row[col], np.ndarray):
#             query_values.append(row[col].tolist())
#         else:
#             query_values.append(row[col])
#         column_value.append(col)
#         insert_format.append("%s")

#     insert_query = insert_query_base + "(" + ", ".join(column_value) + ") VALUES "
#     insert_query += "(" + ", ".join(insert_format) + ")"
#     query_values = tuple(query_values)
#     query = cursor.mogrify(insert_query, query_values)
#     cursor.execute(query)

In [176]:
args = []
for restart in range(1):
    for dataset in range(N_RUNS):
        with open("%s/2020_06_08_run_%d.pkl" % (folder, dataset), "rb") as pklfile:
            data = pickle.load(pklfile)
            train_set, test_set = transform_data(data)
            args.append([dataset, 
                        train_set, 
                        test_set,
                        data['q_matrix'].T,
                        data['users_idx']])

In [177]:
next_id = 0

In [178]:
import warnings
warnings.filterwarnings('ignore')

In [179]:
%%time
connection = psycopg2.connect(user = settings.DATABASES["default"]["USER"],
                                  password = settings.DATABASES["default"]["PASSWORD"],
                                  host = settings.DATABASES["default"]["HOST"],
                                  port = settings.DATABASES["default"]["PORT"],
                                  database = settings.DATABASES["default"]["NAME"])
connection.autocommit=True
cursor = connection.cursor()

results = []

for item in args[next_id:]:
    row = run(*item)
    results.append(row)

Train
2642
0.40369637855246876
0.7872823618470856
Test
453
0.37878862090512083
random
0.030658591975775928
0.48364625682528056
Train
2654
0.40538097316002514
0.7889977392614921
Test
570
0.4088961311205535
random
0.04709871891484552
0.4956479777622766
Train
2650
0.40258543292094107
0.7913207547169812
Test
476
0.4103602716420763
random
0.03924528301886793
0.48330622348608065
Train
2604
0.4015988301906474
0.7956989247311828
Test
447
0.4247766801917546
random
0.04185867895545315
0.486768080673455
Train
2611
0.40672202857201
0.7847567981616239
Test
473
0.3585085764749785
random
0.028724626579854463
0.4841414595876007
Train
2725
0.4034812120164335
0.7897247706422018
Test
348
0.4100015709954932
random
0.026422018348623854
0.47967879160878696
Train
2640
0.40719653569913955
0.7837121212121212
Test
686
0.32485332081592777
random
0.032196969696969696
0.48966593436007705
Train
2619
0.4079520607035465
0.7842688048873616
Test
422
0.3796197184299526
random
0.030164184803360063
0.4865506930811655
Trai

In [182]:
train_rmse = [item['train_rmse'] for item in results]
test_rmse = [item['test_rmse'] for item in results]
random_rmse = [item['random_rmse'] for item in results]

In [167]:
print("PFA")
print("Avg train: %.4f (%.4f)" % (np.mean(train_rmse), np.std(train_rmse)))
print("Avg test: %.4f (%.4f)" % (np.mean(test_rmse), np.std(test_rmse)))

PFA
Avg train: 0.3817 (0.0025)
Avg test: 0.3732 (0.0277)


In [183]:
print("BKT")
print("Avg train: %.4f (%.4f)" % (np.mean(train_rmse), np.std(train_rmse)))
print("Avg test: %.4f (%.4f)" % (np.mean(test_rmse), np.std(test_rmse)))
print("Avg random (test): %.4f (%.4f)" % (np.mean(random_rmse), np.std(random_rmse)))

BKT
Avg train: 0.4041 (0.0027)
Avg test: 0.3816 (0.0299)
Avg random: 0.4865 (0.0050)
