In [1]:
import numpy as np
import pandas as pd
from typing import List
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split

#base_path = "C:/Users/99818854/Projetos/GitRep/adaptive_learning"
base_path = "/media/bruno/Arquivos/Desenvolvimento/NextQuestion"

In [2]:
base = pd.read_csv(f"{base_path}/data/mastery.csv")
submit = pd.read_csv(f"{base_path}/data/Submit.csv", sep=";")

In [3]:
mastery_params = [[4.44], [0.33], [0.86], [0.86], [0.46], [0.39]]
mastery = np.dot(base.values[:, 3:-1], mastery_params)
X, y = np.concatenate((base.values[:, 2:3], mastery), axis=1), base.values[:, -1]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
mean_difficulty = round(np.nanmean(X_train[:, 0]))
X_train[np.isnan(X_train)] = mean_difficulty
X_test[np.isnan(X_test)] = mean_difficulty

In [9]:
mean_difficulty

2

In [4]:
total_models = 15
models: List[SGDClassifier] = []

for i in range(0, total_models):
    X_sample, y_sample = [], []
    while len(X_sample) != len(X_train):
        j = np.random.randint(0, len(X_train))
        X_sample.append(X_train[j])
        y_sample.append(y_train[j])

    model = SGDClassifier(loss="log_loss", learning_rate="adaptive", eta0=0.001)
    model.fit(X_sample, y_sample)
    models.append(model)

In [5]:
def predict(models: List[SGDClassifier], test):
    votes = []
    for model in models:
        y_pred_test = model.predict(test)
        votes.append(y_pred_test)

    result = []
    for i in range(0, len(test)):
        total = []
        for j in range(0, len(votes)):
            total.append(votes[j][i])
        predict = np.mean(total)
        result.append(1 if predict > 0.5 else 0)

    tn, fp, fn, tp = confusion_matrix(y_test, result).ravel()
    return result, tn, fp, fn, tp

In [8]:
votes = []
for model in models:
    y_pred_test = model.predict(X_test)
    votes.append(y_pred_test)

result = []
for i in range(0, len(X_test)):
    total = []
    for j in range(0, len(votes)):
        total.append(votes[j][i])
    predict = np.mean(total)
    result.append(1 if predict > 0.5 else 0)

tn, fp, fn, tp = confusion_matrix(y_test, result).ravel()

In [10]:
accuracy = (tp + tn) / (tn + fp + fn + tp)
precision = tp / (tp + fp)
recall = tp / (tp + fn)
f1 = 2 * (precision * recall) / (precision + recall)

In [7]:
y_pred, accuracy, precision, recall, f1 = predict(models, X_test)

In [8]:
accuracy, precision, recall, f1

(74232, 132846, 55800, 337122)