In [9]:
import numpy as np
import preprocessing as pp

from graph import *
from bayesian_decision_evaluation import *
# import importlib
from models.gmm_clf import GMM
from models.logistic_regression_classifier import LogisticRegression, LogisticRegressionWeighted, QuadraticExpansion
from models.svm_classifier import SVMClassifier
from models.svm_kernel_classifier import SVMClassifierPolyKernel, SVMClassifierRBFKernel
from models.multivariate_gaussian_classifier import MultivariateGaussianClassifier, NaiveBayesClassifier, TiedCovarianceClassifier

In [10]:
# Load the main dataset and a second (features, labels) pair named "validation data" by the loader.
# NOTE(review): `utils` is not imported explicitly in this notebook; presumably it
# arrives through one of the star imports above — confirm.
X, y = utils.load_data()
X_eval, y_eval = utils.load_validation_data()
# Class name -> colour name; presumably consumed by the plotting helpers — TODO confirm.
classes = {
    'Fake': 'blue',
    'Real': 'orange'
}

# Split (X, y) into the train/test arrays used by every model cell below.
X_train, y_train, X_test, y_test = utils.split_data(X, y)

In [11]:
def compute_statistics(llr, y_true, prior, unique_labels=None):
    """Print and return the detection-cost metrics of a score vector for one prior.

    Two sets of hard decisions are derived from ``llr``: one using the threshold
    returned by ``binary_cost_matrix(prior)`` and one using the threshold returned
    alongside the minimum DCF. The first feeds the DCF and normalized DCF, the
    second feeds the printed accuracy.

    Args:
        llr: Scores compared element-wise against the thresholds with ``>``
            (presumably log-likelihood ratios — confirm against callers).
        y_true: Ground-truth labels, forwarded to ``compute_minDCF`` and
            ``confusion_matrix``.
        prior: Prior forwarded to ``binary_cost_matrix`` and ``compute_minDCF``.
        unique_labels: Optional label set forwarded unchanged to the helpers.

    Returns:
        Tuple ``(min_DCF, DCF, DCF_norm)``. The accuracy is printed but not returned.
    """
    costs, class_priors, prior_threshold = binary_cost_matrix(prior)
    min_DCF, min_dcf_threshold = compute_minDCF(llr, y_true, prior, unique_labels)

    # Boolean comparison times 1 turns the decisions into 0/1 integers.
    decisions_at_prior = (llr > prior_threshold) * 1
    decisions_at_min = (llr > min_dcf_threshold) * 1

    cm_at_prior = confusion_matrix(y_true, decisions_at_prior, unique_labels)
    cm_at_min = confusion_matrix(y_true, decisions_at_min, unique_labels)

    # Accuracy is measured at the minDCF threshold; both DCF values use the prior threshold.
    acc = accuracy(cm_at_min)
    DCF, _, _ = compute_DCF(cm_at_prior, costs, class_priors)
    DCF_norm, _, _ = compute_DCF_normalized(cm_at_prior, costs, class_priors)

    summary = f"MinDCF: {min_DCF:.4f}, DCF: {DCF:.4f}, Normalized DCF: {DCF_norm:.4f}, Accuracy: {acc*100:.2f}%\n"
    print(summary)

    return (min_DCF, DCF, DCF_norm)

In [12]:
# Flag forwarded to every model's fit() call below. With it set to True the recorded
# outputs show "... loaded successfully", so fit() presumably restores saved
# parameters instead of training — confirm in the model classes.
test_only = True

#### Comparison with Calibration

#### Best model for GMM: diagonal covariance, 8 components

In [13]:
# Configuration of the selected GMM. `prior` is assigned here but not read in this cell.
covariance_type = 'diagonal'
n_best_components = 8
prior = 0.1
psiEig = 0.01  # presumably a lower bound on covariance eigenvalues — TODO confirm in models.gmm_clf

model = GMM(n_components=n_best_components, covariance_type=covariance_type, psiEig=psiEig)
# Folder name encodes the configuration: "gmm_diagonal_8_components".
folder = f"gmm_{covariance_type}_{n_best_components}_components"
# The recorded output ("GMM 0/1 loaded successfully.") shows this call loading stored models.
model.fit(X_train, y_train, n_features=2, folder=folder, test_only=test_only)
# Scores on X_test, stored under a model-specific name because `model` is rebound in later cells.
gmm_score = model.score_binary(X_test)

GMM 0 loaded successfully.
GMM 1 loaded successfully.


#### Best model for SVM: SVM - RBF with gamma = 0.1, epsilon = 1.0, C = 100.0

In [14]:
# Configuration of the selected RBF-kernel SVM. `prior` is assigned here but not read in this cell.
eps = 1.0
gamma = 1e-1
C = 100.0
prior = 0.1

model = SVMClassifierRBFKernel(C=C, gamma=gamma, eps=eps)
# Build the folder name: format the hyper-parameters, turn '.' into '_' and shorten
# 'e-0' to 'e-'. With the values above this yields
# "svm_rbf_raw_data_gamma_1_0e-1_eps_1_0_C_1_0e+02" (positive exponents keep their '+0').
folder = f"svm_rbf_raw_data_gamma_{gamma:.1e}_eps_{eps}_C_{C:.1e}".replace('.', '_').replace('e-0', 'e-')
# Unlike the other model cells, folder and test_only are passed positionally here.
model.fit(X_train, y_train, folder, test_only)
# Scores on X_test, stored under a model-specific name because `model` is rebound in later cells.
svm_score = model.score(X_test)

Model loaded successfully.


#### Best model for Logistic Regression: Quadratic Expansion with lambda = 0.032, pi = 0.1

In [15]:
# Configuration of the selected quadratic logistic regression.
l = 0.032  # regularisation strength, passed as lambda_
prior = 0.1  # only used in the folder name within this cell
# Empirical fraction of training samples labelled 1 (among labels 0 and 1).
n_T = np.sum(y_train == 1)
n_F = np.sum(y_train == 0)
pEmp = n_T / (n_T + n_F)

model = QuadraticExpansion(lambda_=l)
# Apply the model's feature expansion to both splits before fitting/scoring.
X_train_quad = model.expand(X_train)
X_test_quad = model.expand(X_test)
# Same naming scheme as the SVM cell: '.' -> '_' and 'e-0' -> 'e-'.
# With the values above this yields "lr_quad_raw_data_pi_1_0e-1_lambda_3_2e-2".
folder = f"lr_quad_raw_data_pi_{prior:.1e}_lambda_{l:.1e}".replace('.', '_').replace('e-0', 'e-')
model.fit(X_train_quad, y_train, folder=folder, test_only=test_only)
# Subtract the empirical-prior log-odds from the raw scores (presumably to turn
# posterior log-odds into LLR-like scores).
# NOTE(review): the folder name encodes pi = prior (0.1) while the offset uses pEmp;
# if the stored weights were trained with prior weighting, log(prior / (1 - prior))
# would be the matching offset — confirm.
lr_score = model.score(X_test_quad) - np.log(pEmp / (1 - pEmp))

Weights loaded successfully.


#### K-Fold Cross Validation