In [3]:
from loading_data import *

#### 1) Explain which classification problem you have chosen to solve. Is it a multi-class or binary classification problem?

The classification problem that we are solving is to predict chd (coronary heart disease) based on the attributes. It is a binary classification problem. The direct interpretation is to know if someone has had a chd based on data. We can extend this to predicting if someone will likely have a chd based on the collected data on this individual. (this is given that the person keeps that same habits) -> maybe this is not applicable because there is an age attribute.
Another use of this classification can be for an insurance company to determine if a client likely has had a chd to then be able to adjust the cost for the insurance.

#### 2. We will compare logistic regression, method 2 and a baseline. For logistic regression, we will once more use λ as a complexity-controlling parameter, and for method 2 a relevant complexity controlling parameter and range of values. We recommend this choice is made based on a trial run, which you do not need to report. Describe which parameter you have chosen and the possible values of the parameters you will examine. The baseline will be a model which compute the largest class on the training data, and predict everything in the test-data as belonging to that class (corresponding to the optimal prediction by a logistic regression model with a bias term and no features).

In [4]:
#First we want to normalize and transform our data.

from scipy.stats import zscore

normalised_X = np.copy(X)
#transform
normalised_X[:,6] = np.log(1 + X[:,6]) #add 1 because some alcohol values are 0
#normalise
normalised_X = zscore(normalised_X, axis = 0, ddof = 1)

attributeNames_norm = np.copy(attributeNames)
attributeNames_norm[6] = 'log-alc'
attributeNames_norm = ['normalized ' + attribute for attribute in attributeNames_norm]

#Or without the last binary data

Y = np.copy(normalised_X[:,:-1])
N_y, M_y = Y.shape

attributeNames_y = np.copy(attributeNames_norm[:-1])

In [5]:
import numpy as np
from sklearn import model_selection
from sklearn.dummy import DummyClassifier
import torch
from sklearn.linear_model import LogisticRegression
from dtuimldmtools import train_neural_net

In [None]:
#We need to consider the possible lambdas and the number of hidden units that we want to consider in the inner loop. 
#For the baseline model there is no controlling parameter

#First try
lambda_interval = np.logspace(-4, 4, 100)
n_hidden_units = [1,5,20,50]
# print(n_hidden_units)

K_out = 2
K_in = 2
CV_out = model_selection.KFold(K_out,shuffle=True)
CV_in = model_selection.KFold(K_in,shuffle=True)

# For statistical evaluation : store outer fold predictions for the three models
yhat = []
y_true = []

# For debugging : store inner fold predictions (for each outer fold)
Logistic_full = {}
ANN_full = {}
#where the last coordinate gives us if we are looking at the estimate or the validation values (0 for estimation and 1 for validation)

#The Error for the best model of each type of model in each outer loop
# Train_error = np.zeros((K_out,3))
Test_error= np.zeros((K_out,3))
best_lambda_index = np.int32(np.zeros(K_out))
best_h_index = np.int32(np.zeros(K_out))

for k, (train_index,test_index) in enumerate(CV_out.split(normalised_X,y)):
    print(f"#================OUTER LOOP {k+1}================#")
    #to store the new predictions of the selected model at each outer fold (to then be concatenated in yhat)
    dy = []

    #the training tests for each fold of the outer loop
    X_train = normalised_X[train_index, :]
    y_train = y[train_index]
    X_test = normalised_X[test_index, :]
    y_test = y[test_index]

    #Baseline model
    baseline = DummyClassifier(strategy='most_frequent')
    baseline.fit(X_train,y_train)
    Test_error[k,2] = 1-baseline.score(X_test,y_test)
    dy.append(baseline.predict(X_test))

    #INITIALIZE ERROR HANDLING
    #Error for each model in each of the loops, overwritten at each outer loop 
    Logistic_Inner_test_error= np.zeros((K_in, len(lambda_interval)))
    ANN_Inner_test_error= np.zeros((K_in, len(n_hidden_units)))

    #average error of each model on each outter fold
    Logistic_Model_out_test_error = np.zeros((K_out, len(lambda_interval)))
    ANN_Model_out_test_error = np.zeros((K_out, len(n_hidden_units)))

    #we also need to store the sizes of the folds
    inner_fold_validate_sizes = np.zeros(K_in)

    #Inner Loop
    for i, (Inner_train_index, Inner_test_index) in enumerate(CV_in.split(X_train,y_train)):
        print(f"#================INNER LOOP {i+1}================#")
        #initialize the training and validation sets
        X_subtrain = X_train[Inner_train_index]
        y_subtrain = y[Inner_train_index]
        X_validate = X_train[Inner_test_index]
        y_validate = y_train[Inner_test_index]

        #store the size of the validation set
        inner_fold_validate_sizes[i] = X_validate.shape[0]

        #=========#
        #Logistic regression
        #=========#

        #Logistic Regression Model Loop (hyper_parameter tuning)
        for s,lamb in enumerate(lambda_interval):
            print(f"#================Logistic Model {s+1}================#")
            
            mdl = LogisticRegression(penalty="l2", C=1 / lamb)
            mdl.fit(X_subtrain, y_subtrain)

            # y_subtrain_est = mdl.predict(X_train).T
            y_validate_est = mdl.predict(X_validate).T
            Logistic_Inner_test_error[i, s] = np.sum(y_validate_est != y_validate) / len(y_validate)
            Logistic_full[(i,s,k)] = { 'predictions' : y_validate_est, 'ground_truth' : y_validate}

        #==========#
        #ANN
        #==========#

        #We convert the training and test sets to torch tensors

        X_subtrain = torch.tensor(X_subtrain, dtype=torch.float32)
        y_subtrain = torch.tensor(y_subtrain, dtype=torch.float32).reshape(-1, 1)
        X_validate = torch.tensor(X_validate, dtype=torch.float32)
        y_validate = torch.tensor(y_validate, dtype=torch.float32).reshape(-1, 1)

        #ANN cross validation loop
        for j, n in enumerate(n_hidden_units):
            print(f"#================ANN Model {j+1}================#")
            # The lambda-syntax defines an anonymous function, which is used here to
            # make it easy to make new networks within each cross validation fold
            model = lambda: torch.nn.Sequential(
                torch.nn.Linear(M, n),  # M features to H hiden units
                # 1st transfer function, either Tanh or ReLU:
                torch.nn.Tanh(),  
                # torch.nn.ReLU(),
                torch.nn.Linear(n, 1),  # H hidden units to 1 output neuron
                torch.nn.Sigmoid(),  # final tranfer function
            )
            #Loss function (Binary Cross Entropy)
            loss_fn = torch.nn.BCELoss()
            # Train for a maximum of 10000 steps, or until convergence
            max_iter = 10000
        
            net, final_loss, learning_curve = train_neural_net(
            model, loss_fn, X=X_subtrain, y=y_subtrain, n_replicates=1, max_iter=max_iter
            )
            y_validate_est = net(X_validate).detach().numpy()
            y_validate_pred = (y_validate_est > 0.5).astype(int)
            y_validate_np = y_validate.numpy()
            ANN_Inner_test_error[i, j] = np.sum(y_validate_pred != y_validate) / len(y_validate)
            ANN_full[(i,j,k)] = { 'predictions' : y_validate_pred.squeeze(), 'ground_truth' : y_validate_np.squeeze()}

    #Average Model Error calculation for Regression   
    Logistic_Model_out_test_error = np.sum(inner_fold_validate_sizes[:,None]*Logistic_Inner_test_error, axis = 0)/X_train.shape[0]
    best_lambda_index[k] = int(np.argmin(Logistic_Model_out_test_error))
    #Average Model Error calculation for ANN   
    ANN_Model_out_test_error = np.sum(inner_fold_validate_sizes[:,None]*ANN_Inner_test_error, axis = 0)/X_train.shape[0]
    best_h_index[k] = int(np.argmin(ANN_Model_out_test_error))

    #Retrain the best model on the full X_train for regression
    mdl = LogisticRegression(penalty="l2", C = 1/lambda_interval[best_lambda_index[k]])
    mdl.fit(X_train,y_train)
    y_test_est = mdl.predict(X_test).T
    Test_error[k,0] = np.sum(y_test_est != y_test) / len(y_test)
    
    #to store the predictions
    dy.append(y_test_est.T)

    #Retrain the best ANN model
    X_train = torch.tensor(X_train, dtype=torch.float32)
    y_train = torch.tensor(y_train, dtype=torch.float32).reshape(-1, 1)
    X_test = torch.tensor(X_test, dtype=torch.float32)
    y_test = torch.tensor(y_test, dtype=torch.float32).reshape(-1, 1)
    model = lambda: torch.nn.Sequential(
        torch.nn.Linear(M, n_hidden_units[best_h_index[k]]),  # M features to H hiden units
        # 1st transfer function, either Tanh or ReLU:
        # torch.nn.Tanh(),  
        torch.nn.ReLU(),
        torch.nn.Linear(n_hidden_units[best_h_index[k]], 1),  # H hidden units to 1 output neuron
        torch.nn.Sigmoid(),  # final tranfer function
    )
    #Loss function (Binary Cross Entropy)
    loss_fn = torch.nn.BCELoss()
    # Train for a maximum of 10000 steps, or until convergence
    max_iter = 10000
        
    net, final_loss, learning_curve = train_neural_net(
        model, loss_fn, X=X_train, y=y_train, n_replicates=1, max_iter=max_iter
    )
    y_test_est = net(X_test).detach().numpy()
    y_test_pred = (y_test_est > 0.5).astype(int)
    y_test = y_test.numpy()
    Test_error[k, 1] = np.sum(y_test_pred != y_test) / len(y_test)


    #to store the predictions
    dy.append(y_test_pred.squeeze())
    dy = np.stack(dy, axis=1)
    yhat.append(dy)
    y_true.append(y_test)

yhat = np.array(np.concatenate(yhat))
y_true = np.array(np.concatenate(y_true)).squeeze()


	Replicate: 1/1
		Iter	Loss			Rel. loss
		1000	0.49987686	7.6908345e-06
		2000	0.4980697	1.914737e-06
		3000	0.49475512	3.812826e-05
		4000	0.47146285	2.7939122e-05
		5000	0.46225712	1.39900785e-05
		6000	0.45715094	9.126717e-06
		7000	0.4535544	7.030747e-06
		8000	0.45071065	5.6865424e-06
		9000	0.44841737	4.519336e-06
		10000	0.4460734	1.6167844e-05
		Final loss:
		10000	0.4460734	1.6167844e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss
		1000	0.41049638	0.0003961702
		2000	0.25564137	0.0005461057
		3000	0.14488088	0.00052005326
		4000	0.09223522	0.00038669645
		5000	0.06576531	0.00029978988
		6000	0.050314035	0.00023213797
		7000	0.041159917	0.0001731113
		8000	0.035414156	0.00013031605
		9000	0.03162611	9.657979e-05
		10000	0.029098634	7.2647694e-05
		Final loss:
		10000	0.029098634	7.2647694e-05

	Replicate: 1/1
		Iter	Loss			Rel. loss
		1000	0.12707236	0.002007762
		2000	0.026037136	0.0012853148
		3000	0.00871212	0.00094111316
		4000	0.0037545755	0.00076433964
		5000	0.0018294472	

In [104]:
print(len(yhat))

462


In [105]:
y_true = y_true.squeeze()

In [108]:
print(Test_error)
print(lambda_interval[best_lambda_index])

[[0.26839827 0.35497835 0.36363636]
 [0.23809524 0.23809524 0.32900433]]
[4.86260158 2.7825594 ]


In [106]:
from dtuimldmtools import mcnemar
#we suppose that we have the y_hat and y_true for all models.

# Compute the Jeffreys interval
alpha = 0.05
[thetahatA, CIA, p] = mcnemar(y_true, yhat[:, 0], yhat[:, 1], alpha=alpha)
[thetahatB, CIB, p] = mcnemar(y_true, yhat[:, 1], yhat[:, 2], alpha=alpha)
[thetahatC, CIB, p] = mcnemar(y_true, yhat[:, 0], yhat[:, 2], alpha=alpha)

Result of McNemars test using alpha= 0.05
Comparison matrix n
[[258.  44.]
 [ 87.  73.]]
Approximate 1-alpha confidence interval of theta: [thetaL,thetaU] =  (-0.14071856902166135, -0.04521730074858388)
p-value for two-sided test A and B have same accuracy (exact binomial test): p= 0.00021577652478126048
Result of McNemars test using alpha= 0.05
Comparison matrix n
[[295.  50.]
 [ 30.  87.]]
Approximate 1-alpha confidence interval of theta: [thetaL,thetaU] =  (0.005564554029718671, 0.08095471984448777)
p-value for two-sided test A and B have same accuracy (exact binomial test): p= 0.03299261842647619
Result of McNemars test using alpha= 0.05
Comparison matrix n
[[233.  69.]
 [ 92.  68.]]
Approximate 1-alpha confidence interval of theta: [thetaL,thetaU] =  (-0.10328397073459339, 0.0038582847152375255)
p-value for two-sided test A and B have same accuracy (exact binomial test): p= 0.08262582271649302


### Question 5

to yield a control parameter for the logistic regression model we could take the parameter with the lowest error or we could also take the average of all the coefficients yielded by all the models to yield a model (but not directly a controlling parameter).

We need to test the error handling structures.

In [11]:
print(ANN_full)

{(0, 0, 0): {'predictions': array([1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1,
       1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0,
       0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0,
       1, 0, 0, 0, 0, 0]), 'ground_truth': array([1., 0., 0., 0., 1., 1., 0., 0., 1., 0., 0., 0., 1., 1., 0., 0., 0.,
       1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 1., 0., 0., 0., 1., 1., 0.,
       0., 0., 0., 1., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,
       0., 0., 1., 0., 1., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 1., 0.,
       0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 0., 0., 1., 1., 1., 0.,
       0., 0., 1., 1., 1., 0., 0., 1., 1., 1., 0., 1., 1., 0., 1., 0., 0.,
       1., 0., 1., 1., 0., 1., 1., 1., 1., 1., 1., 0., 0., 1.],
      dtype=float32)}, (0, 1, 0): {'predictions': 

In [12]:
print(Logistic_full)

{(0, 0, 0): {'predictions': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0], dtype=int32), 'ground_truth': array([1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1,
       1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1,
       1, 1, 1, 0, 0, 1], dtype=int32)}, (0, 1, 0): {'predictions': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

Here we try to find the best lambda interval possible

In [None]:
print(ANN_Model_out_test_error)
print(Logistic_Model_out_test_error)

[0.00865801 0.00865801 0.00865801]
[0.35064935 0.35064935 0.35064935 0.35064935 0.35064935 0.35064935
 0.35064935 0.35064935 0.35064935 0.35064935 0.35064935 0.35064935
 0.35064935 0.35064935 0.35064935 0.35064935 0.35064935 0.35064935
 0.35064935 0.35064935 0.35064935 0.35064935 0.35064935 0.35064935
 0.35064935 0.35064935 0.35064935 0.35064935 0.35064935 0.35064935
 0.35064935 0.35064935 0.35064935 0.35064935 0.34632035 0.34632035
 0.34632035 0.34632035 0.34632035 0.34632035 0.34632035 0.34632035
 0.34632035 0.34632035 0.34632035 0.34632035 0.34632035 0.34632035
 0.34632035 0.34632035 0.34632035 0.35064935 0.35064935 0.35064935
 0.35064935 0.35064935 0.35064935 0.35064935 0.35064935 0.35064935
 0.35064935 0.35064935 0.35497835 0.35497835 0.35497835 0.35497835
 0.35497835 0.35497835 0.35497835 0.35497835 0.35497835 0.35497835
 0.35497835 0.35497835 0.35497835 0.35497835 0.35497835 0.35497835
 0.35497835 0.35497835 0.35497835 0.35497835 0.35497835 0.35497835
 0.35497835 0.35497835 0.35

More imoportantly here we see that for all the values of lambda and of h, there is the same test error on the last outer fold.

we copy the code to try to find first the optimal lambda interval and then the h interval

In [88]:
#We need to consider the possible lambdas and the number of hidden units that we want to consider in the inner loop. 
#For the baseline model there is no controlling parameter

#First try
lambda_interval = np.logspace(-5, 2, 100)
# lambda_interval = np.linspace(0.0001, 80, 200)
n_hidden_units = np.arange(1,4)
# print(n_hidden_units)

K_out = 5
K_in = 10
CV_out = model_selection.KFold(K_out,shuffle=True)
CV_in = model_selection.KFold(K_in,shuffle=True)

# For statistical evaluation : store outer fold predictions for the three models
yhat = []
y_true = []

# For debugging : store inner fold predictions (for each outer fold)
Logistic_full = {}
ANN_full = {}
#where the last coordinate gives us if we are looking at the estimate or the validation values (0 for estimation and 1 for validation)

#The Error for the best model of each type of model in each outer loop
# Train_error = np.zeros((K_out,3))
Test_error= np.zeros((K_out,3))
best_lambda_index = np.int32(np.zeros(K_out))
best_h_index = np.int32(np.zeros(K_out))

for k, (train_index,test_index) in enumerate(CV_out.split(normalised_X,y)):
    print(f"#================OUTER LOOP {k+1}================#")
    #to store the new predictions of the selected model at each outer fold (to then be concatenated in yhat)
    dy = []

    #the training tests for each fold of the outer loop
    X_train = normalised_X[train_index, :]
    y_train = y[train_index]
    X_test = normalised_X[test_index, :]
    y_test = y[test_index]

    #Baseline model
    baseline = DummyClassifier(strategy='most_frequent')
    baseline.fit(X_train,y_train)
    Test_error[k,2] = 1-baseline.score(X_test,y_test)
    dy.append(baseline.predict(X_test))

    #INITIALIZE ERROR HANDLING
    #Error for each model in each of the loops, overwritten at each outer loop 
    Logistic_Inner_test_error= np.zeros((K_in, len(lambda_interval)))
    ANN_Inner_test_error= np.zeros((K_in, len(n_hidden_units)))

    #average error of each model on each outter fold
    Logistic_Model_out_test_error = np.zeros((K_out, len(lambda_interval)))
    ANN_Model_out_test_error = np.zeros((K_out, len(n_hidden_units)))

    #we also need to store the sizes of the folds
    inner_fold_validate_sizes = np.zeros(K_in)

    #Inner Loop
    for i, (Inner_train_index, Inner_test_index) in enumerate(CV_in.split(X_train,y_train)):
        print(f"#================INNER LOOP {i+1}================#")
        #initialize the training and validation sets
        X_subtrain = X_train[Inner_train_index]
        y_subtrain = y[Inner_train_index]
        X_validate = X_train[Inner_test_index]
        y_validate = y_train[Inner_test_index]

        #store the size of the validation set
        inner_fold_validate_sizes[i] = X_validate.shape[0]

        #=========#
        #Logistic regression
        #=========#

        #Logistic Regression Model Loop (hyper_parameter tuning)
        for s,lamb in enumerate(lambda_interval):
            print(f"#================Logistic Model {s+1}================#")
            
            mdl = LogisticRegression(penalty="l2", C=1 / lamb)
            mdl.fit(X_subtrain, y_subtrain)

            # y_subtrain_est = mdl.predict(X_train).T
            y_validate_est = mdl.predict(X_validate).T
            Logistic_Inner_test_error[i, s] = np.sum(y_validate_est != y_validate) / len(y_validate)
            Logistic_full[(i,s,k)] = { 'predictions' : y_validate_est, 'ground_truth' : y_validate}

    #Average Model Error calculation for Regression   
    Logistic_Model_out_test_error = np.sum(inner_fold_validate_sizes[:,None]*Logistic_Inner_test_error, axis = 0)/X_train.shape[0]
    best_lambda_index[k] = int(np.argmin(Logistic_Model_out_test_error))

    #Retrain the best model on the full X_train for regression
    mdl = LogisticRegression(penalty="l2", C = 1/lambda_interval[best_lambda_index[k]])
    mdl.fit(X_train,y_train)
    y_test_est = mdl.predict(X_test).T
    Test_error[k,0] = np.sum(y_test_est != y_test) / len(y_test)
    
    #to store the predictions
    dy.append(y_test_est)
    dy = np.stack(dy, axis=1)
    yhat.append(dy)
    y_true.append(y_test)

yhat = np.concatenate(yhat)
y_true = np.concatenate(y_true)



Then again we right the error for the last logisitic regression model.

In [51]:
print(Logistic_Model_out_test_error)

[0.36486486 0.36486486 0.36486486 0.36486486 0.36486486 0.36486486
 0.36486486 0.36486486 0.36486486 0.36486486 0.36486486 0.36486486
 0.36486486 0.36486486 0.36486486 0.36486486 0.36486486 0.36486486
 0.36486486 0.36486486 0.36486486 0.36486486 0.36486486 0.36486486
 0.36486486 0.36486486 0.36486486 0.36486486 0.36486486 0.36486486
 0.36486486 0.36486486 0.36486486 0.36486486 0.36486486 0.36486486
 0.36486486 0.36486486 0.36486486 0.36486486 0.36486486 0.36486486
 0.36486486 0.36486486 0.36486486 0.36486486 0.36756757 0.36756757
 0.36756757 0.36756757 0.36756757 0.36756757 0.36756757 0.36756757
 0.36756757 0.36756757 0.36756757 0.36756757 0.36756757 0.36486486
 0.36486486 0.36486486 0.36486486 0.36216216 0.36216216 0.36486486
 0.36486486 0.36486486 0.36486486 0.36486486 0.36486486 0.36486486
 0.36486486 0.36486486 0.36216216 0.35945946 0.36216216 0.35945946
 0.35945946 0.35945946 0.36486486 0.36756757 0.36486486 0.36216216
 0.35945946 0.36216216 0.36216216 0.35675676 0.35675676 0.3648

In [103]:
#And the final test error with the chosen index and model
print(f'optimal control parameter index : {best_lambda_index}')
print(f'optimal control parameter : {lambda_interval[best_lambda_index]}')
print(Test_error)

optimal control parameter index : [58 55]
optimal control parameter : [4.86260158 2.7825594 ]
[[0.26839827 0.35497835 0.36363636]
 [0.23809524 0.23809524 0.32900433]]


In [53]:
print(lambda_interval)

[1.00000000e-05 1.17681195e-05 1.38488637e-05 1.62975083e-05
 1.91791026e-05 2.25701972e-05 2.65608778e-05 3.12571585e-05
 3.67837977e-05 4.32876128e-05 5.09413801e-05 5.99484250e-05
 7.05480231e-05 8.30217568e-05 9.77009957e-05 1.14975700e-04
 1.35304777e-04 1.59228279e-04 1.87381742e-04 2.20513074e-04
 2.59502421e-04 3.05385551e-04 3.59381366e-04 4.22924287e-04
 4.97702356e-04 5.85702082e-04 6.89261210e-04 8.11130831e-04
 9.54548457e-04 1.12332403e-03 1.32194115e-03 1.55567614e-03
 1.83073828e-03 2.15443469e-03 2.53536449e-03 2.98364724e-03
 3.51119173e-03 4.13201240e-03 4.86260158e-03 5.72236766e-03
 6.73415066e-03 7.92482898e-03 9.32603347e-03 1.09749877e-02
 1.29154967e-02 1.51991108e-02 1.78864953e-02 2.10490414e-02
 2.47707636e-02 2.91505306e-02 3.43046929e-02 4.03701726e-02
 4.75081016e-02 5.59081018e-02 6.57933225e-02 7.74263683e-02
 9.11162756e-02 1.07226722e-01 1.26185688e-01 1.48496826e-01
 1.74752840e-01 2.05651231e-01 2.42012826e-01 2.84803587e-01
 3.35160265e-01 3.944206

The complexity of our data set means that it is not surprising that the control parameters are high.

## Test the statistical inference

In [89]:
from dtuimldmtools import mcnemar

In [93]:
yhat= np.array(yhat)
print(len(yhat))
print(len(y_true))

462
462


In [94]:
yhat

array([[0, 0],
       [0, 1],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 1],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 1],
       [0, 0],
       [0, 1],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 1],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 1],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 1],
       [0, 0],
       [0, 1],
       [0, 1],
       [0, 0],
       [0, 1],
       [0, 1],
       [0, 1],
       [0, 0],
       [0,

first we also need to store the predictions of the baseline modes

In [92]:
#we suppose that we have the y_hat and y_true for all models.

# Compute the Jeffreys interval
alpha = 0.05
[thetahatA, CIA, p] = mcnemar(y_true, yhat[:, 0], yhat[:, 1], alpha=alpha)
# [thetahatB, CIB, p] = mcnemar(y_true, yhat[:, 1], yhat[:, 2], alpha=alpha)
# [thetahatA, CIB, p] = mcnemar(y_true, yhat[:, 0], yhat[:, 2], alpha=alpha)

Result of McNemars test using alpha= 0.05
Comparison matrix n
[[255.  47.]
 [ 80.  80.]]
Approximate 1-alpha confidence interval of theta: [thetaL,thetaU] =  (-0.11865527502893591, -0.024043273798398657)
p-value for two-sided test A and B have same accuracy (exact binomial test): p= 0.004330818115086333
