In [1]:
import numpy as np
import pandas as pd

import fragility_index as fi

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.pipeline import make_pipeline
from sklearn.svm import LinearSVC


In [2]:
# Experiment driver: for every dataset and every seed, split the data, build
# all (positive, negative) sample pairs, fit the FI and bAUC weight vectors on
# the training pairs and score them on the test pairs. The per-split rows are
# gathered in `result`; the cell displays their mean per (dataset, model).

RESULT_COLUMNS = ["Data_name","Model_name","Probability","Mean","Std","VaR%95","CVaR%95","VaR%99","CVaR%99"]


def _pair_rows(pos, neg):
    """Stack every (positive, negative) row pair side by side.

    Row ``i * neg.shape[0] + j`` of the returned array holds ``pos[i]`` in its
    first ``pos.shape[1]`` columns and ``neg[j]`` in the remaining columns,
    i.e. the same layout the explicit double loop over ``i`` and ``j`` produced.
    The result is float64 with shape ``(pos.shape[0] * neg.shape[0], 2 * N)``.
    """
    paired = np.hstack([
        np.repeat(pos, neg.shape[0], axis=0),   # pos[i] repeated once per negative
        np.tile(neg, (pos.shape[0], 1)),        # negatives cycled once per positive
    ])
    return paired.astype(np.float64, copy=False)


# Empty frame with the expected schema, so `result` exists even if no split finishes.
result = pd.DataFrame(columns=RESULT_COLUMNS)
# Per-split frames are collected here and concatenated once at the end; calling
# pd.concat inside the loop re-copies all earlier rows on every iteration.
result_frames = []

data_name_list = ['BreastCancerCoimbra','LiverDisorders', 'LiverPatient']
# data_name_list = ['BreastCancerCoimbra']

try:
    for data_name in data_name_list:
        print(data_name)
        x,y = fi.Data_load(data_name)

        for _seed in range(20):
            print(_seed)
            # train_test_split is called without random_state, so it draws from
            # NumPy's global RNG; seeding it here makes each split reproducible.
            np.random.seed(_seed)
            X_sample, X_test, y_sample, y_test = train_test_split(x, y, test_size=0.2)
            # np.savez(f'./Split_BCC/split_bcc_seed_{_seed}', x_train=X_sample, x_test=X_test, y_train=y_sample, y_test=y_test)

            # Standardise with statistics of the training split only.
            scaler = StandardScaler()
            X_sample = scaler.fit_transform(X_sample)
            X_test = scaler.transform(X_test)

            # Labels are compared against +1 / -1 to separate the two classes.
            data_p = X_sample[y_sample==1,:]
            data_n = X_sample[y_sample==-1,:]
            data_test_p = X_test[y_test==1,:]
            data_test_n = X_test[y_test==-1,:]

            N = X_sample.shape[1]   # number of attribute

            M_p = data_p.shape[0]
            M_n = data_n.shape[0]
            S = M_p * M_n           # number of training (positive, negative) pairs

            M_test_p = data_test_p.shape[0]
            M_test_n = data_test_n.shape[0]
            S_test = M_test_p*M_test_n

            # One row per (positive, negative) pair: [positive features | negative features].
            data_sample = _pair_rows(data_p, data_n)
            data_test = _pair_rows(data_test_p, data_test_n)

            # Per-feature bounds of each class in the training split.
            lb_p = data_p.min(axis=0)
            lb_n = data_n.min(axis=0)
            ub_p = data_p.max(axis=0)
            ub_n = data_n.max(axis=0)

            # NOTE(review): the saved output of this cell ends with
            # "AttributeError: 'pyscipopt.scip.Variable' object has no attribute 'getVal'"
            # during the first split; presumably raised inside the fi solver
            # wrappers -- confirm against the installed pyscipopt version.
            w_FI = fi.FI_minimization(N,S,data_sample, lb_p,lb_n,ub_p,ub_n,LogToConsole=False)
            w_bAUC = fi.bAUC(N,S,data_sample,LogToConsole=False)
            # np.savez(f'./Split_BCC/w_bbc_seed_{_seed}', w_FI=w_FI, w_bAUC=w_bAUC)
            # clf_lr = LogisticRegression(random_state=0)
            # clf_lr.fit(X_sample, y_sample)
            # clf_lda = LinearDiscriminantAnalysis()
            # clf_lda.fit(X_sample, y_sample)
            # clf_lsvm = make_pipeline(StandardScaler(), LinearSVC(random_state=0, tol=1e-5))
            # clf_lsvm.fit(X_sample, y_sample)

            result_FI = fi.performance(data_name,"FI",w_FI,N,S_test,data_test)
            result_bAUC = fi.performance(data_name,"bAUC",w_bAUC,N,S_test,data_test)
            # result_lr = fi.performance_of_error(data_name, 'LR', fi.calculate_error(clf_lr, X_test, y_test))
            # result_lda = fi.performance_of_error(data_name, 'LDA', fi.calculate_error(clf_lda, X_test, y_test))
            # result_lsvm = fi.performance_of_error(data_name, 'LSVM', fi.calculate_error(clf_lsvm, X_test, y_test))

            result_frames.extend([result_FI, result_bAUC])
            # result_frames.append(result_lr)
finally:
    # Assemble whatever finished, so a failure part-way through still leaves the
    # completed splits in `result` for the following cells.
    if result_frames:
        result = pd.concat(result_frames)
        # Expected columns first (in their declared order), then any extras.
        result = result.reindex(columns=list(dict.fromkeys([*RESULT_COLUMNS, *result.columns])))

result.groupby(['Data_name', 'Model_name']).mean().reset_index()

BreastCancerCoimbra
0
presolving:
   (2.6s) symmetry computation started: requiring (bin +, int +, cont +), (fixed: bin -, int -, cont -)
   (2.9s) no symmetry present (symcode time: 0.10)
presolving (1 rounds: 1 fast, 1 medium, 1 exhaustive):
 0 deleted vars, 0 deleted constraints, 0 added constraints, 27 tightened bounds, 0 added holes, 0 changed sides, 0 changed coefficients
 0 implications, 0 cliques
presolved problem has 152662 variables (0 bin, 0 int, 0 impl, 152662 cont) and 115015 constraints
  77368 constraints of type <linear>
  37647 constraints of type <nonlinear>
Presolving Time: 2.45

 time | node  | left  |LP iter|LP it/n|mem/heur|mdpt |vars |cons |rows |cuts |sepa|confs|strbr|  dualbound   | primalbound  |  gap   | compl. 
* 210s|     1 |     0 |    17 |     - |    LP  |   0 | 227k| 115k| 190k|   0 |  0 |   0 |   0 | 0.000000e+00 | 0.000000e+00 |   0.00%| unknown
  210s|     1 |     0 |    17 |     - |  1283M |   0 | 227k| 115k| 190k|   0 |  0 |   0 |   0 | 0.000000e+00

AttributeError: 'pyscipopt.scip.Variable' object has no attribute 'getVal'

In [3]:
# Display the raw per-split metric rows held in `result`
# (one row per dataset / model / seed when the experiment loop completes).
result

Unnamed: 0,Data_name,Model_name,Probability,Mean,Std,VaR%95,CVaR%95,VaR%99,CVaR%99


In [4]:
# Collapse the per-split rows to one row per (dataset, model) by averaging
# every metric column, then show the summary table.
result_BCC_LD_ILDP = (
    result
    .groupby(['Data_name', 'Model_name'])
    .mean()
    .reset_index()
)
result_BCC_LD_ILDP

Unnamed: 0,Data_name,Model_name,Probability,Mean,Std,VaR%95,CVaR%95,VaR%99,CVaR%99
