In [3]:
import csv
import pandas as pd
import numpy as np
from tpot import TPOTClassifier

from sklearn.metrics import accuracy_score
from sklearn.metrics import average_precision_score
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import hamming_loss
from sklearn.metrics import recall_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_auc_score

import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning)
import joblib

file_feature = "./csv/endometrium.csv"
file_train = "./csv/train.csv"
file_validate = "./csv/validation.csv"
file_test = "./csv/test.csv"

f = open(file_feature)
csv_f = csv.reader(f)
features = next(csv_f)
dataset = pd.read_csv(file_feature, names=features, usecols=range(1,6098), dtype=np.float64, skiprows=1, low_memory=False)
f = open(file_train)
csv_f = csv.reader(f)
features = next(csv_f)
dataset_train = pd.read_csv(file_train, names=features, usecols=range(1,1), dtype=np.float64, skiprows=1, low_memory=False)

with open('./csv/train.csv','r') as csvfile:
    reader = csv.reader(csvfile)
    train_list = [row[1] for row in reader]
with open('./csv/validation.csv','r') as csvfile:
    reader = csv.DictReader(csvfile)
    validation_list = [row['patient'] for row in reader]
with open('./csv/test.csv','r') as csvfile:
    reader = csv.DictReader(csvfile)
    test_list = [row['patient'] for row in reader]

dataset['outcome'] = pd.to_numeric(dataset['outcome'],errors='coerce')
array_OG = dataset.values
print(array_OG.shape)
train_list = train_list[1:]
validation_list = validation_list[0:]
test_list = test_list[0:]
#print(test_list)
#print(train_list)
#print(validation_list)

def cat_str(num_list):
    n_list = []
    for i in num_list:
        temp = i[12:]
        n_list.append(temp)
    n_list = [int(x) for x in n_list]
    return n_list

train_list = cat_str(train_list)
validation_list = cat_str(validation_list)
test_list = cat_str(test_list)

#print(train_list)
#print(validation_list)
#print(test_list)
#print(len(test_list))

train_feature = []
validate_feature = []
test_feature = []
count = 1
for i in range(len(array_OG)):
    num = i + 1
    if num in train_list:
        train_feature.append(array_OG[i])
    elif num in validation_list:
        validate_feature.append(array_OG[i])
    elif num in test_list:
        #print(count)
        count = count + 1
        test_feature.append(array_OG[i])
        #print(num)
        #print(array_OG[i,6096])
        
train_feature = np.array(train_feature)
validate_feature = np.array(validate_feature)
test_feature = np.array(test_feature)

train_feature = pd.DataFrame(train_feature)
train_feature.dropna(axis=1, thresh=2, inplace=True)
train_feature.dropna(how='all',thresh = 20,inplace=True)
train_feature = np.array(train_feature)
wh_inf = np.isinf(train_feature)
train_feature[wh_inf]=0
wh_nan = np.isnan(train_feature)
train_feature[wh_nan]=0

validate_feature = pd.DataFrame(validate_feature)
validate_feature.dropna(axis=1, thresh=2, inplace=True)
#validate_feature.dropna(how='all',thresh = 20,inplace=True)
validate_feature = np.array(validate_feature)
wh_inf = np.isinf(validate_feature)
validate_feature[wh_inf]=0
wh_nan = np.isnan(validate_feature)
validate_feature[wh_nan]=0

test_feature = pd.DataFrame(test_feature)
test_feature.dropna(axis=1, thresh=2, inplace=True)
#test_feature.dropna(how='all',thresh = 20,inplace=True)
test_feature = np.array(test_feature)
wh_inf = np.isinf(test_feature)
test_feature[wh_inf]=0
wh_nan = np.isnan(test_feature)
test_feature[wh_nan]=0

#only use image features
X_train = train_feature[:,:6093]
Y_train = train_feature[:,6093]
Y_train = Y_train.astype('int32')

X_validate = validate_feature[:,:6093]
Y_validate = validate_feature[:,6093]
Y_validate = Y_validate.astype('int32')

X_test = test_feature[:,:6093]
Y_test = test_feature[:,6093]
Y_test = Y_test.astype('int32')
seed = 7

np.random.seed(seed)
np.random.shuffle(X_train) 
np.random.seed(seed)
np.random.shuffle(Y_train)

print(Y_test)

(849, 6097)
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 1 1 1]


In [2]:
#直接运行Tpot
pipeline_optimizer = TPOTClassifier(generations=20, population_size=10, config_dict = 'TPOT light',cv=5, verbosity=2, scoring='roc_auc')
pipeline_optimizer.fit(X_train,Y_train)
joblib.dump(pipeline_optimizer.fitted_pipeline_,'./pkl/tpot_running_11.pkl')
pipeline_optimizer.export('./py/tpot_running_11.py')

Y_pred_vali = pipeline_optimizer.predict(X_validate)
print("Accuracy: " + repr(accuracy_score(Y_validate, Y_pred_vali)))
print("Average Precision Score: " + repr(average_precision_score(Y_validate, Y_pred_vali)))
print("Kappa: " + repr(cohen_kappa_score(Y_validate, Y_pred_vali)))
print("Hamming Loss: " + repr(hamming_loss(Y_validate, Y_pred_vali)))

print("AUC: " + repr(roc_auc_score(Y_validate, Y_pred_vali)))
print("Sensitivity: " + repr(recall_score(Y_validate, Y_pred_vali)))
tn, fp, fn, tp = confusion_matrix(Y_validate, Y_pred_vali).ravel()
print("Specificity: " + repr(tn / (tn + fp)))

Y_pred = pipeline_optimizer.predict(X_test)
Y_prob = pipeline_optimizer.predict_proba(X_test)
print("Accuracy: " + repr(accuracy_score(Y_test, Y_pred)))
print("Average Precision Score: " + repr(average_precision_score(Y_test, Y_pred)))
print("Kappa: " + repr(cohen_kappa_score(Y_test, Y_pred)))
print("Hamming Loss: " + repr(hamming_loss(Y_test, Y_pred)))
y_prob = np.empty((len(Y_prob),1))
for i in range(len(Y_prob)):
    #print(i)
    y_prob[i] = Y_prob[i][1]
    #print(y_prob[i][0])
print("AUC: " + repr(roc_auc_score(Y_test, y_prob)))
print("AUC: " + repr(roc_auc_score(Y_test, Y_pred)))
print("Sensitivity: " + repr(recall_score(Y_test, Y_pred)))
tn, fp, fn, tp = confusion_matrix(Y_test, Y_pred).ravel()
print("Specificity: " + repr(tn / (tn + fp)))

HBox(children=(IntProgress(value=0, description='Optimization Progress', max=210, style=ProgressStyle(descript…

Generation 1 - Current best internal CV score: 0.8339737636405852
Generation 2 - Current best internal CV score: 0.8339737636405852
Generation 3 - Current best internal CV score: 0.8339737636405852
Generation 4 - Current best internal CV score: 0.8362630601346644
Generation 5 - Current best internal CV score: 0.8378306632613576
Generation 6 - Current best internal CV score: 0.8378306632613576
Generation 7 - Current best internal CV score: 0.8378306632613576
Generation 8 - Current best internal CV score: 0.8378306632613576
Generation 9 - Current best internal CV score: 0.8391865954647473
Generation 10 - Current best internal CV score: 0.8393994272889096
Generation 11 - Current best internal CV score: 0.8393994272889096
Generation 12 - Current best internal CV score: 0.8393994272889096
Generation 13 - Current best internal CV score: 0.8393994272889096
Generation 14 - Current best internal CV score: 0.8393994272889096
Generation 15 - Current best internal CV score: 0.8393994272889096
Gene



NameError: name 'y_prob' is not defined

In [3]:
print("AUC: " + repr(roc_auc_score(Y_validate, Y_pred_vali)))
print("Sensitivity: " + repr(recall_score(Y_validate, Y_pred_vali)))
tn, fp, fn, tp = confusion_matrix(Y_validate, Y_pred_vali).ravel()
print("Specificity: " + repr(tn / (tn + fp)))

Y_pred = pipeline_optimizer.predict(X_test)
Y_prob = pipeline_optimizer.predict_proba(X_test)
print("Accuracy: " + repr(accuracy_score(Y_test, Y_pred)))
print("Average Precision Score: " + repr(average_precision_score(Y_test, Y_pred)))
print("Kappa: " + repr(cohen_kappa_score(Y_test, Y_pred)))
print("Hamming Loss: " + repr(hamming_loss(Y_test, Y_pred)))
y_prob = np.empty((len(Y_prob),1))
for i in range(len(Y_prob)):
    #print(i)
    y_prob[i] = Y_prob[i][1]
    #print(y_prob[i][0])
print("AUC: " + repr(roc_auc_score(Y_test, y_prob)))
print("AUC: " + repr(roc_auc_score(Y_test, Y_pred)))
print("Sensitivity: " + repr(recall_score(Y_test, Y_pred)))
tn, fp, fn, tp = confusion_matrix(Y_test, Y_pred).ravel()
print("Specificity: " + repr(tn / (tn + fp)))

AUC: 0.7470760233918128
Sensitivity: 0.6052631578947368
Specificity: 0.8888888888888888
Accuracy: 0.7349397590361446
Average Precision Score: 0.6387303600366377
Kappa: 0.46388725778038753
Hamming Loss: 0.26506024096385544
AUC: 0.802046783625731
AUC: 0.7309941520467835
Sensitivity: 0.6842105263157895
Specificity: 0.7777777777777778




In [4]:
#直接运行Tpot
pipeline_optimizer = TPOTClassifier(generations=20, population_size=10, config_dict = 'TPOT light',cv=5, verbosity=2, scoring='roc_auc')
pipeline_optimizer.fit(X_train,Y_train)
joblib.dump(pipeline_optimizer.fitted_pipeline_,'./pkl/tpot_running_10.pkl')
pipeline_optimizer.export('./py/tpot_running_10.py')

Y_pred_vali = pipeline_optimizer.predict(X_validate)
print("Accuracy: " + repr(accuracy_score(Y_validate, Y_pred_vali)))
print("Average Precision Score: " + repr(average_precision_score(Y_validate, Y_pred_vali)))
print("Kappa: " + repr(cohen_kappa_score(Y_validate, Y_pred_vali)))
print("Hamming Loss: " + repr(hamming_loss(Y_validate, Y_pred_vali)))
print("AUC: " + repr(roc_auc_score(Y_validate, Y_pred_vali)))
print("Sensitivity: " + repr(recall_score(Y_validate, Y_pred_vali)))
tn, fp, fn, tp = confusion_matrix(Y_validate, Y_pred_vali).ravel()
print("Specificity: " + repr(tn / (tn + fp)))

Y_pred = pipeline_optimizer.predict(X_test)
Y_prob = pipeline_optimizer.predict_proba(X_test)
print("Accuracy: " + repr(accuracy_score(Y_test, Y_pred)))
print("Average Precision Score: " + repr(average_precision_score(Y_test, Y_pred)))
print("Kappa: " + repr(cohen_kappa_score(Y_test, Y_pred)))
print("Hamming Loss: " + repr(hamming_loss(Y_test, Y_pred)))
y_prob = np.empty((len(Y_prob),1))
for i in range(len(Y_prob)):
    #print(i)
    y_prob[i] = Y_prob[i][1]
    #print(y_prob[i][0])
print("AUC: " + repr(roc_auc_score(Y_test, y_prob)))
print("AUC: " + repr(roc_auc_score(Y_test, Y_pred)))
print("Sensitivity: " + repr(recall_score(Y_test, Y_pred)))
tn, fp, fn, tp = confusion_matrix(Y_test, Y_pred).ravel()
print("Specificity: " + repr(tn / (tn + fp)))

HBox(children=(IntProgress(value=0, description='Optimization Progress', max=210, style=ProgressStyle(descript…

Generation 1 - Current best internal CV score: 0.836349547248665
Generation 2 - Current best internal CV score: 0.836349547248665
Generation 3 - Current best internal CV score: 0.836349547248665
Generation 4 - Current best internal CV score: 0.836349547248665
Generation 5 - Current best internal CV score: 0.8373781054097981
Generation 6 - Current best internal CV score: 0.8373781054097981
Generation 7 - Current best internal CV score: 0.8373781054097981
Generation 8 - Current best internal CV score: 0.8373781054097981
Generation 9 - Current best internal CV score: 0.8373781054097981
Generation 10 - Current best internal CV score: 0.8373781054097981
Generation 11 - Current best internal CV score: 0.8383481025720403
Generation 12 - Current best internal CV score: 0.8383481025720403
Generation 13 - Current best internal CV score: 0.8383481025720403
Generation 14 - Current best internal CV score: 0.8383481025720403
Generation 15 - Current best internal CV score: 0.8383481025720403
Generati



In [10]:
#load tpot run
model1 = joblib.load('./pkl/tpot_running_2.pkl')
model2 = joblib.load('./pkl/tpot_running_3.pkl')
model3 = joblib.load('./pkl/tpot_running_4.pkl')
model4 = joblib.load('./pkl/tpot_running_5.pkl')
model5 = joblib.load('./pkl/tpot_running_6.pkl')
model6 = joblib.load('./pkl/tpot_running_7.pkl')
model7 = joblib.load('./pkl/tpot_running_8.pkl')
model8 = joblib.load('./pkl/tpot_running_9.pkl')
model9 = joblib.load('./pkl/tpot_running_10.pkl')
model10 = joblib.load('./pkl/tpot_running_11.pkl')


#validation predict 
Y_pred_vali = model1.predict(X_validate)
print("Accuracy: " + repr(accuracy_score(Y_validate, Y_pred_vali)))
print("Average Precision Score: " + repr(average_precision_score(Y_validate, Y_pred_vali)))
print("Kappa: " + repr(cohen_kappa_score(Y_validate, Y_pred_vali)))
print("Hamming Loss: " + repr(hamming_loss(Y_validate, Y_pred_vali)))
print("AUC: " + repr(roc_auc_score(Y_validate, Y_pred_vali)))
print("Sensitivity: " + repr(recall_score(Y_validate, Y_pred_vali)))
tn, fp, fn, tp = confusion_matrix(Y_validate, Y_pred_vali).ravel()
print("Specificity: " + repr(tn / (tn + fp)))

print("###################")
Y_pred_vali = model2.predict(X_validate)
print("Accuracy: " + repr(accuracy_score(Y_validate, Y_pred_vali)))
print("Average Precision Score: " + repr(average_precision_score(Y_validate, Y_pred_vali)))
print("Kappa: " + repr(cohen_kappa_score(Y_validate, Y_pred_vali)))
print("Hamming Loss: " + repr(hamming_loss(Y_validate, Y_pred_vali)))
print("AUC: " + repr(roc_auc_score(Y_validate, Y_pred_vali)))
print("Sensitivity: " + repr(recall_score(Y_validate, Y_pred_vali)))
tn, fp, fn, tp = confusion_matrix(Y_validate, Y_pred_vali).ravel()
print("Specificity: " + repr(tn / (tn + fp)))

print("###################")
Y_pred_vali = model3.predict(X_validate)
print("Accuracy: " + repr(accuracy_score(Y_validate, Y_pred_vali)))
print("Average Precision Score: " + repr(average_precision_score(Y_validate, Y_pred_vali)))
print("Kappa: " + repr(cohen_kappa_score(Y_validate, Y_pred_vali)))
print("Hamming Loss: " + repr(hamming_loss(Y_validate, Y_pred_vali)))
print("AUC: " + repr(roc_auc_score(Y_validate, Y_pred_vali)))
print("Sensitivity: " + repr(recall_score(Y_validate, Y_pred_vali)))
tn, fp, fn, tp = confusion_matrix(Y_validate, Y_pred_vali).ravel()
print("Specificity: " + repr(tn / (tn + fp)))

print("###################")
Y_pred_vali = model4.predict(X_validate)
print("Accuracy: " + repr(accuracy_score(Y_validate, Y_pred_vali)))
print("Average Precision Score: " + repr(average_precision_score(Y_validate, Y_pred_vali)))
print("Kappa: " + repr(cohen_kappa_score(Y_validate, Y_pred_vali)))
print("Hamming Loss: " + repr(hamming_loss(Y_validate, Y_pred_vali)))
print("AUC: " + repr(roc_auc_score(Y_validate, Y_pred_vali)))
print("Sensitivity: " + repr(recall_score(Y_validate, Y_pred_vali)))
tn, fp, fn, tp = confusion_matrix(Y_validate, Y_pred_vali).ravel()
print("Specificity: " + repr(tn / (tn + fp)))

print("###################")
Y_pred_vali = model5.predict(X_validate)
print("Accuracy: " + repr(accuracy_score(Y_validate, Y_pred_vali)))
print("Average Precision Score: " + repr(average_precision_score(Y_validate, Y_pred_vali)))
print("Kappa: " + repr(cohen_kappa_score(Y_validate, Y_pred_vali)))
print("Hamming Loss: " + repr(hamming_loss(Y_validate, Y_pred_vali)))
print("AUC: " + repr(roc_auc_score(Y_validate, Y_pred_vali)))
print("Sensitivity: " + repr(recall_score(Y_validate, Y_pred_vali)))
tn, fp, fn, tp = confusion_matrix(Y_validate, Y_pred_vali).ravel()
print("Specificity: " + repr(tn / (tn + fp)))

print("###################")
Y_pred_vali = model6.predict(X_validate)
print("Accuracy: " + repr(accuracy_score(Y_validate, Y_pred_vali)))
print("Average Precision Score: " + repr(average_precision_score(Y_validate, Y_pred_vali)))
print("Kappa: " + repr(cohen_kappa_score(Y_validate, Y_pred_vali)))
print("Hamming Loss: " + repr(hamming_loss(Y_validate, Y_pred_vali)))
print("AUC: " + repr(roc_auc_score(Y_validate, Y_pred_vali)))
print("Sensitivity: " + repr(recall_score(Y_validate, Y_pred_vali)))
tn, fp, fn, tp = confusion_matrix(Y_validate, Y_pred_vali).ravel()
print("Specificity: " + repr(tn / (tn + fp)))

print("###################")
Y_pred_vali = model7.predict(X_validate)
print("Accuracy: " + repr(accuracy_score(Y_validate, Y_pred_vali)))
print("Average Precision Score: " + repr(average_precision_score(Y_validate, Y_pred_vali)))
print("Kappa: " + repr(cohen_kappa_score(Y_validate, Y_pred_vali)))
print("Hamming Loss: " + repr(hamming_loss(Y_validate, Y_pred_vali)))
print("AUC: " + repr(roc_auc_score(Y_validate, Y_pred_vali)))
print("Sensitivity: " + repr(recall_score(Y_validate, Y_pred_vali)))
tn, fp, fn, tp = confusion_matrix(Y_validate, Y_pred_vali).ravel()
print("Specificity: " + repr(tn / (tn + fp)))

print("###################")
Y_pred_vali = model8.predict(X_validate)
print("Accuracy: " + repr(accuracy_score(Y_validate, Y_pred_vali)))
print("Average Precision Score: " + repr(average_precision_score(Y_validate, Y_pred_vali)))
print("Kappa: " + repr(cohen_kappa_score(Y_validate, Y_pred_vali)))
print("Hamming Loss: " + repr(hamming_loss(Y_validate, Y_pred_vali)))
print("AUC: " + repr(roc_auc_score(Y_validate, Y_pred_vali)))
print("Sensitivity: " + repr(recall_score(Y_validate, Y_pred_vali)))
tn, fp, fn, tp = confusion_matrix(Y_validate, Y_pred_vali).ravel()
print("Specificity: " + repr(tn / (tn + fp)))

print("###################")
Y_pred_vali = model9.predict(X_validate)
print("Accuracy: " + repr(accuracy_score(Y_validate, Y_pred_vali)))
print("Average Precision Score: " + repr(average_precision_score(Y_validate, Y_pred_vali)))
print("Kappa: " + repr(cohen_kappa_score(Y_validate, Y_pred_vali)))
print("Hamming Loss: " + repr(hamming_loss(Y_validate, Y_pred_vali)))
print("AUC: " + repr(roc_auc_score(Y_validate, Y_pred_vali)))
print("Sensitivity: " + repr(recall_score(Y_validate, Y_pred_vali)))
tn, fp, fn, tp = confusion_matrix(Y_validate, Y_pred_vali).ravel()
print("Specificity: " + repr(tn / (tn + fp)))

print("###################")
Y_pred_vali = model10.predict(X_validate)
print("Accuracy: " + repr(accuracy_score(Y_validate, Y_pred_vali)))
print("Average Precision Score: " + repr(average_precision_score(Y_validate, Y_pred_vali)))
print("Kappa: " + repr(cohen_kappa_score(Y_validate, Y_pred_vali)))
print("Hamming Loss: " + repr(hamming_loss(Y_validate, Y_pred_vali)))
print("AUC: " + repr(roc_auc_score(Y_validate, Y_pred_vali)))
print("Sensitivity: " + repr(recall_score(Y_validate, Y_pred_vali)))
tn, fp, fn, tp = confusion_matrix(Y_validate, Y_pred_vali).ravel()
print("Specificity: " + repr(tn / (tn + fp)))


Y_pred = model4.predict(X_test)
Y_prob = model4.predict_proba(X_test)
print("Accuracy: " + repr(accuracy_score(Y_test, Y_pred)))
print("Average Precision Score: " + repr(average_precision_score(Y_test, Y_pred)))
print("Kappa: " + repr(cohen_kappa_score(Y_test, Y_pred)))
print("Hamming Loss: " + repr(hamming_loss(Y_test, Y_pred)))
y_prob = np.empty((len(Y_prob),1))
for i in range(len(Y_prob)):
    #print(i)
    y_prob[i] = Y_prob[i][1]
    #print(y_prob[i][0])
print("AUC: " + repr(roc_auc_score(Y_test, y_prob)))
print("AUC: " + repr(roc_auc_score(Y_test, Y_pred)))
print("Sensitivity: " + repr(recall_score(Y_test, Y_pred)))
tn, fp, fn, tp = confusion_matrix(Y_test, Y_pred).ravel()
print("Specificity: " + repr(tn / (tn + fp)))




Accuracy: 0.8072289156626506
Average Precision Score: 0.7196542402296165
Kappa: 0.6116959064327485
Hamming Loss: 0.1927710843373494
AUC: 0.8058479532163743
Sensitivity: 0.7894736842105263
Specificity: 0.8222222222222222
###################
Accuracy: 0.7951807228915663
Average Precision Score: 0.7065845173010677
Kappa: 0.586580720773513
Hamming Loss: 0.20481927710843373
AUC: 0.7926900584795322
Sensitivity: 0.7631578947368421
Specificity: 0.8222222222222222
###################
Accuracy: 0.8072289156626506
Average Precision Score: 0.7196542402296165
Kappa: 0.6116959064327485
Hamming Loss: 0.1927710843373494
AUC: 0.8058479532163743
Sensitivity: 0.7894736842105263
Specificity: 0.8222222222222222
###################
Accuracy: 0.8433734939759037
Average Precision Score: 0.7632473212688812
Kappa: 0.6851473592063029
Hamming Loss: 0.1566265060240964
AUC: 0.8432748538011696
Sensitivity: 0.8421052631578947
Specificity: 0.8444444444444444
###################
Accuracy: 0.8072289156626506
Average Pre



In [5]:
model4 = joblib.load('./pkl/tpot_running_5.pkl')
Y_pred = model4.predict(X_test)
Y_prob = model4.predict_proba(X_test)
print("Accuracy: " + repr(accuracy_score(Y_test, Y_pred)))
print("Average Precision Score: " + repr(average_precision_score(Y_test, Y_pred)))
print("Kappa: " + repr(cohen_kappa_score(Y_test, Y_pred)))
print("Hamming Loss: " + repr(hamming_loss(Y_test, Y_pred)))
y_prob = np.empty((len(Y_prob),1))
for i in range(len(Y_prob)):
    #print(i)
    y_prob[i] = Y_prob[i][1]
    #print(y_prob[i][0])
print("AUC: " + repr(roc_auc_score(Y_test, y_prob)))
#print("AUC: " + repr(roc_auc_score(Y_test, Y_pred)))
print("Sensitivity: " + repr(recall_score(Y_test, Y_pred)))
tn, fp, fn, tp = confusion_matrix(Y_test, Y_pred).ravel()
print("Specificity: " + repr(tn / (tn + fp)))

Accuracy: 0.7951807228915663
Average Precision Score: 0.7065845173010677
Kappa: 0.586580720773513
Hamming Loss: 0.20481927710843373
AUC: 0.8906432748538011
Sensitivity: 0.7631578947368421
Specificity: 0.8222222222222222


In [9]:
for i in range(len(y_prob)):
    print(y_prob[i][0])

0.6413871248108438
0.590864652662682
0.7409354314943646
0.8378609128202502
0.8449068629679674
0.7737881634286459
0.8782377777568229
0.7536356434211773
0.8213248497166938
0.706109208277325
0.3716000860783897
0.3942728196808361
0.8905333747295947
0.7863843702714586
0.8160368095194575
0.7308174466456937
0.7474938462650705
0.778349717222842
0.5758228810618466
0.4340065644131185
0.9643080621715757
0.7666178922480512
0.7988493977224763
0.23734994612813082
0.45017463892403264
0.2094900571571396
0.7323958164966722
0.8613404707563439
0.20263105224139039
0.7720464218904775
0.008423722476354055
0.4069409031191069
0.18237521862320955
0.6412766276566372
0.16902152539191218
0.030171695864738782
0.3651138450282872
0.2107769719745521
0.22789403398365088
0.3372213504473148
0.6588533827254559
0.14778832417316093
0.09326799526651461
0.0727619357745986
0.08129939993417994
0.33454236462289527
0.3533995209500532
0.3310076526299381
0.6429757163360358
0.06221813084241434
0.4291254220970403
0.13917416412913705

In [11]:
print(tn,fp,fn,tp)
import sys
#p is proportion of trials that were successes
#n is the number of trials
import math
def adjusted_wald(p, n, z=1.96):
    p_adj = (n * p + (z**2)/2)/(n+z**2)
    n_adj = n + z**2
    span = z * math.sqrt(p_adj*(1-p_adj)/n_adj)
    return max(0, p_adj - span), min(p_adj + span, 1.0)
print("({:.2f}-{:.2f})".format(*adjusted_wald(float(0.80), float(83))))
print("({:.2f}-{:.2f})".format(*adjusted_wald(float(0.76), float(38))))
print("({:.2f}-{:.2f})".format(*adjusted_wald(float(0.82), float(45))))

37 8 9 29
(0.70-0.87)
(0.60-0.87)
(0.68-0.91)
