In [8]:
import csv
import pandas as pd
import numpy as np
from tpot import TPOTClassifier

from sklearn.metrics import accuracy_score
from sklearn.metrics import average_precision_score
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import hamming_loss
from sklearn.metrics import recall_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_auc_score

import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning)
import joblib

file_feature = "./csv/uterus_features.csv"
file_train = "./csv/train.csv"
file_validate = "./csv/validation.csv"
file_test = "./csv/test.csv"

f = open(file_feature)
csv_f = csv.reader(f)
features = next(csv_f)
dataset = pd.read_csv(file_feature, names=features, usecols=range(1,6098), dtype=np.float64, skiprows=1, low_memory=False)
f = open(file_train)
csv_f = csv.reader(f)
features = next(csv_f)
dataset_train = pd.read_csv(file_train, names=features, usecols=range(1,1), dtype=np.float64, skiprows=1, low_memory=False)

with open('./csv/train.csv','r') as csvfile:
    reader = csv.reader(csvfile)
    train_list = [row[1] for row in reader]
with open('./csv/validation.csv','r') as csvfile:
    reader = csv.DictReader(csvfile)
    validation_list = [row['patient'] for row in reader]
with open('./csv/test.csv','r') as csvfile:
    reader = csv.DictReader(csvfile)
    test_list = [row['patient'] for row in reader]

dataset['outcome'] = pd.to_numeric(dataset['outcome'],errors='coerce')
array_OG = dataset.values
print(array_OG.shape)
train_list = train_list[1:]
validation_list = validation_list[0:]
test_list = test_list[0:]
#print(test_list)
#print(train_list)
#print(validation_list)

def cat_str(num_list):
    n_list = []
    for i in num_list:
        temp = i[12:]
        n_list.append(temp)
    n_list = [int(x) for x in n_list]
    return n_list

train_list = cat_str(train_list)
validation_list = cat_str(validation_list)
test_list = cat_str(test_list)

#print(train_list)
#print(validation_list)
#print(test_list)
#print(len(test_list))

train_feature = []
validate_feature = []
test_feature = []
count = 1
for i in range(len(array_OG)):
    num = i + 1
    if num in train_list:
        train_feature.append(array_OG[i])
    elif num in validation_list:
        validate_feature.append(array_OG[i])
    elif num in test_list:
        #print(count)
        count = count + 1
        test_feature.append(array_OG[i])
        #print(num)
        #print(array_OG[i,6096])
        
train_feature = np.array(train_feature)
validate_feature = np.array(validate_feature)
test_feature = np.array(test_feature)

train_feature = pd.DataFrame(train_feature)
train_feature.dropna(axis=1, thresh=2, inplace=True)
train_feature.dropna(how='all',thresh = 20,inplace=True)
train_feature = np.array(train_feature)
wh_inf = np.isinf(train_feature)
train_feature[wh_inf]=0
wh_nan = np.isnan(train_feature)
train_feature[wh_nan]=0

validate_feature = pd.DataFrame(validate_feature)
validate_feature.dropna(axis=1, thresh=2, inplace=True)
#validate_feature.dropna(how='all',thresh = 20,inplace=True)
validate_feature = np.array(validate_feature)
wh_inf = np.isinf(validate_feature)
validate_feature[wh_inf]=0
wh_nan = np.isnan(validate_feature)
validate_feature[wh_nan]=0

test_feature = pd.DataFrame(test_feature)
test_feature.dropna(axis=1, thresh=2, inplace=True)
#test_feature.dropna(how='all',thresh = 20,inplace=True)
test_feature = np.array(test_feature)
wh_inf = np.isinf(test_feature)
test_feature[wh_inf]=0
wh_nan = np.isnan(test_feature)
test_feature[wh_nan]=0

#only use image features
X_train = train_feature[:,:6093]
Y_train = train_feature[:,6093]
Y_train = Y_train.astype('int32')

X_validate = validate_feature[:,:6093]
Y_validate = validate_feature[:,6093]
Y_validate = Y_validate.astype('int32')

X_test = test_feature[:,:6093]
Y_test = test_feature[:,6093]
Y_test = Y_test.astype('int32')
seed = 7

np.random.seed(seed)
np.random.shuffle(X_train) 
np.random.seed(seed)
np.random.shuffle(Y_train)

print(Y_test)

(849, 6097)
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 1 1 1]


In [9]:
#直接运行Tpot
pipeline_optimizer = TPOTClassifier(generations=20, population_size=10, config_dict = 'TPOT light',cv=5, verbosity=2, scoring='roc_auc')
pipeline_optimizer.fit(X_train,Y_train)
joblib.dump(pipeline_optimizer.fitted_pipeline_,'./pkl/tpot_uterus_1.pkl')
pipeline_optimizer.export('./py/tpot_uterus_1.py')

Y_pred_vali = pipeline_optimizer.predict(X_validate)
print("Accuracy: " + repr(accuracy_score(Y_validate, Y_pred_vali)))
print("Average Precision Score: " + repr(average_precision_score(Y_validate, Y_pred_vali)))
print("Kappa: " + repr(cohen_kappa_score(Y_validate, Y_pred_vali)))
print("Hamming Loss: " + repr(hamming_loss(Y_validate, Y_pred_vali)))

print("AUC: " + repr(roc_auc_score(Y_validate, Y_pred_vali)))
print("Sensitivity: " + repr(recall_score(Y_validate, Y_pred_vali)))
tn, fp, fn, tp = confusion_matrix(Y_validate, Y_pred_vali).ravel()
print("Specificity: " + repr(tn / (tn + fp)))


HBox(children=(FloatProgress(value=0.0, description='Optimization Progress', max=210.0, style=ProgressStyle(de…


Generation 1 - Current best internal CV score: 0.7330661270232283
Generation 2 - Current best internal CV score: 0.7330661270232283
Generation 3 - Current best internal CV score: 0.7500488157016834
Generation 4 - Current best internal CV score: 0.7578134832476333
Generation 5 - Current best internal CV score: 0.7578134832476333
Generation 6 - Current best internal CV score: 0.7605214667403721
Generation 7 - Current best internal CV score: 0.7658183338437727
Generation 8 - Current best internal CV score: 0.7671984322716511
Generation 9 - Current best internal CV score: 0.767698963096973
Generation 10 - Current best internal CV score: 0.7714059538404794
Generation 11 - Current best internal CV score: 0.7714059538404794
Generation 12 - Current best internal CV score: 0.7714059538404794
Generation 13 - Current best internal CV score: 0.7718841786240642
Generation 14 - Current best internal CV score: 0.7718841786240642
Generation 15 - Current best internal CV score: 0.7718841786240642
Gene

In [10]:
Y_pred = pipeline_optimizer.predict(X_test)
Y_prob = pipeline_optimizer.predict_proba(X_test)
print("Accuracy: " + repr(accuracy_score(Y_test, Y_pred)))
print("Average Precision Score: " + repr(average_precision_score(Y_test, Y_pred)))
print("Kappa: " + repr(cohen_kappa_score(Y_test, Y_pred)))
print("Hamming Loss: " + repr(hamming_loss(Y_test, Y_pred)))

print("AUC: " + repr(roc_auc_score(Y_test, Y_prob[:,1])))
print("Sensitivity: " + repr(recall_score(Y_test, Y_pred)))
tn, fp, fn, tp = confusion_matrix(Y_test, Y_pred).ravel()
print("Specificity: " + repr(tn / (tn + fp)))

Accuracy: 0.7590361445783133
Average Precision Score: 0.6677981274944981
Kappa: 0.5106132075471698
Hamming Loss: 0.24096385542168675
AUC: 0.802923976608187
AUC: 0.7532163742690058
Sensitivity: 0.6842105263157895
Specificity: 0.8222222222222222


In [11]:
#直接运行Tpot
pipeline_optimizer = TPOTClassifier(generations=20, population_size=10, config_dict = 'TPOT light',cv=5, verbosity=2, scoring='roc_auc')
pipeline_optimizer.fit(X_train,Y_train)
joblib.dump(pipeline_optimizer.fitted_pipeline_,'./pkl/tpot_uterus_2.pkl')
pipeline_optimizer.export('./py/tpot_uterus_2.py')

Y_pred_vali = pipeline_optimizer.predict(X_validate)
print("Accuracy: " + repr(accuracy_score(Y_validate, Y_pred_vali)))
print("Average Precision Score: " + repr(average_precision_score(Y_validate, Y_pred_vali)))
print("Kappa: " + repr(cohen_kappa_score(Y_validate, Y_pred_vali)))
print("Hamming Loss: " + repr(hamming_loss(Y_validate, Y_pred_vali)))

print("AUC: " + repr(roc_auc_score(Y_validate, Y_pred_vali)))
print("Sensitivity: " + repr(recall_score(Y_validate, Y_pred_vali)))
tn, fp, fn, tp = confusion_matrix(Y_validate, Y_pred_vali).ravel()
print("Specificity: " + repr(tn / (tn + fp)))


HBox(children=(FloatProgress(value=0.0, description='Optimization Progress', max=210.0, style=ProgressStyle(de…


Generation 1 - Current best internal CV score: 0.7386392635008049
Generation 2 - Current best internal CV score: 0.7386392635008049
Generation 3 - Current best internal CV score: 0.7393196491337861
Generation 4 - Current best internal CV score: 0.7461180771070004
Generation 5 - Current best internal CV score: 0.7461180771070004
Generation 6 - Current best internal CV score: 0.7461180771070004
Generation 7 - Current best internal CV score: 0.7562902010498933
Generation 8 - Current best internal CV score: 0.7628991343391044
Generation 9 - Current best internal CV score: 0.7628991343391044
Generation 10 - Current best internal CV score: 0.7628991343391044
Generation 11 - Current best internal CV score: 0.7661638680999424
Generation 12 - Current best internal CV score: 0.7662720954657498
Generation 13 - Current best internal CV score: 0.7662720954657498
Generation 14 - Current best internal CV score: 0.7677380124977047
Generation 15 - Current best internal CV score: 0.7677380124977047
Gen

In [12]:
Y_pred = pipeline_optimizer.predict(X_test)
Y_prob = pipeline_optimizer.predict_proba(X_test)
print("Accuracy: " + repr(accuracy_score(Y_test, Y_pred)))
print("Average Precision Score: " + repr(average_precision_score(Y_test, Y_pred)))
print("Kappa: " + repr(cohen_kappa_score(Y_test, Y_pred)))
print("Hamming Loss: " + repr(hamming_loss(Y_test, Y_pred)))

print("AUC: " + repr(roc_auc_score(Y_test, Y_prob[:,1])))
print("AUC: " + repr(roc_auc_score(Y_test, Y_pred)))
print("Sensitivity: " + repr(recall_score(Y_test, Y_pred)))
tn, fp, fn, tp = confusion_matrix(Y_test, Y_pred).ravel()
print("Specificity: " + repr(tn / (tn + fp)))

Accuracy: 0.7710843373493976
Average Precision Score: 0.669030579892071
Kappa: 0.5490420360308836
Hamming Loss: 0.2289156626506024
AUC: 0.7637426900584795
AUC: 0.780701754385965
Sensitivity: 0.8947368421052632
Specificity: 0.6666666666666666


In [13]:
#直接运行Tpot
pipeline_optimizer = TPOTClassifier(generations=20, population_size=10, config_dict = 'TPOT light',cv=5, verbosity=2, scoring='roc_auc')
pipeline_optimizer.fit(X_train,Y_train)
joblib.dump(pipeline_optimizer.fitted_pipeline_,'./pkl/tpot_uterus_3.pkl')
pipeline_optimizer.export('./py/tpot_uterus_3.py')

Y_pred_vali = pipeline_optimizer.predict(X_validate)
print("Accuracy: " + repr(accuracy_score(Y_validate, Y_pred_vali)))
print("Average Precision Score: " + repr(average_precision_score(Y_validate, Y_pred_vali)))
print("Kappa: " + repr(cohen_kappa_score(Y_validate, Y_pred_vali)))
print("Hamming Loss: " + repr(hamming_loss(Y_validate, Y_pred_vali)))

print("AUC: " + repr(roc_auc_score(Y_validate, Y_pred_vali)))
print("Sensitivity: " + repr(recall_score(Y_validate, Y_pred_vali)))
tn, fp, fn, tp = confusion_matrix(Y_validate, Y_pred_vali).ravel()
print("Specificity: " + repr(tn / (tn + fp)))
Y_pred = pipeline_optimizer.predict(X_test)
Y_prob = pipeline_optimizer.predict_proba(X_test)
print("Accuracy: " + repr(accuracy_score(Y_test, Y_pred)))
print("Average Precision Score: " + repr(average_precision_score(Y_test, Y_pred)))
print("Kappa: " + repr(cohen_kappa_score(Y_test, Y_pred)))
print("Hamming Loss: " + repr(hamming_loss(Y_test, Y_pred)))

print("AUC: " + repr(roc_auc_score(Y_test, Y_prob[:,1])))
print("AUC: " + repr(roc_auc_score(Y_test, Y_pred)))
print("Sensitivity: " + repr(recall_score(Y_test, Y_pred)))
tn, fp, fn, tp = confusion_matrix(Y_test, Y_pred).ravel()
print("Specificity: " + repr(tn / (tn + fp)))

HBox(children=(FloatProgress(value=0.0, description='Optimization Progress', max=210.0, style=ProgressStyle(de…


Generation 1 - Current best internal CV score: 0.738743366263783
Generation 2 - Current best internal CV score: 0.740314634501718
Generation 3 - Current best internal CV score: 0.740314634501718
Generation 4 - Current best internal CV score: 0.7441586300123801
Generation 5 - Current best internal CV score: 0.7441586300123801
Generation 6 - Current best internal CV score: 0.7441586300123801
Generation 7 - Current best internal CV score: 0.7457018398889234
Generation 8 - Current best internal CV score: 0.7457018398889234
Generation 9 - Current best internal CV score: 0.7457018398889234
Generation 10 - Current best internal CV score: 0.7459824235028403
Generation 11 - Current best internal CV score: 0.7459824235028403
Generation 12 - Current best internal CV score: 0.7459824235028403
Generation 13 - Current best internal CV score: 0.7459824235028403
Generation 14 - Current best internal CV score: 0.7535609545184313
Generation 15 - Current best internal CV score: 0.7612461720999223
Genera

In [14]:
#直接运行Tpot
pipeline_optimizer = TPOTClassifier(generations=20, population_size=10, config_dict = 'TPOT light',cv=5, verbosity=2, scoring='roc_auc')
pipeline_optimizer.fit(X_train,Y_train)
joblib.dump(pipeline_optimizer.fitted_pipeline_,'./pkl/tpot_uterus_4.pkl')
pipeline_optimizer.export('./py/tpot_uterus_4.py')

Y_pred_vali = pipeline_optimizer.predict(X_validate)
print("Accuracy: " + repr(accuracy_score(Y_validate, Y_pred_vali)))
print("Average Precision Score: " + repr(average_precision_score(Y_validate, Y_pred_vali)))
print("Kappa: " + repr(cohen_kappa_score(Y_validate, Y_pred_vali)))
print("Hamming Loss: " + repr(hamming_loss(Y_validate, Y_pred_vali)))

print("AUC: " + repr(roc_auc_score(Y_validate, Y_pred_vali)))
print("Sensitivity: " + repr(recall_score(Y_validate, Y_pred_vali)))
tn, fp, fn, tp = confusion_matrix(Y_validate, Y_pred_vali).ravel()
print("Specificity: " + repr(tn / (tn + fp)))
Y_pred = pipeline_optimizer.predict(X_test)
Y_prob = pipeline_optimizer.predict_proba(X_test)
print("Accuracy: " + repr(accuracy_score(Y_test, Y_pred)))
print("Average Precision Score: " + repr(average_precision_score(Y_test, Y_pred)))
print("Kappa: " + repr(cohen_kappa_score(Y_test, Y_pred)))
print("Hamming Loss: " + repr(hamming_loss(Y_test, Y_pred)))

print("AUC: " + repr(roc_auc_score(Y_test, Y_prob[:,1])))
print("Sensitivity: " + repr(recall_score(Y_test, Y_pred)))
tn, fp, fn, tp = confusion_matrix(Y_test, Y_pred).ravel()
print("Specificity: " + repr(tn / (tn + fp)))

HBox(children=(FloatProgress(value=0.0, description='Optimization Progress', max=210.0, style=ProgressStyle(de…


Generation 1 - Current best internal CV score: 0.8020486475569552
Generation 2 - Current best internal CV score: 0.8040709039770337
Generation 3 - Current best internal CV score: 0.8040709039770337
Generation 4 - Current best internal CV score: 0.8040709039770337
Generation 5 - Current best internal CV score: 0.8259046233795132
Generation 6 - Current best internal CV score: 0.8259046233795132
Generation 7 - Current best internal CV score: 0.8315074617383825
Generation 8 - Current best internal CV score: 0.8315074617383825
Generation 9 - Current best internal CV score: 0.8315074617383825
Generation 10 - Current best internal CV score: 0.8315074617383825
Generation 11 - Current best internal CV score: 0.8315074617383825
Generation 12 - Current best internal CV score: 0.8315074617383825
Generation 13 - Current best internal CV score: 0.8315074617383825
Generation 14 - Current best internal CV score: 0.8315074617383825
Generation 15 - Current best internal CV score: 0.8315074617383825
Gen

In [15]:
#直接运行Tpot
pipeline_optimizer = TPOTClassifier(generations=20, population_size=10, config_dict = 'TPOT light',cv=5, verbosity=2, scoring='roc_auc')
pipeline_optimizer.fit(X_train,Y_train)
joblib.dump(pipeline_optimizer.fitted_pipeline_,'./pkl/tpot_uterus_5.pkl')
pipeline_optimizer.export('./py/tpot_uterus_5.py')

Y_pred_vali = pipeline_optimizer.predict(X_validate)
print("Accuracy: " + repr(accuracy_score(Y_validate, Y_pred_vali)))
print("Average Precision Score: " + repr(average_precision_score(Y_validate, Y_pred_vali)))
print("Kappa: " + repr(cohen_kappa_score(Y_validate, Y_pred_vali)))
print("Hamming Loss: " + repr(hamming_loss(Y_validate, Y_pred_vali)))

print("AUC: " + repr(roc_auc_score(Y_validate, Y_pred_vali)))
print("Sensitivity: " + repr(recall_score(Y_validate, Y_pred_vali)))
tn, fp, fn, tp = confusion_matrix(Y_validate, Y_pred_vali).ravel()
print("Specificity: " + repr(tn / (tn + fp)))
Y_pred = pipeline_optimizer.predict(X_test)
Y_prob = pipeline_optimizer.predict_proba(X_test)
print("Accuracy: " + repr(accuracy_score(Y_test, Y_pred)))
print("Average Precision Score: " + repr(average_precision_score(Y_test, Y_pred)))
print("Kappa: " + repr(cohen_kappa_score(Y_test, Y_pred)))
print("Hamming Loss: " + repr(hamming_loss(Y_test, Y_pred)))

print("AUC: " + repr(roc_auc_score(Y_test, Y_prob[:,1])))
print("Sensitivity: " + repr(recall_score(Y_test, Y_pred)))
tn, fp, fn, tp = confusion_matrix(Y_test, Y_pred).ravel()
print("Specificity: " + repr(tn / (tn + fp)))

HBox(children=(FloatProgress(value=0.0, description='Optimization Progress', max=210.0, style=ProgressStyle(de…


Generation 1 - Current best internal CV score: 0.8178698654620931
Generation 2 - Current best internal CV score: 0.8178698654620931
Generation 3 - Current best internal CV score: 0.8178698654620931
Generation 4 - Current best internal CV score: 0.8178698654620931
Generation 5 - Current best internal CV score: 0.8178698654620931
Generation 6 - Current best internal CV score: 0.8178698654620931
Generation 7 - Current best internal CV score: 0.8178698654620931
Generation 8 - Current best internal CV score: 0.8178698654620931
Generation 9 - Current best internal CV score: 0.8178698654620931
Generation 10 - Current best internal CV score: 0.8180390373903939
Generation 11 - Current best internal CV score: 0.8180390373903939
Generation 12 - Current best internal CV score: 0.8224595212110465
Generation 13 - Current best internal CV score: 0.835757951870784
Generation 14 - Current best internal CV score: 0.840053433360483
Generation 15 - Current best internal CV score: 0.840053433360483
Genera

In [16]:
#直接运行Tpot
pipeline_optimizer = TPOTClassifier(generations=20, population_size=10, config_dict = 'TPOT light',cv=5, verbosity=2, scoring='roc_auc')
pipeline_optimizer.fit(X_train,Y_train)
joblib.dump(pipeline_optimizer.fitted_pipeline_,'./pkl/tpot_uterus_6.pkl')
pipeline_optimizer.export('./py/tpot_uterus_6.py')

Y_pred_vali = pipeline_optimizer.predict(X_validate)
print("Accuracy: " + repr(accuracy_score(Y_validate, Y_pred_vali)))
print("Average Precision Score: " + repr(average_precision_score(Y_validate, Y_pred_vali)))
print("Kappa: " + repr(cohen_kappa_score(Y_validate, Y_pred_vali)))
print("Hamming Loss: " + repr(hamming_loss(Y_validate, Y_pred_vali)))

print("AUC: " + repr(roc_auc_score(Y_validate, Y_pred_vali)))
print("Sensitivity: " + repr(recall_score(Y_validate, Y_pred_vali)))
tn, fp, fn, tp = confusion_matrix(Y_validate, Y_pred_vali).ravel()
print("Specificity: " + repr(tn / (tn + fp)))
Y_pred = pipeline_optimizer.predict(X_test)
Y_prob = pipeline_optimizer.predict_proba(X_test)
print("Accuracy: " + repr(accuracy_score(Y_test, Y_pred)))
print("Average Precision Score: " + repr(average_precision_score(Y_test, Y_pred)))
print("Kappa: " + repr(cohen_kappa_score(Y_test, Y_pred)))
print("Hamming Loss: " + repr(hamming_loss(Y_test, Y_pred)))

print("AUC: " + repr(roc_auc_score(Y_test, Y_prob[:,1])))
print("Sensitivity: " + repr(recall_score(Y_test, Y_pred)))
tn, fp, fn, tp = confusion_matrix(Y_test, Y_pred).ravel()
print("Specificity: " + repr(tn / (tn + fp)))

HBox(children=(FloatProgress(value=0.0, description='Optimization Progress', max=210.0, style=ProgressStyle(de…


Generation 1 - Current best internal CV score: 0.7563682287375144
Generation 2 - Current best internal CV score: 0.7563682287375144
Generation 3 - Current best internal CV score: 0.7563682287375144
Generation 4 - Current best internal CV score: 0.7563682287375144
Generation 5 - Current best internal CV score: 0.7563682287375144
Generation 6 - Current best internal CV score: 0.7563682287375144
Generation 7 - Current best internal CV score: 0.7574371171902383
Generation 8 - Current best internal CV score: 0.758110517863639
Generation 9 - Current best internal CV score: 0.758110517863639
Generation 10 - Current best internal CV score: 0.758110517863639
Generation 11 - Current best internal CV score: 0.758110517863639
Generation 12 - Current best internal CV score: 0.758110517863639
Generation 13 - Current best internal CV score: 0.758110517863639
Generation 14 - Current best internal CV score: 0.758110517863639
Generation 15 - Current best internal CV score: 0.7591486772351317
Generation

In [17]:
#直接运行Tpot
pipeline_optimizer = TPOTClassifier(generations=20, population_size=10, config_dict = 'TPOT light',cv=5, verbosity=2, scoring='roc_auc')
pipeline_optimizer.fit(X_train,Y_train)
joblib.dump(pipeline_optimizer.fitted_pipeline_,'./pkl/tpot_uterus_7.pkl')
pipeline_optimizer.export('./py/tpot_uterus_7.py')

Y_pred_vali = pipeline_optimizer.predict(X_validate)
print("Accuracy: " + repr(accuracy_score(Y_validate, Y_pred_vali)))
print("Average Precision Score: " + repr(average_precision_score(Y_validate, Y_pred_vali)))
print("Kappa: " + repr(cohen_kappa_score(Y_validate, Y_pred_vali)))
print("Hamming Loss: " + repr(hamming_loss(Y_validate, Y_pred_vali)))

print("AUC: " + repr(roc_auc_score(Y_validate, Y_pred_vali)))
print("Sensitivity: " + repr(recall_score(Y_validate, Y_pred_vali)))
tn, fp, fn, tp = confusion_matrix(Y_validate, Y_pred_vali).ravel()
print("Specificity: " + repr(tn / (tn + fp)))
Y_pred = pipeline_optimizer.predict(X_test)
Y_prob = pipeline_optimizer.predict_proba(X_test)
print("Accuracy: " + repr(accuracy_score(Y_test, Y_pred)))
print("Average Precision Score: " + repr(average_precision_score(Y_test, Y_pred)))
print("Kappa: " + repr(cohen_kappa_score(Y_test, Y_pred)))
print("Hamming Loss: " + repr(hamming_loss(Y_test, Y_pred)))

print("AUC: " + repr(roc_auc_score(Y_test, Y_prob[:,1])))
print("Sensitivity: " + repr(recall_score(Y_test, Y_pred)))
tn, fp, fn, tp = confusion_matrix(Y_test, Y_pred).ravel()
print("Specificity: " + repr(tn / (tn + fp)))

HBox(children=(FloatProgress(value=0.0, description='Optimization Progress', max=210.0, style=ProgressStyle(de…


Generation 1 - Current best internal CV score: 0.7404050755055167
Generation 2 - Current best internal CV score: 0.7404050755055167
Generation 3 - Current best internal CV score: 0.7404050755055167
Generation 4 - Current best internal CV score: 0.7452626645060701
Generation 5 - Current best internal CV score: 0.7517334631081933
Generation 6 - Current best internal CV score: 0.7517334631081933
Generation 7 - Current best internal CV score: 0.7552688166435468
Generation 8 - Current best internal CV score: 0.7556460044608924
Generation 9 - Current best internal CV score: 0.7556460044608924
Generation 10 - Current best internal CV score: 0.7603521920920777
Generation 11 - Current best internal CV score: 0.7603521920920777
Generation 12 - Current best internal CV score: 0.7603521920920777
Generation 13 - Current best internal CV score: 0.7664969732368586
Generation 14 - Current best internal CV score: 0.7664969732368586
Generation 15 - Current best internal CV score: 0.7664969732368586
Gen

In [18]:
#直接运行Tpot
pipeline_optimizer = TPOTClassifier(generations=20, population_size=10, config_dict = 'TPOT light',cv=5, verbosity=2, scoring='roc_auc')
pipeline_optimizer.fit(X_train,Y_train)
joblib.dump(pipeline_optimizer.fitted_pipeline_,'./pkl/tpot_uterus_8.pkl')
pipeline_optimizer.export('./py/tpot_uterus_8.py')

Y_pred_vali = pipeline_optimizer.predict(X_validate)
print("Accuracy: " + repr(accuracy_score(Y_validate, Y_pred_vali)))
print("Average Precision Score: " + repr(average_precision_score(Y_validate, Y_pred_vali)))
print("Kappa: " + repr(cohen_kappa_score(Y_validate, Y_pred_vali)))
print("Hamming Loss: " + repr(hamming_loss(Y_validate, Y_pred_vali)))

print("AUC: " + repr(roc_auc_score(Y_validate, Y_pred_vali)))
print("Sensitivity: " + repr(recall_score(Y_validate, Y_pred_vali)))
tn, fp, fn, tp = confusion_matrix(Y_validate, Y_pred_vali).ravel()
print("Specificity: " + repr(tn / (tn + fp)))
Y_pred = pipeline_optimizer.predict(X_test)
Y_prob = pipeline_optimizer.predict_proba(X_test)
print("Accuracy: " + repr(accuracy_score(Y_test, Y_pred)))
print("Average Precision Score: " + repr(average_precision_score(Y_test, Y_pred)))
print("Kappa: " + repr(cohen_kappa_score(Y_test, Y_pred)))
print("Hamming Loss: " + repr(hamming_loss(Y_test, Y_pred)))

print("AUC: " + repr(roc_auc_score(Y_test, Y_prob[:,1])))
print("Sensitivity: " + repr(recall_score(Y_test, Y_pred)))
tn, fp, fn, tp = confusion_matrix(Y_test, Y_pred).ravel()
print("Specificity: " + repr(tn / (tn + fp)))

HBox(children=(FloatProgress(value=0.0, description='Optimization Progress', max=210.0, style=ProgressStyle(de…


Generation 1 - Current best internal CV score: 0.749321368508452
Generation 2 - Current best internal CV score: 0.751416690450296
Generation 3 - Current best internal CV score: 0.751416690450296
Generation 4 - Current best internal CV score: 0.7571749204552172
Generation 5 - Current best internal CV score: 0.7571749204552172
Generation 6 - Current best internal CV score: 0.7571749204552172
Generation 7 - Current best internal CV score: 0.7610178018490229
Generation 8 - Current best internal CV score: 0.7628460122992179
Generation 9 - Current best internal CV score: 0.7628460122992179
Generation 10 - Current best internal CV score: 0.7648140878733202
Generation 11 - Current best internal CV score: 0.7648140878733202
Generation 12 - Current best internal CV score: 0.782529407153926
Generation 13 - Current best internal CV score: 0.782529407153926
Generation 14 - Current best internal CV score: 0.782529407153926
Generation 15 - Current best internal CV score: 0.782529407153926
Generation

In [19]:
#直接运行Tpot
pipeline_optimizer = TPOTClassifier(generations=20, population_size=10, config_dict = 'TPOT light',cv=5, verbosity=2, scoring='roc_auc')
pipeline_optimizer.fit(X_train,Y_train)
joblib.dump(pipeline_optimizer.fitted_pipeline_,'./pkl/tpot_uterus_9.pkl')
pipeline_optimizer.export('./py/tpot_uterus_9.py')

Y_pred_vali = pipeline_optimizer.predict(X_validate)
print("Accuracy: " + repr(accuracy_score(Y_validate, Y_pred_vali)))
print("Average Precision Score: " + repr(average_precision_score(Y_validate, Y_pred_vali)))
print("Kappa: " + repr(cohen_kappa_score(Y_validate, Y_pred_vali)))
print("Hamming Loss: " + repr(hamming_loss(Y_validate, Y_pred_vali)))

print("AUC: " + repr(roc_auc_score(Y_validate, Y_pred_vali)))
print("Sensitivity: " + repr(recall_score(Y_validate, Y_pred_vali)))
tn, fp, fn, tp = confusion_matrix(Y_validate, Y_pred_vali).ravel()
print("Specificity: " + repr(tn / (tn + fp)))
Y_pred = pipeline_optimizer.predict(X_test)
Y_prob = pipeline_optimizer.predict_proba(X_test)
print("Accuracy: " + repr(accuracy_score(Y_test, Y_pred)))
print("Average Precision Score: " + repr(average_precision_score(Y_test, Y_pred)))
print("Kappa: " + repr(cohen_kappa_score(Y_test, Y_pred)))
print("Hamming Loss: " + repr(hamming_loss(Y_test, Y_pred)))

print("AUC: " + repr(roc_auc_score(Y_test, Y_prob[:,1])))
print("Sensitivity: " + repr(recall_score(Y_test, Y_pred)))
tn, fp, fn, tp = confusion_matrix(Y_test, Y_pred).ravel()
print("Specificity: " + repr(tn / (tn + fp)))

HBox(children=(FloatProgress(value=0.0, description='Optimization Progress', max=210.0, style=ProgressStyle(de…


Generation 1 - Current best internal CV score: 0.7419980334652257
Generation 2 - Current best internal CV score: 0.7621753692625748
Generation 3 - Current best internal CV score: 0.7621753692625748
Generation 4 - Current best internal CV score: 0.7621753692625748
Generation 5 - Current best internal CV score: 0.7621753692625748
Generation 6 - Current best internal CV score: 0.7621753692625748
Generation 7 - Current best internal CV score: 0.7621753692625748
Generation 8 - Current best internal CV score: 0.7621753692625748
Generation 9 - Current best internal CV score: 0.7621753692625748
Generation 10 - Current best internal CV score: 0.7650647641659563
Generation 11 - Current best internal CV score: 0.7651495555700943
Generation 12 - Current best internal CV score: 0.7651495555700943
Generation 13 - Current best internal CV score: 0.7884852783285149
Generation 14 - Current best internal CV score: 0.7884852783285149
Generation 15 - Current best internal CV score: 0.7884852783285149
Gen

In [20]:
#直接运行Tpot
pipeline_optimizer = TPOTClassifier(generations=20, population_size=10, config_dict = 'TPOT light',cv=5, verbosity=2, scoring='roc_auc')
pipeline_optimizer.fit(X_train,Y_train)
joblib.dump(pipeline_optimizer.fitted_pipeline_,'./pkl/tpot_uterus_10.pkl')
pipeline_optimizer.export('./py/tpot_uterus_10.py')

Y_pred_vali = pipeline_optimizer.predict(X_validate)
print("Accuracy: " + repr(accuracy_score(Y_validate, Y_pred_vali)))
print("Average Precision Score: " + repr(average_precision_score(Y_validate, Y_pred_vali)))
print("Kappa: " + repr(cohen_kappa_score(Y_validate, Y_pred_vali)))
print("Hamming Loss: " + repr(hamming_loss(Y_validate, Y_pred_vali)))

print("AUC: " + repr(roc_auc_score(Y_validate, Y_pred_vali)))
print("Sensitivity: " + repr(recall_score(Y_validate, Y_pred_vali)))
tn, fp, fn, tp = confusion_matrix(Y_validate, Y_pred_vali).ravel()
print("Specificity: " + repr(tn / (tn + fp)))
Y_pred = pipeline_optimizer.predict(X_test)
Y_prob = pipeline_optimizer.predict_proba(X_test)
print("Accuracy: " + repr(accuracy_score(Y_test, Y_pred)))
print("Average Precision Score: " + repr(average_precision_score(Y_test, Y_pred)))
print("Kappa: " + repr(cohen_kappa_score(Y_test, Y_pred)))
print("Hamming Loss: " + repr(hamming_loss(Y_test, Y_pred)))

print("AUC: " + repr(roc_auc_score(Y_test, Y_prob[:,1])))
print("Sensitivity: " + repr(recall_score(Y_test, Y_pred)))
tn, fp, fn, tp = confusion_matrix(Y_test, Y_pred).ravel()
print("Specificity: " + repr(tn / (tn + fp)))

HBox(children=(FloatProgress(value=0.0, description='Optimization Progress', max=210.0, style=ProgressStyle(de…


Generation 1 - Current best internal CV score: 0.7479280033982935
Generation 2 - Current best internal CV score: 0.7479280033982935
Generation 3 - Current best internal CV score: 0.7479280033982935
Generation 4 - Current best internal CV score: 0.7520557668426814
Generation 5 - Current best internal CV score: 0.7520557668426814
Generation 6 - Current best internal CV score: 0.7520557668426814
Generation 7 - Current best internal CV score: 0.7557997525870427
Generation 8 - Current best internal CV score: 0.7557997525870427
Generation 9 - Current best internal CV score: 0.7613297150610583
Generation 10 - Current best internal CV score: 0.7613297150610583
Generation 11 - Current best internal CV score: 0.7630378695430793
Generation 12 - Current best internal CV score: 0.7630378695430793
Generation 13 - Current best internal CV score: 0.7643361712497168
Generation 14 - Current best internal CV score: 0.7643361712497168
Generation 15 - Current best internal CV score: 0.7643361712497168
Gen

In [23]:
from sklearn.metrics import precision_recall_curve,auc
pipeline_optimizer = joblib.load('./pkl/tpot_uterus_5.pkl')
Y_pred = pipeline_optimizer.predict(X_test)
Y_prob = pipeline_optimizer.predict_proba(X_test)
print("Accuracy: " + repr(accuracy_score(Y_test, Y_pred)))
print("Average Precision Score: " + repr(average_precision_score(Y_test, Y_pred)))
print("Kappa: " + repr(cohen_kappa_score(Y_test, Y_pred)))
print("Hamming Loss: " + repr(hamming_loss(Y_test, Y_pred)))

print("AUC: " + repr(roc_auc_score(Y_test, Y_prob[:,1])))
print("Sensitivity: " + repr(recall_score(Y_test, Y_pred)))
tn, fp, fn, tp = confusion_matrix(Y_test, Y_pred).ravel()
print("Specificity: " + repr(tn / (tn + fp)))
from sklearn.metrics import precision_recall_curve,auc
fpr, tpr, thresholds= precision_recall_curve(Y_test, Y_prob[:,1], pos_label=1)
auc = auc(tpr,fpr)
print(auc)

Accuracy: 0.7349397590361446
Average Precision Score: 0.6387303600366377
Kappa: 0.46388725778038753
Hamming Loss: 0.26506024096385544
AUC: 0.7953216374269007
Sensitivity: 0.6842105263157895
Specificity: 0.7777777777777778
0.7493472897546699
