In [None]:
import os
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers,optimizers,losses
from tensorflow.keras.callbacks import EarlyStopping
import os, glob
import random, csv
import pandas as pd
from matplotlib import pyplot as plt

In [None]:
from collections import Counter
from sklearn import model_selection,svm
from sklearn import metrics
from sklearn.metrics import confusion_matrix,f1_score,precision_score,recall_score,\
                precision_recall_curve,roc_curve,roc_auc_score
from imblearn.metrics import geometric_mean_score
from imblearn.metrics import classification_report_imbalanced
from imblearn.over_sampling import RandomOverSampler

In [None]:
import os
from google.colab import drive
# Mount Google Drive inside the Colab runtime at /content/drive.
drive.mount('/content/drive')
# Make the Drive root the working directory so the relative dataset path
# passed to load_data() later in the notebook resolves against it.
path = "/content/drive/My Drive"
os.chdir(path)
# Last expression of the cell: the directory listing is shown as its output.
os.listdir(path)

In [None]:
# Three-element mean and standard deviation used as the defaults of
# normalize()/denormalize().
# NOTE(review): these look like the usual ImageNet RGB statistics for inputs
# scaled to [0, 1] -- confirm.
img_mean = tf.constant([0.485, 0.456, 0.406])
img_std = tf.constant([0.229, 0.224, 0.225])
def normalize(x, mean=img_mean, std=img_std):
    """Standardize ``x`` by subtracting ``mean`` and dividing by ``std``.

    The default ``mean`` and ``std`` are the module-level three-element
    constants, so they broadcast over the last axis of ``x`` (e.g. an image
    tensor of shape ``[224, 224, 3]``).
    """
    centered = x - mean
    return centered / std

def denormalize(x, mean=img_mean, std=img_std):
    """Undo the standardization: multiply ``x`` by ``std`` and add ``mean``."""
    rescaled = x * std
    return rescaled + mean

In [None]:
def load_csv(root, filename, name2label):
    """Build (once) and read the image/label index stored as a CSV file.

    If ``root/filename`` does not exist yet, every ``*.png``, ``*.jpg`` and
    ``*.jpeg`` file found in ``root/<name>/`` for each ``name`` in
    ``name2label`` is collected, shuffled, and written as ``path,label`` rows.
    The file is then read back, so later calls reuse the same shuffled order
    instead of reshuffling.

    Args:
        root: Dataset directory containing one sub-directory per class.
        filename: Name of the CSV index file inside ``root``.
        name2label: Mapping from class directory name to integer label.

    Returns:
        A tuple ``(images, labels)`` of equal-length lists: the image paths
        as stored in the CSV and their integer labels.
    """
    csv_path = os.path.join(root, filename)

    if not os.path.exists(csv_path):
        images = []
        for name in name2label:
            for pattern in ('*.png', '*.jpg', '*.jpeg'):
                images += glob.glob(os.path.join(root, name, pattern))

        # Report only the count; echoing every path floods the cell output.
        print(len(images), 'images found')

        random.shuffle(images)
        with open(csv_path, mode='w', newline='') as f:
            writer = csv.writer(f)
            for img in images:
                # The image's parent directory name is its class name.
                name = os.path.basename(os.path.dirname(img))
                writer.writerow([img, name2label[name]])
        print('written into csv file:', filename)

    # Read the index back from the CSV file (freshly written or pre-existing).
    images, labels = [], []
    # newline='' is what the csv module expects for files it reads or writes.
    with open(csv_path, newline='') as f:
        for row in csv.reader(f):
            if not row:
                # Tolerate blank lines (e.g. a trailing newline added by hand).
                continue
            img, label = row
            images.append(img)
            labels.append(int(label))

    assert len(images) == len(labels)

    return images, labels

In [None]:

def preprocess(x, y, augment=True):
    """Load one image file and return a standardized tensor and one-hot label.

    Args:
        x: Scalar string tensor holding the image file path.
        y: Integer class label (0 or 1; the one-hot depth is 2).
        augment: When True (the default, matching the original behaviour)
            apply a random vertical flip and a random 224x224 crop. Pass
            False for validation/test data to get a deterministic central
            224x224 crop instead, e.g.
            ``ds.map(lambda p, l: preprocess(p, l, augment=False))``.

    Returns:
        ``(image, label)``: a float32 ``[224, 224, 3]`` tensor standardized
        by ``normalize`` and a one-hot label vector of length 2.
    """
    x = tf.io.read_file(x)
    # NOTE(review): the CSV index can also list .png files;
    # tf.io.decode_image is the format-agnostic decoder -- confirm that
    # decode_jpeg handles them in the TensorFlow version in use.
    x = tf.image.decode_jpeg(x, channels=3)
    # Resize slightly larger than the target so a 224x224 crop can be taken.
    x = tf.image.resize(x, [244, 244])

    if augment:
        x = tf.image.random_flip_up_down(x)
        x = tf.image.random_crop(x, [224, 224, 3])
    else:
        # Deterministic central crop for evaluation.
        x = tf.image.resize_with_crop_or_pad(x, 224, 224)

    # Scale [0, 255] -> [0, 1], then standardize with the channel statistics.
    x = tf.cast(x, dtype=tf.float32) / 255.
    x = normalize(x)
    y = tf.convert_to_tensor(y)
    y = tf.one_hot(y, depth=2)

    return x, y

In [None]:
def load_data(root, mode='train', filename='binary0.2_images.csv'):
    """Return the image paths and labels of one split of the dataset.

    Class names are the sub-directories of ``root`` in sorted order; each is
    assigned the next integer label starting at 0. The image index is loaded
    (or created) through ``load_csv`` and cut into contiguous
    60% / 20% / 20% train / val / test slices.

    Args:
        root: Dataset directory containing one sub-directory per class.
        mode: ``'train'`` for the first 60%, ``'val'`` for the next 20%;
            any other value selects the final 20% (the test split).
        filename: Name of the CSV index inside ``root``. Defaults to the
            file name the notebook has always used.

    Returns:
        ``(images, labels, name2label)``: the split's image paths, its
        integer labels, and the class-name -> label mapping.
    """
    name2label = {}
    for name in sorted(os.listdir(root)):
        # Only directories are classes; skip stray files such as the CSV index.
        if not os.path.isdir(os.path.join(root, name)):
            continue
        name2label[name] = len(name2label)

    images, labels = load_csv(root, filename, name2label)

    # load_csv guarantees len(images) == len(labels), so one set of bounds
    # serves both lists.
    total = len(images)
    train_end = int(0.6 * total)
    val_end = int(0.8 * total)

    if mode == 'train':    # 60%
        start, stop = 0, train_end
    elif mode == 'val':    # 20% = 60% -> 80%
        start, stop = train_end, val_end
    else:                  # 20% = 80% -> 100%
        start, stop = val_end, total

    return images[start:stop], labels[start:stop], name2label

In [None]:
batchsz = 128

# NOTE(review): all three pipelines map the same preprocess(), which applies
# a random flip and random crop, so the validation and test images are
# augmented as well.

# Training split: shuffled with a 1000-element buffer, preprocessed, batched.
images, labels, table = load_data('数据集ratio=0.025', mode='train')
db_train = (
    tf.data.Dataset.from_tensor_slices((images, labels))
    .shuffle(1000)
    .map(preprocess)
    .batch(batchsz)
)

# Validation split: kept in index order.
images2, labels2, table = load_data('数据集ratio=0.025', mode='val')
db_val = (
    tf.data.Dataset.from_tensor_slices((images2, labels2))
    .map(preprocess)
    .batch(batchsz)
)

# Test split: kept in index order so predictions line up with labels3.
images3, labels3, table = load_data('数据集ratio=0.025', mode='test')
db_test = (
    tf.data.Dataset.from_tensor_slices((images3, labels3))
    .map(preprocess)
    .batch(batchsz)
)

In [None]:
# VGG19 backbone with ImageNet weights, without its classifier head
# (include_top=False) and with global max pooling on the output. It is frozen
# so that only the new head is trained.
net10 = keras.applications.VGG19(
    weights='imagenet',
    include_top=False,
    pooling='max',
)
net10.trainable = False

# Stack the backbone and a 2-unit dense output layer (no activation, so it
# emits logits).
newnet = keras.Sequential()
newnet.add(net10)
newnet.add(layers.Dense(2))  # number of output-layer units
newnet.build(input_shape=(4, 224, 224, 3))
newnet.summary()

In [None]:
# `learning_rate` is the supported argument name; the `lr` alias is deprecated
# and rejected by newer Keras releases.
newnet.compile(optimizer=optimizers.Adam(learning_rate=1e-3),
               loss=losses.CategoricalCrossentropy(from_logits=True),
               metrics=['accuracy'])
# Keep the returned History so the per-epoch loss/accuracy curves stay
# available after training instead of being discarded.
history = newnet.fit(db_train, validation_data=db_val, validation_freq=1,
                     epochs=100)
newnet.evaluate(db_test)

In [None]:
# Sequential.predict_classes was removed in TensorFlow 2.6. For a multi-unit
# output layer it was the argmax over the last axis of predict(), so this
# yields the same integer class ids.
y_pred = np.argmax(newnet.predict(db_test), axis=-1)

In [None]:
def metrics_all(x_test, y_test, y_pred, history=None):
    """Print imbalanced-classification metrics and plot the ROC curve.

    Args:
        x_test: Unused; kept so existing calls keep working.
        y_test: True binary labels (0/1).
        y_pred: Predicted binary labels (0/1).
        history: Optional object returned by ``Model.fit`` (or its
            ``.history`` dict). When given, the training/validation accuracy
            and loss curves are plotted as well. Previously these plots read
            undefined global names and raised ``NameError``.

    Returns:
        None. Results are printed and plotted.
    """
    # ROC curve and area under it. With hard class predictions the curve has
    # a single interior operating point.
    fpr, tpr, _ = metrics.roc_curve(y_test, y_pred, pos_label=1)
    auc = metrics.auc(fpr, tpr)
    print('auc面积 = {0}\n'.format(auc))

    # F1 scores; the labels now name the averaging that is actually computed
    # (the second line used to repeat the micro score).
    f1_micro = f1_score(y_test, y_pred, average='micro')
    f1_macro = f1_score(y_test, y_pred, average='macro')
    print('f1_score_micro = {0}\n'.format(f1_micro))
    print('f1_score_macro = {0}\n'.format(f1_macro))

    # Precision and recall of the positive class (label 1).
    P = precision_score(y_test, y_pred, average='binary')
    R = recall_score(y_test, y_pred, average='binary')
    print('precison = {}\n'.format(P))
    print('recall_score = {}\n'.format(R))

    # Geometric mean score.
    G_mean = geometric_mean_score(y_test, y_pred)
    print('G-mean = {}\n'.format(G_mean))

    # Per-class report from imbalanced-learn.
    target_names = ['class 0', 'class 1']
    print(classification_report_imbalanced(y_test, y_pred, target_names=target_names))

    # ROC curve plot.
    plt.figure()
    lw = 2
    plt.plot(fpr, tpr, color='darkorange',
             lw=lw, label='ROC curve (area = %0.2f)' % auc)
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic')
    plt.legend(loc="lower right")
    plt.show()

    if history is not None:
        _plot_training_curves(history)


def _plot_training_curves(history):
    """Plot training/validation accuracy and loss side by side."""
    curves = getattr(history, 'history', history)
    # Older Keras versions log the accuracy metric under the key 'acc'.
    acc_key = 'accuracy' if 'accuracy' in curves else 'acc'

    _, (ax_acc, ax_loss) = plt.subplots(1, 2)
    for ax, key, what in ((ax_acc, acc_key, 'Accuracy'), (ax_loss, 'loss', 'Loss')):
        if key in curves:
            ax.plot(curves[key], label='Training ' + what)
        if 'val_' + key in curves:
            ax.plot(curves['val_' + key], label='Validation ' + what)
        ax.set_title('Training and Validation ' + what)
        ax.legend()
    plt.show()

In [None]:
# Report metrics on the test split. The first argument (the image paths) is
# accepted by metrics_all but not used in its computations.
metrics_all(images3, labels3, y_pred)