In [1]:
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import Dense,Activation,Flatten,Conv2D,MaxPooling2D,BatchNormalization
from tensorflow.keras.layers import Dropout,AveragePooling2D,GlobalAveragePooling2D
from tensorflow.keras import optimizers
from sklearn.model_selection import train_test_split
from keras.utils import np_utils
from sklearn.utils.class_weight import compute_class_weight
import tensorflow as tf
from sklearn.metrics import accuracy_score
from matplotlib import pyplot as plt
import matplotlib.patches as patches
from tensorflow.keras.applications.resnet50 import ResNet50,preprocess_input
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.inception_resnet_v2 import InceptionResNetV2
from tensorflow.keras.applications import EfficientNetB5,inception_v3,Xception,ResNet101V2,NASNetLarge
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint,ReduceLROnPlateau
import cv2
import pandas as pd
from tqdm import tqdm
import numpy as np
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import albumentations

os.environ["CUDA_VISIBLE_DEVICES"]="0"

Using TensorFlow backend.


In [2]:
img_path = 'corning/3_classes_dataset'
#img_path = 'Unit Test Data/ALIS_UnitTest_2020_Q4/PNG Sorted'
all_folder_name= os.listdir(img_path)
all_folder_name
folder_name= []
all_img_name= []
type_name = []
path_name=[]
for f in all_folder_name:
    all_name= os.listdir(img_path+'/'+f)
    for name_1 in all_name:
        try:
            all_name_1= os.listdir(img_path+'/'+f+'/'+name_1)
            for name in all_name_1:
                type_name.append(name_1)
                folder_name.append(f)
                all_img_name.append(name.split('.p')[0])
                path_name.append(f+'/'+name_1+'/'+name)
        except:
            for name in all_name:
                folder_name.append(f)
                all_img_name.append(name.split('.p')[0])

In [3]:
df = pd.DataFrame({'ID':all_img_name,'families':folder_name,'Types':type_name,'PATH':path_name})
choice = df['families']
families = pd.unique(choice)
print(choice.value_counts())

system_artifacts    2943
inclusions          2793
non_inclusions      1198
Name: families, dtype: int64


In [4]:
df  = df.replace('system_artifacts','non_inclusions')
choice = df['families']
families = pd.unique(choice)
families = families.tolist()
print(choice.value_counts())

non_inclusions    4141
inclusions        2793
Name: families, dtype: int64


In [5]:
def train_test(data,num):
    train_x = data[:num]
    valid_x = data[num:]
    return train_x,valid_x

In [6]:
def data_aug_train(train_x, train_y):
    
    new_x= []
    new_y= []
    
    transform = albumentations.Compose([
        albumentations.VerticalFlip(p=0.5, always_apply=False),
        albumentations.HorizontalFlip(p=0.5, always_apply=False),
        albumentations.Blur(blur_limit=3, p=0.7),
        albumentations.RandomCrop(256, 256, always_apply=False, p=1.0),
        albumentations.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, rotate_limit=0, always_apply=False,
                                         interpolation=cv2.INTER_LINEAR, border_mode=cv2.BORDER_REPLICATE, p=0.7),
    ])
    
    for i in range(len(train_x)):
        
        aug= transform(image= train_x[i].astype(np.float32))
        new_x.append(aug['image'])
        new_y.append(train_y[i])
        
        
    return np.array(new_x), np.array(new_y)


def data_aug_test(train_x, train_y):
    
    new_x= []
    new_y= []
    
    transform = albumentations.Compose([
        albumentations.VerticalFlip(p=0.5, always_apply=False),
        albumentations.HorizontalFlip(p=0.5, always_apply=False),
        albumentations.Blur(blur_limit=3, p=0.7),
    ])
    
    for i in range(len(train_x)):
        
        aug= transform(image= train_x[i].astype(np.float32))
        new_x.append(aug['image'])
        new_y.append(train_y[i])
        
        
    return np.array(new_x), np.array(new_y)

In [7]:
X_train=[]
Y_train=[]
Test_name=[]
num=2750
for family in families:
    class_1_path = df[choice==family]['PATH']
    class_1_path = class_1_path.tolist()
    index = [i for i in range(len(class_1_path))]  
    np.random.shuffle(index)
    class_1_path = np.array(class_1_path)
    class_1_path = class_1_path[index].copy()
    #train_x,valid_x= train_test(class_1_path,num)
    for i in class_1_path:
        img= cv2.imread(img_path+'/'+i, 1)
        img= cv2.resize(img, (256,256),interpolation=cv2.INTER_AREA)
        label = families.index(family)
        X_train.append(img)
        Y_train.append(label)
X_train = np.array(X_train)
X_train =preprocess_input(X_train)

In [8]:
for i in range(len(Test_name)):
    Test_name[i] = Test_name[i].split('/')[2]

In [9]:
for i in range(len(Y_train)):
    if Y_train[i] ==2:
        Y_train[i] = 1   

In [10]:
Y_train= np.array( tf.keras.utils.to_categorical(Y_train))
print(X_train.shape,Y_train.shape)

(6934, 256, 256, 3) (6934, 2)


In [10]:
X_total = np.concatenate((X_train,X_valid),axis=0)
Y_total = np.concatenate((Y_train,Y_valid),axis=0)
print(X_train.shape,Y_total.shape)

(6934, 256, 256, 3) (6934,)


In [11]:
def train_valid(X,Y,num):
    X_train = []
    Y_train = []
    X_valid = []
    Y_valid = []
    index = [i for i in range(len(X))]  
    np.random.shuffle(index)
    X = np.array(X)
    X = X[index].copy()
    Y = Y[index].copy()
    for i in np.unique(Y):
        index_num =  np.where(Y_total==i)
        X_0 = X[index_num]
        Y_0 = Y[index_num]
        for j in range(len(X_0)):
            if j < num:
                X_train.append(X_0[j])
                Y_train.append(Y_0[j])
            else:
                X_valid.append(X_0[j])
                Y_valid.append(Y_0[j])
                
    Y_train= np.array( tf.keras.utils.to_categorical(Y_train))
    Y_valid= np.array( tf.keras.utils.to_categorical(Y_valid))
    return np.array(X_train),np.array(Y_train),np.array(X_valid),np.array(Y_valid)

In [12]:
X_train,Y_train,X_valid,Y_valid = train_valid(X_total,Y_total,2750)
print(X_train.shape,X_valid.shape,Y_train.shape,Y_valid.shape)

(5500, 256, 256, 3) (1434, 256, 256, 3) (5500, 2) (1434, 2)


In [13]:
Y_train= np.array( tf.keras.utils.to_categorical(Y_train))
Y_valid= np.array( tf.keras.utils.to_categorical(Y_valid))
#Y_test= np.array( tf.keras.utils.to_categorical(Y_test))
print(Y_train.shape,Y_valid.shape)#,Y_test.shape)

(5500, 2, 2) (1434, 2, 2)


In [12]:
# 影像大小
IMAGE_SIZE = (256, 256)

# 影像類別數
NUM_CLASSES = Y_train.shape[1]

In [13]:
def eff_net(output):
    
    net= EfficientNetB5(include_top=False, weights="imagenet", input_tensor=None,
               input_shape=(256,256,3))
    
    model= Sequential()
    model.add(net)
    model.add(GlobalAveragePooling2D())
    model.add(Dense(256, activation= 'relu'))
    model.add(Dropout(0.3))
    model.add(Dense(output, activation= 'softmax'))
    
    SGD = optimizers.SGD(lr=0.001, momentum=0.9, decay=0.001, nesterov=False)
    Adam = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.001, amsgrad=False)
    model.compile(loss='categorical_crossentropy',optimizer=SGD,metrics=['accuracy'])
    #model.summary()
    return model

In [14]:
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
earlystopping = EarlyStopping(monitor='val_loss', patience=50, verbose=0, mode='auto')
reduce_lr = ReduceLROnPlateau(monitor='val_loss', patience=5, verbose=0,factor=0.5,min_lr=0.0001)
modelcheckpoint = ModelCheckpoint('0125_2class_9.h5', monitor='val_loss', verbose=0, save_best_only=True, save_weights_only=False, mode='auto', period=1)



In [16]:
%%time
epochs= 15
best_val_acc= 0.60

model= eff_net(NUM_CLASSES) 
#model = load_model('0125_2class_9.h5')
for ep in range(epochs):    
    #X_train,Y_train,X_valid,Y_valid = train_valid(X_total,Y_total,2650)
    new_x, new_y= data_aug_train(X_train,  Y_train)
    #valid_x, valid_y = X_valid, Y_valid
    y_integers = np.argmax(new_y,axis=1)
    class_weights = compute_class_weight('balanced',np.unique(y_integers),y_integers)
    d_class_weights = dict(enumerate(class_weights))
    train_history= model.fit(new_x, new_y, epochs=1, batch_size=16, validation_data=(X_train,  Y_train),
                             verbose=1,callbacks=[reduce_lr],class_weight = d_class_weights)
#     train_history= model.fit(new_x, new_y, epochs=1, batch_size=16, validation_data=(valid_x, valid_y),
#                              verbose=1,callbacks=[reduce_lr])
    
    if train_history.history['val_accuracy'][-1] >= best_val_acc :
        model.save('0125_2class_9.h5')
        best_val_acc= train_history.history['val_accuracy'][-1]
        print('model save at val_acc: ', best_val_acc)
    if train_history.history['val_accuracy'][-1] >= 0.995 :
        model.save('0125_2class_9.h5')
        break
        
    print()

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'




Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
model save at val_acc:  0.9701470732688904

model save at val_acc:  0.9847130179405212

model save at val_acc:  0.9888952970504761

model save at val_acc:  0.9926449656486511

model save at val_acc:  0.9943755269050598


 25/434 [>.............................] - ETA: 2:18 - loss: 0.0501 - accuracy: 0.9750

KeyboardInterrupt: 

In [18]:
#model.save('0125_2class_4.h5')