In [None]:
import numpy as np
import os
from glob import glob
from PIL import Image, ImageDraw
import matplotlib.pyplot as plt
%matplotlib inline

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.metrics import categorical_accuracy

In [None]:
# Start from a clean slate: delete any previous working tree under /tmp/data/ ...
! rm -rf /tmp/data/
# ... then recreate the download directory and the unzip targets,
# laid out as <train|test>/<ok|ng>.
! mkdir -p /tmp/data/zipped/labeled/
! mkdir -p /tmp/data/unzipped/labeled/train/ok
! mkdir -p /tmp/data/unzipped/labeled/train/ng
! mkdir -p /tmp/data/unzipped/labeled/test/ok
! mkdir -p /tmp/data/unzipped/labeled/test/ng

In [None]:
# Recursively copy everything under the labeled/ S3 prefix into the local zipped directory.
# NOTE(review): presumably relies on AWS CLI credentials already configured in this environment - confirm.
! aws s3 cp s3://fstech.tw/wafer_defect/data/zipped/labeled/ /tmp/data/zipped/labeled/ --recursive

In [None]:
# NG archives: parts 1-4 are extracted into the training split, part 5 into the test split.
# -q suppresses unzip's per-file output; -o overwrites existing files without prompting.
! unzip -q -o /tmp/data/zipped/labeled/NG-1.zip -d /tmp/data/unzipped/labeled/train/ng
! unzip -q -o /tmp/data/zipped/labeled/NG-2.zip -d /tmp/data/unzipped/labeled/train/ng/
! unzip -q -o /tmp/data/zipped/labeled/NG-3.zip -d /tmp/data/unzipped/labeled/train/ng/
! unzip -q -o /tmp/data/zipped/labeled/NG-4.zip -d /tmp/data/unzipped/labeled/train/ng/
! unzip -q -o /tmp/data/zipped/labeled/NG-5.zip -d /tmp/data/unzipped/labeled/test/ng/

In [None]:
# OK archives: parts 1-4 are extracted into the training split, part 5 into the test split.
# -q suppresses unzip's per-file output; -o overwrites existing files without prompting.
! unzip -q -o /tmp/data/zipped/labeled/OK-1.zip -d /tmp/data/unzipped/labeled/train/ok
! unzip -q -o /tmp/data/zipped/labeled/OK-2.zip -d /tmp/data/unzipped/labeled/train/ok/
! unzip -q -o /tmp/data/zipped/labeled/OK-3.zip -d /tmp/data/unzipped/labeled/train/ok/
! unzip -q -o /tmp/data/zipped/labeled/OK-4.zip -d /tmp/data/unzipped/labeled/train/ok/
! unzip -q -o /tmp/data/zipped/labeled/OK-5.zip -d /tmp/data/unzipped/labeled/test/ok/

In [None]:
# Output folders for the rendered PNGs, one per <train|test>/<ok|ng> combination.
# The image generators further down read the train/ and test/ parents of these folders.
! mkdir -p /tmp/data/images/standard/labeled/train/ok
! mkdir -p /tmp/data/images/standard/labeled/train/ng
! mkdir -p /tmp/data/images/standard/labeled/test/ok
! mkdir -p /tmp/data/images/standard/labeled/test/ng

In [None]:
def parse_defect_list(raw_text_path):
    """Parse the ``DefectList`` section of a raw inspection file into an array.

    The file is treated as a sequence of ``;``-separated sections. The first
    section whose text starts with ``DefectList`` is used: its title line is
    dropped, and every remaining non-blank line is split on whitespace and
    converted to floats.

    Args:
        raw_text_path: Path of the raw text file to read.

    Returns:
        ``np.ndarray`` of ``float64`` with one row per defect line. When the
        section contains no rows, an empty array of shape ``(0,)`` is returned.

    Raises:
        IndexError: If the file has no section starting with ``DefectList``.
        ValueError: If a field cannot be converted to a float.
    """
    # Context manager so the file handle is released even if parsing fails.
    with open(raw_text_path) as raw_file:
        raw_text = raw_file.read()

    # Locate the first ';'-delimited section that starts with the DefectList title.
    sections = (section.strip() for section in raw_text.split(';'))
    defect_section = next(
        (section for section in sections if section.startswith('DefectList')),
        None,
    )
    if defect_section is None:
        # Same exception type the original `[...][0]` lookup produced, but with context.
        raise IndexError(
            'no DefectList section found in {!r}'.format(raw_text_path)
        )

    # Skip the title line; split() with no argument tolerates repeated spaces
    # and tabs, and blank lines are ignored instead of failing float conversion.
    rows = [
        np.array(line.split(), dtype=np.float64)
        for line in defect_section.splitlines()[1:]
        if line.strip()
    ]

    return np.array(rows)

def save_defect_standard(defect_array, output_dir, filename='out.png',
                         wafer_size=300000, dot_size=2600, scale=1000,
                         test_types=(1, 6)):
    """Render defects as black dots inside a circular outline and save a PNG.

    Only rows whose column 10 is in ``test_types`` are drawn; columns 1 and 2
    of those rows are used as the x and y position of each dot. Nothing is
    written when ``defect_array`` is empty or when no row passes the filter.

    Args:
        defect_array: Sequence of rows (e.g. the array returned by
            ``parse_defect_list``); each row must have at least 11 columns.
        output_dir: Directory the image is written to (created if missing).
        filename: Name of the image file inside ``output_dir``.
        wafer_size: Diameter of the drawn outline, in the same units as the
            x / y columns.
        dot_size: Diameter of each defect dot, in the same units.
        scale: Divisor applied to coordinates and sizes to get pixels; with the
            defaults the image is 301 x 301 pixels.
        test_types: Values of column 10 to keep. The default ``(1, 6)`` is the
            selection the original code attributes to client expertise.

    Returns:
        None.
    """
    if len(defect_array) == 0:
        return

    # Keep only the (x, y) position of rows with an accepted test type.
    points = [(row[1], row[2]) for row in defect_array if row[10] in test_types]
    if not points:
        return

    extent = wafer_size / scale
    radius = 0.5 * dot_size / scale

    # +1 pixel so the outline at coordinate `extent` still falls inside the canvas.
    image = Image.new('RGB', (int(extent + 1), int(extent + 1)), 'white')
    canvas = ImageDraw.Draw(image)

    # Outline: white fill, black border.
    canvas.ellipse([0, 0, extent, extent], 'white', 'black')

    for x, y in points:
        center_x = x / scale
        center_y = y / scale
        canvas.ellipse(
            [center_x - radius, center_y - radius,
             center_x + radius, center_y + radius],
            'black', 'black')

    # Make sure the target folder exists so save() cannot fail on a missing directory.
    os.makedirs(output_dir, exist_ok=True)
    image.save(os.path.join(output_dir, filename))

In [None]:
# Render one PNG per raw inspection file, mirroring the <train|test>/<ok|ng> layout.
for raw_path in glob('/tmp/data/unzipped/labeled/*/*/*.001'):
    # The last three path components are <train|test>/<ok|ng>/<file name>.
    train_or_test, label, fname = raw_path.split('/')[-3:]
    target_dir = '/tmp/data/images/standard/labeled/{}/{}'.format(train_or_test, label)
    save_defect_standard(
        parse_defect_list(raw_path),
        target_dir,
        '{}.png'.format(fname),
    )

In [None]:
# Sanity check
print(len(glob('/tmp/data/images/standard/labeled/train/ok/*.png')))
print(len(glob('/tmp/data/images/standard/labeled/train/ng/*.png')))
print(len(glob('/tmp/data/images/standard/labeled/test/ok/*.png')))
print(len(glob('/tmp/data/images/standard/labeled/test/ng/*.png')))

In [None]:
# Small CNN: two conv/conv/pool/dropout stages followed by a dense head.
model = Sequential([
    # Stage 1 - input is a single-channel 300x300 image.
    Conv2D(64, (3, 3), activation='relu', input_shape=(300, 300, 1)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Stage 2
    Conv2D(64, (3, 3), activation='relu'),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Classifier head
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.50),

    # Output: two-way softmax, paired with categorical cross-entropy below.
    Dense(2, activation='softmax'),
])

# Nesterov momentum only takes effect with a non-zero momentum; with the
# default momentum of 0 the optimizer performs plain gradient descent and
# `nesterov=True` is ignored. 0.9 is the conventional momentum value.
sgd = SGD(momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=[categorical_accuracy])

# Model summary
model.summary()

In [None]:
batch_size = 64

# Both splits get the same preprocessing: pixel values scaled from [0, 255] to [0, 1].
train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Options shared by both directory iterators: images are resized to 300x300
# and loaded as a single grayscale channel.
_flow_options = dict(
    target_size=(300, 300),
    batch_size=batch_size,
    color_mode='grayscale',
)

train_generator = train_datagen.flow_from_directory(
    '/tmp/data/images/standard/labeled/train/', **_flow_options)

test_generator = test_datagen.flow_from_directory(
    '/tmp/data/images/standard/labeled/test/', **_flow_options)

In [None]:
# Train for 5 epochs, validating on the test generator after each epoch.
# validation_steps is the number of batches in the test generator, so every
# validation pass covers the test set exactly once. The previous hard-coded 800
# batches was unrelated to the dataset size and made the looping generator
# re-evaluate the same images many times per epoch.
# NOTE(review): fit_generator is deprecated in newer TensorFlow releases in
# favour of model.fit, which accepts generators - confirm against the installed version.
history = model.fit_generator(
        train_generator,
        epochs=5,
        validation_data=test_generator,
        validation_steps=len(test_generator))