In [1]:
import numpy as np
import os
from glob import glob
from PIL import Image, ImageDraw
import matplotlib.pyplot as plt
%matplotlib inline

## Download data

In [2]:
# Start from a clean slate: delete any previous /tmp/data tree (destructive),
# then recreate the directories used by the download and unzip cells below.
! rm -rf /tmp/data/
# Destination for the zip archives copied from S3.
! mkdir -p /tmp/data/zipped/labeled/
# Destinations for the extracted files, one directory per split (train/test)
# and label (ok/ng).
! mkdir -p /tmp/data/unzipped/labeled/train/ok
! mkdir -p /tmp/data/unzipped/labeled/train/ng
! mkdir -p /tmp/data/unzipped/labeled/test/ok
! mkdir -p /tmp/data/unzipped/labeled/test/ng

In [3]:
# Recursively copy everything under the labeled/ S3 prefix into the local
# zipped/labeled/ directory.
# NOTE(review): presumably requires the AWS CLI with credentials that can read
# this bucket -- confirm before running on a new machine.
! aws s3 cp s3://fstech.tw/wafer_defect/data/zipped/labeled/ /tmp/data/zipped/labeled/ --recursive

download: s3://fstech.tw/wafer_defect/data/zipped/labeled/NG-synthetic.zip to ../../../../../tmp/data/zipped/labeled/NG-synthetic.zip
download: s3://fstech.tw/wafer_defect/data/zipped/labeled/NG-201808.zip to ../../../../../tmp/data/zipped/labeled/NG-201808.zip
download: s3://fstech.tw/wafer_defect/data/zipped/labeled/OK-1.zip to ../../../../../tmp/data/zipped/labeled/OK-1.zip
download: s3://fstech.tw/wafer_defect/data/zipped/labeled/NG-1.zip to ../../../../../tmp/data/zipped/labeled/NG-1.zip
download: s3://fstech.tw/wafer_defect/data/zipped/labeled/NG-4.zip to ../../../../../tmp/data/zipped/labeled/NG-4.zip
download: s3://fstech.tw/wafer_defect/data/zipped/labeled/OK-2.zip to ../../../../../tmp/data/zipped/labeled/OK-2.zip
download: s3://fstech.tw/wafer_defect/data/zipped/labeled/OK-4.zip to ../../../../../tmp/data/zipped/labeled/OK-4.zip
download: s3://fstech.tw/wafer_defect/data/zipped/labeled/NG-5.zip to ../../../../../tmp/data/zipped/labeled/NG-5.zip
download: s3://fstech.tw/wafer

In [4]:
# Extract the NG archives: NG-1 .. NG-4 go to the training split, NG-5 to the
# test split. -q suppresses unzip's per-file output; -o overwrites existing
# files without prompting, so the cell can be re-run.
# NOTE(review): the download log also lists NG-synthetic.zip and NG-201808.zip,
# which are not extracted here -- confirm that is intentional.
! unzip -q -o /tmp/data/zipped/labeled/NG-1.zip -d /tmp/data/unzipped/labeled/train/ng
! unzip -q -o /tmp/data/zipped/labeled/NG-2.zip -d /tmp/data/unzipped/labeled/train/ng/
! unzip -q -o /tmp/data/zipped/labeled/NG-3.zip -d /tmp/data/unzipped/labeled/train/ng/
! unzip -q -o /tmp/data/zipped/labeled/NG-4.zip -d /tmp/data/unzipped/labeled/train/ng/
! unzip -q -o /tmp/data/zipped/labeled/NG-5.zip -d /tmp/data/unzipped/labeled/test/ng/

In [5]:
# Extract the OK archives: OK-1 .. OK-4 go to the training split, OK-5 to the
# test split. -q suppresses unzip's per-file output; -o overwrites existing
# files without prompting, so the cell can be re-run.
! unzip -q -o /tmp/data/zipped/labeled/OK-1.zip -d /tmp/data/unzipped/labeled/train/ok
! unzip -q -o /tmp/data/zipped/labeled/OK-2.zip -d /tmp/data/unzipped/labeled/train/ok/
! unzip -q -o /tmp/data/zipped/labeled/OK-3.zip -d /tmp/data/unzipped/labeled/train/ok/
! unzip -q -o /tmp/data/zipped/labeled/OK-4.zip -d /tmp/data/unzipped/labeled/train/ok/
! unzip -q -o /tmp/data/zipped/labeled/OK-5.zip -d /tmp/data/unzipped/labeled/test/ok/

## Preprocess data

In [6]:
# Create the output directories for the rendered PNG images, mirroring the
# split (train/test) and label (ok/ng) layout of the unzipped data.
! mkdir -p /tmp/data/images/standard/labeled/train/ok
! mkdir -p /tmp/data/images/standard/labeled/train/ng
! mkdir -p /tmp/data/images/standard/labeled/test/ok
! mkdir -p /tmp/data/images/standard/labeled/test/ng

In [7]:
def parse_defect_list(raw_text_path):
    """Parse the ``DefectList`` section of a raw text file into a float array.

    The file is treated as a sequence of ``;``-terminated sections. The first
    section whose text (after stripping surrounding newlines) starts with
    ``DefectList`` is selected; its first line (the title) is dropped and every
    remaining non-blank line is split on whitespace and converted to floats.

    Parameters
    ----------
    raw_text_path : str
        Path of the raw text file to read.

    Returns
    -------
    numpy.ndarray
        One row per defect line with ``float64`` values. If the section has no
        data rows an empty array (``len(...) == 0``) is returned.

    Raises
    ------
    IndexError
        If the file contains no ``DefectList`` section.
    ValueError
        If a field in the section cannot be converted to a float.
    """
    # Use a context manager so the handle is closed even if reading fails;
    # this function is called once per file over thousands of files.
    with open(raw_text_path) as raw_file:
        raw_text = raw_file.read()

    # Get only the DefectList section of the raw data
    defect_section = next(
        (section.strip('\n')
         for section in raw_text.split(';')
         if section.strip('\n').startswith('DefectList')),
        None,
    )
    if defect_section is None:
        # Same exception type as indexing an empty list, but with context.
        raise IndexError('no DefectList section found in {}'.format(raw_text_path))

    # Drop the "DefectList" title line, then split each remaining row into
    # columns and convert to floats. Splitting on arbitrary whitespace and
    # skipping blank lines tolerates repeated spaces, tabs and empty rows.
    rows = [
        np.array(row.split(), dtype=np.float64)
        for row in defect_section.split('\n')[1:]
        if row.strip()
    ]

    return np.array(rows)

def save_defect_standard(defect_array, output_dir, filename='out.png',
                         wafer_size=300000, dot_size=2600, scale=1000,
                         test_types=(1, 6)):
    """Render the defects of one wafer as a PNG "wafer map" and save it.

    Each row of ``defect_array`` is one defect. Column 1 is used as the x
    coordinate, column 2 as the y coordinate and column 10 as the test type.
    Only rows whose test type is in ``test_types`` are drawn. If no row
    qualifies (or ``defect_array`` is empty) nothing is written.

    The image is a white square with a black circle outline for the wafer and
    one filled black dot per selected defect.

    Parameters
    ----------
    defect_array : sequence of row sequences
        Defect rows, e.g. the array returned by ``parse_defect_list``. Each
        row must have at least 11 columns.
    output_dir : str
        Existing directory in which the image is written.
    filename : str, optional
        Name of the image file inside ``output_dir``.
    wafer_size : number, optional
        Wafer diameter in the same units as the x/y columns.
        NOTE(review): presumably micrometres (300 mm wafer) -- confirm.
    dot_size : number, optional
        Diameter of each defect dot, in the same units as ``wafer_size``.
    scale : number, optional
        Factor by which coordinates are divided to get pixel positions; the
        image is ``int(wafer_size / scale + 1)`` pixels on each side.
    test_types : collection of numbers, optional
        Test-type values (column 10) to keep. Defaults to types 1 and 6 per
        client expertise.

    Returns
    -------
    None
    """
    # Limit to the accepted test types; only x and y are needed for drawing.
    points = [(row[1], row[2]) for row in defect_array if row[10] in test_types]
    if not points:
        # Nothing to draw: deliberately write no image for this wafer.
        return

    # Reduce the output image size by the scale factor.
    side = wafer_size / scale
    radius = 0.5 * dot_size / scale

    im = Image.new('RGB', (int(side + 1), int(side + 1)), 'white')
    draw = ImageDraw.Draw(im)

    # Wafer outline: white fill, black border.
    draw.ellipse([0, 0, side, side], 'white', 'black')

    # One filled dot per defect, centred on its scaled position.
    for x, y in points:
        cx = x / scale
        cy = y / scale
        draw.ellipse([cx - radius, cy - radius, cx + radius, cy + radius],
                     'black', 'black')

    im.save(os.path.join(output_dir, filename))

In [8]:
# Render one PNG per raw ".001" file, mirroring the unzipped directory layout
# (<split>/<label>/<file>) under the images/standard/labeled tree.
for f in glob('/tmp/data/unzipped/labeled/*/*/*.001'):
    # The last three path components are: split directory, label directory, file name.
    train_or_test, label, fname = f.split('/')[-3:]
    image_dir = '/tmp/data/images/standard/labeled/{}/{}'.format(train_or_test, label)
    image_name = '{}.png'.format(fname)
    defects = parse_defect_list(f)
    save_defect_standard(defects, image_dir, image_name)

In [9]:
# Sanity check: print the number of rendered PNGs per split/label directory,
# in the order train/ok, train/ng, test/ok, test/ng.
for png_pattern in (
    '/tmp/data/images/standard/labeled/train/ok/*.png',
    '/tmp/data/images/standard/labeled/train/ng/*.png',
    '/tmp/data/images/standard/labeled/test/ok/*.png',
    '/tmp/data/images/standard/labeled/test/ng/*.png',
):
    print(len(glob(png_pattern)))

3940
4000
1000
989
