In [2]:
# Load in our libraries
import pandas as pd
import numpy as np
import sklearn
import seaborn as sns
import matplotlib.pyplot as plt
plt.style.use('seaborn')
sns.set(font_scale=2) 
from tqdm import tqdm

%matplotlib inline
import torch
import torchvision
from PIL import Image
import random
import pickle

In [3]:
wm_38k = np.load('raw_data/Wafer_Map_Datasets.npz')
X = wm_38k["arr_0"]
y = wm_38k["arr_1"]

# Utils

In [4]:
# def mean_filter(img):
#     img_cpy = np.copy(img)
#     height = img_cpy.shape[0]
#     width = img_cpy.shape[1]
#     for h in range(1,height):
#         for w in range(1,width):
#             if img_cpy[h][w] == 2.0:
#                 filter_area = img_cpy[h-1:h+2, w-1:w+2]
#                 filter_area = np.where(filter_area == 2.0, filter_area, 0)
#                 if (filter_area == 2.0).sum() - 1.0 == 0.0:
#                     img_cpy[h][w] = 1.0
#                 else:
#                     mean = (filter_area.sum())/((filter_area == 2.0).sum() - 1.0 )
#                     if mean >= 1.25:
#                         img_cpy[h][w] = 1.0
                    
#     return img_cpy

In [5]:
def mean_filter(img):
    img_cpy = np.copy(img)
    height = img_cpy.shape[0]
    width = img_cpy.shape[1]
    for h in range(1,height):
        for w in range(1,width):
            if img_cpy[h][w] == 2.0:
                filter_area = img_cpy[h-1:h+2, w-1:w+2]
                zero_count = (filter_area == 0.0).sum()
                neighbor_mean = (filter_area.sum()-2.0)/(8 - zero_count)
                if neighbor_mean < 1.25:
                    img_cpy[h][w] = 1.0
    return img_cpy

In [6]:
def resize_arr(X, Y):
    x = []
    y = []
    for i in tqdm(range(len(X))):
        img_array = X[i]
        label = Y[i]
        img_array = mean_filter(img_array)
        img_array =Image.fromarray(img_array)
        im_resized = img_array.resize((64,64), Image.NEAREST)
        img_array = np.array(im_resized, dtype = 'f')
        y.append(label)
        x.append(img_array)
    x = np.array(x)
    return np.array(x), np.array(y)

In [7]:
def shuffle_idx(label):
    train_idx = []
    valid_idx = []
    test_idx = []
    train_label = []
    valid_label = []
    test_label = []
    for defect in range(0, 8):
        target_defect = np.where(label == defect)[0]
        np.random.shuffle(target_defect)
        training_size = int(len(target_defect)*0.60)
        valid_size = int(len(target_defect)*0.15)
        defect_train = target_defect[:training_size]
        defect_valid = target_defect[training_size:training_size+valid_size]
        defect_test = target_defect[training_size+valid_size:]
        train_idx.extend(list(defect_train))
        valid_idx.extend(list(defect_valid))
        test_idx.extend(list(defect_test))
        train_label.extend([defect] * len(defect_train))
        valid_label.extend([defect] * len(defect_valid))
        test_label.extend([defect] * len(defect_test))
    return train_idx, valid_idx, test_idx, train_label, valid_label, test_label


In [8]:
def shuffle_idx_n(label):
    train_idx = []
    valid_idx = []
    test_idx = []
    train_label = []
    valid_label = []
    test_label = []
    defect = 8
    target_defect = np.where(label == defect)[0]
    np.random.shuffle(target_defect)
    training_size = int(len(target_defect)*0.60)
    valid_size = int(len(target_defect)*0.15)
    defect_train = target_defect[:training_size]
    defect_valid = target_defect[training_size:training_size+valid_size]
    defect_test = target_defect[training_size+valid_size:]
    train_idx.extend(list(defect_train))
    valid_idx.extend(list(defect_valid))
    test_idx.extend(list(defect_test))
    train_label.extend([defect] * len(defect_train))
    valid_label.extend([defect] * len(defect_valid))
    test_label.extend([defect] * len(defect_test))
    return train_idx, valid_idx, test_idx, train_label, valid_label, test_label


# Generate Single_defect case

In [9]:
y.shape

(38015, 8)

In [10]:
single_defect_idx = []
for i in tqdm(range(len(y))):
    if y[i].sum() == 1:
        single_defect_idx.append(i)
single_defect_idx = np.array(single_defect_idx)

100%|█████████████████████████████████████████████████████████████████████████| 38015/38015 [00:00<00:00, 507151.70it/s]


In [11]:
single_defect_label = y[single_defect_idx]

In [12]:
single_defect_label = np.argmax(single_defect_label, axis = 1)
single_defect_X = X[single_defect_idx]

In [13]:
single_defect_label = np.where(single_defect_label == 5, 10, single_defect_label)
single_defect_label = np.where(single_defect_label == 7, 5, single_defect_label)
single_defect_label = np.where(single_defect_label == 10, 7, single_defect_label)

In [14]:
np.unique(single_defect_label, return_counts = True)     

(array([0, 1, 2, 3, 4, 5, 6, 7]),
 array([1000, 1000, 1000, 1000, 1000,  149, 1000,  866]))

In [15]:
X_, label = resize_arr(single_defect_X, single_defect_label)

  im_resized = img_array.resize((64,64), Image.NEAREST)
100%|██████████████████████████████████████████████████████████████████████████████| 7015/7015 [00:56<00:00, 123.48it/s]


In [16]:
# mapping_type={'Center':0,'Donut':1,'Edge-Loc':2,'Edge-Ring':3,'Loc':4,'Random':5,'Scratch':6,'Near-full':7,'none':8}

In [17]:
# mapping_type={'Center':0,'Donut':1,'Edge-Loc':2,'Edge-Ring':3,'Loc':4,'Random':5,'Scratch':6,'Near-full':7}

In [18]:
train_idx, valid_idx, test_idx, train_label, valid_label, test_label = shuffle_idx(label)

In [19]:
np.unique(label, return_counts = True)  

(array([0, 1, 2, 3, 4, 5, 6, 7]),
 array([1000, 1000, 1000, 1000, 1000,  149, 1000,  866]))

In [20]:
X_ = torch.Tensor(X_)

In [21]:
base = torch.zeros(1,64,64)
for idx in tqdm(train_idx):
    base = torch.cat([base, X_[idx].unsqueeze(0)], axis = 0)
train = base[1:]

base = torch.zeros(1,64,64)
for idx in tqdm(valid_idx):
    base = torch.cat([base, X_[idx].unsqueeze(0)], axis = 0)
valid = base[1:]

base = torch.zeros(1,64,64)
for idx in tqdm(test_idx):
    base = torch.cat([base, X_[idx].unsqueeze(0)], axis = 0)
test = base[1:]

100%|██████████████████████████████████████████████████████████████████████████████| 4208/4208 [00:08<00:00, 525.70it/s]
100%|████████████████████████████████████████████████████████████████████████████| 1051/1051 [00:00<00:00, 15142.20it/s]
100%|█████████████████████████████████████████████████████████████████████████████| 1756/1756 [00:00<00:00, 5315.89it/s]


# Generate none defect case

In [22]:
none_defect_case = []
for i in tqdm(range(len(y))):
    if y[i].sum() == 0:
        none_defect_case.append(i)
none_defect_case = np.array(none_defect_case)

100%|█████████████████████████████████████████████████████████████████████████| 38015/38015 [00:00<00:00, 331595.70it/s]


In [23]:
none_defect_label = np.array([8]*none_defect_case.shape[0])
none_defect_X = X[none_defect_case]

none_defect_label

X_none, none_label = resize_arr(none_defect_X, none_defect_label)

  im_resized = img_array.resize((64,64), Image.NEAREST)
100%|██████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:04<00:00, 207.05it/s]


In [24]:
np.unique(none_defect_label, return_counts = True)     

(array([8]), array([1000]))

In [25]:
X_none, none_label = resize_arr(none_defect_X, none_defect_label)

  im_resized = img_array.resize((64,64), Image.NEAREST)
100%|██████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:04<00:00, 208.18it/s]


In [26]:
n_train_idx, n_valid_idx, n_test_idx, n_train_label, n_valid_label, n_test_label = shuffle_idx_n(none_label)

In [27]:
X_none = torch.Tensor(X_none)

In [28]:
base = torch.zeros(1,64,64)
for idx in tqdm(n_train_idx):
    base = torch.cat([base, X_none[idx].unsqueeze(0)], axis = 0)
n_train = base[1:]

base = torch.zeros(1,64,64)
for idx in tqdm(n_valid_idx):
    base = torch.cat([base, X_none[idx].unsqueeze(0)], axis = 0)
n_valid = base[1:]

base = torch.zeros(1,64,64)
for idx in tqdm(n_test_idx):
    base = torch.cat([base, X_none[idx].unsqueeze(0)], axis = 0)
n_test = base[1:]

100%|██████████████████████████████████████████████████████████████████████████████| 600/600 [00:00<00:00, 15467.53it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 150/150 [00:00<00:00, 23199.44it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 250/250 [00:00<00:00, 24238.93it/s]


# Save 

In [29]:
train = torch.concat([train, n_train], axis = 0)
valid = torch.concat([valid, n_valid], axis = 0)
test = torch.concat([test, n_test], axis = 0)


In [30]:
train_label.extend(n_train_label)
valid_label.extend(n_valid_label)
test_label.extend(n_test_label)

In [31]:
np.unique(train_label, return_counts = True)  

(array([0, 1, 2, 3, 4, 5, 6, 7, 8]),
 array([600, 600, 600, 600, 600,  89, 600, 519, 600]))

In [32]:
train.shape

torch.Size([4808, 64, 64])

In [33]:
import pickle 

with open('data/wm_38_X_train.pickle', 'wb') as handle:
    pickle.dump(train, handle, protocol = pickle.HIGHEST_PROTOCOL)    
with open('data/wm_38_y_train.pickle', 'wb') as handle:
    pickle.dump(train_label, handle, protocol = pickle.HIGHEST_PROTOCOL)    

with open('data/wm_38_X_valid.pickle', 'wb') as handle:
    pickle.dump(valid, handle, protocol = pickle.HIGHEST_PROTOCOL)    
with open('data/wm_38_y_valid.pickle', 'wb') as handle:
    pickle.dump(valid_label, handle, protocol = pickle.HIGHEST_PROTOCOL)    

with open('data/wm_38_X_test.pickle', 'wb') as handle:
    pickle.dump(test, handle, protocol = pickle.HIGHEST_PROTOCOL)    
with open('data/wm_38_y_test.pickle', 'wb') as handle:
    pickle.dump(test_label, handle, protocol = pickle.HIGHEST_PROTOCOL)    