# Notes from video chat on June 6

In [44]:
import os
import sys
import importlib
import pickle
import matplotlib.pyplot as plt
import numpy as np
import random
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, GaussianNoise
from tensorflow.keras.utils import to_categorical
import pandas as pd

# Resolve the project folders relative to the current working directory
# (presumably the notebooks folder -- confirm where the notebook is launched).
notebooks_dir = os.path.abspath('')
proj_dir = os.path.split(notebooks_dir)[0]
raw_data_dir, src_dir = (
    os.path.join(proj_dir, sub_dir) for sub_dir in ('raw_data', 'src')
)

# Put src_dir on the module search path so the helper file can be imported
sys.path.insert(1, src_dir)
#import cutpaste_helper
#importlib.reload(cutpaste_helper)

def makeWindows(image_dict, num_windows, width, height):
    '''
    Cut randomly positioned windows out of every image in image_dict.

    Parameters:
    image_dict: dictionary containing arrays that represent images
                (rows along axis 0, columns along axis 1)
    num_windows: the number of windows wanted per image
    width: the width of the window
    height: the height of the window

    Returns:
    A numpy array with num_windows windows for each image, stacked along
    the first axis in the iteration order of image_dict.

    Raises:
    ValueError: (from random.randint) if the window is larger than an image
    '''
    window_images = []
    for image in image_dict.values():
        #Get the width and height of an image (same for every window of it)
        image_h, image_w = image.shape[:2]
        for _ in range(num_windows):
            #Random top-left corner such that the window stays inside the image
            random_x_coord = random.randint(0, image_w - width)
            random_y_coord = random.randint(0, image_h - height)
            #Slice with the requested size; the width/height parameters are used
            #here rather than any module-level window size
            window = image[random_y_coord:random_y_coord + height,
                           random_x_coord:random_x_coord + width]
            window_images.append(window)
    return np.array(window_images)

def makeDefects(window_list):
    '''
    Return a copy of window_list where every window has a white rectangle.

    Parameters:
    window_list: array of windows; each window is indexed as
                 [row, column, channel] and only channel 0 is modified

    Returns:
    A new array of the same shape; window_list itself is left untouched.

    Raises:
    ValueError: (from random.randint) if a window is smaller than the
                randomly chosen rectangle (sides are between 5 and 25)
    '''
    defect_window_list = np.copy(window_list)
    #Iterating a numpy array yields views, so the writes below land in the copy
    for image in defect_window_list:
        #Use the window's own size instead of a module-level window size
        window_h, window_w = image.shape[:2]
        #Random sized white rectangle
        sq_random_x = random.randint(5, 25)
        sq_random_y = random.randint(5, 25)
        #Random top-left corner that keeps the rectangle inside the window
        random_x_coord = random.randint(0, window_w - sq_random_x)
        random_y_coord = random.randint(0, window_h - sq_random_y)
        #Fill the rectangle with ones (white)
        image[random_y_coord:random_y_coord + sq_random_y,
              random_x_coord:random_x_coord + sq_random_x, 0] = 1.0
    return defect_window_list

def copyPaste(window_list):
    '''
    Return a copy of window_list where, in every window, a random rectangle
    is copied and pasted at another random position of the same window.

    Parameters:
    window_list: array of windows; each window is indexed as
                 [row, column, channel] and only channel 0 is modified

    Returns:
    A new array of the same shape; window_list itself is left untouched.

    Raises:
    ValueError: (from random.randint) if a window is smaller than the
                randomly chosen rectangle (sides are between 5 and 25)
    '''
    defect_window_list = np.copy(window_list)
    #Iterating a numpy array yields views, so the writes below land in the copy
    for window in defect_window_list:
        window_h, window_w = window.shape[:2]
        #Random sized copy-pasted area
        sq_random_x = random.randint(5, 25)
        sq_random_y = random.randint(5, 25)
        #Random coordinates of the area to copy
        src_x = random.randint(0, window_w - sq_random_x)
        src_y = random.randint(0, window_h - sq_random_y)
        #Take a real copy: the source and destination rectangles may overlap
        copy_area = window[src_y:src_y + sq_random_y,
                           src_x:src_x + sq_random_x, 0].copy()
        #Paste coordinates, bounded by this window's own size
        dst_x = random.randint(0, window_w - sq_random_x)
        dst_y = random.randint(0, window_h - sq_random_y)
        #NOTE(review): source and destination can coincide, in which case the
        #window is unchanged but will still be labelled as a defect by callers
        window[dst_y:dst_y + sq_random_y,
               dst_x:dst_x + sq_random_x, 0] = copy_area
    return defect_window_list

def createLabels(image_list, defect_status):
    '''
    Build one label per image: 1 when defect_status is truthy, otherwise 0.

    Parameters:
    image_list: iterable of images (only its length matters)
    defect_status: whether the images are defective

    Returns:
    A numpy array with one label for every element of image_list.
    '''
    label = 1 if defect_status else 0
    return np.array([label for _ in image_list])
        
def shuffleTwoArrays(image_list, label_list):
    '''
    Shuffle two numpy arrays with one shared random permutation, so that
    element i of both results still belongs together.

    Parameters:
    image_list: numpy array of images
    label_list: numpy array of labels; its length sets the permutation size

    Returns:
    A tuple (shuffled images, shuffled labels).
    '''
    order = np.arange(len(label_list))
    np.random.shuffle(order)
    return (image_list[order], label_list[order])

def generateImages(normal_image_list, defect_image_list, num_normal, num_defects):
    '''
    Endless generator of shuffled (images, labels) batches.

    Every batch holds num_normal images drawn at random (with replacement)
    from normal_image_list, labelled 0, and num_defects images drawn the
    same way from defect_image_list, labelled 1.

    Yields:
    A tuple (images, labels) where labels has shape (batch size, 1).
    '''
    def _draw(pool, count):
        #Sample count images from pool, repeats allowed
        return [pool[random.randint(0, len(pool) - 1)] for _ in range(count)]

    while True:
        #Normal images are drawn first, then the defective ones
        batch_images = _draw(normal_image_list, num_normal)
        batch_images += _draw(defect_image_list, num_defects)
        batch_labels = [0] * num_normal + [1] * num_defects
        #Mix both classes while keeping images and labels aligned
        shuffled_images, shuffled_labels = shuffleTwoArrays(
            np.array(batch_images), np.array(batch_labels)
        )
        batch_size = shuffled_images.shape[0]

        yield shuffled_images, shuffled_labels.reshape(batch_size, 1)
        
def valGeneratorImages(test_x, test_y):
    '''
    Endless generator that yields the same (test_x, test_y) pair every time.
    '''
    fixed_batch = (test_x, test_y)
    while True:
        yield fixed_batch

    
# Load dictionaries with TEM images
# NOTE(review): pickle.load can execute arbitrary code while loading, so these
# files must come from a trusted source.
f_name = os.path.join(raw_data_dir, 'test_full_arrays')
#Dict with test image arrays (each key has a value of an array of numbers between 0 and 1)
with open(f_name, "rb") as pickle_file:
    test_full_arrays = pickle.load(pickle_file)
f_name = os.path.join(raw_data_dir, 'train_full_arrays')
#Dict with train image arrays (each key has a value of an array of numbers between 0 and 1)
with open(f_name, "rb") as pickle_file:
    train_full_arrays = pickle.load(pickle_file)

#Window size in pixels and number of windows cut out of every full image
window_x = 118
window_y = 84
num_windows = 100

#Make testing window images and labels
test_window_images = makeWindows(test_full_arrays, num_windows, window_x, window_y)
test_window_labels = createLabels(test_window_images, False)

#Make training window images and labels
train_window_images = makeWindows(train_full_arrays, num_windows ,window_x, window_y)
train_window_labels = createLabels(train_window_images, False)

#Make defect testing window images and labels
#(the defective windows are modified copies of the normal testing windows)
defect_test_window_images = copyPaste(test_window_images)
defect_test_window_labels = createLabels(defect_test_window_images, True)

#Make defect training window images and labels
#(the defective windows are modified copies of the normal training windows)
defect_train_window_images = copyPaste(train_window_images)
defect_train_window_labels = createLabels(defect_train_window_images, True)
        
# #Put all testing & training images and labels in one list each and making them random
# all_training_images = np.concatenate((train_window_images, defect_train_window_images), axis = 0)
# all_training_labels = np.concatenate((train_window_labels, defect_train_window_labels), axis = 0)
# training_images, training_labels = shuffleTwoArrays(all_training_images, all_training_labels)
#Put all testing images and labels in one array each and shuffle them together
all_testing_images = np.concatenate((test_window_images, defect_test_window_images), axis = 0)
all_testing_labels = np.concatenate((test_window_labels, defect_test_window_labels), axis = 0)
testing_images, testing_labels = shuffleTwoArrays(all_testing_images, all_testing_labels)

#Hyperparameters and containers for the per-model results
# filters = [4,8,16,32]
filters = [4]
filter_size = 3
pool_size = 2
epoch_number = 10
my_models = []
my_val_accuracy = []
my_val_loss = []

#The validation generator never ends, so fit() needs an explicit number of
#validation batches; use roughly one pass over the testing windows
#(each validation batch holds 128 normal + 128 defect windows).
validation_steps = max(
    1, (len(test_window_images) + len(defect_test_window_images)) // 256
)

# Build the model.
for filter_num in filters:
    print(f'{filter_num} Filters Results: ')
    train_gen = generateImages(train_window_images, defect_train_window_images, 32, 32)
    val_gen = generateImages(test_window_images, defect_test_window_images, 128, 128)
    # NOTE(review): GaussianNoise(1.) is a large stddev for inputs in [0, 1],
    # and the Conv2D layers have no activation -- confirm both are intended.
    model = Sequential([
      GaussianNoise(1.),
      Conv2D(filter_num, filter_size),
      MaxPooling2D(pool_size=pool_size),
      Conv2D(filter_num * 2, filter_size),
      MaxPooling2D(pool_size = pool_size),
      Conv2D(filter_num, filter_size),
      MaxPooling2D(pool_size=pool_size),
      Conv2D(filter_num * 4, filter_size),
      MaxPooling2D(pool_size = pool_size),
      Flatten(),
      Dense(2, activation='softmax'),
    ])

    # Compile the model.
    # The generators yield integer class ids of shape (n, 1), not one-hot
    # vectors, so the sparse variant of the cross-entropy loss is required.
    model.compile(
      'adam',
      loss='sparse_categorical_crossentropy',
      metrics=['accuracy'],
    )

    # Train the model.
    # steps_per_epoch must be an integer; 11200 is presumably the number of
    # training windows (normal + defect) and 64 the training batch size.
    history = model.fit(
      train_gen,
      epochs = epoch_number,
      steps_per_epoch=11200 // 64,
      validation_data=val_gen,
      validation_steps=validation_steps
    )

#     # Predict on the first 5 test images.
#     predictions = model.predict(testing_images[:5])

#     # Print our model's predictions.
#     print("Predictions")
#     print(np.argmax(predictions, axis=1))

#     # Check our predictions against the ground truths.
#     print("Real Labels")
#     print(testing_labels[:5])
    
#     my_models.append(model)
#     my_val_accuracy.append(history.history['val_accuracy'][-1])
#     my_val_loss.append(history.history['val_loss'][-1])
    
# #Saves the models and model metrics to a dataframe, then saves the dataframe to a csv file and a excel file
# d = {'Models' : my_models, 'Value Accuracy' : my_val_accuracy, 'Value Loss' : my_val_loss}
# df = pd.DataFrame(data=d, index=filters)
# df.to_csv(r'C:\Users\songa\Cutpaste Work\project_cutpaste\CSV files\White Square Dataframe', index=False)
# df.to_excel(r'C:\Users\songa\Cutpaste Work\project_cutpaste\CSV files\White Square Dataframe.xlsx', index=False)
# display(df)

4 Filters Results: 
Epoch 1/10

KeyboardInterrupt: 

In [20]:
gen = generateImages(train_window_images, defect_train_window_images, 32, 32)

In [36]:
images, labels = next(val_gen)
print(np.shape(images))
print(np.shape(labels))
print(labels[0:5])

(256, 84, 118, 1)
(256,)
[0 0 0 1 1]


In [35]:
val_gen

<generator object generateImages at 0x7fce50261cd0>