# Preprocessing

In [8]:
import numpy as np
from PIL import Image
import time

def binarize(grayscale, thresh):
    return np.where(grayscale > thresh, 1, 0)

def get_data_image(filename, bin_thresh=0.85):
    img = Image.open(filename).resize((360, 360)).convert("LA")
    image = np.asarray(img)/256
    binarized = binarize(image, bin_thresh)
    ret_img = binarized[:, :, 0:1]
    return ret_img

start_time = time.time()
pp = get_data_image("detector_alpha_ce_ca_nm/plots/30.png", 0.7)
end_time = time.time()
print("Processing time: %f seconds" % (end_time-start_time))
pp.shape

Processing time: 0.041640 seconds


(360, 360, 1)

# Build model

In [16]:
from keras import layers
from keras import models
from keras import optimizers

def build_model():
    model = models.Sequential()
    model.add(layers.Conv2D(16, (3, 3), activation='relu', input_shape=(360, 360, 1)))
    model.add(layers.MaxPooling2D((3, 3)))
    model.add(layers.Conv2D(8, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((3, 3)))
    model.add(layers.Flatten())
    model.add(layers.Dropout(0.1))
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(1, activation='sigmoid'))
    
    model.compile(
        loss='binary_crossentropy',
        optimizer=optimizers.RMSprop(lr=1e-4),
        metrics=['acc']

    )
    return model

In [4]:
model = build_model()
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 358, 358, 16)      160       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 119, 119, 16)      0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 117, 117, 8)       1160      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 39, 39, 8)         0         
_________________________________________________________________
flatten (Flatten)            (None, 12168)             0         
_________________________________________________________________
dropout (Dropout)            (None, 12168)             0         
_________________________________________________________________
dense (Dense)                (None, 64)                7

# Training

In [9]:
alpha_dir="detector_alpha_ce_ca_nm/plots/"
e_dir = "detector_electrons_ce_ca_nm/plots/"
train_data=[]

In [15]:
import tensorflow as tf
from keras.utils import Sequence

class PreprocessedDataGenerator(Sequence):
    def __init__(self, df, x_col, dim=(360,360,1), y_col=None, batch_size=32, num_classes=None,shuffle=True):
        self.batch_size = batch_size
        self.df = dataframe
        self.indices = self.df.index.tolist()
        self.num_classes = num_classes
        self.shuffle = shuffle
        self.x_col = x_col
        self.y_col = y_col
        self.dim=dim
        self.on_epoch_end()
        
    def on_epoch_end(self):
        self.index = np.arange(len(self.indices))
        if self.shuffle == True:
            np.random.shuffle(self.index)
            
    def __len__(self):
        # Denotes the number of batches per epoch
        return len(self.indices) // self.batch_size
    
    def __getitem__(self, index):
        # Generate a batch of data
        index = self.index[index * self.batch_size:(index + 1) * self.batch_size]
        batch = [self.indices[k] for k in index]     # Generate data
        X, y = self.__get_data(batch)
        return X, y
    
    def __get_data(self, batch):
        # X.shape : (batch_size, *dim)
        # We can have multiple Xs and can return them as a list     
        X = [] # logic to load the data from storage
        y = [] # logic for the target variables     # Generate data
        for i, id in enumerate(batch):
            # Store sample
            X[i,] = # logic     # Store class
            y[i] = # labels
        return X, y

In [6]:
model = build_model()

In [90]:
train_data = []
for i in range(4000):
    train_data.append((get_data_image("%s%d.png" % (alpha_dir, i), 1)))
    train_data.append((get_data_image("%s%d.png" % (e_dir, i), 1)))
    if i%1000==0: print("\n%d: "%(i), end="")
    if i%100==0: print("#", end="")



0: ##########
1000: ###

OSError: broken data stream when reading image file