# Binary Classifier on Particle Track Data

In [14]:
import pandas as pd
import os
import sys
import numpy as np
import math

## Get angle values and cast to boolean

In [8]:
track_params = pd.read_csv('../TRAIN/track_parms.csv')

In [33]:
track_params.head()

Unnamed: 0,filename,phi,z,phi_calc,phi_regression,sigma_regression
0,img000000.png,-0.1959,-5.164839,-0.20693,-0.206928,0.014192
1,img000001.png,-1.473349,5.784543,-1.409622,-1.409614,0.014184
2,img000002.png,9.206585,-2.295192,9.296442,9.29633,0.016293
3,img000003.png,5.37889,4.68507,5.281532,5.281474,0.014072
4,img000004.png,-6.700401,-0.851756,-6.739551,-6.739504,0.013997


In [9]:
y = track_params['phi']

In [16]:
y_bool = y.apply(lambda x: True if x > 0 else False)

In [17]:
y.head()

0   -0.195900
1   -1.473349
2    9.206585
3    5.378890
4   -6.700401
Name: phi, dtype: float64

In [18]:
y_bool.head()

0    False
1    False
2     True
3     True
4    False
Name: phi, dtype: bool

In [34]:
import gzip

In [38]:
with gzip.open("../TRAIN/images.raw.gz") as f:
    bytes = f.read(width*height)
    data = np.frombuffer(bytes, dtype='B', count=width*height)
    pixels = np.reshape(data, [width, height, 1], order='F')
    pixels_norm = np.transpose(pixels.astype(np.float) / 255., axes=(1, 0, 2) )

print(pixels_norm)

[[[0.]
  [0.]
  [0.]
  ...
  [0.]
  [0.]
  [0.]]

 [[0.]
  [0.]
  [0.]
  ...
  [0.]
  [0.]
  [0.]]

 [[0.]
  [0.]
  [0.]
  ...
  [0.]
  [0.]
  [0.]]

 ...

 [[0.]
  [0.]
  [0.]
  ...
  [0.]
  [0.]
  [0.]]

 [[0.]
  [0.]
  [0.]
  ...
  [0.]
  [0.]
  [0.]]

 [[0.]
  [0.]
  [0.]
  ...
  [0.]
  [0.]
  [0.]]]


## Simple Convolutional Classifier From Scratch

In [26]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import (
    Conv2D, Activation, MaxPooling2D,
    Flatten, Dense, Dropout
)

### Model Definition

In [27]:
width  = 36
height = 100
channels = 1

model = Sequential()

# Layer 1
model.add(Conv2D(32, (3, 3), input_shape=(width, height, channels)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Layer 2
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Layer 3
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Dense, Classification Layer
model.add(Flatten())  # this converts our 3D feature maps to 1D feature vectors
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

### Data: Keras Image Generators

In [30]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [32]:
batch_size = 16

train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
        '../TRAIN',  # this is the target directory
        target_size=(width, height),  # all images will be resized to 150x150
        batch_size=batch_size,
        class_mode='binary',
        save_format='raw')  # since we use binary_crossentropy loss, we need binary labels

# this is a similar generator, for validation data
validation_generator = test_datagen.flow_from_directory(
        '../VALIDATION',
        target_size=(width, height),
        batch_size=batch_size,
        class_mode='binary')

Found 0 images belonging to 0 classes.
Found 0 images belonging to 0 classes.


## Get ImageGenerator from Provided Code

In [13]:
width  = 36
height = 100

# Open labels files so we can get number of samples and pass the
# data frames to the generators later
train_df = pd.read_csv('../TRAIN/track_parms.csv')
valid_df = pd.read_csv('../VALIDATION/track_parms.csv')
BATCH_SIZE = 100
STEP_SIZE_TRAIN = len(train_df)/BATCH_SIZE
STEP_SIZE_VALID = len(valid_df)/BATCH_SIZE

#-----------------------------------------------------
# generate_arrays_from_file
#-----------------------------------------------------
# Create generator to read in images and labels
# (used for both training and validation samples)
def generate_arrays_from_file(path, labels_df):

    images_path = path+'/images.raw.gz'
    print('generator created for: {}'.format(images_path))

    batch_input           = []
    batch_labels_phi      = []
    batch_labels_z        = []
    idx = 0
    ibatch = 0
    while True:  # loop forever, re-reading images from same file
        with gzip.open(images_path) as f:
            while True: # loop over images in file
            
                # Read in one image
                bytes = f.read(width*height)
                if len(bytes) != (width*height): break # break into outer loop so we can re-open file
                data = np.frombuffer(bytes, dtype='B', count=width*height)
                pixels = np.reshape(data, [width, height, 1], order='F')
                pixels_norm = np.transpose(pixels.astype(np.float) / 255., axes=(1, 0, 2) )
                
                # Labels
                phi = labels_df.phi[idx]
                z   = labels_df.z[idx]
                idx += 1

                # Add to batch and check if it is time to yield
                batch_input.append( pixels_norm )
                batch_labels_phi.append( phi )
                batch_labels_z.append( z )
                if len(batch_input) == BATCH_SIZE :
                    ibatch += 1
                    
                    # Since we are training multiple loss functions we must
                    # pass the labels back as a dictionary whose keys match
                    # the layer their corresponding values are being applied
                    # to.
                    labels_dict = {
                        'phi_output' :  np.array(batch_labels_phi ),
                        'z_output'   :  np.array(batch_labels_z   ),        
                    }
                    
                    yield ( np.array(batch_input), labels_dict )
                    batch_input      = []
                    batch_labels_phi = []
                    batch_labels_z   = []

            idx = 0
            f.close()


#===============================================================================
# Create training generator
train_generator = generate_arrays_from_file('../TRAIN', train_df)

In [4]:
from tensorflow.keras import applications

In [5]:
model = applications.VGG16(include_top=False, weights='imagenet')

In [2]:
model = Sequential()
model.add(Conv2D(32, (3, 3), input_shape=(3, 150, 150)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

W0808 15:39:38.220549 4665337280 deprecation_wrapper.py:119] From /Users/dannowitz/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0808 15:39:38.236174 4665337280 deprecation_wrapper.py:119] From /Users/dannowitz/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0808 15:39:38.239531 4665337280 deprecation_wrapper.py:119] From /Users/dannowitz/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0808 15:39:38.253746 4665337280 deprecation_wrapper.py:119] From /Users/dannowitz/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:3976: The name tf.nn.max_pool is deprecated. Please use tf.nn.max_pool2d instead.



ValueError: Negative dimension size caused by subtracting 2 from 1 for 'max_pooling2d_1/MaxPool' (op: 'MaxPool') with input shapes: [?,1,148,32].