## Import Packages

In [1]:
# Import basic packages
import numpy as np
import scipy as sp
import pandas as pd
from scipy import interpolate
import random, os, time
import h5py

from astropy.nddata import block_reduce

from models import make_model

# Colab in order to download files
#from google.colab import files

# =========================================
# =========================================
# scikit-learn for Machine Learning

# Preprocessing
from sklearn.preprocessing import StandardScaler #Standar scaler for standardization
from sklearn.manifold import TSNE # Dimensionality reduction technique
from sklearn.model_selection import GridSearchCV #Cross-Validation
from sklearn.utils import resample #Resampling
# Classifiers
from sklearn.svm import SVC #Support Vector Classifier
from sklearn.ensemble import RandomForestClassifier as RF #Random Forests
# Metrics
from sklearn.metrics import accuracy_score 
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve

# =======================================
# =======================================
# Tensorflow and Keras
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import regularizers
from tensorflow.keras import optimizers
# Keras Layers
from keras.layers import InputLayer
from keras.layers import Conv2D
from keras.layers import BatchNormalization
from keras.layers import Dropout
from keras.layers import MaxPool2D
from keras.layers import Flatten
from keras.layers import Dense

# ==============================================
# ==============================================
# Matplotlib, seaborn and plot pretty 
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns
%matplotlib inline
from matplotlib import rcParams
rcParams['font.family'] = 'serif'

# Adjust rc parameters to make plots pretty
def plot_pretty(dpi=200, fontsize=9):
    """Apply this notebook's matplotlib rc settings.

    Parameters
    ----------
    dpi : int
        Resolution used for saved figures (``savefig.dpi``).
    fontsize : int
        Base font size (``font.size``).
    """
    rc_settings = (
        ("savefig", {"dpi": dpi}),                    # resolution of saved image files
        ('text', {"usetex": False}),                  # labels are NOT processed by LaTeX
        ('font', {"size": fontsize}),                 # base font size
        ('xtick', {"direction": 'in'}),               # axis ticks point inward
        ('ytick', {"direction": 'in'}),
        ('xtick.major', {"pad": 10}),
        ('xtick.minor', {"pad": 5}),
        ('ytick.major', {"pad": 10}),
        ('ytick.minor', {"pad": 5}),
        ('lines', {"dotted_pattern": [0.5, 1.1]}),    # fix dotted lines
    )
    for group, options in rc_settings:
        plt.rc(group, **options)


plot_pretty()

# Pillow
from PIL import Image

# Ask TensorFlow to grow GPU memory on demand (allow_growth) instead of
# using its default allocation behaviour, then open a session with that config.
config = tf.compat.v1.ConfigProto(
    gpu_options=tf.compat.v1.GPUOptions(allow_growth=True)
)
session = tf.compat.v1.Session(config=config)

# Jupyter magics: autoreload mode 2 re-imports edited modules before each cell runs,
# so changes to local modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Render matplotlib figures inline (this magic is also issued earlier in this cell).
%matplotlib inline

2022-09-27 16:40:36.914440: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 31016 MB memory:  -> device: 0, name: Tesla V100-SXM2-32GB, pci bus id: 0004:04:00.0, compute capability: 7.0
2022-09-27 16:40:36.917065: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 31016 MB memory:  -> device: 1, name: Tesla V100-SXM2-32GB, pci bus id: 0004:05:00.0, compute capability: 7.0
2022-09-27 16:40:36.919470: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:2 with 31016 MB memory:  -> device: 2, name: Tesla V100-SXM2-32GB, pci bus id: 0035:03:00.0, compute capability: 7.0
2022-09-27 16:40:36.921856: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:3 with 31016 MB memory:  -> device: 3, name: Tesla V100-SXM2-32GB, pci bus id

In [2]:
# Directory whose entries are listed and opened with h5py by the loading cell.
data_dir = '/project/r/rbond/jorlo/datasets/ml-clusters/for-debug/'

# Only files whose numeric index (parsed from the filename) is strictly below
# `cut` are loaded.
cut = 1000

In [3]:
def _drop_nan_images(images):
    """Return `images` without the entries along axis 0 that contain any NaN."""
    has_nan = np.isnan(images).any(axis=tuple(range(1, images.ndim)))
    return images[~has_nan]


pos_im_act = []
pos_im_des = []
neg_im = []

# (filename prefix, slice holding the numeric file index, destination list).
# The HDF5 dataset key inside each file is the same string as the prefix.
sources = (
    ('act', slice(4, 8), pos_im_act),
    ('des', slice(4, 8), pos_im_des),
    ('random', slice(7, 11), neg_im),
)

for directory in os.listdir(data_dir):
    print(directory)
    for prefix, index_slice, bucket in sources:
        if not directory.startswith(prefix):
            continue
        file_index = directory[index_slice]
        # Skip entries whose index field is not numeric instead of crashing in int().
        if file_index.isdigit() and int(file_index) < cut:
            # Open read-only and close the handle as soon as the data is in memory;
            # `[:]` copies the dataset into a NumPy array, so it outlives the file.
            with h5py.File(os.path.join(data_dir, directory), 'r') as h5f:
                bucket.append(h5f[prefix][:])
        break

# Fail with a readable message rather than np.vstack's generic error on an empty list.
for prefix, _, bucket in sources:
    if not bucket:
        raise ValueError(
            "No '%s' files with index < %s found in %s" % (prefix, cut, data_dir)
        )

pos_im_act = np.vstack(pos_im_act)
pos_im_des = np.vstack(pos_im_des)
neg_im = np.vstack(neg_im)

# Positives are the ACT images followed by the DES images.
pos_im = np.concatenate((pos_im_act, pos_im_des))

# Discard every image that contains at least one NaN.
pos_im = _drop_nan_images(pos_im)
neg_im = _drop_nan_images(neg_im)

# Truncate both classes to the same number of images.
neg_im = neg_im[:len(pos_im)]
pos_im = pos_im[:len(neg_im)]

print(len(pos_im), len(neg_im))

# Keep references to the full-resolution arrays for the later cells.
save_neg_im = neg_im
save_pos_im = pos_im

random_2000_w_y.h5
des_5000_w_y.h5
des_2000_w_y.h5
random_0000_w_y.h5
des_0000_w_y.h5
des_1000_w_y.h5
random_6000_w_y.h5
random_7000_w_y.h5
random_5000_w_y.h5
des_6000_w_y.h5
act_1000_w_y.h5
random_4000_w_y.h5
random_3000_w_y.h5
act_3000_w_y.h5
des_4000_w_y.h5
random_1000_w_y.h5
act_0000_w_y.h5
des_3000_w_y.h5
922 922


In [4]:
# Block-averaging factor applied to the first two axes of every image;
# set to 0 (or None) to keep the images at their saved resolution.
reduce = 4

if reduce:
    # Block size per image axis: average reduce x reduce blocks on the first two
    # axes and leave the last axis untouched.
    block_size = [reduce, reduce, 1]

    # Always start from the saved arrays so re-running this cell is idempotent.
    pos_im = np.array([block_reduce(image, block_size, func=np.mean)
                       for image in save_pos_im])
    neg_im = np.array([block_reduce(image, block_size, func=np.mean)
                       for image in save_neg_im])
else:
    # No reduction requested: restore the saved arrays so a previous run with a
    # non-zero `reduce` does not leave stale, down-sampled data behind.
    pos_im, neg_im = save_pos_im, save_neg_im

In [5]:
# Keep only the first three entries along the last axis (presumably the
# channel axis -- TODO confirm) of every image.
pos_im = pos_im[..., :3]
neg_im = neg_im[..., :3]

input_shape = pos_im.shape[1:]

# Split sizes are fractions of the smaller class. The test slice takes whatever
# remains after the train and validation slices.
tot = min(pos_im.shape[0], neg_im.shape[0])
train_size, val_size, test_size = (int(frac * tot) for frac in (0.7, 0.15, 0.15))
val_end = train_size + val_size


def _split_three_ways(images):
    """Slice `images` into consecutive (train, validation, test) pieces."""
    return images[:train_size], images[train_size:val_end], images[val_end:]


def _binary_labels(n_pos, n_neg):
    """Label vector: `n_pos` ones followed by `n_neg` zeros."""
    return np.array([1] * n_pos + [0] * n_neg)


# NOTE(review): the split is positional and nothing shuffles the arrays first.
# pos_im was built as ACT images followed by DES images, so the tail slices may
# not be representative of the whole set -- consider a seeded shuffle.
train_pos, val_pos, test_pos = _split_three_ways(pos_im)
train_neg, val_neg, test_neg = _split_three_ways(neg_im)

input_shape = train_pos.shape[1:]

# Positives first, negatives second, in both the images and the labels.
train_images = np.concatenate((train_pos, train_neg))
val_images = np.concatenate((val_pos, val_neg))
test_images = np.concatenate((test_pos, test_neg))

train_labels = _binary_labels(train_pos.shape[0], train_neg.shape[0])
val_labels = _binary_labels(val_pos.shape[0], val_neg.shape[0])
test_labels = _binary_labels(test_pos.shape[0], test_neg.shape[0])

batch_size = 500
AUTOTUNE = tf.data.AUTOTUNE


def _batched_dataset(images, labels):
    """Build a shuffled, batched and prefetching tf.data pipeline from arrays."""
    return (
        tf.data.Dataset.from_tensor_slices((images, labels))
        .shuffle(buffer_size=1024)
        .batch(batch_size)
        .prefetch(buffer_size=AUTOTUNE)
    )


train_dataset = _batched_dataset(train_images, train_labels)
val_dataset = _batched_dataset(val_images, val_labels)
test_dataset = _batched_dataset(test_images, test_labels)

print('Input shape: ', input_shape)

2022-09-27 16:41:05.637114: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 31016 MB memory:  -> device: 0, name: Tesla V100-SXM2-32GB, pci bus id: 0004:04:00.0, compute capability: 7.0
2022-09-27 16:41:05.639379: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 31016 MB memory:  -> device: 1, name: Tesla V100-SXM2-32GB, pci bus id: 0004:05:00.0, compute capability: 7.0
2022-09-27 16:41:05.641609: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:2 with 31016 MB memory:  -> device: 2, name: Tesla V100-SXM2-32GB, pci bus id: 0035:03:00.0, compute capability: 7.0
2022-09-27 16:41:05.643829: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:3 with 31016 MB memory:  -> device: 3, name: Tesla V100-SXM2-32GB, pci bus id

In [8]:
name = 'DeepShadows'

# Build the model through the project's factory, passing an L2 regularizer.
model = make_model(
    name,
    input_shape=input_shape,
    kernel_regularizer=regularizers.l2(0.001),
)

# Generator that applies random rotations, shifts and zooms to training images.
data_augmentation = tf.keras.preprocessing.image.ImageDataGenerator(
    rotation_range=360,
    width_shift_range=4,
    height_shift_range=4,
    zoom_range=0.3,
)

# Where checkpoints are written; the parent directory is reused for logs.
checkpoint_path = "/scratch/r/rbond/jorlo/ml-clusters/models/DeepShadows.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

def get_callbacks(name):
    """Build the Keras callbacks used for a training run.

    Parameters
    ----------
    name : str
        Run name; used as the TensorBoard log sub-directory inside the
        module-level ``checkpoint_dir``.

    Returns
    -------
    list
        A weights-only ``ModelCheckpoint`` writing to the module-level
        ``checkpoint_path``, an ``EarlyStopping`` monitoring ``val_accuracy``,
        and a ``TensorBoard`` logger.
    """
    # Join with a path separator: plain string concatenation produced
    # ".../modelsDeepShadows" instead of ".../models/DeepShadows".
    log_dir = os.path.join(checkpoint_dir, name)

    return [
        tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                           save_weights_only=True,
                                           verbose=1),
        # NOTE(review): with the 100-epoch fit used in this notebook, a patience
        # of 100 can never stop training early -- confirm this is intended.
        tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=100),
        tf.keras.callbacks.TensorBoard(log_dir),
    ]
# `toc` is the start timestamp and `tic` the end timestamp of the fit.
toc = time.time()
train_flow = data_augmentation.flow(train_images, train_labels)
history = model.fit(
    train_flow,
    epochs=100,
    validation_data=val_dataset,
    callbacks=get_callbacks(name),
)
tic = time.time()

elapsed = tic - toc
print('Distributed time: ', elapsed)

# Evaluate on the test batches right after training.
test_loss, test_acc = model.evaluate(test_dataset, verbose=2)

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1', '/job:localhost/replica:0/task:0/device:GPU:2', '/job:localhost/replica:0/task:0/device:GPU:3')
Number of devices: 4
Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_3 (Conv2D)           (None, 99, 99, 16)        448       
                                                                 
 batch_normalization_3 (Batc  (None, 99, 99, 16)       64        
 hNormalization)                                                 
                                                                 
 max_pooling2d_3 (MaxPooling  (None, 49, 49, 16)       0         
 2D)                                                             
                                                                 
 dropout_3 (Dropout)         (None, 49, 49, 16)     

Epoch 18/100
Epoch 18: saving model to /scratch/r/rbond/jorlo/ml-clusters/models/DeepShadows.ckpt
Epoch 19/100
Epoch 19: saving model to /scratch/r/rbond/jorlo/ml-clusters/models/DeepShadows.ckpt
Epoch 20/100
Epoch 20: saving model to /scratch/r/rbond/jorlo/ml-clusters/models/DeepShadows.ckpt
Epoch 21/100
Epoch 21: saving model to /scratch/r/rbond/jorlo/ml-clusters/models/DeepShadows.ckpt
Epoch 22/100
Epoch 22: saving model to /scratch/r/rbond/jorlo/ml-clusters/models/DeepShadows.ckpt
Epoch 23/100
Epoch 23: saving model to /scratch/r/rbond/jorlo/ml-clusters/models/DeepShadows.ckpt
Epoch 24/100
Epoch 24: saving model to /scratch/r/rbond/jorlo/ml-clusters/models/DeepShadows.ckpt
Epoch 25/100
Epoch 25: saving model to /scratch/r/rbond/jorlo/ml-clusters/models/DeepShadows.ckpt
Epoch 26/100
Epoch 26: saving model to /scratch/r/rbond/jorlo/ml-clusters/models/DeepShadows.ckpt
Epoch 27/100
Epoch 27: saving model to /scratch/r/rbond/jorlo/ml-clusters/models/DeepShadows.ckpt
Epoch 28/100
Epoch 2

Epoch 44/100
Epoch 44: saving model to /scratch/r/rbond/jorlo/ml-clusters/models/DeepShadows.ckpt
Epoch 45/100
Epoch 45: saving model to /scratch/r/rbond/jorlo/ml-clusters/models/DeepShadows.ckpt
Epoch 46/100
Epoch 46: saving model to /scratch/r/rbond/jorlo/ml-clusters/models/DeepShadows.ckpt
Epoch 47/100
Epoch 47: saving model to /scratch/r/rbond/jorlo/ml-clusters/models/DeepShadows.ckpt
Epoch 48/100
Epoch 48: saving model to /scratch/r/rbond/jorlo/ml-clusters/models/DeepShadows.ckpt
Epoch 49/100
Epoch 49: saving model to /scratch/r/rbond/jorlo/ml-clusters/models/DeepShadows.ckpt
Epoch 50/100
Epoch 50: saving model to /scratch/r/rbond/jorlo/ml-clusters/models/DeepShadows.ckpt
Epoch 51/100
Epoch 51: saving model to /scratch/r/rbond/jorlo/ml-clusters/models/DeepShadows.ckpt
Epoch 52/100
Epoch 52: saving model to /scratch/r/rbond/jorlo/ml-clusters/models/DeepShadows.ckpt
Epoch 53/100
Epoch 53: saving model to /scratch/r/rbond/jorlo/ml-clusters/models/DeepShadows.ckpt
Epoch 54/100
Epoch 5

Epoch 71/100
Epoch 71: saving model to /scratch/r/rbond/jorlo/ml-clusters/models/DeepShadows.ckpt
Epoch 72/100
Epoch 72: saving model to /scratch/r/rbond/jorlo/ml-clusters/models/DeepShadows.ckpt
Epoch 73/100
Epoch 73: saving model to /scratch/r/rbond/jorlo/ml-clusters/models/DeepShadows.ckpt
Epoch 74/100
Epoch 74: saving model to /scratch/r/rbond/jorlo/ml-clusters/models/DeepShadows.ckpt
Epoch 75/100
Epoch 75: saving model to /scratch/r/rbond/jorlo/ml-clusters/models/DeepShadows.ckpt
Epoch 76/100
Epoch 76: saving model to /scratch/r/rbond/jorlo/ml-clusters/models/DeepShadows.ckpt
Epoch 77/100
Epoch 77: saving model to /scratch/r/rbond/jorlo/ml-clusters/models/DeepShadows.ckpt
Epoch 78/100
Epoch 78: saving model to /scratch/r/rbond/jorlo/ml-clusters/models/DeepShadows.ckpt
Epoch 79/100
Epoch 79: saving model to /scratch/r/rbond/jorlo/ml-clusters/models/DeepShadows.ckpt
Epoch 80/100
Epoch 80: saving model to /scratch/r/rbond/jorlo/ml-clusters/models/DeepShadows.ckpt
Epoch 81/100
Epoch 8

Epoch 97/100
Epoch 97: saving model to /scratch/r/rbond/jorlo/ml-clusters/models/DeepShadows.ckpt
Epoch 98/100
Epoch 98: saving model to /scratch/r/rbond/jorlo/ml-clusters/models/DeepShadows.ckpt
Epoch 99/100
Epoch 99: saving model to /scratch/r/rbond/jorlo/ml-clusters/models/DeepShadows.ckpt
Epoch 100/100
Epoch 100: saving model to /scratch/r/rbond/jorlo/ml-clusters/models/DeepShadows.ckpt
Distributed time:  308.94493985176086
1/1 - 0s - loss: 0.6348 - accuracy: 0.7374 - 76ms/epoch - 76ms/step


In [9]:
# Evaluate on the test batches and report the accuracy.
test_metrics = model.evaluate(test_dataset, verbose=2)
test_loss, test_acc = test_metrics

print('\nTest accuracy: ', test_acc)

1/1 - 0s - loss: 0.6348 - accuracy: 0.7374 - 78ms/epoch - 78ms/step

Test accuracy:  0.7374100685119629
