In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
from numba import jit, prange, njit
from blimpy import Waterfall
import time
import random
from synthetic_real_dynamic import create_true, create_full_cadence, create_false, create_true_single_shot, create_true_faster
import math
from sklearn.metrics import silhouette_score
import sys
sys.path.insert(1, '../ML_Training')
sys.path.insert(2, '../GBT_pipeline')
from preprocess_dynamic import get_data
from single_search import search_model_eval, combine
from skimage.transform import rescale, resize, downscale_local_mean
import gc
from data_generation import create_data_set
from sklearn.tree import DecisionTreeClassifier


gpus = tf.config.list_physical_devices('GPU')
if gpus:
  # Restrict TensorFlow to only allocate 1GB of memory on the first GPU
    try:
        tf.config.experimental.set_virtual_device_configuration(
            gpus[0],
            [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=10000)])
        tf.config.experimental.set_virtual_device_configuration(
            gpus[1],
            [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=10000)])
        tf.config.experimental.set_virtual_device_configuration(
            gpus[2],
            [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=7000)])
        tf.config.experimental.set_virtual_device_configuration(
            gpus[3],
            [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=10000)])
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Virtual devices must be set before GPUs have been initialized
        print(e)



4 Physical GPUs, 4 Logical GPUs


In [2]:
NUM_SAMPLES = 2000
plate_train = np.load('../../../../../../../datax/scratch/pma/real_filtered_LARGE_HIP110750.npy')[8000:]

In [3]:
print(plate_train.shape)
data, false_data_train, true_data_train = create_data_set(plate_train, NUM_SAMPLES=NUM_SAMPLES, snr_base=20, snr_range = 50, factor=1)
del plate_train
gc.collect()

(6567, 6, 16, 4096)
Creating True
(2000, 6, 16, 4096)
(2000, 6, 16, 4096) (2000, 6, 16, 512)
(2000, 6, 16, 512)
(12000, 16, 512, 1)
Creating False
(12000, 6, 16, 4096) (12000, 6, 16, 512)
(12000, 6, 16, 512)
Creating True
(6000, 6, 16, 4096) (6000, 6, 16, 512)
(6000, 6, 16, 512)
(6000, 6, 16, 4096) (6000, 6, 16, 512)
(6000, 6, 16, 512)
(12000, 6, 16, 512, 1)


0

In [4]:
NUM_SAMPLES = 500
plate_test = np.load('../../../../../../../datax/scratch/pma/real_filtered_LARGE_test_HIP15638.npy')[8000:]

In [5]:
print(plate_test.shape)
NUM_SAMPLES = 500
data, false_data_test, true_data_test = create_data_set(plate_test, NUM_SAMPLES=NUM_SAMPLES, snr_base=20, snr_range = 10, factor=1)
del plate_test, data
gc.collect()

(6567, 6, 16, 4096)
Creating True
(500, 6, 16, 4096)
(500, 6, 16, 4096) (500, 6, 16, 512)
(500, 6, 16, 512)
(3000, 16, 512, 1)
Creating False
(3000, 6, 16, 4096) (3000, 6, 16, 512)
(3000, 6, 16, 512)
Creating True
(1500, 6, 16, 4096) (1500, 6, 16, 512)
(1500, 6, 16, 512)
(1500, 6, 16, 4096) (1500, 6, 16, 512)
(1500, 6, 16, 512)
(3000, 6, 16, 512, 1)


0

In [6]:
def combine(data):
    new_data = np.zeros((data.shape[0]*data.shape[1],data.shape[2],data.shape[3],data.shape[4]))
    for i in prange(data.shape[0]):
        new_data[i*data.shape[1] : (i+1)*data.shape[1],:,:,:] = data[i,:,:,:,:]
    return new_data

def model_compute(data, model):
    print("combine")
    data = combine(data)
    result= model.predict(data, batch_size=500)[2]
    print("recombine")
    return result

In [7]:
from execute_model import model_load
model = model_load("../test_bench/VAE-BLPC1-ENCODER_compressed_512v3-10.h5")

true_train = model_compute(true_data_train, model)
false_train =model_compute(false_data_train, model)

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1', '/job:localhost/replica:0/task:0/device:GPU:2', '/job:localhost/replica:0/task:0/device:GPU:3')
tensorflow      INFO     Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1', '/job:localhost/replica:0/task:0/device:GPU:2', '/job:localhost/replica:0/task:0/device:GPU:3')
combine
recombine
combine
recombine


In [8]:
print(true_train.shape)

(72000, 8)


In [9]:
def recombine(data):
    result = []
    for k in range(data.shape[0]//6):
        result.append(data[k*6:(k+1)*6,:].ravel())
    result = np.array(result)
    return result
true_train = recombine(true_train)
false_train = recombine(false_train)
print(true_train.shape)

(12000, 48)


In [10]:

true_test = model_compute(true_data_test, model)
false_test =model_compute(false_data_test, model)
true_test = recombine(true_test)
false_test = recombine(false_test)
print(true_test.shape)

combine
recombine
combine
recombine
(3000, 48)


In [11]:
from sklearn.utils import shuffle
import numpy as np


train = np.concatenate((true_train,false_train))
print(train.shape)
true_labels = np.zeros((true_train.shape[0]))
true_labels[:]=1

false_labels = np.zeros((false_train.shape[0]))
false_labels[:]=0
labels = np.concatenate((true_labels,false_labels))
print(labels.shape)
train, labels = shuffle(train, labels)

(24000, 48)
(24000,)


In [12]:
test = np.concatenate((true_test,false_test))
print(test.shape)
true_test_labels = np.ones((true_test.shape[0]))

false_test_labels = np.zeros((false_test.shape[0]))
test_labels = np.concatenate((true_test_labels,false_test_labels))
print(test_labels.shape)
test, test_labels = shuffle(test, test_labels)

(6000, 48)
(6000,)


In [13]:
print(labels)

[0. 0. 0. ... 0. 0. 0.]


# Random Forest Decision Tree

In [37]:
from sklearn.ensemble import RandomForestClassifier

# Create the model with 100 trees
tree = RandomForestClassifier(n_estimators=10000, 
                               bootstrap = True,
                               max_features = 'sqrt',n_jobs=-1)
# Fit on training data
tree.fit(train, labels)

RandomForestClassifier(max_features='sqrt', n_estimators=10000, n_jobs=-1)

In [38]:
# Actual class predictions
rf_predictions = tree.predict(test)
# Probabilities for each class
rf_probs = tree.predict_proba(test)[:, 1]

In [39]:
from sklearn.metrics import roc_auc_score

# Calculate roc auc
roc_value = roc_auc_score(test_labels, rf_probs)
print(roc_value)

0.9956885555555557


In [29]:
# Actual class predictions
rf_predictions_true = tree.predict(true_test)
# Probabilities for each class
count = 0
for i in range(rf_predictions_true.shape[0]):
#     if rf_predictions_true[i]==test_labels[i]:
    count+=1
print(count/true_test.shape[0])

0.49333333333333335


In [40]:
from sklearn.metrics import accuracy_score
accuracy_score( test_labels,tree.predict(test))

0.9766666666666667

# SVM


In [18]:
from sklearn import svm

clf = svm.SVC()
clf.fit(train, labels)

SVC()

In [19]:
rf_probs = clf.predict(test)

roc_value = roc_auc_score(test_labels, rf_probs)
print(roc_value)

0.9643333333333333


# XGBoost

In [20]:
from sklearn.ensemble import GradientBoostingClassifier

clf = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth=1, random_state=0).fit(train, labels)

clf.score(test, test_labels)

0.97