In [1]:
#importing libs
import numpy as np
import matplotlib.pyplot as plt

from data_gen import *
from predict_info import *

import datetime

import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Reshape, Dense, Flatten, Input, Conv1D, Conv1DTranspose, MaxPooling1D, UpSampling1D, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard, ModelCheckpoint, ReduceLROnPlateau

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import auc, roc_curve, confusion_matrix


from tensorflow.config.experimental import list_physical_devices, set_virtual_device_configuration, VirtualDeviceConfiguration

gpus = list_physical_devices('GPU')
set_virtual_device_configuration(gpus[0], [VirtualDeviceConfiguration(memory_limit=4096)])#
print(len(gpus), "Physical GPUs")   

2023-04-29 12:01:13.189076: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1


1 Physical GPUs


2023-04-29 12:01:13.901180: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2023-04-29 12:01:13.901790: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2023-04-29 12:01:13.936970: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-04-29 12:01:13.937277: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:01:00.0 name: NVIDIA GeForce RTX 2080 SUPER computeCapability: 7.5
coreClock: 1.83GHz coreCount: 48 deviceMemorySize: 7.79GiB deviceMemoryBandwidth: 462.00GiB/s
2023-04-29 12:01:13.937307: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1
2023-04-29 12:01:13.938658: I tensorflow/stream_executor/platfo

In [2]:
df = pd.read_csv("Data/scaled_xyzlog_without_GRBs.csv", index_col=0) # data (hopefuly without grbs)
df

  mask |= (ar1 == a)


Unnamed: 0,time,x,y,z,log_cps
0,481.489854,0.351577,-0.470408,-0.809389,1.981139
1,481.489901,0.402253,-0.651949,-0.642771,1.985651
2,481.489947,0.402425,-0.804490,-0.436865,1.984527
3,481.489993,0.359273,-0.910516,-0.204654,2.024280
4,481.490039,0.284601,-0.957816,0.039879,1.973128
...,...,...,...,...,...
2054802,719.491781,-0.080000,0.068985,0.994405,2.120574
2054803,719.491793,-0.125928,0.111252,0.985782,2.130334
2054804,719.491804,-0.170043,0.154518,0.973247,2.152288
2054805,719.491816,-0.212694,0.197985,0.956851,2.176091


In [3]:
def df_to_X_y_gen(df, windowSize=512):
    i = 0
    while True:
        dfNumpy = df.to_numpy()
        if abs(dfNumpy[i+windowSize][0] - (windowSize*(dfNumpy[i-1][0]-dfNumpy[i][0])+dfNumpy[i][0])) < 1:
            X = (dfNumpy[i:i+windowSize])
            y = (dfNumpy[i+windowSize][-1])
        i += 1
        yield np.array(X), np.array(y)

In [4]:
# Split the dataset into training and validation sets
train_size = int(len(df) * 0.8)
train_df = df[:train_size]
val_df = df[train_size:]

# Autoencoder

In [5]:
def create_autoencoder(input_size=512, bottleneck_size=16):
    # Define input layer
    input_layer = Input(shape=(input_size,1))
    normLayer = BatchNormalization()(input_layer)

    # Define encoder layers
    encoder_layer_1 = Conv1D(filters=512, kernel_size=3, activation='relu', padding='same')(normLayer)
    encoder_layer_2 = MaxPooling1D(pool_size=16)(encoder_layer_1)
    encoder_layer_3 = Conv1D(filters=64, kernel_size=3, activation='relu', padding='same')(encoder_layer_2)
    encoder_layer_4 = MaxPooling1D(pool_size=16)(encoder_layer_3)
    encoder_layer_5 = Conv1D(filters=32, kernel_size=3, activation='relu', padding='same')(encoder_layer_4)
    bottleneck_layer = Flatten()(encoder_layer_5)
    bottleneck_layer = Dense(units=16, activation='relu')(bottleneck_layer)

    # Define decoder layers
    bottleneck_layer = Dense(units=16, activation='relu')(bottleneck_layer)
    reshapeLayer = Reshape((16,1))(bottleneck_layer)
    decoder_layer_1 = Conv1D(filters=32, kernel_size=3, activation='relu', padding='same')(reshapeLayer)
    decoder_layer_2 = UpSampling1D(size=16)(decoder_layer_1)
    decoder_layer_3 = Conv1D(filters=64, kernel_size=3, activation='relu', padding='same')(decoder_layer_2)
    decoder_layer_4 = UpSampling1D(size=16)(decoder_layer_3)
    decoder_layer_5 = Conv1D(filters=512, kernel_size=3, activation='relu', padding='same')(decoder_layer_4)
    flatLayerDecoder = Flatten()(decoder_layer_5)
    outputLayer = Dense(512,activation='relu')(flatLayerDecoder)
    outputLayer = Reshape((512,1))(outputLayer)

    # Define autoencoder model
    autoencoder_model = Model(inputs=input_layer, outputs=outputLayer)

    # Compile the autoencoder model
    autoencoder_model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')
    return autoencoder_model


In [6]:
fileName = '{}-{}-{}-{}'.format(datetime.datetime.today().month,
          datetime.datetime.today().day,
          datetime.datetime.today().hour,
          datetime.datetime.today().minute)
callback_conv = [EarlyStopping(monitor='val_loss', patience=7),
                 ModelCheckpoint("models/autoConv/{}.h5".format(fileName), 
                                 monitor='val_loss',
                                 verbose=1,
                                 save_best_only=True, 
                                 mode='min'),
                ReduceLROnPlateau(monitor='val_loss',factor=0.7,patience=3,verbose=True,min_lr=0.000001),
                TensorBoard(log_dir = "./Tensor_logs/auto/{}".format(fileName), update_freq="epoch")]
autoencoder = create_autoencoder()
autoencoder.summary()
history = autoencoder.fit(df_to_X_y_gen(train_df),
          validation_data = (df_to_X_y_gen(val_df)),
          epochs=100, callbacks=[callback_conv])

2023-04-29 12:01:15.181801: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2023-04-29 12:01:15.181818: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2023-04-29 12:01:15.181842: I tensorflow/core/profiler/internal/gpu/cupti_tracer.cc:1365] Profiler found 1 GPUs
2023-04-29 12:01:15.182433: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcupti.so.10.1
2023-04-29 12:01:15.183077: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.
2023-04-29 12:01:15.183129: I tensorflow/core/profiler/internal/gpu/cupti_tracer.cc:1487] CUPTI activity buffer flushed
2023-04-29 12:01:15.190314: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in 

ResourceExhaustedError: OOM when allocating tensor with shape[2097152,512] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc [Op:RandomUniform]

In [None]:
loss = history.history['loss']
val_loss = history.history['val_loss']


plt.plot(loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.ylabel('Cross Entropy')
plt.title('Training and Validation Loss')
plt.xlabel('epoch')
plt.yscale("log")
plt.show()

In [None]:
X_pred = autoencoder.predict(X_test)
x = np.arange(len(X_test[0]))
fig, axs = plt.subplots(5,5,figsize=(16,16))

for i in range(5):
    for j in range(5):
        ind = np.random.randint(len(y_test))
        axs[i,j].set_title(y_test[ind])
        axs[i,j].step(x,X_test[ind])
        axs[i,j].step(x,X_pred[ind],color="red");

In [None]:
#autoencoder.save("autoencoder_conv1d")