In [None]:
import tensorflow as tf
import tensorflow.keras as keras
import numpy as np
import pandas as pd
import gc

In [None]:
from pyarrow.parquet import ParquetFile
import pyarrow as pa 

# Pull only the first record batch (at most 3600 rows) out of the parquet
# file instead of materialising the whole file, then convert it to pandas.
parquet_path = '../input/tau-test-1/BoostedTop_x1_fixed_0.snappy.parquet'
pf = ParquetFile(parquet_path)
first_batch = next(pf.iter_batches(batch_size=3600))
df = pa.Table.from_batches([first_batch]).to_pandas()
# The Arrow batch is no longer needed once the DataFrame exists.
del first_batch

In [None]:
# Targets: the 'y' column as a NumPy array, one entry per row of `df`.
labels = df['y'].to_numpy()

# Inputs: stack the per-row 'X_jets' entries into one array and cast it to
# float32. The cast is kept as a separate step after np.array(...) so the
# conversion path is the same as before.
jet_column = df['X_jets']
X_jets = np.array(jet_column.tolist()).astype(np.float32)
del jet_column

In [None]:
from sklearn.model_selection import train_test_split
# Reinterpret the jet array as a batch of 125x125 images with 8 channels.
# NOTE(review): reshape only re-reads the existing memory layout; it does not
# move axes. If X_jets arrives channels-first, e.g. (N, 8, 125, 125), a
# transpose would be needed here instead -- confirm the stored layout.
X_data = X_jets.reshape((-1, 125, 125, 8))

# Hold out 20% of the rows for testing; the fixed seed keeps the split stable.
splits = train_test_split(X_data, labels, test_size=0.2, random_state=9)
X_train, X_test, y_train, y_test = splits
del splits

# Report (features, targets) shapes for the train split, then the test split.
for split_X, split_y in ((X_train, y_train), (X_test, y_test)):
    print(split_X.shape, split_y.shape)

In [None]:
import tensorflow as tf
import tensorflow.keras as keras
from keras.models import Sequential, Model, load_model
from keras import optimizers
from keras import layers
from keras.initializers import glorot_uniform, he_uniform
# kernel_initializer=he_uniform(seed=0)
# Preprocessing pipeline wrapping a single Normalization layer. Despite its
# name, it contains no augmentation layers.
normalizer = layers.Normalization()
data_augmentation = keras.Sequential([normalizer], name="data_augmentation")
# NOTE(review): the adapt() call below is disabled, so the layer never learns
# dataset statistics, and the model built in this part of the notebook does
# not reference `data_augmentation` -- confirm whether this is intentional.
# data_augmentation.layers[0].adapt(X_train)

In [None]:
from tensorflow import keras
from keras.models import Sequential, Model
from keras import layers
from keras import optimizers
from keras import regularizers

# One sigmoid output unit: the network emits a single probability per image.
num_classes = 1
input_shape = (125, 125, 8)

image_input = keras.Input(shape=input_shape)

# Convolutional stage: normalise the raw channels, one 3x3 conv, then pool.
# Disabled deeper stages (kept for reference):
#   Conv2D(16, kernel_size=(3,3), activation="relu") -> BatchNormalization -> MaxPooling2D(pool_size=(2,2))
#   Conv2D(128, kernel_size=(3,3), activation="relu") -> BatchNormalization -> MaxPooling2D(pool_size=(2,2))
conv_stage = [
    layers.BatchNormalization(),
    layers.Conv2D(8, kernel_size=(3, 3), activation="relu"),
    layers.BatchNormalization(),
    layers.MaxPooling2D(pool_size=(2, 2)),
]

# Classifier head: L2-regularised dense layers with dropout in between.
dense_head = [
    layers.Flatten(),
    layers.Dense(512, activation="relu", kernel_regularizer=regularizers.l2(0.005)),
    layers.Dropout(0.5),
    layers.Dense(num_classes, activation="sigmoid", kernel_regularizer=regularizers.l2(0.001)),
]

model = Sequential([image_input, *conv_stage, *dense_head])
model.summary()
# keras.utils.plot_model(model)

In [None]:
# Optimiser: Adam whose learning rate starts at 3e-4 and is scaled by 0.9
# for every 4000 optimiser steps (ExponentialDecay).
lr_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=3e-4,
    decay_steps=4000,
    decay_rate=0.9)
opt_func = keras.optimizers.Adam(learning_rate=lr_schedule)

# Keep only the weights of the epoch with the best validation accuracy.
# mode='max' states explicitly that a higher monitored value is better.
# NOTE(review): newer Keras releases require a '.weights.h5' suffix on the
# path when save_weights_only=True -- confirm against the installed version.
checkpoint_filepath = 'saved_model'
checkpoint_callback = keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor='val_binary_accuracy',
    mode='max',
    save_weights_only=True,
    save_best_only=True)

# The model's final layer already applies a sigmoid and the loss is the plain
# (probability-based) binary cross-entropy, so the AUC metric must also treat
# predictions as probabilities. Passing from_logits=True here would apply a
# second sigmoid to the outputs and distort the reported AUC.
model.compile(loss='binary_crossentropy',
              optimizer=opt_func,
              metrics=[
                keras.metrics.BinaryAccuracy(name="binary_accuracy", dtype=float, threshold=0.5),
                keras.metrics.AUC(name="auc"),
              ],
             )

# Train for 20 epochs; 20% of X_train is held out by Keras for validation,
# and the checkpoint callback saves weights whenever validation accuracy improves.
history = model.fit(X_train,
          y_train,
          epochs=20,
          validation_split=0.2,
          batch_size=32,
          shuffle=True,
          callbacks=[checkpoint_callback])