# PERSONAL SUBMISSION
The data was created in a previous notebook: https://www.kaggle.com/code/josmejagamarra/tps-oct-22-personal-datasets

Load the tfrecords from: https://www.kaggle.com/datasets/josmejagamarra/tps-oct-2022-personal-ds-2

In [None]:
#Import libraries
import pandas as pd
import numpy as np
import gc
import tensorflow as tf
from tensorflow.data import TFRecordDataset
import os
from tensorflow.keras import Model

# 1. CONFIGURE THE TPU

In [None]:
# Resolve the Google Cloud Storage path of the attached Kaggle dataset.
# (Original note: this is done "while on CPU", i.e. before the TPU is configured below.)
from kaggle_datasets import KaggleDatasets
GCS_DS_PATH = KaggleDatasets().get_gcs_path("tps-oct-2022-personal-ds-2")
# Bare expression: displays the resolved path as the notebook cell output.
GCS_DS_PATH

In [None]:
# Configure the TPU, falling back to the default (CPU/GPU) strategy when no
# TPU can be reached. Defines `tpu`, `strategy` and `BATCH_SIZE` for the rest
# of the notebook.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Running on TPU ', tpu.master())
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    # `tf.distribute.TPUStrategy` is the non-deprecated name of
    # `tf.distribute.experimental.TPUStrategy`.
    strategy = tf.distribute.TPUStrategy(tpu)
    # Global batch size: 4096 samples per replica.
    BATCH_SIZE = 4096 * strategy.num_replicas_in_sync
    print("TPU")
except Exception as err:
    # `Exception` (not a bare `except`) so that KeyboardInterrupt / SystemExit
    # still propagate; the reason for the fallback is reported instead of
    # being silently discarded.
    print("TPU initialisation failed, using the default strategy:", err)
    tpu = None
    strategy = tf.distribute.get_strategy()
    BATCH_SIZE = 512
    print("CPU")

# 2. LOAD THE INPUT 1 / INPUT 2 / TARGET DATA

In [None]:
# Used to autotune tensorflow dataset transformations
AUTO = tf.data.experimental.AUTOTUNE

# The 10 per-fold datasets are kept in lists for later use
ds_input_1 = []  # For the input 1
ds_input_2 = []  # For the input 2

# The three streams (input 1, input 2, target) are zipped element by element
# further down, so every stream MUST yield its records in a reproducible
# order. The original cell disabled determinism for faster reads, which lets
# parallel reads / parallel maps emit records out of order and can pair a
# feature row with the wrong target. Determinism is therefore kept on.
keep_order = tf.data.Options()
keep_order.experimental_deterministic = True


def _load_feature_fold(team_dir, fold):
    """Return the dataset of 64-value float16 feature rows of one fold.

    Args:
        team_dir: Sub-directory below ``Combining/`` (e.g. ``'Team_A_input'``).
        fold: Fold index used in the ``train_{fold}`` directory name.

    Returns:
        A ``tf.data.Dataset`` yielding tensors of static shape ``[64]``.
    """
    pattern = os.path.join(GCS_DS_PATH, f'Combining/{team_dir}/train_{fold}/feats.tfrecord*')
    records = TFRecordDataset(tf.io.gfile.glob(pattern), num_parallel_reads=AUTO)
    records = records.with_options(keep_order)
    return records.map(
        lambda x: tf.ensure_shape(tf.io.parse_tensor(x, out_type=tf.float16), [64]),
        num_parallel_calls=AUTO)


with strategy.scope():
    # LOAD INPUT 1
    for i in range(10):
        ds_input_1.append(_load_feature_fold('Team_A_input', i))

    # Chain the folds one after another (fold 0 first)
    Input_1 = ds_input_1[0]
    for fold_ds in ds_input_1[1:]:
        Input_1 = Input_1.concatenate(fold_ds)

    # LOAD INPUT 2
    for i in range(10):
        ds_input_2.append(_load_feature_fold('Team_B_input', i))

    Input_2 = ds_input_2[0]
    for fold_ds in ds_input_2[1:]:
        Input_2 = Input_2.concatenate(fold_ds)

    # LOAD TARGET
    PATH = tf.io.gfile.glob(os.path.join(GCS_DS_PATH, 'Combining/Target/target.tfrecord'))
    ds = TFRecordDataset(PATH, num_parallel_reads=AUTO)
    ds = ds.map(lambda x: tf.ensure_shape(tf.io.parse_tensor(x, tf.float16), [2]), num_parallel_calls=AUTO)
    # Split each 2-value record into a pair of 1-element targets, one per model output
    target = ds.map(lambda x: ([x[-2]], [x[-1]]))

In [None]:
# Zip the data from Input_1 / Input_2 / target.
# Resulting element structure: ((input_1_row, input_2_row), target), where
# target is itself the pair produced by the target pipeline.
# Zipping pairs elements purely by position, so all three datasets must
# yield their rows in the same order.
inputs = tf.data.Dataset.zip((Input_1, Input_2))
ds_all = tf.data.Dataset.zip((inputs, target))

# 3. DEFINE THE TRAINING / VALIDATION SET

In [None]:
def get_dataset_partitions_tf(ds, ds_size, train_split=0.9, shuffle_size=1000, batch_s=BATCH_SIZE):
    """Shuffle ``ds`` and split it into batched train / validation datasets.

    Args:
        ds: Unbatched ``tf.data.Dataset`` to split.
        ds_size (int): Number of elements in ``ds``; used to size the split.
        train_split (float): Fraction of ``ds_size`` assigned to training.
        shuffle_size (int): Size of the shuffle buffer.
        batch_s (int): Batch size applied to both partitions.

    Returns:
        tuple: ``(train_ds, val_ds)``, each batched, cached and prefetched.
    """
    # reshuffle_each_iteration=False freezes the shuffled order. With the
    # default (True) every new iteration of the shuffled dataset can use a
    # different order, so the `take` / `skip` partitions below would not be
    # guaranteed disjoint and rows near the boundary could leak between the
    # training and validation sets.
    ds = ds.shuffle(shuffle_size, seed=2022, reshuffle_each_iteration=False)

    # Split the data (train / valid)
    train_size = int(train_split * ds_size)
    train_ds = ds.take(train_size)
    val_ds = ds.skip(train_size)

    # Generate the batches; the batched data is cached after the first pass
    train_ds = train_ds.batch(batch_s).cache().prefetch(tf.data.AUTOTUNE)
    val_ds = val_ds.batch(batch_s).cache().prefetch(tf.data.AUTOTUNE)

    return train_ds, val_ds

In [None]:
# Number of rows in the training data
ds_size = 21_198_036
# Build the batched train / validation pipelines
train_dataset, valid_dataset = get_dataset_partitions_tf(
    ds_all,
    ds_size=ds_size,
    batch_s=BATCH_SIZE,
)

# 4. DEFINE PERSONAL CALLBACK

In [None]:
def Personal_callback(metrics, threshold_metric, ep, lr_i=0.001):
    """Build the list of Keras callbacks used during training.

    Args:
        metrics (str): Name of the logged training quantity to watch
            (e.g. ``'loss'``). Its validation counterpart is read from
            ``'val_' + metrics``.
        threshold_metric (float): Training is stopped once both the training
            and the validation value of ``metrics`` are below this value.
        ep (int): Time constant of the learning-rate decay; the target rate
            at the end of epoch ``e`` is
            ``base_lr * 10 ** (-(e + 1) / (ep * 10))``.
        lr_i (float): Initial (base) learning rate of the decay schedule.

    Returns:
        list: ``[stop_train, lr_decay, reduce_lr, early_stopping]``.
    """

    class ThresholdStop(tf.keras.callbacks.Callback):
        """Stop training when train and validation metrics drop below the threshold."""

        def on_epoch_end(self, epoch, logs=None):
            logs = logs or {}
            train_value = logs.get(metrics)
            val_value = logs.get('val_' + metrics)
            # A metric may be absent (e.g. no validation data); comparing
            # None with a number would raise a TypeError.
            if train_value is None or val_value is None:
                return
            if train_value < threshold_metric and val_value < threshold_metric:
                print("\nCancelling training!")
                self.model.stop_training = True

    stop_train = ThresholdStop()

    class LearningRateDecay(tf.keras.callbacks.Callback):
        """Exponential learning-rate decay that cooperates with ReduceLROnPlateau."""

        def __init__(self, base_lr):
            super().__init__()
            # Kept on the instance (not in a module global) so that several
            # callback lists never share or overwrite each other's state.
            self.base_lr = base_lr

        def on_epoch_end(self, epoch, logs=None):
            # Read the rate as a plain float. Storing the optimizer's
            # variable itself would alias it, making every later decay step
            # compound on the already-decayed rate.
            current_lr = float(tf.keras.backend.get_value(self.model.optimizer.lr))
            new_lr = self.base_lr * 10. ** (-(epoch + 1.) / (ep * 10))
            if current_lr > new_lr:
                tf.keras.backend.set_value(self.model.optimizer.lr, new_lr)
            else:
                # The rate was lowered elsewhere (ReduceLROnPlateau below);
                # continue the decay from that lower value.
                self.base_lr = current_lr

    lr_decay = LearningRateDecay(lr_i)

    # Reduce the learning rate when the validation metric stops improving
    reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_' + metrics,  # validation counterpart of the metric
                                                     factor=0.2,               # new_lr = lr * factor
                                                     patience=2,               # epochs without improvement before reducing
                                                     verbose=1,                # print a message when the rate is reduced
                                                     min_lr=1e-15)             # lower bound on the learning rate

    # Stop training when the monitored quantity has not improved for 5 epochs.
    # NOTE(review): this monitors `metrics` (the training value), while the
    # original comments spoke of the validation loss and of 10 epochs —
    # confirm which was intended. Behaviour is left unchanged.
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor=metrics,
                                                      patience=5,
                                                      min_delta=0.00001,           # minimum change that counts as an improvement
                                                      restore_best_weights=False,  # best weights are not restored
                                                      verbose=1)                   # display a message

    return [stop_train, lr_decay, reduce_lr, early_stopping]

# 5. BUILD THE MODEL

In [None]:
#BUILD THE MODEL
def create_model():
    """Build and compile the two-input / two-output recurrent network.

    Each 64-feature input goes through its own branch (normalisation, light
    Gaussian noise, an added length-1 time axis, then three
    BiLSTM -> BatchNorm -> Dropout -> Dense stages). Both branches are
    concatenated and passed through a shared trunk that ends in two sigmoid
    heads named ``teamA`` and ``teamB``.

    Returns:
        tuple: ``(model, InitialW)`` - the compiled model and the list of
        its weights right after construction.
    """
    layers = tf.keras.layers

    def recurrent_stage(tensor, lstm_units, dense_units):
        # BiLSTM -> BatchNorm -> Dropout -> tanh Dense
        tensor = layers.Bidirectional(layers.LSTM(lstm_units, return_sequences=True))(tensor)
        tensor = layers.BatchNormalization()(tensor)
        tensor = layers.Dropout(0.5)(tensor)
        return layers.Dense(units=dense_units, activation="tanh")(tensor)

    def feature_branch():
        # One per-input sub-model; layers are created in the same order for both inputs.
        branch_in = layers.Input(shape=[64])
        hidden = layers.BatchNormalization()(branch_in)
        hidden = layers.GaussianNoise(0.001)(hidden)
        hidden = layers.Lambda(lambda x: tf.expand_dims(x, axis=1))(hidden)
        for lstm_units, dense_units in ((512, 1024), (256, 512), (128, 256)):
            hidden = recurrent_stage(hidden, lstm_units, dense_units)
            hidden = layers.BatchNormalization()(hidden)
        return tf.keras.Model(inputs=branch_in, outputs=hidden)

    ############ 1. / 2. Per-input branches ############
    branch_a = feature_branch()
    branch_b = feature_branch()

    ############ 3. Concatenate ############
    merged = layers.Concatenate()([branch_a.output, branch_b.output])

    ############ 4. Shared trunk ############
    trunk = layers.BatchNormalization()(merged)
    trunk = recurrent_stage(trunk, 256, 512)

    trunk = layers.BatchNormalization()(trunk)
    trunk = recurrent_stage(trunk, 128, 256)

    # The last stage applies dropout before the BiLSTM and has none after it
    trunk = layers.BatchNormalization()(trunk)
    trunk = layers.Dropout(0.5)(trunk)
    trunk = layers.Bidirectional(layers.LSTM(64, return_sequences=True))(trunk)
    trunk = layers.BatchNormalization()(trunk)
    trunk = layers.Dense(units=128, activation="tanh")(trunk)

    trunk = layers.BatchNormalization()(trunk)
    trunk = layers.Dropout(0.5)(trunk)

    head_a = layers.Dense(1, activation="sigmoid", name="teamA")(trunk)
    head_b = layers.Dense(1, activation="sigmoid", name="teamB")(trunk)

    model = tf.keras.Model(
        inputs=[branch_a.input, branch_b.input],
        outputs=[head_a, head_b],
    )

    ############ COMPILE THE MODEL ############
    bce_losses = [
        tf.keras.losses.BinaryCrossentropy(from_logits=False),
        tf.keras.losses.BinaryCrossentropy(from_logits=False),
    ]
    model.compile(loss=bce_losses, optimizer='adam', metrics=['accuracy'])

    # Snapshot of the initial weights (useful when trying different models)
    InitialW = model.get_weights()

    return model, InitialW

In [None]:
############ MODEL 1 ############
with strategy.scope():
    gc.collect()

    # Fresh network plus a snapshot of its initial weights
    model_1, InitialW_1 = create_model()
    # Start from the initial weights
    model_1.set_weights(InitialW_1)
    # Put the optimizer back on the base learning rate
    tf.keras.backend.set_value(model_1.optimizer.lr, 0.001)

    # First training stage (15 epochs)
    history = model_1.fit(
        train_dataset,
        validation_data=valid_dataset,
        epochs=15,
        callbacks=Personal_callback(metrics='loss', threshold_metric=0.01, ep=100),
    )

    # Keep the trained weights so the next stage can resume from them
    FIT_1_1 = model_1.get_weights()

# 6. FIT THE MODELS
In this case we use 5 models; after training, the test data is predicted with the mean of all the models' predictions.

In [None]:
############ MODEL 1 ############
# Second training stage of model 1: rebuild the network and resume from the
# weights saved at the end of its first stage.
with strategy.scope():
    gc.collect()

    #Create the model
    model_1,InitialW_1 = create_model()
    #Load the weights of model 1's first stage. The original cell loaded
    #FIT_1_2, which belongs to model 2 and is not yet defined at this point
    #when the notebook is run top to bottom.
    model_1.set_weights(FIT_1_1)
    #Reset the value of learning rate in the model
    tf.keras.backend.set_value(model_1.optimizer.lr, 0.001)

    #Fit the model
    history = model_1.fit(train_dataset,
                        epochs=25,
                        validation_data=valid_dataset,
                        callbacks=Personal_callback(metrics='loss', 
                                                    threshold_metric=0.01,
                                                    ep=100))

    #Save the model weights (Use when you train again)
    FIT_2_1=model_1.get_weights()

In [None]:
############ MODEL 2 ############
with strategy.scope():
    gc.collect()

    # Fresh network plus a snapshot of its initial weights
    model_2, InitialW_2 = create_model()
    # Start from the initial weights
    model_2.set_weights(InitialW_2)
    # Put the optimizer back on the base learning rate
    tf.keras.backend.set_value(model_2.optimizer.lr, 0.001)

    # First training stage (15 epochs)
    history = model_2.fit(
        train_dataset,
        validation_data=valid_dataset,
        epochs=15,
        callbacks=Personal_callback(metrics='loss', threshold_metric=0.01, ep=100),
    )

    # Keep the trained weights so the next stage can resume from them
    FIT_1_2 = model_2.get_weights()

In [None]:
############ MODEL 2 ############
with strategy.scope():
    gc.collect()

    # Rebuild the network
    model_2, InitialW_2 = create_model()
    # Resume from the weights saved after the first stage
    model_2.set_weights(FIT_1_2)
    # Put the optimizer back on the base learning rate
    tf.keras.backend.set_value(model_2.optimizer.lr, 0.001)

    # Second training stage (25 epochs)
    history = model_2.fit(
        train_dataset,
        validation_data=valid_dataset,
        epochs=25,
        callbacks=Personal_callback(metrics='loss', threshold_metric=0.01, ep=100),
    )

    # Keep the trained weights (used if training is resumed again)
    FIT_2_2 = model_2.get_weights()

In [None]:
############ MODEL 3 ############
with strategy.scope():
    gc.collect()

    # Fresh network plus a snapshot of its initial weights
    model_3, InitialW_3 = create_model()
    # Start from the initial weights
    model_3.set_weights(InitialW_3)
    # Put the optimizer back on the base learning rate
    tf.keras.backend.set_value(model_3.optimizer.lr, 0.001)

    # First training stage (15 epochs)
    history = model_3.fit(
        train_dataset,
        validation_data=valid_dataset,
        epochs=15,
        callbacks=Personal_callback(metrics='loss', threshold_metric=0.01, ep=100),
    )

    # Keep the trained weights so the next stage can resume from them
    FIT_1_3 = model_3.get_weights()

In [None]:
############ MODEL 3 ############
with strategy.scope():
    gc.collect()

    # Rebuild the network
    model_3, InitialW_3 = create_model()
    # Resume from the weights saved after the first stage
    model_3.set_weights(FIT_1_3)
    # Put the optimizer back on the base learning rate
    tf.keras.backend.set_value(model_3.optimizer.lr, 0.001)

    # Second training stage (25 epochs)
    history = model_3.fit(
        train_dataset,
        validation_data=valid_dataset,
        epochs=25,
        callbacks=Personal_callback(metrics='loss', threshold_metric=0.01, ep=100),
    )

    # Keep the trained weights (used if training is resumed again)
    FIT_2_3 = model_3.get_weights()

In [None]:
############ MODEL 4 ############
with strategy.scope():
    gc.collect()

    # Fresh network plus a snapshot of its initial weights
    model_4, InitialW_4 = create_model()
    # Start from the initial weights
    model_4.set_weights(InitialW_4)
    # Put the optimizer back on the base learning rate
    tf.keras.backend.set_value(model_4.optimizer.lr, 0.001)

    # First training stage (15 epochs)
    history = model_4.fit(
        train_dataset,
        validation_data=valid_dataset,
        epochs=15,
        callbacks=Personal_callback(metrics='loss', threshold_metric=0.01, ep=100),
    )

    # Keep the trained weights so the next stage can resume from them
    FIT_1_4 = model_4.get_weights()

In [None]:
############ MODEL 4 ############
with strategy.scope():
    gc.collect()

    # Rebuild the network
    model_4, InitialW_4 = create_model()
    # Resume from the weights saved after the first stage
    model_4.set_weights(FIT_1_4)
    # Put the optimizer back on the base learning rate
    tf.keras.backend.set_value(model_4.optimizer.lr, 0.001)

    # Second training stage (25 epochs)
    history = model_4.fit(
        train_dataset,
        validation_data=valid_dataset,
        epochs=25,
        callbacks=Personal_callback(metrics='loss', threshold_metric=0.01, ep=100),
    )

    # Keep the trained weights (used if training is resumed again)
    FIT_2_4 = model_4.get_weights()

In [None]:
############ MODEL 5 ############
with strategy.scope():
    gc.collect()

    # Fresh network plus a snapshot of its initial weights
    model_5, InitialW_5 = create_model()
    # Start from the initial weights
    model_5.set_weights(InitialW_5)
    # Put the optimizer back on the base learning rate
    tf.keras.backend.set_value(model_5.optimizer.lr, 0.001)

    # First training stage (15 epochs)
    history = model_5.fit(
        train_dataset,
        validation_data=valid_dataset,
        epochs=15,
        callbacks=Personal_callback(metrics='loss', threshold_metric=0.01, ep=100),
    )

    # Keep the trained weights so the next stage can resume from them
    FIT_1_5 = model_5.get_weights()

In [None]:
############ MODEL 5 ############
with strategy.scope():
    gc.collect()

    # Rebuild the network
    model_5, InitialW_5 = create_model()
    # Resume from the weights saved after the first stage
    model_5.set_weights(FIT_1_5)
    # Put the optimizer back on the base learning rate
    tf.keras.backend.set_value(model_5.optimizer.lr, 0.001)

    # Second training stage (25 epochs)
    history = model_5.fit(
        train_dataset,
        validation_data=valid_dataset,
        epochs=25,
        callbacks=Personal_callback(metrics='loss', threshold_metric=0.01, ep=100),
    )

    # Keep the trained weights (used if training is resumed again)
    FIT_2_5 = model_5.get_weights()

# 7. LOAD THE TEST DATA

In [None]:
# Test features for the first model input: one serialized float16 tensor of 64 values per record
PATH = tf.io.gfile.glob(os.path.join(GCS_DS_PATH, 'Combining/Test/test_in_1.tfrecord'))
test_in_1 = TFRecordDataset(PATH, num_parallel_reads=AUTO).map(
    lambda record: tf.ensure_shape(tf.io.parse_tensor(record, tf.float16), [64])
)

# Test features for the second model input, stored the same way
PATH = tf.io.gfile.glob(os.path.join(GCS_DS_PATH, 'Combining/Test/test_in_2.tfrecord'))
test_in_2 = TFRecordDataset(PATH, num_parallel_reads=AUTO).map(
    lambda record: tf.ensure_shape(tf.io.parse_tensor(record, tf.float16), [64])
)

In [None]:
# Pair the two test inputs; the outer 1-tuple makes each element "(inputs,)" with no target
test_inputs = tf.data.Dataset.zip(((test_in_1, test_in_2),))
# Batches of 4096 rows, cached and prefetched
test_inputs = test_inputs.batch(4096)
test_inputs = test_inputs.cache()
test_inputs = test_inputs.prefetch(tf.data.AUTOTUNE)

# 8. PREDICT AND SAVE THE SUBMISSION

In [None]:
with strategy.scope():
    import time
    start_time = time.time()

    # Raw predictions of the five trained models on the test set
    x_1, x_2, x_3, x_4, x_5 = [
        net.predict(test_inputs)
        for net in (model_1, model_2, model_3, model_4, model_5)
    ]

    # Drop the singleton axes of every model's output
    predict_1, predict_2, predict_3, predict_4, predict_5 = [
        np.squeeze(raw) for raw in (x_1, x_2, x_3, x_4, x_5)
    ]

    print("--- %s seconds ---" % (time.time() - start_time))

In [None]:
# Start from the competition's sample submission and reset both target columns
df = pd.read_csv("../input/tabular-playground-series-oct-2022/sample_submission.csv")
df["team_A_scoring_within_10sec"] = 0
df["team_B_scoring_within_10sec"] = 0

# Ensemble: plain average of the five models (index 0 = first output, index 1 = second output)
df["team_A_scoring_within_10sec"] = (
    predict_1[0] + predict_2[0] + predict_3[0] + predict_4[0] + predict_5[0]
) / 5
df["team_B_scoring_within_10sec"] = (
    predict_1[1] + predict_2[1] + predict_3[1] + predict_4[1] + predict_5[1]
) / 5

df.to_csv("submission_final.csv", index=False)
# Bare expression: displays the frame as the notebook cell output
df