# Imports and load data

In [1]:
import os
import sys

# Directory added to the import path; presumably the checkout that contains the
# `scripts` package imported in the next cell (confirm).
# Set the OLR_ROOT environment variable to point somewhere else; the default keeps
# the location that was previously hard-coded here.
OLR_ROOT = os.environ.get('OLR_ROOT', '/home/apoorva/Desktop/Work/olr')
if OLR_ROOT not in sys.path:  # re-running this cell must not keep growing sys.path
    sys.path.append(OLR_ROOT)

In [2]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras.utils import to_categorical

from sklearn.model_selection import train_test_split
from scripts.utils.load import load_pca_anomaly
from sklearn.metrics import classification_report, confusion_matrix

2024-01-12 19:57:32.288256: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
# Load the feature matrix and labels through the project helper.
# NOTE(review): shapes are not visible here. Downstream code reshapes the labels to
# (40, 135) and feeds 50-column windows to the model, so this presumably returns
# 40 * 135 labels and at least 50 feature columns — confirm against load_pca_anomaly.
pca_x, olr_labels = load_pca_anomaly()

In [14]:
def pentad_data(count):
    '''
    Build the stratified train/test split for one lead pentad.

    count is 0-indexed
    count = 0 corresponds to first leading pentad
    count = 1 corresponds to second leading pentad
    count = 2 corresponds to third leading pentad

    Reads the module-level `pca_x` and `olr_labels`. Every sample is a 15-row
    window over the first 50 columns of `pca_x`; the labels are `olr_labels`
    viewed as (40, 135) with the first 1 + 5*count columns dropped.

    Returns (X_train, X_test, y_train, y_test) from an 87.5% / 12.5% split
    stratified on the labels with random_state=1337.

    NOTE(review): the windows are emitted with j as the outer loop and i as the
    inner loop, while the labels are flattened row-major from the (40, 135)
    view (the 40-axis varies slowest). If i is meant to index that 40-axis, the
    k-th window and the k-th label do not belong to the same (i, j) pair.
    NOTE(review): window starts are i*40 + j with j running up to 133, so
    different (i, j) pairs can select the same rows. Confirm both points
    against the row layout of `pca_x` before trusting the reported metrics.
    '''
    assert count in (0, 1, 2)
    lead = 5 * count
    features = pca_x[:, :50]

    windows = []
    for j in range(134 - lead):
        for i in range(40):
            start = i * 40 + j
            windows.append(features[start:start + 15, :])
    windows = np.array(windows)

    label_grid = np.reshape(olr_labels, (40, 135))
    labels = np.reshape(label_grid[:, 1 + lead:], -1)

    train_x, test_x, train_y, test_y = train_test_split(
        windows,
        labels,
        random_state=1337,
        train_size=0.875,
        stratify=labels,
    )
    return train_x, test_x, train_y, test_y

# Model Training: Conv1D

## First Pentad

In [15]:
# Split for the first lead pentad (count = 0).
X_train, X_test, y_train, y_test = pentad_data(0)

In [16]:
# One-hot encode the class labels; the model below is trained with a categorical loss.
y_train_onehot, y_test_onehot = (to_categorical(y) for y in (y_train, y_test))

In [17]:
# Conv1D classifier for the first pentad: three SeparableConv1D -> BatchNormalization
# stages (32, 16, 8 filters) over the (15, 50) window, then a small dense head that
# ends in a 3-way softmax.
inputs = keras.layers.Input(shape=(15, 50))
x = inputs
for n_filters in (32, 16, 8):
    x = layers.SeparableConv1D(
        filters=n_filters,
        kernel_size=3,
        activation=keras.layers.ELU(),
        depthwise_initializer=keras.initializers.GlorotNormal(),
        pointwise_initializer=keras.initializers.GlorotNormal(),
        depthwise_regularizer=keras.regularizers.L1L2(l1=1e-5, l2=1e-5),
        pointwise_regularizer=keras.regularizers.L1L2(l1=1e-5, l2=1e-5),
    )(x)
    x = layers.BatchNormalization()(x)
x = layers.Flatten()(x)
# (A Dropout(0.3) layer at this point is currently disabled.)
x = layers.Dense(10, activation=keras.layers.LeakyReLU())(x)
x = layers.Dense(3, name='logits')(x)
output = layers.Activation('softmax')(x)

pred_model = keras.Model(inputs=inputs, outputs=output)
pred_model.summary()

LEARNING_RATE = 9.19e-3
EPOCHS = 100

# Focal loss with SGD + Nesterov momentum. Disabled alternatives from earlier versions
# of this cell: the Adam optimizer, a CosineDecay learning-rate schedule and
# EarlyStopping(monitor='val_f1_score', patience=10, start_from_epoch=50).
pred_model.compile(
    loss=keras.losses.CategoricalFocalCrossentropy(alpha=0.3, gamma=0.5),
    optimizer=keras.optimizers.SGD(learning_rate=LEARNING_RATE, momentum=0.9, nesterov=True),
    metrics=[
        keras.metrics.F1Score(average='macro'),
        keras.metrics.CategoricalAccuracy(),
    ],
)

# validation_split holds out the last 20% of the training arrays (Keras does not
# shuffle before taking the split).
pred_model.fit(
    X_train,
    y_train_onehot,
    batch_size=16,
    epochs=EPOCHS,
    validation_split=0.2,
)

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 15, 50)]          0         
                                                                 
 separable_conv1d_3 (Separa  (None, 13, 32)            1782      
 bleConv1D)                                                      
                                                                 
 batch_normalization_3 (Bat  (None, 13, 32)            128       
 chNormalization)                                                
                                                                 
 separable_conv1d_4 (Separa  (None, 11, 16)            624       
 bleConv1D)                                                      
                                                                 
 batch_normalization_4 (Bat  (None, 11, 16)            64        
 chNormalization)                                          

<keras.src.callbacks.History at 0x7f3c4c15d1e0>

In [18]:
# Confusion matrix and per-class report for the training split, then the held-out split.
for split_name, split_x, split_y in (
    ("Training", X_train, y_train),
    ("Testing", X_test, y_test),
):
    # argmax over the softmax outputs gives the predicted class index
    y_pred = np.argmax(pred_model.predict(split_x), axis=1)

    print(split_name)
    print(confusion_matrix(y_pred=y_pred, y_true=split_y))
    print(classification_report(y_pred=y_pred, y_true=split_y))

Training
[[ 253  313   48]
 [ 200 3089  112]
 [  54  442  179]]
              precision    recall  f1-score   support

         0.0       0.50      0.41      0.45       614
         1.0       0.80      0.91      0.85      3401
         2.0       0.53      0.27      0.35       675

    accuracy                           0.75      4690
   macro avg       0.61      0.53      0.55      4690
weighted avg       0.72      0.75      0.73      4690

Testing
[[ 14  68   6]
 [ 54 394  38]
 [ 12  73  11]]
              precision    recall  f1-score   support

         0.0       0.17      0.16      0.17        88
         1.0       0.74      0.81      0.77       486
         2.0       0.20      0.11      0.15        96

    accuracy                           0.63       670
   macro avg       0.37      0.36      0.36       670
weighted avg       0.59      0.63      0.60       670



## Second Pentad

In [26]:
# Split for the second lead pentad (count = 1), then one-hot encode the labels.
X_train, X_test, y_train, y_test = pentad_data(1)
y_train_onehot, y_test_onehot = (to_categorical(y) for y in (y_train, y_test))

In [27]:
# Conv1D classifier for the second pentad: three SeparableConv1D -> BatchNormalization
# stages (32, 16, 8 filters) over the (15, 50) window, then a small dense head that
# ends in a 3-way softmax.
inputs = keras.layers.Input(shape=(15, 50))
x = inputs
for n_filters in (32, 16, 8):
    x = layers.SeparableConv1D(
        filters=n_filters,
        kernel_size=3,
        activation=keras.layers.ELU(),
        depthwise_initializer=keras.initializers.GlorotNormal(),
        pointwise_initializer=keras.initializers.GlorotNormal(),
        depthwise_regularizer=keras.regularizers.L1L2(l1=1e-5, l2=1e-5),
        pointwise_regularizer=keras.regularizers.L1L2(l1=1e-5, l2=1e-5),
    )(x)
    x = layers.BatchNormalization()(x)
x = layers.Flatten()(x)
# (A Dropout(0.3) layer at this point is currently disabled.)
x = layers.Dense(10, activation=keras.layers.LeakyReLU())(x)
x = layers.Dense(3, name='logits')(x)
output = layers.Activation('softmax')(x)

pred_model = keras.Model(inputs=inputs, outputs=output)
pred_model.summary()

LEARNING_RATE = 9.2e-3
EPOCHS = 85

# Focal loss with SGD + Nesterov momentum. Disabled alternatives from earlier versions
# of this cell: the Adam optimizer, a CosineDecay learning-rate schedule and
# EarlyStopping(monitor='val_f1_score', patience=10, start_from_epoch=50).
pred_model.compile(
    loss=keras.losses.CategoricalFocalCrossentropy(alpha=0.15, gamma=0.5),
    optimizer=keras.optimizers.SGD(learning_rate=LEARNING_RATE, momentum=0.9, nesterov=True),
    metrics=[
        keras.metrics.F1Score(average='macro'),
        keras.metrics.CategoricalAccuracy(),
    ],
)

# validation_split holds out the last 20% of the training arrays (Keras does not
# shuffle before taking the split).
pred_model.fit(
    X_train,
    y_train_onehot,
    batch_size=16,
    epochs=EPOCHS,
    validation_split=0.2,
)

Model: "model_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_6 (InputLayer)        [(None, 15, 50)]          0         
                                                                 
 separable_conv1d_12 (Separ  (None, 13, 32)            1782      
 ableConv1D)                                                     
                                                                 
 batch_normalization_14 (Ba  (None, 13, 32)            128       
 tchNormalization)                                               
                                                                 
 separable_conv1d_13 (Separ  (None, 11, 16)            624       
 ableConv1D)                                                     
                                                                 
 batch_normalization_15 (Ba  (None, 11, 16)            64        
 tchNormalization)                                         

<keras.src.callbacks.History at 0x7f3c94bcdb70>

In [28]:
# Confusion matrix and per-class report for the training split, then the held-out split.
for split_name, split_x, split_y in (
    ("Training", X_train, y_train),
    ("Testing", X_test, y_test),
):
    # argmax over the softmax outputs gives the predicted class index
    y_pred = np.argmax(pred_model.predict(split_x), axis=1)

    print(split_name)
    print(confusion_matrix(y_pred=y_pred, y_true=split_y))
    print(classification_report(y_pred=y_pred, y_true=split_y))

Training
[[ 204  359   27]
 [ 163 2870  217]
 [  27  419  229]]
              precision    recall  f1-score   support

         0.0       0.52      0.35      0.41       590
         1.0       0.79      0.88      0.83      3250
         2.0       0.48      0.34      0.40       675

    accuracy                           0.73      4515
   macro avg       0.60      0.52      0.55      4515
weighted avg       0.71      0.73      0.71      4515

Testing
[[  6  70   8]
 [ 47 369  49]
 [  6  81   9]]
              precision    recall  f1-score   support

         0.0       0.10      0.07      0.08        84
         1.0       0.71      0.79      0.75       465
         2.0       0.14      0.09      0.11        96

    accuracy                           0.60       645
   macro avg       0.32      0.32      0.31       645
weighted avg       0.55      0.60      0.57       645



## Third Pentad

In [21]:
# Split for the third lead pentad (count = 2), then one-hot encode the labels.
X_train, X_test, y_train, y_test = pentad_data(2)
y_train_onehot, y_test_onehot = (to_categorical(y) for y in (y_train, y_test))

In [22]:
# Conv1D classifier for the third pentad: three SeparableConv1D -> BatchNormalization
# stages (32, 16, 8 filters) over the (15, 50) window, then a small dense head that
# ends in a 3-way softmax.
inputs = keras.layers.Input(shape=(15, 50))
x = inputs
for n_filters in (32, 16, 8):
    x = layers.SeparableConv1D(
        filters=n_filters,
        kernel_size=3,
        activation=keras.layers.ELU(),
        depthwise_initializer=keras.initializers.GlorotNormal(),
        pointwise_initializer=keras.initializers.GlorotNormal(),
        depthwise_regularizer=keras.regularizers.L1L2(l1=1e-5, l2=1e-5),
        pointwise_regularizer=keras.regularizers.L1L2(l1=1e-5, l2=1e-5),
    )(x)
    x = layers.BatchNormalization()(x)
x = layers.Flatten()(x)
# (A Dropout(0.3) layer at this point is currently disabled.)
x = layers.Dense(10, activation=keras.layers.LeakyReLU())(x)
x = layers.Dense(3, name='logits')(x)
output = layers.Activation('softmax')(x)

pred_model = keras.Model(inputs=inputs, outputs=output)
pred_model.summary()

LEARNING_RATE = 9.2e-3
EPOCHS = 85

# Focal loss with SGD + Nesterov momentum. Disabled alternatives from earlier versions
# of this cell: the Adam optimizer, a CosineDecay learning-rate schedule and
# EarlyStopping(monitor='val_f1_score', patience=10, start_from_epoch=50).
pred_model.compile(
    loss=keras.losses.CategoricalFocalCrossentropy(alpha=0.15, gamma=0.5),
    optimizer=keras.optimizers.SGD(learning_rate=LEARNING_RATE, momentum=0.9, nesterov=True),
    metrics=[
        keras.metrics.F1Score(average='macro'),
        keras.metrics.CategoricalAccuracy(),
    ],
)

# validation_split holds out the last 20% of the training arrays (Keras does not
# shuffle before taking the split).
pred_model.fit(
    X_train,
    y_train_onehot,
    batch_size=16,
    epochs=EPOCHS,
    validation_split=0.2,
)

Model: "model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None, 15, 50)]          0         
                                                                 
 separable_conv1d_9 (Separa  (None, 13, 32)            1782      
 bleConv1D)                                                      
                                                                 
 batch_normalization_9 (Bat  (None, 13, 32)            128       
 chNormalization)                                                
                                                                 
 separable_conv1d_10 (Separ  (None, 11, 16)            624       
 ableConv1D)                                                     
                                                                 
 batch_normalization_10 (Ba  (None, 11, 16)            64        
 tchNormalization)                                         

<keras.src.callbacks.History at 0x7f3c94bcc3a0>

In [23]:
# Confusion matrix and per-class report for the training split, then the held-out split.
for split_name, split_x, split_y in (
    ("Training", X_train, y_train),
    ("Testing", X_test, y_test),
):
    # argmax over the softmax outputs gives the predicted class index
    y_pred = np.argmax(pred_model.predict(split_x), axis=1)

    print(split_name)
    print(confusion_matrix(y_pred=y_pred, y_true=split_y))
    print(classification_report(y_pred=y_pred, y_true=split_y))

Training
[[ 138  371   59]
 [  95 2800  202]
 [  29  367  279]]
              precision    recall  f1-score   support

         0.0       0.53      0.24      0.33       568
         1.0       0.79      0.90      0.84      3097
         2.0       0.52      0.41      0.46       675

    accuracy                           0.74      4340
   macro avg       0.61      0.52      0.55      4340
weighted avg       0.71      0.74      0.72      4340

Testing
[[  9  60  12]
 [ 37 362  44]
 [  7  76  13]]
              precision    recall  f1-score   support

         0.0       0.17      0.11      0.13        81
         1.0       0.73      0.82      0.77       443
         2.0       0.19      0.14      0.16        96

    accuracy                           0.62       620
   macro avg       0.36      0.35      0.35       620
weighted avg       0.57      0.62      0.59       620



# WandB Sweep

A hyperparameter sweep to pick the best parameters for the Conv-1D model, since trying parameters by hand wasn't fruitful.

In [None]:
import datetime
import os
import wandb

import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras.utils import to_categorical
from wandb.keras import WandbCallback, WandbMetricsLogger

# Module-level defaults.
# NOTE(review): none of EPOCHS, LEARNING_RATE or MOMENTUM is referenced by the
# functions visible in this cell — the sweep configuration supplies epochs and lr,
# and get_optimizer uses a literal momentum of 0.9.
EPOCHS = 100
LEARNING_RATE = 1e-2
MOMENTUM = 0.9
# Training data shared across sweep runs; assigned by get_data().
X_train_50 = None
y_train_onehot = None

def def_model():
    '''
    Build the (uncompiled) Conv-1D model used by the sweep.

    The model takes flat 750-value rows and reshapes them to (15, 50). It then
    applies three SeparableConv1D -> BatchNormalization stages with 32, 16 and
    8 filters, a Dense(10) layer followed by BatchNormalization, and a 3-way
    softmax output.

    Returns the `keras.Model`.
    '''
    inputs = keras.Input(shape=(750,))
    x = layers.Reshape((15, 50))(inputs)

    for n_filters in (32, 16, 8):
        conv = layers.SeparableConv1D(
            filters=n_filters,
            kernel_size=3,
            activation=keras.layers.ELU(),
            depthwise_initializer=keras.initializers.GlorotNormal(),
            pointwise_initializer=keras.initializers.GlorotNormal(),
        )
        x = layers.BatchNormalization()(conv(x))

    x = layers.Flatten()(x)
    hidden = layers.Dense(
        10,
        activation=keras.layers.LeakyReLU(),
        kernel_initializer=keras.initializers.GlorotNormal(),
        bias_initializer=keras.initializers.Zeros(),
    )
    x = layers.BatchNormalization()(hidden(x))
    outputs = layers.Dense(3, activation="softmax")(x)

    return keras.Model(inputs=inputs, outputs=outputs)


def get_data():
    '''
    Populate the module-level `X_train_50` and `y_train_onehot` used by `train`.

    Loads the data with `load_pca_anomaly`, keeps the first 50 feature columns,
    cuts 15-row windows, takes the training part of a stratified 87.5% split
    (random_state=1337), flattens every training window into a single row and
    one-hot encodes the training labels. The held-out part is discarded.

    NOTE(review): windows are emitted with j as the outer loop and i as the
    inner loop, while the labels are simply flattened; whether the k-th window
    and the k-th label describe the same sample depends on the row layout of
    the loaded arrays — confirm.
    NOTE(review): window starts are i*40 + j with j running up to 134, so
    different (i, j) pairs can select the same rows — confirm the stride.
    NOTE(review): this pairs 135 windows per i with all labels, whereas
    `pentad_data(0)` uses 134 windows and drops the first label column — the
    two pipelines use different window/label offsets.
    '''
    global X_train_50, y_train_onehot
    pca_x, olr_labels = load_pca_anomaly()
    pca_x_50 = pca_x[:, :50]
    pca_x_50 = np.array([pca_x_50[i*40+j:i*40+j+15, :] for j in range(135) for i in range(40)])
    olr_labels = np.reshape(olr_labels, -1)
    X_train, _, y_train, _ = train_test_split(pca_x_50, olr_labels, random_state=1337, train_size=0.875, stratify=olr_labels)
    # Flatten using the actual number of training rows instead of a hard-coded 4725,
    # so the reshape keeps working if the data size or the split fraction changes.
    X_train_50 = np.reshape(X_train, (X_train.shape[0], -1))
    y_train_onehot = to_categorical(y_train)


def get_optimizer(lr=1e-3, optimizer="adam", momentum=0.9):
    '''
    Return a Keras optimizer selected by name.

    lr        -- learning rate passed to the optimizer.
    optimizer -- one of "adam", "sgd", "nesterov" (SGD with Nesterov momentum)
                 or "rmsprop"; matched case-insensitively.
    momentum  -- momentum for the SGD variants and RMSprop (ignored by Adam).

    Raises ValueError for an unrecognised name, instead of silently returning
    None and failing later inside `model.compile`.
    '''
    name = optimizer.lower()
    if name == "adam":
        return keras.optimizers.Adam(learning_rate=lr)
    if name == "sgd":
        return keras.optimizers.SGD(learning_rate=lr, momentum=momentum)
    if name == "nesterov":
        return keras.optimizers.SGD(learning_rate=lr, momentum=momentum, nesterov=True)
    if name == "rmsprop":
        # The keyword is `learning_rate`; it was previously misspelled `earning_rate`,
        # so the learning rate was never passed to RMSprop under its real name.
        return keras.optimizers.RMSprop(learning_rate=lr, momentum=momentum)
    raise ValueError(f"Unknown optimizer {optimizer!r}; expected adam, sgd, nesterov or rmsprop")


def train(model, batch_size=64, epochs=10, lr=1e-3, optimizer='rmsprop', alpha=0.25, gamma=2, log_freq=10):
    '''
    Compile `model` with a categorical focal loss and fit it on the data
    prepared by `get_data`, logging to TensorBoard and Weights & Biases.

    model      -- uncompiled Keras model (see `def_model`).
    batch_size -- mini-batch size for `fit`.
    epochs     -- number of training epochs.
    lr         -- learning rate handed to `get_optimizer`.
    optimizer  -- optimizer name handed to `get_optimizer`.
    alpha      -- `alpha` of CategoricalFocalCrossentropy.
    gamma      -- `gamma` of CategoricalFocalCrossentropy.
    log_freq   -- `log_freq` of WandbMetricsLogger.

    Reads the module-level `X_train_50` and `y_train_onehot`; `get_data()` must
    have been called first. The last 10% of the rows are used for validation.
    '''
    # NOTE(review): this runs after `model` has already been built by the caller —
    # confirm that clearing the Keras session at this point is intended.
    tf.keras.backend.clear_session()
    model.compile(
        loss=keras.losses.CategoricalFocalCrossentropy(alpha=alpha, gamma=gamma),
        optimizer=get_optimizer(lr, optimizer),
        metrics=[
            keras.metrics.F1Score(average='macro'),
            keras.metrics.CategoricalAccuracy(),
        ],
    )

    timestamp = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    log_dir = os.path.join(os.getcwd(), f"logs/logdir_{timestamp}")
    # exist_ok: two runs starting within the same second share a directory name
    os.makedirs(log_dir, exist_ok=True)
    callbacks = [
        keras.callbacks.TensorBoard(log_dir=log_dir),
        WandbCallback(log_gradients=True, training_data=(X_train_50, y_train_onehot)),
        WandbMetricsLogger(log_freq=log_freq),
    ]

    # Fit on the flattened windows prepared by get_data(). The call previously used
    # `X_train`, which this cell never defines (in the notebook it resolved to the
    # 3-D array left over from an earlier pentad cell, with the wrong shape and
    # row count for this model and these labels).
    model.fit(X_train_50,
              y_train_onehot,
              batch_size=batch_size,
              epochs=epochs,
              validation_split=0.1,
              callbacks=callbacks)
    

def sweep_train(config_defaults=None):
    '''
    Entry point for one sweep run: open a W&B run, record the architecture and
    dataset names in its config, build a fresh model and train it with the
    hyperparameters W&B selected for this run.

    config_defaults -- passed to `wandb.init` as `config`.
    '''
    with wandb.init(config=config_defaults):
        cfg = wandb.config
        cfg.architecture_name = "Conv-1D"
        cfg.dataset_name = "OLR"

        model = def_model()

        train(
            model,
            batch_size=cfg.batch_size,
            epochs=cfg.epochs,
            lr=cfg.lr,
            optimizer=cfg.optimizer,
            alpha=cfg.alpha,
            gamma=cfg.gamma,
        )


if __name__ == '__main__':
    get_data()
    wandb.login()

    # Hyperparameters sampled for every run.
    search_space = {
        "batch_size": {"values": [8, 16, 32, 64, 128]},
        "epochs": {"distribution": "int_uniform", "max": 400, "min": 15},
        "lr": {"distribution": "uniform", "max": 1e-2, "min": 1e-6},
        "optimizer": {"values": ["sgd", "nesterov", "adam", "rmsprop"]},
        "alpha": {"values": [0.1, 0.15, 0.2, 0.25, 0.3, 0.35]},
        "gamma": {"values": [0.5, 1, 2, 3, 4, 5]},
    }

    # Random search that maximises the validation macro-F1 logged by Keras.
    sweep_configuration = {
        "method": "random",
        "name": "sweep_rms",
        "metric": {"goal": "maximize", "name": "val_f1_score"},
        "parameters": search_space,
    }

    sweep_id = wandb.sweep(sweep=sweep_configuration, project="OLR_Base_Model")
    # Run 50 sampled configurations through sweep_train.
    wandb.agent(sweep_id, function=sweep_train, count=50)