## Import Packages

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn import metrics
from matplotlib.ticker import MaxNLocator
import keras_tuner as kt
from tensorflow.keras import regularizers
from tensorflow.keras import Model
import os
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout

2024-04-12 05:30:40.071426: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  import kerastuner as kt


## Setting Parameters

In [4]:
# Model Parameters
hidden_layers = 3 # number of Conv1D blocks requested from create_1d_cnn in the Analysis cell; 3 is also that function's own default

## Defining Model Function

In [6]:
def create_1d_cnn(input_size, hls = 3, num_classes = 75):
    """Build and compile a 1D convolutional classifier.

    The network is ``hls`` repetitions of Conv1D -> MaxPooling1D -> Dropout,
    followed by Flatten and a Dense output layer.

    Parameters
    ----------
    input_size : tuple
        Shape of a single sample; forwarded as ``input_shape`` to the first
        Conv1D layer.
    hls : int, default 3
        Number of convolutional blocks. The first block uses 32 filters,
        every later block uses 64; all use ``kernel_size=17`` and ReLU.
    num_classes : int, default 75
        Number of units in the output layer.

    Returns
    -------
    tf.keras.Sequential
        Model compiled with the Adam optimizer and
        ``SparseCategoricalCrossentropy(from_logits=True)``. The output layer
        has no activation, so ``model.predict`` returns raw logits; apply
        ``tf.nn.softmax`` to obtain class probabilities.
    """
    model = tf.keras.Sequential()
    for block_idx in range(hls):
        if block_idx == 0:
            # Only the first layer needs to be told the input shape.
            model.add(Conv1D(filters=32, kernel_size=17, activation='relu', input_shape=input_size))
        else:
            model.add(Conv1D(filters=64, kernel_size=17, activation='relu'))

        # Conv1D produces a rank-3 tensor, so pooling has to be 1D as well
        # (MaxPooling2D is neither imported nor shape-compatible here).
        model.add(MaxPooling1D(pool_size=2))
        model.add(Dropout(0.25))

    model.add(Flatten())
    # No softmax here: the loss below is configured with from_logits=True and
    # would otherwise receive already-normalized probabilities.
    model.add(Dense(num_classes))
    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    model.compile(optimizer="adam", loss=loss)

    return model

## Defining Metric Function

In [7]:
def PredictionTable(predictions, num_classes = 75):
    """Build a per-class metrics table from model predictions.

    Besides its arguments, this function reads three notebook-level globals:
    ``y_val`` (true class ids), ``labels`` (a frame with a ``"Cluster"``
    column, aligned to class ids by index) and ``dataset_sizes`` (a frame whose
    column ``0`` is matched against the cluster and whose column ``1`` becomes
    ``"size"``).

    Parameters
    ----------
    predictions : array-like, shape (n_samples, n_classes)
        Per-class scores for every validation sample; the predicted class is
        the argmax of each row.
    num_classes : int, default 75
        Number of classes. Class ids are taken to be ``0 .. num_classes - 1``.

    Returns
    -------
    pandas.DataFrame
        One row per class that has a match in ``dataset_sizes`` (inner merge),
        with columns ``TN, FP, FN, TP, precision, recall, f1, fbeta, cluster,
        size, log_size``. ``fbeta`` uses beta = 0.5 and is 0 where undefined;
        ``precision``, ``recall`` and ``f1`` are NaN where undefined.
    """
    y_pred = np.argmax(np.asarray(predictions), axis=1)

    # Passing `labels` pins the matrix to num_classes x num_classes even when
    # some class never appears in y_val or y_pred.
    confusion = metrics.confusion_matrix(y_val, y_pred, labels=np.arange(num_classes))

    # Rows are true classes, columns are predicted classes.
    tp = np.diag(confusion)
    fp = confusion.sum(axis=0) - tp  # predicted as class i, but truly another class
    fn = confusion.sum(axis=1) - tp  # truly class i, but predicted as another class
    tn = confusion.sum() - tp - fp - fn

    cm = pd.DataFrame({"TN": tn, "FP": fp, "FN": fn, "TP": tp}).astype(np.int64)

    precision = cm["TP"] / (cm["TP"] + cm["FP"])
    recall = cm["TP"] / (cm["TP"] + cm["FN"])
    beta_sq = 0.5 ** 2
    fbeta = (1 + beta_sq) * (precision * recall) / ((beta_sq * precision) + recall)

    cm = cm.assign(
        precision=precision,
        recall=recall,
        f1=2 * 1 / ((1 / precision) + (1 / recall)),
        # Undefined scores (0/0) are reported as 0 so every class can be plotted.
        fbeta=fbeta.fillna(0),
    )

    cm["cluster"] = labels["Cluster"]
    cm = cm.merge(dataset_sizes, how='inner', left_on='cluster', right_on=0)
    cm = cm.drop(columns=0).rename(columns={1: "size"})
    cm["log_size"] = np.log2(cm["size"])
    return cm

## Analysis 

In [None]:
# getting the input data
# NOTE(review): X_train, y_train, X_val, y_val, input_size, labels,
# dataset_sizes, fbeta_dict and activation_function are not defined in any
# visible cell. They must be created above this point for the notebook to
# survive Restart Kernel -> Run All.

# create_1d_cnn names its depth parameter `hls`, not `hidden_layers`.
model = create_1d_cnn(input_size, hls=hidden_layers)

callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=5, verbose=0)

# Keep the returned History object: the loss curves below read history.history.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size=1024,
    callbacks=[callback],
    verbose=1,
)

# NOTE(review): softmax on top of model.predict is only meaningful if the model
# emits logits -- confirm against create_1d_cnn's output layer.
predictions = tf.nn.softmax(model.predict(X_val)).numpy()
cm = PredictionTable(predictions)

# NOTE(review): the key is `activation_function` although this notebook varies
# hidden_layers -- confirm this is the intended key.
fbeta_dict[activation_function] = list(cm["fbeta"])

Title = str(hidden_layers)

print(Title)
fig = cm.plot.scatter(x="log_size", y="fbeta",ylim=[-0.05, 1.05], title=Title).get_figure()
# path = "Plots/" + Title + ".png"
# plt.savefig(path)
# Loss
loss_fig, loss_ax = plt.subplots()
loss_ax.xaxis.set_major_locator(MaxNLocator(integer=True))  # epochs are whole numbers
loss_ax.plot(history.history['loss'], label="Train Loss")
loss_ax.plot(history.history['val_loss'], label="Val Loss")
loss_ax.set_xlabel("Epoch")
loss_ax.set_ylabel("Loss")
loss_ax.set_title(Title + " Loss")
loss_ax.legend(loc="best")
loss_ax.set_ylim(0, 1.5)