In [2]:
from pathlib import Path
import numpy as np
import sys
import tensorflow as tf
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

sys.path.append(str(Path.cwd().parent / "src"))
from data_prep import load_and_prepare_data

# load data
csv_path = "../data/external/FPA_FOD_Plus.csv"

In [3]:
print("TF VERSION:", tf.__version__)

TF VERSION: 2.11.0


In [4]:
X_train, Y_train, X_val, Y_val, X_test, Y_test = load_and_prepare_data(
    csv_path,
    smote_strategy="none"
)

X_train = X_train.astype("float32")
X_val = X_val.astype("float32")


Loading data with Polars...
File loaded into Pandas. Shape: (2302521, 308)

 No oversampling applied (dataset remains imbalanced).


In [5]:
# define fnn per other example using smotenc (this one doesnt)
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation="relu", input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(32, activation="relu"),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(4, activation="softmax")
])

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)

print(model.summary())

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 64)                3200      
                                                                 
 dropout (Dropout)           (None, 64)                0         
                                                                 
 dense_1 (Dense)             (None, 32)                2080      
                                                                 
 dropout_1 (Dropout)         (None, 32)                0         
                                                                 
 dense_2 (Dense)             (None, 4)                 132       
                                                                 
Total params: 5,412
Trainable params: 5,412
Non-trainable params: 0
_________________________________________________________________
None


2025-12-07 00:51:58.829207: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [8]:
print("\nTraining FNN (NO SMOTE)...")

history = model.fit(
    X_train, Y_train,
    validation_data=(X_val, Y_val),
    epochs=20,
    batch_size=64,
    verbose=1
)



Training FNN (NO SMOTE)...
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [7]:
# evaluate metrics
y_pred_prob = model.predict(X_val)
y_pred = np.argmax(y_pred_prob, axis=1)

print("\nValidation Metrics")
print("Accuracy:", accuracy_score(Y_val, y_pred))
print("\nClassification Report:\n", classification_report(Y_val, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(Y_val, y_pred))


Validation Metrics
Accuracy: 0.9736310650938971

Classification Report:
               precision    recall  f1-score   support

           0       0.97      1.00      0.99    448361
           1       0.00      0.00      0.00     11185
           2       0.00      0.00      0.00       738
           3       0.00      0.00      0.00       220

    accuracy                           0.97    460504
   macro avg       0.24      0.25      0.25    460504
weighted avg       0.95      0.97      0.96    460504


Confusion Matrix:
 [[448361      0      0      0]
 [ 11185      0      0      0]
 [   738      0      0      0]
 [   220      0      0      0]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
