In [1]:
import os
import sys
from pathlib import Path

# Mac CPU stuff: cap the native thread pools.
# These are exported BEFORE numpy / TensorFlow are imported so the native
# runtimes see them when they initialise; setting them after the import
# (as this cell used to) may be too late to take effect.
os.environ["OMP_NUM_THREADS"] = "2"
os.environ["TF_NUM_INTRAOP_THREADS"] = "2"
os.environ["TF_NUM_INTEROP_THREADS"] = "2"

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Sequential

tf.keras.backend.set_floatx("float32")

# Seed Python, NumPy and TensorFlow in one call so weight initialisation,
# dropout masks and batch shuffling are repeatable across kernel restarts.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

print("TensorFlow Version:", tf.__version__)

# Add SRC to sys path.
# Resolved relative to the working directory instead of a user-specific
# absolute path, so the notebook runs on any checkout. This assumes src/ sits
# one level up, like the "../data/..." path used for the CSV below; set
# PROJECT_SRC_DIR to point elsewhere if the layout differs.
SRC_DIR = Path(os.environ.get("PROJECT_SRC_DIR", "../src")).resolve()
if str(SRC_DIR) not in sys.path:
    sys.path.append(str(SRC_DIR))

from data_prep_dropmissing import load_and_prepare_data_dropmissing

2025-12-10 18:56:18.156267: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


TensorFlow Version: 2.11.0


In [2]:
# Location of the raw CSV, relative to the notebook's working directory.
csv_path = "../data/external/FPA_FOD_Plus.csv"

print("Loading data (drop missing + SMOTENC)...")
# The project loader hands back six arrays: features and labels for the
# train, validation and test splits, in that order.
# NOTE(review): `sample_size` presumably caps the training rows before
# oversampling (the loader logs a down-sample step) - confirm in src/.
(
    X_train, Y_train,
    X_val, Y_val,
    X_test, Y_test,
) = load_and_prepare_data_dropmissing(
    csv_path, smote_strategy="smotenc", sample_size=500_000
)

print("\nShapes:")
# One line per split: feature-matrix shape followed by label-vector shape.
for _split_label, _split_X, _split_y in (
    ("Train:", X_train, Y_train),
    ("Val  :", X_val, Y_val),
    ("Test :", X_test, Y_test),
):
    print(_split_label, _split_X.shape, _split_y.shape)

Loading data (drop missing + SMOTENC)...

Loading with Polars...
➡ Loaded: (2302521, 308)

🧹 Dropping rows with ANY missing value...
➡ Removed 1,312,209 rows — Remaining: 990,312
✂ Training down-sampled ➜ (500000, 45)

Applying SMOTENC...

Shapes:
Train: (1948208, 48) (1948208,)
Val  : (198063, 48) (198063,)
Test : (198063, 48) (198063,)


In [3]:
def build_fnn(
    input_dim,
    hidden_layers=(64, 32),
    dropout_rate=0.2,
    learning_rate=0.01,
    num_classes=4,
):
    """Build and compile a feed-forward softmax classifier.

    The network is: input -> [Dense(relu) -> Dropout] per entry of
    ``hidden_layers`` -> Dense(softmax).

    Args:
        input_dim: Number of input features per sample.
        hidden_layers: Sequence of unit counts, one per hidden Dense layer.
            May be empty, in which case the model is a single softmax layer.
            The default is an immutable tuple so it cannot be altered
            between calls; lists are still accepted.
        dropout_rate: Dropout rate applied after every hidden layer.
        learning_rate: Learning rate passed to the Adam optimizer.
        num_classes: Width of the softmax output layer (defaults to the
            4 classes this notebook uses).

    Returns:
        A compiled ``Sequential`` model using sparse categorical
        cross-entropy loss (integer class labels) and tracking accuracy.
    """
    model = Sequential()
    # Declare the input explicitly instead of passing `input_shape` to the
    # first Dense layer: every hidden layer is then built the same way and an
    # empty `hidden_layers` no longer raises IndexError.
    model.add(tf.keras.Input(shape=(input_dim,)))

    for units in hidden_layers:
        model.add(Dense(units, activation="relu"))
        model.add(Dropout(dropout_rate))

    model.add(Dense(num_classes, activation="softmax"))  # one unit per class

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model


print("\nBuilding FNN Model...")

# The input width comes from the prepared training matrix; the remaining
# hyperparameters are spelled out so they are easy to tune from this cell.
fnn_hyperparams = {
    "hidden_layers": [64, 32],
    "dropout_rate": 0.2,
    "learning_rate": 0.01,
}
model = build_fnn(X_train.shape[1], **fnn_hyperparams)

# Print the layer-by-layer architecture and parameter counts.
model.summary()


Building FNN Model...
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 64)                3136      
                                                                 
 dropout (Dropout)           (None, 64)                0         
                                                                 
 dense_1 (Dense)             (None, 32)                2080      
                                                                 
 dropout_1 (Dropout)         (None, 32)                0         
                                                                 
 dense_2 (Dense)             (None, 4)                 132       
                                                                 
Total params: 5,348
Trainable params: 5,348
Non-trainable params: 0
_________________________________________________________________


2025-12-10 19:00:58.233312: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
# Training schedule.
EPOCHS = 20
BATCH_SIZE = 64

print("\nTraining model...")

# Fit on the training split; the validation split is scored at the end of
# every epoch and the per-epoch metrics are kept on `history`.
history = model.fit(
    x=X_train,
    y=Y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(X_val, Y_val),
    verbose=1,
)

print("\nTraining complete")


Training model...
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

Training complete


In [5]:
print("\nEvaluating on Validation Set...")
# The model outputs one softmax score per class; the predicted label is the
# index of the highest score in each row.
val_class_scores = model.predict(X_val)
y_pred_val = np.argmax(val_class_scores, axis=1)

# NOTE: in the recorded run the network predicted class 0 for every
# validation row, so the accuracy only reflects the share of class 0 in the
# validation set. Read the per-class recall and the confusion matrix below
# rather than the headline accuracy.
print("Accuracy:", accuracy_score(Y_val, y_pred_val))
# zero_division=0 reports 0.0 for classes that were never predicted without
# emitting an UndefinedMetricWarning for each affected metric.
print(
    "\nClassification Report:\n",
    classification_report(Y_val, y_pred_val, zero_division=0),
)
print("\nConfusion Matrix:\n", confusion_matrix(Y_val, y_pred_val))


Evaluating on Validation Set...
Accuracy: 0.9741092480675341

Classification Report:
               precision    recall  f1-score   support

           0       0.97      1.00      0.99    192935
           1       0.00      0.00      0.00      4768
           2       0.00      0.00      0.00       282
           3       0.00      0.00      0.00        78

    accuracy                           0.97    198063
   macro avg       0.24      0.25      0.25    198063
weighted avg       0.95      0.97      0.96    198063


Confusion Matrix:
 [[192935      0      0      0]
 [  4768      0      0      0]
 [   282      0      0      0]
 [    78      0      0      0]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
