In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

# 1. Load the dataset
df = pd.read_csv('/kaggle/input/os-hard-mode-iso-csv/dos_hard_mode_iso.csv')

# 2. Preprocessing
# Select features: ID, DLC bytes, and Delta_T
features = ['ID_int', 'DLC0', 'DLC1', 'DLC2', 'DLC3', 'DLC4', 'DLC5', 'DLC6', 'DLC7', 'Delta_T']
X = df[features].values
y = df['Label'].values

# Scaling is crucial for Neural Networks
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 3. Create Sequences (Sliding Window)
# LSTMs require input shape: (samples, time_steps, features)
def create_sequences(data, labels, window_size=10):
    X_seq, y_seq = [], []
    for i in range(len(data) - window_size):
        X_seq.append(data[i:i + window_size])
        y_seq.append(labels[i + window_size])
    return np.array(X_seq), np.array(y_seq)

WINDOW_SIZE = 10  # Look back at the last 10 messages
X_seq, y_seq = create_sequences(X_scaled, y, window_size=WINDOW_SIZE)

# 4. Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(
    X_seq, y_seq, test_size=0.2, random_state=42, stratify=y_seq
)

# 5. Build the LSTM Model
model = Sequential([
    LSTM(64, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True),
    Dropout(0.2),
    LSTM(32),
    Dropout(0.2),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid') # Binary output: 0 (Normal) or 1 (DoS)
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# 6. Train the Model
print("Starting LSTM training...")
history = model.fit(
    X_train, y_train,
    epochs=10,
    batch_size=64,
    validation_split=0.1,
    verbose=1
)

# 7. Evaluation
y_pred = (model.predict(X_test) > 0.5).astype("int32")

print("\n--- LSTM Performance ---")
print(classification_report(y_test, y_pred))

# 8. Save the model
model.save('dos_lstm_model.h5')
print("Model saved as dos_lstm_model.h5")

2025-12-31 14:19:31.375333: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1767190771.606070      47 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1767190771.670862      47 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

Starting LSTM training...
Epoch 1/10


2025-12-31 14:19:49.768621: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)
  super().__init__(**kwargs)


[1m676/676[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 13ms/step - accuracy: 0.8376 - loss: 0.3589 - val_accuracy: 0.9076 - val_loss: 0.2381
Epoch 2/10
[1m676/676[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 12ms/step - accuracy: 0.9087 - loss: 0.2421 - val_accuracy: 0.9130 - val_loss: 0.2270
Epoch 3/10
[1m676/676[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 12ms/step - accuracy: 0.9114 - loss: 0.2332 - val_accuracy: 0.9144 - val_loss: 0.2235
Epoch 4/10
[1m676/676[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 12ms/step - accuracy: 0.9111 - loss: 0.2318 - val_accuracy: 0.9138 - val_loss: 0.2254
Epoch 5/10
[1m676/676[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 12ms/step - accuracy: 0.9108 - loss: 0.2314 - val_accuracy: 0.9130 - val_loss: 0.2246
Epoch 6/10
[1m676/676[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 12ms/step - accuracy: 0.9101 - loss: 0.2300 - val_accuracy: 0.9130 - val_loss: 0.2226
Epoch 7/10
[1m676/676[0m [32m




--- LSTM Performance ---
              precision    recall  f1-score   support

           0       0.93      0.94      0.93      7998
           1       0.88      0.85      0.87      4012

    accuracy                           0.91     12010
   macro avg       0.90      0.90      0.90     12010
weighted avg       0.91      0.91      0.91     12010

Model saved as dos_lstm_model.h5


In [20]:
import numpy as np
import tensorflow as tf
from sklearn.metrics import classification_report

# 1. LOAD the data from the path string into a numpy array
# Your error shows the path is '/kaggle/working/X_test2.npy'
print("Loading X_test data from disk...")
X_test_loaded = np.load('/kaggle/working/X_test2.npy')
y_test_loaded = np.load('/kaggle/working/y_test2.npy') 

print(f"Data loaded. Shape: {X_test_loaded.shape}")

# 2. Make Predictions with the Base Model
print("Predicting with Base Model...")
# Use the LOADED data, not the string path
y_probs = base_model.predict(X_test_loaded, verbose=0)
y_pred_base = (y_probs > 0.5).astype(int)

# 3. Print the Performance Report
print("\n" + "="*35)
print("   BASE MODEL PERFORMANCE (FP32)")
print("="*35)
print(classification_report(y_test_loaded, y_pred_base, target_names=['Normal', 'DoS Attack']))

Loading X_test data from disk...
Data loaded. Shape: (12010, 10, 10)
Predicting with Base Model...

   BASE MODEL PERFORMANCE (FP32)
              precision    recall  f1-score   support

      Normal       0.91      0.96      0.94      7996
  DoS Attack       0.90      0.82      0.86      4014

    accuracy                           0.91     12010
   macro avg       0.91      0.89      0.90     12010
weighted avg       0.91      0.91      0.91     12010



In [21]:
import os
import time
import numpy as np
from sklearn.metrics import f1_score, accuracy_score

# 1. Evaluate Base Model (Float32)
start_base = time.time()
base_probs = base_model.predict(X_test_loaded, verbose=0)
base_preds = (base_probs > 0.5).astype(int)
base_time = (time.time() - start_base) / len(X_test_loaded)
base_f1 = f1_score(y_test_loaded, base_preds)

# 2. Evaluate Quantized Model (Int8) - Using a sample of 100 for speed
interpreter = tf.lite.Interpreter(model_path='/kaggle/working/dos_model_full_int.tflite')
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()[0]
output_details = interpreter.get_output_details()[0]
in_scale, in_zero = input_details['quantization']
out_scale, out_zero = output_details['quantization']

q_preds = []
start_q = time.time()
for i in range(len(X_test_loaded)):
    input_data = ((X_test_loaded[i] / in_scale) + in_zero).astype(np.uint8)
    interpreter.set_tensor(input_details['index'], np.expand_dims(input_data, axis=0))
    interpreter.invoke()
    output_data = interpreter.get_tensor(output_details['index'])
    prob = (output_data.astype(np.float32) - out_zero) * out_scale
    q_preds.append(1 if prob[0][0] > 0.5 else 0)
q_time = (time.time() - start_q) / len(X_test_loaded)
q_f1 = f1_score(y_test_loaded, q_preds)

# 3. Model Sizes
base_size = os.path.getsize('/kaggle/input/x-test-npy/final_dos_attack_model.keras') / 1024
q_size = os.path.getsize('/kaggle/working/dos_model_full_int.tflite') / 1024

print("\n" + "="*40)
print(f"{'Metric':<20} | {'Base (FP32)':<12} | {'Int8':<10}")
print("-" * 40)
print(f"{'Model Size (KB)':<20} | {base_size:<12.2f} | {q_size:<10.2f}")
print(f"{'F1-Score (Attack)':<20} | {base_f1:<12.4f} | {q_f1:<10.4f}")
print(f"{'Latency/Msg (ms)':<20} | {base_time*1000:<12.4f} | {q_time*1000:<10.4f}")
print("="*40)

    TF 2.20. Please use the LiteRT interpreter from the ai_edge_litert package.
    See the [migration guide](https://ai.google.dev/edge/litert/migration)
    for details.
    



Metric               | Base (FP32)  | Int8      
----------------------------------------
Model Size (KB)      | 431.98       | 125.66    
F1-Score (Attack)    | 0.8614       | 0.8608    
Latency/Msg (ms)     | 0.1284       | 0.0685    


In [22]:
from sklearn.metrics import precision_score, recall_score, f1_score

# 1. Base Model Calculations
base_precision = precision_score(y_test_loaded, base_preds)
base_recall = recall_score(y_test_loaded, base_preds)

# 2. Int8 Model Calculations (Using q_preds from the previous step)
q_precision = precision_score(y_test_loaded, q_preds)
q_recall = recall_score(y_test_loaded, q_preds)

# 3. Print Comparison Table
print("\n" + "="*50)
print(f"{'Metric (DoS Class)':<20} | {'Base (FP32)':<12} | {'Int8 (INT8)':<10}")
print("-" * 50)
print(f"{'Precision':<20} | {base_precision:<12.4f} | {q_precision:<10.4f}")
print(f"{'Recall':<20} | {base_recall:<12.4f} | {q_recall:<10.4f}")
print(f"{'F1-Score':<20} | {base_f1:<12.4f} | {q_f1:<10.4f}")
print("="*50)


Metric (DoS Class)   | Base (FP32)  | Int8 (INT8)
--------------------------------------------------
Precision            | 0.9046       | 0.9045    
Recall               | 0.8221       | 0.8211    
F1-Score             | 0.8614       | 0.8608    


In [18]:
import tensorflow as tf
import numpy as np

# 1. Load the actual data (must be loaded as arrays, not strings)
X_test_data = np.load('/kaggle/working/X_test2.npy')
y_test_data = np.load('/kaggle/working/y_test2.npy')

# 2. Load the Keras model
# Note: Ensure the path points to your .keras file
model = tf.keras.models.load_model('/kaggle/input/x-test-npy/final_dos_attack_model.keras')

# 3. Evaluate
print("Evaluating Baseline Keras Model...")
loss, accuracy = model.evaluate(X_test_data, y_test_data, verbose=0)

print(f"\nBaseline Accuracy: {accuracy * 100:.2f}%")

Evaluating Baseline Keras Model...

Baseline Accuracy: 91.16%


In [6]:
model = tf.keras.Sequential([
    # Define static batch size (1) for LSTM stability in LiteRT
    tf.keras.layers.InputLayer(input_shape=(10, 10), batch_size=1),
    
    tf.keras.layers.LSTM(64, return_sequences=True),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.2),
    
    tf.keras.layers.LSTM(32),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.2),
    
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(1)
])



In [5]:
model.summary()

In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# 1. Load and Preprocess Dataset
df = pd.read_csv('/kaggle/input/os-hard-mode-iso-csv/dos_hard_mode_iso.csv')
features = ['ID_int', 'DLC0', 'DLC1', 'DLC2', 'DLC3', 'DLC4', 'DLC5', 'DLC6', 'DLC7', 'Delta_T']
X = df[features].values
y = df['Label'].values

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Create sequences for LSTM
def create_sequences(data, labels, window_size=10):
    X_seq, y_seq = [], []
    for i in range(len(data) - window_size):
        X_seq.append(data[i:i + window_size])
        y_seq.append(labels[i + window_size])
    return np.array(X_seq), np.array(y_seq)

X_seq, y_seq = create_sequences(X_scaled, y, window_size=10)
X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, test_size=0.2, random_state=42)

# 2. Define LSTM Model
model = Sequential([
    LSTM(64, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True),
    BatchNormalization(),
    Dropout(0.2),
    LSTM(32),
    BatchNormalization(),
    Dropout(0.2),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# 3. Setup Callbacks to save in .keras format
# This saves the model automatically if validation loss improves
checkpoint = ModelCheckpoint(
    'best_dos_model.keras', 
    monitor='val_loss', 
    save_best_only=True, 
    verbose=1
)

early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# 4. Train the Model
model.fit(
    X_train, y_train,
    epochs=20,
    batch_size=64,
    validation_data=(X_test, y_test),
    callbacks=[checkpoint, early_stop]
)

# 5. Explicitly save the final model
model.save('final_dos_attack_mode4l243.keras')

print("Model saved as new.keras successfully!")

Epoch 1/20


2025-12-31 15:37:48.404338: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)
  super().__init__(**kwargs)


[1m750/751[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 13ms/step - accuracy: 0.8722 - loss: 0.3213
Epoch 1: val_loss improved from inf to 0.23241, saving model to best_dos_model.keras
[1m751/751[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 15ms/step - accuracy: 0.8723 - loss: 0.3212 - val_accuracy: 0.9107 - val_loss: 0.2324
Epoch 2/20
[1m748/751[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 13ms/step - accuracy: 0.9101 - loss: 0.2363
Epoch 2: val_loss improved from 0.23241 to 0.23037, saving model to best_dos_model.keras
[1m751/751[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 15ms/step - accuracy: 0.9101 - loss: 0.2363 - val_accuracy: 0.9112 - val_loss: 0.2304
Epoch 3/20
[1m747/751[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 13ms/step - accuracy: 0.9109 - loss: 0.2312
Epoch 3: val_loss improved from 0.23037 to 0.22823, saving model to best_dos_model.keras
[1m751/751[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 15ms/step - 

In [3]:
import numpy as np
import tensorflow as tf
import pickle
import gc
import os

# 1. Re-generate the data (to ensure X_train.npy is healthy)
# Use your original dataframe and scaler logic here
# Assuming X_train, X_test, etc., are in memory. If not, re-run your split code first.

print("Saving fresh data files...")
np.save('X_train_fixed.npy', X_train)

# 2. Clear RAM
tf.keras.backend.clear_session()
gc.collect()

# 3. Build the TFLite-compatible Static Model
model_static = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=(10, 10), batch_size=1),
    # unroll=True is MANDATORY for full integer quantization of LSTMs
    tf.keras.layers.LSTM(64, return_sequences=True, unroll=True),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.LSTM(32, unroll=True),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

Saving fresh data files...




In [4]:
# 4. Load weights from your trained model
# Replace 'final_dos_attack_model.keras' with your saved model path
trained_model = tf.keras.models.load_model('final_dos_attack_model.keras')
model_static.set_weights(trained_model.get_weights())
del trained_model
gc.collect()

4156

In [5]:
# 5. Calibration Generator (using only 50 samples to save RAM)
def representative_data_gen():
    # Load just a tiny slice
    data = np.load('X_train_fixed.npy', mmap_mode='r')
    for i in range(50):
        sample = np.expand_dims(data[i], axis=0).astype(np.float32)
        yield [sample]

In [6]:
# 6. Convert
converter = tf.lite.TFLiteConverter.from_keras_model(model_static)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_data_gen

In [7]:
# Set flags for Integer Quantization
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.uint8
converter.inference_output_type = tf.uint8

In [8]:
print("Starting conversion... this should be stable now.")
tflite_model = converter.convert()

Starting conversion... this should be stable now.
INFO:tensorflow:Assets written to: /tmp/tmpvysqvfxn/assets


INFO:tensorflow:Assets written to: /tmp/tmpvysqvfxn/assets


Saved artifact at '/tmp/tmpvysqvfxn'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(1, 10, 10), dtype=tf.float32, name='keras_tensor')
Output Type:
  TensorSpec(shape=(1, 1), dtype=tf.float32, name=None)
Captures:
  139589351791056: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139589351786640: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139589351787600: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139589351787792: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139589351787408: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139589351787024: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139589351790864: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139589376891152: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139589376893072: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139589351789520: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139589351790096: TensorSp

W0000 00:00:1767195802.309278    2160 tf_tfl_flatbuffer_helpers.cc:365] Ignored output_format.
W0000 00:00:1767195802.309333    2160 tf_tfl_flatbuffer_helpers.cc:368] Ignored drop_control_dependency.
I0000 00:00:1767195802.325812    2160 mlir_graph_optimization_pass.cc:401] MLIR V1 optimization pass is not enabled
fully_quantize: 0, inference_type: 6, input_inference_type: UINT8, output_inference_type: UINT8


In [9]:
with open('dos_model_full_int.tflite', 'wb') as f:
    f.write(tflite_model)

print("✅ Done! File saved as dos_model_full_int.tflite")

✅ Done! File saved as dos_model_full_int.tflite


In [2]:
import pandas as pd
import numpy as np
import pickle
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from IPython.display import FileLink

# 1. Load the data (Ensure the path is correct for your environment)
df = pd.read_csv('/kaggle/input/os-hard-mode-iso-csv/dos_hard_mode_iso.csv')
features = ['ID_int', 'DLC0', 'DLC1', 'DLC2', 'DLC3', 'DLC4', 'DLC5', 'DLC6', 'DLC7', 'Delta_T']
X = df[features].values
y = df['Label'].values

# 2. Scale and SAVE the scaler as a .pkl file
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
with open('scaler2.pkl', 'wb') as f:
    pickle.dump(scaler, f)

# 3. Create sequences
def create_sequences(data, labels, window_size=10):
    X_seq, y_seq = [], []
    for i in range(len(data) - window_size):
        X_seq.append(data[i:i + window_size])
        y_seq.append(labels[i + window_size])
    return np.array(X_seq), np.array(y_seq)

X_seq, y_seq = create_sequences(X_scaled, y, window_size=10)

# 4. Split the data
X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, test_size=0.2, random_state=42)

# 5. Save everything to disk
np.save('X_train2.npy', X_train)
np.save('y_train3.npy', y_train)
np.save('X_test2.npy', X_test)
np.save('y_test2.npy', y_test)

print("Files saved: X_train.npy, y_train.npy, X_test.npy, y_test.npy, scaler.pkl")

# 6. Generate download links
display(FileLink('X_train2.npy'))
display(FileLink('y_train3.npy'))
display(FileLink('X_test2.npy'))
display(FileLink('y_test2.npy'))
display(FileLink('scaler2.pkl'))

Files saved: X_train.npy, y_train.npy, X_test.npy, y_test.npy, scaler.pkl


In [3]:
import tensorflow as tf
import numpy as np

# 1. LOAD your existing trained model first
# Replace 'final_dos_attack_model.keras' with your actual filename if different
model = tf.keras.models.load_model('/kaggle/input/x-test-npy/final_dos_attack_model.keras')

# 2. Rebuild the model with a static batch size (Exactly as before)
model_static = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=(10, 10), batch_size=1),
    tf.keras.layers.LSTM(64, return_sequences=True),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.LSTM(32),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# 3. NOW copy the weights (this will work because 'model' is an object now)
model_static.set_weights(model.get_weights())

2025-12-31 16:37:53.013214: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


In [4]:
import numpy as np
import tensorflow as tf
import gc

# 1. Clear memory from previous attempts
tf.keras.backend.clear_session()
gc.collect()

# 2. Load data using mmap (doesn't pull the whole file into RAM)
# This prevents the kernel from dying immediately
calibration_data = np.load('/kaggle/input/x-test-npy/X_train (1).npy', mmap_mode='r')

# 3. Use a very small generator
def representative_data_gen():
    # Reducing to 50 samples can save RAM while still giving good calibration
    for i in range(50):
        # Read one sample at a time
        sample = calibration_data[i].copy() 
        sample = np.expand_dims(sample, axis=0).astype(np.float32)
        yield [sample]

In [5]:
# 4. Configure converter
# Make sure model_static is defined right before this
converter = tf.lite.TFLiteConverter.from_keras_model(model_static)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_data_gen

In [6]:
# Mandatory for LSTMs
converter.experimental_enable_resource_variables = True
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.uint8
converter.inference_output_type = tf.uint8

In [3]:


import numpy as np
import tensorflow as tf
import pickle
import gc
import os

# 1. Re-generate the data (to ensure X_train.npy is healthy)
# Use your original dataframe and scaler logic here
# Assuming X_train, X_test, etc., are in memory. If not, re-run your split code first.
X_train='/kaggle/input/x-test-npy/X_train (1).npy'
print("Saving fresh data files...")
np.save('X_train_fixed.npy', X_train)

# 2. Clear RAM
tf.keras.backend.clear_session()
gc.collect()

# 3. Build the TFLite-compatible Static Model
model_static = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=(10, 10), batch_size=1),
    # unroll=True is MANDATORY for full integer quantization of LSTMs
    tf.keras.layers.LSTM(64, return_sequences=True, unroll=True),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.LSTM(32, unroll=True),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# 4. Load weights from your trained model
# Replace 'final_dos_attack_model.keras' with your saved model path
trained_model = tf.keras.models.load_model('/kaggle/input/x-test-npy/final_dos_attack_model.keras')
model_static.set_weights(trained_model.get_weights())
del trained_model
gc.collect()

# 5. Calibration Generator (using only 50 samples to save RAM)
def representative_data_gen():
    # Load just a tiny slice
    data = np.load('X_train_fixed.npy', mmap_mode='r')
    for i in range(50):
        sample = np.expand_dims(data[i], axis=0).astype(np.float32)
        yield [sample]

Saving fresh data files...


In [None]:
tflite_model = converter.convert()
with open('dos_model_full_int.tflite', 'wb') as f:
    f.write(tflite_model)

print("Full Integer Quantized model saved successfully!")

In [None]:
# 3. Define the Representative Dataset Generator
def representative_data_gen():
    # Use 100 samples from X_train to calibrate ranges
    for input_value in tf.data.Dataset.from_tensor_slices(X_train).batch(1).take(100):
        yield [tf.cast(input_value, tf.float32)]

# 4. Setup the Converter for Full Integer
converter = tf.lite.TFLiteConverter.from_keras_model(model_static)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_data_gen

# Ensure the converter handles the LSTM internals correctly
converter.experimental_enable_resource_variables = True

# Force everything to Int8 (No float fallback)
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.uint8
converter.inference_output_type = tf.uint8

# 5. Convert and Save
tflite_full_int_model = converter.convert()

with open('dos_model_full_int.tflite', 'wb') as f:
    f.write(tflite_full_int_model)

print("Success! Full Integer Quantized model saved as dos_model_full_int.tflite")

INFO:tensorflow:Assets written to: /tmp/tmpi6r7d7ac/assets


INFO:tensorflow:Assets written to: /tmp/tmpi6r7d7ac/assets


Saved artifact at '/tmp/tmpi6r7d7ac'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(1, 10, 10), dtype=tf.float32, name='keras_tensor_81')
Output Type:
  TensorSpec(shape=(1, 1), dtype=tf.float32, name=None)
Captures:
  133814004537808: TensorSpec(shape=(), dtype=tf.resource, name=None)
  133814004546064: TensorSpec(shape=(), dtype=tf.resource, name=None)
  133813617180944: TensorSpec(shape=(), dtype=tf.resource, name=None)
  133814004545872: TensorSpec(shape=(), dtype=tf.resource, name=None)
  133813609522832: TensorSpec(shape=(), dtype=tf.resource, name=None)
  133814004545296: TensorSpec(shape=(), dtype=tf.resource, name=None)
  133813617181136: TensorSpec(shape=(), dtype=tf.resource, name=None)
  133814004544528: TensorSpec(shape=(), dtype=tf.resource, name=None)
  133814004546256: TensorSpec(shape=(), dtype=tf.resource, name=None)
  133813615385424: TensorSpec(shape=(), dtype=tf.resource, name=None)
  133813615389648: Tenso

W0000 00:00:1767193457.261243    1305 tf_tfl_flatbuffer_helpers.cc:365] Ignored output_format.
W0000 00:00:1767193457.261268    1305 tf_tfl_flatbuffer_helpers.cc:368] Ignored drop_control_dependency.


In [1]:
# 4. Proceed with conversion...
def representative_data_gen():
    for i in range(100):
        # We use X_train here (ensure it is loaded in your environment)
        sample = np.expand_dims(X_train[i], axis=0).astype(np.float32)
        yield [sample]



In [None]:
converter = tf.lite.TFLiteConverter.from_keras_model(model)

tflite_model = converter.convert()

In [5]:
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_data_gen


In [8]:
import tensorflow as tf

# Create the converter from your existing trained model
converter_drq = tf.lite.TFLiteConverter.from_keras_model(model)

# 1. Enable Dynamic Range Quantization
converter_drq.optimizations = [tf.lite.Optimize.DEFAULT]

# 2. Allow LiteRT to use original TF kernels for the LSTM's TensorLists
converter_drq.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS, # Use TFLite ops where possible
    tf.lite.OpsSet.SELECT_TF_OPS    # Use TF ops for the failing LSTM parts
]

# 3. Essential flags for LSTM/Resource Variables
converter_drq.experimental_enable_resource_variables = True
converter_drq._experimental_lower_tensor_list_ops = False

# 4. Convert
tflite_drq_model = converter_drq.convert()

# 5. Save the baseline model
with open('dos_model_drq_flex.tflite', 'wb') as f:
    f.write(tflite_drq_model)

print("Success! DRQ Model (with Flex Ops) saved as dos_model_drq_flex.tflite")

INFO:tensorflow:Assets written to: /tmp/tmpoo188pbc/assets


INFO:tensorflow:Assets written to: /tmp/tmpoo188pbc/assets


Saved artifact at '/tmp/tmpoo188pbc'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 10, 10), dtype=tf.float32, name='keras_tensor')
Output Type:
  TensorSpec(shape=(None, 1), dtype=tf.float32, name=None)
Captures:
  134984918624272: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134984918624848: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134984918624656: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134984918625424: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134984918626000: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134984918625040: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134984918622928: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134984918627344: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134984918628112: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134984918628688: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134984918626960: Te

W0000 00:00:1767192843.735319     691 tf_tfl_flatbuffer_helpers.cc:365] Ignored output_format.
W0000 00:00:1767192843.735352     691 tf_tfl_flatbuffer_helpers.cc:368] Ignored drop_control_dependency.


In [9]:
import numpy as np
import tensorflow as tf

def evaluate_drq_model(tflite_path, X_test, y_test):
    # 1. Load the TFLite model and allocate tensors
    interpreter = tf.lite.Interpreter(model_path=tflite_path)
    interpreter.allocate_tensors()

    # 2. Get input and output details
    input_details = interpreter.get_input_details()[0]
    output_details = interpreter.get_output_details()[0]

    predictions = []
    
    print(f"Evaluating {tflite_path}...")

    # 3. Iterate through the test set
    for i in range(len(X_test)):
        # Prepare the input: add batch dimension and ensure float32
        input_data = np.expand_dims(X_test[i], axis=0).astype(np.float32)
        
        # Set the tensor
        interpreter.set_tensor(input_details['index'], input_data)
        
        # Run inference
        interpreter.invoke()
        
        # Get the result (sigmoid output)
        output_data = interpreter.get_tensor(output_details['index'])
        # Convert sigmoid probability to binary class (0 or 1)
        pred = 1 if output_data[0][0] > 0.5 else 0
        predictions.append(pred)

    # 4. Calculate Accuracy
    predictions = np.array(predictions)
    accuracy = (np.sum(predictions == y_test) / len(y_test)) * 100
    return accuracy

# Run the evaluation
drq_accuracy = evaluate_drq_model('/kaggle/working/dos_model_drq_flex.tflite', X_test, y_test)
print(f"\nDRQ Model Accuracy: {drq_accuracy:.2f}%")

Evaluating /kaggle/working/dos_model_drq_flex.tflite...


INFO: Created TensorFlow Lite delegate for select TF ops.
INFO: TfLiteFlexDelegate delegate: 4 nodes delegated out of 23 nodes with 3 partitions.

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.



DRQ Model Accuracy: 91.15%


In [15]:
import numpy as np

# 1. LOAD the actual data from the disk
# Make sure these paths match your saved file names
print("Loading test data into memory...")
X_test_data = np.load('/kaggle/working/X_test2.npy') 
y_test_data = np.load('/kaggle/working/y_test2.npy')

print(f"Loaded {len(X_test_data)} samples.")

# 2. Re-run the evaluation using the DATA, not the path strings
# (Make sure 'evaluate_tflite_uint8' is defined in your notebook)
try:
    predictions = evaluate_tflite_uint8(
        '/kaggle/working/dos_model_full_int.tflite', 
        X_test_data, 
        y_test_data
    )
except Exception as e:
    print(f"Evaluation failed: {e}")

Loading test data into memory...
Loaded 12010 samples.
Testing 12010 samples...

--- Final Evaluation ---
Accuracy: 91.12%

Classification Report:
              precision    recall  f1-score   support

           0       0.91      0.96      0.93      7996
           1       0.90      0.82      0.86      4014

    accuracy                           0.91     12010
   macro avg       0.91      0.89      0.90     12010
weighted avg       0.91      0.91      0.91     12010



In [10]:
import os

keras_size = os.path.getsize('final_dos_attack_model.keras') / 1024
tflite_size = os.path.getsize('dos_model_drq_flex.tflite') / 1024

print(f"Original Model Size: {keras_size:.2f} KB")
print(f"DRQ TFLite Size:     {tflite_size:.2f} KB")
print(f"Compression Ratio:   {keras_size/tflite_size:.2f}x")

Original Model Size: 431.98 KB
DRQ TFLite Size:     63.37 KB
Compression Ratio:   6.82x


In [8]:
# 1. Create the sequences from the scaled data
# window_size=10 creates a 'lookback' of 10 rows for each prediction
X_seq, y_seq = create_sequences(X_scaled, y, window_size=10)

# 2. Split into training and testing sets
# X_train: features for training (80%)
# X_test: features for testing (20%)
# y_train: labels for training
# y_test: labels for testing
X_train, X_test, y_train, y_test = train_test_split(
    X_seq, y_seq, test_size=0.2, random_state=42
)

# 3. Verify the data is ready
print(f"X_train shape: {X_train.shape}") # Should be (Samples, 10, 10)
print(f"y_train shape: {y_train.shape}") # Should be (Samples,)
print(f"X_test shape:  {X_test.shape}")
print(f"y_test shape:  {y_test.shape}")

X_train shape: (48038, 10, 10)
y_train shape: (48038,)
X_test shape:  (12010, 10, 10)
y_test shape:  (12010,)


In [1]:
import tensorflow as tf

def representative_data_gen():
    # Use X_train for calibration
    for input_value in tf.data.Dataset.from_tensor_slices(X_train).batch(1).take(100):
        # The model expects float32 during calibration
        yield [tf.cast(input_value, tf.float32)]



2025-12-31 14:42:17.744150: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1767192137.760331     691 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1767192137.764969     691 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

In [1]:
import numpy as np
import tensorflow as tf
import pickle
import gc
import os

# 1. Re-generate the data (to ensure X_train.npy is healthy)
# Use your original dataframe and scaler logic here
# Assuming X_train, X_test, etc., are in memory. If not, re-run your split code first.
X_train='/kaggle/working/X_train2.npy'
print("Saving fresh data files...")
np.save('X_train_fixed.npy', X_train)

# 2. Clear RAM
tf.keras.backend.clear_session()
gc.collect()

# 3. Build the TFLite-compatible Static Model
model_static = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=(10, 10), batch_size=1),
    # unroll=True is MANDATORY for full integer quantization of LSTMs
    tf.keras.layers.LSTM(64, return_sequences=True, unroll=True),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.LSTM(32, unroll=True),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])



2025-12-31 16:39:48.294859: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1767199188.323117     293 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1767199188.331473     293 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1767199188.354355     293 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1767199188.354387     293 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1767199188.354392     293 computation_placer.cc:177] computation placer alr

Saving fresh data files...


2025-12-31 16:39:52.725135: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


In [3]:
# 4. Load weights from your trained model
# Replace 'final_dos_attack_model.keras' with your saved model path
trained_model = tf.keras.models.load_model('/kaggle/input/x-test-npy/final_dos_attack_model.keras')
model_static.set_weights(trained_model.get_weights())
del trained_model
gc.collect()

4111

In [4]:
# 5. Calibration Generator (using only 50 samples to save RAM)
def representative_data_gen():
    # Load just a tiny slice
    data = np.load('X_train_fixed.npy', mmap_mode='r')
    for i in range(50):
        sample = np.expand_dims(data[i], axis=0).astype(np.float32)
        yield [sample]

In [6]:

converter = tf.lite.TFLiteConverter.from_keras_model(model_static)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_data_gen

In [7]:
# Set flags for Integer Quantization
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.uint8
converter.inference_output_type = tf.uint8

In [11]:
import numpy as np
import tensorflow as tf
import gc

# 1. LOAD the data from the string path into a real array
# Your error showed X_train was pointing to '/kaggle/working/X_train2.npy'
print("Loading actual data from disk...")
path_to_data = '/kaggle/working/X_train2.npy' # Ensure this matches your file name
X_train_data = np.load(path_to_data) 

print(f"Data loaded successfully. Shape: {X_train_data.shape}")

# 2. Define the generator using the REAL data array
def representative_data_gen():
    for i in range(100):
        # Slice one sample and ensure it has the batch dimension (1, 10, 10)
        sample = X_train_data[i:i+1].astype(np.float32)
        yield [sample]

# 3. Build the Static Model with unroll=True (Required for LSTM Quantization)
model_static = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=(10, 10), batch_size=1),
    tf.keras.layers.LSTM(64, return_sequences=True, unroll=True),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.LSTM(32, unroll=True),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# 4. Load weights from your trained model
trained_model = tf.keras.models.load_model('/kaggle/input/x-test-npy/final_dos_attack_model.keras')
model_static.set_weights(trained_model.get_weights())

# 5. Setup and Run Converter
converter = tf.lite.TFLiteConverter.from_keras_model(model_static)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_data_gen
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.uint8
converter.inference_output_type = tf.uint8
converter.experimental_enable_resource_variables = True



Loading actual data from disk...
Data loaded successfully. Shape: (48038, 10, 10)




In [12]:
print("Starting conversion... this should now work.")
tflite_model = converter.convert()

with open('dos_model_full_int.tflite', 'wb') as f:
    f.write(tflite_model)

print("✅ Success! Full Integer model saved as dos_model_full_int.tflite")

Starting conversion... this should now work.
INFO:tensorflow:Assets written to: /tmp/tmp0tn4426t/assets


INFO:tensorflow:Assets written to: /tmp/tmp0tn4426t/assets


Saved artifact at '/tmp/tmp0tn4426t'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(1, 10, 10), dtype=tf.float32, name='keras_tensor_43')
Output Type:
  TensorSpec(shape=(1, 1), dtype=tf.float32, name=None)
Captures:
  132994875083280: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132994875081936: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132994875084624: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132994875084816: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132994875083856: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132994875084048: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132994875084432: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132994875085200: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132994875085584: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132994875085968: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132994875086160: Tenso

W0000 00:00:1767199580.436190     293 tf_tfl_flatbuffer_helpers.cc:365] Ignored output_format.
W0000 00:00:1767199580.436219     293 tf_tfl_flatbuffer_helpers.cc:368] Ignored drop_control_dependency.
fully_quantize: 0, inference_type: 6, input_inference_type: UINT8, output_inference_type: UINT8


✅ Success! Full Integer model saved as dos_model_full_int.tflite


In [30]:
"""
CAN Bus Intrusion Detection System Model Comparison
Compares FP32 Keras model vs INT8 TFLite quantized model
"""

import numpy as np
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    confusion_matrix, classification_report, roc_curve, auc,
    precision_recall_curve, average_precision_score
)
import pandas as pd
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

# Set style for better-looking plots
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")


class TFLiteModel:
    """Wrapper for TFLite model with proper quantization handling"""
    
    def __init__(self, model_path):
        self.interpreter = tf.lite.Interpreter(model_path=str(model_path))
        self.interpreter.allocate_tensors()
        
        # Get input and output details
        self.input_details = self.interpreter.get_input_details()[0]
        self.output_details = self.interpreter.get_output_details()[0]
        
        # Extract quantization parameters
        self.input_scale = self.input_details['quantization'][0]
        self.input_zero_point = self.input_details['quantization'][1]
        self.output_scale = self.output_details['quantization'][0]
        self.output_zero_point = self.output_details['quantization'][1]
        
        print(f"TFLite Model Loaded:")
        print(f"  Input shape: {self.input_details['shape']}")
        print(f"  Input dtype: {self.input_details['dtype'].__name__}")
        print(f"  Input scale: {self.input_scale}, zero_point: {self.input_zero_point}")
        print(f"  Output dtype: {self.output_details['dtype'].__name__}")
        print(f"  Output scale: {self.output_scale}, zero_point: {self.output_zero_point}")
    
    def quantize_input(self, x):
        """Quantize float input to appropriate dtype"""
        quantized = x / self.input_scale + self.input_zero_point
        return quantized.astype(self.input_details['dtype'])
    
    def dequantize_output(self, y_quantized):
        """Dequantize output back to float"""
        # Handle both int8 and uint8
        if self.output_details['dtype'] == np.uint8:
            y_float = y_quantized.astype(np.float32)
        else:
            y_float = y_quantized.astype(np.float32)
        return (y_float - self.output_zero_point) * self.output_scale
    
    def predict(self, X):
        """Run inference on batch of samples"""
        predictions = []
        
        for i in range(len(X)):
            # Quantize input
            x_quantized = self.quantize_input(X[i:i+1])
            
            # Set input tensor
            self.interpreter.set_tensor(self.input_details['index'], x_quantized)
            
            # Run inference
            self.interpreter.invoke()
            
            # Get output tensor
            output = self.interpreter.get_tensor(self.output_details['index'])
            
            # Dequantize output
            output_float = self.dequantize_output(output)
            predictions.append(output_float[0])
        
        return np.array(predictions)


class ModelComparator:
    """Compare FP32 and INT8 models"""
    
    def __init__(self, keras_path, tflite_path, X_test, y_test):
        self.X_test = X_test
        self.y_test = y_test
        self.output_dir = Path("comparison_results")
        self.output_dir.mkdir(exist_ok=True)
        
        # Load models
        print("Loading FP32 Keras model...")
        self.fp32_model = keras.models.load_model(keras_path)
        print(f"Model loaded. Input shape: {self.fp32_model.input_shape}")
        
        print("\nLoading INT8 TFLite model...")
        self.int8_model = TFLiteModel(tflite_path)
        
        # Run inference
        print("\n" + "="*70)
        print("Running Inference...")
        print("="*70)
        self.run_inference()
        
        # Compute metrics
        print("\nComputing metrics...")
        self.compute_metrics()
        
        # Generate visualizations
        print("\nGenerating visualizations...")
        self.generate_visualizations()
        
        # Print comparison table
        print("\n" + "="*70)
        self.print_comparison_table()
        print("="*70)
        
        print(f"\nAll results saved to: {self.output_dir}/")
    
    def run_inference(self):
        """Run inference on both models"""
        # FP32 model
        print("Running FP32 Keras inference...")
        self.fp32_probs = self.fp32_model.predict(self.X_test, batch_size=128, verbose=1)
        self.fp32_probs = self.fp32_probs.flatten()
        self.fp32_preds = (self.fp32_probs >= 0.5).astype(int)
        
        # INT8 model
        print("\nRunning INT8 TFLite inference...")
        self.int8_probs = self.int8_model.predict(self.X_test)
        self.int8_probs = self.int8_probs.flatten()
        self.int8_preds = (self.int8_probs >= 0.5).astype(int)
        
        print(f"\nInference complete!")
        print(f"  FP32 predictions shape: {self.fp32_preds.shape}")
        print(f"  INT8 predictions shape: {self.int8_preds.shape}")
    
    def compute_metrics(self):
        """Compute all metrics for both models"""
        self.metrics = {}
        
        for name, y_pred, y_prob in [
            ('FP32', self.fp32_preds, self.fp32_probs),
            ('INT8', self.int8_preds, self.int8_probs)
        ]:
            # Confusion matrix
            tn, fp, fn, tp = confusion_matrix(self.y_test, y_pred).ravel()
            
            # Compute metrics
            acc = accuracy_score(self.y_test, y_pred)
            prec = precision_score(self.y_test, y_pred, zero_division=0)
            rec = recall_score(self.y_test, y_pred, zero_division=0)
            f1 = f1_score(self.y_test, y_pred, zero_division=0)
            fpr = fp / (fp + tn) if (fp + tn) > 0 else 0
            
            # ROC and PR AUC
            fpr_curve, tpr_curve, _ = roc_curve(self.y_test, y_prob)
            roc_auc = auc(fpr_curve, tpr_curve)
            pr_auc = average_precision_score(self.y_test, y_prob)
            
            self.metrics[name] = {
                'accuracy': acc,
                'precision': prec,
                'recall': rec,
                'f1_score': f1,
                'fpr': fpr,
                'roc_auc': roc_auc,
                'pr_auc': pr_auc,
                'confusion_matrix': confusion_matrix(self.y_test, y_pred),
                'classification_report': classification_report(self.y_test, y_pred)
            }
            
            print(f"\n{name} Model Classification Report:")
            print(self.metrics[name]['classification_report'])
    
    def generate_visualizations(self):
        """Generate all comparison plots"""
        self.plot_roc_curves()
        self.plot_pr_curves()
        self.plot_confusion_matrices()
        self.plot_metrics_comparison()
        self.plot_probability_distributions()
        print(f"All plots saved to {self.output_dir}/")
    
    def plot_roc_curves(self):
        """Plot ROC curves for both models"""
        plt.figure(figsize=(10, 8))
        
        colors = ['#2E86AB', '#A23B72']
        for (name, y_prob), color in zip(
            [('FP32', self.fp32_probs), ('INT8', self.int8_probs)],
            colors
        ):
            fpr, tpr, _ = roc_curve(self.y_test, y_prob)
            roc_auc = self.metrics[name]['roc_auc']
            plt.plot(fpr, tpr, color=color, lw=2.5, 
                    label=f'{name} (AUC = {roc_auc:.4f})')
        
        plt.plot([0, 1], [0, 1], 'k--', lw=2, label='Random Classifier')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate', fontsize=12, fontweight='bold')
        plt.ylabel('True Positive Rate', fontsize=12, fontweight='bold')
        plt.title('ROC Curves: FP32 vs INT8 Models', fontsize=14, fontweight='bold')
        plt.legend(loc="lower right", fontsize=11)
        plt.grid(True, alpha=0.3)
        plt.tight_layout()
        plt.savefig(self.output_dir / 'roc_curves.png', dpi=300, bbox_inches='tight')
        plt.close()
    
    def plot_pr_curves(self):
        """Plot Precision-Recall curves"""
        plt.figure(figsize=(10, 8))
        
        colors = ['#2E86AB', '#A23B72']
        for (name, y_prob), color in zip(
            [('FP32', self.fp32_probs), ('INT8', self.int8_probs)],
            colors
        ):
            precision, recall, _ = precision_recall_curve(self.y_test, y_prob)
            pr_auc = self.metrics[name]['pr_auc']
            plt.plot(recall, precision, color=color, lw=2.5,
                    label=f'{name} (AP = {pr_auc:.4f})')
        
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('Recall', fontsize=12, fontweight='bold')
        plt.ylabel('Precision', fontsize=12, fontweight='bold')
        plt.title('Precision-Recall Curves: FP32 vs INT8 Models', 
                 fontsize=14, fontweight='bold')
        plt.legend(loc="lower left", fontsize=11)
        plt.grid(True, alpha=0.3)
        plt.tight_layout()
        plt.savefig(self.output_dir / 'pr_curves.png', dpi=300, bbox_inches='tight')
        plt.close()
    
    def plot_confusion_matrices(self):
        """Plot confusion matrices for both models"""
        fig, axes = plt.subplots(1, 2, figsize=(16, 6))
        
        for idx, (name, ax) in enumerate(zip(['FP32', 'INT8'], axes)):
            cm = self.metrics[name]['confusion_matrix']
            sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax,
                       cbar_kws={'label': 'Count'},
                       annot_kws={'size': 14, 'weight': 'bold'})
            ax.set_xlabel('Predicted Label', fontsize=12, fontweight='bold')
            ax.set_ylabel('True Label', fontsize=12, fontweight='bold')
            ax.set_title(f'{name} Model Confusion Matrix', 
                        fontsize=14, fontweight='bold')
            ax.set_xticklabels(['Normal (0)', 'Attack (1)'])
            ax.set_yticklabels(['Normal (0)', 'Attack (1)'])
        
        plt.tight_layout()
        plt.savefig(self.output_dir / 'confusion_matrices.png', 
                   dpi=300, bbox_inches='tight')
        plt.close()
    
    def plot_metrics_comparison(self):
        """Plot bar chart comparing key metrics"""
        metrics_to_plot = ['precision', 'recall', 'f1_score', 'fpr']
        metric_labels = ['Precision', 'Recall', 'F1-Score', 'FPR']
        
        fp32_values = [self.metrics['FP32'][m] for m in metrics_to_plot]
        int8_values = [self.metrics['INT8'][m] for m in metrics_to_plot]
        
        x = np.arange(len(metric_labels))
        width = 0.35
        
        fig, ax = plt.subplots(figsize=(12, 7))
        bars1 = ax.bar(x - width/2, fp32_values, width, label='FP32 Keras',
                      color='#2E86AB', alpha=0.8)
        bars2 = ax.bar(x + width/2, int8_values, width, label='INT8 TFLite',
                      color='#A23B72', alpha=0.8)
        
        ax.set_xlabel('Metrics', fontsize=12, fontweight='bold')
        ax.set_ylabel('Score', fontsize=12, fontweight='bold')
        ax.set_title('Performance Metrics Comparison: FP32 vs INT8', 
                    fontsize=14, fontweight='bold')
        ax.set_xticks(x)
        ax.set_xticklabels(metric_labels)
        ax.legend(fontsize=11)
        ax.set_ylim([0, 1.1])
        ax.grid(True, alpha=0.3, axis='y')
        
        # Add value labels on bars
        def autolabel(bars):
            for bar in bars:
                height = bar.get_height()
                ax.annotate(f'{height:.4f}',
                          xy=(bar.get_x() + bar.get_width() / 2, height),
                          xytext=(0, 3),
                          textcoords="offset points",
                          ha='center', va='bottom', fontsize=9)
        
        autolabel(bars1)
        autolabel(bars2)
        
        plt.tight_layout()
        plt.savefig(self.output_dir / 'metrics_comparison.png', 
                   dpi=300, bbox_inches='tight')
        plt.close()
    
    def plot_probability_distributions(self):
        """Plot prediction probability distributions"""
        fig, axes = plt.subplots(2, 2, figsize=(16, 12))
        
        # FP32 distribution
        axes[0, 0].hist(self.fp32_probs[self.y_test == 0], bins=50, 
                       alpha=0.7, color='blue', label='Normal', edgecolor='black')
        axes[0, 0].hist(self.fp32_probs[self.y_test == 1], bins=50, 
                       alpha=0.7, color='red', label='Attack', edgecolor='black')
        axes[0, 0].axvline(0.5, color='black', linestyle='--', linewidth=2, 
                          label='Threshold')
        axes[0, 0].set_xlabel('Prediction Probability', fontweight='bold')
        axes[0, 0].set_ylabel('Frequency', fontweight='bold')
        axes[0, 0].set_title('FP32 Model: Probability Distribution by Class', 
                            fontweight='bold')
        axes[0, 0].legend()
        axes[0, 0].grid(True, alpha=0.3)
        
        # INT8 distribution
        axes[0, 1].hist(self.int8_probs[self.y_test == 0], bins=50, 
                       alpha=0.7, color='blue', label='Normal', edgecolor='black')
        axes[0, 1].hist(self.int8_probs[self.y_test == 1], bins=50, 
                       alpha=0.7, color='red', label='Attack', edgecolor='black')
        axes[0, 1].axvline(0.5, color='black', linestyle='--', linewidth=2, 
                          label='Threshold')
        axes[0, 1].set_xlabel('Prediction Probability', fontweight='bold')
        axes[0, 1].set_ylabel('Frequency', fontweight='bold')
        axes[0, 1].set_title('INT8 Model: Probability Distribution by Class', 
                            fontweight='bold')
        axes[0, 1].legend()
        axes[0, 1].grid(True, alpha=0.3)
        
        # Overall comparison
        axes[1, 0].hist(self.fp32_probs, bins=50, alpha=0.6, 
                       color='#2E86AB', label='FP32', edgecolor='black')
        axes[1, 0].hist(self.int8_probs, bins=50, alpha=0.6, 
                       color='#A23B72', label='INT8', edgecolor='black')
        axes[1, 0].axvline(0.5, color='black', linestyle='--', linewidth=2)
        axes[1, 0].set_xlabel('Prediction Probability', fontweight='bold')
        axes[1, 0].set_ylabel('Frequency', fontweight='bold')
        axes[1, 0].set_title('Overall Probability Distribution Comparison', 
                            fontweight='bold')
        axes[1, 0].legend()
        axes[1, 0].grid(True, alpha=0.3)
        
        # Difference plot
        prob_diff = self.fp32_probs - self.int8_probs
        axes[1, 1].hist(prob_diff, bins=50, color='purple', 
                       alpha=0.7, edgecolor='black')
        axes[1, 1].axvline(0, color='black', linestyle='--', linewidth=2)
        axes[1, 1].set_xlabel('Probability Difference (FP32 - INT8)', 
                             fontweight='bold')
        axes[1, 1].set_ylabel('Frequency', fontweight='bold')
        axes[1, 1].set_title('Prediction Probability Difference Distribution', 
                            fontweight='bold')
        axes[1, 1].grid(True, alpha=0.3)
        
        plt.tight_layout()
        plt.savefig(self.output_dir / 'probability_distributions.png', 
                   dpi=300, bbox_inches='tight')
        plt.close()
    
    def print_comparison_table(self):
        """Print detailed comparison table"""
        print("\n" + "="*70)
        print("COMPREHENSIVE MODEL COMPARISON")
        print("="*70 + "\n")
        
        # Create comparison dataframe
        metrics_list = ['accuracy', 'precision', 'recall', 'f1_score', 'fpr', 
                       'roc_auc', 'pr_auc']
        
        data = []
        for metric in metrics_list:
            fp32_val = self.metrics['FP32'][metric]
            int8_val = self.metrics['INT8'][metric]
            diff = int8_val - fp32_val
            pct_change = (diff / fp32_val * 100) if fp32_val != 0 else 0
            
            data.append({
                'Metric': metric.upper().replace('_', ' '),
                'FP32 Keras': f'{fp32_val:.6f}',
                'INT8 TFLite': f'{int8_val:.6f}',
                'Δ Difference': f'{diff:+.6f}',
                '% Change': f'{pct_change:+.2f}%'
            })
        
        df = pd.DataFrame(data)
        print(df.to_string(index=False))
        
        # Save to CSV
        df.to_csv(self.output_dir / 'comparison_table.csv', index=False)
        
        # Summary statistics
        print(f"\n{'='*70}")
        print("SUMMARY STATISTICS")
        print(f"{'='*70}")
        print(f"Total test samples: {len(self.y_test)}")
        print(f"Normal samples: {np.sum(self.y_test == 0)} "
              f"({np.sum(self.y_test == 0)/len(self.y_test)*100:.2f}%)")
        print(f"Attack samples: {np.sum(self.y_test == 1)} "
              f"({np.sum(self.y_test == 1)/len(self.y_test)*100:.2f}%)")
        
        print(f"\nMean probability difference: "
              f"{np.mean(np.abs(self.fp32_probs - self.int8_probs)):.6f}")
        print(f"Max probability difference: "
              f"{np.max(np.abs(self.fp32_probs - self.int8_probs)):.6f}")
        
        # Prediction agreement
        agreement = np.sum(self.fp32_preds == self.int8_preds) / len(self.y_test)
        print(f"\nPrediction agreement rate: {agreement:.4f} "
              f"({agreement*100:.2f}%)")
        
        disagreement_indices = np.where(self.fp32_preds != self.int8_preds)[0]
        print(f"Disagreement cases: {len(disagreement_indices)}")


# Main execution
if __name__ == "__main__":
    print("="*70)
    print("CAN BUS INTRUSION DETECTION SYSTEM - MODEL COMPARISON")
    print("="*70)
    
    # Define file paths - UPDATE THESE TO MATCH YOUR FILES
    KERAS_MODEL_PATH = "/kaggle/input/x-test-npy/final_dos_attack_model.keras"  # Your FP32 Keras model
    TFLITE_MODEL_PATH = "/kaggle/working/dos_model_full_int.tflite"  # Your INT8 TFLite model
    
    # IMPORTANT: Make sure both paths point to different files!
    # Example:
    # KERAS_MODEL_PATH = "/kaggle/input/models/base_model.keras"
    # TFLITE_MODEL_PATH = "/kaggle/input/models/guardcan_lstm_int8.tflite"
    
    # =====================================================================
    # LOAD TEST DATA
    # =====================================================================
    # Option 1: If X_test and y_test are already in memory as variables
    try:
        # Check if they exist in the current namespace
        if isinstance(X_test, str):
            # If they're file paths, load them
            print(f"\nLoading test data from files...")
            print(f"  X_test: {X_test}")
            print(f"  y_test: {y_test}")
            X_test_data = np.load(X_test)
            y_test_data = np.load(y_test)
        else:
            # Already loaded as arrays
            X_test_data = X_test
            y_test_data = y_test
    except NameError:
        # Option 2: Load from file paths
        print("\nX_test and y_test not found in scope. Loading from files...")
        X_TEST_PATH = "/kaggle/working/X_test2.npy"  # UPDATE THIS PATH
        Y_TEST_PATH = "/kaggle/working/y_test2.npy"  # UPDATE THIS PATH
        
        print(f"  X_test: {X_TEST_PATH}")
        print(f"  y_test: {Y_TEST_PATH}")
        
        X_test_data = np.load(X_TEST_PATH)
        y_test_data = np.load(Y_TEST_PATH)
    
    # Verify data shapes
    print(f"\nTest data loaded successfully:")
    print(f"  X_test shape: {X_test_data.shape}")
    print(f"  y_test shape: {y_test_data.shape}")
    print(f"  Data type: {X_test_data.dtype}")
    
    assert len(X_test_data.shape) == 3, "X_test must be 3D: (samples, timesteps, features)"
    assert X_test_data.shape[1:] == (10, 10), f"Expected shape (N, 10, 10), got {X_test_data.shape}"
    
    # Run comparison
    comparator = ModelComparator(
        keras_path=KERAS_MODEL_PATH,
        tflite_path=TFLITE_MODEL_PATH,
        X_test=X_test_data,  # Pass the loaded numpy arrays
        y_test=y_test_data   # Not the file paths!
    )
    
    print("\n" + "="*70)
    print("ANALYSIS COMPLETE!")
    print("="*70)
    print("\nConclusion:")
    
    # Automated conclusion
    acc_diff = (comparator.metrics['INT8']['accuracy'] - 
                comparator.metrics['FP32']['accuracy'])
    f1_diff = (comparator.metrics['INT8']['f1_score'] - 
               comparator.metrics['FP32']['f1_score'])
    
    if abs(acc_diff) < 0.01 and abs(f1_diff) < 0.01:
        print("✓ INT8 quantization has MINIMAL IMPACT on model performance.")
        print("  The quantized model is suitable for deployment.")
    elif acc_diff < -0.05 or f1_diff < -0.05:
        print("⚠ INT8 quantization significantly degrades performance.")
        print("  Consider alternative quantization strategies or keep FP32.")
    else:
        print("→ INT8 quantization shows moderate impact on performance.")
        print("  Evaluate based on deployment constraints (latency/memory).")
    
    print(f"\nAll results saved to: {comparator.output_dir}/")

CAN BUS INTRUSION DETECTION SYSTEM - MODEL COMPARISON

Loading test data from files...
  X_test: /kaggle/working/X_test2.npy
  y_test: /kaggle/working/y_test2.npy

Test data loaded successfully:
  X_test shape: (12010, 10, 10)
  y_test shape: (12010,)
  Data type: float64
Loading FP32 Keras model...
Model loaded. Input shape: (None, 10, 10)

Loading INT8 TFLite model...
TFLite Model Loaded:
  Input shape: [ 1 10 10]
  Input dtype: uint8
  Input scale: 0.02018970064818859, zero_point: 102
  Output dtype: uint8
  Output scale: 0.00390625, zero_point: 0

Running Inference...
Running FP32 Keras inference...
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step

Running INT8 TFLite inference...

Inference complete!
  FP32 predictions shape: (12010,)
  INT8 predictions shape: (12010,)

Computing metrics...

FP32 Model Classification Report:
              precision    recall  f1-score   support

           0       0.91      0.96      0.94      7996
           1       0.90 