# Importing Dependencies

In [31]:
import pandas as pd
import polars as pl
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Dense, Dropout, Flatten, GRU, BatchNormalization
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split
import joblib
import glob
import os

# Loading Dataset

In [32]:
# Disabled check: compare the header of one original dataset file against the
# header of the merged CSV and print the columns missing from the merge.
# original = pl.read_csv("Entire_Dataset/UNSW_2018_IoT_Botnet_Dataset_1.csv", n_rows=0)
# merged = pl.read_csv("Final_Merged_Dataset.csv", n_rows=0, ignore_errors=True)

# missing = set(original.columns) - set(merged.columns)
# print(missing)


In [33]:
# Read only the header row (n_rows=0) to get the column names without loading
# any data.
cols = pl.read_csv('Bot_Iot_final_scaled.csv', n_rows=0, ignore_errors=True).columns

# Explicit dtypes: every non-label column is read as Float64, the `attack`
# label as Int8.
schema_overide = {c: pl.Float64 for c in cols if c!="attack"}
schema_overide["attack"] = pl.Int8

# Lazy scan of the CSV. The explicit dtypes built above are now actually
# applied; previously the dict was never used and `infer_schema_length=None`
# asked polars to infer the schema from the whole file instead.
df = pl.scan_csv("Bot_Iot_final_scaled.csv", schema_overrides=schema_overide)

In [19]:
# Print the column names that were read from the CSV header into `cols`.
print(cols)

['pkSeqID', 'stime', 'sport', 'dport', 'pkts', 'bytes', 'ltime', 'seq', 'dur', 'mean', 'stddev', 'sum', 'min', 'max', 'spkts', 'dpkts', 'sbytes', 'dbytes', 'rate', 'srate', 'drate', 'attack', 'flgs_eU', 'flgs_e_dS', 'flgs_e_&', 'flgs_e_r', 'flgs_e', 'flgs_e___t', 'flgs_e__D', 'flgs_e____F', 'flgs_e_d', 'flgs_e_g', 'flgs_e_*', 'flgs_e_s', 'flgs_e_dD', 'proto_igmp', 'proto_tcp', 'proto_ipv6-icmp', 'proto_arp', 'proto_rarp', 'proto_udp', 'proto_icmp', 'state_NRS', 'state_TST', 'state_INT', 'state_ECO', 'state_MAS', 'state_CON', 'state_ACC', 'state_CLO', 'state_REQ', 'state_URP', 'state_FIN', 'state_RST', 'state_PAR', 'state_RSP', 'saddr_1', 'saddr_2', 'saddr_3', 'saddr_4', 'daddr_1', 'daddr_2', 'daddr_3', 'daddr_4']


In [22]:
# Display the header columns already loaded into `cols`; re-reading the CSV
# header here would only repeat the same file access for the same list.
cols

['pkSeqID',
 'stime',
 'sport',
 'dport',
 'pkts',
 'bytes',
 'ltime',
 'seq',
 'dur',
 'mean',
 'stddev',
 'sum',
 'min',
 'max',
 'spkts',
 'dpkts',
 'sbytes',
 'dbytes',
 'rate',
 'srate',
 'drate',
 'attack',
 'flgs_eU',
 'flgs_e_dS',
 'flgs_e_&',
 'flgs_e_r',
 'flgs_e',
 'flgs_e___t',
 'flgs_e__D',
 'flgs_e____F',
 'flgs_e_d',
 'flgs_e_g',
 'flgs_e_*',
 'flgs_e_s',
 'flgs_e_dD',
 'proto_igmp',
 'proto_tcp',
 'proto_ipv6-icmp',
 'proto_arp',
 'proto_rarp',
 'proto_udp',
 'proto_icmp',
 'state_NRS',
 'state_TST',
 'state_INT',
 'state_ECO',
 'state_MAS',
 'state_CON',
 'state_ACC',
 'state_CLO',
 'state_REQ',
 'state_URP',
 'state_FIN',
 'state_RST',
 'state_PAR',
 'state_RSP',
 'saddr_1',
 'saddr_2',
 'saddr_3',
 'saddr_4',
 'daddr_1',
 'daddr_2',
 'daddr_3',
 'daddr_4']

In [20]:
# Disabled check: count the occurrences of each `attack` value in the lazy
# frame `df` and print the result.
# target_counts = (
#     df.select(pl.col("attack").value_counts())
#     .collect()
# )
# print(target_counts)

In [27]:
# ---- Column selection ----
LABEL_COL = 'attack'                                   # target column
# Every column from the CSV header except the label is used as a feature.
# NOTE(review): this keeps 'pkSeqID', 'stime', 'ltime' and 'seq' as model
# inputs -- confirm these identifier/time columns do not leak the label.
FEATURE_COLS = [name for name in cols if name != LABEL_COL]
SELECTED_COLS = [*FEATURE_COLS, LABEL_COL]
NUM_FEATURES = len(FEATURE_COLS)

# ---- Sizes ----
BATCH_SIZE = 1024    # rows per batch when reading the CSV
WINDOW = 20          # consecutive rows per sliding window
MODEL_BATCH = 64     # windows per training batch


# ---------- CSV loader ----------
def get_dataset(file_path):
    """Create a batched CSV dataset for `file_path`, read once and in file order.

    Only the columns in SELECTED_COLS are read; LABEL_COL is split off as the
    label. Rows are delivered in batches of BATCH_SIZE.

    Args:
        file_path: path (or pattern) handed to `make_csv_dataset`.

    Returns:
        The `tf.data` dataset produced by `make_csv_dataset`.
    """
    csv_options = dict(
        batch_size=BATCH_SIZE,
        label_name=LABEL_COL,
        select_columns=SELECTED_COLS,
        na_value="?",
        num_epochs=1,          # single pass over the file per iteration
        ignore_errors=True,
        shuffle=False,         # keep row order: the windows below are temporal
    )
    return tf.data.experimental.make_csv_dataset(file_path, **csv_options)


# ---------- Convert feature dict -> tensor ----------
def pack(features, label):
    """Turn a feature mapping into one float32 tensor, and cast the label too.

    Args:
        features: mapping from column name to a tensor; it is indexed with
            every name in FEATURE_COLS.
        label: label tensor for the same rows.

    Returns:
        `(x, y)` where `x` stacks the float32-cast feature columns along
        axis 1 in FEATURE_COLS order, and `y` is `label` cast to float32.
    """
    per_column = []
    for name in FEATURE_COLS:
        per_column.append(tf.cast(features[name], tf.float32))
    packed = tf.stack(per_column, axis=1)
    return packed, tf.cast(label, tf.float32)



# ---------- Build streaming sliding-window dataset ----------
def _window_to_tensors(feature_window, label_window):
    """Collapse one window (a pair of nested datasets) into a single (x, y) element."""
    return tf.data.Dataset.zip((
        feature_window.batch(WINDOW),
        label_window.batch(WINDOW),
    ))


def _keep_last_label(window_x, window_y):
    """Label each window with the label of its last timestep."""
    return window_x, window_y[-1]


raw_train_data = get_dataset("Bot_Iot_final_scaled.csv")

dataset = (
    raw_train_data
    .map(pack)                                      # feature dict -> tensor
    .unbatch()                                      # back to a stream of single rows
    .window(WINDOW, shift=1, drop_remainder=True)   # overlapping windows, stride 1
    .flat_map(_window_to_tensors)
    .map(_keep_last_label)
    .batch(MODEL_BATCH)
    .prefetch(tf.data.AUTOTUNE)
)

# Model Training

In [None]:
# Conv1D -> MaxPooling1D -> LSTM binary classifier over windows of shape
# (WINDOW, NUM_FEATURES); the sigmoid output is trained with binary
# cross-entropy.
model1 = Sequential([
    # Declare the input with an explicit Input object rather than passing
    # `input_shape=` to the first layer, which Keras warns about in
    # Sequential models.
    tf.keras.Input(shape=(WINDOW, NUM_FEATURES)),
    Conv1D(64, 3, activation='relu'),
    MaxPooling1D(2),
    LSTM(64),
    Dropout(0.5),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')
])

model1.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# summary() prints the table itself and returns None, so wrapping it in
# print() only added a stray "None" line to the cell output.
model1.summary()

# NOTE(review): fit() receives no validation data, so the accuracy reported
# during training is measured on the training windows only.
history1 = model1.fit(dataset, epochs=2)
# Keep the original `history` name for any code that reads it; `history1`
# stays available even if a later cell reassigns `history`.
history = history1


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


None
Epoch 1/2


2026-02-23 00:28:08.818480: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:473] Loaded cuDNN version 91900


[1m1146409/1146409[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3992s[0m 3ms/step - accuracy: 0.9999 - loss: 5.4479e-04
Epoch 2/2
[1m      1/1146409[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m24:25:37[0m 77ms/step - accuracy: 1.0000 - loss: 3.4765e-06

2026-02-23 01:34:40.042614: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2026-02-23 01:34:40.042638: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[IteratorGetNext/_4]]


[1m1146409/1146409[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4025s[0m 4ms/step - accuracy: 0.9999 - loss: 7.9912e-04


2026-02-23 02:41:45.200197: I tensorflow/core/framework/local_rendezvous.cc:430] Local rendezvous send item cancelled. Key hash: 9676163142229759962
2026-02-23 02:41:45.201121: I tensorflow/core/framework/local_rendezvous.cc:426] Local rendezvous recv item cancelled. Key hash: 823930602756181906
2026-02-23 02:41:45.201128: I tensorflow/core/framework/local_rendezvous.cc:426] Local rendezvous recv item cancelled. Key hash: 1585693964698534087
2026-02-23 02:41:45.201134: I tensorflow/core/framework/local_rendezvous.cc:426] Local rendezvous recv item cancelled. Key hash: 9637192283536841607
2026-02-23 02:41:45.201139: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[StatefulPartitionedCall/sequential_1/dropout_1/Cast/_10]]


In [None]:
# Conv1D -> MaxPooling1D -> GRU binary classifier over windows of shape
# (WINDOW, NUM_FEATURES); same layout as model1 with a GRU in place of the
# LSTM.
model2 = Sequential([
        # Declare the input with an explicit Input object rather than passing
        # `input_shape=` to the first layer, which Keras warns about in
        # Sequential models.
        tf.keras.Input(shape=(WINDOW, NUM_FEATURES)),
        Conv1D(64, kernel_size=3, activation='relu'),
        MaxPooling1D(2),
        GRU(64),
        Dropout(0.5),
        Dense(32, activation='relu'),
        Dense(1, activation='sigmoid')
    ])

model2.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy']
    )
# summary() prints the table itself and returns None, so wrapping it in
# print() only added a stray "None" line to the cell output.
model2.summary()

# NOTE(review): fit() receives no validation data, so the accuracy reported
# during training is measured on the training windows only.
history2 = model2.fit(dataset, epochs=2)
# `history` is still assigned for backward compatibility, but it replaces the
# value set by the model1 cell; use `history2` to refer to this run.
history = history2

None
Epoch 1/2
[1m1146409/1146409[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3993s[0m 3ms/step - accuracy: 0.9999 - loss: 4.8963e-04
Epoch 2/2
[1m      1/1146409[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m46:36:51[0m 146ms/step - accuracy: 1.0000 - loss: 9.2190e-06

2026-02-23 04:15:29.716880: I tensorflow/core/framework/local_rendezvous.cc:426] Local rendezvous recv item cancelled. Key hash: 2257151656708163750
2026-02-23 04:15:29.717148: I tensorflow/core/framework/local_rendezvous.cc:426] Local rendezvous recv item cancelled. Key hash: 17482540518480046374
2026-02-23 04:15:29.717165: I tensorflow/core/framework/local_rendezvous.cc:426] Local rendezvous recv item cancelled. Key hash: 5596771798013402040


[1m1146409/1146409[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3944s[0m 3ms/step - accuracy: 0.9999 - loss: 5.6724e-04


2026-02-23 05:21:13.984874: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[IteratorGetNext/_4]]


In [34]:
# Persist the two trained models. The original cell referenced `scaler` and
# `model`, neither of which is defined in this notebook, so it raised
# NameError on a fresh kernel.
# No scaler is fitted here (the input CSV is already scaled), so there is no
# scaler object to dump from this notebook.
# NOTE(review): if downstream code expects 'iot_model.keras', point it at
# whichever of the two files below is the chosen model.
model1.save('iot_cnn_lstm.keras')
model2.save('iot_cnn_gru.keras')