# Unsupervised anomaly scoring: Isolation Forest + dense autoencoder + LSTM autoencoder


In [None]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import IsolationForest

from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Input, Dense, LSTM, RepeatVector, TimeDistributed


In [None]:
# Load the input CSV (a "label" column is optional), discard every row that
# contains a missing value, and renumber the index from 0.
df = (
    pd.read_csv("bell_dns.csv")
    .dropna()
    .reset_index(drop=True)
)


In [None]:
# Derived ratio features. Each denominator is shifted by 1 before dividing
# (presumably so that a zero count/size does not divide by zero).
df["byte_ratio"] = df["total_fwd_bytes"].div(df["total_bwd_bytes"].add(1))
df["pkt_ratio"] = df["fwd_pkt_len_mean"].div(df["bwd_pkt_len_mean"].add(1))
# Despite the name, this is flow duration per (forward packet + 1).
df["flow_rate"] = df["flow_duration"].div(df["tot_fwd_pkts"].add(1))

# Feature matrix: every column except "label"; the drop is a no-op when the
# data has no such column.
X = df.drop(columns=["label"], errors="ignore")


In [None]:
# Standardise each feature column; the fitted scaler is kept in `scaler`.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)


In [None]:
# Isolation Forest component of the ensemble, oriented so that a higher value
# means "more anomalous".
#
# fit_predict() only yields the hard labels +1 (inlier) / -1 (outlier), which
# reduces this component to a two-valued flag once it is min-max normalised
# and blended with the continuous reconstruction errors. score_samples()
# returns the continuous score (lower = more abnormal), so it is negated to
# give every row its own rank.
iso = IsolationForest(contamination=0.02, random_state=42)
iso.fit(X_scaled)
iso_score = -iso.score_samples(X_scaled)


In [None]:
# Dense autoencoder over the scaled features: 32 -> 16 -> 32 -> input_dim.
# It is trained to reproduce its own input; the per-row mean squared
# reconstruction error is used as that row's score.
input_dim = X_scaled.shape[1]

inp = Input(shape=(input_dim,))
enc_hidden = Dense(32, activation="relu")(inp)
enc = Dense(16, activation="relu")(enc_hidden)  # narrowest layer
dec_hidden = Dense(32, activation="relu")(enc)
dec = Dense(input_dim, activation="linear")(dec_hidden)

ae = Model(inputs=inp, outputs=dec)
ae.compile(optimizer="adam", loss="mse")
ae.fit(x=X_scaled, y=X_scaled, epochs=10, batch_size=64, verbose=0)

# Squared error averaged across features -> one value per row.
ae_recon = ae.predict(X_scaled)
ae_score = np.square(ae_recon - X_scaled).mean(axis=1)


In [None]:
# LSTM autoencoder trained on non-overlapping windows of SEQ_LEN consecutive
# rows of the scaled feature matrix.
SEQ_LEN = 10

# Only whole windows are used: rows beyond the last multiple of SEQ_LEN are
# not part of X_seq.
n_windowed_rows = (len(X_scaled) // SEQ_LEN) * SEQ_LEN
X_seq = X_scaled[:n_windowed_rows].reshape(-1, SEQ_LEN, input_dim)

lstm = Sequential()
# Encoder: the second LSTM returns only its final output (one vector per window).
lstm.add(LSTM(32, return_sequences=True, input_shape=(SEQ_LEN, input_dim)))
lstm.add(LSTM(16))
# Decoder: repeat that vector SEQ_LEN times and unroll it back to a sequence.
lstm.add(RepeatVector(SEQ_LEN))
lstm.add(LSTM(16, return_sequences=True))
lstm.add(LSTM(32, return_sequences=True))
lstm.add(TimeDistributed(Dense(input_dim)))

# Train the network to reconstruct its own input windows.
lstm.compile(optimizer="adam", loss="mse")
lstm.fit(x=X_seq, y=X_seq, epochs=10, batch_size=32, verbose=0)

# Reconstruction error per window (mean over time steps and features),
# repeated SEQ_LEN times so that every row inside a window carries that
# window's score.
window_err = np.mean((lstm.predict(X_seq) - X_seq) ** 2, axis=(1, 2))
lstm_score = np.repeat(window_err, SEQ_LEN)

# X_seq leaves out the trailing len(X_scaled) % SEQ_LEN rows, and slicing with
# [:len(X_scaled)] cannot lengthen the array. Without the step below,
# lstm_score would be shorter than the other per-row scores whenever the row
# count is not a multiple of SEQ_LEN, and combining them would fail with a
# shape mismatch. The leftover rows are scored with one extra window made of
# the last SEQ_LEN rows.
n_missing = len(X_scaled) - len(lstm_score)
if n_missing > 0:
    tail_window = X_scaled[-SEQ_LEN:].reshape(1, SEQ_LEN, input_dim)
    tail_err = np.mean((lstm.predict(tail_window) - tail_window) ** 2)
    lstm_score = np.concatenate([lstm_score, np.full(n_missing, tail_err)])


In [None]:
def normalize(x):
    """Min-max scale the array *x* so that its minimum maps to 0.

    A small constant (1e-6) is added to the range before dividing, so a
    constant input yields all zeros instead of dividing by zero; as a side
    effect the maximum maps to slightly less than 1.
    """
    lowest = x.min()
    span = x.max() - lowest + 1e-6
    return (x - lowest) / span

# Weighted blend of the three detectors, each rescaled with normalize()
# first: 35% Isolation Forest, 35% dense autoencoder, 30% LSTM autoencoder.
iso_part = 0.35 * normalize(iso_score)
ae_part = 0.35 * normalize(ae_score)
lstm_part = 0.30 * normalize(lstm_score)

anomaly_score = iso_part + ae_part + lstm_part


In [None]:
# Write the blended score to disk as a single-column CSV without the index.
score_frame = pd.DataFrame({"anomaly_score": anomaly_score})
score_frame.to_csv("../outputs/anomaly_score.csv", index=False)
