<a href="https://colab.research.google.com/github/Thanasiss23/ekpa/blob/main/Lambda_IPS_KDD99_full.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Lambda IPS Notebook (KDD Cup 1999)
Μετασχηματισμός του IDS σε IPS, με εκπαίδευση σε KDD’99 και προληπτικές ενέργειες.

In [1]:
# 0) Εγκατάσταση απαιτούμενων πακέτων
!pip install scikit-learn pandas numpy joblib tensorflow boto3

Collecting boto3
  Downloading boto3-1.40.25-py3-none-any.whl.metadata (6.7 kB)
Collecting botocore<1.41.0,>=1.40.25 (from boto3)
  Downloading botocore-1.40.25-py3-none-any.whl.metadata (5.7 kB)
Collecting jmespath<2.0.0,>=0.7.1 (from boto3)
  Downloading jmespath-1.0.1-py3-none-any.whl.metadata (7.6 kB)
Collecting s3transfer<0.14.0,>=0.13.0 (from boto3)
  Downloading s3transfer-0.13.1-py3-none-any.whl.metadata (1.7 kB)
Downloading boto3-1.40.25-py3-none-any.whl (139 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.3/139.3 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading botocore-1.40.25-py3-none-any.whl (14.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.0/14.0 MB[0m [31m38.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading jmespath-1.0.1-py3-none-any.whl (20 kB)
Downloading s3transfer-0.13.1-py3-none-any.whl (85 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.3/85.3 kB[0m [31m2.8 MB/s[0m eta [36m0:0

## 1) Φόρτωση KDD Cup 1999 και προεπεξεργασία

In [2]:
import os, gzip, pathlib, json, joblib, logging, time, subprocess
import numpy as np, pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
from sklearn.compose import ColumnTransformer
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.utils.class_weight import compute_class_weight
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

def load_kdd99(path: str) -> pd.DataFrame:
    p = pathlib.Path(path)
    if p.suffix == '.gz':
        with gzip.open(p, 'rt') as f:
            df = pd.read_csv(f, header=None)
    else:
        df = pd.read_csv(p, header=None)
    cols = [
        'duration','protocol_type','service','flag','src_bytes','dst_bytes','land','wrong_fragment','urgent',
        'hot','num_failed_logins','logged_in','num_compromised','root_shell','su_attempted','num_root',
        'num_file_creations','num_shells','num_access_files','num_outbound_cmds','is_host_login','is_guest_login',
        'count','srv_count','serror_rate','srv_serror_rate','rerror_rate','srv_rerror_rate','same_srv_rate',
        'diff_srv_rate','srv_diff_host_rate','dst_host_count','dst_host_srv_count','dst_host_same_srv_rate',
        'dst_host_diff_srv_rate','dst_host_same_src_port_rate','dst_host_srv_diff_host_rate','dst_host_serror_rate',
        'dst_host_srv_serror_rate','dst_host_rerror_rate','dst_host_srv_rerror_rate','label'
    ]
    df = df.iloc[:, :42]
    df.columns = cols
    return df

def prepare_data(df):
    y = df['label']
    X = df.drop('label', axis=1)
    categorical = ['protocol_type','service','flag']
    numeric = [c for c in X.columns if c not in categorical]
    preprocessor = ColumnTransformer([
        ('num', MinMaxScaler(), numeric),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical)
    ])
    X_proc = preprocessor.fit_transform(X)
    class_names = np.unique(y)
    class_map = {c:i for i,c in enumerate(class_names)}
    y_enc = y.map(class_map)
    return X_proc, y_enc, preprocessor, class_names

## 2) Ορισμός μοντέλου (MLP)

In [3]:
def build_mlp(input_dim, n_classes):
    inputs = keras.Input(shape=(input_dim,))
    x = layers.Dense(256, activation='relu')(inputs)
    x = layers.Dropout(0.3)(x)
    x = layers.Dense(128, activation='relu')(x)
    x = layers.Dropout(0.2)(x)
    x = layers.Dense(64, activation='relu')(x)
    outputs = layers.Dense(n_classes, activation='softmax')(x)
    model = keras.Model(inputs, outputs)
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

## 3) Εκπαίδευση και αποθήκευση artifacts

In [4]:
def train_and_save(kdd_path, out_dir='artifacts'):
    os.makedirs(out_dir, exist_ok=True)
    df = load_kdd99(kdd_path)
    X, y, preproc, class_names = prepare_data(df)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    model = build_mlp(X.shape[1], len(class_names))
    model.fit(X_train, y_train, epochs=5, batch_size=256, validation_split=0.1)
    preds = model.predict(X_test)
    print(classification_report(y_test, np.argmax(preds, axis=1)))
    model.save(os.path.join(out_dir, 'model.h5'))
    joblib.dump(preproc, os.path.join(out_dir, 'preproc.pkl'))
    joblib.dump(class_names, os.path.join(out_dir, 'classes.pkl'))

## 4) Policy Engine και Actions

In [5]:
from dataclasses import dataclass
from typing import Dict, Any

class ActionExecutor:
    def execute(self, action: str, ctx: Dict[str, Any]):
        if action == 'block_ip':
            print(f"[IPS] Blocking IP {ctx.get('src_ip')}")
        elif action == 'isolate_device':
            print(f"[IPS] Isolating device {ctx.get('dst_ip')}")
        elif action == 'notify_admin':
            print("[IPS] Notifying admin for manual intervention")
        else:
            print(f"[IPS] Unknown action: {action}")

@dataclass
class PolicyRule:
    label: str
    actions: list

class PolicyEngine:
    def __init__(self, rules: list, executor: ActionExecutor):
        self.rules = rules
        self.executor = executor
    def evaluate(self, label: str, ctx: Dict[str, Any]):
        for rule in self.rules:
            if rule.label == label:
                for a in rule.actions:
                    self.executor.execute(a, ctx)

## 5) IPS Runtime (inference + policies)

In [6]:
class IPSRuntime:
    def __init__(self, artifacts_dir='artifacts'):
        self.model = keras.models.load_model(os.path.join(artifacts_dir, 'model.h5'))
        self.preproc = joblib.load(os.path.join(artifacts_dir, 'preproc.pkl'))
        self.classes = joblib.load(os.path.join(artifacts_dir, 'classes.pkl'))
        rules = [
            PolicyRule('normal', []),
            PolicyRule('neptune', ['block_ip','notify_admin']),
            PolicyRule('smurf', ['isolate_device','notify_admin'])
        ]
        self.engine = PolicyEngine(rules, ActionExecutor())
    def handle_record(self, record: dict):
        df = pd.DataFrame([record])
        X = self.preproc.transform(df)
        probs = self.model.predict(X)
        idx = np.argmax(probs, axis=1)[0]
        label = self.classes[idx]
        ctx = record
        self.engine.evaluate(label, ctx)
        return label, probs[0]

In [9]:
!wget http://kdd.ics.uci.edu/databases/kddcup99/kddcup.data_10_percent.gz


--2025-09-07 20:16:57--  http://kdd.ics.uci.edu/databases/kddcup99/kddcup.data_10_percent.gz
Resolving kdd.ics.uci.edu (kdd.ics.uci.edu)... 128.195.1.94
Connecting to kdd.ics.uci.edu (kdd.ics.uci.edu)|128.195.1.94|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2144903 (2.0M) [application/x-gzip]
Saving to: ‘kddcup.data_10_percent.gz’


2025-09-07 20:16:58 (1.92 MB/s) - ‘kddcup.data_10_percent.gz’ saved [2144903/2144903]



In [10]:
train_and_save("kddcup.data_10_percent.gz", out_dir="artifacts")


Epoch 1/5
[1m1390/1390[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 8ms/step - accuracy: 0.9651 - loss: 0.1861 - val_accuracy: 0.9987 - val_loss: 0.0051
Epoch 2/5
[1m1390/1390[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 8ms/step - accuracy: 0.9986 - loss: 0.0061 - val_accuracy: 0.9985 - val_loss: 0.0054
Epoch 3/5
[1m1390/1390[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 8ms/step - accuracy: 0.9989 - loss: 0.0047 - val_accuracy: 0.9994 - val_loss: 0.0023
Epoch 4/5
[1m1390/1390[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 8ms/step - accuracy: 0.9990 - loss: 0.0040 - val_accuracy: 0.9993 - val_loss: 0.0029
Epoch 5/5
[1m1390/1390[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 8ms/step - accuracy: 0.9991 - loss: 0.0033 - val_accuracy: 0.9994 - val_loss: 0.0022
[1m3088/3088[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

           0       0.99      1.00      0.99       430
           1       1.00      0.40      0.57         5
           3       1.00      1.00      1.00        11
           4       0.50      0.50      0.50         2
           5       0.99      0.98      0.99       241
           6       1.00      0.80      0.89         5
           7       0.00      0.00      0.00         1
           9       1.00      1.00      1.00     21280
          10       0.88      0.97      0.92        36
          11       1.00      1.00      1.00     19550
          12       0.00      0.00      0.00         3
          13       0.00      0.00      0.00         1
          14       0.98      1.00      0.99        58
          15       1.00      0.98      0.99       239
          16       0.00      0.00      0.00         1
          17       1.00      0.98      0.99       306
          18       1.00      1.00      1.00     56207
          20       1.00    

In [12]:
runtime = IPSRuntime(artifacts_dir="artifacts")
sample_record = {
    'duration': 0, 'protocol_type': 'tcp', 'service': 'http', 'flag': 'SF',
    'src_bytes': 181, 'dst_bytes': 5450, 'land': 0, 'wrong_fragment': 0, 'urgent': 0,
    'hot': 0, 'num_failed_logins': 0, 'logged_in': 1, 'num_compromised': 0,
    'root_shell': 0, 'su_attempted': 0, 'num_root': 0, 'num_file_creations': 0,
    'num_shells': 0, 'num_access_files': 0, 'num_outbound_cmds': 0,
    'is_host_login': 0, 'is_guest_login': 0, 'count': 9, 'srv_count': 9,
    'serror_rate': 0.0, 'srv_serror_rate': 0.0, 'rerror_rate': 0.0, 'srv_rerror_rate': 0.0,
    'same_srv_rate': 1.0, 'diff_srv_rate': 0.0, 'srv_diff_host_rate': 0.0,
    'dst_host_count': 9, 'dst_host_srv_count': 9, 'dst_host_same_srv_rate': 1.0,
    'dst_host_diff_srv_rate': 0.0, 'dst_host_same_src_port_rate': 1.0,
    'dst_host_srv_diff_host_rate': 0.0, 'dst_host_serror_rate': 0.0,
    'dst_host_srv_serror_rate': 0.0, 'dst_host_rerror_rate': 0.0,
    'dst_host_srv_rerror_rate': 0.0,
    'label': 'normal',
    'src_ip': '192.168.0.10', 'dst_ip': '192.168.0.1'
}
result = runtime.handle_record(sample_record)
print(result)




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step
('normal.', array([1.6157428e-04, 5.8729274e-05, 3.1972999e-05, 9.0236972e-06,
       2.7071647e-07, 2.3776761e-04, 7.9997574e-07, 3.0858380e-05,
       8.7482993e-05, 3.7992669e-07, 3.5694422e-05, 9.9817204e-01,
       1.6509039e-08, 2.3334327e-05, 2.4985255e-07, 5.2901833e-06,
       4.2473513e-04, 7.7998957e-05, 3.6847032e-06, 3.6719994e-05,
       2.8884259e-07, 5.6375877e-04, 3.7312053e-05], dtype=float32))
