In [None]:
import pandas as pd

# Quick inspection of the raw CSV: list the column names, then show the first rows.
df = pd.read_csv(
    "C:/Users/DELL/OneDrive/Desktop/anu course/datasets/intrusion1.csv"
)
print("Columns:", list(df.columns))
print(df.head(5))

Columns: ['session_id', 'network_packet_size', 'protocol_type', 'login_attempts', 'session_duration', 'encryption_used', 'ip_reputation_score', 'failed_logins', 'browser_type', 'unusual_time_access', 'attack_detected']
  session_id  network_packet_size protocol_type  login_attempts  \
0  SID_00001                  599           TCP               4   
1  SID_00002                  472           TCP               3   
2  SID_00003                  629           TCP               3   
3  SID_00004                  804           UDP               4   
4  SID_00005                  453           TCP               5   

   session_duration encryption_used  ip_reputation_score  failed_logins  \
0        492.983263             DES             0.606818              1   
1       1557.996461             DES             0.301569              0   
2         75.044262             DES             0.739164              2   
3        601.248835             DES             0.123267              0   
4  

In [20]:
import pandas as pd
from pathlib import Path
from typing import Tuple

def load_data(path: str | Path = "C:/Users/DELL/OneDrive/Desktop/anu course/datasets/intrusion1.csv") -> Tuple[pd.DataFrame, pd.Series]:
    """Load heart disease dataset and return X, y."""
    df = pd.read_csv(path)
    target_col = 'attack_detected' # Replace with actual column name from inspection
    
    X = df.drop(target_col, axis=1)
    y = df[target_col]
    return X, y

In [21]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier

def build_pipeline() -> Pipeline:
    """
    Assemble the default intrusion-detection model.

    The pipeline has two named steps: ``"scaler"`` (a ``StandardScaler``) followed by
    ``"clf"`` (a ``RandomForestClassifier`` with 100 trees and ``random_state=42``).
    Add feature-engineering, sampling, or other preprocessing steps here as needed.
    """
    scaling_step = ("scaler", StandardScaler())
    classifier_step = ("clf", RandomForestClassifier(n_estimators=100, random_state=42))
    return Pipeline(steps=[scaling_step, classifier_step])


In [28]:
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer

def preprocess_data(X: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``X`` with numeric gaps mean-imputed and categoricals integer-encoded.

    Numeric columns of any width (int32, float32, int64, ...) have missing values
    replaced by the column mean. Columns of ``object`` or ``category`` dtype are
    replaced by integer codes from a fresh ``LabelEncoder`` per column. All other
    columns (for example booleans) are passed through unchanged.

    The imputer and encoders are fitted inside this call and then discarded, so the
    mapping is only consistent within a single call.

    Args:
        X: Feature frame; it is not modified.

    Returns:
        A new DataFrame with the same columns as ``X``.
    """
    X_processed = X.copy()

    # Nothing to fit on a frame with no rows or no columns; the scikit-learn
    # estimators below reject such input.
    if X_processed.empty:
        return X_processed

    # "number" covers every numeric dtype, not only float64/int64.
    numeric_cols = X_processed.select_dtypes(include="number").columns
    if len(numeric_cols) > 0:
        # The imputer is only built when there is at least one numeric column,
        # because fitting it on zero features raises an error.
        imputer = SimpleImputer(strategy="mean")
        X_processed[numeric_cols] = imputer.fit_transform(X_processed[numeric_cols])

    # NOTE(review): missing values in categorical columns are handed to LabelEncoder
    # as-is; confirm the installed scikit-learn version accepts them.
    categorical_cols = X_processed.select_dtypes(include=["object", "category"]).columns
    for col in categorical_cols:
        X_processed[col] = LabelEncoder().fit_transform(X_processed[col])

    return X_processed

In [29]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import joblib
from pathlib import Path
import os

# load_data and build_pipeline are defined in earlier cells of this notebook,
# so the package imports below are intentionally left disabled:
# from intrusion_ml.data_loader import load_data
# from intrusion_ml.pipeline import build_pipeline

def train_and_evaluate(
    data_path: str | Path = "C:/Users/DELL/OneDrive/Desktop/anu course/datasets/intrusion1.csv",
    model_dir: str | Path = "C:/Users/DELL/OneDrive/Desktop/anu course/models",
    test_size: float = 0.2,
    random_state: int = 42,
):
    """Train the intrusion-detection pipeline, print a report, and save the model.

    Args:
        data_path: CSV file passed to ``load_data``.
        model_dir: Directory that receives ``intrusion_model.pkl``; it is created
            (including parents) if it does not exist.
        test_size: Fraction of rows held out for evaluation.
        random_state: Seed for the train/test split.

    Returns:
        The fitted pipeline returned by ``build_pipeline()``.
    """
    X, y = load_data(data_path)

    # NOTE(review): preprocess_data fits its imputer and encoders on the full
    # dataset before the split, so held-out rows influence the imputed means.
    # NOTE(review): load_data only removes the target column, so the identifier
    # column `session_id` is label-encoded and used as a feature; confirm that
    # this is intended.
    X = preprocess_data(X)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    pipe = build_pipeline()
    pipe.fit(X_train, y_train)

    y_pred = pipe.predict(X_test)
    print(classification_report(y_test, y_pred))

    model_path = Path(model_dir) / "intrusion_model.pkl"
    model_path.parent.mkdir(parents=True, exist_ok=True)
    joblib.dump(pipe, model_path)
    print("Model saved successfully")
    return pipe

In [30]:
def main() -> None:
    """Notebook entry point: run one train/evaluate/save cycle."""
    # The fitted pipeline returned by train_and_evaluate() is not used here.
    train_and_evaluate()
    return None




In [31]:
# Run the workflow only when this code executes as the top-level module.
if __name__ == "__main__":
    main()

              precision    recall  f1-score   support

           0       0.84      1.00      0.91      1042
           1       1.00      0.77      0.87       866

    accuracy                           0.89      1908
   macro avg       0.92      0.88      0.89      1908
weighted avg       0.91      0.89      0.89      1908

Model saved successfully
