In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
import joblib
import os

In [None]:

# 1. Load keylogger behavior data (a synthetic dataset is generated if the CSV is missing)
def load_dataset(path="keylogger_data.csv", n_samples=1000, seed=None):
    """Load the keylogger-detection dataset, synthesising it if it is missing.

    When ``path`` does not exist, a table of ``n_samples`` random rows is
    generated, written to ``path`` as CSV (without the index) and returned.
    When ``path`` exists, it is read with ``pd.read_csv`` and ``n_samples`` /
    ``seed`` are ignored.

    Note: the synthetic ``is_keylogger`` label is drawn independently of the
    feature columns, so the generated data carries no learnable signal; it is
    only useful for exercising the pipeline end to end.

    Parameters
    ----------
    path : str
        CSV file to read, or to create when it does not exist.
    n_samples : int
        Number of synthetic rows to generate when the file is missing.
    seed : int or None
        Seed for a private ``RandomState`` used for generation. ``None``
        draws from NumPy's global random state, so a caller-side
        ``np.random.seed(...)`` keeps working as before.

    Returns
    -------
    pandas.DataFrame
        Six feature columns followed by the binary ``is_keylogger`` label.
    """
    if not os.path.exists(path):
        print("Dataset not found. Creating synthetic keylogger detection dataset...")

        # The np.random module and RandomState expose the same uniform/randint
        # API, so the generation code below is agnostic to which one is used.
        rng = np.random if seed is None else np.random.RandomState(seed)

        # Column order matters: it fixes the order in which random draws are
        # consumed and therefore the values produced for a given seed.
        data = {
            "keystroke_rate": rng.uniform(0, 20, n_samples),          # keys/sec
            "screenshot_freq": rng.randint(0, 2, n_samples),          # 0/1
            "file_access_rate": rng.uniform(0, 30, n_samples),        # files/min
            "network_bytes_sent": rng.randint(0, 50000, n_samples),
            "is_hidden_process": rng.randint(0, 2, n_samples),
            "process_duration": rng.uniform(1, 300, n_samples),       # seconds
            "is_keylogger": rng.randint(0, 2, n_samples),
        }

        df = pd.DataFrame(data)
        df.to_csv(path, index=False)
    else:
        df = pd.read_csv(path)

    print(f"Loaded dataset with shape: {df.shape}")
    return df


In [None]:
def preprocess(df, target="is_keylogger"):
    """Split a frame into standardised features and the label column.

    Every column except ``target`` is treated as a feature and passed through
    a freshly fitted ``StandardScaler``. The scaler is fit on all rows of
    ``df``; to keep evaluation free of leakage, pass only the training rows
    here and apply the returned scaler's ``transform`` to held-out rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing the feature columns and the ``target`` column.
    target : str
        Name of the label column (defaults to ``"is_keylogger"``).

    Returns
    -------
    tuple
        ``(X_scaled, y, scaler)``: the output of ``scaler.fit_transform`` on
        the feature columns, the label column as a Series, and the fitted
        scaler.

    Raises
    ------
    KeyError
        If ``target`` is not a column of ``df``.
    """
    features = df.drop(columns=[target])
    y = df[target]

    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(features)
    return X_scaled, y, scaler

In [None]:
def train_model(X_train, y_train, n_estimators=100, random_state=42):
    """Fit an AdaBoost classifier on the training data.

    Parameters
    ----------
    X_train : array-like
        Training feature matrix.
    y_train : array-like
        Training labels.
    n_estimators : int
        Number of boosting rounds passed to ``AdaBoostClassifier``.
    random_state : int or None
        Seed passed to the classifier so repeated runs give the same model.
        Pass ``None`` to restore unseeded behaviour.

    Returns
    -------
    AdaBoostClassifier
        The fitted model.
    """
    model = AdaBoostClassifier(n_estimators=n_estimators, random_state=random_state)
    model.fit(X_train, y_train)
    return model

In [None]:
def evaluate(model, X_test, y_test):
    """Print a classification report and plot the confusion matrix.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``predict``.
    X_test : array-like
        Held-out feature matrix.
    y_test : array-like
        True labels for ``X_test``.

    Returns
    -------
    None
        Results are printed and shown as a figure.
    """
    y_pred = model.predict(X_test)
    print("\nClassification Report:\n", classification_report(y_test, y_pred))

    cm = confusion_matrix(y_test, y_pred)

    # Draw on a dedicated figure/axes instead of the implicit "current" axes,
    # so the heatmap never lands on a figure left open by earlier code.
    fig, ax = plt.subplots()
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", ax=ax)
    ax.set_title("Confusion Matrix")
    ax.set_xlabel("Predicted")
    ax.set_ylabel("Actual")
    plt.show()

In [None]:
def save(model, scaler, out_dir="models"):
    """Persist the fitted model and scaler with joblib.

    Writes ``keylogger_model.pkl`` and ``keylogger_scaler.pkl`` into
    ``out_dir``, creating the directory if needed.

    Parameters
    ----------
    model : object
        Fitted estimator to serialise.
    scaler : object
        Fitted scaler to serialise alongside the model.
    out_dir : str
        Destination directory (defaults to ``"models"``).

    Notes
    -----
    joblib files are pickle-based; only load them back from trusted sources.
    """
    os.makedirs(out_dir, exist_ok=True)
    joblib.dump(model, os.path.join(out_dir, "keylogger_model.pkl"))
    joblib.dump(scaler, os.path.join(out_dir, "keylogger_scaler.pkl"))
    print(f"Model and scaler saved to '{out_dir}/'")

In [None]:
if __name__ == "__main__":
    # End-to-end pipeline: load data, split, scale, train, evaluate, persist.
    df = load_dataset()

    # Split BEFORE scaling so the scaler's mean/variance come from the training
    # rows only; fitting on the full table would leak test-set statistics into
    # the features the model is trained on.
    train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

    # Fit the scaler on the training rows and reuse it, unchanged, on the
    # held-out rows.
    X_train, y_train, scaler = preprocess(train_df)
    X_test = scaler.transform(test_df.drop("is_keylogger", axis=1))
    y_test = test_df["is_keylogger"]

    model = train_model(X_train, y_train)
    evaluate(model, X_test, y_test)
    save(model, scaler)