In [None]:
import random

import numpy as np
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Dense, Dropout, Embedding, Input, LSTM
from tensorflow.keras.models import Sequential


In [None]:
# Vocabulary of system-call names that the synthetic traces are sampled from.
syscalls = [
    "open", "read", "write", "close",
    "exec", "connect", "accept", "fork",
    "kill", "chmod", "chown", "unlink",
    "reboot", "mount", "ptrace", "bind",
]

# Map each syscall name to a positive integer id; ids start at 1 because
# 0 is reserved for padding.
syscall_to_int = {name: idx for idx, name in enumerate(syscalls, start=1)}


In [None]:
def generate_sequence(is_malicious, length=30):
    """Generate one synthetic syscall trace encoded as integer ids.

    The trace is sampled with replacement from the full ``syscalls``
    vocabulary plus a class-specific pattern list. Every pattern name also
    occurs in ``syscalls``, so it appears twice in the sampling population
    and is drawn twice as often as the remaining calls.

    Args:
        is_malicious: Truthy to bias the trace towards the "malicious"
            pattern (fork/exec/ptrace/chmod/unlink/reboot); falsy to bias it
            towards the "benign" pattern (open/read/write/close/connect/accept).
        length: Number of syscalls in the trace. Defaults to 30.

    Returns:
        A list of ``length`` integers looked up in ``syscall_to_int``.

    Raises:
        ValueError: If ``length`` is negative.
    """
    if length < 0:
        raise ValueError(f"length must be non-negative, got {length}")

    if is_malicious:
        # Malware typically does unusual or dangerous patterns
        pattern = ["fork", "exec", "ptrace", "chmod", "unlink", "reboot"]
    else:
        # Normal applications have common syscall patterns
        pattern = ["open", "read", "write", "close", "connect", "accept"]

    # Only the pattern differs between the two classes, so sample once here.
    # The population order (pattern first, then the vocabulary) is kept so a
    # seeded run yields the same trace as before.
    seq = random.choices(pattern + syscalls, k=length)
    return [syscall_to_int[s] for s in seq]

In [None]:
def generate_dataset(num_samples=1000):
    """Build a balanced, labelled set of synthetic syscall traces.

    Each of the ``num_samples`` rounds contributes one malicious trace
    (label 1) followed by one benign trace (label 0), so the result holds
    ``2 * num_samples`` rows with alternating labels.

    Args:
        num_samples: Number of malicious/benign pairs to generate.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays: the encoded traces and their
        0/1 labels, in generation order.
    """
    sequences, labels = [], []
    for _ in range(num_samples):
        # Malicious first, then benign: same order of random draws per round.
        for label in (1, 0):
            sequences.append(generate_sequence(is_malicious=bool(label)))
            labels.append(label)
    return np.array(sequences), np.array(labels)

In [None]:
def build_model(vocab_size, input_length):
    """Build and compile the LSTM binary classifier for syscall traces.

    Args:
        vocab_size: Number of distinct syscall ids, not counting id 0. The
            embedding table gets ``vocab_size + 1`` rows so that ids
            ``0..vocab_size`` are all valid indices.
        input_length: Number of syscall ids in each input sequence.

    Returns:
        A compiled ``Sequential`` model (binary cross-entropy loss, Adam
        optimizer, accuracy metric) that outputs one sigmoid probability
        per input sequence.
    """
    model = Sequential([
        # Declare the input shape with an explicit Input layer rather than
        # Embedding(input_length=...), which Keras 3 deprecates and ignores
        # with a warning.
        Input(shape=(input_length,)),
        Embedding(input_dim=vocab_size + 1, output_dim=32),
        # Only the final hidden state feeds the dense classifier head.
        LSTM(64, return_sequences=False),
        Dropout(0.3),
        Dense(64, activation='relu'),
        Dense(1, activation='sigmoid')
    ])
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model


In [None]:
def train(seed=42):
    """Generate synthetic data, train the classifier and print a report.

    Args:
        seed: Seed applied to the global ``random`` module (which drives the
            synthetic data generation) and used as ``random_state`` for the
            train/test split. Pass ``None`` to leave both unseeded. Model
            weight initialisation and batch shuffling are not seeded here,
            so the reported metrics can still vary slightly between runs.

    Side effects:
        Reseeds the global ``random`` generator when ``seed`` is not None,
        trains a model, and prints a classification report to stdout.
    """
    if seed is not None:
        # Without this the dataset differs on every run, even though the
        # split below is pinned with a fixed random_state.
        random.seed(seed)

    X, y = generate_dataset()
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=seed
    )

    model = build_model(vocab_size=len(syscalls), input_length=X.shape[1])
    model.fit(X_train, y_train, epochs=5, batch_size=32, validation_split=0.1, verbose=1)

    # predict() returns an (n, 1) column of probabilities; threshold at 0.5
    # and flatten so the predictions have the same 1-D shape as y_test.
    preds = (model.predict(X_test) > 0.5).astype(int).ravel()
    print("\nClassification Report:\n", classification_report(y_test, preds))


In [None]:
# Entry point: run the full generate/train/evaluate pipeline when this code
# is executed directly (as a script, or as a cell in a notebook kernel, where
# __name__ is also "__main__") rather than imported as a module.
if __name__ == "__main__":
    train()