# Malware Detection with Machine Learning

A complete pipeline for training a Random Forest malware classifier.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
import joblib

print('✓ Imports successful')

## Generate Synthetic Dataset

In [None]:
# Generate synthetic malware data: a balanced set of standard-normal feature
# vectors, half labelled malicious (1) and half benign (0).
np.random.seed(42)  # fixed seed so every run reproduces the same dataset

n_samples = 10000
n_features = 87
n_per_class = n_samples // 2  # both classes get the same number of rows

# Malicious samples: shift column 1 upward (stand-in for higher entropy)
malicious = np.random.randn(n_per_class, n_features)
malicious[:, 1] += 2.0

# Benign samples: shift column 1 downward (stand-in for lower entropy)
benign = np.random.randn(n_per_class, n_features)
benign[:, 1] -= 0.5

# Malicious rows are stacked first, so the leading block of labels is 1.
X = np.vstack([malicious, benign])
y = np.hstack([np.ones(n_per_class), np.zeros(n_per_class)])

print(f'Dataset: {X.shape}')
# ndarray.sum() counts matches in native code; the builtin sum() would walk
# the array one element at a time in Python.
print(f'Malicious: {int((y == 1).sum())}, Benign: {int((y == 0).sum())}')

## Train Model

In [None]:
# Split data: hold out 20% for testing. Stratifying on y keeps the class
# ratio identical in the train and test partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Scale features. The scaler is fitted on the training partition only, so no
# statistics from the test partition leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train Random Forest: 100 trees, each capped at depth 20, with a fixed seed
# for reproducibility; n_jobs=-1 uses all available CPU cores.
model = RandomForestClassifier(
    n_estimators=100,
    max_depth=20,
    random_state=42,
    n_jobs=-1
)

print('Training model...')
model.fit(X_train_scaled, y_train)
print('✓ Training complete!')

## Evaluate Model

In [None]:
# Make predictions on the held-out, scaled test partition
y_pred = model.predict(X_test_scaled)

# Human-readable class names in sorted label order: the dataset labels
# benign samples 0 and malicious samples 1.
class_names = ['Benign', 'Malicious']

# Print metrics (per-class precision / recall / F1)
print('Classification Report:')
print(classification_report(y_test, y_pred, target_names=class_names))

# Confusion matrix: rows are true labels, columns are predicted labels,
# both in sorted label order, which matches class_names.
cm = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.title('Confusion Matrix')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.show()

## Save Model

In [None]:
# Save the trained model and its fitted scaler. Both are needed at inference
# time: inputs must be scaled exactly as they were during training.
from pathlib import Path

# Relative path, resolved against the current working directory.
model_dir = Path('../../backend/models')
# joblib.dump does not create missing directories, so make sure the target
# exists instead of failing with FileNotFoundError after training has run.
model_dir.mkdir(parents=True, exist_ok=True)

joblib.dump(model, model_dir / 'malware_detector_rf.pkl')
joblib.dump(scaler, model_dir / 'scaler_malware.pkl')
print('✓ Model saved!')