In [5]:
# train_model_7_features.py
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import joblib

# Load both benign and crypto datasets
benign = pd.read_csv("benign_set.csv")
crypto = pd.read_csv("crypto_set.csv")

# Combine
data = pd.concat([benign, crypto], ignore_index=True)

print("Dataset shape:", data.shape)
print("Columns:", data.columns.tolist())

# Convert 'label' column to numeric
# (benign â†’ 0, crypto â†’ 1)
data['label'] = data['label'].replace({'benign': 0, 'crypto': 1, 'malicious': 1})

# Ensure all features exist
features = [
    "websocket",
    "wasm",
    "hash_function",
    "webworkers",
    "messageloop_load",
    "postmessage_load",
    "parallel_functions"
]

for col in features:
    if col not in data.columns:
        data[col] = 0  # default if missing

X = data[features]
y = data["label"]

# Split and train
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

print("âœ… Training Accuracy:", clf.score(X_train, y_train))
print("âœ… Testing Accuracy:", clf.score(X_test, y_test))

# Save model
joblib.dump(clf, "cryptojacking_detector_7feat.pkl")
print("ðŸ’¾ Model saved as cryptojacking_detector_7feat.pkl")


Dataset shape: (4000, 8)
Columns: ['label', 'websocket', 'wasm', 'hash_function', 'webworkers', 'messageloop_load', 'postmessage_load', 'parallel_functions']


  data['label'] = data['label'].replace({'benign': 0, 'crypto': 1, 'malicious': 1})


âœ… Training Accuracy: 1.0
âœ… Testing Accuracy: 0.99875
ðŸ’¾ Model saved as cryptojacking_detector_7feat.pkl
