Training and testing the model

In [1]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score
import numpy as np
import joblib

In [2]:
#Load the data
#Train data
npz = np.load('../../../../../data/processed/android_malware/train_data.npz',  allow_pickle=True)
train_inputs, train_targets = npz['inputs'].astype(np.float64), npz['targets'].astype(np.int64)

#Validation data
npz = np.load('../../../../../data/processed/android_malware/validation_data.npz',  allow_pickle=True)
validation_inputs, validation_targets = npz['inputs'].astype(np.float64), npz['targets'].astype(np.int64)

#Test data
npz = np.load('../../../../../data/processed/android_malware/test_data.npz',  allow_pickle=True)
test_inputs, test_targets = npz['inputs'].astype(np.float64), npz['targets'].astype(np.int64)

In [3]:
#Define and train the model
MAX_DEPTH = 20
N_ESTIMATORS = 100

clf_random_f = RandomForestClassifier(n_estimators=N_ESTIMATORS, max_depth=MAX_DEPTH, random_state=42, n_jobs=-1)
clf_random_f.fit(train_inputs, train_targets)

In [4]:
#Test the model with train data
y_train_pred = clf_random_f.predict(train_inputs)
print("F1 Score (Train inputs):", f1_score(y_train_pred, train_targets, average='weighted'))

F1 Score (Train inputs): 0.9582138014282402


In [5]:
#Test the model with test data
y_val_pred = clf_random_f.predict(validation_inputs)
print("F1 Score (Test inputs):", f1_score(y_val_pred, validation_targets, average='weighted'))

F1 Score (Test inputs): 0.9335565651824984


In [6]:
#Test the model with test data
y_test_pred = clf_random_f.predict(test_inputs)
print("F1 Score (Test inputs):", f1_score(y_test_pred, test_targets, average='weighted'))

F1 Score (Test inputs): 0.93503602842302


In [7]:
#Save the model
joblib.dump(clf_random_f, '../../../../../models/android_malware/random_forest/android_malware_detector.pkl')

['../../../../../models/android_malware/random_forest/android_malware_detector.pkl']