In [1]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import StackingClassifier

from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import confusion_matrix

from sklearn.model_selection import train_test_split


import pandas as pd

In [None]:

# Load the pre-processed dataset and discard the 'getDomain' column.
file_path = 'dataset/preProcessed.csv'
df = pd.read_csv(file_path).drop(columns='getDomain')

# Set aside 15 rows (fixed seed) for a final prediction demo and remove them
# from the modelling data so they never enter the train/test split below.
predict_rows = df.sample(n=15, random_state=42)
df = df.drop(index=predict_rows.index)
predict_features = predict_rows.drop(columns='labels')
predict_labels = predict_rows['labels']

# Separate the target column ('labels') from the feature columns.
df_X = df.drop(columns='labels')
df_Y = df['labels']

# 80/20 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    df_X,
    df_Y,
    test_size=0.20,
    random_state=42,
)





In [None]:


def _make_rf():
    """Return a fresh, unfitted random forest configured as in every set that uses one."""
    return RandomForestClassifier(criterion='entropy', max_features='sqrt', min_samples_leaf=1,
                                  min_samples_split=3, n_estimators=100)


def _make_mlp():
    """Return a fresh, unfitted MLP configured as in every set that uses one."""
    return MLPClassifier(max_iter=500, activation='relu', alpha=0.001, hidden_layer_sizes=(20,),
                         learning_rate='adaptive', solver='adam')


def _make_knn():
    """Return a fresh, unfitted k-NN classifier configured as in every set that uses one."""
    return KNeighborsClassifier(algorithm='auto', leaf_size=15, n_neighbors=20, p=1, weights='distance')


def _make_svm():
    """Return a fresh, unfitted RBF-kernel SVC configured as in every set that uses one."""
    return SVC(C=10, kernel='rbf', tol=0.001)


# The four 3-out-of-4 combinations of the base learners. Each set gets its own
# estimator instances, exactly as when the constructors were written out inline.
base_learners_set1 = [('rf', _make_rf()), ('mlp', _make_mlp()), ('knn', _make_knn())]

base_learners_set2 = [('rf', _make_rf()), ('mlp', _make_mlp()), ('svm', _make_svm())]

base_learners_set3 = [('rf', _make_rf()), ('knn', _make_knn()), ('svm', _make_svm())]

base_learners_set4 = [('mlp', _make_mlp()), ('knn', _make_knn()), ('svm', _make_svm())]


base_learners = [base_learners_set1, base_learners_set2, base_learners_set3, base_learners_set4]

# Fit one stacking ensemble per base-learner set (logistic-regression meta learner),
# report test-set metrics, then predict the 15 held-out rows.
for base_learner_group in base_learners:
    meta_learner = LogisticRegression()
    clf = StackingClassifier(estimators=base_learner_group, final_estimator=meta_learner)

    # Train the model on the full training data
    clf.fit(X_train, y_train)

    # Predict on the test set and evaluate
    test_predictions = clf.predict(X_test)
    # ROC AUC needs a continuous score, not thresholded labels: scoring hard
    # predictions collapses the ROC curve to a single operating point.
    # Column 1 is the probability of clf.classes_[1] (the greater label).
    # NOTE(review): assumes a binary target, which the default (binary) averaging
    # of the F1/recall/precision calls below already requires -- confirm.
    test_scores = clf.predict_proba(X_test)[:, 1]
    print('Test Set Evaluation:')
    print('Accuracy:', accuracy_score(y_test, test_predictions)*100)
    print('F1 Score:', f1_score(y_test, test_predictions)*100)
    print('Recall:', recall_score(y_test, test_predictions)*100)
    print('Precision:', precision_score(y_test, test_predictions)*100)
    print('ROC AUC:', roc_auc_score(y_test, test_scores)*100)
    print('Confusion Matrix:', confusion_matrix(y_test, test_predictions))
    print('-----------------------------------------\n')

    # Predict on the 15 rows and compare with actual labels.
    # The model must be given the feature columns (predict_features); passing
    # the label Series here was a bug.
    predict_predictions = clf.predict(predict_features)
    print('Prediction on 15 Rows:')
    print('Predicted Labels:', predict_predictions)
    print('Actual Labels:', predict_labels.values)
    print('-----------------------------------------\n')