In [8]:
import pandas as pd
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler

# Load the preprocessed CSV file
df = pd.read_csv('dataset.csv')

# Separate features (X) and target variable (y)
X = df.drop(['num', 'dataset', 'id'], axis=1)
y = df['num']

In [9]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features using StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [20]:
# Initialize base models
rf_model = RandomForestClassifier()
lr_model = LogisticRegression()

# Train base models
rf_model.fit(X_train_scaled, y_train)
lr_model.fit(X_train_scaled, y_train)

# Make predictions of base models
rf_predictions = rf_model.predict(X_train_scaled)
lr_predictions = lr_model.predict(X_train_scaled)

# Stack predictions of base models
stacked_predictions = []
for rf_pred, lr_pred in zip(rf_predictions, lr_predictions):
    stacked_predictions.append([rf_pred, lr_pred])
    
# Initialize AdaBoost for stacking
adaboost_model = AdaBoostClassifier(algorithm='SAMME')

# Train AdaBoost model
adaboost_model.fit(stacked_predictions, y_train)

# Make predictions on test set
rf_test_predictions = rf_model.predict(X_test_scaled)
lr_test_predictions = lr_model.predict(X_test_scaled)
stacked_test_predictions = []
for rf_pred, lr_pred in zip(rf_test_predictions, lr_test_predictions):
    stacked_test_predictions.append([rf_pred, lr_pred])
    
# Make predictions using AdaBoost
adaboost_predictions = adaboost_model.predict(stacked_test_predictions)

In [21]:
# Evaluate performance
accuracy = accuracy_score(y_test, adaboost_predictions)
print("Accuracy:", accuracy)

conf_matrix = confusion_matrix(y_test, adaboost_predictions)
false_positive_rate = conf_matrix[0, 1] / (conf_matrix[0, 0] + conf_matrix[0, 1])

print("False Positive Rate:", false_positive_rate)

precision = precision_score(y_test, adaboost_predictions)
recall = recall_score(y_test, adaboost_predictions)
f1 = f1_score(y_test, adaboost_predictions)

# Calculate specificity
tn, fp, fn, tp = confusion_matrix(y_test, adaboost_predictions).ravel()
specificity = tn / (tn + fp)

print("Precision:", precision)
print("Recall:", recall)
print("Specificity:", specificity)
print("F1 Score:", f1)

Accuracy: 0.7913669064748201
False Positive Rate: 0.17142857142857143
Precision: 0.8125
Recall: 0.7536231884057971
Specificity: 0.8285714285714286
F1 Score: 0.7819548872180451


In [22]:
from joblib import dump
dump(scaler, 'scaler.pkl')
dump(rf_model, 'rf_model.pkl')
dump(lr_model, 'lr_model.pkl')
dump(adaboost_model, 'adaboost_model.pkl')

['adaboost_model.pkl']