**Import Necessary Libraries**

In [1]:
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim

from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

**Load and Read Train Data**

In [2]:
path = "C:/Users/shakib/Downloads/Train Data.csv"
train_data = pd.read_csv(path)

**1. Extract Label and Features** <br>
**2. Convert to Numpy Array**

In [3]:
X = train_data.drop(columns=['Label'])
y = train_data['Label']

X = np.asarray(X)
y = np.asarray(y)

**Define K-Fold Cross-Validation**

In [4]:
kfold = KFold(n_splits=5, shuffle=True, random_state=4)

**Initialize Base Models**

In [5]:
DT = DecisionTreeClassifier()
SVM = SVC(probability=True)

class FNN(nn.Module):
    def __init__(self, input_dim):
        super(FNN, self).__init__()
        self.fc1 = nn.Linear(input_dim, 64)
        self.dropout1 = nn.Dropout(0.2)
        self.fc2 = nn.Linear(64, 32)
        self.dropout2 = nn.Dropout(0.1)
        self.fc3 = nn.Linear(32, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.dropout1(x)
        x = torch.relu(self.fc2(x))
        x = self.dropout2(x)
        x = torch.sigmoid(self.fc3(x))
        return x

**Define Hyperparameter Grids for GridSearchCV**

In [6]:
dt_param_grid = {
    'max_depth': [None, 4, 5, 6],
    'min_samples_split': [2, 5, 10]
}

svm_param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'rbf']
}

**Train and Predict**

In [7]:
# Track and extract base model performances and features
meta_features = []
true_labels = []
SVM_scores = []
FNN_scores = []
DT_scores = []

# K-fold cross-validation loop
for train_index, test_index in kfold.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Train and get predictions from Decision Tree with GridSearchCV
    dt_grid_search = GridSearchCV(estimator=DT, param_grid=dt_param_grid, scoring='roc_auc', cv=5, verbose=0, n_jobs=-1)
    dt_grid_search.fit(X_train, y_train)
    decision_tree_pred = dt_grid_search.predict_proba(X_test)[:, 1]
    dt_roc_auc = roc_auc_score(y_test, decision_tree_pred)
    DT_scores.append(dt_roc_auc)

    # Train and get predictions from SVM with GridSearchCV
    svm_grid_search = GridSearchCV(estimator=SVM, param_grid=svm_param_grid, scoring='roc_auc', cv=5, verbose=0, n_jobs=-1)
    svm_grid_search.fit(X_train, y_train)
    svm_pred = svm_grid_search.predict_proba(X_test)[:, 1]
    svm_roc_auc = roc_auc_score(y_test, svm_pred)
    SVM_scores.append(svm_roc_auc)

    # Train and get predictions from Feedforward Neural Network with early stopping
    X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
    X_test_tensor = torch.tensor(X_test, dtype=torch.float32)

    fnn = FNN(input_dim=X.shape[1])
    criterion = nn.BCELoss()
    optimizer = optim.Adam(fnn.parameters(), lr=0.01)

    # Early stopping parameters
    early_stop_patience = 10
    best_val_loss = float('inf')
    patience_counter = 0
    best_model_state = None  # Variable to store the best model state

    for epoch in range(60):
        # Training phase
        fnn.train()
        optimizer.zero_grad()
        outputs = fnn(X_train_tensor)
        loss = criterion(outputs, y_train_tensor)
        loss.backward()
        optimizer.step()

        # Calculate validation loss
        fnn.eval()
        with torch.no_grad():
            val_outputs = fnn(X_test_tensor)
            val_loss = criterion(val_outputs, torch.tensor(y_test, dtype=torch.float32).view(-1, 1))

        # Early stopping check
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0  # Reset counter if validation loss improves
            best_model_state = fnn.state_dict()  # Save the model state if it improves
        else:
            patience_counter += 1  # Increment counter if no improvement

        if patience_counter >= early_stop_patience:
            fnn.load_state_dict(best_model_state)  # Load the best model if early stopped
            break

    # Make predictions with the trained Feedforward Neural Network model
    fnn.eval()
    with torch.no_grad():
        nn_pred = fnn(X_test_tensor).numpy().flatten()
    nn_roc_auc = roc_auc_score(y_test, nn_pred)
    FNN_scores.append(nn_roc_auc)

    # Aggregate predictions for meta model
    meta_features.append(np.column_stack((decision_tree_pred, svm_pred, nn_pred)))
    true_labels.append(y_test)

**Concatenate Meta Features and True Labels**

In [8]:
meta_features = np.vstack(meta_features)
true_labels = np.concatenate(true_labels)

**Convert to DataFrame for Tabular Display**

In [9]:
table = pd.DataFrame(meta_features, columns=["Feature 1", "Feature 2", "Feature 3"])
table["True Label"] = true_labels
table.head(5)

Unnamed: 0,Feature 1,Feature 2,Feature 3,True Label
0,0.111111,0.067589,0.039787,1.0
1,0.008097,0.001885,0.001088,0.0
2,1.0,0.975924,0.985845,1.0
3,0.008097,0.010383,0.003549,0.0
4,0.008097,0.009111,0.001921,0.0


**Initialize Test Meta Models**

In [10]:
LR_meta_model = LogisticRegression(C=0.01, solver='liblinear')
KNN_meta_model = KNeighborsClassifier(n_neighbors = 10)
GB_meta_model = GradientBoostingClassifier(random_state=42)
RF_meta_model = RandomForestClassifier(random_state=42)

**Train Meta Models on Meta Features**

In [11]:
LR_meta_model.fit(meta_features, true_labels)
KNN_meta_model.fit(meta_features, true_labels)
GB_meta_model.fit(meta_features, true_labels)
RF_meta_model.fit(meta_features, true_labels)

**Get Predictions from Meta Models**

In [12]:
LR_meta_predictions = LR_meta_model.predict(meta_features)
KNN_meta_predictions = KNN_meta_model.predict(meta_features)
GB_meta_predictions = GB_meta_model.predict(meta_features)
RF_meta_predictions = RF_meta_model.predict(meta_features)

In [13]:
# Initialize test meta models
LR_meta_model = LogisticRegression(C=0.01, solver='liblinear')
KNN_meta_model = KNeighborsClassifier(n_neighbors = 10)
GB_meta_model = GradientBoostingClassifier(random_state=42)
RF_meta_model = RandomForestClassifier(random_state=42)

# Train meta models on meta features
LR_meta_model.fit(meta_features, true_labels)
KNN_meta_model.fit(meta_features, true_labels)
GB_meta_model.fit(meta_features, true_labels)
RF_meta_model.fit(meta_features, true_labels)

# Get predictions from meta models
LR_meta_predictions = LR_meta_model.predict(meta_features)
KNN_meta_predictions = KNN_meta_model.predict(meta_features)
GB_meta_predictions = GB_meta_model.predict(meta_features)
RF_meta_predictions = RF_meta_model.predict(meta_features)

# Convert probabilities to binary predictions
LR_meta_labels = (LR_meta_predictions >= 0.5).astype(int)
KNN_meta_labels = (KNN_meta_predictions >= 0.5).astype(int)
GB_meta_labels = (GB_meta_predictions >= 0.5).astype(int)
RF_meta_labels = (RF_meta_predictions >= 0.5).astype(int)

# Calculate accuracy
LR_accuracy = accuracy_score(true_labels, LR_meta_labels)
KNN_accuracy = accuracy_score(true_labels, KNN_meta_labels)
GB_accuracy = accuracy_score(true_labels, GB_meta_labels)
RF_accuracy = accuracy_score(true_labels, RF_meta_labels)

**Convert Probabilities to Binary Predictions**

In [14]:
LR_meta_labels = (LR_meta_predictions >= 0.5).astype(int)
KNN_meta_labels = (KNN_meta_predictions >= 0.5).astype(int)
GB_meta_labels = (GB_meta_predictions >= 0.5).astype(int)
RF_meta_labels = (RF_meta_predictions >= 0.5).astype(int)

**Calculate Accuracy**

In [15]:
LR_accuracy = accuracy_score(true_labels, LR_meta_labels)
KNN_accuracy = accuracy_score(true_labels, KNN_meta_labels)
GB_accuracy = accuracy_score(true_labels, GB_meta_labels)
RF_accuracy = accuracy_score(true_labels, RF_meta_labels)

**Load and Read Unseen Data**

In [16]:
path2 = "C:/Users/shakib/Downloads/Unseen Data.csv"
unseen_data = pd.read_csv(path2)

**Extract Label and Features from Unseen Data**

In [17]:
X_unseen = unseen_data.drop(columns=['Label']).values 
true_labels_unseen = unseen_data['Label'].values

# For Neural Network
X_unseen_tensor = torch.tensor(X_unseen, dtype=torch.float32)

**Generate Predictions from Trained Base Models on Unseen Data**

In [18]:
decision_tree_pred_unseen = dt_grid_search.predict_proba(X_unseen)[:, 1]
svm_pred_unseen = svm_grid_search.predict_proba(X_unseen)[:, 1]

fnn.eval()
with torch.no_grad():
    nn_pred_unseen = fnn(X_unseen_tensor).numpy().flatten()

**Aggregate Predictions for Meta Features**

In [19]:
meta_features_unseen = np.column_stack((decision_tree_pred_unseen, svm_pred_unseen, nn_pred_unseen))

**Get Predictions from Meta Models for Unseen Data**

In [20]:
LR_meta_predictions_unseen = LR_meta_model.predict(meta_features_unseen)
KNN_meta_predictions_unseen = KNN_meta_model.predict(meta_features_unseen)
GB_meta_predictions_unseen = GB_meta_model.predict(meta_features_unseen)
RF_meta_predictions_unseen = RF_meta_model.predict(meta_features_unseen)

**Convert Probabilities to Binary Predictions**

In [21]:
LR_meta_labels_unseen = (LR_meta_predictions_unseen >= 0.5).astype(int)
KNN_meta_labels_unseen = (KNN_meta_predictions_unseen >= 0.5).astype(int)
GB_meta_labels_unseen = (GB_meta_predictions_unseen >= 0.5).astype(int)
RF_meta_labels_unseen = (RF_meta_predictions_unseen >= 0.5).astype(int)

**Calculate Accuracy for Unseen Data**

In [22]:
LR_accuracy_unseen = accuracy_score(true_labels_unseen, LR_meta_labels_unseen)
KNN_accuracy_unseen = accuracy_score(true_labels_unseen, KNN_meta_labels_unseen)
GB_accuracy_unseen = accuracy_score(true_labels_unseen, GB_meta_labels_unseen)
RF_accuracy_unseen = accuracy_score(true_labels_unseen, RF_meta_labels_unseen)

**Results Table**

In [23]:
data = {
    'Meta Models': [
        'Logistic Regression', 
        'K-Nearest Neighbors', 
        'Gradient Boost',
        'Random Forest'
    ],
    'Train Data Accuracy': [
        f"{LR_accuracy * 100:.2f}%", 
        f"{KNN_accuracy * 100:.2f}%", 
        f"{GB_accuracy * 100:.2f}%",
        f"{RF_accuracy * 100:.2f}%"
    ],
    'Unseen Data Accuracy': [
        f"{LR_accuracy_unseen * 100:.2f}%", 
        f"{KNN_accuracy_unseen * 100:.2f}%", 
        f"{GB_accuracy_unseen * 100:.2f}%",
        f"{RF_accuracy_unseen * 100:.2f}%"
    ]
}

df = pd.DataFrame(data)
df

Unnamed: 0,Meta Models,Train Data Accuracy,Unseen Data Accuracy
0,Logistic Regression,92.80%,92.14%
1,K-Nearest Neighbors,93.90%,88.57%
2,Gradient Boost,98.40%,91.43%
3,Random Forest,100.00%,91.43%
