**Import Necessary Libraries**

In [1]:
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim

from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold
from sklearn.svm import SVC

**Load and Read Train Data**

In [2]:
path = "C:/Users/shakib/Downloads/Train Data.csv"
train_data = pd.read_csv(path)

**1. Extract Label and Features** <br>
**2. Convert to Numpy Array**

In [3]:
X = train_data.drop(columns=['Label'])
y = train_data['Label']

X = np.asarray(X)
y = np.asarray(y)

**Define K-Fold Cross-Validation**

In [4]:
kfold = KFold(n_splits=5, shuffle=True, random_state=4)

**Train and Predict**

In [5]:
# Initialize variables to track results
LR_accuracy = []
DT_accuracy = []
SVM_accuracy = []
KNN_accuracy = []

# K-fold cross-validation loop
for fold, (train_idx, test_idx) in enumerate(kfold.split(X, y), 1):
    # Split data
    X_train, X_test = X[train_idx], X[test_idx]
    y_train, y_test = y[train_idx], y[test_idx]
    
    # Train models
    LR = LogisticRegression(C=0.01, solver='liblinear')
    LR.fit(X_train, y_train)
    
    DT = DecisionTreeClassifier(max_depth=4)
    DT.fit(X_train, y_train)
    
    SVM = SVC(max_iter=18000)
    SVM.fit(X_train, y_train)
    
    KNN = KNeighborsClassifier(n_neighbors = 10)
    KNN.fit(X_train, y_train)
    
    # Predict on test set
    LR_y_hat = LR.predict(X_test)
    DT_y_hat = DT.predict(X_test)
    SVM_y_hat = SVM.predict(X_test)
    KNN_y_hat = KNN.predict(X_test)
                            
    # Compute accuracy
    LR_accuracy.append(accuracy_score(y_test, LR_y_hat))
    DT_accuracy.append(accuracy_score(y_test, DT_y_hat))
    SVM_accuracy.append(accuracy_score(y_test, SVM_y_hat))
    KNN_accuracy.append(accuracy_score(y_test, KNN_y_hat))
    
    # Print accuracy for the current fold
    print(f"LR {fold} Accuracy: {accuracy_score(y_test, LR_y_hat):.4f}")
    print(f"DT {fold} Accuracy: {accuracy_score(y_test, DT_y_hat):.4f}")
    print(f"SVM {fold} Accuracy: {accuracy_score(y_test, SVM_y_hat):.4f}")
    print(f"KNN {fold} Accuracy: {accuracy_score(y_test, KNN_y_hat):.4f}")
    
# Print overall K-Fold accuracies
LR_mean_accuracy = np.mean(LR_accuracy)
print(f"\nMean K-Fold LR Accuracy: {LR_mean_accuracy:.4f}")

DT_mean_accuracy = np.mean(DT_accuracy)
print(f"\nMean K-Fold DT Accuracy: {DT_mean_accuracy:.4f}")

SVM_mean_accuracy = np.mean(SVM_accuracy)
print(f"\nMean K-Fold SVM Accuracy: {SVM_mean_accuracy:.4f}")

KNN_mean_accuracy = np.mean(KNN_accuracy)
print(f"\nMean K-Fold KNN Accuracy: {KNN_mean_accuracy:.4f}")

LR 1 Accuracy: 0.8900
DT 1 Accuracy: 0.8900
SVM 1 Accuracy: 0.9350
KNN 1 Accuracy: 0.9100
LR 2 Accuracy: 0.9050
DT 2 Accuracy: 0.9450
SVM 2 Accuracy: 0.9450
KNN 2 Accuracy: 0.9350
LR 3 Accuracy: 0.9000
DT 3 Accuracy: 0.9150
SVM 3 Accuracy: 0.9400
KNN 3 Accuracy: 0.8850
LR 4 Accuracy: 0.8550
DT 4 Accuracy: 0.8800
SVM 4 Accuracy: 0.9200
KNN 4 Accuracy: 0.8500
LR 5 Accuracy: 0.8650
DT 5 Accuracy: 0.9150
SVM 5 Accuracy: 0.9150
KNN 5 Accuracy: 0.8850

Mean K-Fold LR Accuracy: 0.8830

Mean K-Fold DT Accuracy: 0.9090

Mean K-Fold SVM Accuracy: 0.9310

Mean K-Fold KNN Accuracy: 0.8930


**Extract Label and Features for Neural Network**

In [6]:
import torch

labels = torch.tensor(train_data['Label'].values, dtype=torch.long)
features = torch.tensor(train_data.drop(columns=['Label']).values, dtype=torch.float)

**Define Feedforward Neural Network (FNN)**

In [7]:
import torch.nn as nn

class FNN(nn.Module):
    def __init__(self, input_dim):
        super(FNN, self).__init__()
        self.fc1 = nn.Linear(input_dim, 64)
        self.dropout1 = nn.Dropout(0.2)
        self.fc2 = nn.Linear(64, 32)
        self.dropout2 = nn.Dropout(0.1)
        self.fc3 = nn.Linear(32, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.dropout1(x)
        x = torch.relu(self.fc2(x))
        x = self.dropout2(x)
        x = torch.sigmoid(self.fc3(x))
        return x

**Initialize Parameters**

In [8]:
input_dim = features.shape[1]
num_epochs = 30
batch_size = 8
criterion = nn.BCELoss()

**Train and Predict Feedforward Neural Network**

In [9]:
fold_results = []

for fold, (train_index, val_index) in enumerate(kfold.split(features)):
    # Split data
    features_train, features_val = features[train_index], features[val_index]
    labels_train, labels_val = labels[train_index], labels[val_index]
    
    # Initialize model and optimizer for each fold
    NN = FNN(input_dim=input_dim)
    optimizer = optim.Adam(NN.parameters(), lr=0.001)
    
    best_val_loss = float('inf')
    trigger_times = 0
    patience = 10  # Number of epochs to wait for improvement

    for epoch in range(num_epochs):
        NN.train()
        # Shuffle training data
        permutation = torch.randperm(features_train.size()[0])
        
        for i in range(0, features_train.size()[0], batch_size):
            # Get the current batch
            indices = permutation[i:i + batch_size]
            batch_features, batch_labels = features_train[indices], labels_train[indices].float().view(-1, 1)

            # Forward pass
            outputs = NN(batch_features)
            loss = criterion(outputs, batch_labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        
        # Validation check
        NN.eval()
        with torch.no_grad():
            val_outputs = NN(features_val)
            val_loss = criterion(val_outputs, labels_val.float().view(-1, 1))
            print(f"Epoch [{epoch + 1}/{num_epochs}], Validation Loss: {val_loss.item():.4f}")

            if val_loss < best_val_loss:
                best_val_loss = val_loss
                trigger_times = 0  # Reset patience counter
            else:
                trigger_times += 1
                if trigger_times >= patience:
                    print(f"Early stopping at epoch: {epoch + 1}")
                    break

    fold_results.append(best_val_loss.item())  # Store the validation loss for each fold

# Print results for each fold
for i, val_loss in enumerate(fold_results):
    print(f"Fold {i + 1} - Validation Loss: {val_loss:.4f}")

# Calculate average validation loss across all folds
average_val_loss = sum(fold_results) / len(fold_results)
print(f"Average Validation Loss across all folds: {average_val_loss:.4f}")

# Set the model to evaluation mode
NN.eval()

# Forward pass to get predictions
with torch.no_grad():
    outputs = NN(features)
    predicted = (outputs >= 0.5).float()  # Convert probabilities to binary predictions

# Calculate accuracy
FNN_train_accuracy = accuracy_score(labels, predicted)
print(f"\nMean K-Fold FNN Accuracy: {FNN_train_accuracy:.4f}")

Epoch [1/30], Validation Loss: 0.4734
Epoch [2/30], Validation Loss: 0.2568
Epoch [3/30], Validation Loss: 0.2201
Epoch [4/30], Validation Loss: 0.2052
Epoch [5/30], Validation Loss: 0.2099
Epoch [6/30], Validation Loss: 0.1936
Epoch [7/30], Validation Loss: 0.1864
Epoch [8/30], Validation Loss: 0.2132
Epoch [9/30], Validation Loss: 0.1877
Epoch [10/30], Validation Loss: 0.1806
Epoch [11/30], Validation Loss: 0.1774
Epoch [12/30], Validation Loss: 0.1756
Epoch [13/30], Validation Loss: 0.1840
Epoch [14/30], Validation Loss: 0.1731
Epoch [15/30], Validation Loss: 0.1724
Epoch [16/30], Validation Loss: 0.1815
Epoch [17/30], Validation Loss: 0.1770
Epoch [18/30], Validation Loss: 0.1778
Epoch [19/30], Validation Loss: 0.1693
Epoch [20/30], Validation Loss: 0.1800
Epoch [21/30], Validation Loss: 0.1733
Epoch [22/30], Validation Loss: 0.1687
Epoch [23/30], Validation Loss: 0.1728
Epoch [24/30], Validation Loss: 0.1786
Epoch [25/30], Validation Loss: 0.1803
Epoch [26/30], Validation Loss: 0.

**Load and Read Unseen Data**

In [10]:
path2 = "C:/Users/shakib/Downloads/Unseen Data.csv"
unseen_data = pd.read_csv(path2)

**Extract Label and Features from Unseen Data**

In [11]:
y_unseen = unseen_data['Label']
X_unseen = np.asarray(unseen_data.drop(columns=['Label']))

# For Neural Network
X_value = unseen_data.drop(columns=['Label']).values
X_unseen_tensor = torch.tensor(X_value, dtype=torch.float32)

**Predict on Unseen Data**

In [12]:
LR_y_unseen_pred = LR.predict(X_unseen)
LR_accuracy = accuracy_score(y_unseen, LR_y_unseen_pred)

DT_y_unseen_pred = DT.predict(X_unseen)
DT_accuracy = accuracy_score(y_unseen, DT_y_unseen_pred)

SVM_y_unseen_pred = SVM.predict(X_unseen)
SVM_accuracy = accuracy_score(y_unseen, SVM_y_unseen_pred)

KNN_y_unseen_pred = KNN.predict(X_unseen)
KNN_accuracy = accuracy_score(y_unseen, KNN_y_unseen_pred)

NN.eval()
with torch.no_grad():
    fnn_y_unseen_pred = NN(X_unseen_tensor).numpy().flatten()

# Threshold predictions to get binary labels
fnn_y_unseen_pred = (fnn_y_unseen_pred > 0.5).astype(int)

FNN_accuracy = accuracy_score(y_unseen, fnn_y_unseen_pred)

**Results Table**

In [13]:
data = {
    'Base Models': [
        'Logistic Regression', 
        'Decision Tree', 
        'Support Vector Machine', 
        'K-Nearest Neighbors', 
        'Feedforward Neural Network'
    ],
    'Train Data Accuracy': [
        f"{LR_mean_accuracy * 100:.2f}%", 
        f"{DT_mean_accuracy * 100:.2f}%", 
        f"{SVM_mean_accuracy * 100:.2f}%", 
        f"{KNN_mean_accuracy * 100:.2f}%", 
        f"{FNN_train_accuracy * 100:.2f}%"
    ],
    'Unseen Data Accuracy': [
        f"{LR_accuracy * 100:.2f}%", 
        f"{DT_accuracy * 100:.2f}%", 
        f"{SVM_accuracy * 100:.2f}%", 
        f"{KNN_accuracy * 100:.2f}%", 
        f"{FNN_accuracy * 100:.2f}%"
    ]
}

df = pd.DataFrame(data)
df

Unnamed: 0,Base Models,Train Data Accuracy,Unseen Data Accuracy
0,Logistic Regression,88.30%,82.86%
1,Decision Tree,90.90%,88.57%
2,Support Vector Machine,93.10%,90.00%
3,K-Nearest Neighbors,89.30%,86.43%
4,Feedforward Neural Network,95.20%,87.86%
