In [4]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import csv

In [9]:
# File paths
file_paths_basic = [
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_BC-15_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Huong_thom-1_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Nep-87_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Q-5_modify_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Thien_uu-8_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Xi-23_features_v2.csv'
]

# Parameter grids for GridSearchCV
param_grids = {
    'knn': {
        'n_neighbors': [4, 10, 12, 15, 19],
        'metric': ['euclidean', 'manhattan', 'minkowski'],
        'weights': ['uniform', 'distance']
    },
    'ann': {
        'activation': ['tanh', 'relu', 'logistic'],
        'alpha': [ 0.001, 0.01, 0.05],
        'hidden_layer_sizes': [(50,), (100,), (150,), (50,50)],
        'max_iter': [1000, 2000]
    },
    'svm': {
        'kernel': ['rbf', 'linear', 'poly'],
        'C': [1, 10, 100, 1000],
        'gamma': [ 0.01, 0.1, 1, 'scale'],
        'degree': [3, 4, 5]
    },
    'rf': {
        'n_estimators': [100, 500, 1200, 1500],
        'max_features': [3, 6, 10],
        'criterion': ['gini', 'entropy'],
        'max_depth': [6, 10, 20, None]
    }
}

results = []
accuracies = []

for file_path in file_paths_basic:
    # Load data
    data = pd.read_csv(file_path)
    X = data.drop(['filename', 'label'], axis=1)
    y = data['label']
    
    # Standardize features
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    
    # Split data
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.33, random_state=42)
    
    # Define base models
    knn = KNeighborsClassifier()
    ann = MLPClassifier(random_state=42)
    svm = SVC(probability=True, random_state=42)
    rf = RandomForestClassifier(random_state=42)
    
    # Perform GridSearchCV for each base model
    knn_grid = GridSearchCV(knn, param_grids['knn'], cv=3, n_jobs=-1)
    ann_grid = GridSearchCV(ann, param_grids['ann'], cv=3, n_jobs=-1)
    svm_grid = GridSearchCV(svm, param_grids['svm'], cv=3, n_jobs=-1)
    rf_grid = GridSearchCV(rf, param_grids['rf'], cv=3, n_jobs=-1)
    
    # Fit base models
    knn_grid.fit(X_train, y_train)
    ann_grid.fit(X_train, y_train)
    svm_grid.fit(X_train, y_train)
    rf_grid.fit(X_train, y_train)
    
    # Best estimators
    knn_best = knn_grid.best_estimator_
    ann_best = ann_grid.best_estimator_
    svm_best = svm_grid.best_estimator_
    rf_best = rf_grid.best_estimator_
    
    # Print best parameters
    print(f"Best parameters for {file_path}:")
    print(f"KNN: {knn_grid.best_params_}")
    print(f"ANN: {ann_grid.best_params_}")
    print(f"SVM: {svm_grid.best_params_}")
    print(f"RF: {rf_grid.best_params_}")
    

    meta_learner = LogisticRegression(C=100, penalty='l1', solver='liblinear',random_state=42)
    
    # Create the stacking classifier with the best base models
    stacking_classifier = StackingClassifier(
        estimators=[('knn', knn_best), ('ann', ann_best), ('svm', svm_best), ('rf', rf_best)],
        final_estimator=meta_learner,
        cv=5
    )
    
    # Train the stacking classifier
    stacking_classifier.fit(X_train, y_train)
    y_pred = stacking_classifier.predict(X_test)
    
    # Calculate metrics
    acc = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')
    
    # Get the best meta-learner parameters after training
    
    print(f"File: {file_path}")
    print("Stacking Model (KNN + ANN + SVM + RF):")
    print(f"Accuracy: {acc * 100:.2f}%")
    print(f"Precision: {precision * 100:.2f}%")
    print(f"Recall: {recall * 100:.2f}%")
    print(f"F-measure: {f1 * 100:.2f}%")

    # Format the results to xx,xx%
    results.append([
        file_path,
        f"{acc * 100:.2f}".replace('.', ',') + '%',
        f"{precision * 100:.2f}".replace('.', ',') + '%',
        f"{recall * 100:.2f}".replace('.', ',') + '%',
        f"{f1 * 100:.2f}".replace('.', ',') + '%'
    ])
    accuracies.append(acc)

# Specify the output file path
output_file = r'C:\AIL301m\result\Stacking\Basic_results_4_models_gridsearch.csv'

# Write the results to a CSV file
with open(output_file, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['File', 'Accuracy', 'Precision', 'Recall', 'F-measure'])
    writer.writerows(results)

average_accuracy = np.mean(accuracies)
print(f"Overall Average Accuracy: {average_accuracy * 100:.2f}%")
print(f"Results have been written to {output_file}")

Best parameters for C:\AIL301m\Feature extract\basic\basicv2\Merged_BC-15_features_v2.csv:
KNN: {'metric': 'euclidean', 'n_neighbors': 4, 'weights': 'uniform'}
ANN: {'activation': 'relu', 'alpha': 0.01, 'hidden_layer_sizes': (150,), 'max_iter': 1000}
SVM: {'C': 100, 'degree': 3, 'gamma': 0.01, 'kernel': 'rbf'}
RF: {'criterion': 'entropy', 'max_depth': 20, 'max_features': 6, 'n_estimators': 1500}
File: C:\AIL301m\Feature extract\basic\basicv2\Merged_BC-15_features_v2.csv
Stacking Model (KNN + ANN + SVM + RF):
Accuracy: 94.48%
Precision: 94.49%
Recall: 94.48%
F-measure: 94.48%
Best parameters for C:\AIL301m\Feature extract\basic\basicv2\Merged_Huong_thom-1_features_v2.csv:
KNN: {'metric': 'euclidean', 'n_neighbors': 4, 'weights': 'distance'}
ANN: {'activation': 'relu', 'alpha': 0.01, 'hidden_layer_sizes': (150,), 'max_iter': 1000}
SVM: {'C': 10, 'degree': 3, 'gamma': 0.01, 'kernel': 'rbf'}
RF: {'criterion': 'entropy', 'max_depth': 10, 'max_features': 10, 'n_estimators': 500}
File: C:\AIL