In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import StackingClassifier, RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import csv
from sklearn.model_selection import GridSearchCV

In [2]:
file_paths_basic = [
    r'C:\AIL301m\Feature extract\basic\Merged_BC-15_basic_features.csv',
    r'C:\AIL301m\Feature extract\basic\Merged_Huong_thom-1_basic_features.csv',
    r'C:\AIL301m\Feature extract\basic\Merged_Nep-87_basic_features.csv',
    r'C:\AIL301m\Feature extract\basic\Merged_Q-5_modify_basic_features.csv',
    r'C:\AIL301m\Feature extract\basic\Merged_Thien_uu-8_basic_features.csv',
    r'C:\AIL301m\Feature extract\basic\Merged_Xi-23_features.csv'
]

results = []
accuracies = []
for file_path in file_paths_basic:
    data = pd.read_csv(file_path)
    X = data.drop(['filename', 'label'], axis=1)
    y = data['label']
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.33, random_state=42)
    
    # Define base models
    svm = SVC(probability=True, random_state=42)
    ann = MLPClassifier(hidden_layer_sizes=(20,), max_iter=1000, random_state=42)
    
    # Define meta-learner
    meta_learner = LogisticRegression(random_state=42)
    
    # Create the stacking classifier
    stacking_classifier = StackingClassifier(
        estimators=[('svm', svm), ('ann', ann)],
        final_estimator=meta_learner,
        cv=5
    )
    
    # Train the stacking classifier
    stacking_classifier.fit(X_train, y_train)
    y_pred = stacking_classifier.predict(X_test)
    
    # Calculate metrics
    acc = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')

    print(f"File: {file_path}")
    print("Stacking Model (SVM + ANN):")
    print(f"Accuracy: {acc * 100:.2f}%")
    print(f"Precision: {precision * 100:.2f}%")
    print(f"Recall: {recall * 100:.2f}%")
    print(f"F-measure: {f1 * 100:.2f}%")

    # Format the results to xx,xx%
    results.append([
        file_path,
        f"{acc * 100:.2f}".replace('.', ',') + '%',
        f"{precision * 100:.2f}".replace('.', ',') + '%',
        f"{recall * 100:.2f}".replace('.', ',') + '%',
        f"{f1 * 100:.2f}".replace('.', ',') + '%'
    ])
    accuracies.append(acc)

# Specify the output file path
output_file = r'C:\AIL301m\result\Stacking\Basic_ANN_SVM_results.csv'

# Write the results to a CSV file
with open(output_file, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['File', 'Accuracy', 'Precision', 'Recall', 'F-measure'])
    writer.writerows(results)
average_accuracy = np.mean(accuracies)
print(f"Overall Average Accuracy: {average_accuracy * 100:.2f}%")
print(f"Results have been written to {output_file}")

File: C:\AIL301m\Feature extract\basic\Merged_BC-15_basic_features.csv
Stacking Model (SVM + ANN):
Accuracy: 91.43%
Precision: 91.46%
Recall: 91.43%
F-measure: 91.43%
File: C:\AIL301m\Feature extract\basic\Merged_Huong_thom-1_basic_features.csv
Stacking Model (SVM + ANN):
Accuracy: 94.38%
Precision: 94.41%
Recall: 94.38%
F-measure: 94.38%
File: C:\AIL301m\Feature extract\basic\Merged_Nep-87_basic_features.csv
Stacking Model (SVM + ANN):
Accuracy: 97.68%
Precision: 97.70%
Recall: 97.68%
F-measure: 97.68%
File: C:\AIL301m\Feature extract\basic\Merged_Q-5_modify_basic_features.csv
Stacking Model (SVM + ANN):
Accuracy: 95.37%
Precision: 95.40%
Recall: 95.37%
F-measure: 95.37%
File: C:\AIL301m\Feature extract\basic\Merged_Thien_uu-8_basic_features.csv
Stacking Model (SVM + ANN):
Accuracy: 97.28%
Precision: 97.28%
Recall: 97.28%
F-measure: 97.28%
File: C:\AIL301m\Feature extract\basic\Merged_Xi-23_features.csv
Stacking Model (SVM + ANN):
Accuracy: 95.54%
Precision: 95.54%
Recall: 95.54%
F-me

In [10]:
file_paths_basic = [
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_BC-15_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Huong_thom-1_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Nep-87_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Q-5_modify_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Thien_uu-8_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Xi-23_features_v2.csv'
]


results = []
accuracies = []
for file_path in file_paths_basic:
    data = pd.read_csv(file_path)
    X = data.drop(['filename', 'label'], axis=1)
    y = data['label']
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.33, random_state=42)
    
    # Define base models
    rf = RandomForestClassifier(n_estimators=1000, max_features=6,random_state=42)
    ann = MLPClassifier(hidden_layer_sizes=(64,), max_iter=2000, random_state=42)
    
    # Define meta-learner
    meta_learner = LogisticRegression(random_state=42)
    
    # Create the stacking classifier
    stacking_classifier = StackingClassifier(
        estimators=[('rf', rf), ('ann', ann)],
        final_estimator=meta_learner,
        cv=5
    )
    
    # Train the stacking classifier
    stacking_classifier.fit(X_train, y_train)
    y_pred = stacking_classifier.predict(X_test)
    
    # Calculate metrics
    acc = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')

    print(f"File: {file_path}")
    print("Stacking Model (RF + ANN):")
    print(f"Accuracy: {acc * 100:.2f}%")
    print(f"Precision: {precision * 100:.2f}%")
    print(f"Recall: {recall * 100:.2f}%")
    print(f"F-measure: {f1 * 100:.2f}%")

    # Format the results to xx,xx%
    results.append([
        file_path,
        f"{acc * 100:.2f}".replace('.', ',') + '%',
        f"{precision * 100:.2f}".replace('.', ',') + '%',
        f"{recall * 100:.2f}".replace('.', ',') + '%',
        f"{f1 * 100:.2f}".replace('.', ',') + '%'
    ])
    accuracies.append(acc)

# Specify the output file path
output_file = r'C:\AIL301m\result\Stacking\Basic_ANN_RF_results_v2.csv'

# Write the results to a CSV file
with open(output_file, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['File', 'Accuracy', 'Precision', 'Recall', 'F-measure'])
    writer.writerows(results)
average_accuracy = np.mean(accuracies)
print(f"Overall Average Accuracy: {average_accuracy * 100:.2f}%")
print(f"Results have been written to {output_file}")


File: C:\AIL301m\Feature extract\basic\Merged_BC-15_basic_features.csv
Stacking Model (RF + ANN):
Accuracy: 91.52%
Precision: 91.53%
Recall: 91.52%
F-measure: 91.51%
File: C:\AIL301m\Feature extract\basic\Merged_Huong_thom-1_basic_features.csv
Stacking Model (RF + ANN):
Accuracy: 94.38%
Precision: 94.43%
Recall: 94.38%
F-measure: 94.38%
File: C:\AIL301m\Feature extract\basic\Merged_Nep-87_basic_features.csv
Stacking Model (RF + ANN):
Accuracy: 97.58%
Precision: 97.59%
Recall: 97.58%
F-measure: 97.58%
File: C:\AIL301m\Feature extract\basic\Merged_Q-5_modify_basic_features.csv
Stacking Model (RF + ANN):
Accuracy: 95.77%
Precision: 95.80%
Recall: 95.77%
F-measure: 95.77%
File: C:\AIL301m\Feature extract\basic\Merged_Thien_uu-8_basic_features.csv
Stacking Model (RF + ANN):
Accuracy: 97.43%
Precision: 97.43%
Recall: 97.43%
F-measure: 97.43%
File: C:\AIL301m\Feature extract\basic\Merged_Xi-23_features.csv
Stacking Model (RF + ANN):
Accuracy: 96.42%
Precision: 96.42%
Recall: 96.42%
F-measure:

In [5]:
file_paths_basic = [
    r'C:\AIL301m\Feature extract\basic\Merged_BC-15_basic_features.csv',
    r'C:\AIL301m\Feature extract\basic\Merged_Huong_thom-1_basic_features.csv',
    r'C:\AIL301m\Feature extract\basic\Merged_Nep-87_basic_features.csv',
    r'C:\AIL301m\Feature extract\basic\Merged_Q-5_modify_basic_features.csv',
    r'C:\AIL301m\Feature extract\basic\Merged_Thien_uu-8_basic_features.csv',
    r'C:\AIL301m\Feature extract\basic\Merged_Xi-23_features.csv'
]

results = []
accuracies = []

for file_path in file_paths_basic:
    data = pd.read_csv(file_path)
    X = data.drop(['filename', 'label'], axis=1)
    y = data['label']
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.33, random_state=42)
    
    # Define base models
    knn = KNeighborsClassifier(n_neighbors=16)
    ann = MLPClassifier(hidden_layer_sizes=(20,), max_iter=1000, random_state=42)
    
    # Define meta-learner
    meta_learner = LogisticRegression(random_state=42)
    
    # Create the stacking classifier
    stacking_classifier = StackingClassifier(
        estimators=[('knn', knn), ('ann', ann)],
        final_estimator=meta_learner,
        cv=5
    )
    
    # Train the stacking classifier
    stacking_classifier.fit(X_train, y_train)
    y_pred = stacking_classifier.predict(X_test)
    
    # Calculate metrics
    acc = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')

    print(f"File: {file_path}")
    print("Stacking Model (KNN + ANN):")
    print(f"Accuracy: {acc * 100:.2f}%")
    print(f"Precision: {precision * 100:.2f}%")
    print(f"Recall: {recall * 100:.2f}%")
    print(f"F-measure: {f1 * 100:.2f}%")

    # Format the results to xx,xx%
    results.append([
        file_path,
        f"{acc * 100:.2f}".replace('.', ',') + '%',
        f"{precision * 100:.2f}".replace('.', ',') + '%',
        f"{recall * 100:.2f}".replace('.', ',') + '%',
        f"{f1 * 100:.2f}".replace('.', ',') + '%'
    ])
    accuracies.append(acc)

# Specify the output file path
output_file = r'C:\AIL301m\result\Stacking\Basic_ANN_KNN_results_KNN_ANN.csv'

# Write the results to a CSV file
with open(output_file, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['File', 'Accuracy', 'Precision', 'Recall', 'F-measure'])
    writer.writerows(results)
average_accuracy = np.mean(accuracies)
print(f"Overall Average Accuracy: {average_accuracy * 100:.2f}%")
print(f"Results have been written to {output_file}")

File: C:\AIL301m\Feature extract\basic\Merged_BC-15_basic_features.csv
Stacking Model (KNN + ANN):
Accuracy: 91.27%
Precision: 91.28%
Recall: 91.27%
F-measure: 91.27%
File: C:\AIL301m\Feature extract\basic\Merged_Huong_thom-1_basic_features.csv
Stacking Model (KNN + ANN):
Accuracy: 94.60%
Precision: 94.62%
Recall: 94.60%
F-measure: 94.60%
File: C:\AIL301m\Feature extract\basic\Merged_Nep-87_basic_features.csv
Stacking Model (KNN + ANN):
Accuracy: 97.79%
Precision: 97.80%
Recall: 97.79%
F-measure: 97.79%
File: C:\AIL301m\Feature extract\basic\Merged_Q-5_modify_basic_features.csv
Stacking Model (KNN + ANN):
Accuracy: 95.77%
Precision: 95.80%
Recall: 95.77%
F-measure: 95.77%
File: C:\AIL301m\Feature extract\basic\Merged_Thien_uu-8_basic_features.csv
Stacking Model (KNN + ANN):
Accuracy: 97.28%
Precision: 97.28%
Recall: 97.28%
F-measure: 97.28%
File: C:\AIL301m\Feature extract\basic\Merged_Xi-23_features.csv
Stacking Model (KNN + ANN):
Accuracy: 95.98%
Precision: 95.98%
Recall: 95.98%
F-me

In [3]:
file_paths_basic = [
    r'C:\AIL301m\Feature extract\basic\Merged_BC-15_basic_features.csv',
    r'C:\AIL301m\Feature extract\basic\Merged_Huong_thom-1_basic_features.csv',
    r'C:\AIL301m\Feature extract\basic\Merged_Nep-87_basic_features.csv',
    r'C:\AIL301m\Feature extract\basic\Merged_Q-5_modify_basic_features.csv',
    r'C:\AIL301m\Feature extract\basic\Merged_Thien_uu-8_basic_features.csv',
    r'C:\AIL301m\Feature extract\basic\Merged_Xi-23_features.csv'
]

results = []
accuracies = []

for file_path in file_paths_basic:
    data = pd.read_csv(file_path)
    X = data.drop(['filename', 'label'], axis=1)
    y = data['label']
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.33, random_state=42)
    
    # Define base models
    knn = KNeighborsClassifier(n_neighbors=18)
    ann = MLPClassifier(hidden_layer_sizes=(50,), max_iter=2000, random_state=42)
    svm = SVC(kernel='rbf',probability=False, random_state=42)
    rf = RandomForestClassifier(n_estimators=700, max_features=22, random_state=42)
    
    # Define meta-learner
    meta_learner = LogisticRegression(random_state=42)
    
    # Create the stacking classifier
    stacking_classifier = StackingClassifier(
        estimators=[('knn', knn), ('ann', ann), ('svm', svm), ('rf', rf)],
        final_estimator=meta_learner,
        cv=5
    )
    
    # Train the stacking classifier
    stacking_classifier.fit(X_train, y_train)
    y_pred = stacking_classifier.predict(X_test)
    
    # Calculate metrics
    acc = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')

    print(f"File: {file_path}")
    print("Stacking Model (KNN + ANN + SVM + RF):")
    print(f"Accuracy: {acc * 100:.2f}%")
    print(f"Precision: {precision * 100:.2f}%")
    print(f"Recall: {recall * 100:.2f}%")
    print(f"F-measure: {f1 * 100:.2f}%")

    # Format the results to xx,xx%
    results.append([
        file_path,
        f"{acc * 100:.2f}".replace('.', ',') + '%',
        f"{precision * 100:.2f}".replace('.', ',') + '%',
        f"{recall * 100:.2f}".replace('.', ',') + '%',
        f"{f1 * 100:.2f}".replace('.', ',') + '%'
    ])
    accuracies.append(acc)

# Specify the output file path
output_file = r'C:\AIL301m\result\Stacking\Basic_results_4_models.csv'

# Write the results to a CSV file
with open(output_file, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['File', 'Accuracy', 'Precision', 'Recall', 'F-measure'])
    writer.writerows(results)
average_accuracy = np.mean(accuracies)
print(f"Overall Average Accuracy: {average_accuracy * 100:.2f}%") 

print(f"Results have been written to {output_file}")


File: C:\AIL301m\Feature extract\basic\Merged_BC-15_basic_features.csv
Stacking Model (KNN + ANN + SVM + RF):
Accuracy: 92.01%
Precision: 92.04%
Recall: 92.01%
F-measure: 92.01%
File: C:\AIL301m\Feature extract\basic\Merged_Huong_thom-1_basic_features.csv
Stacking Model (KNN + ANN + SVM + RF):
Accuracy: 94.38%
Precision: 94.42%
Recall: 94.38%
F-measure: 94.38%
File: C:\AIL301m\Feature extract\basic\Merged_Nep-87_basic_features.csv
Stacking Model (KNN + ANN + SVM + RF):
Accuracy: 97.89%
Precision: 97.91%
Recall: 97.89%
F-measure: 97.89%
File: C:\AIL301m\Feature extract\basic\Merged_Q-5_modify_basic_features.csv
Stacking Model (KNN + ANN + SVM + RF):
Accuracy: 95.77%
Precision: 95.82%
Recall: 95.77%
F-measure: 95.77%
File: C:\AIL301m\Feature extract\basic\Merged_Thien_uu-8_basic_features.csv
Stacking Model (KNN + ANN + SVM + RF):
Accuracy: 97.73%
Precision: 97.74%
Recall: 97.73%
F-measure: 97.73%
File: C:\AIL301m\Feature extract\basic\Merged_Xi-23_features.csv
Stacking Model (KNN + ANN +

In [4]:
file_paths_basic = [
    r'C:\AIL301m\Feature extract\basic\Merged_BC-15_basic_features.csv',
    r'C:\AIL301m\Feature extract\basic\Merged_Huong_thom-1_basic_features.csv',
    r'C:\AIL301m\Feature extract\basic\Merged_Nep-87_basic_features.csv',
    r'C:\AIL301m\Feature extract\basic\Merged_Q-5_modify_basic_features.csv',
    r'C:\AIL301m\Feature extract\basic\Merged_Thien_uu-8_basic_features.csv',
    r'C:\AIL301m\Feature extract\basic\Merged_Xi-23_features.csv'
]

results = []
accuracies = []

for file_path in file_paths_basic:
    data = pd.read_csv(file_path)
    X = data.drop(['filename', 'label'], axis=1)
    y = data['label']
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.33, random_state=42)
    
    # Define base models
    knn = KNeighborsClassifier(n_neighbors=16)
    ann = MLPClassifier(hidden_layer_sizes=(100,), max_iter=1000, random_state=42)
    svm = SVC(kernel='rbf',probability=True, random_state=42)
    rf = RandomForestClassifier(n_estimators=500, max_features=5, random_state=42)
    lr = LogisticRegression(random_state=42)
    # Define meta-learner
    meta_learner = LogisticRegression(random_state=42)
    
    # Create the stacking classifier
    stacking_classifier = StackingClassifier(
        estimators=[('knn', knn), ('ann', ann), ('svm', svm), ('rf', rf), ('lr', lr)],
        final_estimator=meta_learner,
        cv=5
    )
    
    # Train the stacking classifier
    stacking_classifier.fit(X_train, y_train)
    y_pred = stacking_classifier.predict(X_test)
    
    # Calculate metrics
    acc = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')

    print(f"File: {file_path}")
    print("Stacking Model (KNN + ANN + SVM + RF + LR):")
    print(f"Accuracy: {acc * 100:.2f}%")
    print(f"Precision: {precision * 100:.2f}%")
    print(f"Recall: {recall * 100:.2f}%")
    print(f"F-measure: {f1 * 100:.2f}%")

    # Format the results to xx,xx%
    results.append([
        file_path,
        f"{acc * 100:.2f}".replace('.', ',') + '%',
        f"{precision * 100:.2f}".replace('.', ',') + '%',
        f"{recall * 100:.2f}".replace('.', ',') + '%',
        f"{f1 * 100:.2f}".replace('.', ',') + '%'
    ])
    accuracies.append(acc)
# Specify the output file path
output_file = r'C:\AIL301m\result\Stacking\Basic_results_5_models.csv'

# Write the results to a CSV file
with open(output_file, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['File', 'Accuracy', 'Precision', 'Recall', 'F-measure'])
    writer.writerows(results)
average_accuracy = np.mean(accuracies)
print(f"Overall Average Accuracy: {average_accuracy * 100:.2f}%") 
print(f"Results have been written to {output_file}")


File: C:\AIL301m\Feature extract\basic\Merged_BC-15_basic_features.csv
Stacking Model (KNN + ANN + SVM + RF + LR):
Accuracy: 91.60%
Precision: 91.65%
Recall: 91.60%
F-measure: 91.59%
File: C:\AIL301m\Feature extract\basic\Merged_Huong_thom-1_basic_features.csv
Stacking Model (KNN + ANN + SVM + RF + LR):
Accuracy: 94.53%
Precision: 94.57%
Recall: 94.53%
F-measure: 94.53%
File: C:\AIL301m\Feature extract\basic\Merged_Nep-87_basic_features.csv
Stacking Model (KNN + ANN + SVM + RF + LR):
Accuracy: 98.00%
Precision: 98.00%
Recall: 98.00%
F-measure: 98.00%
File: C:\AIL301m\Feature extract\basic\Merged_Q-5_modify_basic_features.csv
Stacking Model (KNN + ANN + SVM + RF + LR):
Accuracy: 95.27%
Precision: 95.29%
Recall: 95.27%
F-measure: 95.27%
File: C:\AIL301m\Feature extract\basic\Merged_Thien_uu-8_basic_features.csv
Stacking Model (KNN + ANN + SVM + RF + LR):
Accuracy: 97.43%
Precision: 97.43%
Recall: 97.43%
F-measure: 97.43%
File: C:\AIL301m\Feature extract\basic\Merged_Xi-23_features.csv
St

In [4]:
file_paths_basic = [
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_BC-15_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Huong_thom-1_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Nep-87_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Q-5_modify_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Thien_uu-8_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Xi-23_features_v2.csv'
]

results = []
accuracies = []

for file_path in file_paths_basic:
    data = pd.read_csv(file_path)
    X = data.drop(['filename', 'label'], axis=1)
    y = data['label']
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.33, random_state=42)
    
    # Define base models
    knn = KNeighborsClassifier(n_neighbors=18)
    ann = MLPClassifier(hidden_layer_sizes=(50,), max_iter=2000, random_state=42)
    svm = SVC(kernel='rbf',probability=True, random_state=42)
    rf = RandomForestClassifier(n_estimators=700, max_features=6, random_state=42)
    
    # Define meta-learner
    meta_learner = LogisticRegression(random_state=42)
    
    # Create the stacking classifier
    stacking_classifier = StackingClassifier(
        estimators=[('knn', knn), ('ann', ann), ('svm', svm), ('rf', rf)],
        final_estimator=meta_learner,
        cv=3
    )
    
    # Train the stacking classifier
    stacking_classifier.fit(X_train, y_train)
    y_pred = stacking_classifier.predict(X_test)
    
    # Calculate metrics
    acc = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')

    print(f"File: {file_path}")
    print("Stacking Model (KNN + ANN + SVM + RF):")
    print(f"Accuracy: {acc * 100:.2f}%")
    print(f"Precision: {precision * 100:.2f}%")
    print(f"Recall: {recall * 100:.2f}%")
    print(f"F-measure: {f1 * 100:.2f}%")

    # Format the results to xx,xx%
    results.append([
        file_path,
        f"{acc * 100:.2f}".replace('.', ',') + '%',
        f"{precision * 100:.2f}".replace('.', ',') + '%',
        f"{recall * 100:.2f}".replace('.', ',') + '%',
        f"{f1 * 100:.2f}".replace('.', ',') + '%'
    ])
    accuracies.append(acc)

# Specify the output file path
output_file = r'C:\AIL301m\result\Stacking\Basic_results_4_models_v2.csv'

# Write the results to a CSV file
with open(output_file, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['File', 'Accuracy', 'Precision', 'Recall', 'F-measure'])
    writer.writerows(results)
average_accuracy = np.mean(accuracies)
print(f"Overall Average Accuracy: {average_accuracy * 100:.2f}%") 

print(f"Results have been written to {output_file}")


File: C:\AIL301m\Feature extract\basic\basicv2\Merged_BC-15_features_v2.csv
Stacking Model (KNN + ANN + SVM + RF):
Accuracy: 93.66%
Precision: 93.66%
Recall: 93.66%
F-measure: 93.66%
File: C:\AIL301m\Feature extract\basic\basicv2\Merged_Huong_thom-1_features_v2.csv
Stacking Model (KNN + ANN + SVM + RF):
Accuracy: 98.54%
Precision: 98.54%
Recall: 98.54%
F-measure: 98.54%
File: C:\AIL301m\Feature extract\basic\basicv2\Merged_Nep-87_features_v2.csv
Stacking Model (KNN + ANN + SVM + RF):
Accuracy: 97.89%
Precision: 97.91%
Recall: 97.89%
F-measure: 97.89%
File: C:\AIL301m\Feature extract\basic\basicv2\Merged_Q-5_modify_features_v2.csv
Stacking Model (KNN + ANN + SVM + RF):
Accuracy: 98.19%
Precision: 98.20%
Recall: 98.19%
F-measure: 98.19%
File: C:\AIL301m\Feature extract\basic\basicv2\Merged_Thien_uu-8_features_v2.csv
Stacking Model (KNN + ANN + SVM + RF):
Accuracy: 98.49%
Precision: 98.50%
Recall: 98.49%
F-measure: 98.49%
File: C:\AIL301m\Feature extract\basic\basicv2\Merged_Xi-23_feature

In [9]:
file_paths_basic = [
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_BC-15_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Huong_thom-1_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Nep-87_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Q-5_modify_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Thien_uu-8_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Xi-23_features_v2.csv'
]

results = []
accuracies = []

for file_path in file_paths_basic:
    data = pd.read_csv(file_path)
    X = data.drop(['filename', 'label'], axis=1)
    y = data['label']
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.33, random_state=42)
    
    # Define base models
    knn = KNeighborsClassifier(n_neighbors=19)
    ann = MLPClassifier(hidden_layer_sizes=(50,), max_iter=2000, random_state=42)
    svm = SVC(kernel='rbf',probability=True, random_state=42)
    rf = RandomForestClassifier(n_estimators=1200, max_features=6, random_state=42)
    
    # Define meta-learner
    meta_learner = LogisticRegression(random_state=42)
    
    # Create the stacking classifier
    stacking_classifier = StackingClassifier(
        estimators=[('knn', knn), ('ann', ann), ('svm', svm), ('rf', rf)],
        final_estimator=meta_learner,
        cv=3
    )
    
    # Train the stacking classifier
    stacking_classifier.fit(X_train, y_train)
    y_pred = stacking_classifier.predict(X_test)
    
    # Calculate metrics
    acc = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')

    print(f"File: {file_path}")
    print("Stacking Model (KNN + ANN + SVM + RF):")
    print(f"Accuracy: {acc * 100:.2f}%")
    print(f"Precision: {precision * 100:.2f}%")
    print(f"Recall: {recall * 100:.2f}%")
    print(f"F-measure: {f1 * 100:.2f}%")

    # Format the results to xx,xx%
    results.append([
        file_path,
        f"{acc * 100:.2f}".replace('.', ',') + '%',
        f"{precision * 100:.2f}".replace('.', ',') + '%',
        f"{recall * 100:.2f}".replace('.', ',') + '%',
        f"{f1 * 100:.2f}".replace('.', ',') + '%'
    ])
    accuracies.append(acc)

# Specify the output file path
output_file = r'C:\AIL301m\result\Stacking\Basic_results_4_models_v2_test.csv'

# Write the results to a CSV file
with open(output_file, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['File', 'Accuracy', 'Precision', 'Recall', 'F-measure'])
    writer.writerows(results)
average_accuracy = np.mean(accuracies)
print(f"Overall Average Accuracy: {average_accuracy * 100:.2f}%") 

print(f"Results have been written to {output_file}")

File: C:\AIL301m\Feature extract\basic\basicv2\Merged_BC-15_features_v2.csv
Stacking Model (KNN + ANN + SVM + RF):
Accuracy: 93.66%
Precision: 93.66%
Recall: 93.66%
F-measure: 93.66%
File: C:\AIL301m\Feature extract\basic\basicv2\Merged_Huong_thom-1_features_v2.csv
Stacking Model (KNN + ANN + SVM + RF):
Accuracy: 98.54%
Precision: 98.54%
Recall: 98.54%
F-measure: 98.54%
File: C:\AIL301m\Feature extract\basic\basicv2\Merged_Nep-87_features_v2.csv
Stacking Model (KNN + ANN + SVM + RF):
Accuracy: 97.89%
Precision: 97.91%
Recall: 97.89%
F-measure: 97.89%
File: C:\AIL301m\Feature extract\basic\basicv2\Merged_Q-5_modify_features_v2.csv
Stacking Model (KNN + ANN + SVM + RF):
Accuracy: 98.19%
Precision: 98.20%
Recall: 98.19%
F-measure: 98.19%
File: C:\AIL301m\Feature extract\basic\basicv2\Merged_Thien_uu-8_features_v2.csv
Stacking Model (KNN + ANN + SVM + RF):
Accuracy: 98.49%
Precision: 98.50%
Recall: 98.49%
F-measure: 98.49%
File: C:\AIL301m\Feature extract\basic\basicv2\Merged_Xi-23_feature

In [5]:
file_paths_basic = [
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_BC-15_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Huong_thom-1_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Nep-87_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Q-5_modify_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Thien_uu-8_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Xi-23_features_v2.csv'
]

params = {
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_BC-15_features_v2.csv': {
        'knn': {'n_neighbors': 4, 'metric': 'euclidean'},
        'ann': {'activation':'tanh', 'alpha': 0.05, 'hidden_layer_sizes': (150,),'max_iter': 1000},
        'svm': {'kernel': 'rbf', 'C': 100, 'gamma':0.01},
        'rf': {'n_estimators': 1200, 'max_features': 6, 'criterion':'entropy', 'max_depth': 10}
    },
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Huong_thom-1_features_v2.csv': {
        'knn': {'n_neighbors': 12, 'metric': 'euclidean','weights': 'distance'},
        'ann': {'alpha': 0.05, 'hidden_layer_sizes': (50,),'max_iter': 1000},
        'svm': {'kernel': 'rbf', 'C': 100, 'gamma':0.01},
        'rf': {'max_features': 6, 'criterion':'entropy', 'max_depth': 10}
    },
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Nep-87_features_v2.csv': {
        'knn': {'n_neighbors': 4, 'metric': 'manhattan'},
        'ann': {'activation':'tanh', 'hidden_layer_sizes': (50,),'max_iter': 1000},
        'svm': {'kernel': 'rbf', 'C': 100, 'gamma':0.01},
        'rf': {'max_features': 6, 'criterion':'entropy', 'max_depth': 6,'n_estimators':500}
    },
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Q-5_modify_features_v2.csv': {
        'knn': {'n_neighbors': 4, 'metric': 'euclidean'},
        'ann': {'activation':'tanh', 'alpha': 0.05, 'hidden_layer_sizes': (150,),'max_iter': 1000},
        'svm': {'kernel': 'rbf', 'C': 10, 'gamma':0.01},
        'rf': {'max_features': 6, 'criterion':'entropy', 'max_depth': 10,'n_estimators':200}
    },
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Thien_uu-8_features_v2.csv': {
        'knn': {'n_neighbors': 10, 'metric': 'euclidean'},
        'ann': {'hidden_layer_sizes': (50,), 'alpha': 0.05, 'max_iter': 1000},
        'svm': {'kernel': 'linear', 'C': 10, 'gamma':1},
        'rf': {'max_features': 6, 'max_depth': 10}
    },
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Xi-23_features_v2.csv': {
        'knn': {'n_neighbors': 6, 'metric': 'euclidean','weights': 'distance'},
        'ann': {'activation':'tanh', 'hidden_layer_sizes': (150,),'max_iter': 1000},
        'svm': {'kernel': 'rbf', 'C': 100, 'gamma':0.01},
        'rf': {'max_features': 6, 'criterion':'entropy', 'max_depth': 6,'n_estimators':500}
    }
}

results = []
accuracies = []

for file_path in file_paths_basic:
    data = pd.read_csv(file_path)
    X = data.drop(['filename', 'label'], axis=1)
    y = data['label']
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.33, random_state=42)
    
    # Fetch the parameters for the current file
    param_set = params[file_path]
    
    # Define base models with specific parameters
    knn = KNeighborsClassifier(**param_set['knn'])
    ann = MLPClassifier(**param_set['ann'], random_state=42)
    svm = SVC(**param_set['svm'], random_state=42)
    rf = RandomForestClassifier(**param_set['rf'], random_state=42)
    
    # Define meta-learner
    meta_learner = LogisticRegression(C=100, max_iter=200, penalty='l1', solver='liblinear',random_state=42)
    
    # Create the stacking classifier
    stacking_classifier = StackingClassifier(
        estimators=[('knn', knn), ('ann', ann), ('svm', svm), ('rf', rf)],
        final_estimator=meta_learner,
        cv=5
    )
    
    # Train the stacking classifier
    stacking_classifier.fit(X_train, y_train)
    y_pred = stacking_classifier.predict(X_test)
    
    # Calculate metrics
    acc = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')

    print(f"File: {file_path}")
    print("Stacking Model (KNN + ANN + SVM + RF):")
    print(f"Accuracy: {acc * 100:.2f}%")
    print(f"Precision: {precision * 100:.2f}%")
    print(f"Recall: {recall * 100:.2f}%")
    print(f"F-measure: {f1 * 100:.2f}%")

    # Format the results to xx,xx%
    results.append([
        file_path,
        f"{acc * 100:.2f}".replace('.', ',') + '%',
        f"{precision * 100:.2f}".replace('.', ',') + '%',
        f"{recall * 100:.2f}".replace('.', ',') + '%',
        f"{f1 * 100:.2f}".replace('.', ',') + '%'
    ])
    accuracies.append(acc)

# Specify the output file path
output_file = r'C:\AIL301m\result\Stacking\Basic_results_4_models_v3.csv'

# Write the results to a CSV file
with open(output_file, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['File', 'Accuracy', 'Precision', 'Recall', 'F-measure'])
    writer.writerows(results)
average_accuracy = np.mean(accuracies)
print(f"Overall Average Accuracy: {average_accuracy * 100:.2f}%") 

print(f"Results have been written to {output_file}")

File: C:\AIL301m\Feature extract\basic\basicv2\Merged_BC-15_features_v2.csv
Stacking Model (KNN + ANN + SVM + RF):
Accuracy: 94.65%
Precision: 94.67%
Recall: 94.65%
F-measure: 94.64%
File: C:\AIL301m\Feature extract\basic\basicv2\Merged_Huong_thom-1_features_v2.csv
Stacking Model (KNN + ANN + SVM + RF):
Accuracy: 98.76%
Precision: 98.76%
Recall: 98.76%
F-measure: 98.76%
File: C:\AIL301m\Feature extract\basic\basicv2\Merged_Nep-87_features_v2.csv
Stacking Model (KNN + ANN + SVM + RF):
Accuracy: 98.10%
Precision: 98.12%
Recall: 98.10%
F-measure: 98.10%
File: C:\AIL301m\Feature extract\basic\basicv2\Merged_Q-5_modify_features_v2.csv
Stacking Model (KNN + ANN + SVM + RF):
Accuracy: 98.09%
Precision: 98.10%
Recall: 98.09%
F-measure: 98.09%
File: C:\AIL301m\Feature extract\basic\basicv2\Merged_Thien_uu-8_features_v2.csv
Stacking Model (KNN + ANN + SVM + RF):
Accuracy: 98.34%
Precision: 98.34%
Recall: 98.34%
F-measure: 98.34%
File: C:\AIL301m\Feature extract\basic\basicv2\Merged_Xi-23_feature

In [1]:
file_paths_basic = [
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_BC-15_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Huong_thom-1_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Nep-87_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Q-5_modify_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Thien_uu-8_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Xi-23_features_v2.csv'
]

params = {
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_BC-15_features_v2.csv': {
        'knn': {'metric': 'euclidean', 'n_neighbors': 4, 'weights': 'uniform'},
        'ann': {'activation': 'relu', 'alpha': 0.01, 'hidden_layer_sizes': (150,), 'max_iter': 1000},
        'svm': {'C': 100, 'degree': 3, 'gamma': 0.01, 'kernel': 'rbf'},
        'rf': {'criterion': 'entropy', 'max_depth': 20, 'max_features': 6, 'n_estimators': 1500}
    },
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Huong_thom-1_features_v2.csv': {
        'knn': {'metric': 'euclidean', 'n_neighbors': 4, 'weights': 'distance'},
        'ann': {'activation': 'relu', 'alpha': 0.01, 'hidden_layer_sizes': (150,), 'max_iter': 1000},
        'svm': {'C': 10, 'degree': 3, 'gamma': 0.01, 'kernel': 'rbf'},
        'rf': {'max_features': 6, 'criterion':'entropy', 'max_depth': 10}
    },
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Nep-87_features_v2.csv': {
        'knn': {'metric': 'manhattan', 'n_neighbors': 10, 'weights': 'distance'},
        'ann': {'activation': 'relu', 'alpha': 0.05, 'hidden_layer_sizes': (150,), 'max_iter': 1000},
        'svm': {'C': 1000, 'degree': 3, 'gamma': 0.01, 'kernel': 'linear'},
        'rf': {'criterion': 'entropy', 'max_depth': 20, 'max_features': 3, 'n_estimators': 1500}
    },
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Q-5_modify_features_v2.csv': {
        'knn': {'metric': 'euclidean', 'n_neighbors': 4, 'weights': 'uniform'},
        'ann': {'activation': 'relu', 'alpha': 0.05, 'hidden_layer_sizes': (100,), 'max_iter': 1000},
        'svm': {'C': 100, 'degree': 3, 'gamma': 0.01, 'kernel': 'rbf'},
        'rf': {'criterion': 'entropy', 'max_depth': 10, 'max_features': 6, 'n_estimators': 15000}
    },
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Thien_uu-8_features_v2.csv': {
        'knn': {'metric': 'euclidean', 'n_neighbors': 4, 'weights': 'uniform'},
        'ann': {'activation': 'relu', 'alpha': 0.05, 'hidden_layer_sizes': (150,), 'max_iter': 1000},
        'svm': {'C': 1000, 'degree': 3, 'gamma': 0.01, 'kernel': 'rbf'},
        'rf': {'criterion': 'entropy', 'max_depth': 6, 'max_features': 10, 'n_estimators': 100}
    },
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Xi-23_features_v2.csv': {
        'knn': {'metric': 'euclidean', 'n_neighbors': 4, 'weights': 'uniform'},
        'ann': {'activation': 'logistic', 'alpha': 0.01, 'hidden_layer_sizes': (100,), 'max_iter': 1000},
        'svm': {'C': 10, 'degree': 3, 'gamma': 0.01, 'kernel': 'rbf'},
        'rf': {'criterion': 'entropy', 'max_depth': 20, 'max_features': 6, 'n_estimators': 1200}
    }
}

results = []
accuracies = []

for file_path in file_paths_basic:
    data = pd.read_csv(file_path)
    X = data.drop(['filename', 'label'], axis=1)
    y = data['label']
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.33, random_state=42)
    
    # Fetch the parameters for the current file
    param_set = params[file_path]
    
    # Define base models with specific parameters
    knn = KNeighborsClassifier(**param_set['knn'])
    ann = MLPClassifier(**param_set['ann'], random_state=42)
    svm = SVC(**param_set['svm'], random_state=42)
    rf = RandomForestClassifier(**param_set['rf'], random_state=42)
    
    # Define meta-learner
    meta_learner = LogisticRegression(C=100, max_iter=200, penalty='l1', solver='liblinear',random_state=42)
    
    # Create the stacking classifier
    stacking_classifier = StackingClassifier(
        estimators=[('knn', knn), ('ann', ann), ('svm', svm), ('rf', rf)],
        final_estimator=meta_learner,
        cv=5
    )
    
    # Train the stacking classifier
    stacking_classifier.fit(X_train, y_train)
    y_pred = stacking_classifier.predict(X_test)
    
    # Calculate metrics
    acc = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')

    print(f"File: {file_path}")
    print("Stacking Model (KNN + ANN + SVM + RF):")
    print(f"Accuracy: {acc * 100:.2f}%")
    print(f"Precision: {precision * 100:.2f}%")
    print(f"Recall: {recall * 100:.2f}%")
    print(f"F-measure: {f1 * 100:.2f}%")

    # Format the results to xx,xx%
    results.append([
        file_path,
        f"{acc * 100:.2f}".replace('.', ',') + '%',
        f"{precision * 100:.2f}".replace('.', ',') + '%',
        f"{recall * 100:.2f}".replace('.', ',') + '%',
        f"{f1 * 100:.2f}".replace('.', ',') + '%'
    ])
    accuracies.append(acc)

# Specify the output file path
output_file = r'C:\AIL301m\result\Stacking\Basic_results_4_models_v3.csv'

# Write the results to a CSV file
with open(output_file, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['File', 'Accuracy', 'Precision', 'Recall', 'F-measure'])
    writer.writerows(results)
average_accuracy = np.mean(accuracies)
print(f"Overall Average Accuracy: {average_accuracy * 100:.2f}%") 

print(f"Results have been written to {output_file}")

NameError: name 'pd' is not defined