In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import csv

In [2]:
file_paths_basic = [
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_BC-15_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Huong_thom-1_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Nep-87_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Q-5_modify_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Thien_uu-8_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Xi-23_features_v2.csv'
]
results = []
accuracies = []
for file_path in file_paths_basic:
    data = pd.read_csv(file_path)
    X = data.drop(['filename', 'label'], axis=1)
    y = data['label']
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.33, random_state=42)
    
    # Create and fit the KNN classifier
    knn = KNeighborsClassifier(n_neighbors=16)
    knn.fit(X_train, y_train)
    y_pred_knn = knn.predict(X_test)
    acc_knn = accuracy_score(y_test, y_pred_knn)
    precision = precision_score(y_test, y_pred_knn)
    recall = recall_score(y_test, y_pred_knn)
    f1 = f1_score(y_test, y_pred_knn)

    print(f"File: {file_path}")
    print("KNN:")
    print(f"Accuracy: {acc_knn * 100:.2f}%")
    precision = precision_score(y_test, y_pred_knn)
    print(f"Precision: {precision * 100:.2f}%")

    # Calculate recall
    recall = recall_score(y_test, y_pred_knn)
    print(f"Recall: {recall * 100:.2f}%")

    # Calculate F-measure
    f1 = f1_score(y_test, y_pred_knn)
    print(f"F-measure: {f1 * 100:.2f}%")
    # Format the results to xx,xx%
    results.append([
        file_path,
        f"{acc_knn * 100:.2f}".replace('.', ',') + '%',
        f"{precision * 100:.2f}".replace('.', ',') + '%',
        f"{recall * 100:.2f}".replace('.', ',') + '%',
        f"{f1 * 100:.2f}".replace('.', ',') + '%'
    ])
    accuracies.append(acc_knn)

# Specify the output file path
output_file = r'C:\AIL301m\result\basic_36\Basic_KNN_36_results.csv'

# Write the results to a CSV file
with open(output_file, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['File', 'Accuracy', 'Precision', 'Recall', 'F-measure'])
    writer.writerows(results)
average_accuracy = np.mean(accuracies)
print(f"Overall Average Accuracy: {average_accuracy * 100:.2f}%") 
print(f"Results have been written to {output_file}")

File: C:\AIL301m\Feature extract\basic\basicv2\Merged_BC-15_features_v2.csv
KNN:
Accuracy: 88.06%
Precision: 84.29%
Recall: 94.20%
F-measure: 88.97%
File: C:\AIL301m\Feature extract\basic\basicv2\Merged_Huong_thom-1_features_v2.csv
KNN:
Accuracy: 94.23%
Precision: 93.46%
Recall: 95.68%
F-measure: 94.56%
File: C:\AIL301m\Feature extract\basic\basicv2\Merged_Nep-87_features_v2.csv
KNN:
Accuracy: 95.05%
Precision: 94.42%
Recall: 95.81%
F-measure: 95.11%
File: C:\AIL301m\Feature extract\basic\basicv2\Merged_Q-5_modify_features_v2.csv
KNN:
Accuracy: 93.26%
Precision: 89.15%
Recall: 98.82%
F-measure: 93.73%
File: C:\AIL301m\Feature extract\basic\basicv2\Merged_Thien_uu-8_features_v2.csv
KNN:
Accuracy: 95.62%
Precision: 94.48%
Recall: 97.01%
F-measure: 95.73%
File: C:\AIL301m\Feature extract\basic\basicv2\Merged_Xi-23_features_v2.csv
KNN:
Accuracy: 93.06%
Precision: 92.73%
Recall: 95.03%
F-measure: 93.87%
Overall Average Accuracy: 93.21%
Results have been written to C:\AIL301m\result\basic_36

In [3]:
file_paths_basic = [
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_BC-15_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Huong_thom-1_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Nep-87_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Q-5_modify_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Thien_uu-8_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Xi-23_features_v2.csv'
]
results = []
accuracies = []
for file_path in file_paths_basic:
    data = pd.read_csv(file_path)
    X = data.drop(['filename', 'label'], axis=1)
    y = data['label']
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.33, random_state=42)
    
    # Create and fit the KNN classifier
    svm = SVC(kernel='rbf')
    svm.fit(X_train, y_train)
    y_pred_svm = svm.predict(X_test)
    acc_svm = accuracy_score(y_test, y_pred_svm)
    f1_svm = f1_score(y_test, y_pred_svm)

    print(f"File: {file_path}")
    print("KNN:")
    print(f"Accuracy: {acc_svm * 100:.2f}%")
    precision = precision_score(y_test, y_pred_svm)
    print(f"Precision: {precision * 100:.2f}%")

    # Calculate recall
    recall = recall_score(y_test, y_pred_svm)
    print(f"Recall: {recall * 100:.2f}%")

    # Calculate F-measure
    f1 = f1_score(y_test, y_pred_svm)
    print(f"F-measure: {f1 * 100:.2f}%")
    # Format the results to xx,xx%
    results.append([
        file_path,
        f"{acc_svm * 100:.2f}".replace('.', ',') + '%',
        f"{precision * 100:.2f}".replace('.', ',') + '%',
        f"{recall * 100:.2f}".replace('.', ',') + '%',
        f"{f1 * 100:.2f}".replace('.', ',') + '%'
    ])
    accuracies.append(acc_svm)

# Specify the output file path
output_file = r'C:\AIL301m\result\basic_36\Basic_SVM_36_results.csv'

# Write the results to a CSV file
with open(output_file, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['File', 'Accuracy', 'Precision', 'Recall', 'F-measure'])
    writer.writerows(results)
average_accuracy = np.mean(accuracies)
print(f"Overall Average Accuracy: {average_accuracy * 100:.2f}%") 
print(f"Results have been written to {output_file}")

File: C:\AIL301m\Feature extract\basic\basicv2\Merged_BC-15_features_v2.csv
KNN:
Accuracy: 91.43%
Precision: 90.08%
Recall: 93.56%
F-measure: 91.79%
File: C:\AIL301m\Feature extract\basic\basicv2\Merged_Huong_thom-1_features_v2.csv
KNN:
Accuracy: 97.23%
Precision: 96.32%
Recall: 98.47%
F-measure: 97.38%
File: C:\AIL301m\Feature extract\basic\basicv2\Merged_Nep-87_features_v2.csv
KNN:
Accuracy: 98.31%
Precision: 99.15%
Recall: 97.48%
F-measure: 98.31%
File: C:\AIL301m\Feature extract\basic\basicv2\Merged_Q-5_modify_features_v2.csv
KNN:
Accuracy: 96.28%
Precision: 94.17%
Recall: 98.82%
F-measure: 96.44%
File: C:\AIL301m\Feature extract\basic\basicv2\Merged_Thien_uu-8_features_v2.csv
KNN:
Accuracy: 97.28%
Precision: 97.03%
Recall: 97.61%
F-measure: 97.32%
File: C:\AIL301m\Feature extract\basic\basicv2\Merged_Xi-23_features_v2.csv
KNN:
Accuracy: 95.83%
Precision: 96.21%
Recall: 96.34%
F-measure: 96.28%
Overall Average Accuracy: 96.06%
Results have been written to C:\AIL301m\result\basic_36

In [4]:
file_paths_basic = [
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_BC-15_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Huong_thom-1_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Nep-87_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Q-5_modify_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Thien_uu-8_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Xi-23_features_v2.csv'
]

results = []
accuracies = []
for file_path in file_paths_basic:
    data = pd.read_csv(file_path)
    X = data.drop(['filename', 'label'], axis=1)
    y = data['label']
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    
    # Perform the custom split: 67% training, 33% testing
    n_samples = len(y)
    indices = np.arange(n_samples)
    np.random.shuffle(indices)
    
    train_size = int(0.67 * n_samples)
    train_indices = indices[:train_size]
    test_indices = indices[train_size:]
    
    X_train, X_test = X_scaled[train_indices], X_scaled[test_indices]
    y_train, y_test = y.iloc[train_indices], y.iloc[test_indices]
    
    # Create and fit the Random Forest classifier with specified parameters
    rf = RandomForestClassifier(n_estimators=500, max_features=6, random_state=42, oob_score=True)
    rf.fit(X_train, y_train)
    y_pred_rf = rf.predict(X_test)
    acc_rf = accuracy_score(y_test, y_pred_rf)
    precision = precision_score(y_test, y_pred_rf)
    recall = recall_score(y_test, y_pred_rf)
    f1 = f1_score(y_test, y_pred_rf)
    oob_score = rf.oob_score_
    
    print(f"File: {file_path}")
    print("Random Forest:")
    print(f"Accuracy: {acc_rf * 100:.2f}%")
    print(f"Precision: {precision * 100:.2f}%")
    print(f"Recall: {recall * 100:.2f}%")
    print(f"F-measure: {f1 * 100:.2f}%")
    print(f"OOB Score: {oob_score * 100:.2f}%")
    
    # Format the results to xx,xx%
    results.append([
        file_path,
        f"{acc_rf * 100:.2f}".replace('.', ',') + '%',
        f"{precision * 100:.2f}".replace('.', ',') + '%',
        f"{recall * 100:.2f}".replace('.', ',') + '%',
        f"{f1 * 100:.2f}".replace('.', ',') + '%',
        f"{oob_score * 100:.2f}".replace('.', ',') + '%'
    ])
    accuracies.append(acc_rf)

# Specify the output file path
output_file = r'C:\AIL301m\result\basic_36\Basic_RF_36_results.csv'

# Write the results to a CSV file
with open(output_file, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['File', 'Accuracy', 'Precision', 'Recall', 'F-measure', 'OOB Score'])
    writer.writerows(results)
average_accuracy = np.mean(accuracies)
print(f"Overall Average Accuracy: {average_accuracy * 100:.2f}%") 
print(f"Results have been written to {output_file}")


File: C:\AIL301m\Feature extract\basic\basicv2\Merged_BC-15_features_v2.csv
Random Forest:
Accuracy: 91.93%
Precision: 90.13%
Recall: 93.77%
F-measure: 91.91%
OOB Score: 92.04%
File: C:\AIL301m\Feature extract\basic\basicv2\Merged_Huong_thom-1_features_v2.csv
Random Forest:
Accuracy: 95.91%
Precision: 95.32%
Recall: 96.92%
F-measure: 96.12%
OOB Score: 95.76%
File: C:\AIL301m\Feature extract\basic\basicv2\Merged_Nep-87_features_v2.csv
Random Forest:
Accuracy: 96.10%
Precision: 95.72%
Recall: 95.94%
F-measure: 95.83%
OOB Score: 97.92%
File: C:\AIL301m\Feature extract\basic\basicv2\Merged_Q-5_modify_features_v2.csv
Random Forest:
Accuracy: 94.97%
Precision: 94.55%
Recall: 95.32%
F-measure: 94.93%
OOB Score: 95.73%
File: C:\AIL301m\Feature extract\basic\basicv2\Merged_Thien_uu-8_features_v2.csv
Random Forest:
Accuracy: 98.04%
Precision: 97.91%
Recall: 98.20%
F-measure: 98.06%
OOB Score: 96.73%
File: C:\AIL301m\Feature extract\basic\basicv2\Merged_Xi-23_features_v2.csv
Random Forest:
Accura

In [5]:
file_paths_basic = [
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_BC-15_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Huong_thom-1_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Nep-87_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Q-5_modify_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Thien_uu-8_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Xi-23_features_v2.csv'
]

results = []
accuracies = []

for file_path in file_paths_basic:
    data = pd.read_csv(file_path)
    X = data.drop(['filename', 'label'], axis=1)
    y = data['label']
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.33, random_state=42)
    
    # Create and fit the Logistic Regression classifier
    lr = LogisticRegression(max_iter=1000)
    lr.fit(X_train, y_train)
    y_pred_lr = lr.predict(X_test)
    acc_lr = accuracy_score(y_test, y_pred_lr)
    precision = precision_score(y_test, y_pred_lr)
    recall = recall_score(y_test, y_pred_lr)
    f1 = f1_score(y_test, y_pred_lr)

    print(f"File: {file_path}")
    print("Logistic Regression:")
    print(f"Accuracy: {acc_lr * 100:.2f}%")
    print(f"Precision: {precision * 100:.2f}%")
    print(f"Recall: {recall * 100:.2f}%")
    print(f"F-measure: {f1 * 100:.2f}%")

    # Format the results to xx,xx%
    results.append([
        file_path,
        f"{acc_lr * 100:.2f}".replace('.', ',') + '%',
        f"{precision * 100:.2f}".replace('.', ',') + '%',
        f"{recall * 100:.2f}".replace('.', ',') + '%',
        f"{f1 * 100:.2f}".replace('.', ',') + '%'
    ])
    accuracies.append(acc_lr)

# Specify the output file path
output_file = r'C:\AIL301m\result\basic_36\Basic_LR_36_results.csv'

# Write the results to a CSV file
with open(output_file, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['File', 'Accuracy', 'Precision', 'Recall', 'F-measure'])
    writer.writerows(results)
average_accuracy = np.mean(accuracies)
print(f"Overall Average Accuracy: {average_accuracy * 100:.2f}%") 
print(f"Results have been written to {output_file}")


File: C:\AIL301m\Feature extract\basic\basicv2\Merged_BC-15_features_v2.csv
Logistic Regression:
Accuracy: 88.14%
Precision: 89.03%
Recall: 87.60%
F-measure: 88.31%
File: C:\AIL301m\Feature extract\basic\basicv2\Merged_Huong_thom-1_features_v2.csv
Logistic Regression:
Accuracy: 96.57%
Precision: 96.27%
Recall: 97.21%
F-measure: 96.74%
File: C:\AIL301m\Feature extract\basic\basicv2\Merged_Nep-87_features_v2.csv
Logistic Regression:
Accuracy: 98.10%
Precision: 98.32%
Recall: 97.90%
F-measure: 98.11%
File: C:\AIL301m\Feature extract\basic\basicv2\Merged_Q-5_modify_features_v2.csv
Logistic Regression:
Accuracy: 92.76%
Precision: 91.75%
Recall: 94.28%
F-measure: 93.00%
File: C:\AIL301m\Feature extract\basic\basicv2\Merged_Thien_uu-8_features_v2.csv
Logistic Regression:
Accuracy: 97.73%
Precision: 97.90%
Recall: 97.61%
F-measure: 97.76%
File: C:\AIL301m\Feature extract\basic\basicv2\Merged_Xi-23_features_v2.csv
Logistic Regression:
Accuracy: 92.25%
Precision: 93.76%
Recall: 92.29%
F-measure:

In [6]:
file_paths_basic = [
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_BC-15_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Huong_thom-1_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Nep-87_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Q-5_modify_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Thien_uu-8_features_v2.csv',
    r'C:\AIL301m\Feature extract\basic\basicv2\Merged_Xi-23_features_v2.csv'
]

results = []
accuracies = []

for file_path in file_paths_basic:
    data = pd.read_csv(file_path)
    X = data.drop(['filename', 'label'], axis=1)
    y = data['label']
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.33, random_state=42)
    
    # Create and fit the ANN classifier
    ann = MLPClassifier(hidden_layer_sizes=(20,), max_iter=1000, random_state=42)
    ann.fit(X_train, y_train)
    y_pred_ann = ann.predict(X_test)
    acc_ann = accuracy_score(y_test, y_pred_ann)
    precision = precision_score(y_test, y_pred_ann, average='weighted')
    recall = recall_score(y_test, y_pred_ann, average='weighted')
    f1 = f1_score(y_test, y_pred_ann, average='weighted')

    print(f"File: {file_path}")
    print("ANN:")
    print(f"Accuracy: {acc_ann * 100:.2f}%")
    print(f"Precision: {precision * 100:.2f}%")
    print(f"Recall: {recall * 100:.2f}%")
    print(f"F-measure: {f1 * 100:.2f}%")

    # Format the results to xx,xx%
    results.append([
        file_path,
        f"{acc_ann * 100:.2f}".replace('.', ',') + '%',
        f"{precision * 100:.2f}".replace('.', ',') + '%',
        f"{recall * 100:.2f}".replace('.', ',') + '%',
        f"{f1 * 100:.2f}".replace('.', ',') + '%'
    ])
    accuracies.append(acc_ann)

# Specify the output file path
output_file = r'C:\AIL301m\result\basic_36\Basic_ANN_36_results.csv'

# Write the results to a CSV file
with open(output_file, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['File', 'Accuracy', 'Precision', 'Recall', 'F-measure'])
    writer.writerows(results)
average_accuracy = np.mean(accuracies)
print(f"Overall Average Accuracy: {average_accuracy * 100:.2f}%") 
print(f"Results have been written to {output_file}")


File: C:\AIL301m\Feature extract\basic\basicv2\Merged_BC-15_features_v2.csv
ANN:
Accuracy: 93.33%
Precision: 93.33%
Recall: 93.33%
F-measure: 93.33%
File: C:\AIL301m\Feature extract\basic\basicv2\Merged_Huong_thom-1_features_v2.csv
ANN:
Accuracy: 97.74%
Precision: 97.74%
Recall: 97.74%
F-measure: 97.74%
File: C:\AIL301m\Feature extract\basic\basicv2\Merged_Nep-87_features_v2.csv
ANN:
Accuracy: 97.89%
Precision: 97.90%
Recall: 97.89%
F-measure: 97.89%
File: C:\AIL301m\Feature extract\basic\basicv2\Merged_Q-5_modify_features_v2.csv
ANN:
Accuracy: 97.59%
Precision: 97.59%
Recall: 97.59%
F-measure: 97.59%
File: C:\AIL301m\Feature extract\basic\basicv2\Merged_Thien_uu-8_features_v2.csv
ANN:
Accuracy: 98.04%
Precision: 98.04%
Recall: 98.04%
F-measure: 98.04%
File: C:\AIL301m\Feature extract\basic\basicv2\Merged_Xi-23_features_v2.csv
ANN:
Accuracy: 95.98%
Precision: 95.98%
Recall: 95.98%
F-measure: 95.98%
Overall Average Accuracy: 96.76%
Results have been written to C:\AIL301m\result\basic_36