In [3]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import csv

In [4]:
file_paths_basic = [
    r'C:\AIL301m\Feature extract\basic\basic52\Merged_BC-15_features_v3.csv',
    r'C:\AIL301m\Feature extract\basic\basic52\Merged_Huong_thom-1_features_v3.csv',
    r'C:\AIL301m\Feature extract\basic\basic52\Merged_Nep-87_features_v3.csv',
    r'C:\AIL301m\Feature extract\basic\basic52\Merged_Q-5_modify_features_v3.csv',
    r'C:\AIL301m\Feature extract\basic\basic52\Merged_Thien_uu-8_features_v3.csv',
    r'C:\AIL301m\Feature extract\basic\basic52\Merged_Xi-23_features_v3.csv'
]
results = []
accuracies = []
for file_path in file_paths_basic:
    data = pd.read_csv(file_path)
    X = data.drop(['filename', 'label'], axis=1)
    y = data['label']
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.33, random_state=42)
    
    # Create and fit the KNN classifier
    knn = KNeighborsClassifier(n_neighbors=16)
    knn.fit(X_train, y_train)
    y_pred_knn = knn.predict(X_test)
    acc_knn = accuracy_score(y_test, y_pred_knn)
    precision = precision_score(y_test, y_pred_knn)
    recall = recall_score(y_test, y_pred_knn)
    f1 = f1_score(y_test, y_pred_knn)

    print(f"File: {file_path}")
    print("KNN:")
    print(f"Accuracy: {acc_knn * 100:.2f}%")
    precision = precision_score(y_test, y_pred_knn)
    print(f"Precision: {precision * 100:.2f}%")

    # Calculate recall
    recall = recall_score(y_test, y_pred_knn)
    print(f"Recall: {recall * 100:.2f}%")

    # Calculate F-measure
    f1 = f1_score(y_test, y_pred_knn)
    print(f"F-measure: {f1 * 100:.2f}%")
    # Format the results to xx,xx%
    results.append([
        file_path,
        f"{acc_knn * 100:.2f}".replace('.', ',') + '%',
        f"{precision * 100:.2f}".replace('.', ',') + '%',
        f"{recall * 100:.2f}".replace('.', ',') + '%',
        f"{f1 * 100:.2f}".replace('.', ',') + '%'
    ])
    accuracies.append(acc_knn)

# Specify the output file path
output_file = r'C:\AIL301m\result\basic_52\Basic_KNN_52_results.csv'

# Write the results to a CSV file
with open(output_file, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['File', 'Accuracy', 'Precision', 'Recall', 'F-measure'])
    writer.writerows(results)
average_accuracy = np.mean(accuracies)
print(f"Overall Average Accuracy: {average_accuracy * 100:.2f}%") 
print(f"Results have been written to {output_file}")

File: C:\AIL301m\Feature extract\basic\basic52\Merged_BC-15_features_v3.csv
KNN:
Accuracy: 88.22%
Precision: 83.85%
Recall: 95.33%
F-measure: 89.22%
File: C:\AIL301m\Feature extract\basic\basic52\Merged_Huong_thom-1_features_v3.csv
KNN:
Accuracy: 95.33%
Precision: 94.30%
Recall: 96.93%
F-measure: 95.60%
File: C:\AIL301m\Feature extract\basic\basic52\Merged_Nep-87_features_v3.csv
KNN:
Accuracy: 95.89%
Precision: 95.44%
Recall: 96.44%
F-measure: 95.93%
File: C:\AIL301m\Feature extract\basic\basic52\Merged_Q-5_modify_features_v3.csv
KNN:
Accuracy: 92.96%
Precision: 88.95%
Recall: 98.42%
F-measure: 93.45%
File: C:\AIL301m\Feature extract\basic\basic52\Merged_Thien_uu-8_features_v3.csv
KNN:
Accuracy: 95.92%
Precision: 94.51%
Recall: 97.61%
F-measure: 96.04%
File: C:\AIL301m\Feature extract\basic\basic52\Merged_Xi-23_features_v3.csv
KNN:
Accuracy: 93.20%
Precision: 92.64%
Recall: 95.42%
F-measure: 94.01%
Overall Average Accuracy: 93.59%
Results have been written to C:\AIL301m\result\basic_52

In [5]:
file_paths_basic = [
    r'C:\AIL301m\Feature extract\basic\basic52\Merged_BC-15_features_v3.csv',
    r'C:\AIL301m\Feature extract\basic\basic52\Merged_Huong_thom-1_features_v3.csv',
    r'C:\AIL301m\Feature extract\basic\basic52\Merged_Nep-87_features_v3.csv',
    r'C:\AIL301m\Feature extract\basic\basic52\Merged_Q-5_modify_features_v3.csv',
    r'C:\AIL301m\Feature extract\basic\basic52\Merged_Thien_uu-8_features_v3.csv',
    r'C:\AIL301m\Feature extract\basic\basic52\Merged_Xi-23_features_v3.csv'
]
results = []
accuracies = []
for file_path in file_paths_basic:
    data = pd.read_csv(file_path)
    X = data.drop(['filename', 'label'], axis=1)
    y = data['label']
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.33, random_state=42)
    
    # Create and fit the KNN classifier
    svm = SVC(kernel='rbf')
    svm.fit(X_train, y_train)
    y_pred_svm = svm.predict(X_test)
    acc_svm = accuracy_score(y_test, y_pred_svm)
    f1_svm = f1_score(y_test, y_pred_svm)

    print(f"File: {file_path}")
    print("KNN:")
    print(f"Accuracy: {acc_svm * 100:.2f}%")
    precision = precision_score(y_test, y_pred_svm)
    print(f"Precision: {precision * 100:.2f}%")

    # Calculate recall
    recall = recall_score(y_test, y_pred_svm)
    print(f"Recall: {recall * 100:.2f}%")

    # Calculate F-measure
    f1 = f1_score(y_test, y_pred_svm)
    print(f"F-measure: {f1 * 100:.2f}%")
    # Format the results to xx,xx%
    results.append([
        file_path,
        f"{acc_svm * 100:.2f}".replace('.', ',') + '%',
        f"{precision * 100:.2f}".replace('.', ',') + '%',
        f"{recall * 100:.2f}".replace('.', ',') + '%',
        f"{f1 * 100:.2f}".replace('.', ',') + '%'
    ])
    accuracies.append(acc_svm)

# Specify the output file path
output_file = r'C:\AIL301m\result\basic_52\Basic_SVM_52_results.csv'

# Write the results to a CSV file
with open(output_file, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['File', 'Accuracy', 'Precision', 'Recall', 'F-measure'])
    writer.writerows(results)
average_accuracy = np.mean(accuracies)
print(f"Overall Average Accuracy: {average_accuracy * 100:.2f}%") 
print(f"Results have been written to {output_file}")

File: C:\AIL301m\Feature extract\basic\basic52\Merged_BC-15_features_v3.csv
KNN:
Accuracy: 91.93%
Precision: 91.57%
Recall: 92.75%
F-measure: 92.16%
File: C:\AIL301m\Feature extract\basic\basic52\Merged_Huong_thom-1_features_v3.csv
KNN:
Accuracy: 97.66%
Precision: 97.37%
Recall: 98.19%
F-measure: 97.78%
File: C:\AIL301m\Feature extract\basic\basic52\Merged_Nep-87_features_v3.csv
KNN:
Accuracy: 98.10%
Precision: 98.93%
Recall: 97.27%
F-measure: 98.10%
File: C:\AIL301m\Feature extract\basic\basic52\Merged_Q-5_modify_features_v3.csv
KNN:
Accuracy: 96.88%
Precision: 95.59%
Recall: 98.42%
F-measure: 96.99%
File: C:\AIL301m\Feature extract\basic\basic52\Merged_Thien_uu-8_features_v3.csv
KNN:
Accuracy: 98.04%
Precision: 98.49%
Recall: 97.61%
F-measure: 98.05%
File: C:\AIL301m\Feature extract\basic\basic52\Merged_Xi-23_features_v3.csv
KNN:
Accuracy: 96.05%
Precision: 96.96%
Recall: 95.95%
F-measure: 96.45%
Overall Average Accuracy: 96.44%
Results have been written to C:\AIL301m\result\basic_52

In [6]:

file_paths_basic = [
    r'C:\AIL301m\Feature extract\basic\basic52\Merged_BC-15_features_v3.csv',
    r'C:\AIL301m\Feature extract\basic\basic52\Merged_Huong_thom-1_features_v3.csv',
    r'C:\AIL301m\Feature extract\basic\basic52\Merged_Nep-87_features_v3.csv',
    r'C:\AIL301m\Feature extract\basic\basic52\Merged_Q-5_modify_features_v3.csv',
    r'C:\AIL301m\Feature extract\basic\basic52\Merged_Thien_uu-8_features_v3.csv',
    r'C:\AIL301m\Feature extract\basic\basic52\Merged_Xi-23_features_v3.csv'
]

results = []
accuracies = []
for file_path in file_paths_basic:
    data = pd.read_csv(file_path)
    X = data.drop(['filename', 'label'], axis=1)
    y = data['label']
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    
    # Perform the custom split: 67% training, 33% testing
    n_samples = len(y)
    indices = np.arange(n_samples)
    np.random.shuffle(indices)
    
    train_size = int(0.67 * n_samples)
    train_indices = indices[:train_size]
    test_indices = indices[train_size:]
    
    X_train, X_test = X_scaled[train_indices], X_scaled[test_indices]
    y_train, y_test = y.iloc[train_indices], y.iloc[test_indices]
    
    # Create and fit the Random Forest classifier with specified parameters
    rf = RandomForestClassifier(n_estimators=500, max_features=8, random_state=42, oob_score=True)
    rf.fit(X_train, y_train)
    y_pred_rf = rf.predict(X_test)
    acc_rf = accuracy_score(y_test, y_pred_rf)
    precision = precision_score(y_test, y_pred_rf)
    recall = recall_score(y_test, y_pred_rf)
    f1 = f1_score(y_test, y_pred_rf)
    oob_score = rf.oob_score_
    
    print(f"File: {file_path}")
    print("Random Forest:")
    print(f"Accuracy: {acc_rf * 100:.2f}%")
    print(f"Precision: {precision * 100:.2f}%")
    print(f"Recall: {recall * 100:.2f}%")
    print(f"F-measure: {f1 * 100:.2f}%")
    print(f"OOB Score: {oob_score * 100:.2f}%")
    
    # Format the results to xx,xx%
    results.append([
        file_path,
        f"{acc_rf * 100:.2f}".replace('.', ',') + '%',
        f"{precision * 100:.2f}".replace('.', ',') + '%',
        f"{recall * 100:.2f}".replace('.', ',') + '%',
        f"{f1 * 100:.2f}".replace('.', ',') + '%',
        f"{oob_score * 100:.2f}".replace('.', ',') + '%'
    ])
    accuracies.append(acc_rf)

# Specify the output file path
output_file = r'C:\AIL301m\result\basic_52\Basic_RF_52_results.csv'

# Write the results to a CSV file
with open(output_file, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['File', 'Accuracy', 'Precision', 'Recall', 'F-measure', 'OOB Score'])
    writer.writerows(results)
average_accuracy = np.mean(accuracies)
print(f"Overall Average Accuracy: {average_accuracy * 100:.2f}%") 
print(f"Results have been written to {output_file}")


File: C:\AIL301m\Feature extract\basic\basic52\Merged_BC-15_features_v3.csv
Random Forest:
Accuracy: 92.26%
Precision: 90.97%
Recall: 93.42%
F-measure: 92.18%
OOB Score: 92.57%
File: C:\AIL301m\Feature extract\basic\basic52\Merged_Huong_thom-1_features_v3.csv
Random Forest:
Accuracy: 97.37%
Precision: 96.77%
Recall: 98.15%
F-measure: 97.45%
OOB Score: 96.98%
File: C:\AIL301m\Feature extract\basic\basic52\Merged_Nep-87_features_v3.csv
Random Forest:
Accuracy: 98.21%
Precision: 98.51%
Recall: 97.88%
F-measure: 98.19%
OOB Score: 97.66%
File: C:\AIL301m\Feature extract\basic\basic52\Merged_Q-5_modify_features_v3.csv
Random Forest:
Accuracy: 95.17%
Precision: 94.90%
Recall: 95.29%
F-measure: 95.09%
OOB Score: 95.49%
File: C:\AIL301m\Feature extract\basic\basic52\Merged_Thien_uu-8_features_v3.csv
Random Forest:
Accuracy: 96.98%
Precision: 98.25%
Recall: 96.00%
F-measure: 97.11%
OOB Score: 96.65%
File: C:\AIL301m\Feature extract\basic\basic52\Merged_Xi-23_features_v3.csv
Random Forest:
Accura

In [7]:
file_paths_basic = [
    r'C:\AIL301m\Feature extract\basic\basic52\Merged_BC-15_features_v3.csv',
    r'C:\AIL301m\Feature extract\basic\basic52\Merged_Huong_thom-1_features_v3.csv',
    r'C:\AIL301m\Feature extract\basic\basic52\Merged_Nep-87_features_v3.csv',
    r'C:\AIL301m\Feature extract\basic\basic52\Merged_Q-5_modify_features_v3.csv',
    r'C:\AIL301m\Feature extract\basic\basic52\Merged_Thien_uu-8_features_v3.csv',
    r'C:\AIL301m\Feature extract\basic\basic52\Merged_Xi-23_features_v3.csv'
]

results = []
accuracies = []

for file_path in file_paths_basic:
    data = pd.read_csv(file_path)
    X = data.drop(['filename', 'label'], axis=1)
    y = data['label']
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.33, random_state=42)
    
    # Create and fit the Logistic Regression classifier
    lr = LogisticRegression(max_iter=1000)
    lr.fit(X_train, y_train)
    y_pred_lr = lr.predict(X_test)
    acc_lr = accuracy_score(y_test, y_pred_lr)
    precision = precision_score(y_test, y_pred_lr)
    recall = recall_score(y_test, y_pred_lr)
    f1 = f1_score(y_test, y_pred_lr)

    print(f"File: {file_path}")
    print("Logistic Regression:")
    print(f"Accuracy: {acc_lr * 100:.2f}%")
    print(f"Precision: {precision * 100:.2f}%")
    print(f"Recall: {recall * 100:.2f}%")
    print(f"F-measure: {f1 * 100:.2f}%")

    # Format the results to xx,xx%
    results.append([
        file_path,
        f"{acc_lr * 100:.2f}".replace('.', ',') + '%',
        f"{precision * 100:.2f}".replace('.', ',') + '%',
        f"{recall * 100:.2f}".replace('.', ',') + '%',
        f"{f1 * 100:.2f}".replace('.', ',') + '%'
    ])
    accuracies.append(acc_lr)

# Specify the output file path
output_file = r'C:\AIL301m\result\basic_52\Basic_LR_52_results.csv'

# Write the results to a CSV file
with open(output_file, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['File', 'Accuracy', 'Precision', 'Recall', 'F-measure'])
    writer.writerows(results)
average_accuracy = np.mean(accuracies)
print(f"Overall Average Accuracy: {average_accuracy * 100:.2f}%") 
print(f"Results have been written to {output_file}")


File: C:\AIL301m\Feature extract\basic\basic52\Merged_BC-15_features_v3.csv
Logistic Regression:
Accuracy: 90.61%
Precision: 90.69%
Recall: 90.98%
F-measure: 90.84%
File: C:\AIL301m\Feature extract\basic\basic52\Merged_Huong_thom-1_features_v3.csv
Logistic Regression:
Accuracy: 97.30%
Precision: 97.35%
Recall: 97.49%
F-measure: 97.42%
File: C:\AIL301m\Feature extract\basic\basic52\Merged_Nep-87_features_v3.csv
Logistic Regression:
Accuracy: 98.21%
Precision: 98.52%
Recall: 97.90%
F-measure: 98.21%
File: C:\AIL301m\Feature extract\basic\basic52\Merged_Q-5_modify_features_v3.csv
Logistic Regression:
Accuracy: 94.77%
Precision: 93.01%
Recall: 97.04%
F-measure: 94.98%
File: C:\AIL301m\Feature extract\basic\basic52\Merged_Thien_uu-8_features_v3.csv
Logistic Regression:
Accuracy: 98.64%
Precision: 99.10%
Recall: 98.21%
F-measure: 98.65%
File: C:\AIL301m\Feature extract\basic\basic52\Merged_Xi-23_features_v3.csv
Logistic Regression:
Accuracy: 93.57%
Precision: 94.72%
Recall: 93.73%
F-measure:

In [8]:
file_paths_basic = [
    r'C:\AIL301m\Feature extract\basic\basic52\Merged_BC-15_features_v3.csv',
    r'C:\AIL301m\Feature extract\basic\basic52\Merged_Huong_thom-1_features_v3.csv',
    r'C:\AIL301m\Feature extract\basic\basic52\Merged_Nep-87_features_v3.csv',
    r'C:\AIL301m\Feature extract\basic\basic52\Merged_Q-5_modify_features_v3.csv',
    r'C:\AIL301m\Feature extract\basic\basic52\Merged_Thien_uu-8_features_v3.csv',
    r'C:\AIL301m\Feature extract\basic\basic52\Merged_Xi-23_features_v3.csv'
]
results = []
accuracies = []

for file_path in file_paths_basic:
    data = pd.read_csv(file_path)
    X = data.drop(['filename', 'label'], axis=1)
    y = data['label']
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.33, random_state=42)
    
    # Create and fit the ANN classifier
    ann = MLPClassifier(hidden_layer_sizes=(20,), max_iter=1000, random_state=42)
    ann.fit(X_train, y_train)
    y_pred_ann = ann.predict(X_test)
    acc_ann = accuracy_score(y_test, y_pred_ann)
    precision = precision_score(y_test, y_pred_ann, average='weighted')
    recall = recall_score(y_test, y_pred_ann, average='weighted')
    f1 = f1_score(y_test, y_pred_ann, average='weighted')

    print(f"File: {file_path}")
    print("ANN:")
    print(f"Accuracy: {acc_ann * 100:.2f}%")
    print(f"Precision: {precision * 100:.2f}%")
    print(f"Recall: {recall * 100:.2f}%")
    print(f"F-measure: {f1 * 100:.2f}%")

    # Format the results to xx,xx%
    results.append([
        file_path,
        f"{acc_ann * 100:.2f}".replace('.', ',') + '%',
        f"{precision * 100:.2f}".replace('.', ',') + '%',
        f"{recall * 100:.2f}".replace('.', ',') + '%',
        f"{f1 * 100:.2f}".replace('.', ',') + '%'
    ])
    accuracies.append(acc_ann)

# Specify the output file path
output_file = r'C:\AIL301m\result\basic_52\Basic_ANN_52_results.csv'

# Write the results to a CSV file
with open(output_file, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['File', 'Accuracy', 'Precision', 'Recall', 'F-measure'])
    writer.writerows(results)
average_accuracy = np.mean(accuracies)
print(f"Overall Average Accuracy: {average_accuracy * 100:.2f}%") 
print(f"Results have been written to {output_file}")


File: C:\AIL301m\Feature extract\basic\basic52\Merged_BC-15_features_v3.csv
ANN:
Accuracy: 94.81%
Precision: 94.81%
Recall: 94.81%
F-measure: 94.81%
File: C:\AIL301m\Feature extract\basic\basic52\Merged_Huong_thom-1_features_v3.csv
ANN:
Accuracy: 98.54%
Precision: 98.54%
Recall: 98.54%
F-measure: 98.54%
File: C:\AIL301m\Feature extract\basic\basic52\Merged_Nep-87_features_v3.csv
ANN:
Accuracy: 98.21%
Precision: 98.21%
Recall: 98.21%
F-measure: 98.21%
File: C:\AIL301m\Feature extract\basic\basic52\Merged_Q-5_modify_features_v3.csv
ANN:
Accuracy: 98.29%
Precision: 98.29%
Recall: 98.29%
F-measure: 98.29%
File: C:\AIL301m\Feature extract\basic\basic52\Merged_Thien_uu-8_features_v3.csv
ANN:
Accuracy: 98.04%
Precision: 98.04%
Recall: 98.04%
F-measure: 98.04%
File: C:\AIL301m\Feature extract\basic\basic52\Merged_Xi-23_features_v3.csv
ANN:
Accuracy: 96.78%
Precision: 96.79%
Recall: 96.78%
F-measure: 96.78%
Overall Average Accuracy: 97.44%
Results have been written to C:\AIL301m\result\basic_52