In [None]:
pip install --upgrade -r requirements.txt

In [1]:
import numpy as np
import pandas as pd
import json
from collections import defaultdict

class FuzzyKNN:
    def __init__(self, k=5, m=2):
        """
        Fuzzy K-Nearest Neighbors classifier.

        Parameters:
        - k: จำนวนเพื่อนบ้าน (ต้องเป็นเลขคี่)
        - m: Fuzziness parameter (ค่าความ fuzzy)
        """
        self.k = k
        self.m = m

    def predict(self, train_labels, test_labels, distance_matrix):
        """
        ทำนายข้อมูล Test โดยใช้ค่าระยะห่างที่คำนวณไว้แล้ว

        Parameters:
        - train_labels: อาร์เรย์ของ label สำหรับข้อมูล train (200 แถว)
        - test_labels: อาร์เรย์ของ label สำหรับข้อมูล test (500 แถว)
        - distance_matrix: เมทริกซ์ระยะห่าง (200x500)

        Returns:
        - DataFrame แสดงผลลัพธ์ Fuzzy Membership ของแต่ละ test sample
        """
        N, M = distance_matrix.shape
        assert len(train_labels) == N, f"train_labels ต้องมีขนาด {N}"
        assert len(test_labels) == M, f"test_labels ต้องมีขนาด {M}"

        predictions = []

        for j in range(M):  # วนลูปทุกแถวของ Test Data
            distances = distance_matrix[:, j]  # ดึงระยะห่างของ Test ตัวที่ j กับทุก Train
            sorted_indices = np.argsort(distances)[:self.k]  # เลือก K ตัวที่ใกล้ที่สุด
            nearest_labels = train_labels[sorted_indices]
            nearest_distances = distances[sorted_indices]

            # คำนวณสมาชิก (Fuzzy Membership)
            membership = defaultdict(float)
            if np.any(nearest_distances == 0):  # ถ้ามีตัวที่ตรงกันพอดี
                label = nearest_labels[np.argmin(nearest_distances)]
                predictions.append({label: 1.0})
                continue

            inv_distances = 1 / (nearest_distances ** (2 / (self.m - 1)))
            total_inv = np.sum(inv_distances)

            for i, label in enumerate(nearest_labels):
                membership[label] += inv_distances[i] / total_inv

            predictions.append(membership)

        # แปลง Fuzzy Membership เป็น Label ที่ทำนายได้
        predicted_labels = [max(membership, key=membership.get) for membership in predictions]

        # สร้าง DataFrame แสดงผล
        df_results = pd.DataFrame({
            "Test Label": test_labels, 
            "Predicted Label": predicted_labels
        })

        return df_results


def run_fuzzy_knn(train_label_path, test_label_path, distance_matrix_path, k_min=1, k_max=21, m_min=2, m_max=5):
    with open(train_label_path, "r", encoding="utf-8") as file:
        train_labels_data = json.load(file)
    
    with open(test_label_path, "r", encoding="utf-8") as file:
        test_labels_data = json.load(file)

    train_labels_df = pd.DataFrame(train_labels_data)
    test_labels_df = pd.DataFrame(test_labels_data)

    train_labels = train_labels_df.iloc[:, 1].to_numpy()
    test_labels = test_labels_df.iloc[:, 1].to_numpy()

    distance_matrix_df = pd.read_csv(distance_matrix_path, header=None)
    distance_matrix = distance_matrix_df.to_numpy()

    best_accuracy = 0
    best_k = None
    best_m = None

    for k in range(k_min, k_max + 1, 2):
        for m in np.arange(m_min, m_max + 0.1, 0.1):
            fuzzy_knn = FuzzyKNN(k=k, m=m)
            df_results = fuzzy_knn.predict(train_labels, test_labels, distance_matrix)

            correct_predictions = (df_results["Test Label"] == df_results["Predicted Label"]).sum()
            total_predictions = len(df_results)
            accuracy = (correct_predictions / total_predictions) * 100

            print(f"🔹 k={k}, m={m} → Accuracy: {accuracy:.2f}% ({correct_predictions}/{total_predictions} correct)")

            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_k = k
                best_m = m

    return best_k, best_m, best_accuracy


# ตัวอย่างการใช้งาน run_fuzzy_knn สำหรับหลาย fold
if __name__ == "__main__":
    folds = ["fold_1", "fold_2","fold_3", "fold_4"]  # ระบุ fold ที่ต้องการทดสอบ
    base_path = "C:/Users/KUNG_LOBSTER69/Documents/GitHub/WORK/Windows/CODE_BME/PROJECT_CYBER_SECURITY/RESULT"

    # เก็บผลลัพธ์ในแต่ละ fold
    results = []

    for fold in folds:
        train_label_path = f"{base_path}/05.DATA_VALIDATION/{fold}/MALWARE_100_BENIGN_100/validation_train.json"
        test_label_path = f"{base_path}/05.DATA_VALIDATION/{fold}/MALWARE_100_BENIGN_100/validation_test.json"
        distance_matrix_path = f"{base_path}/06.EDIT_DISTANCE_VALIDATION/{fold}/MATRIX_EDIT_DISTANCE_MALWARE_100_BENIGN_100.csv"

        print(f"\n📂 Running for {fold}...")
        best_k, best_m, best_accuracy = run_fuzzy_knn(train_label_path, test_label_path, distance_matrix_path)

        # เก็บผลลัพธ์ของแต่ละ fold
        results.append({
            "Fold": fold,
            "Best k": best_k,
            "Best m": best_m,
            "Best Accuracy": best_accuracy
        })

    # แสดงผลสรุปทั้งหมดหลังจากการทดสอบเสร็จสิ้น
    print("\n✅ สรุปผลการทดสอบทั้งหมด:")
    for result in results:
        print(f"📂 {result['Fold']} → Best k = {result['Best k']}, Best m = {result['Best m']}, Best Accuracy = {result['Best Accuracy']:.2f}%")



📂 Running for fold_1...
🔹 k=1, m=2.0 → Accuracy: 86.20% (431/500 correct)
🔹 k=1, m=2.1 → Accuracy: 86.20% (431/500 correct)
🔹 k=1, m=2.2 → Accuracy: 86.20% (431/500 correct)
🔹 k=1, m=2.3000000000000003 → Accuracy: 86.20% (431/500 correct)
🔹 k=1, m=2.4000000000000004 → Accuracy: 86.20% (431/500 correct)
🔹 k=1, m=2.5000000000000004 → Accuracy: 86.20% (431/500 correct)
🔹 k=1, m=2.6000000000000005 → Accuracy: 86.20% (431/500 correct)
🔹 k=1, m=2.7000000000000006 → Accuracy: 86.20% (431/500 correct)
🔹 k=1, m=2.8000000000000007 → Accuracy: 86.20% (431/500 correct)
🔹 k=1, m=2.900000000000001 → Accuracy: 86.20% (431/500 correct)
🔹 k=1, m=3.000000000000001 → Accuracy: 86.20% (431/500 correct)
🔹 k=1, m=3.100000000000001 → Accuracy: 86.20% (431/500 correct)
🔹 k=1, m=3.200000000000001 → Accuracy: 86.20% (431/500 correct)
🔹 k=1, m=3.300000000000001 → Accuracy: 86.20% (431/500 correct)
🔹 k=1, m=3.4000000000000012 → Accuracy: 86.20% (431/500 correct)
🔹 k=1, m=3.5000000000000013 → Accuracy: 86.20% (43