In [1]:
import json
import numpy as np
import os

# Update the path for the benign_train.json file
benign_train_path = r"C:\Users\BMEI CMU\Documents\GitHub\WORK\Windows\CODE_BME\PROJECT_CYBER_SECURITY\RESULT\01.TRAIN_TEST_SET\benign_train.json"
distance_matrix_path = r"C:\Users\BMEI CMU\Documents\GitHub\WORK\Windows\CODE_BME\PROJECT_CYBER_SECURITY\RESULT\02.EDIT_DISTANCE_MATRIX\benign_distance_matrix.npy"

benign_train = []
with open(benign_train_path, 'r') as file:
    for line in file:
        benign_train.append(json.loads(line.strip()))

distance_matrix = np.load(distance_matrix_path)

In [2]:
def save_results(output_dir, num_clusters, prototypes, memberships, max_iter, benign_train):
    """
    Save prototypes (data from the first column), membership matrix, and max_iter to files.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    
    # Save prototypes as strings from the first column
    prototypes_path = os.path.join(output_dir, f"prototypes_{num_clusters}.txt")
    with open(prototypes_path, "w") as f:
        f.write("\n".join(benign_train.iloc[prototype, 0] for prototype in prototypes))  # ใช้คอลัมน์แรก (data)

    # Save membership matrix
    memberships_path = os.path.join(output_dir, f"memberships_{num_clusters}.npy")
    np.save(memberships_path, memberships)

    # Save max_iter
    max_iter_path = os.path.join(output_dir, f"max_iter_{num_clusters}.txt")
    with open(max_iter_path, "w") as f:
        f.write(str(max_iter))

In [3]:
def fuzzy_median_with_matrix(distance_matrix, memberships, fuzzifier, cluster_idx):
    """
    คำนวณ fuzzy median สำหรับ cluster ที่กำหนดโดยใช้ distance matrix และ membership matrix
    """
    weights = memberships[:, cluster_idx] ** fuzzifier
    weighted_distances = np.sum(weights[:, np.newaxis] * distance_matrix, axis=0)
    return np.argmin(weighted_distances)

In [4]:
def sgFCMed_debug_calculation(distance_matrix, num_clusters, fuzzifier, max_iter, tol):
    """
    String Grammar Fuzzy C-Medians (sgFCMed) Debug Calculation.
    
    Parameters:
    - distance_matrix: 2D numpy array containing the distances.
    - num_clusters: Number of clusters.
    - fuzzifier: Fuzzifier for membership computation.
    - max_iter: Maximum number of iterations.
    - tol: Tolerance for convergence.
    
    Returns:
    - prototypes: Final prototypes for each cluster.
    - memberships: Final membership matrix.
    """
    # Initialize prototypes randomly
    prototypes = np.random.choice(range(distance_matrix.shape[0]), size=num_clusters, replace=False)
    memberships = np.zeros((distance_matrix.shape[0], num_clusters))

    print("Initial Prototypes and Memberships:")
    for i, prototype in enumerate(prototypes):
        print(f"Cluster {i+1}: Prototype Index {prototype}")

    for iteration in range(1, max_iter + 1):
        print(f"\n--- Iteration {iteration} ---")
        new_prototypes = []

        for cluster_idx in range(num_clusters):
            # Calculate memberships for the current prototype
            for i in range(distance_matrix.shape[0]):
                denominator = sum(
                    (distance_matrix[i, prototypes[cluster_idx]] / distance_matrix[i, prototypes[other_idx]]) ** (2 / (fuzzifier - 1))
                    if distance_matrix[i, prototypes[other_idx]] > 0 else 1
                    for other_idx in range(num_clusters)
                )
                memberships[i, cluster_idx] = 1 / denominator

            # Update prototypes using fuzzy_median_with_matrix
            best_index = fuzzy_median_with_matrix(distance_matrix, memberships, fuzzifier, cluster_idx)
            print(f"Cluster {cluster_idx+1}: Prototype Update -> Old: {prototypes[cluster_idx]}, New: {best_index}")
            new_prototypes.append(best_index)

        # Check for convergence
        if np.all([distance_matrix[prototypes[k], new_prototypes[k]] <= tol for k in range(num_clusters)]):
            print("\nConvergence reached.")
            break

        prototypes = new_prototypes

    return prototypes, memberships

In [5]:
def dir_search_sgFCMed(distance_matrix, cluster_range, fuzzifier, max_iter=100, tol=1e-4, benign_train=None, output_dir="results"):
    """
    Perform a directed search over multiple cluster counts and save results.
    """
    if benign_train is None:
        raise ValueError("benign_train must be provided to map prototypes to strings.")
    
    results = {}
    for num_clusters in cluster_range:
        print(f"\nRunning sgFCMed with num_clusters = {num_clusters}")
        prototypes, memberships = sgFCMed_debug_calculation(
            distance_matrix=distance_matrix,
            num_clusters=num_clusters,
            fuzzifier=fuzzifier,
            max_iter=max_iter,
            tol=tol
        )
        
        # Save the results for this num_clusters
        save_results(output_dir, num_clusters, prototypes, memberships, max_iter, benign_train)
        results[num_clusters] = (prototypes, memberships)
    
    print("\nAll results saved to:", output_dir)
    return results

In [6]:
# Run the directed search with the specified output path
results = dir_search_sgFCMed(
    distance_matrix=distance_matrix,
    cluster_range = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
    fuzzifier=2.0,
    max_iter=100,
    benign_train=benign_train,
    tol=1e-4,
    output_dir=r"C:\Users\BMEI CMU\Documents\GitHub\WORK\Windows\CODE_BME\PROJECT_CYBER_SECURITY\RESULT\03.sgFCMed"
)


Running sgFCMed with num_clusters = 10
Initial Prototypes and Memberships:
Cluster 1: Prototype Index 133
Cluster 2: Prototype Index 164
Cluster 3: Prototype Index 944
Cluster 4: Prototype Index 140
Cluster 5: Prototype Index 937
Cluster 6: Prototype Index 712
Cluster 7: Prototype Index 898
Cluster 8: Prototype Index 468
Cluster 9: Prototype Index 227
Cluster 10: Prototype Index 945

--- Iteration 1 ---
Cluster 1: Prototype Update -> Old: 133, New: 931
Cluster 2: Prototype Update -> Old: 164, New: 330
Cluster 3: Prototype Update -> Old: 944, New: 127
Cluster 4: Prototype Update -> Old: 140, New: 158
Cluster 5: Prototype Update -> Old: 937, New: 937
Cluster 6: Prototype Update -> Old: 712, New: 712
Cluster 7: Prototype Update -> Old: 898, New: 626
Cluster 8: Prototype Update -> Old: 468, New: 468
Cluster 9: Prototype Update -> Old: 227, New: 330
Cluster 10: Prototype Update -> Old: 945, New: 945

--- Iteration 2 ---
Cluster 1: Prototype Update -> Old: 931, New: 931
Cluster 2: Prototype

AttributeError: 'list' object has no attribute 'iloc'