In [7]:
# %% Imports
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from pyfume.Clustering import Clusterer
from pyfume.EstimateAntecendentSet import AntecedentEstimator
from pyfume.EstimateConsequentParameters import ConsequentEstimator
from pyfume.SimpfulModelBuilder import SugenoFISBuilder
from pyfume.Tester import SugenoFISTester
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, cohen_kappa_score
from numpy import clip, column_stack, argmax
from scipy.spatial.distance import cdist

# %% Load the data
# Both CSV files must contain a 'STATUS' column (the target); every other
# column is treated as a feature.
Train = pd.read_csv('dataTrain.csv')
Test = pd.read_csv('dataTest.csv')

# Prepare the data: feature matrices and target vectors as NumPy arrays.
train_features = Train.drop('STATUS', axis=1)
X_train = train_features.to_numpy()
y_train = Train['STATUS'].to_numpy()
X_test = Test.drop('STATUS', axis=1).to_numpy()
y_test = Test['STATUS'].to_numpy()

# Feature names come from the very frame that produced X_train, so they stay
# aligned with its columns even when 'STATUS' is not the last CSV column
# (slicing Train.columns[:-1] silently assumed that it was).
var_names = train_features.columns.to_list()

# Per-feature maxima and minima over the training set.
maxs = X_train.max(axis=0)
mins = X_train.min(axis=0)

# %% Separation Metric
def SeparationMetric(X_train, clust_centers, part_matrix):
    """Return the weighted sum of sample-to-center Euclidean distances.

    The Euclidean distance from every row of ``X_train`` to every row of
    ``clust_centers`` is multiplied element-wise by the matching entry of
    ``part_matrix``, and all products are added up.

    Args:
        X_train: 2-D array of samples, one row per sample.
        clust_centers: 2-D array of cluster centers, one row per center.
        part_matrix: Weights multiplied element-wise with the
            (samples x centers) distance matrix; presumably the fuzzy
            partition matrix from clustering -- confirm against the caller.

    Returns:
        The scalar total of the weighted distances.
    """
    sample_to_center = cdist(X_train, clust_centers, metric='euclidean')
    weighted = np.multiply(part_matrix, sample_to_center)
    return np.sum(weighted)

# %% K-Fold Cross-Validation
def k_fold_cross_validation(X_train, y_train, k=5, max_clusters=10, method='fcm'):
    """Cross-validate a Sugeno fuzzy classifier and return its mean fold scores.

    The data is split with a shuffled ``KFold`` (``random_state=42``). For
    every split a model is fitted on the training part -- clustering,
    antecedent estimation, consequent estimation, FIS construction -- and
    scored on the held-out part.

    Args:
        X_train: Feature matrix; rows are selected with the fold indices.
        y_train: Target vector aligned with ``X_train``.
        k: Number of folds.
        max_clusters: Forwarded to ``cluster_input_output``.
        method: Forwarded to ``cluster_input_output``.

    Returns:
        dict mapping "Accuracy", "Recall", "Precision", "F1-Score" and
        "Kappa" to the mean of that score over all folds.

    Note:
        Relies on the module-level ``var_names`` and on
        ``cluster_input_output``, which is defined outside this block.
    """
    # Scorers in reporting order; each is called as scorer(y_true, y_pred)
    # with its default settings.
    scorers = {
        "Accuracy": accuracy_score,
        "Recall": recall_score,
        "Precision": precision_score,
        "F1-Score": f1_score,
        "Kappa": cohen_kappa_score,
    }

    splitter = KFold(n_splits=k, shuffle=True, random_state=42)
    fold_scores = []

    for fit_rows, held_out_rows in splitter.split(X_train):
        X_fit, y_fit = X_train[fit_rows], y_train[fit_rows]
        X_held_out, y_held_out = X_train[held_out_rows], y_train[held_out_rows]

        # Cluster the fold's training data; only the partition matrix is
        # used further down.
        _centers, memberships = cluster_input_output(X_fit, y_fit, max_clusters, method)

        # Membership-function and consequent parameters from that partition.
        antecedents = AntecedentEstimator(X_fit, memberships).determineMF()
        consequents = ConsequentEstimator(X_fit, y_fit, memberships).suglms()

        # Assemble the Sugeno FIS model.
        fis = SugenoFISBuilder(
            antecedents, consequents, var_names, save_simpful_code=False
        ).get_model()

        # The first element of predict() is clipped to [0, 1] and used as the
        # score of class 1; its complement is the score of class 0.
        raw_output = SugenoFISTester(fis, X_held_out, var_names).predict()[0]
        positive_score = clip(raw_output, 0, 1)
        class_scores = column_stack((1 - positive_score, positive_score))
        y_hat = argmax(class_scores, axis=1)

        fold_scores.append(
            {label: scorer(y_held_out, y_hat) for label, scorer in scorers.items()}
        )

    # Average every score over the folds.
    return {
        label: np.mean([scores[label] for scores in fold_scores])
        for label in fold_scores[0]
    }

# %% Perform K-Fold Cross-Validation
# Five folds, up to ten clusters, fuzzy c-means ('fcm') clustering.
avg_metrics = k_fold_cross_validation(
    X_train,
    y_train,
    k=5,
    max_clusters=10,
    method='fcm',
)

# Report each averaged score with three decimals.
for metric in avg_metrics:
    value = avg_metrics[metric]
    print(f"Average {metric}: {value:.3f}")



Clustering with 1 clusters...
Clustering with 2 clusters...
Clustering with 3 clusters...
Clustering with 4 clusters...
Clustering with 5 clusters...
Clustering with 6 clusters...
Clustering with 7 clusters...
Clustering with 8 clusters...
Clustering with 9 clusters...
Clustering with 10 clusters...
 * Detected 9 rules / clusters
 * Detected Sugeno model type
Clustering with 1 clusters...
Clustering with 2 clusters...
Clustering with 3 clusters...
Clustering with 4 clusters...
Clustering with 5 clusters...
Clustering with 6 clusters...
Clustering with 7 clusters...
Clustering with 8 clusters...
Clustering with 9 clusters...
Clustering with 10 clusters...
 * Detected 8 rules / clusters
 * Detected Sugeno model type
Clustering with 1 clusters...
Clustering with 2 clusters...
Clustering with 3 clusters...
Clustering with 4 clusters...
Clustering with 5 clusters...
Clustering with 6 clusters...
Clustering with 7 clusters...
Clustering with 8 clusters...
Clustering with 9 clusters...
Cluste