In [1]:
# Importing Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pyfume.Clustering import Clusterer
from pyfume.EstimateAntecendentSet import AntecedentEstimator
from pyfume.EstimateConsequentParameters import ConsequentEstimator
from pyfume.SimpfulModelBuilder import SugenoFISBuilder
from pyfume.Tester import SugenoFISTester
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, cohen_kappa_score
from numpy import clip, column_stack, argmax
from scipy.spatial.distance import cdist
import warnings

warnings.simplefilter(action='ignore', category=FutureWarning)

# Data Loading
Train = pd.read_csv('dataTrain.csv')
Test = pd.read_csv('dataTest.csv')
stats = pd.read_csv('stats.csv').iloc[:, 1:]

X_train = Train.drop('STATUS', axis=1).to_numpy()
y_train = Train['STATUS'].to_numpy()
X_test = Test.drop('STATUS', axis=1).to_numpy()
y_test = Test['STATUS'].to_numpy()

var_names = Train.columns[:-1].to_list()
savecols = var_names
maxs = X_train.max(axis=0)
mins = X_train.min(axis=0)

# Separation Metric with Vectorization
def SeparationMetric(X_train, clust_centers, part_matrix):
    distances = cdist(X_train, clust_centers, metric='euclidean')
    return np.sum(part_matrix * distances)

# Optimized Clustering Process
def optimized_clustering(X_train, y_train, max_clusters, method):
    best_metric = float('inf')
    best_clust = None
    best_part_matrix = None
    
    for i in range(1, max_clusters + 1):
        cl = Clusterer(x_train=X_train, y_train=y_train, nr_clus=i)
        clust_centers, part_matrix, _ = cl.cluster(method=method)
        metric = SeparationMetric(X_train, clust_centers[:, :-1], part_matrix)
        
        if metric < best_metric:
            best_metric = metric
            best_clust = clust_centers[:, :-1]
            best_part_matrix = part_matrix

    return best_clust, best_part_matrix

# Clustering
max_clusters = 10
selected_method = 'fst-pso'
clust_centers, part_matrix = optimized_clustering(X_train, y_train, max_clusters, selected_method)

# Building the Model
ae = AntecedentEstimator(X_train, part_matrix)
antecedent_params = ae.determineMF()
ce = ConsequentEstimator(X_train, y_train, part_matrix)
conseq_params = ce.suglms()

modbuilder = SugenoFISBuilder(antecedent_params, conseq_params, var_names, save_simpful_code=False)
model = modbuilder.get_model()

# Inference and Metrics
modtester = SugenoFISTester(model, X_test, var_names)
y_pred_probs = clip(modtester.predict()[0], 0, 1)
y_pred = argmax(column_stack((1 - y_pred_probs, y_pred_probs)), axis=1)

metrics = {
    "Accuracy": accuracy_score(y_test, y_pred),
    "Recall": recall_score(y_test, y_pred),
    "Precision": precision_score(y_test, y_pred),
    "F1-Score": f1_score(y_test, y_pred),
    "Kappa": cohen_kappa_score(y_test, y_pred),
}

for metric, value in metrics.items():
    print(f"{metric}: {value:.3f}")

# Visualization Adjustments for PyFUME
for ix, var_name in enumerate(var_names):
    model._lvs[var_name]._universe_of_discourse = [mins[ix], maxs[ix]]

# Model Visualization Example
model.produce_figure("", 2)

# Conversion to New MFs for Interpretation
def UODtoX(model, stats):
    uods = {}
    for lv in model._lvs:
        data = stats[stats['Feature'] == lv]
        mean = data.iloc[0, 1]
        std = np.sqrt(data.iloc[0, 2])
        uod = model._lvs[lv]._universe_of_discourse
        uod = [uod[0] * std + mean, uod[1] * std + mean]
        uods[lv] = uod
    return uods

uods = UODtoX(model, stats)

def FunctionstoX(model, stats):
    final_df = pd.DataFrame(columns=var_names + ['constant'])
    for f in model._outputfunctions:
        expression = model._outputfunctions[f]
        terms = expression.split('+')
        converted = [0] * (len(var_names) + 1)
        
        for term in terms[:-1]:
            coeff, var = term.split('*')
            coeff = float(coeff)
            stat = stats[stats['Feature'] == var]
            std = np.sqrt(stat.iloc[0, 2])
            mean = stat.iloc[0, 1]
            converted[var_names.index(var)] = coeff / std
            converted[-1] -= coeff * mean / std
        
        converted[-1] += float(terms[-1])
        final_df.loc[len(final_df)] = converted

    return final_df

consequents = FunctionstoX(model, stats)

# Simplified Plotting Function
def plotMFs(mfs, var):
    plt.figure()
    x = np.linspace(mfs[var][0].uod[0], mfs[var][0].uod[1], 100)
    for mf in mfs[var]:
        y = [mf(j) for j in x]
        plt.plot(x, y)
    plt.title(var)
    plt.show()

# Example of Plotting
plotMFs(uods, 'thalachh')


KeyboardInterrupt: 

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pyfume.Clustering import Clusterer
from pyfume.EstimateAntecendentSet import AntecedentEstimator
from pyfume.EstimateConsequentParameters import ConsequentEstimator
from pyfume.SimpfulModelBuilder import SugenoFISBuilder
from pyfume.Tester import SugenoFISTester
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, cohen_kappa_score
from numpy import clip, column_stack, argmax
from scipy.spatial.distance import cdist
from sklearn.decomposition import PCA
from concurrent.futures import ProcessPoolExecutor
import warnings

warnings.simplefilter(action='ignore', category=FutureWarning)

# Data Loading
Train = pd.read_csv('dataTrain.csv')
Test = pd.read_csv('dataTest.csv')
stats = pd.read_csv('stats.csv').iloc[:, 1:]

X_train = Train.drop('STATUS', axis=1).to_numpy()
y_train = Train['STATUS'].to_numpy()
X_test = Test.drop('STATUS', axis=1).to_numpy()
y_test = Test['STATUS'].to_numpy()

var_names = Train.columns[:-1].to_list()
savecols = var_names
maxs = X_train.max(axis=0)
mins = X_train.min(axis=0)

# Optional: Dimensionality Reduction with PCA
n_components = min(X_train.shape[1], 10)  # Retain up to 10 components
pca = PCA(n_components=n_components)
X_train_reduced = pca.fit_transform(X_train)
X_test_reduced = pca.transform(X_test)

# Optimized Separation Metric
def SeparationMetric(X_train, clust_centers, part_matrix):
    distances = np.linalg.norm(X_train[:, None] - clust_centers, axis=2)
    return np.sum(part_matrix * distances)

# Optimized Clustering with Parallel Processing
def cluster_task(n_clusters, X_train, y_train, method):
    cl = Clusterer(x_train=X_train, y_train=y_train, nr_clus=n_clusters)
    clust_centers, part_matrix, _ = cl.cluster(method=method)
    metric = SeparationMetric(X_train, clust_centers[:, :-1], part_matrix)
    return metric, clust_centers[:, :-1], part_matrix

def optimized_clustering(X_train, y_train, max_clusters, method):
    best_metric = float('inf')
    best_clust = None
    best_part_matrix = None

    with ProcessPoolExecutor() as executor:
        results = list(executor.map(
            cluster_task,
            range(1, max_clusters + 1),
            [X_train] * max_clusters,
            [y_train] * max_clusters,
            [method] * max_clusters,
        ))
    
    for metric, clust_centers, part_matrix in results:
        if metric < best_metric:
            best_metric = metric
            best_clust = clust_centers
            best_part_matrix = part_matrix

    return best_clust, best_part_matrix

# Clustering
max_clusters = 10
selected_method = 'fst-pso'
clust_centers, part_matrix = optimized_clustering(X_train_reduced, y_train, max_clusters, selected_method)

# Building the Model
ae = AntecedentEstimator(X_train_reduced, part_matrix)
antecedent_params = ae.determineMF()
ce = ConsequentEstimator(X_train_reduced, y_train, part_matrix)
conseq_params = ce.suglms()

modbuilder = SugenoFISBuilder(antecedent_params, conseq_params, var_names[:n_components], save_simpful_code=False)
model = modbuilder.get_model()

# Inference and Metrics
modtester = SugenoFISTester(model, X_test_reduced, var_names[:n_components])
y_pred_probs = clip(modtester.predict()[0], 0, 1)
y_pred = argmax(column_stack((1 - y_pred_probs, y_pred_probs)), axis=1)

metrics = {
    "Accuracy": accuracy_score(y_test, y_pred),
    "Recall": recall_score(y_test, y_pred),
    "Precision": precision_score(y_test, y_pred),
    "F1-Score": f1_score(y_test, y_pred),
    "Kappa": cohen_kappa_score(y_test, y_pred),
}

for metric, value in metrics.items():
    print(f"{metric}: {value:.3f}")

# Visualization Adjustments for PyFUME
for ix, var_name in enumerate(var_names[:n_components]):
    model._lvs[var_name]._universe_of_discourse = [mins[ix], maxs[ix]]

# Simplified Plotting Function
def plotMFs(mfs, var):
    plt.figure()
    x = np.linspace(mfs[var][0].uod[0], mfs[var][0].uod[1], 100)
    for mf in mfs[var]:
        y = [mf(j) for j in x]
        plt.plot(x, y)
    plt.title(var)
    plt.show()

# Example of Plotting
plotMFs(model._lvs, var_names[0])


BrokenProcessPool: A process in the process pool was terminated abruptly while the future was running or pending.