In [6]:
import pandas as pd

# Read the test and training splits from their CSV files.
test_csv_path = r"C:\Users\giovi\Desktop\test_df.csv"
train_csv_path = r"C:\Users\giovi\Desktop\train_df_balanced.csv"
test = pd.read_csv(test_csv_path)
train = pd.read_csv(train_csv_path)

In [7]:
# Remove the same set of columns from both splits; the later cells only read
# 'dx' and 'image_path_iso'.
unused_columns = ['image_id', 'age', 'sex', 'localization', 'image_path_seg']
test = test.drop(columns=unused_columns)
train = train.drop(columns=unused_columns)


In [9]:
# Inspect the test frame after the column drop above.
test

Unnamed: 0,dx,image_path_iso
0,nv,D:\Desktop\Project\Isolated_Giuste\Segmentatio...
1,nv,D:\Desktop\Project\Isolated_Giuste\Segmentatio...
2,mel,D:\Desktop\Project\Isolated_Giuste\Segmentatio...
3,nv,D:\Desktop\Project\Isolated_Giuste\Segmentatio...
4,bkl,D:\Desktop\Project\Isolated_Giuste\Segmentatio...
...,...,...
1998,nv,D:\Desktop\Project\Isolated_Giuste\Segmentatio...
1999,bcc,D:\Desktop\Project\Isolated_Giuste\Segmentatio...
2000,mel,D:\Desktop\Project\Isolated_Giuste\Segmentatio...
2001,mel,D:\Desktop\Project\Isolated_Giuste\Segmentatio...


In [12]:
import cv2
import numpy as np
import pandas as pd
from skimage.feature import graycomatrix, graycoprops
import skfuzzy as fuzz

# --- Step 1: Caricamento e preprocess ---
def load_and_preprocess_image(path, size=(100, 100)):
    """Read an image as single-channel grayscale and resize it.

    Parameters
    ----------
    path : str
        Location of the image file on disk.
    size : tuple of int, optional
        Target size handed to ``cv2.resize`` (OpenCV interprets it as
        ``(width, height)``). Defaults to ``(100, 100)``.

    Returns
    -------
    numpy.ndarray
        The resized grayscale image.

    Raises
    ------
    FileNotFoundError
        If OpenCV could not read the file (missing, unreadable or not a
        decodable image).
    """
    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread does not raise on failure; it returns None, which would
    # otherwise surface later as an opaque error inside cv2.resize.
    if img is None:
        raise FileNotFoundError(f"Could not read image: {path!r}")
    return cv2.resize(img, size)

# --- Step 2: Calcolo feature GLCM ---
def calculate_glcm_features(img, distances=[1], angles=[0, np.pi/4, np.pi/2, 3*np.pi/4], levels=256):
    """Compute four GLCM texture descriptors averaged over all offsets.

    Parameters
    ----------
    img : array-like
        Grayscale image passed straight to ``graycomatrix``.
    distances : list, optional
        Pixel-pair distances for the co-occurrence matrix.
    angles : list, optional
        Pixel-pair angles in radians.
    levels : int, optional
        Number of gray levels forwarded to ``graycomatrix``.

    Returns
    -------
    dict
        Keys ``'contrast'``, ``'correlation'``, ``'homogeneity'`` and
        ``'energy'`` (in that order), each holding the mean of the property
        over every distance/angle combination.
    """
    cooccurrence = graycomatrix(
        img,
        distances=distances,
        angles=angles,
        levels=levels,
        symmetric=True,
        normed=True,
    )

    property_names = ('contrast', 'correlation', 'homogeneity', 'energy')
    # One graycoprops call per property; each result is collapsed to a scalar mean.
    return {
        prop: np.mean(graycoprops(cooccurrence, prop).flatten())
        for prop in property_names
    }

# --- Step 3: Estrazione features dataset ---
def extract_features_from_dataset(df):
    """Build a feature table with one row per image listed in ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide an ``'image_path_iso'`` column (image file path) and a
        ``'dx'`` column (class label).

    Returns
    -------
    pandas.DataFrame
        The GLCM features of every image followed by its ``'dx'`` label.
    """
    records = []
    for _, sample in df.iterrows():
        image = load_and_preprocess_image(sample['image_path_iso'])
        # Keep the label alongside the features so later steps can group by class.
        records.append({**calculate_glcm_features(image), 'dx': sample['dx']})
    return pd.DataFrame(records)

# --- Step 4: Fuzzy C-means clustering per classe ---
def fuzzy_c_means_per_class(df_train_feats, m=2, error=0.005, maxiter=1000):
    """Fit a single fuzzy c-means cluster to each class and keep its center.

    Parameters
    ----------
    df_train_feats : pandas.DataFrame
        Feature table containing a ``'dx'`` label column; every other column
        is treated as a feature.
    m : float, optional
        Fuzziness exponent forwarded to ``cmeans``.
    error : float, optional
        Stopping tolerance forwarded to ``cmeans``.
    maxiter : int, optional
        Iteration cap forwarded to ``cmeans``.

    Returns
    -------
    dict
        Maps each distinct ``'dx'`` value to a flattened array holding the
        center of its single cluster.
    """
    centers_by_label = {}
    labels = df_train_feats['dx']

    for label in labels.unique():
        class_rows = df_train_feats[labels == label]
        # Transposed so the data is laid out as (features, samples).
        feature_matrix = class_rows.drop(columns=['dx']).values.T
        # Only the centers (first returned value) are needed here.
        centers, *_ = fuzz.cluster.cmeans(
            feature_matrix, c=1, m=m, error=error, maxiter=maxiter, init=None
        )
        centers_by_label[label] = centers.flatten()

    return centers_by_label

# --- Funzione per stampare centroidi fuzzy di una classe specifica ---
def print_fuzzy_centroids_for_class(cluster_centers, class_name):
    """Print the stored centroid of one class, one feature per line.

    Parameters
    ----------
    cluster_centers : dict
        Mapping from class label to a sequence of centroid values.
    class_name : hashable
        Label whose centroid should be shown. When it is not a key of
        ``cluster_centers`` a "not found" message is printed instead.
    """
    if class_name in cluster_centers:
        print(f"Centroidi fuzzy per la classe '{class_name}':")
        # Values are paired positionally with the fixed feature order below.
        labelled_values = zip(
            ('contrast', 'correlation', 'homogeneity', 'energy'),
            cluster_centers[class_name],
        )
        for feature_name, value in labelled_values:
            print(f"  {feature_name}: {value:.4f}")
    else:
        print(f"Classe '{class_name}' non trovata nei cluster.")

# --- Step 5: Distanza Chi-quadrato ---
def chi_square_distance(test_feats, cluster_center):
    """Chi-square style distance between two feature vectors.

    Computes ``sum((a - b)**2 / (|a| + |b| + eps))``. For non-negative
    inputs this equals the usual ``(a - b)**2 / (a + b)`` form; taking
    absolute values in the denominator keeps every term non-negative and
    finite when a feature can be negative (e.g. GLCM correlation, which
    lies in [-1, 1]).

    Parameters
    ----------
    test_feats : array-like
        Feature vector of the sample. Object-dtype arrays holding numbers
        are accepted and converted to float.
    cluster_center : array-like
        Reference vector with the same length as ``test_feats``.

    Returns
    -------
    float
        Non-negative distance; 0 when the two vectors are identical.
    """
    a = np.asarray(test_feats, dtype=float)
    b = np.asarray(cluster_center, dtype=float)
    numerator = (a - b) ** 2
    # eps avoids division by zero when both components are exactly 0.
    denominator = np.abs(a) + np.abs(b) + 1e-10
    return np.sum(numerator / denominator)

# --- Step 6: Classificazione immagine ---
def classify_image(test_feats, cluster_centers):
    """Assign a sample to the class whose center is nearest.

    Parameters
    ----------
    test_feats : array-like
        Feature vector of the sample to classify.
    cluster_centers : dict
        Mapping from class label to that class's center vector.

    Returns
    -------
    tuple
        ``(predicted_class, distances)`` where ``distances`` maps every
        class label to its chi-square distance from ``test_feats``.
    """
    distances = {
        label: chi_square_distance(test_feats, centroid)
        for label, centroid in cluster_centers.items()
    }
    # Smallest distance wins.
    return min(distances, key=distances.get), distances

# --- Pipeline completa ---
def run_pipeline(df_train, df_test):
    """Run feature extraction, per-class clustering and test-set evaluation.

    Parameters
    ----------
    df_train : pandas.DataFrame
        Training rows with ``'image_path_iso'`` and ``'dx'`` columns.
    df_test : pandas.DataFrame
        Test rows with the same two columns.

    Returns
    -------
    tuple
        ``(cluster_centers, test_features)``: the per-class centers and the
        test feature table extended with a ``'predicted'`` column.
    """
    # Feature extraction for both splits.
    print("Estrazione feature training set...")
    train_features = extract_features_from_dataset(df_train)
    print("Estrazione feature test set...")
    test_features = extract_features_from_dataset(df_test)

    # One cluster center per class, learned from the training features only.
    print("Esecuzione fuzzy c-means clustering per classe...")
    cluster_centers = fuzzy_c_means_per_class(train_features)

    # Nearest-center prediction for every test row.
    print("Classificazione immagini di test...")
    test_features['predicted'] = [
        classify_image(sample.drop('dx').values, cluster_centers)[0]
        for _, sample in test_features.iterrows()
    ]

    # Metrics: 'efficiency' is 100 minus the error percentage.
    truth = test_features['dx']
    guessed = test_features['predicted']
    accuracy = (truth == guessed).mean()
    wrong_count = (truth != guessed).sum()
    total_count = len(test_features)
    error_percent = (wrong_count / total_count) * 100
    efficiency = 100 - error_percent

    print(f'Accuracy: {accuracy*100:.2f}%')
    print(f'Error rate: {error_percent:.2f}%')
    print(f'Efficiency: {efficiency:.2f}%')

    return cluster_centers, test_features

# --- USAGE ---
# cluster_centers, test_results = run_pipeline(train, test)
# print_fuzzy_centroids_for_class(cluster_centers, 'nv')


In [13]:
# Run the pipeline on the two splits and keep the centers and the predictions.
cluster_centers, test_results = run_pipeline(df_train=train, df_test=test)

Estrazione feature training set...
Estrazione feature test set...
Esecuzione fuzzy c-means clustering per classe...
Classificazione immagini di test...
Accuracy: 16.18%
Error rate: 83.82%
Efficiency: 16.18%


In [18]:
# Show the centroid stored for the 'nv' class.
print_fuzzy_centroids_for_class(cluster_centers=cluster_centers, class_name='nv')

Centroidi fuzzy per la classe 'nv':
  contrast: 373.0522
  correlation: 0.9110
  homogeneity: 0.7391
  energy: 0.7135
