In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Input tables (paths are relative to the notebook's working directory)
df_clust = pd.read_csv('Excel/Clustering2.csv')
df_diag = pd.read_csv('Excel/Diagnostics.csv')
df_comp = pd.read_csv('Excel/Complicanze_classificate.csv')

# Attach the complication type to every diagnosis row via its code;
# a left join keeps diagnoses whose code has no classification (NaN type).
code_to_type = df_comp.loc[:, ['Code', 'TipoComplicanza']]
df_diag = pd.merge(df_diag, code_to_type, how='left', on='Code')

# Classificazione del tipo di complicanza per ogni paziente
def classify_complication(comp_types):
    """Collapse all complication labels of one patient into a single class.

    Parameters
    ----------
    comp_types : pandas.Series
        The ``TipoComplicanza`` values of one patient; may contain NaN/None
        and may be entirely missing.

    Returns
    -------
    str
        ``'diabetic'`` if any label is ``'diabetica'``; otherwise
        ``'non_diabetic'`` if any label is ``'non diabetica'``; otherwise
        ``'healthy'``. Matching ignores case and surrounding whitespace.
    """
    # Normalise in plain Python rather than through the ``.str`` accessor:
    # ``.str`` raises AttributeError on a float-dtype (all-NaN) Series.
    found = {str(value).strip().lower() for value in comp_types.dropna()}
    if 'diabetica' in found:
        return 'diabetic'
    if 'non diabetica' in found:
        return 'non_diabetic'
    return 'healthy'

# One complication class per patient, derived from all of their diagnoses
patient_types = (
    df_diag.groupby('Patient_ID')['TipoComplicanza']
    .apply(classify_complication)
    .reset_index()
)
df_clust = df_clust.merge(patient_types, on='Patient_ID', how='left')
# Patients without any diagnosis row have no class after the left join
df_clust['TipoComplicanza'] = df_clust['TipoComplicanza'].fillna('healthy')

# Sex encoding: 0 -> 'F', 1 -> 'M' (any other value becomes NaN)
df_clust['Sex'] = df_clust['Sex'].map({0: 'F', 1: 'M'})

# Age bands; the first band is closed on both sides ([0, 20]), the others are
# (lo, hi]. Ages outside 0-90 get a NaN band and drop out of the counts below.
bins = [0, 20, 30, 40, 50, 60, 70, 80, 90]
labels = ['0-20', '21-30', '31-40', '41-50', '51-60', '61-70', '71-80', '81-90']
df_clust['age_bin'] = pd.cut(df_clust['Age'], bins=bins, labels=labels, right=True, include_lowest=True)


# Number of distinct patients per age band, sex and complication class.
# ``observed=False`` is spelled out because ``age_bin`` is categorical: it keeps
# the existing result and avoids the pandas FutureWarning about the default.
count_df = (
    df_clust.groupby(['age_bin', 'Sex', 'TipoComplicanza'], observed=False)['Patient_ID']
    .nunique()
    .reset_index(name='count')
)

# Colour palettes, one shade per complication class
male_colors = {
    'diabetic': '#054F77',       # dark blue
    'non_diabetic': '#5588AA',   # medium blue
    'healthy': '#A9D1E5'         # light blue
}
female_colors = {
    'diabetic': '#A3003E',       # dark pink
    'non_diabetic': '#D279A6',   # medium pink
    'healthy': '#F4C2D7'         # light pink
}

# Plot
import numpy as np
from matplotlib.patches import Patch

x = np.arange(len(labels))
width = 0.35

plt.figure(figsize=(14, 7))

# For each sex: its palette and the x positions of its bars
# (male bars left of the tick, female bars right of it).
sex_layout = (
    ('M', male_colors, x - width / 2),
    ('F', female_colors, x + width / 2),
)
# Bottom-to-top order of the stacked segments
stack_order = ('healthy', 'non_diabetic', 'diabetic')

for sex, palette, pos in sex_layout:
    bottoms = np.zeros(len(labels))
    for tipo in stack_order:
        # Segment height per age band; 0 when the combination has no row
        heights = []
        for age in labels:
            mask = (
                (count_df['age_bin'] == age)
                & (count_df['Sex'] == sex)
                & (count_df['TipoComplicanza'] == tipo)
            )
            selected = count_df[mask]
            heights.append(0 if selected.empty else selected['count'].values[0])

        bars = plt.bar(pos, heights, width=width, bottom=bottoms, color=palette[tipo])

        # Numeric label in the middle of every non-empty segment
        for centre, height, base in zip(pos, heights, bottoms):
            if height > 0:
                plt.text(centre, base + height / 2, str(height),
                         ha='center', va='center', fontsize=9, color='black')

        bottoms += heights

# Fixed, ordered legend: the three male entries first, then the three female ones
legend_names = (
    ('healthy', 'Healthy'),
    ('non_diabetic', 'Non-diabetic'),
    ('diabetic', 'Diabetic'),
)
legend_elements = [
    Patch(facecolor=palette[key], label=f'{name} ({sex})')
    for sex, palette, _ in sex_layout
    for key, name in legend_names
]

# Axis cosmetics
plt.xticks(x, labels, rotation=45)
plt.xlabel("Fasce di età")
plt.ylabel("Numero di pazienti")
plt.title("")
plt.legend(handles=legend_elements, loc='upper left')
plt.tight_layout()
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# -------------------------------
# 1. DATA LOADING
# -------------------------------
df_glucose = pd.read_csv("Excel/Glucose_measurements.csv")
df_diag = pd.read_csv("Excel/Diagnostics.csv")
df_comp = pd.read_csv("Excel/Complicanze_classificate.csv")
df_cluster = pd.read_csv("Excel/Clustering2.csv")

# -------------------------------
# 2. COMPLICATION CLASSIFICATION
# -------------------------------
# Left join: every diagnosis keeps its row; codes without a classification
# end up with a NaN complication type.
code_to_type = df_comp.loc[:, ['Code', 'TipoComplicanza']]
df_diag = pd.merge(df_diag, code_to_type, how='left', on='Code')

def classify_patient(tipi):
    """Collapse all complication labels of one patient into a single class.

    Parameters
    ----------
    tipi : pandas.Series
        The ``TipoComplicanza`` values of one patient; may contain NaN/None
        and may be entirely missing.

    Returns
    -------
    str
        ``'diabetic'`` if any label is ``'diabetica'``; otherwise
        ``'non_diabetic'`` if any label is ``'non diabetica'``; otherwise
        ``'healthy'``. Matching ignores case and surrounding whitespace.
    """
    # Normalise in plain Python rather than through the ``.str`` accessor:
    # ``.str`` raises AttributeError on a float-dtype (all-NaN) Series.
    found = {str(value).strip().lower() for value in tipi.dropna()}
    if 'diabetica' in found:
        return 'diabetic'
    if 'non diabetica' in found:
        return 'non_diabetic'
    return 'healthy'

# One row per patient: Patient_ID plus the class derived from all diagnoses
patient_classes = (
    df_diag.groupby("Patient_ID")['TipoComplicanza']
    .apply(classify_patient)
    .rename('Complication_Type')
    .reset_index()
)

# -------------------------------
# 3. FUNZIONE PER %TIR, TAR, TBR
# -------------------------------
def calculate_range_percent(df, label, condition):
    """Return, per patient, the percentage of measurements satisfying ``condition``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Patient_ID`` and ``Measurement`` columns.
    label : str
        Name of the percentage column in the result.
    condition : callable
        Takes the ``Measurement`` Series and returns a boolean Series of the
        same length (True where the measurement counts).

    Returns
    -------
    pandas.DataFrame
        Columns ``Patient_ID`` and ``label``, one row per patient, sorted by
        ``Patient_ID``. An empty input yields an empty frame with the same
        two columns.
    """
    # Evaluate the condition once on the whole column and aggregate the flags,
    # instead of calling a Python lambda per group through GroupBy.apply.
    flags = condition(df['Measurement'])
    per_patient = flags.groupby(df['Patient_ID'])
    # Every row of a patient counts in the denominator, including rows whose
    # measurement is missing (they simply never satisfy the condition).
    percent = per_patient.sum() / per_patient.size() * 100
    return percent.rename(label).reset_index()

# -------------------------------
# 4. GRAFICO DISTRIBUZIONE
# -------------------------------
def plot_distribution(percent_df, label, title, color_map):
    """Bar chart of how many patients of each complication type fall in each percentage bin.

    Parameters
    ----------
    percent_df : pandas.DataFrame
        One row per patient, with a numeric column named ``label`` and a
        ``Complication_Type`` column. The frame is not modified.
    label : str
        Name of the percentage column, with or without a leading ``%``.
    title : str
        Figure title; when empty, a default title is built from ``label``.
    color_map : dict
        Bar colour for each of ``'healthy'``, ``'non_diabetic'``, ``'diabetic'``.
    """
    edges = [0, 1] + list(range(10, 110, 10))
    # Same "[lo, hi)" text pandas would print for left-closed bins, except that
    # the last bin is closed on the right so that exactly 100 is counted.
    bin_labels = [f'[{lo}, {hi})' for lo, hi in zip(edges[:-1], edges[1:])]
    bin_labels[-1] = f'[{edges[-2]}, {edges[-1]}]'
    # pd.cut(right=False) excludes the last edge, which would turn a value of
    # exactly 100 into NaN and drop that patient; nudge the edge just above 100.
    cut_edges = edges[:-1] + [np.nextafter(float(edges[-1]), np.inf)]

    # Work on a copy so the caller's frame does not gain a 'Bin' column
    binned = percent_df.assign(
        Bin=pd.cut(percent_df[label], bins=cut_edges, right=False, labels=bin_labels)
    )

    # observed=False keeps empty bins so the x axis always shows every bin
    counts = binned.groupby(['Bin', 'Complication_Type'], observed=False).size().unstack(fill_value=0)
    counts = counts.reindex(columns=['healthy', 'non_diabetic', 'diabetic'], fill_value=0).sort_index()

    # Callers pass labels such as "%TIR"; do not prepend a second '%'
    axis_name = label if label.startswith('%') else f'%{label}'

    x = np.arange(len(counts))
    width = 0.25

    fig, ax = plt.subplots(figsize=(14, 6))
    for i, tipo in enumerate(counts.columns):
        # Three bars per bin, centred on the tick: offsets -width, 0, +width
        offset = (i - 1) * width
        ax.bar(x + offset, counts[tipo], width=width,
               label=tipo.replace("_", " ").title(), color=color_map[tipo], edgecolor='black')
        for xi, count in zip(x, counts[tipo]):
            if count > 0:
                ax.text(xi + offset, count + 0.5, str(count), ha='center', va='bottom', fontsize=9)

    ax.set_xticks(x)
    ax.set_xticklabels([str(b) for b in counts.index], rotation=45)
    ax.set_xlabel(axis_name)
    ax.set_ylabel('Numero di Pazienti')
    ax.set_title(title if title else f'Distribuzione {axis_name} per tipo di complicanza')
    ax.legend()
    fig.tight_layout()
    plt.show()
    # Release the figure; this function is called repeatedly in loops
    plt.close(fig)

# -------------------------------
# 5. COLOURS AND RANGE DEFINITIONS
# -------------------------------
colors = dict(
    healthy='#A9D1E5',
    non_diabetic='#5588AA',
    diabetic='#054F77',
)

# Bounds of the target range, both inclusive
RANGE_LOW = 70
RANGE_HIGH = 180

# Each entry maps a range name to a predicate over the measurement values:
# TIR = inside the bounds, TAR = above the upper bound, TBR = below the lower one.
range_defs = {
    'TIR': lambda x: (x >= RANGE_LOW) & (x <= RANGE_HIGH),
    'TAR': lambda x: x > RANGE_HIGH,
    'TBR': lambda x: x < RANGE_LOW,
}

# -------------------------------
# 6-7. PLOTS: ALL PATIENTS, THEN ONLY PATIENTS IN CLUSTERING2
# -------------------------------
# Restrict measurements and classes to the patients listed in Clustering2.csv
patients_in_cluster = set(df_cluster['Patient_ID'])
df_glucose_cluster = df_glucose[df_glucose['Patient_ID'].isin(patients_in_cluster)]
patient_classes_cluster = patient_classes[patient_classes['Patient_ID'].isin(patients_in_cluster)]

# (printed banner, title template, measurements, patient classes) per cohort
cohorts = (
    ("Grafici su TUTTI i pazienti:",
     "Distribuzione %{label} (tutti i pazienti)",
     df_glucose, patient_classes),
    ("Grafici SOLO sui pazienti presenti in Clustering2.csv:",
     "Distribuzione %{label} (solo Clustering2)",
     df_glucose_cluster, patient_classes_cluster),
)

for banner, title_template, glucose_subset, class_subset in cohorts:
    print(banner)
    for label, condition in range_defs.items():
        column = f"%{label}"
        df_percent = calculate_range_percent(glucose_subset, column, condition)
        df_percent = df_percent.merge(class_subset, on='Patient_ID', how='left')
        # Patients with measurements but no diagnosis row count as healthy
        df_percent['Complication_Type'] = df_percent['Complication_Type'].fillna('healthy')
        plot_distribution(df_percent, column, title_template.format(label=label), colors)
