In [23]:
import pandas as pd
import numpy as np

# Load the measurements, supplying the four column names explicitly.
df = pd.read_csv(
    "Biochemical_parameters.csv",
    names=["ID", "Data", "Parametro", "Valore"],
)

# Rows whose parameter is the literal 'Name' are not measurements: collect
# their index labels and remove them from the frame.
indici_da_droppare = df.index[df['Parametro'] == 'Name']
df = df.drop(index=indici_da_droppare)

# Summary statistics of the cleaned frame (displayed as the cell output).
df.describe()

Unnamed: 0,ID,Data,Parametro,Valore
count,87482,87482,87482,87482.0
unique,723,1370,17,8192.0
top,LIB193438,2021-04-14,Creatinine,0.7
freq,846,254,8495,2638.0


In [24]:
def modified_z_score(series):
    """Return the modified z-score (median/MAD based) of every value.

    The score is ``0.6745 * (x - median) / MAD`` where ``MAD`` is the median
    of the absolute deviations from the median.

    Missing values (NaN) are ignored when computing the median and the MAD,
    so a single unparseable measurement no longer turns every score into NaN.
    NaN inputs still produce NaN scores at their own positions.

    Parameters
    ----------
    series : pandas.Series or array-like of numbers
        Values to score.

    Returns
    -------
    pandas.Series or numpy.ndarray
        Scores with the same length as ``series``. When the MAD is zero the
        scores are undefined, and an all-zero float array is returned instead.
    """
    # nanmedian skips NaN; np.median would return NaN as soon as one is present.
    median = np.nanmedian(series)
    mad = np.nanmedian(np.abs(series - median))
    if mad == 0:
        # No spread around the median: report "no deviation" for every value.
        return np.zeros_like(series, dtype=float)
    return 0.6745 * (series - median) / mad

# Lookup table keyed by parameter name. Each entry holds:
#   unit     - unit of measure, as a display string
#   normal   - (low, high) normal interval; a None bound is left open
#              (presumably "no limit" - confirm with the data owner)
#   abnormal - out-of-norm but still possible values: either one (low, high)
#              pair or a pair of (low, high) pairs (below / above normal)
param_info = {
    "Creatinine": dict(unit="mg/dL", normal=(0.6, 1.2), abnormal=(1.3, 4.0)),
    "Glucose": dict(unit="mg/dL", normal=(70, 100), abnormal=((0, 69), (101, 140))),
    "Potassium": dict(unit="mEq/L", normal=(3.5, 5.0), abnormal=((0, 3.4), (5.1, 6.5))),
    "Sodium": dict(unit="mEq/L", normal=(136, 146), abnormal=((0, 135), (147, 155))),
    "Alanine transaminase (GPT)": dict(unit="U/L", normal=(7, 56), abnormal=(57, 100)),
    "Gamma-glutamyl Transferase (GGT)": dict(unit="U/L", normal=(8, 61), abnormal=(62, 200)),
    "Total cholesterol": dict(unit="mg/dL", normal=(0, 200), abnormal=(201, 299)),
    "Triglycerides": dict(unit="mg/dL", normal=(0, 150), abnormal=(151, 500)),
    "HDL cholesterol": dict(unit="mg/dL", normal=(40, None), abnormal=(0, 39)),
    "Uric acid": dict(unit="mg/dL", normal=(3.0, 8.2), abnormal=((0, 2.9), (8.3, 12.0))),
    "Glycated hemoglobin (A1c)": dict(unit="%", normal=(0, 5.7), abnormal=(5.8, 14.0)),
    "Thyrotropin (TSH)": dict(unit="mIU/L", normal=(0.4, 4.0), abnormal=(4.1, 20.0)),
    "Creatinine (urine)": dict(unit="mg/die", normal=(500, 2000), abnormal=((0, 499), (2001, 5000))),
    "Albumin (urine)": dict(unit="mg/die", normal=(0, 30), abnormal=(31, 300)),
    "Chlorine": dict(unit="mEq/L", normal=(95, 105), abnormal=((0, 94), (106, 120))),
    "IA2 ANTIBODIES": dict(unit="U/mL", normal=(0.0, 7.4), abnormal=(7.5, 100.0)),
    "Insulin": dict(unit="µIU/mL", normal=(2.6, 24.9), abnormal=(25.0, 100.0)),
}

def analisi_parametri(df, totale_pazienti=None, soglia_z=5):
    """Build a per-parameter summary table of the measurements in ``df``.

    For every distinct value of the ``Parametro`` column the table reports
    the unit and reference ranges (looked up in the module-level
    ``param_info``), the number of rows, how many subjects have no row for
    that parameter, the standard deviation, and the number of outliers.

    A value is an outlier when its modified z-score (module-level
    ``modified_z_score``) exceeds ``soglia_z`` in absolute value or when it
    is negative. Each value is counted at most once.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``ID``, ``Parametro`` and ``Valore``. Rows
        with ``Parametro == "Name"`` are ignored.
    totale_pazienti : int, optional
        Total number of subjects used to compute the "missing exam" column.
        Defaults to the number of distinct ``ID`` values in ``df`` (after
        the ``"Name"`` rows are removed).
    soglia_z : float, optional
        Absolute modified z-score above which a value is an outlier.

    Returns
    -------
    pandas.DataFrame
        One row per parameter, in ``groupby`` order.
    """
    df = df[df["Parametro"] != "Name"]

    # Derive the cohort size from the data instead of hard-coding it, so the
    # "missing" column stays correct when the dataset changes.
    if totale_pazienti is None:
        totale_pazienti = df["ID"].nunique()

    summary = []

    for nome, gruppo in df.groupby("Parametro"):
        # Non-numeric entries become NaN rather than raising.
        valori = pd.to_numeric(gruppo["Valore"], errors='coerce')
        totale = len(valori)
        id_unici = gruppo["ID"].nunique()
        mancanti = totale_pazienti - id_unici

        std_val = valori.std()

        # Score only the parsable values: a NaN would otherwise propagate
        # through the median and hide every z-score outlier.
        validi = valori.dropna()
        if validi.empty:
            outliers = 0
        else:
            z_scores = modified_z_score(validi)
            # Both masks are positional over the same values, so OR-ing them
            # counts each value once. (Mixing np.where positions with index
            # labels, as a set union would, miscounts.)
            fuori_soglia = np.asarray(np.abs(z_scores) > soglia_z)
            negativi = (validi < 0).to_numpy()
            outliers = int((fuori_soglia | negativi).sum())

        info = param_info.get(nome, {})
        unit = info.get("unit", "")
        normal = info.get("normal", (None, None))
        abnormal = info.get("abnormal", None)

        # An open (None) bound is rendered as an empty string on either side.
        limite_inf = "" if normal[0] is None else normal[0]
        limite_sup = "" if normal[1] is None else normal[1]

        summary.append([
            nome,
            unit,
            f"{limite_inf}–{limite_sup}",
            str(abnormal),
            totale,
            mancanti,
            std_val,
            outliers,
        ])

    return pd.DataFrame(summary, columns=[
        "Parametro",
        "Unità",
        "Range Normale",
        "Range Fuori Norma (possibile)",
        "Valori Totali",
        "Non Hanno Fatto l’Esame",
        "Dev. Std",
        "Outliers",
    ])

# Run the analysis; the bare expression on the last line displays the table
tabella_sintesi = analisi_parametri(df)
tabella_sintesi

Unnamed: 0,Parametro,Unità,Range Normale,Range Fuori Norma (possibile),Valori Totali,Non Hanno Fatto l’Esame,Dev. Std,Outliers
0,Alanine transaminase (GPT),U/L,7–56,"(57, 100)",7134,1,44.617123,302
1,Albumin (urine),mg/die,0–30,"(31, 300)",4283,10,26.560691,0
2,Chlorine,mEq/L,95–105,"((0, 94), (106, 120))",1935,300,5.339634,9
3,Creatinine,mg/dL,0.6–1.2,"(1.3, 4.0)",8495,0,0.947799,656
4,Creatinine (urine),mg/die,500–2000,"((0, 499), (2001, 5000))",4412,10,66.798995,7
5,Gamma-glutamyl Transferase (GGT),U/L,8–61,"(62, 200)",6085,7,113.549983,642
6,Glucose,mg/dL,70–100,"((0, 69), (101, 140))",8349,0,85.403559,41
7,Glycated hemoglobin (A1c),%,0–5.7,"(5.8, 14.0)",4776,3,1.385512,48
8,HDL cholesterol,mg/dL,40–,"(0, 39)",5062,6,14.367272,8
9,IA2 ANTIBODIES,U/mL,0.0–7.4,"(7.5, 100.0)",66,659,1237.69795,25
