<a href="https://colab.research.google.com/github/Riccardo-Venturi/Tesi_Script_Colab/blob/main/Troviamo_outlier_dalle_maschere_unet%2B%2B.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install numpy pandas tqdm

In [None]:
#@title Outlier check: masks to refine by hand
import cv2
import numpy as np
import pandas as pd
from pathlib import Path
from tqdm import tqdm

# --- CONFIGURATION ---
# Folder whose *.png files are read as predicted masks by the screening functions.
PRED_DIR = Path("/content/drive/MyDrive/UNETPPMaschereInferenza")
# Reference scale for the physical measurements: pixel counts are divided by
# SCALA_PX_MM**2 to obtain areas in mm^2, i.e. the value is in pixels per mm.
SCALA_PX_MM = 35.8

def screening_anomalie_totale(pred_dir=None):
    """Screen the predicted masks and return the ones that look anomalous.

    Every ``*.png`` in the folder is read as a grayscale label image in which
    pixel value 1 is counted as hole and pixel value 2 as damage. A mask is
    reported when at least one of these heuristics fires:

    * hole area of 500 px or less (detection failed);
    * hole present but circularity of its largest contour below 0.7;
    * damage area above 50 mm^2.

    Args:
        pred_dir: Folder to scan. Defaults to the module-level ``PRED_DIR``.

    Returns:
        A DataFrame with columns ``File``, ``Area_Danno_mm2``,
        ``Ratio_Danno_Foro``, ``Circolarita_Foro`` and ``Hole_Present``, one
        row per flagged mask, sorted by damage area in descending order. The
        frame is empty (but keeps its columns) when nothing could be analysed.
    """
    colonne = ['File', 'Area_Danno_mm2', 'Ratio_Danno_Foro', 'Circolarita_Foro', 'Hole_Present']
    cartella = PRED_DIR if pred_dir is None else Path(pred_dir)
    pred_files = sorted(cartella.glob("*.png"))

    print(f"üïµÔ∏è Analisi eurisitica su {len(pred_files)} maschere UNet++...")

    if not pred_files:
        # Nothing to analyse: return early with the expected schema instead of
        # failing later on a column lookup.
        print(f"Nessuna maschera .png trovata in {cartella}")
        return pd.DataFrame(columns=colonne)

    px_per_mm2 = SCALA_PX_MM**2
    analisi = []

    for f in tqdm(pred_files):
        img = cv2.imread(str(f), 0)
        if img is None:
            # Unreadable file: skip it rather than abort the whole screening.
            continue

        # Raw metrics taken straight from the predicted mask.
        mask_foro = img == 1
        area_foro_px = np.sum(mask_foro)
        area_danno_px = np.sum(img == 2)
        ratio_danno_foro = (area_danno_px / area_foro_px) if area_foro_px > 0 else 0

        # Circularity (4*pi*A / P^2) of the largest hole contour; it stays 0
        # when no contour exists or the perimeter is degenerate.
        contours, _ = cv2.findContours(mask_foro.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        circ_foro = 0
        if contours:
            cnt = max(contours, key=cv2.contourArea)
            area = cv2.contourArea(cnt)
            perim = cv2.arcLength(cnt, True)
            if perim > 0:
                circ_foro = 4 * np.pi * (area / (perim * perim))

        analisi.append({
            'File': f.name,
            'Area_Danno_mm2': round(area_danno_px / px_per_mm2, 2),
            'Ratio_Danno_Foro': round(ratio_danno_foro, 3),
            'Circolarita_Foro': round(circ_foro, 3),
            'Hole_Present': 1 if area_foro_px > 500 else 0
        })

    # Passing the columns explicitly keeps the schema even when every file was
    # unreadable; a column-less frame would raise KeyError in the filters below.
    df = pd.DataFrame(analisi, columns=colonne)

    # --- ANOMALY DEFINITIONS ---
    # 1. No usable hole region: detection failed completely.
    fail_detect = df[df['Hole_Present'] == 0]

    # 2. Hole present but far from circular.
    fail_geom = df[(df['Hole_Present'] == 1) & (df['Circolarita_Foro'] < 0.7)]

    # 3. Damage area above 50 mm^2.
    fail_area = df[df['Area_Danno_mm2'] > 50.0]

    print("\n" + "!"*40)
    print(f"üìâ SCREENING COMPLETATO")
    print(f"‚ùå Rilevamenti Falliti: {len(fail_detect)}")
    print(f"üìê Errori Geometrici (Foro storto): {len(fail_geom)}")
    print(f"üå™Ô∏è Possibili Allucinazioni (Danno eccessivo): {len(fail_area)}")
    print("!"*40)

    # A mask can trip several filters; keep each one once, largest damage first.
    anomali = pd.concat([fail_detect, fail_geom, fail_area]).drop_duplicates().sort_values(by='Area_Danno_mm2', ascending=False)

    return anomali

# Run the screening once; df_anomalie is reused by the following cells.
df_anomalie = screening_anomalie_totale()

# Show the first 50 suspicious cases
display(df_anomalie.head(50))

In [None]:
import shutil
from pathlib import Path

# --- CONFIGURATION ---
# Root directory where the raw radiographs are stored; subfolders are searched too.
RAW_PATCHES_DIR = Path("/content/drive/MyDrive/Radio_Patches_Normalized")

# Extension of the raw images, including the leading dot.
RAW_IMG_EXTENSION = ".jpg" # <--- ADJUST THIS EXTENSION if different (e.g., .png, .tiff)

# Destination folder for the copied raw images. No visible cell defines
# RAW_IMAGES_DIR, so a value already present in the session is kept and a
# default is supplied otherwise.
if "RAW_IMAGES_DIR" not in globals():
    RAW_IMAGES_DIR = Path("/content/drive/MyDrive/Segmentation_to_study_special_cases/raw_images") # <--- ADJUST THIS PATH if needed

# Ensure the source directory exists
if not RAW_PATCHES_DIR.exists():
    print(f"Error: The specified raw image directory '{RAW_PATCHES_DIR}' does not exist. Please check the path.")
else:
    # shutil.copy does not create missing parent folders.
    RAW_IMAGES_DIR.mkdir(parents=True, exist_ok=True)
    print(f"Copying corresponding raw image files from '{RAW_PATCHES_DIR}' to '{RAW_IMAGES_DIR}'...")

    # Walk the directory tree once and reuse the listing for every mask,
    # instead of running a recursive glob per row.
    raw_candidates = [
        candidate
        for candidate in RAW_PATCHES_DIR.rglob(f"*{RAW_IMG_EXTENSION}")
        if candidate.is_file()
    ]

    copied_count = 0
    not_found_count = 0

    for mask_filename in df_anomalie['File']:
        # Base name = text before the first underscore of the mask file name
        # (assumed to be the specimen identifier, e.g. 'H064' - TODO confirm).
        base_name_to_match = Path(mask_filename).stem.split('_')[0]

        # Take the first raw image whose name contains the base name.
        # NOTE(review): substring matching means 'H06' would also match
        # 'H064...'; tighten this if identifiers can be prefixes of each other.
        raw_file_path = next(
            (candidate for candidate in raw_candidates if base_name_to_match in candidate.stem),
            None,
        )

        if raw_file_path is None:
            not_found_count += 1
            print(f"Warning: Corresponding raw image file not found for mask beginning with '{base_name_to_match}' (mask: {mask_filename}) in '{RAW_PATCHES_DIR}'.")
            continue

        # Keep the original file name in the destination folder.
        shutil.copy(raw_file_path, RAW_IMAGES_DIR / raw_file_path.name)
        copied_count += 1

    print(f"\nCopying process complete: {copied_count} raw images copied, {not_found_count} not found.")

In [None]:
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import os
from pathlib import Path

# Discrete colormap shared by every figure: pixel value 0 -> black,
# 1 -> blue, 2 -> red. The bounds sit halfway between the integer labels.
colors = ['black', 'blue', 'red']
cmap = mcolors.ListedColormap(colors)
bounds = [-0.5, 0.5, 1.5, 2.5]
norm = mcolors.BoundaryNorm(bounds, cmap.N)

# Directory that receives the rendered figures
VISUALIZED_MASKS_DIR = Path("/content/drive/MyDrive/Segmentation_to_study_special_cases/visualized_anomalous_masks")
VISUALIZED_MASKS_DIR.mkdir(parents=True, exist_ok=True)
print(f"Saving visualized masks to: {VISUALIZED_MASKS_DIR}")

# Render the first 13 anomalous files listed in the DataFrame
for example_file in df_anomalie.head(13)['File']:
    example_mask_path = PRED_DIR / example_file

    # Load the mask as a single-channel image
    mask_img = cv2.imread(str(example_mask_path), 0)
    if mask_img is None:
        print(f"Could not load image: {example_mask_path}")
        continue

    fig, ax = plt.subplots(figsize=(8, 8))
    # Draw the mask once and hand the same artist to the colorbar.
    mask_artist = ax.imshow(mask_img, cmap=cmap, norm=norm)
    ax.set_title(f'Visualizing Mask: {example_file}')
    fig.colorbar(mask_artist, ax=ax, ticks=[0, 1, 2], label='Pixel Value: 0=Background, 1=Hole, 2=Damage')

    # Build the output name from the stem so only the extension is replaced.
    output_filename = f"{Path(example_file).stem}_visualized.png"
    output_path = VISUALIZED_MASKS_DIR / output_filename

    # Save through the figure itself rather than pyplot's current figure.
    fig.savefig(output_path, bbox_inches='tight')
    plt.close(fig) # Close the figure to avoid displaying all of them in the notebook

print("Visualized masks saved to Google Drive.")

In [None]:
# ==============================================================================
# ALIEN HUNTER V3.0 - RELAXED & RANKED
# Returns only the worst-looking masks, ranked by a heuristic anomaly score.
# ==============================================================================
def screening_peggiori_25(top_n=25, pred_dir=None):
    """Rank the predicted masks by an anomaly score and return the worst ones.

    Every ``*.png`` in the folder is read as a grayscale label image in which
    pixel value 1 is treated as hole and pixel value 2 as damage. The score is

        n_fragments / 100 + (1 - hole_circularity)
        + 5 if the damage area exceeds 40 mm^2
        + 1 if the damage area is below 0.5 mm^2

    where ``n_fragments`` is the number of connected damage components and the
    circularity is 4*pi*A / P^2 of the largest hole contour.

    Args:
        top_n: Number of highest-scoring masks to return (default 25).
        pred_dir: Folder to scan. Defaults to the module-level ``PRED_DIR``.

    Returns:
        A DataFrame with columns ``File``, ``Area_Danno``, ``Frammenti``,
        ``Circ_Foro`` and ``Anomaly_Score``, sorted by score in descending
        order and truncated to ``top_n`` rows. The frame is empty (but keeps
        its columns) when nothing could be analysed.
    """
    colonne = ['File', 'Area_Danno', 'Frammenti', 'Circ_Foro', 'Anomaly_Score']
    cartella = PRED_DIR if pred_dir is None else Path(pred_dir)
    pred_files = sorted(cartella.glob("*.png"))

    if not pred_files:
        # Nothing to rank: return early with the expected schema instead of
        # failing later on the sort column.
        print(f"Nessuna maschera .png trovata in {cartella}")
        return pd.DataFrame(columns=colonne)

    px_per_mm2 = SCALA_PX_MM**2
    analisi = []

    for f in tqdm(pred_files):
        img = cv2.imread(str(f), 0)
        if img is None:
            # Unreadable file: skip it rather than abort the whole ranking.
            continue

        mask_foro = (img == 1).astype(np.uint8)
        mask_danno = (img == 2).astype(np.uint8)

        # 1. Fragmentation: number of connected damage components. The label
        #    count includes the background, hence the -1.
        num_labels = cv2.connectedComponentsWithStats(mask_danno)[0]
        n_fragments = num_labels - 1

        # 2. Circularity of the largest hole contour.
        # NOTE(review): with no hole contour the score stays at 1.0, so a mask
        # without any hole gets no circularity penalty - confirm this is intended.
        contours, _ = cv2.findContours(mask_foro, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        circ_score = 1.0
        if contours:
            cnt = max(contours, key=cv2.contourArea)
            area = cv2.contourArea(cnt)
            perim = cv2.arcLength(cnt, True)
            if perim > 0:
                circ_score = (4 * np.pi * area) / (perim**2)

        # 3. Damage area in mm^2.
        area_danno_mm2 = np.sum(mask_danno) / px_per_mm2

        # --- ANOMALY SCORE (higher means more of an outlier) ---
        score = (n_fragments / 100) + (1.0 - circ_score)
        if area_danno_mm2 > 40:
            score += 5  # very large damage area is treated as a near-certain error
        if area_danno_mm2 < 0.5:
            score += 1  # almost no damage is treated as suspicious

        analisi.append({
            'File': f.name,
            'Area_Danno': area_danno_mm2,
            'Frammenti': n_fragments,
            'Circ_Foro': round(circ_score, 2),
            'Anomaly_Score': score
        })

    # Passing the columns explicitly keeps the schema even when every file was
    # unreadable; a column-less frame would raise KeyError in sort_values.
    df = pd.DataFrame(analisi, columns=colonne)

    # Keep only the top_n masks with the highest anomaly score.
    top_outliers = df.sort_values(by='Anomaly_Score', ascending=False).head(top_n)

    return top_outliers

# Rank all predicted masks and keep the worst-scoring ones for manual fixing.
df_indiziati = screening_peggiori_25()
# Print a banner, then render the ranked table in the notebook output.
print("\nüî• LISTA DEI 25 PEGGOIRI FILE DA SISTEMARE CON GIMP/PENNA üî•")
display(df_indiziati)