# Understanding which segments contain the most information

In [4]:
import os
from PIL import Image
import numpy as np
from tqdm import tqdm

# Root folder containing the segmentation classes.
# The traversal in this cell expects the layout
#   ROOT_DIR/<segment_class>/<country>/<image>.png
# i.e. one subdirectory per segment class, each holding one subdirectory per
# country, each holding the PNG mask images.
ROOT_DIR = "/home/andreafabbricatore/rainbot/datasets/segmented"  # <- CHANGE THIS

def compute_mask_ratio(image_path):
    """Return the fraction of non-black pixels in the image at *image_path*.

    The image is converted to grayscale (PIL mode ``"L"``); every pixel whose
    grayscale value is non-zero counts as non-black.

    Args:
        image_path: Path to an image file that PIL can open.

    Returns:
        The number of non-zero grayscale pixels divided by the total number
        of pixels, i.e. a value between 0 and 1.

    Raises:
        Any exception raised by PIL while opening or decoding the file is
        propagated to the caller.
    """
    # Use the image as a context manager so the underlying file handle is
    # released deterministically, even if decoding fails part-way through.
    with Image.open(image_path) as img:
        arr = np.array(img.convert("L"))  # grayscale
    return np.count_nonzero(arr) / arr.size

def _sorted_subdirs(parent):
    """Return the sorted names of the immediate subdirectories of *parent*."""
    return sorted(
        name
        for name in os.listdir(parent)
        if os.path.isdir(os.path.join(parent, name))
    )


# Maps each segment class name to the mean mask ratio over all of its images.
# Classes for which no image could be read get no entry.
segment_ratios = {}

segment_classes = _sorted_subdirs(ROOT_DIR)

for segment_class in tqdm(segment_classes, desc="Segment Classes"):
    class_path = os.path.join(ROOT_DIR, segment_class)

    # Ratios are pooled across every country of the class, so the final
    # average weights each image equally (not each country).
    ratios = []
    country_dirs = _sorted_subdirs(class_path)

    for country in country_dirs:
        country_path = os.path.join(class_path, country)

        image_files = sorted([f for f in os.listdir(country_path) if f.endswith(".png")])
        # tqdm.write emits the message without corrupting the active
        # progress bar, which a plain print() inside the loop would do.
        tqdm.write(f"Country: {country}. Images: {len(image_files)}")
        for file in image_files:
            image_path = os.path.join(country_path, file)
            try:
                ratio = compute_mask_ratio(image_path)
            except Exception as e:
                # Best effort: report the unreadable image and keep going so
                # one bad file does not abort the whole scan.
                tqdm.write(f"Error reading {image_path}: {e}")
            else:
                ratios.append(ratio)

    if ratios:
        avg_ratio = np.mean(ratios)
        segment_ratios[segment_class] = avg_ratio

# Report every segment class, ordered from the highest to the lowest
# average mask ratio.
print("\n--- Segment Scores ---")
sorted_segments = list(segment_ratios.items())
sorted_segments.sort(key=lambda item: item[1], reverse=True)
for segment, score in sorted_segments:
    print(f"{segment}: {score:.4f}")

Segment Classes:   0%|          | 0/19 [00:00<?, ?it/s]

Country: Albania. Images: 112
Country: Andorra. Images: 1
Country: Argentina. Images: 267
Country: Australia. Images: 581
Country: Austria. Images: 172
Country: Bangladesh. Images: 115
Country: Belgium. Images: 60
Country: Bhutan. Images: 6
Country: Bolivia. Images: 193
Country: Botswana. Images: 45
Country: Brazil. Images: 867
Country: Bulgaria. Images: 63
Country: Cambodia. Images: 85
Country: Canada. Images: 300
Country: Chile. Images: 116
Country: Colombia. Images: 137
Country: Croatia. Images: 34
Country: Czechia. Images: 59
Country: Denmark. Images: 37
Country: Dominican_Republic. Images: 150
Country: Ecuador. Images: 145
Country: Estonia. Images: 6
Country: Eswatini. Images: 6
Country: Finland. Images: 214
Country: France. Images: 1357
Country: Germany. Images: 530
Country: Ghana. Images: 282
Country: Greece. Images: 113
Country: Greenland. Images: 154
Country: Guatemala. Images: 163
Country: Hungary. Images: 43
Country: Iceland. Images: 4
Country: Indonesia. Images: 218
Country

Segment Classes:   5%|▌         | 1/19 [01:02<18:42, 62.38s/it]

Country: Albania. Images: 534
Country: Andorra. Images: 13
Country: Argentina. Images: 1661
Country: Australia. Images: 3707
Country: Austria. Images: 663
Country: Bangladesh. Images: 214
Country: Belgium. Images: 389
Country: Bhutan. Images: 22
Country: Bolivia. Images: 695
Country: Botswana. Images: 318
Country: Brazil. Images: 4704
Country: Bulgaria. Images: 365
Country: Cambodia. Images: 248
Country: Canada. Images: 2142
Country: Chile. Images: 578
Country: Colombia. Images: 488
Country: Croatia. Images: 206
Country: Czechia. Images: 441
Country: Denmark. Images: 308
Country: Dominican_Republic. Images: 523
Country: Ecuador. Images: 580
Country: Estonia. Images: 76
Country: Eswatini. Images: 53
Country: Finland. Images: 1530
Country: France. Images: 6019
Country: Germany. Images: 1294
