# Intervalo de Classificação

1. Identificar intervalos de RGB nas classes identificadas visualmente por Zanetti (2019)

In [1]:
# 00-segmentation.py

In [2]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
from skimage import filters, io, morphology, segmentation
from skimage.color import rgb2gray
from skimage.measure import label, regionprops

plt.interactive(True)


def open_image(fname):
    """Load the image file at ``fname`` with ``skimage.io.imread`` and return it."""
    image = io.imread(fname)
    return image


def image_segmentation(img):
    """Segment the bright part of an image with a global Otsu threshold.

    Parameters
    ----------
    img : array
        Image accepted by ``skimage.color.rgb2gray``.

    Returns
    -------
    mask : boolean array
        True where the grayscale value is above the Otsu threshold.
    borders : int64 array
        ``mask`` after ``clear_border`` (objects connected to the image
        border removed), cast to ``np.int64``.
    regions : list
        ``regionprops`` of the connected components labelled in ``borders``.
    """
    gray = rgb2gray(img)
    mask = gray > filters.threshold_otsu(gray)
    # Objects touching the image border are dropped before labelling so that
    # only objects fully inside the frame become candidate regions.
    borders = segmentation.clear_border(mask).astype(np.int64)
    # The previous version also called segmentation.mark_boundaries(gray,
    # borders) here and threw the returned image away; that dead call is gone.
    label_img = label(borders)
    regions = regionprops(label_img)
    return mask, borders, regions


def find_pellet(regions):
    """Return the region with the largest ``area``.

    Parameters
    ----------
    regions : iterable
        Objects exposing an ``area`` attribute (e.g. the output of
        ``skimage.measure.regionprops``).

    Returns
    -------
    The element of ``regions`` with the greatest ``area``. When several
    regions share the maximum, the first one encountered is returned.

    Raises
    ------
    ValueError
        If ``regions`` is empty. The previous loop-based version failed with
        an ``UnboundLocalError`` in that case.
    """
    candidates = list(regions)
    if not candidates:
        raise ValueError("No regions found: cannot locate the pellet.")
    return max(candidates, key=lambda region: region.area)


def mask_background(mask, pellet):
    """Filter ``mask`` with ``remove_small_objects`` using the pellet's size.

    The size passed to ``morphology.remove_small_objects`` is the pellet
    area minus a 1% tolerance, so objects clearly smaller than the pellet
    are expected to be discarded.
    """
    tolerance = pellet.area * 0.01
    min_size = pellet.area - tolerance
    cleaned = morphology.remove_small_objects(mask, min_size)
    return cleaned


def show_masked(img, borders, maskr):
    """Draw the masked image beside the original with the border contour.

    Left panel: a copy of ``img`` where every pixel outside ``maskr`` is set
    to 255. Right panel: ``img`` with the 0.5 level contour of ``borders``
    drawn in red.
    """
    _, (left_ax, right_ax) = plt.subplots(ncols=2, figsize=(8, 8))

    # Work on a copy so the caller's image is left untouched.
    background_removed = img.copy()
    background_removed[~maskr] = 255
    left_ax.imshow(background_removed)

    right_ax.imshow(img)
    right_ax.contour(borders, [0.5], colors="r")

In [4]:
from glob import glob

# Build and store one background mask per pellet photo.
# Sorted so the processing order does not depend on the filesystem.
fnames = sorted(glob("class-zanetti/*.jpg"))
# Pellet names (file stems) to skip; empty means every image is processed.
bad_pellets = []

# np.save does not create missing directories, so make sure the output
# folder exists before entering the loop.
Path("masks-zanetti").mkdir(parents=True, exist_ok=True)

for fname in fnames:
    pellet = Path(fname).stem
    if pellet in bad_pellets:
        print(f"Skipping pellet {pellet}.\n")
        continue

    img = open_image(fname)

    mask, borders, regions = image_segmentation(img)
    # find_pellet returns the largest region object, not a bare area value.
    pellet_region = find_pellet(regions)

    maskr = mask_background(mask, pellet_region)

    # np.save appends the ".npy" extension to the given name.
    savename = f"masks-zanetti/{pellet}"
    np.save(savename, maskr)

In [5]:
# 01-find-dominant-color.py

In [6]:
from glob import glob
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from PIL import Image
from skimage import io


def get_dominant_color(pil_img, palette_size=16, rank=1):
    """Return the RGB value of one of the most frequent colors of an image.

    A downsized copy of the image is converted to an adaptive palette of
    ``palette_size`` colors; the palette entries are then ranked by pixel
    count, most frequent first.

    Parameters
    ----------
    pil_img : PIL.Image.Image
        Input image. A copy is processed, so the argument is not resized.
    palette_size : int, optional
        Number of colors requested for the adaptive palette.
    rank : int, optional
        Zero-based position in the most-frequent-first ranking. The default
        ``1`` picks the second most frequent color, which skips the flat
        mask color when that color is the most frequent one in the image.
        Use ``0`` for the single most frequent color.

    Returns
    -------
    list of int
        The ``[R, G, B]`` palette entry of the selected color.

    Raises
    ------
    IndexError
        If the quantized image has fewer than ``rank + 1`` distinct colors
        (for example a fully masked, single-color image).
    """
    # Resize a copy to speed up processing.
    img = pil_img.copy()
    img.thumbnail((100, 100))

    # Reduce the image to an adaptive palette of `palette_size` colors.
    paletted = img.convert("P", palette=Image.Palette.ADAPTIVE, colors=palette_size)

    # getcolors() yields (count, palette_index) pairs; sort by count, descending.
    palette = paletted.getpalette()
    color_counts = sorted(paletted.getcolors(), reverse=True)
    if not 0 <= rank < len(color_counts):
        raise IndexError(
            f"rank {rank} is out of range: the quantized image has only "
            f"{len(color_counts)} distinct color(s)"
        )
    palette_index = color_counts[rank][1]

    # The palette is a flat [R0, G0, B0, R1, G1, B1, ...] list.
    start = palette_index * 3
    return palette[start : start + 3]

In [7]:
# Compute the dominant color of every masked pellet and save a check figure.
# Sorted so the processing order does not depend on the filesystem.
fnames = sorted(glob("masks-zanetti/*.npy"))

# fig.savefig does not create missing directories.
Path("temp-zanetti").mkdir(parents=True, exist_ok=True)

# Maps pellet name (file stem) -> (R, G, B) dominant color.
dataset = {}
for fname in fnames:
    pellet = Path(fname).stem
    img = io.imread(f"class-zanetti/{pellet}.jpg")
    # fname already is the mask path; no need to rebuild it from the stem.
    maskr = np.load(fname)

    fig, (ax0, ax1, ax2) = plt.subplots(ncols=3, figsize=(15, 5))
    fig.suptitle(pellet)

    ax0.imshow(img)

    # Remove background: everything outside the mask is set to 255.
    masked = img.copy()
    masked[~maskr] = 255
    ax1.imshow(masked)

    # Dominant color
    pil_img = Image.fromarray(masked)

    R, G, B = get_dominant_color(pil_img)
    dataset[pellet] = (R, G, B)

    # Paint the whole pellet with its dominant color for visual inspection.
    colored = masked.copy()
    colored[maskr, 0] = R
    colored[maskr, 1] = G
    colored[maskr, 2] = B
    ax2.imshow(colored)

    saved = f"temp-zanetti/check-{pellet}.png"
    fig.savefig(saved)
    # Close this specific figure so open figures do not pile up in the loop.
    plt.close(fig)

In [8]:
# Save final data: one row per pellet, columns R, G, B, sorted by pellet name.
df = (
    pd.DataFrame(dataset, index=("R", "G", "B"))
    .T
    .sort_index(ascending=True)
)
df.to_csv("RGB_values_zanetti_outlier.csv")
df

Unnamed: 0,R,G,B
alto_cl1_p1_psul_deixa1_g0005,152,116,3
alto_cl1_p2_joaq_dunaiii_h0005,194,167,2
alto_cl1_p2_joaq_limsup2_b0004,169,157,54
alto_cl1_p4_brava_deixa4_d0009,186,167,6
baixo_cl1_p2_joaq_dunaiii_h0002,178,170,114
baixo_cl1_p6_moca2_deixa3_a0005,174,191,162
baixo_cl1_p6_moca2_deixa3_a0006,186,193,155
baixo_cl1_p9_moca2_deixa5_b0001,193,192,198
moderado_cl1_p2_joaq_dunai_c0009,199,187,90
moderado_cl1_p2_joaq_dunaiii_g0008,146,126,10


In [8]:
# Blue-to-red ratio of the dominant color, rounded to 3 decimals.
blue_over_red = df["B"] / df["R"]
df["B/R"] = blue_over_red.round(3)
df

Unnamed: 0,R,G,B,B/R
alto_cl1_p1_psul_deixa1_g0005,152,116,3,0.02
alto_cl1_p2_joaq_dunaiii_h0005,194,167,2,0.01
alto_cl1_p4_brava_deixa4_d0009,186,167,6,0.032
baixo_cl1_p2_joaq_dunaiii_h0002,178,170,114,0.64
baixo_cl1_p6_moca2_deixa3_a0005,174,191,162,0.931
baixo_cl1_p6_moca2_deixa3_a0006,186,193,155,0.833
baixo_cl1_p9_moca2_deixa5_b0001,193,192,198,1.026
moderado_cl1_p2_joaq_dunai_c0009,199,187,90,0.452
moderado_cl1_p2_joaq_dunaiii_g0008,146,126,10,0.068
moderado_cl1_p2_joaq_dunaiii_h0003,184,168,64,0.348


In [9]:
# Rows of the "nulo" class, selected by position in the index-sorted df.
# NOTE(review): the positions assume the current row order and row count of
# df; confirm them whenever pellets are added or removed.
nulo_rows = slice(13, 18)
nulo = df.iloc[nulo_rows]
nulo.describe()

Unnamed: 0,R,G,B,B/R
count,4.0,4.0,4.0,4.0
mean,150.25,159.5,161.5,1.04925
std,28.756159,37.775654,64.272856,0.221054
min,114.0,119.0,106.0,0.926
25%,139.5,142.25,129.25,0.929
50%,151.5,154.5,143.0,0.9455
75%,162.25,171.75,175.25,1.06575
max,184.0,210.0,254.0,1.38


In [10]:
# NULO class data (mean and standard deviation), typed in by hand as
# (mean, std) pairs per channel plus the mean B/R ratio.
mednuloR, stdnuloR = 150.25, 28.76
mednuloG, stdnuloG = 159.50, 37.78
mednuloB, stdnuloB = 161.50, 64.27
mednuloBR = 1.05

In [11]:
# Rows of the "baixo" class, selected by position in the index-sorted df.
# NOTE(review): the positions assume the current row order and row count of
# df; confirm them whenever pellets are added or removed.
baixo_rows = slice(3, 7)
baixo = df.iloc[baixo_rows]
baixo.describe()

Unnamed: 0,R,G,B,B/R
count,4.0,4.0,4.0,4.0
mean,182.75,186.5,157.25,0.8575
std,8.460693,11.030261,34.44198,0.165026
min,174.0,170.0,114.0,0.64
25%,177.0,185.75,144.75,0.78475
50%,182.0,191.5,158.5,0.882
75%,187.75,192.25,171.0,0.95475
max,193.0,193.0,198.0,1.026


In [12]:
# BAIXO class data (mean and standard deviation), typed in by hand as
# (mean, std) pairs per channel plus the mean B/R ratio.
medbaixoR, stdbaixoR = 182.75, 8.46
medbaixoG, stdbaixoG = 186.50, 11.03
medbaixoB, stdbaixoB = 157.25, 34.44
medbaixoBR = 0.86

In [13]:
# Rows of the "moderado" class, selected by position in the index-sorted df.
# NOTE(review): the positions assume the current row order and row count of
# df; confirm them whenever pellets are added or removed.
moderado_rows = slice(7, 10)
moderado = df.iloc[moderado_rows]
moderado.describe()

Unnamed: 0,R,G,B,B/R
count,3.0,3.0,3.0,3.0
mean,176.333333,160.333333,54.666667,0.289333
std,27.319102,31.214313,40.808496,0.198608
min,146.0,126.0,10.0,0.068
25%,165.0,147.0,37.0,0.208
50%,184.0,168.0,64.0,0.348
75%,191.5,177.5,77.0,0.4
max,199.0,187.0,90.0,0.452


In [14]:
# MODERADO class data (mean and standard deviation), typed in by hand as
# (mean, std) pairs per channel plus the mean B/R ratio.
medmodR, stdmodR = 176.33, 27.32
medmodG, stdmodG = 160.33, 31.21
medmodB, stdmodB = 54.67, 40.81
medmodBR = 0.29

In [15]:
# Rows of the "alto" class, selected by position in the index-sorted df.
# NOTE(review): the positions assume the current row order and row count of
# df; confirm them whenever pellets are added or removed.
alto_rows = slice(0, 3)
alto = df.iloc[alto_rows]
alto.describe()

Unnamed: 0,R,G,B,B/R
count,3.0,3.0,3.0,3.0
mean,177.333333,150.0,3.666667,0.020667
std,22.300972,29.444864,2.081666,0.011015
min,152.0,116.0,2.0,0.01
25%,169.0,141.5,2.5,0.015
50%,186.0,167.0,3.0,0.02
75%,190.0,167.0,4.5,0.026
max,194.0,167.0,6.0,0.032


In [16]:
# ALTO class data (mean and standard deviation), typed in by hand as
# (mean, std) pairs per channel plus the mean B/R ratio.
medaltoR, stdaltoR = 177.33, 22.30
medaltoG, stdaltoG = 150.00, 29.44
medaltoB, stdaltoB = 3.67, 2.08
medaltoBR = 0.02

In [17]:
# Rows of the "muito alto" class, selected by position in the index-sorted df.
# NOTE(review): the positions assume the current row order and row count of
# df; confirm them whenever pellets are added or removed.
muitoalto_rows = slice(10, 13)
muitoalto = df.iloc[muitoalto_rows]
muitoalto.describe()

Unnamed: 0,R,G,B,B/R
count,3.0,3.0,3.0,3.0
mean,139.333333,92.333333,5.666667,0.042
std,14.468356,22.052967,3.785939,0.03005
min,130.0,68.0,3.0,0.019
25%,131.0,83.0,3.5,0.025
50%,132.0,98.0,4.0,0.031
75%,144.0,104.5,7.0,0.0535
max,156.0,111.0,10.0,0.076


In [18]:
# MUITO ALTO class data (mean and standard deviation), typed in by hand as
# (mean, std) pairs per channel plus the mean B/R ratio.
medmuitoaltoR, stdmuitoaltoR = 139.33, 14.47
medmuitoaltoG, stdmuitoaltoG = 92.33, 22.05
# The B standard deviation reported by muitoalto.describe() is 3.785939; it
# was previously truncated to 3.78 while every other statistic in this
# notebook is rounded, so it is now rounded to 3.79 for consistency.
medmuitoaltoB, stdmuitoaltoB = 5.67, 3.79
medmuitoaltoBR = 0.04

In [23]:
# Table of RGB means and standard deviations for yellowing.
# Only the per-class means (and the mean B/R ratio) are placed in the table.
import pandas as pd

index = ("nulo", "baixo", "moderado", "alto", "muito alto")
data = {
    "Média R": [mednuloR, medbaixoR, medmodR, medaltoR, medmuitoaltoR],
    "Média G": [mednuloG, medbaixoG, medmodG, medaltoG, medmuitoaltoG],
    "Média B": [mednuloB, medbaixoB, medmodB, medaltoB, medmuitoaltoB],
    "Média B/R": [mednuloBR, medbaixoBR, medmodBR, medaltoBR, medmuitoaltoBR],
}

# One row per class (in the order of `index`), one column per statistic.
dfcrit = pd.DataFrame(data=data, index=index)
dfcrit

Unnamed: 0,Média R,Média G,Média B,Média B/R
nulo,150.25,159.5,161.5,1.05
baixo,182.75,186.5,157.25,0.86
moderado,176.33,160.33,54.67,0.29
alto,177.33,150.0,3.67,0.02
muito alto,139.33,92.33,5.67,0.04
