In [2]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches

import sys

sys.path.append("..")

import os
from PIL import Image
from collections.abc import Iterable
from os.path import dirname, join, abspath

import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from matplotlib.colors import ListedColormap

import numpy as np
import pandas as pd
import seaborn as sns
import torch.optim
from joblib import Parallel, delayed
from matplotlib.ticker import FuncFormatter
from skimage.color import label2rgb
# load label
from skimage.measure import label, find_contours
from skimage import transform
import subprocess

from utils import *
from pred2raster import pred2raster
from sample_selection import get_components_stats
from src.io_operations import fix_relative_paths, load_args, read_tiff, read_yaml

from IPython.display import HTML, display

from millify import  millify

from matplotlib import rc

from statistics import mode

from tqdm import tqdm
from seaborn import color_palette
from glob import glob

from scipy.ndimage import distance_transform_edt, gaussian_filter
import gc
# multi-process pool executor
from concurrent.futures import ProcessPoolExecutor

In [3]:
# Read the orthoimage and move its first axis to the last position in one step.
# NOTE(review): presumably read_tiff returns a band-first array, so this yields
# (rows, cols, bands) as expected by imshow — confirm against read_tiff.
ORTHOIMAGE = np.moveaxis(
    read_tiff("../amazon_input_data/orthoimage/NOV_2017_FINAL_004.tif"),
    source=0,
    destination=2,
)

In [4]:
# Ground-truth label rasters for the train, test and full sets, read in that order.
LABEL_GT_TRAIN, LABEL_GT_TEST, LABEL_GT_FULL = (
    read_tiff(raster_path)
    for raster_path in (
        "../amazon_input_data/segmentation/train_set.tif",
        "../amazon_input_data/segmentation/test_set.tif",
        "../amazon_input_data/segmentation/full_set.tif",
    )
)

In [5]:
# Connected-component labelling (skimage.measure.label) of each ground-truth raster.
COMP_GT_TRAIN, COMP_GT_TEST, COMP_GT_FULL = map(
    label,
    (LABEL_GT_TRAIN, LABEL_GT_TEST, LABEL_GT_FULL),
)

In [6]:
# Lookup Series: index is the numeric label ("label_num"), values are the tree names.
id_tree = (
    pd.read_csv("../amazon_input_data/id_trees.csv")
    .set_index("label_num")
    .loc[:, "tree_name"]
)

# Tabela com Descrição do Dataset

In [7]:
# Per-component statistics for each split, computed from the (components, labels) pair.
train_stats, test_stats, full_stats = (
    get_components_stats(components, labels)
    for components, labels in (
        (COMP_GT_TRAIN, LABEL_GT_TRAIN),
        (COMP_GT_TEST, LABEL_GT_TEST),
        (COMP_GT_FULL, LABEL_GT_FULL),
    )
)

In [8]:
# Attach the human-readable tree name to every stats table by mapping the
# numeric "tree_type" through the id_tree lookup (adds the column in place).
for _stats in (train_stats, test_stats, full_stats):
    _stats["tree_name"] = _stats["tree_type"].map(id_tree)
del _stats  # do not leave the loop variable behind in the notebook namespace

In [9]:
# Number of components per tree name in the full set.
full_stats.groupby(by="tree_name").size()

tree_name
Abiorana Rosa      72
Angico Angico       6
Angico Vermelho    14
Castanheira        83
Cedro Cedro        52
Cerejeira          14
Cumaru Ferro       20
Garapeira          45
Guaribeiro         24
Guariuba           18
Ipe                20
Jutai              32
Massaranduba        9
Samauma            11
Tachi              25
Tauari             67
Ucuuba              6
dtype: int64

In [33]:
# Per-species summary table: number of components (ITCs) and summed area
# (Pixels) for the train and test splits, plus their totals.

# Named aggregations shared by both splits so the two tables cannot drift apart.
summary_aggs = dict(ITCs=("tree_type", "count"), Pixels=("area", "sum"))

train_count = train_stats.groupby("tree_name", as_index=False).agg(**summary_aggs)
test_count = test_stats.groupby("tree_name", as_index=False).agg(**summary_aggs)

# Outer join keeps species that appear in only one of the two splits.
count = train_count.merge(
    test_count, on="tree_name", suffixes=("_train", "_test"), how="outer"
)

# A species missing from one split gets NaN from the outer join; treat that as
# zero so the totals below stay defined instead of turning into NaN.
split_columns = ["ITCs_train", "Pixels_train", "ITCs_test", "Pixels_test"]
count[split_columns] = count[split_columns].fillna(0)

# NaN forces the count columns to float; restore integer counts.
itc_columns = ["ITCs_train", "ITCs_test"]
count[itc_columns] = count[itc_columns].astype(int)

# Totals go right after tree_name: ITCs_total as column 1, Pixels_total as column 2.
count.insert(1, "ITCs_total", count["ITCs_train"] + count["ITCs_test"])
count.insert(2, "Pixels_total", count["Pixels_train"] + count["Pixels_test"])

count

Unnamed: 0,tree_name,ITCs_total,Pixels_total,ITCs_train,Pixels_train,ITCs_test,Pixels_test
0,Abiorana Rosa,72,16371007.0,50,11403267.0,22,4967740.0
1,Angico Angico,6,1114366.0,4,898505.0,2,215861.0
2,Angico Vermelho,14,3315461.0,9,1876514.0,5,1438947.0
3,Castanheira,83,15653626.0,58,10784298.0,25,4869328.0
4,Cedro Cedro,52,3906764.0,36,2535266.0,16,1371498.0
5,Cerejeira,14,952959.0,9,663557.0,5,289402.0
6,Cumaru Ferro,20,4459054.0,14,3200887.0,6,1258167.0
7,Garapeira,45,5210028.0,31,3256934.0,14,1953094.0
8,Guaribeiro,24,2014050.0,16,1485259.0,8,528791.0
9,Guariuba,18,1216249.0,12,801240.0,6,415009.0


In [34]:
# select numeric tables and sum them
# Column-wise grand totals over the numeric columns of the summary table
# (the non-numeric tree_name column is excluded).
total = count.select_dtypes(include="number").sum()

total

ITCs_total           518.0
Pixels_total    78239639.0
ITCs_train           355.0
Pixels_train    53070254.0
ITCs_test            163.0
Pixels_test     25169385.0
dtype: float64

In [2]:
# Fraction of all ITCs that belong to the test split, derived from the totals
# instead of numbers copied by hand from a previous output.
total["ITCs_test"] / total["ITCs_total"]

0.31467181467181465

In [40]:
# Human-readable test-split pixel count, taken from the computed totals rather
# than a literal copied from a previous output.
millify(total["Pixels_test"], precision=2)

'25.17M'

# Análise Quantitativa das Amostras

In [18]:
# Training components per tree name, most frequent first.
train_stats.groupby("tree_name").size().sort_values(ascending=False)

tree_name
Castanheira        58
Abiorana Rosa      50
Tauari             46
Cedro Cedro        36
Garapeira          31
Jutai              22
Tachi              17
Guaribeiro         16
Ipe                14
Cumaru Ferro       14
Guariuba           12
Cerejeira           9
Angico Vermelho     9
Samauma             7
Massaranduba        6
Angico Angico       4
Ucuuba              4
dtype: int64

In [None]:
# First quartile of component area per tree name in the training set, ascending.
train_stats.groupby("tree_name")["area"].quantile(q=0.25).sort_values()

In [None]:
# Box plot of component area per tree name in the training set; rows are
# ordered from the largest to the smallest median area.
ax = sns.boxplot(
    data=train_stats,
    x="area",
    y="tree_name",
    order=(
        train_stats.groupby(["tree_name"])["area"]
        .median()
        .sort_values(ascending=False)
        .index
    ),
)

plt.show()

# Análise Qualitativa das Amostras

In [None]:
BOX_DIMENSION = 512  # side length, in pixels, of the square window cropped around each centroid
N_EXAMPLES = 4  # maximum number of example components plotted per tree type
HALF_BOX = BOX_DIMENSION // 2

# For every tree type, show up to N_EXAMPLES training components: the orthoimage
# cropped around the component centroid, with the outline of that tree type
# drawn in red.
for tree_type in id_tree.index:
    tree_name = id_tree[tree_type]
    label_tree = train_stats[train_stats["tree_type"] == tree_type].index

    # A tree type listed in id_tree may have no component in the training set;
    # np.random.choice raises on an empty population, so skip it.
    if len(label_tree) == 0:
        continue

    # Re-seed for each tree type so its sample is reproducible on its own.
    np.random.seed(42)
    # Sample without replacement so the same component is never shown twice;
    # cap the sample size for tree types with fewer than N_EXAMPLES components.
    n_examples = min(N_EXAMPLES, len(label_tree))
    examples = np.random.choice(label_tree, n_examples, replace=False)

    centroids = train_stats.loc[examples, ["centroid-0", "centroid-1"]].values

    # squeeze=False keeps axs two-dimensional even when only one panel is drawn.
    fig, axs = plt.subplots(
        1, n_examples, figsize=(5 * n_examples, 5), squeeze=False
    )
    for ax, (row, col) in zip(axs[0], centroids):
        # centroid-0 indexes the first image axis (rows), centroid-1 the second (cols).
        # Clamp the window start at 0: a negative slice start would wrap around
        # to the opposite edge and give an empty or wrong crop near the border.
        row_slice = slice(max(int(row) - HALF_BOX, 0), int(row) + HALF_BOX)
        col_slice = slice(max(int(col) - HALF_BOX, 0), int(col) + HALF_BOX)

        ax.imshow(ORTHOIMAGE[row_slice, col_slice])

        # Outline every region of this tree type that falls inside the window.
        for contour in find_contours(LABEL_GT_TRAIN[row_slice, col_slice] == tree_type):
            ax.plot(contour[:, 1], contour[:, 0], color="red", linewidth=2)

        ax.set_xticks([])
        ax.set_yticks([])

    fig.suptitle(tree_name, y=1.00)
    fig.tight_layout()
    # plt.show() works with the inline backend, whereas fig.show() warns on
    # non-GUI backends; close the figure so the loop does not accumulate them.
    plt.show()
    plt.close(fig)
    
    