In [5]:
# Compute the per-class pixel weights for each image dataset folder.
# The weights will be used in model training.

import geopandas as gpd
import pandas as pd
import shapely
import rasterio
import os
from pathlib import Path
import rioxarray
import fiona.transform
import numpy as np
from rasterio.warp import calculate_default_transform, reproject, Resampling
from shapely.geometry import Polygon
import skimage.draw
import numpy as np
import glob
import shutil
from tqdm import tqdm
import random
from os.path import dirname as up
# Absolute path of the current working directory; later cells take its parent
# (via `up`) as the base for the 'datasets' folder and the output CSV.
path_cur = os.path.abspath(os.getcwd())

In [6]:
# Map every dataset folder name found under <parent of cwd>/datasets to its
# full path. Entries whose name contains a '.' and entries that are not
# directories are skipped.
data_dirs = os.path.join(up(path_cur), 'datasets')
data_dirs_dict = {
    name: os.path.join(data_dirs, name)
    for name in os.listdir(data_dirs)
    if '.' not in name and os.path.isdir(os.path.join(data_dirs, name))
}


In [7]:
data_dirs_dict

{'Image_allyear_merged_512': '/rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_allyear_merged_512',
 'Image_after_2010_merged_512': '/rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_after_2010_merged_512',
 'Image_after_2010_merged_256': '/rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_after_2010_merged_256',
 'Image_allyear_merged_256': '/rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_allyear_merged_256',
 'Image_allyear_merged_1024': '/rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_allyear_merged_1024',
 'Image_after_2010_merged_1024': '/rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_after_2010_merged_1024',
 'Image_after_2010_VA_512': '/rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_after_2010_VA_512',
 'Image_after_2010_VA_256': 

In [8]:
def count_pixel(img, n_classes=5):
    """Count how many pixels of each class value occur in band 1 of a raster.

    Parameters
    ----------
    img : str or path-like
        Path of the raster file, passed straight to ``rasterio.open``.
    n_classes : int, optional
        Number of class values to count, starting at 0. The default of 5
        yields counts for the values 0, 1, 2, 3 and 4.

    Returns
    -------
    tuple of int
        ``(count_0, count_1, ..., count_{n_classes-1})``. Pixels holding any
        other value are not counted.
    """
    # Open the dataset in a context manager so the file handle is released
    # after the read (the handle used to stay open for every processed tile).
    with rasterio.open(img) as src:
        img_array = src.read(1)

    return tuple(
        int(np.count_nonzero(img_array == class_value))
        for class_value in range(n_classes)
    )
    

In [9]:
counting_weights = True  # set to False to skip the (slow) recount below


def _sum_split_counts(dataset_dir, split, verbose=False):
    """Sum the per-class mask pixel counts over all tiles of one split.

    The tile names are the '.tif' files listed in ``<dataset_dir>/<split>``;
    the mask of each tile is read from ``<dataset_dir>/masks`` under the same
    file name and counted with ``count_pixel``.

    Parameters
    ----------
    dataset_dir : str
        Root folder of one dataset (contains the split folders and 'masks').
    split : str
        Name of the split sub-folder, e.g. 'train', 'val' or 'test'.
    verbose : bool, optional
        When True, print each mask path and its counts.

    Returns
    -------
    numpy.ndarray
        Float array of length 5 with the summed counts for class values 0-4.
    """
    tile_names = [file for file in os.listdir(os.path.join(dataset_dir, split)) if file.endswith('.tif')]

    totals = np.zeros(5)  # 5 classes including background 0
    for roi in tile_names:
        roi_file_mask = os.path.join(dataset_dir, 'masks', roi)
        if verbose:
            print('Processing {}'.format(roi_file_mask))

        counts = count_pixel(roi_file_mask)

        if verbose:
            print('Counting is {}'.format(counts))
        totals = np.add(totals, counts)

    return totals


if counting_weights:

    # The flag is cleared once the count starts, as the original run-once loop did.
    counting_weights = False

    count_train_dict = dict()
    count_val_dict = dict()
    count_test_dict = dict()

    for key, val in tqdm(data_dirs_dict.items()):

        print("Working on {}".format(key))

        # (split folder, label used in the progress message, result dict)
        for split, label, totals_dict in (
            ('train', 'training', count_train_dict),
            ('val', 'val', count_val_dict),
            ('test', 'test', count_test_dict),
        ):
            # The messages previously had no '{}' placeholder, so the dataset
            # name was silently dropped from the output.
            print("Working on {} data in {}".format(label, key))

            # Per-tile logging is only enabled for the training split, as before.
            totals_dict[key] = _sum_split_counts(val, split, verbose=(split == 'train'))


  0%|          | 0/10 [00:00<?, ?it/s]

Working on Image_allyear_merged_512
Working on training data in 
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_allyear_merged_512/masks/groins_31978255_wgs84_tile_5120-3584.tif
Counting is (257909, 0, 0, 4235, 0)
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_allyear_merged_512/masks/groins_31228345_wgs84_tile_2048-512.tif
Counting is (261579, 0, 0, 565, 0)
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_allyear_merged_512/masks/groins_30178315_wgs84_tile_1536-3584.tif
Counting is (259305, 0, 0, 2839, 0)
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_allyear_merged_512/masks/groins_29878210_wgs84_tile_5120-3584.tif
Counting is (258271, 0, 0, 3873, 0)
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_allyear_merged_512/masks/groins_26128030_wgs84_ti

 10%|█         | 1/10 [00:37<05:34, 37.18s/it]

Working on Image_after_2010_merged_512
Working on training data in 
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_after_2010_merged_512/masks/groins_31978255_wgs84_tile_5120-3584.tif
Counting is (257909, 0, 0, 4235, 0)
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_after_2010_merged_512/masks/groins_31228345_wgs84_tile_2048-512.tif
Counting is (261579, 0, 0, 565, 0)
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_after_2010_merged_512/masks/groins_30178315_wgs84_tile_1536-3584.tif
Counting is (259305, 0, 0, 2839, 0)
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_after_2010_merged_512/masks/groins_29878210_wgs84_tile_5120-3584.tif
Counting is (258271, 0, 0, 3873, 0)
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_after_2010_merged_512/masks/groins

 20%|██        | 2/10 [00:52<03:14, 24.28s/it]

Working on Image_after_2010_merged_256
Working on training data in 
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_after_2010_merged_256/masks/groins_32128375_wgs84_tile_5120-256.tif
Counting is (64450, 0, 0, 1086, 0)
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_after_2010_merged_256/masks/groins_27178135_wgs84_tile_768-1280.tif
Counting is (60244, 0, 0, 5292, 0)
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_after_2010_merged_256/masks/groins_31978255_wgs84_tile_5120-3840.tif
Counting is (63541, 0, 0, 1995, 0)
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_after_2010_merged_256/masks/groins_25828210_wgs84_tile_1280-512.tif
Counting is (64203, 0, 0, 1333, 0)
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_after_2010_merged_256/masks/groins_2612

 30%|███       | 3/10 [01:13<02:38, 22.61s/it]

Working on Image_allyear_merged_256
Working on training data in 
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_allyear_merged_256/masks/groins_32128375_wgs84_tile_5120-256.tif
Counting is (64450, 0, 0, 1086, 0)
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_allyear_merged_256/masks/groins_27178135_wgs84_tile_768-1280.tif
Counting is (60244, 0, 0, 5292, 0)
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_allyear_merged_256/masks/groins_31978255_wgs84_tile_5120-3840.tif
Counting is (63541, 0, 0, 1995, 0)
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_allyear_merged_256/masks/groins_25828210_wgs84_tile_1280-512.tif
Counting is (64203, 0, 0, 1333, 0)
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_allyear_merged_256/masks/groins_26128870_wgs84_tile_30

 40%|████      | 4/10 [01:36<02:18, 23.09s/it]

Working on Image_allyear_merged_1024
Working on training data in 
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_allyear_merged_1024/masks/groins_27178240_wgs84_tile_3072-1024.tif
Counting is (1046127, 0, 0, 2449, 0)
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_allyear_merged_1024/masks/groins_28678345_wgs84_tile_2048-1024.tif
Counting is (1046512, 0, 0, 2064, 0)
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_allyear_merged_1024/masks/groins_29128330_wgs84_tile_1024-1024.tif
Counting is (1044329, 0, 0, 4247, 0)
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_allyear_merged_1024/masks/groins_26428315_wgs84_tile_4096-2048.tif
Counting is (1046972, 0, 0, 1604, 0)
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_allyear_merged_1024/masks/groins_26128

 50%|█████     | 5/10 [02:34<02:56, 35.37s/it]

Working on Image_after_2010_merged_1024
Working on training data in 
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_after_2010_merged_1024/masks/groins_27178240_wgs84_tile_3072-1024.tif
Counting is (1046127, 0, 0, 2449, 0)
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_after_2010_merged_1024/masks/groins_28678345_wgs84_tile_2048-1024.tif
Counting is (1046512, 0, 0, 2064, 0)
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_after_2010_merged_1024/masks/groins_29128330_wgs84_tile_1024-1024.tif
Counting is (1044329, 0, 0, 4247, 0)
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_after_2010_merged_1024/masks/groins_26428315_wgs84_tile_4096-2048.tif
Counting is (1046972, 0, 0, 1604, 0)
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_after_2010_merged_1024/

 60%|██████    | 6/10 [02:55<02:02, 30.58s/it]

Working on Image_after_2010_VA_512
Working on training data in 
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_after_2010_VA_512/masks/shoreline_DO_S23_2421_20_tile_512-3584.tif
Counting is (256356, 1480, 4308, 0, 0)
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_after_2010_VA_512/masks/shoreline_DO_S23_1812_40_tile_1536-1024.tif
Counting is (247165, 2109, 7677, 5193, 0)
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_after_2010_VA_512/masks/shoreline_DO_S23_1813_30_tile_512-2560.tif
Counting is (258009, 2057, 2078, 0, 0)
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_after_2010_VA_512/masks/shoreline_DO_S23_2433_10_tile_4096-1536.tif
Counting is (248235, 5523, 8386, 0, 0)
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_after_2010_VA_512/masks/sho

 70%|███████   | 7/10 [03:07<01:14, 24.67s/it]

Working on Image_after_2010_VA_256
Working on training data in 
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_after_2010_VA_256/masks/shoreline_DO_N26_0639_40_tile_1280-256.tif
Counting is (64312, 1224, 0, 0, 0)
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_after_2010_VA_256/masks/shoreline_DO_S23_1479_40_tile_3584-1280.tif
Counting is (63191, 2345, 0, 0, 0)
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_after_2010_VA_256/masks/shoreline_DO_S23_1399_10_tile_3072-3584.tif
Counting is (60666, 4870, 0, 0, 0)
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_after_2010_VA_256/masks/shoreline_DO_S23_2769_20_tile_3328-2304.tif
Counting is (61579, 179, 3778, 0, 0)
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_after_2010_VA_256/masks/shoreline_DO_S23_18

 80%|████████  | 8/10 [03:12<00:36, 18.32s/it]

Working on Image_allyear_VA_512
Working on training data in 
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_allyear_VA_512/masks/shoreline_DO_S23_1822_20_tile_4096-1536.tif
Counting is (255092, 5170, 1882, 0, 0)
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_allyear_VA_512/masks/shoreline_DO_S23_1437_40_tile_1024-1024.tif
Counting is (249496, 7798, 4850, 0, 0)
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_allyear_VA_512/masks/shoreline_DO_S23_2418_10_tile_2560-1536.tif
Counting is (256489, 3576, 2079, 0, 0)
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_allyear_VA_512/masks/shoreline_DO_S23_0515_30_tile_3072-1536.tif
Counting is (260579, 0, 1565, 0, 0)
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_allyear_VA_512/masks/shoreline_DO_S13_9651_30_

 90%|█████████ | 9/10 [03:33<00:19, 19.06s/it]

Working on Image_allyear_VA_256
Working on training data in 
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_allyear_VA_256/masks/shoreline_DO_S23_2720_20_tile_1024-2816.tif
Counting is (61184, 1404, 2948, 0, 0)
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_allyear_VA_256/masks/shoreline_DO_S13_9637_30_tile_512-2304.tif
Counting is (63680, 1856, 0, 0, 0)
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_allyear_VA_256/masks/shoreline_DO_N16_9727_20_tile_1536-3072.tif
Counting is (63157, 2379, 0, 0, 0)
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_allyear_VA_256/masks/shoreline_DO_S23_1815_30_tile_2560-2560.tif
Counting is (62566, 2970, 0, 0, 0)
Processing /rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure/datasets/Image_allyear_VA_256/masks/shoreline_DO_S23_1713_40_tile_4864-2

100%|██████████| 10/10 [03:44<00:00, 22.48s/it]


In [10]:
count_train_dict

{'Image_allyear_merged_512': array([3.19216492e+08, 4.14880300e+06, 3.77854800e+06, 2.24932000e+05,
        3.11225000e+05]),
 'Image_after_2010_merged_512': array([1.5546088e+08, 1.9051890e+06, 1.8110280e+06, 2.2042800e+05,
        2.4817100e+05]),
 'Image_after_2010_merged_256': array([37965447.,   717437.,   924223.,   379316.,   187145.]),
 'Image_allyear_merged_256': array([69758536.,  1806539.,  1704174.,   374952.,   214871.]),
 'Image_allyear_merged_1024': array([9.01530355e+08, 5.93995600e+06, 5.37768500e+06, 1.21750000e+05,
        3.39950000e+05]),
 'Image_after_2010_merged_1024': array([3.6814858e+08, 2.5507520e+06, 2.2371310e+06, 1.2517600e+05,
        2.3141700e+05]),
 'Image_after_2010_VA_512': array([1.43280007e+08, 1.90518900e+06, 1.81102800e+06, 8.05330000e+04,
        2.48171000e+05]),
 'Image_after_2010_VA_256': array([31105858.,   717437.,   924223.,    95481.,   187145.]),
 'Image_allyear_VA_512': array([3.07035619e+08, 4.14880300e+06, 3.77854800e+06, 8.50370000e+

In [11]:
count_val_dict

{'Image_allyear_merged_512': array([8.9163701e+07, 1.1399860e+06, 9.2449400e+05, 1.8005700e+05,
        8.0018000e+04]),
 'Image_after_2010_merged_512': array([48310279.,   480706.,   498131.,   204321.,    51779.]),
 'Image_after_2010_merged_256': array([14675054.,   164872.,   225847.,   267960.,    67227.]),
 'Image_allyear_merged_256': array([22668066.,   406597.,   427725.,   270849.,    81867.]),
 'Image_allyear_merged_1024': array([2.33763874e+08, 1.65217400e+06, 1.43050400e+06, 5.30310000e+04,
        7.85930000e+04]),
 'Image_after_2010_merged_1024': array([1.00369836e+08, 7.02007000e+05, 5.55985000e+05, 3.47780000e+04,
        4.92660000e+04]),
 'Image_after_2010_VA_512': array([3.5897933e+07, 4.8070600e+05, 4.9813100e+05, 3.3755000e+04,
        5.1779000e+04]),
 'Image_after_2010_VA_256': array([7768459.,  164872.,  225847.,   31131.,   67227.]),
 'Image_allyear_VA_512': array([7.6751355e+07, 1.1399860e+06, 9.2449400e+05, 9.4910000e+03,
        8.0018000e+04]),
 'Image_allye

In [12]:
count_test_dict

{'Image_allyear_merged_512': array([9.2442371e+07, 8.6514500e+05, 8.8511900e+05, 3.5553500e+05,
        8.5814000e+04]),
 'Image_after_2010_merged_512': array([56348439.,   452701.,   443461.,   331577.,    95502.]),
 'Image_after_2010_merged_256': array([20647149.,   184556.,   173911.,   571480.,    49784.]),
 'Image_allyear_merged_256': array([27680744.,   391008.,   330605.,   579735.,    50356.]),
 'Image_allyear_merged_1024': array([2.19802259e+08, 1.30526500e+06, 1.04514000e+06, 9.72300000e+04,
        4.82180000e+04]),
 'Image_after_2010_merged_1024': array([1.01302222e+08, 6.22282000e+05, 6.26021000e+05, 1.09679000e+05,
        1.00244000e+05]),
 'Image_after_2010_VA_512': array([3.176479e+07, 4.527010e+05, 4.434610e+05, 1.154600e+04,
        9.550200e+04]),
 'Image_after_2010_VA_256': array([6916659.,  184556.,  173911.,   15122.,   49784.]),
 'Image_allyear_VA_512': array([6.7858722e+07, 8.6514500e+05, 8.8511900e+05, 3.5504000e+04,
        8.5814000e+04]),
 'Image_allyear_VA

In [13]:
# For every dataset, the share of each non-background class among the
# non-background pixels of the training split (index 0, the background
# count, is left out of both numerator and denominator).
pixel_train_weights = {
    key: item[1:] / np.sum(item[1:])
    for key, item in count_train_dict.items()
}


In [14]:
# For every dataset, the share of each non-background class among the
# non-background pixels of the validation split (index 0, the background
# count, is left out of both numerator and denominator).
pixel_val_weights = {
    key: item[1:] / np.sum(item[1:])
    for key, item in count_val_dict.items()
}


In [15]:
# For every dataset, the share of each non-background class among the
# non-background pixels of the test split (index 0, the background
# count, is left out of both numerator and denominator).
pixel_test_weights = {
    key: item[1:] / np.sum(item[1:])
    for key, item in count_test_dict.items()
}


In [16]:
pixel_train_weights

{'Image_allyear_merged_512': array([0.49019898, 0.44645175, 0.02657669, 0.03677258]),
 'Image_after_2010_merged_512': array([0.45526231, 0.43276168, 0.05267328, 0.05930273]),
 'Image_after_2010_merged_256': array([0.32490837, 0.41855632, 0.17178225, 0.08475305]),
 'Image_allyear_merged_256': array([0.44056167, 0.41559786, 0.09143975, 0.05240071]),
 'Image_allyear_merged_1024': array([0.50426896, 0.4565353 , 0.01033589, 0.02885985]),
 'Image_after_2010_merged_1024': array([0.49582348, 0.43486081, 0.02433212, 0.04498359]),
 'Image_after_2010_VA_512': array([0.47100772, 0.44772889, 0.01990966, 0.06135373]),
 'Image_after_2010_VA_256': array([0.37283283, 0.48029399, 0.04961892, 0.09725425]),
 'Image_allyear_VA_512': array([0.49843776, 0.45395527, 0.01021636, 0.03739061]),
 'Image_allyear_VA_256': array([0.47332474, 0.44650446, 0.02387324, 0.05629757])}

In [17]:
pixel_val_weights

{'Image_allyear_merged_512': array([0.49041042, 0.39770795, 0.0774587 , 0.03442293]),
 'Image_after_2010_merged_512': array([0.38925548, 0.40336552, 0.16545055, 0.04192845]),
 'Image_after_2010_merged_256': array([0.22712583, 0.31112431, 0.3691387 , 0.09261116]),
 'Image_allyear_merged_256': array([0.34253074, 0.36032966, 0.22817214, 0.06896746]),
 'Image_allyear_merged_1024': array([0.51400708, 0.44504343, 0.01649845, 0.02445103]),
 'Image_after_2010_merged_1024': array([0.52309103, 0.41428471, 0.02591436, 0.03670989]),
 'Image_after_2010_VA_512': array([0.45163388, 0.46800505, 0.03171357, 0.04864751]),
 'Image_after_2010_VA_256': array([0.33710847, 0.46178209, 0.06365255, 0.13745688]),
 'Image_allyear_VA_512': array([0.52924411, 0.42920089, 0.00440624, 0.03714875]),
 'Image_allyear_VA_256': array([0.4279027 , 0.45013781, 0.03580265, 0.08615683])}

In [18]:
pixel_test_weights

{'Image_allyear_merged_512': array([0.39475263, 0.40386647, 0.16222527, 0.03915564]),
 'Image_after_2010_merged_512': array([0.34211531, 0.33513245, 0.25057945, 0.07217279]),
 'Image_after_2010_merged_256': array([0.18837416, 0.17750893, 0.58330297, 0.05081395]),
 'Image_allyear_merged_256': array([0.28927043, 0.24458387, 0.42889198, 0.03725372]),
 'Image_allyear_merged_1024': array([0.52297351, 0.41875062, 0.03895662, 0.01931925]),
 'Image_after_2010_merged_1024': array([0.42673907, 0.42930314, 0.07521399, 0.0687438 ]),
 'Image_after_2010_VA_512': array([0.45125248, 0.44204205, 0.01150906, 0.09519642]),
 'Image_after_2010_VA_256': array([0.43591821, 0.4107749 , 0.03571791, 0.11758898]),
 'Image_allyear_VA_512': array([0.46225332, 0.47292558, 0.01897005, 0.04585105]),
 'Image_allyear_VA_256': array([0.49162   , 0.41567444, 0.02939224, 0.06331333])}

In [20]:
import pandas as pd
import functools as ft

In [21]:
# get the percentage of the pixels at each dataset
def get_percent(dic, mode='train'):
    """Build a two-column table of per-dataset class proportions.

    Parameters
    ----------
    dic : dict
        Maps a dataset name to a 1-D sequence of numbers (one per class).
        Any number of classes is accepted; an empty dict gives an empty table.
    mode : str, optional
        Prefix of the value column name, e.g. 'train' -> 'train_percent'.

    Returns
    -------
    pandas.DataFrame
        Columns ``'dataset'`` and ``'<mode>_percent'``. Each value is rounded
        to 2 decimals and the values of one dataset are joined with '/',
        e.g. ``'0.49/0.45/0.03/0.04'``.
    """
    col_nm = '{}_percent'.format(mode)

    # One row per dataset, one column per class.
    values = pd.DataFrame.from_dict(dic, orient='index').round(2)

    # tolist() yields plain Python numbers, so str() formats them the same way
    # regardless of how many class columns there are.
    joined = ['/'.join(str(v) for v in row) for row in values.to_numpy().tolist()]

    return pd.DataFrame({'dataset': list(values.index), col_nm: joined})

def get_count(dic, mode='train'):
    """Build a two-column table of per-dataset pixel counts, background excluded.

    Parameters
    ----------
    dic : dict
        Maps a dataset name to a 1-D sequence of counts whose first entry is
        the one to leave out (the background count in this notebook). Any
        number of further classes is accepted.
    mode : str, optional
        Prefix of the value column name, e.g. 'train' -> 'train_count'.

    Returns
    -------
    pandas.DataFrame
        Columns ``'dataset'`` and ``'<mode>_count'``. The counts of one dataset
        (all but the first) are converted with ``str`` and joined with '/',
        e.g. ``'1.0/2.0/3.0/4.0'``.
    """
    col_nm = '{}_count'.format(mode)

    # One row per dataset, one column per class (column 0 is dropped below).
    counts = pd.DataFrame.from_dict(dic, orient='index')

    # Skip the first column instead of naming columns 1..4 explicitly, so the
    # function works for any number of classes.
    foreground_rows = counts.iloc[:, 1:].to_numpy().tolist()
    joined = ['/'.join(str(v) for v in row) for row in foreground_rows]

    return pd.DataFrame({'dataset': list(counts.index), col_nm: joined})
    

In [22]:
# Per-split pixel counts (background is not included)
train_count, val_count, test_count = (
    get_count(split_counts, mode=split_name)
    for split_counts, split_name in (
        (count_train_dict, 'train'),
        (count_val_dict, 'val'),
        (count_test_dict, 'test'),
    )
)

# Per-split pixel percentages (background is not included)
train_df, val_df, test_df = (
    get_percent(split_weights, mode=split_name)
    for split_weights, split_name in (
        (pixel_train_weights, 'train'),
        (pixel_val_weights, 'val'),
        (pixel_test_weights, 'test'),
    )
)

In [23]:
# Join the six per-split tables into one wide table keyed on 'dataset'.
dfs = [
    train_count, val_count, test_count,
    train_df, val_df, test_df,
]
df_final = ft.reduce(lambda left, right: left.merge(right, on='dataset'), dfs)

In [25]:
# Write the merged statistics table next to the notebook's parent directory.
df_final.to_csv(
    os.path.join(up(path_cur), 'pixel_count_statistics.csv'),
    sep=',',
    encoding='utf-8',
    index=False,
)