In [1]:
# Imports
import os
import time
import glob
import numpy as np
import pandas as pd
import rasterio
from csv import writer
from collections import Counter
import multiprocessing

In [2]:
# Lookup tables that translate raw raster class codes into contiguous class
# indices. Each table is a 1-D int64 array indexed by the raw code; any code
# that is not listed below keeps the default value 0.


def _build_lookup(raw_codes, class_indices):
    """Return an int64 array `lut` with `lut[raw_codes[k]] == class_indices[k]`.

    The array has `max(raw_codes) + 1` entries; positions that do not
    correspond to a listed raw code are left at 0.
    """
    lookup = np.zeros(max(raw_codes) + 1, dtype=np.int64)
    lookup[raw_codes] = class_indices
    return lookup


# ---ESRI LULC 2020 (9 classes): class codec---
# index: class name
# 0: Water
# 1: Tree
# 2: Flooded Vegetation
# 3: Crops
# 4: Built Area
# 5: Bare Ground
# 6: Snow / Ice
# 7: Clouds
# 8: Rangeland
esri_classes = [1, 2, 4, 5, 7, 8, 9, 10, 11]  # raw codes, in index order
labels = list(range(9))
esri_class_to_index_map = _build_lookup(esri_classes, labels)


# ---GHS-SMOD 2020 (8 classes): class codec---
# raw code: class name
# 30: URBAN CENTRE GRID CELL
# 23: DENSE URBAN CLUSTER GRID CELL
# 22: SEMI-DENSE URBAN CLUSTER GRID CELL
# 21: SUBURBAN OR PERI-URBAN GRID CELL
# 13: RURAL CLUSTER GRID CELL
# 12: LOW DENSITY RURAL GRID CELL
# 11: VERY LOW DENSITY RURAL GRID CELL
# 10: WATER GRID CELL
# NoData [-200]
smod_classes = [10, 11, 12, 13, 21, 22, 23, 30]
# raw codes 10 and 11 share index 9; the remaining codes get 10..15
labels = [9, 9, 10, 11, 12, 13, 14, 15]
smod_class_to_index_map = _build_lookup(smod_classes, labels)

#### Country selection

In [3]:
# ---> ENTER COUNTRIES BELOW <---
# Country names are spelled without spaces. Five folds plus an `extra` group;
# `countries` lists the extra group first, then the folds in order.
fold_1 = [
    'Algeria',
    'Niger',
    'Mauritania',
    'Mozambique',
    'CentralAfricanRepublic',
    'Zimbabwe',
    'Guinea',
    'Malawi',
    'Togo',
]
fold_2 = [
    'DemocraticRepublicoftheCongo',
    'Angola',
    'Egypt',
    'Zambia',
    'Madagascar',
    'Congo',
    'Ghana',
    'Eritrea',
    'Guinea-Bissau',
]
fold_3 = [
    'Sudan',
    'Mali',
    'UnitedRepublicofTanzania',
    'Morocco',
    'Botswana',
    'CotedIvoire',
    'Uganda',
    'Benin',
    'Lesotho',
]
fold_4 = [
    'Libya',
    'SouthAfrica',
    'Nigeria',
    'SouthSudan',
    'Kenya',
    'BurkinaFaso',
    'Senegal',
    'Liberia',
    'EquatorialGuinea',
]
fold_5 = [
    'Chad',
    'Ethiopia',
    'Namibia',
    'Somalia',
    'Cameroon',
    'Gabon',
    'Tunisia',
    'SierraLeone',
    'Burundi',
    'WesternSahara',
]
extra = [
    'Seychelles',
    'SaoTomeandPrincipe',
    'Mauritius',
    'Comoros',
    'CapeVerde',
    'Gambia',
    'Swaziland',
    'Djibouti',
    'Rwanda',
]
countries = [*extra, *fold_1, *fold_2, *fold_3, *fold_4, *fold_5]

#### Class distribution

In [6]:
# Count class occurrences for all countries and append one row per country to a csv.
#
# For every tile of a country:
#   * band 10 (ESRI) is remapped to class indices via esri_class_to_index_map,
#   * band 8 (SMOD) is remapped via smod_class_to_index_map,
#   * every pixel whose ESRI index is 4 takes the SMOD index of that SAME pixel,
#   * pixels whose ESRI value is infinite (missing label) are excluded.
# The per-country pixel counts for indices 0..15 are printed and written to CSV_PATH.
CSV_PATH = '/mimer/NOBACKUP/groups/globalpoverty1/albin_and_albin/training_data/class_distribution_country_esri_new.csv'
N_CLASSES = 16  # number of count columns written per country (class indices 0..15)

for country in countries:
    t0 = time.time()

    # retrieve list of image paths for selected country
    img_dir = f'/mimer/NOBACKUP/groups/globalpoverty1/albin_and_albin/raw_data_newest/2020/{country}/*.tif'
    img_paths = glob.glob(img_dir)
    if not img_paths:
        # an all-zero row is still written below; make the reason visible
        print(f'WARNING: no tiles found for {country} ({img_dir})', flush=True)

    country_class_count = Counter()
    incomplete_tiles = 0

    # update counter one image at a time
    for img in img_paths:
        # read both bands, then release the file handle right away
        with rasterio.open(img) as im_file:
            image = im_file.read(10)  # band 10: ESRI
            smod = im_file.read(8)    # band 8: SMOD

        # pixels with an infinite ESRI value carry no label
        valid = ~np.isinf(image)
        if not valid.all():
            incomplete_tiles += 1

        # infinite SMOD values are treated as raw code 10 before the remap
        # NOTE(review): the SMOD NoData code (-200) is not handled here and would
        # fail the table lookup -- presumably absent from these tiles, TODO confirm
        smod[np.isinf(smod)] = 10
        smod = smod_class_to_index_map[smod.astype('int64')]

        # Select the valid pixels from BOTH rasters with the same mask so the
        # two 1-D arrays stay aligned pixel-for-pixel. (Filtering only the ESRI
        # band and then calling np.putmask with the full SMOD raster pairs each
        # pixel with the SMOD value of a different location.)
        esri_px = esri_class_to_index_map[image[valid].astype('int64')]
        smod_px = smod[valid]
        tile_labels = np.where(esri_px == 4, smod_px, esri_px)

        # count class occurrence (vectorised; keys are stored as plain ints)
        class_ids, pixel_counts = np.unique(tile_labels, return_counts=True)
        for class_id, n_pixels in zip(class_ids.tolist(), pixel_counts.tolist()):
            country_class_count[class_id] += n_pixels

    csv_row_country = [country] + [country_class_count[i] for i in range(N_CLASSES)]
    print(f'{country} done: {time.time() - t0:.3f}s', flush=True)
    print(csv_row_country, flush=True)
    print(f'{incomplete_tiles} tiles contain missing labels', flush=True)

    # write results to csv; newline='' lets the csv module control line endings,
    # and the with-block closes the file
    with open(CSV_PATH, 'a', newline='') as f_object:
        writer(f_object).writerow(csv_row_country)
    




/mimer/NOBACKUP/groups/globalpoverty1/albin_and_albin/raw_data_newest/2020/Seychelles/tile_0.tif contains missing labels
Seychelles done: 1.755s
['Seychelles', 4544670, 760859, 49352, 183, 243069, 22068, 91, 4760, 374948]
1 tiles missing labels
