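This notebook contains a workflow that adds contrast and correlation, derived from the gray-level co-occurrence matrix (GLCM), as texture features.
It appends these features as columns to the given dataset and saves the result next to the input csv, with `glcm_` prefixed to the file name.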

References:
https://scikit-image.org/docs/stable/api/skimage.feature.html#skimage.feature.graycoprops

https://scikit-image.org/docs/dev/auto_examples/features_detection/plot_glcm.html

https://prism.ucalgary.ca/bitstream/handle/1880/51900/texture%20tutorial%20v%203_0%20180206.pdf?sequence=11&isAllowed=y

https://stackoverflow.com/questions/50834170/image-texture-with-skimage

https://www.tandfonline.com/doi/epdf/10.1080/01431161.2016.1278314?needAccess=true&role=button

In [1]:
import os
import time
import pandas as pd
import numpy as np

import geopandas as gpd
import rioxarray as rioxr
import rasterio

import sample_rasters as sr
from rasterio.crs import CRS
from rasterio.transform import rowcol

from shapely.geometry import box

import planetary_computer as pc

from skimage.feature import graycomatrix, graycoprops

In [2]:
# ***************************************************
# ************* NOTEBOOK VARIABLES ******************

# Input table: csv holding the points that will receive the GLCM texture features
csv_name = 'twok_train.csv'
root = '/home/jovyan/msai4earth-esa/iceplant_detection/models/model_2k/twok_dataset_for_training_experiments/'

# `root` already ends with a path separator, so plain concatenation gives the full path
fp = f'{root}{csv_name}'
all_pts = pd.read_csv(fp)

# -------------------------------------------
# GLCM parameters
# half-width of the sampling window in pixels; the window spans 2*window_r + 1 pixels per side
window_r = 5

# pixel-pair offsets handed to graycomatrix
distances = [1]
# pixel-pair directions (radians) handed to graycomatrix
angles = [0, np.pi/2]
# GLCM properties added as features
props = ['contrast', 'correlation']

# -------------------------------------------
# column holding the item id of the NAIP scene that contains each point (if there is one)
itemid_col = 'naip_id'

# whether the augmented table is written to disk at the end of the notebook
save = True

# ***************************************************
# ***************************************************

In [4]:
# Unique NAIP scenes referenced by the points. Points without a scene id (NaN)
# cannot be sampled, so missing ids are dropped instead of being looked up as a scene.
itemids = list(all_pts[itemid_col].dropna().unique())

# in case there are mixed crss in the dataset
crss = all_pts.pts_crs.unique()

# temporary folder for aux rasters
# makedirs also creates the intermediate 'temp' folder and is a no-op when the
# folder already exists, so the cell can be re-run safely on a fresh checkout.
folp = os.path.join(os.getcwd(), 'temp', 'aux_naip_rasters')
os.makedirs(folp, exist_ok=True)

In [5]:
t0 = time.time() # initial time tracker

# Bands read from each NAIP scene and the output column names.
# Column order must match the flattening order of glcm_features below:
# angle (suffix E = first entry of `angles`, N = second) -> band (r, g, b, nir) -> property.
n_bands = 4
feature_cols = ['r_contE', 'r_corrE', 'g_contE', 'g_corrE', 'b_contE', 'b_corrE', 'nir_contE', 'nir_corrE',
                'r_contN', 'r_corrN', 'g_contN', 'g_corrN', 'b_contN', 'b_corrN', 'nir_contN', 'nir_corrN']

# Fail before any scene is opened if the configuration no longer matches the column names.
if len(feature_cols) != len(angles) * n_bands * len(props):
    raise ValueError('feature_cols does not match the configured angles/props: expected '
                     + str(len(angles) * n_bands * len(props)) + ' columns, got ' + str(len(feature_cols)))


def glcm_features(window):
    """Return the GLCM texture features of one window as a flat array.

    Parameters
    ----------
    window : array of integer pixel values indexed as (band, row, col);
        only the first `n_bands` bands are used.

    Returns
    -------
    1-D float array ordered angle -> band -> property, using the notebook
    variables `distances`, `angles` and `props`. Only the first entry of
    `distances` contributes to the output. All values are NaN when the
    window contains no pixels.
    """
    if window.size == 0:
        return np.full(len(feature_cols), np.nan)

    per_band = []
    for band in range(n_bands):
        # one GLCM per band covering every angle at once
        glcm = graycomatrix(window[band], distances=distances, angles=angles)
        # graycoprops is indexed (distance, angle); [0] keeps the first distance, all angles
        per_band.append([graycoprops(glcm, prop)[0] for prop in props])

    # per_band is indexed (band, property, angle); reorder to (angle, band, property)
    return np.asarray(per_band).transpose(2, 0, 1).ravel()


scene_frames = [] # sampled pts from each scene are collected here

N = len(itemids)  # counter to finish

print('REMAINING: ', N, 'scenes', end="\r")
for itemid in itemids:
    # select the points first so that no raster is opened for a scene without points
    all_pts_scene = all_pts.loc[all_pts[itemid_col] == itemid]

    if len(all_pts_scene) != 0:
        # ---------------------------------------
        # open raster reader for NAIP scene
        item = sr.get_item_from_id(itemid)
        href = pc.sign(item.assets["image"].href)
        naip_rast_r = rioxr.open_rasterio(href)

        try:
            raster_crs = naip_rast_r.rio.crs
            raster_transform = naip_rast_r.rio.transform()

            for crs_str in crss:
                pts_scene = all_pts_scene[all_pts_scene.pts_crs == crs_str]
                if len(pts_scene) == 0:
                    continue

                crs = CRS.from_string(crs_str)
                pts_scene_df = sr.geodataframe_from_csv(df = pts_scene, lon_label='x', lat_label='y', crs=crs)
                pts_col = pts_scene_df.to_crs(raster_crs).geometry

                samples = []
                for pt in pts_col:
                    # make a small window out of raster, all bands
                    row, col = rowcol(raster_transform, pt.x, pt.y)
                    # clamp at 0: a negative slice bound would wrap around to the
                    # opposite edge of the raster instead of truncating the window
                    row_start, row_stop = max(row - window_r, 0), max(row + window_r + 1, 0)
                    col_start, col_stop = max(col - window_r, 0), max(col + window_r + 1, 0)
                    # .values reads the window once; every band/angle reuses it
                    window = naip_rast_r[:, row_start:row_stop, col_start:col_stop].values

                    # contrast and correlation of the GLCM on that window,
                    # for every band and angle, become the features of this pixel
                    samples.append(glcm_features(window))

                # ---------------------------------------
                # Add all derived texture data to pts dataframe
                new_features = pd.DataFrame(np.vstack(samples), columns = feature_cols)
                pts = pd.concat([pts_scene, new_features.set_index(pts_col.index)], axis=1)

                # -----------------------------
                # collect all points in the scene
                scene_frames.append(pts)
        finally:
            # release the raster handle even if sampling this scene fails
            naip_rast_r.close()

    # ---------------------------------------
    # processing message
    N = N-1
    print('REMAINING: ', N, 'scenes', end="\r")

print('FINISHED PROCESSING')
print('ELAPSED: ', round(time.time() - t0, 1), 'seconds')

# ---------------------------------------
# create data frame with all points
sampled_pts = pd.concat(scene_frames).sort_index()
# the 'geometry' column is not wanted in the output table
sampled_pts = sampled_pts.drop(['geometry'],axis=1)

FINISHED PROCESSINGs


In [6]:
# Inspect the points table with the appended GLCM contrast/correlation columns.
sampled_pts

Unnamed: 0,x,y,pts_crs,aoi,naip_id,r,r_max,r_min,r_diff,r_avg5,...,nir_contE,nir_corrE,r_contN,r_corrN,g_contN,g_corrN,b_contN,b_corrN,nir_contN,nir_corrN
0,-120.487218,34.492960,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,127,163,97,66,124.181816,...,20.581818,0.774510,49.345455,0.785663,59.045455,0.763311,28.609091,0.785548,21.081818,0.741355
1,-120.482942,34.487021,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,141,165,114,51,139.198350,...,22.390909,0.928819,37.418182,0.848603,55.000000,0.821835,24.854545,0.834540,7.863636,0.967642
2,-120.470587,34.480261,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,87,128,53,75,95.743805,...,41.781818,0.924967,87.918182,0.737634,34.900000,0.678684,32.436364,0.823100,79.572727,0.841766
3,-120.470228,34.480481,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,121,151,73,78,124.495865,...,34.790909,0.941217,193.227273,0.827256,55.009091,0.815053,66.618182,0.794590,88.072727,0.849271
4,-120.474463,34.474333,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,86,107,77,30,86.991740,...,41.572727,0.836085,27.618182,0.683734,12.390909,0.706953,15.700000,0.752289,35.527273,0.883826
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
569,-119.499635,34.390906,EPSG:4326,carpinteria,ca_m_3411936_se_11_060_20200521,76,185,76,109,114.661156,...,38.427273,0.963778,824.800000,0.700835,500.490909,0.704255,569.245455,0.763209,306.363636,0.488540
570,-119.504277,34.389914,EPSG:4326,carpinteria,ca_m_3411936_se_11_060_20200521,85,187,73,114,118.586780,...,13.018182,0.822755,457.627273,0.871116,223.490909,0.880462,216.172727,0.884650,34.763636,0.501325
571,-119.504155,34.389866,EPSG:4326,carpinteria,ca_m_3411936_se_11_060_20200521,76,187,70,117,99.347110,...,14.363636,0.814431,343.636364,0.748461,167.545455,0.795395,141.372727,0.796965,49.136364,0.349182
572,-119.503870,34.389431,EPSG:4326,carpinteria,ca_m_3411936_se_11_060_20200521,86,143,78,65,118.743805,...,15.018182,0.869231,195.018182,0.699115,48.072727,0.706109,44.690909,0.728189,47.390909,0.582789


In [None]:
# check = ['x', 'y', 'pts_crs',
#                     'aoi','naip_id',
#                     'r', 'r_max', 'r_min', 'r_diff', 'r_avg', 'r_entr', 'r_avg5', 'r_entr5', 'r_contN', 'r_contE', 'r_corrN', 'r_corrE',
#                     'g', 'g_max', 'g_min', 'g_diff', 'g_avg', 'g_entr', 'g_avg5', 'g_entr5', 'g_contN', 'g_contE', 'g_corrN', 'g_corrE',
#                     'b', 'b_max', 'b_min', 'b_diff', 'b_avg', 'b_entr', 'b_avg5', 'b_entr5', 'b_contN', 'b_contE', 'b_corrN',  'b_corrE',
#                     'nir', 'nir_max', 'nir_min', 'nir_diff', 'nir_avg', 'nir_entr', 'nir_avg5', 'nir_entr5', 'nir_contN', 'nir_contE', 'nir_corrN', 'nir_corrE',
#                     'ndvi', 'ndvi_max', 'ndvi_min', 'ndvi_diff', 'ndvi_avg', 'ndvi_entr','ndvi_entr5',                     
#                     'year', 'month', 'day_in_year',
#                     'iceplant']
# sampled_pts[check]

In [8]:
# Write the augmented table into `root`, prefixing the input file name with 'glcm_'.
# The dataframe index is not written to the csv.
if save:
    fp = f'{root}glcm_{csv_name}'
    sampled_pts.to_csv(fp, index=False)