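This notebook contains a workflow to add texture features to a set of points: the contrast and correlation of the gray-level co-occurrence matrix (GLCM), computed on a small window of the NAIP scene around each point.
It appends these features as new columns to the given dataset and, when `save` is set to `True`, writes the result to a new CSV (the input file name prefixed with `glcm_`) in the same folder as the input dataset.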

References:
https://scikit-image.org/docs/stable/api/skimage.feature.html#skimage.feature.graycoprops

https://scikit-image.org/docs/dev/auto_examples/features_detection/plot_glcm.html

https://prism.ucalgary.ca/bitstream/handle/1880/51900/texture%20tutorial%20v%203_0%20180206.pdf?sequence=11&isAllowed=y

https://stackoverflow.com/questions/50834170/image-texture-with-skimage

https://www.tandfonline.com/doi/epdf/10.1080/01431161.2016.1278314?needAccess=true&role=button

In [1]:
import os
import time
import pandas as pd
import numpy as np

import geopandas as gpd
import rioxarray as rioxr
import rasterio

import sample_rasters as sr
from rasterio.crs import CRS
from rasterio.transform import rowcol

from shapely.geometry import box

import planetary_computer as pc

from skimage.feature import graycomatrix, graycoprops

In [13]:
# ***************************************************
# ************* NOTEBOOK VARIABLES ******************

# csv with the points for which to add GLCM texture features
csv_name = 'twok_train.csv'
root = '/home/jovyan/msai4earth-esa/iceplant_detection/models/model_2k/twok_dataset/'

# os.path.join works whether or not `root` ends with a path separator
fp = os.path.join(root, csv_name)
all_pts = pd.read_csv(fp)

# -------------------------------------------
# window radius (in pixels): the window taken around each point
# spans (2*window_r + 1) pixels per side
window_r = 5

# pixel-pair distances (in pixels) and angles (in radians) passed to graycomatrix;
# the processing cell labels angle 0 as 'E' and angle pi/2 as 'N' in the new column names
distances = [1]
angles = [0, np.pi/2]
# GLCM properties of interest
# NOTE(review): the processing cell hard-codes 'contrast' and 'correlation'
# and does not read this list -- changing it alone has no effect
props = ['contrast', 'correlation']

# -------------------------------------------
# name of column containing itemid of the NAIP scene containing the point if there is one
itemid_col = 'naip_id'
# name of column with the crs of all points
crs_col = 'pts_crs'

# set to True to write the resulting dataframe to csv in the last cell
save = False

# ***************************************************
# ***************************************************

In [14]:
# Debug subset: every following cell only processes the first `debug_n_rows` points.
# Set `debug_n_rows = None` to run the workflow on the full dataset.
debug_n_rows = 3
if debug_n_rows is not None:
    all_pts = all_pts.head(debug_n_rows)

# preview of the points that will be processed
all_pts.head()

Unnamed: 0,x,y,pts_crs,aoi,naip_id,r,g,b,nir,ndvi,...,r_avg17,r_entr17,g_avg17,g_entr17,b_avg17,b_entr17,nir_avg17,nir_entr17,ndvi_avg17,ndvi_entr17
0,-119.844572,34.406317,EPSG:4326,campus_lagoon,ca_m_3411934_sw_11_060_20200521,124,114,101,142,0.067669,...,100.97924,5.960988,103.20761,5.694083,89.05537,5.339809,139.78546,5.633456,0.17301,4.892476
1,-119.816247,34.419391,EPSG:4326,campus_lagoon,ca_m_3411934_sw_11_060_20200521,106,112,94,151,0.175097,...,111.94463,4.973468,113.3218,4.19741,98.94463,4.512014,140.25952,5.315446,0.107266,4.70152
2,-119.877298,34.41566,EPSG:4326,campus_lagoon,ca_m_3411934_sw_11_060_20200521,94,96,78,135,0.179039,...,110.15571,5.699815,107.882355,5.239462,86.83045,4.680674,145.51556,5.39476,0.138408,3.848143


In [15]:
# Sample GLCM texture features (contrast and correlation) around every point.
#
# For each NAIP scene referenced in `all_pts`:
#   1. open the scene raster,
#   2. reproject the scene's points to the raster crs,
#   3. cut a (2*window_r + 1)-pixel window around each point (all bands),
#   4. compute, per angle and per band, the GLCM of the window and its
#      contrast and correlation.
# The result is `sampled_pts`: the original rows plus one column per
# (band, property, angle) combination.
#
# NOTE: all points are assumed to share a single crs (the one in the first row
# of `crs_col`); datasets with mixed crs values are not handled here.

# ===================================================
# temporary folder for aux rasters
# makedirs (instead of mkdir) also creates the parent 'temp' folder when missing
folp = os.path.join(os.getcwd(), 'temp', 'aux_naip_rasters')
os.makedirs(folp, exist_ok=True)

# ===================================================
t0 = time.time() # initial time tracker

# ===================================================
# scenes to process; rows without an itemid have no scene to sample and are skipped
itemids = list(all_pts[itemid_col].dropna().unique())
# crs of dataframe; .iloc is positional, so this works even if the index has no label 0
crs = CRS.from_string(all_pts[crs_col].iloc[0])
N = len(itemids)  # counter to finish

# ===================================================
# names of the new columns, in the same order in which features are computed:
# angle (outer) -> band -> property (inner)
band_names = ['r', 'g', 'b', 'nir']
angle_labels = ['E', 'N']   # labels for angles 0 and pi/2
glcm_props = [('cont', 'contrast'), ('corr', 'correlation')]   # (column abbreviation, graycoprops name)
feature_cols = [f'{band}_{abbr}{label}'
                for label in angle_labels
                for band in band_names
                for abbr, _ in glcm_props]

# fail early (before any raster is opened) if the angle config and the labels disagree
if len(angles) != len(angle_labels):
    raise ValueError('`angles` must have one entry per angle label ' + str(angle_labels))

# ===================================================
scene_frames = [] # sampled pts from each scene are collected here

print('REMAINING: ', N, 'scenes', end="\r")
for itemid in itemids:
    # ---------------------------------------
    # open raster reader for NAIP scene
    item = sr.get_item_from_id(itemid)
    href = pc.sign(item.assets["image"].href)
    naip_rast_r = rioxr.open_rasterio(href)

    try:
        pts_scene = all_pts.loc[all_pts[itemid_col] == itemid]

        if len(pts_scene) != 0:
            # create geodataframe with pts in scene
            pts_scene_df = sr.geodataframe_from_csv(df=pts_scene, lon_label='x', lat_label='y', crs=crs)
            # convert pts to crs of NAIP scene
            pts_col = pts_scene_df.to_crs(naip_rast_r.rio.crs).geometry

            # invariant for every point in the scene
            transform = naip_rast_r.rio.transform()
            height, width = naip_rast_r.shape[-2:]

            samples = []
            for pt in pts_col:
                # make a small window out of raster, all bands
                row, col = rowcol(transform, pt.x, pt.y)
                # clamp to the raster extent: a negative start would otherwise be
                # interpreted as "count from the end" and select the wrong pixels
                r0, r1 = max(row - window_r, 0), min(row + window_r + 1, height)
                c0, c1 = max(col - window_r, 0), min(col + window_r + 1, width)

                if r0 >= r1 or c0 >= c1:
                    # point falls outside the raster: no pixels to compute texture from
                    samples.append([np.nan] * len(feature_cols))
                    continue

                # read the window once; it is reused for every angle and band
                window = naip_rast_r[:, r0:r1, c0:c1].values

                # calculate GLCM on that small window, per angle and band,
                # and add its contrast and correlation as features for that pixel
                pixel_feats = []
                for angle in angles:
                    for band in range(len(band_names)):
                        glcm = graycomatrix(window[band],
                                            distances=distances,
                                            angles=[angle])
                        for _, prop in glcm_props:
                            pixel_feats.append(graycoprops(glcm, prop)[0, 0])
                samples.append(pixel_feats)

            # ---------------------------------------
            # Add all derived texture features to pts dataframe
            new_features = pd.DataFrame(samples, columns=feature_cols, index=pts_col.index)
            pts = pd.concat([pts_scene, new_features], axis=1)

            # -----------------------------
            # collect all points in the scene
            scene_frames.append(pts)
    finally:
        # release the raster handle before moving on to the next scene
        naip_rast_r.close()

    # ---------------------------------------
    # processing message
    N = N-1
    print('REMAINING: ', N, 'scenes', end="\r")

# end the carriage-return progress line so it is not partially overwritten
print()
print('FINISHED PROCESSING')
print(f'Elapsed time: {time.time() - t0:.1f} s')

# ---------------------------------------
# create data frame with all points
if scene_frames:
    sampled_pts = pd.concat(scene_frames).sort_index()
    # drop the geometry column if the geodataframe helper added one
    sampled_pts = sampled_pts.drop(columns=['geometry'], errors='ignore')
else:
    # no scene had points: return an empty frame with the expected columns
    sampled_pts = pd.DataFrame(columns=[c for c in all_pts.columns if c != 'geometry'] + feature_cols)

FINISHED PROCESSINGs


In [16]:
# preview the result (first rows only, so a full run does not dump the whole frame)
sampled_pts.head()

Unnamed: 0,x,y,pts_crs,aoi,naip_id,r,g,b,nir,ndvi,...,nir_contE,nir_corrE,r_contN,r_corrN,g_contN,g_corrN,b_contN,b_corrN,nir_contN,nir_corrN
0,-119.844572,34.406317,EPSG:4326,campus_lagoon,ca_m_3411934_sw_11_060_20200521,124,114,101,142,0.067669,...,286.972727,0.682982,246.418182,0.822774,142.072727,0.763888,52.763636,0.846636,320.5,0.635615
1,-119.816247,34.419391,EPSG:4326,campus_lagoon,ca_m_3411934_sw_11_060_20200521,106,112,94,151,0.175097,...,60.172727,0.884048,87.845455,0.374615,32.481818,0.375539,29.663636,0.562267,43.672727,0.906145
2,-119.877298,34.41566,EPSG:4326,campus_lagoon,ca_m_3411934_sw_11_060_20200521,94,96,78,135,0.179039,...,71.654545,0.763491,161.836364,0.667704,78.981818,0.715934,31.045455,0.687601,57.0,0.80599


In [8]:
# Write the dataset with the appended GLCM features next to the input csv,
# under the same file name prefixed with 'glcm_'. Only runs when `save` is True.
if save:
    fp = f'{root}glcm_{csv_name}'
    # the index is not written to the csv
    sampled_pts.to_csv(fp, index=False)