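This notebook contains a workflow to add contrast and correlation, derived from the gray-level co-occurrence matrix (GLCM), as texture features. It appends these features as columns to the given dataset and saves the result as a new CSV (prefixed with `glcm_`) in the same folder as the input CSV.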

References:
https://scikit-image.org/docs/stable/api/skimage.feature.html#skimage.feature.graycoprops

https://scikit-image.org/docs/dev/auto_examples/features_detection/plot_glcm.html

https://prism.ucalgary.ca/bitstream/handle/1880/51900/texture%20tutorial%20v%203_0%20180206.pdf?sequence=11&isAllowed=y

https://stackoverflow.com/questions/50834170/image-texture-with-skimage

https://www.tandfonline.com/doi/epdf/10.1080/01431161.2016.1278314?needAccess=true&role=button

In [1]:
import os
import time
import pandas as pd
import numpy as np

import geopandas as gpd
import rioxarray as rioxr
import rasterio

import sample_rasters as sr
from rasterio.crs import CRS
from rasterio.transform import rowcol

from shapely.geometry import box

import planetary_computer as pc

from skimage.feature import graycomatrix, graycoprops

In [2]:
# ***************************************************
# ************* NOTEBOOK VARIABLES ******************

# Input: csv holding the points that will receive the GLCM texture features
root = '/home/jovyan/msai4earth-esa/iceplant_detection/models/modelAE5_FP_2020/'
csv_name = 'modelAE5_FP_2020_train.csv'

# `root` ends with a path separator, so joining yields root + csv_name
fp = os.path.join(root, csv_name)
all_pts = pd.read_csv(fp)

# -------------------------------------------
# GLCM settings
# half-width of the sampling window in pixels: the window spans
# (2*window_r + 1) pixels per side, centered on the point
window_r = 5

# pixel-pair offsets handed to skimage's graycomatrix
distances = [1]
# angles in radians; the output columns label 0 as 'E' and pi/2 as 'N'
angles = [0, np.pi/2]
# GLCM properties of interest
props = ['contrast', 'correlation']

# -------------------------------------------
# column holding the itemid of the NAIP scene that contains each point (if any)
itemid_col = 'naip_id'

# flag for writing the resulting dataset to disk
save = False

# ***************************************************
# ***************************************************

In [None]:
# Inspect which feature columns the input dataset already carries
all_pts.columns

Index(['x', 'y', 'pts_crs', 'aoi', 'naip_id', 'r', 'r_max', 'r_min', 'r_diff',
       'r_avg', 'r_entr', 'r_avg5', 'r_entr5', 'r_contN', 'r_contE', 'r_corrN',
       'r_corrE', 'g', 'g_max', 'g_min', 'g_diff', 'g_avg', 'g_entr', 'g_avg5',
       'g_entr5', 'g_contE', 'g_corrN', 'g_corrE', 'b', 'b_max', 'b_min',
       'b_diff', 'b_avg', 'b_entr', 'b_avg5', 'b_entr5', 'b_contN', 'b_contE',
       'b_corrE', 'nir', 'nir_max', 'nir_min', 'nir_diff', 'nir_avg',
       'nir_entr', 'nir_avg5', 'nir_entr5', 'nir_contN', 'nir_contE',
       'nir_corrN', 'nir_corrE', 'ndvi', 'ndvi_max', 'ndvi_min', 'ndvi_diff',
       'ndvi_avg', 'ndvi_entr', 'ndvi_entr5', 'year', 'month', 'day_in_year',
       'iceplant'],
      dtype='object')

In [None]:
# GLCM columns that may already exist in the input dataset. They are recomputed
# further down, so any stale copies are removed first to avoid duplicated
# column names after the new features are concatenated.
# The list covers every band/property/direction combination produced below.
old_glcm = ['b_contE', 'b_contN', 'b_corrE', 'b_corrN',
            'g_contE', 'g_contN', 'g_corrE', 'g_corrN',
            'nir_contE', 'nir_contN', 'nir_corrE', 'nir_corrN',
            'r_contE', 'r_contN', 'r_corrE', 'r_corrN']

# errors='ignore' skips names that are absent, which keeps this cell safe to
# re-run (all_pts is reassigned here) and lets the list be exhaustive.
all_pts = all_pts.drop(columns=old_glcm, errors='ignore')



In [5]:
# Unique NAIP scene ids referenced by the points. Rows with a missing id (NaN)
# have no scene to sample from, so they are left out of the list.
itemids = list(all_pts[itemid_col].dropna().unique())

# temporary folder for aux rasters
folp = os.path.join(os.getcwd(), 'temp', 'aux_naip_rasters')
# makedirs also creates the intermediate 'temp' folder and does not fail
# when the folder already exists
os.makedirs(folp, exist_ok=True)

In [6]:
def glcm_window_features(window, distances, angles, props):
    """Compute GLCM texture properties for every band of a raster window.

    Parameters
    ----------
    window : numpy.ndarray
        Array indexed as (band, row, col).
    distances, angles : sequence
        Pixel-pair offsets, passed unchanged to skimage's graycomatrix.
    props : sequence of str
        Property names passed to skimage's graycoprops.

    Returns
    -------
    list
        One value per (angle, band, property) combination, with angle varying
        slowest and property fastest. Only the first distance is used.
    """
    per_band = []
    for band_window in window:
        # one co-occurrence matrix per band covers all angles at once
        glcm = graycomatrix(band_window, distances=distances, angles=angles)
        # each graycoprops result has shape (n_distances, n_angles)
        per_band.append([graycoprops(glcm, prop) for prop in props])

    return [band_props[p][0, a]
            for a in range(len(angles))
            for band_props in per_band
            for p in range(len(props))]


t0 = time.time() # initial time tracker

# ---------------------------------------
# names of the new feature columns, in the same order as the values returned
# by glcm_window_features
band_names = ['r', 'g', 'b', 'nir']   # the column names assume this band order in the raster
angle_labels = ['E', 'N']             # one suffix per entry of `angles` (0 -> E, pi/2 -> N)
prop_abbr = {'contrast': 'cont', 'correlation': 'corr'}

if len(angle_labels) != len(angles):
    raise ValueError('angle_labels must have one label per entry in angles')

feature_cols = [band + '_' + prop_abbr.get(prop, prop) + label
                for label in angle_labels
                for band in band_names
                for prop in props]
nan_feats = [np.nan] * len(feature_cols)

scene_frames = [] # sampled pts from each scene are collected here

N = len(itemids)  # counter to finish

crss = all_pts.pts_crs.unique()  # in case there are mixed crss in the dataset

for itemid in itemids:
    # ---------------------------------------
    # open raster reader for NAIP scene
    item = sr.get_item_from_id(itemid)
    href = pc.sign(item.assets["image"].href)
    naip_rast_r = rioxr.open_rasterio(href)

    try:
        # loop-invariant raster properties
        transform = naip_rast_r.rio.transform()
        raster_crs = naip_rast_r.rio.crs
        n_rows, n_cols = naip_rast_r.shape[-2:]

        all_pts_scene = all_pts.loc[all_pts[itemid_col] == itemid]

        for crs_str in crss:
            pts_scene = all_pts_scene[all_pts_scene.pts_crs == crs_str]
            if len(pts_scene) == 0:
                continue

            crs = CRS.from_string(crs_str)
            pts_scene_df = sr.geodataframe_from_csv(df = pts_scene, lon_label='x', lat_label='y', crs=crs)
            pts_col = pts_scene_df.to_crs(raster_crs).geometry

            samples = []
            for pt in pts_col:
                row, col = rowcol(transform, pt.x, pt.y)

                # a point that falls outside the raster has no window to sample
                if not (0 <= row < n_rows and 0 <= col < n_cols):
                    samples.append(nan_feats)
                    continue

                # clamp the window start at 0: a negative start would be read
                # as "count from the end" and return the wrong pixels.
                # A stop past the raster edge is truncated by the slice itself.
                row0 = max(row - window_r, 0)
                col0 = max(col - window_r, 0)

                # read the window once, all bands, as a plain numpy array
                window = naip_rast_r[:len(band_names),
                                     row0:row+window_r+1,
                                     col0:col+window_r+1].values

                # contrast and correlation of the GLCM on that window
                # (all bands, all angles) become the features of the pixel
                samples.append(glcm_window_features(window, distances, angles, props))

            # ---------------------------------------
            # Add all derived texture features to pts dataframe
            new_features = pd.DataFrame(np.vstack(samples), columns=feature_cols)
            pts = pd.concat([pts_scene, new_features.set_index(pts_col.index)], axis=1)

            # -----------------------------
            # collect all points in the scene
            scene_frames.append(pts)
    finally:
        # release the raster reader even if sampling fails
        naip_rast_r.close()

    # ---------------------------------------
    # processing message
    N = N-1
    print('REMAINING: ', N, 'scenes', end="\r")

print('FINISHED PROCESSING')
print('ELAPSED (s): ', round(time.time() - t0, 1))

# ---------------------------------------
# create data frame with all points
sampled_pts = pd.concat(scene_frames).sort_index()
# NOTE(review): a 'geometry' column is presumably added to the points by
# sr.geodataframe_from_csv -- confirm. errors='ignore' keeps this working
# whether or not the column is present.
sampled_pts = sampled_pts.drop(columns=['geometry'], errors='ignore')

FINISHED PROCESSINGs


In [7]:
# Display the dataset with the recomputed GLCM columns appended
sampled_pts

Unnamed: 0,x,y,pts_crs,aoi,naip_id,r,r_max,r_min,r_diff,r_avg,...,nir_contE,nir_corrE,r_contN,r_corrN,g_contN,g_corrN,b_contN,b_corrN,nir_contN,nir_corrN
0,-119.868370,34.417604,EPSG:4326,campus_lagoon,ca_m_3411934_sw_11_060_20200521,61,64,57,7,61.111110,...,1.100000,0.769610,14.009091,0.635333,11.318182,0.550012,3.336364,0.510024,2.500000,0.458072
1,-119.869194,34.414761,EPSG:4326,campus_lagoon,ca_m_3411934_sw_11_060_20200521,116,126,114,12,119.111115,...,120.209091,0.949822,45.518182,0.803686,20.290909,0.786604,13.118182,0.797660,109.772727,0.962598
2,-119.856764,34.410684,EPSG:4326,campus_lagoon,ca_m_3411934_sw_11_060_20200521,115,126,104,22,113.444440,...,7.418182,0.827697,45.481818,0.756663,12.890909,0.763455,6.981818,0.767834,8.481818,0.807013
3,-119.868120,34.417642,EPSG:4326,campus_lagoon,ca_m_3411934_sw_11_060_20200521,59,61,57,4,58.777780,...,1.663636,0.839925,5.581818,0.447329,7.090909,0.600481,1.554545,0.611180,4.409091,0.588690
4,-119.863904,34.413559,EPSG:4326,campus_lagoon,ca_m_3411934_sw_11_060_20200521,110,122,101,21,112.222220,...,3.790909,0.885235,78.145455,0.642810,21.436364,0.700858,16.590909,0.640148,6.645455,0.780970
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1836,-120.438128,34.458685,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,92,98,82,16,89.333336,...,148.390909,0.645066,114.536364,0.849735,64.554545,0.858519,32.645455,0.863824,60.390909,0.865917
1837,-120.484881,34.497110,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,97,118,89,29,102.666664,...,44.881818,0.805088,93.800000,0.749899,63.972727,0.619556,22.518182,0.731722,58.818182,0.739406
1838,-120.485111,34.493451,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,109,110,89,21,101.888885,...,30.545455,0.864088,119.500000,0.549534,42.527273,0.595787,27.881818,0.679398,65.781818,0.670984
1839,-120.468795,34.470439,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,107,128,93,35,105.888885,...,24.618182,0.803617,116.490909,0.662619,75.709091,0.677961,54.318182,0.720067,47.872727,0.596475


In [12]:
# Disabled sanity check: selecting the full expected column list from
# sampled_pts raises a KeyError if any expected column is missing.
# Uncomment to run.
# check = ['x', 'y', 'pts_crs',
#                     'aoi','naip_id',
#                     'r', 'r_max', 'r_min', 'r_diff', 'r_avg', 'r_entr', 'r_avg5', 'r_entr5', 'r_contN', 'r_contE', 'r_corrN', 'r_corrE',
#                     'g', 'g_max', 'g_min', 'g_diff', 'g_avg', 'g_entr', 'g_avg5', 'g_entr5', 'g_contN', 'g_contE', 'g_corrN', 'g_corrE',
#                     'b', 'b_max', 'b_min', 'b_diff', 'b_avg', 'b_entr', 'b_avg5', 'b_entr5', 'b_contN', 'b_contE', 'b_corrN',  'b_corrE',
#                     'nir', 'nir_max', 'nir_min', 'nir_diff', 'nir_avg', 'nir_entr', 'nir_avg5', 'nir_entr5', 'nir_contN', 'nir_contE', 'nir_corrN', 'nir_corrE',
#                     'ndvi', 'ndvi_max', 'ndvi_min', 'ndvi_diff', 'ndvi_avg', 'ndvi_entr','ndvi_entr5',
#                     'year', 'month', 'day_in_year',
#                     'iceplant']
# sampled_pts[check]

Unnamed: 0,x,y,pts_crs,aoi,naip_id,r,r_max,r_min,r_diff,r_avg,...,ndvi_max,ndvi_min,ndvi_diff,ndvi_avg,ndvi_entr,ndvi_entr5,year,month,day_in_year,iceplant
0,-119.868370,34.417604,EPSG:4326,campus_lagoon,ca_m_3411934_sw_11_060_20200521,61,64,57,7,61.111110,...,0.528926,0.485944,0.042982,0.444444,3.246634,3.397571,2020,5,142,0.0
1,-119.869194,34.414761,EPSG:4326,campus_lagoon,ca_m_3411934_sw_11_060_20200521,116,126,114,12,119.111115,...,0.219178,0.160000,0.059178,0.111111,3.129051,3.967296,2020,5,142,0.0
2,-119.856764,34.410684,EPSG:4326,campus_lagoon,ca_m_3411934_sw_11_060_20200521,115,126,104,22,113.444440,...,0.240876,0.174497,0.066379,0.111111,2.684341,3.564984,2020,5,142,0.0
3,-119.868120,34.417642,EPSG:4326,campus_lagoon,ca_m_3411934_sw_11_060_20200521,59,61,57,4,58.777780,...,0.538462,0.510040,0.028421,0.444444,2.362652,2.459482,2020,5,142,0.0
4,-119.863904,34.413559,EPSG:4326,campus_lagoon,ca_m_3411934_sw_11_060_20200521,110,122,101,21,112.222220,...,0.286219,0.181208,0.105011,0.222222,3.719376,3.998629,2020,5,142,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1836,-120.438128,34.458685,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,92,98,82,16,89.333336,...,0.000000,-0.180723,0.180723,0.000000,3.831276,4.141958,2020,6,159,0.0
1837,-120.484881,34.497110,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,97,118,89,29,102.666664,...,0.097674,0.016667,0.081008,0.000000,3.741963,4.224118,2020,6,159,0.0
1838,-120.485111,34.493451,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,109,110,89,21,101.888885,...,0.252101,0.120000,0.132101,0.111111,4.004364,4.515595,2020,6,159,0.0
1839,-120.468795,34.470439,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,107,128,93,35,105.888885,...,0.231405,0.051852,0.179553,0.111111,4.280226,4.600739,2020,6,159,0.0


In [None]:
# Write the dataset with the GLCM features next to the input csv, using a
# 'glcm_' prefix on the file name. Writing is controlled by the `save` flag
# defined with the notebook variables, instead of being unconditional.
if save:
    fp = os.path.join(root, 'glcm_' + csv_name)
    sampled_pts.to_csv(fp, index=False)