In [None]:
# Identifier of the catalog item to load (passed to utility.get_item_from_id below).
itemid = 'ca_m_3411934_sw_11_060_20200521'
# Fields next to Goleta Slough
# Closed rectangular ring: the first and last vertices are the same point.
reduce_box = Polygon([[-119.8284196946,34.4162731913],
                       [-119.8101541026,34.4162731913],
                       [-119.8101541026,34.4353838099],
                       [-119.8284196946,34.4353838099],
                       [-119.8284196946,34.4162731913]])

# CRS in which the reduce_box coordinates above are expressed.
reduce_box_crs="EPSG:4326"

In [None]:
item = utility.get_item_from_id(itemid)
item.datetime

In [None]:
type(item.datetime)

In [None]:
type(item.datetime.date())

In [None]:
# ***************************************************************************************************
# ***************************************************************************************************

def rioxr_from_itemid(itemid, reduce_box = False, reduce_box_crs = False):
    """
    Open the "image" asset of a catalog item as a raster, optionally clipped.

    Parameters:
        itemid: identifier passed to utility.get_item_from_id.
        reduce_box: geometry whose bounding box is used to clip the raster.
            False (the default) or None leaves the raster unclipped.
        reduce_box_crs: CRS of reduce_box (e.g. "EPSG:4326"). The box is
            reprojected from this CRS to the raster's CRS before clipping.

    Returns:
        The object returned by rioxr.open_rasterio (clipped to the bounds of
        reduce_box when one is given), with the item's datetime stored in
        attrs['datetime'].
    """
    item = utility.get_item_from_id(itemid)
    href = pc.sign(item.assets["image"].href)

    rast = rioxr.open_rasterio(href)

    # Identity checks rather than `!= False`: comparing a geometry object with
    # a bool relies on the geometry's __eq__/__ne__, and None should also mean
    # "do not clip".
    if reduce_box is not False and reduce_box is not None:
        reduce = gpd.GeoDataFrame({'geometry':[reduce_box]}, crs=reduce_box_crs)
        # Clip in the raster's own CRS; only the bounding box of the geometry is used.
        reduce = reduce.to_crs(rast.rio.crs)
        rast = rast.rio.clip_box(*reduce.total_bounds)

    rast.attrs['datetime'] = item.datetime

    return rast

In [None]:
rast = rioxr_from_itemid(itemid)
rast

In [None]:
rast.attrs['datetime'] == item.datetime

In [None]:
type(rast)

In [None]:
rast.rio.crs

In [None]:
rast.rio.transform()

In [None]:
rast_small = rioxr_from_itemid(itemid, reduce_box, reduce_box_crs)
rast_small

In [None]:
rast_small.rio.crs

In [None]:
rast_small.rio.transform()

In [None]:
# ***************************************************************************************************
# ***************************************************************************************************

def raster_as_df(raster, band_names):
    """
    Flatten a multi-band raster array into a table with one column per band.

    Parameters:
        raster: array exposing .reshape; its total size must be divisible by
            len(band_names) (for the rasters used here, bands come first).
        band_names: list of column names, one per band.

    Returns:
        pandas DataFrame with len(band_names) columns; each row holds the band
        values found at the same flattened position of every band.
    """
    n_bands = len(band_names)
    # One row per band after the reshape; transposing puts the bands in columns.
    band_rows = raster.reshape([n_bands, -1])
    return pd.DataFrame(band_rows.T, columns=band_names)

In [None]:
# Flatten the full raster into a pixel table; the column order fixes the
# positional band indices used later: 0 = r, 1 = g, 2 = b, 3 = nir.
pixels = raster_as_df(rast.to_numpy(),  ['r','g','b','nir'])

In [None]:
# ***************************************************************************************************
# ***************************************************************************************************

def normalized_difference_index(df, *args):
    """
    Normalized difference (a - b) / (a + b) of two columns selected by position.

    Parameters:
        df: pandas DataFrame holding the band columns.
        *args: args[0] is the positional index of column a, args[1] that of
            column b. Any further arguments are ignored.

    Returns:
        pandas Series with the element-wise normalized difference.
    """
    first_pos = args[0]
    second_pos = args[1]

    # Cast to int16 before the arithmetic so the subtraction is signed.
    band_a = df.iloc[:, first_pos].astype('int16')
    band_b = df.iloc[:, second_pos].astype('int16')

    difference = band_a - band_b
    total = band_a + band_b
    return difference / total

In [None]:
# Column positions 3 and 0 are 'nir' and 'r', so this is (nir - r) / (nir + r).
ndvi = normalized_difference_index(pixels,3,0)
ndvi

In [None]:
# ***************************************************************************************************
# ***************************************************************************************************

def feature_df_treshold(df, feature_name, thresh, keep_gr, func, *args):
    """
    Compute a feature column and split the rows of df by a threshold on it.

    The feature is computed as func(df, *args) and stored in df[feature_name].
    This adds (or overwrites) that column on the DataFrame passed in.

    Parameters:
        df: pandas DataFrame to compute the feature on.
        feature_name: name of the column that receives the feature values.
        thresh: threshold the feature is compared against.
        keep_gr: if true, keep rows with feature > thresh; otherwise keep rows
            with feature < thresh.
        func: callable invoked as func(df, *args) returning the feature values.
        *args: extra positional arguments forwarded to func.

    Returns:
        keep: the rows of df that satisfy the comparison.
        deleted_indices: numpy array with the index labels of every other row.
            This is the exact complement of keep, so rows equal to thresh and
            rows whose feature is NaN (e.g. 0/0 in a normalized difference)
            are reported as deleted instead of silently vanishing from both
            outputs.
    """
    df[feature_name] = func(df, *args)
    feature = df[feature_name]

    if keep_gr:
        keep_mask = feature > thresh
    else:
        keep_mask = feature < thresh

    keep = df[keep_mask]
    # A single mask and its complement: avoids building a second filtered
    # DataFrame only to read its index, and guarantees keep/deleted partition df.
    deleted_indices = df.index[(~keep_mask).to_numpy()].to_numpy()

    return keep, deleted_indices

In [None]:
# Adds an 'ndwi' column to `pixels`, computed from column positions 1 ('g') and
# 3 ('nir') as (g - nir) / (g + nir). keep_gr=False keeps the rows whose value
# is strictly below 0.3; `water_index` is the array of removed row labels
# returned by the function.
not_water, water_index = feature_df_treshold(pixels, 'ndwi', 0.3, False, normalized_difference_index, 1,3)

In [None]:
# Adds an 'ndvi' column computed from column positions 3 ('nir') and 0 ('r') as
# (nir - r) / (nir + r). keep_gr=True keeps the rows strictly above 0.05.
# NOTE(review): `not_water` is a filtered subset of `pixels` and the function
# assigns a column on it; presumably this is what triggers the "setting values
# on copy" warning mentioned in the TO DO further down - confirm.
is_veg, non_veg_index = feature_df_treshold(not_water, 'ndvi', 0.05, True, normalized_difference_index, 3,0)

In [None]:
is_veg

In [None]:
water_index

In [None]:
non_veg_index

In [None]:
def indices_to_image(nrows, ncols, indices_list, values, back_value):
    """
    Rebuild a 2-D image from groups of flat (row-major) pixel indices.

    Parameters:
        nrows: number of rows of the output image.
        ncols: number of columns of the output image.
        indices_list: sequence of integer index collections (numpy arrays or
            lists); each entry holds flat pixel indices in [0, nrows*ncols).
        values: sequence with one value per entry of indices_list; every pixel
            in indices_list[k] is set to values[k]. Later groups overwrite
            earlier ones where they overlap.
        back_value: value given to any pixel not in the union of the indices.

    Returns:
        numpy float array of shape (nrows, ncols).

    Raises:
        ValueError: if indices_list and values have different lengths.
    """
    if len(indices_list) != len(values):
        raise ValueError(
            "indices_list and values must have the same length, got "
            f"{len(indices_list)} and {len(values)}"
        )

    # background, any pixel not in the union of indices will be given this value
    reconstruct = np.full((nrows, ncols), back_value, dtype=float)

    for flat_indices, value in zip(indices_list, values):
        flat_indices = np.asarray(flat_indices)
        if flat_indices.size == 0:
            # Nothing to paint for this group (also covers an empty list,
            # which numpy would otherwise turn into a float array).
            continue
        # Integer divmod maps a flat index to (row, col) without going through
        # float division and truncation.
        rows, cols = np.divmod(flat_indices, ncols)
        reconstruct[rows, cols] = value

    return reconstruct

In [None]:
# Image size is taken from the raster itself (bands, rows, cols) instead of
# hard-coded numbers, so the flat pixel indices always match the array they
# were derived from. Pixel codes: 3 = rows removed by the NDWI threshold,
# 2 = rows removed by the NDVI threshold, 1 = everything else.
reconstruct = indices_to_image(rast.shape[1], rast.shape[2], [water_index, non_veg_index], [3,2], back_value=1)

In [None]:
# Write the reconstructed class image next to the notebook, reusing the CRS
# and affine transform of the source raster.
# NOTE(review): `reconstruct` is a float array while 'int16' is passed as the
# last argument - confirm that utility.save_raster casts before writing.
utility.save_raster(reconstruct,
                    os.path.join(os.getcwd(), 'trial.tif'),
                    (rast.shape[1],rast.shape[2]),
                    1,
                    rast.rio.crs,
                    rast.rio.transform(), 'int16' )


In [None]:
# ***************************************************************************************************
# ***************************************************************************************************

def add_date_features(df, date):
    """
    Return a copy of df with constant date columns appended.

    Parameters:
        df: pandas DataFrame to extend.
        date: object exposing .year, .month and .day.

    Returns:
        New DataFrame (from df.assign) with the columns 'year', 'month' and
        'day_in_year'; the last one comes from utility.day_in_year.
    """
    year = date.year
    month = date.month
    day_of_year = utility.day_in_year(date.day, date.month, date.year)

    return df.assign(year=year, month=month, day_in_year=day_of_year)

In [None]:
# Add the date columns to the vegetation subset. Passing `pixels` here would
# overwrite `is_veg` with every pixel of the raster and undo the NDWI/NDVI
# filtering done above.
is_veg = add_date_features(is_veg, rast.attrs['datetime'])

In [None]:
is_veg

In [None]:
# TO DO: figure out warning about setting values on copy

TIMING OLD VS NEW

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import time

import rasterio
import rioxarray as rioxr
import geopandas as gpd

import planetary_computer as pc

from shapely.geometry import Polygon

import data_sampling_workflow.utility as utility

In [2]:
import iceplant_detection_functions as ipf
import refactoring_modules as rm

In [3]:
# # ORIGINAL METHOD

# itemid = 'ca_m_3411934_sw_11_060_20200521'
# t0 = time.time()
# item = utility.get_item_from_id(itemid)

# df = ipf.features_over_aoi(item, 
#                            utility.get_raster_from_item(item).read([1,2,3,4]), 
#                            thresh=0.05)
# print('time to make features df: ', (time.time()-t0))
# # memory 2.14 GB
# # time to make features df:  13.830940961837769

In [4]:
# def add_spectral_features(df, ndwi_thresh, ndvi_thresh):
    
#     not_water, water_index = rm.feature_df_treshold(df, 
#                                              'ndwi', ndwi_thresh, False, 
#                                              rm.normalized_difference_index, 1,3)   
#     is_veg, not_veg_index = rm.feature_df_treshold(not_water, 
#                                                    'ndvi', ndvi_thresh, True, 
#                                                    rm.normalized_difference_index, 3,0)
#     return is_veg, water_index, not_veg_index


In [5]:
itemid = 'ca_m_3411934_sw_11_060_20200521'

# perf_counter is a monotonic clock intended for measuring elapsed time.
t0 = time.perf_counter()
raster = rm.rioxr_from_itemid(itemid)

is_veg, water_index, not_veg_index = rm.add_spectral_features(df = rm.raster_as_df(raster.to_numpy(), ['r','g','b','nir']), 
                                                           ndwi_thresh = 0.3, 
                                                           ndvi_thresh = 0.05) 
# Reassign instead of drop(..., inplace=True): `is_veg` is a filtered subset of
# a larger frame, and mutating it in place makes the cell harder to re-run.
is_veg = is_veg.drop(columns='ndwi')


is_veg = rm.add_date_features(is_veg, raster.datetime)

print('time to make features df: ', (time.perf_counter()-t0))
# time to make features df:  19.167106866836548
# memory usage after: 3.43 GB

time to make features df:  19.167106866836548


In [6]:
is_veg

Unnamed: 0,r,g,b,nir,ndvi,year,month,day_in_year
83,79,101,86,147,0.300885,2020,5,142
84,71,98,73,158,0.379913,2020,5,142
85,95,118,82,180,0.309091,2020,5,142
86,105,125,93,188,0.283276,2020,5,142
87,155,164,138,185,0.088235,2020,5,142
...,...,...,...,...,...,...,...,...
131964172,32,49,54,38,0.085714,2020,5,142
131974752,33,45,53,39,0.083333,2020,5,142
132047704,50,92,76,66,0.137931,2020,5,142
132174895,78,121,130,88,0.060241,2020,5,142


In [None]:
is_veg