In [None]:
#Load Libraries

#import
import numpy as np
import pandas as pd

#system packages
import os

#loading images
from osgeo.gdalconst import *
from osgeo import gdal
from affine import Affine

from skimage import feature

## Custom Function

In [None]:
#Turn raws into dataframe from directory path using gdal
def gdal_to_dataframe(dir_path, nrcan_name = 'NRCAN_transformed.tif', index = [-14, -11], calculate_edge = None, sigma = 3):
    """
    Use gdal to create a labelled dataframe of Sentinel-2 band values from a folder of raw band geotiffs using land cover
    classification from NRCAN. NRCAN extent must be in directory above raw directory. It should be processed using GIS to be
    the same width, height, projection and resolution as clipped raws extents.

    Every file in `dir_path` is opened with gdal, flattened to one value per pixel and stored as a column named after
    the band (a slice of the file name). Files are processed in sorted file-name order so the column order is
    reproducible between runs and machines.

    INPUT
    -----
    `dir_path`: string with path to the raw files directory
    `nrcan_name`: file name of the accompanying nrcan classification extent. File must be in directory above raw directory
    `index`: Two-element sequence [start, stop] used to slice the band name out of each raw file name.
    For files downloaded directly from EO browser without being renamed this will be [-14, -11], hence it is the default
    `calculate_edge`: Name of the raw band on which to perform skimage canny edge detection (`skimage.feature.canny`).
    Default set to None where it will not add this feature. If no file yields this band name a warning is issued
    and no 'edge' column is added.
    `sigma`: Sigma passed to the canny edge detection. Only used when edge detection is performed.
    Defaults to 3

    OUTPUT
    ------
    Pandas DataFrame containing a column for each raw band within input directory, followed by 'edge'
    (integer 0/1, only when the `calculate_edge` band was found) and finally 'y' holding the flattened
    NRCAN classification.

    RAISES
    ------
    OSError: if gdal.Open returns None for a raw file or for the NRCAN file (gdal could not open it)
    ValueError: raised by pandas when the flattened rasters do not all have the same number of pixels
    """
    #local import so the function stays self-contained; only used for the missing-edge-band warning
    import warnings

    band_start, band_stop = index[0], index[1]

    #os.listdir gives no ordering guarantee, sort so that the resulting column order is deterministic
    raw_names = sorted(os.listdir(dir_path))

    #collect every column first and build the dataframe once at the end
    columns = {}

    #stays None unless the requested band is found, replaces the previous reliance on a swallowed NameError
    edge = None

    for raw_name in raw_names:
        raw_path = os.path.join(dir_path, raw_name)
        raw_img = gdal.Open(raw_path)
        #without gdal.UseExceptions() a failed open returns None instead of raising
        if raw_img is None:
            raise OSError('gdal could not open raw band file: ' + raw_path)

        raw_array = np.array(raw_img.ReadAsArray())
        band = raw_name[band_start:band_stop]
        columns[band] = raw_array.flatten()

        #if calculate edge is equal to band name, take that band image and get canny edge
        if calculate_edge is not None and calculate_edge == band:
            print('getting edge')
            edge = feature.canny(raw_array, sigma = sigma).astype(int)

    if edge is not None:
        columns['edge'] = edge.flatten()
    elif calculate_edge is not None:
        #requested band never matched a file name: tell the user rather than silently dropping the feature
        warnings.warn(
            "calculate_edge band '{}' was not found in '{}'; no 'edge' column was added".format(calculate_edge, dir_path)
        )

    nrcan_path = os.path.join(dir_path, '..', nrcan_name)
    nrcan = gdal.Open(nrcan_path)
    if nrcan is None:
        raise OSError('gdal could not open NRCAN classification file: ' + nrcan_path)
    nrcan_array = np.array(nrcan.ReadAsArray())

    columns['y'] = nrcan_array.flatten()

    return pd.DataFrame(columns)

## Load Data

In [None]:
#load raws into dataframe

#band used to generate the canny edge feature, set to None to skip edge generation
target_edge = 'B8A'

#sigma passed to the canny edge detection
sigma = 3

#keyword arguments shared by every extent that gets an edge feature
edge_kwargs = dict(calculate_edge = target_edge, sigma = sigma)

#run for all possible extents
sim_raws = gdal_to_dataframe(
    '../data_tests/simcoe_york/raws',
    nrcan_name = 'Simcoe_York_2019.tif',
    **edge_kwargs
)

#NOTE(review): labrador is the only extent loaded without calculate_edge, so it gets no 'edge' column - confirm this is intentional
lab_raws = gdal_to_dataframe('../data_tests/labrador/raws', sigma = sigma)

james_raws = gdal_to_dataframe(
    '../data_tests/james_bay/raws',
    nrcan_name = 'James_Bay_Med.tif',
    **edge_kwargs
)

#remaining extents use the default nrcan file name
tor_raws = gdal_to_dataframe('../data_tests/toronto/raws', **edge_kwargs)
sjames_raws = gdal_to_dataframe('../data_tests/james_south/raws', **edge_kwargs)
cal_raws = gdal_to_dataframe('../data_tests/calgary/raws', **edge_kwargs)
trois_raws = gdal_to_dataframe('../data_tests/trois/raws', **edge_kwargs)
winn_raws = gdal_to_dataframe('../data_tests/winnipeg/raws', **edge_kwargs)
sask_raws = gdal_to_dataframe('../data_tests/sasketchewan/raws', **edge_kwargs)
newf_raws = gdal_to_dataframe('../data_tests/newfoundland/raws', **edge_kwargs)