# Data processing functions

# In [None]:

# Extent of `wind_sub`, as returned by the GetExtent helper (defined outside
# this section).
ext = GetExtent(wind_sub)

# Load band 1 of the NDVI difference raster into a NumPy array.
NDVI = gdal.Open('S_2_NDVI_2020_Diff_clip.tiff')
ndvi_band = NDVI.GetRasterBand(1)
NDVI_arr = np.array(ndvi_band.ReadAsArray())

# Q-Q plot: collect the 1st through 100th percentiles of ATL03_bins
ATL03_100 = [np.percentile(ATL03_bins, pct) for pct in range(1, 101)]


import math

from sklearn.metrics import mean_squared_error

# Root Mean Square Error (RMSE) between the `h_max_canopy` and `Canopy_Height`
# columns of `atl03_atl08`: the square root of their mean squared error.
MSE_atl08_ATL03 = mean_squared_error(
    y_true=atl03_atl08['h_max_canopy'],
    y_pred=atl03_atl08['Canopy_Height'],
)
RMSE_atl08_ATL03 = math.sqrt(MSE_atl08_ATL03)

# Cubic spline interpolation
# Select the ATL08 rows whose `alongtrack` value lies strictly between the
# smallest and largest `alongtrack` value of the ATL03 table, so every query
# point below falls inside the x-range the interpolant was built on.
atl03_alongtrack = ATL03[i]['alongtrack']
atl08_alongtrack = ATL08[j]['alongtrack']
inside_atl03_span = (atl08_alongtrack > min(atl03_alongtrack)) & (
    atl08_alongtrack < max(atl03_alongtrack)
)
# `where` blanks out the rows outside the span; `dropna` then removes them
# (together with any row that already held a missing value).
df = ATL08[j].where(inside_atl03_span).dropna()

# Cubic interpolant of `Photon_Height` over `alongtrack`, sampled at the
# retained ATL08 positions.
f = interp1d(atl03_alongtrack, ATL03[i]['Photon_Height'], kind='cubic')
df['ATL03_Photon_Height'] = f(df['alongtrack'])

# geodataframe and study area intersection
# Build one Point per row from the Longitude/Latitude columns. The column is
# assigned on `df`, the table that is converted to a GeoDataFrame on the next
# line.
df['geometry'] = df.apply(lambda row: Point(row.Longitude, row.Latitude), axis=1)
df = gp.GeoDataFrame(df)
df.set_crs(epsg=4326, inplace=True)  # tag the coordinates as EPSG:4326

# Read the study-area shapefile and keep only the part of `df` that
# intersects it.
ROI = gp.GeoDataFrame.from_file('/data/atmani/study_area/Msc_Study_area.shp', crs='EPSG:4326')
df = gp.overlay(df, ROI, how='intersection')

# Elevation extraction from DEM and Datum transformation
dem = rasterio.open('Copernicus_DEM.tiff')

# Convert the x/y coordinates to raster row/column indices, then pick the
# band-1 values at those cells.
row_dem, col_dem = dem.index(x, y)
dem_v = dem.read(1)[row_dem, col_dem]

# Height transformation for the Copernicus DEM (EPSG:3855 -> EPSG:4326).
# Element [2] of the transform result is the transformed third coordinate.
# NOTE(review): the axis order pyproj expects for `x`, `y` is not visible
# here, and EPSG:3855 is a vertical CRS paired with a 2D target — confirm
# this transformer actually changes the heights.
transformer = Transformer.from_crs("epsg:3855", "epsg:4326")
copernicus_xyz = transformer.transform(np.array(x), np.array(y), np.array(dem_v))
df['Copernicus_Elevation'] = copernicus_xyz[2]

# Height transformation for the SRTM elevation column (EPSG:5773 -> EPSG:4326).
# NOTE(review): this reuses `dem_v`, which was sampled from
# Copernicus_DEM.tiff above — confirm that is the intended input.
transformer = Transformer.from_crs("epsg:5773", "epsg:4326")
srtm_xyz = transformer.transform(np.array(x), np.array(y), np.array(dem_v))
df['SRTM_Elevation'] = srtm_xyz[2]



# Terrain slope
# Read band 1 of the Copernicus DEM into a NumPy array.
dem = gdal.Open('Copernicus_DEM.tiff')
dem_band = dem.GetRasterBand(1)
dem_arr = np.array(dem_band.ReadAsArray())

def np_slope(DEM, gridspacing):
    """Return the slope angle, in radians, at every cell of a 2-D grid.

    Args:
        DEM: 2-D array-like of elevations.
        gridspacing: Spacing between neighbouring cells, applied to both
            axes; should be in the same unit as the elevations for the
            angle to be meaningful.

    Returns:
        Array with the same shape as ``DEM`` holding ``arctan`` of the
        gradient magnitude.
    """
    # np.gradient returns the derivative along axis 0 (rows) first, then
    # along axis 1 (columns).
    grad_rows, grad_cols = np.gradient(DEM, gridspacing)
    steepness = np.sqrt(np.square(grad_rows) + np.square(grad_cols))
    return np.arctan(steepness)

# Slope of the DEM in radians, then in degrees.
# NOTE(review): the spacing of 30 presumably means 30 m cells — confirm the
# DEM grid is in metres rather than degrees.
slp = np_slope(dem_arr, 30)
slp_deg = np.rad2deg(slp)

# Write the slope to GeoTIFF
dst_filename = 'Copernicus_Slope.tiff'
n_rows, n_cols = slp_deg.shape
driver = gdal.GetDriverByName('GTiff')
# Single-band Float32 raster with the same dimensions as the slope array.
dataset = driver.Create(dst_filename, n_cols, n_rows, 1, gdal.GDT_Float32)
dataset.GetRasterBand(1).WriteArray(slp_deg)

# Copy the geotransform and projection from the source DEM so the slope
# raster is georeferenced identically.
geotrans = dem.GetGeoTransform()
proj = dem.GetProjection()
dataset.SetGeoTransform(geotrans)
dataset.SetProjection(proj)

# Flush pending writes, then drop the reference so GDAL closes the file.
dataset.FlushCache()
dataset = None

# Calculate the sensing date of ATL08 and ATL03 data from the file's name
from datetime import datetime


def _parse_sensing_date(name):
    """Return the timestamp embedded in an underscore-separated name.

    The first underscore-delimited token that is exactly 14 characters long
    is parsed as ``YYYYMMDDHHMMSS``.

    Raises:
        IndexError: if no token is 14 characters long.
        ValueError: if that token is not a valid timestamp.
    """
    stamps = [token for token in name.split('_') if len(token) == 14]
    return datetime.strptime(stamps[0], '%Y%m%d%H%M%S')


# Assign the whole column in one step. Writing through
# `df['sensing_date'].iloc[i] = ...` is chained assignment: it may update a
# temporary copy and leave `df` unchanged, and it first seeds the column with
# integers before storing datetimes in it.
df['sensing_date'] = df['gt'].map(_parse_sensing_date)
    