In [1]:
import rasterio
from rasterio.enums import Resampling
from rasterio import Affine as A
import numpy as np
import os
import glob
import pandas as pd
import geopandas as gpd
from matplotlib import pyplot as plt
%matplotlib inline

In [2]:
# Acquisition identifier. The token after the last underscore is parsed
# elsewhere in this notebook as the acquisition year.
ACQ_NAME = "klamath_2010"

In [3]:
# Input rasters for this acquisition. Every path is derived from ACQ_NAME;
# the climate file name additionally carries the trailing token of ACQ_NAME.
_acq_suffix = ACQ_NAME.split('_')[-1]
CLIMATE = '../data/processed/rasters_for_modeling/{}_annual_climate_{}.tif'.format(ACQ_NAME, _acq_suffix)

# The remaining rasters only need the acquisition name substituted in.
LIDARMETS, SOILS, POTVEG, WATER, LANDSAT = (
    template.format(ACQ_NAME)
    for template in (
        '../data/processed/rasters_for_modeling/{}_lidar_rescaled.tif',
        '../data/processed/rasters_for_modeling/{}_soils.tif',
        '../data/processed/rasters_for_modeling/{}_potveg.tif',
        '../data/processed/rasters_for_modeling/{}_water.tif',
        '../data/processed/rasters_for_modeling/{}_landsat.tif',
    )
)

In [4]:
# join order = climate, water, cloudmets, gridsurf, topomets, soils, landsat, potveg

In [5]:
# Names of the lidar raster bands, in band order: name i labels band i when
# the feature table is assembled further down.
#
# The list must not be longer than the raster's band count. The metadata
# printed when the lidar raster is opened reports 31 bands, so the three
# trailing names that used to follow 'solar_radiation_index' ('num_returns',
# 'skewness', 'kurtosis') had no band to label and made the band-indexing
# loop run past the end of the array.
LIDAR_COLS = [
    # return proportion per height stratum
    'strat0_return-proportion', 'strat1_return-proportion',
    'strat2_return-proportion', 'strat3_return-proportion',
    'strat4_return-proportion', 'strat5_return-proportion',
    # median intensity per height stratum
    'strat0_intensity-median', 'strat1_intensity-median',
    'strat2_intensity-median', 'strat3_intensity-median',
    'strat4_intensity-median', 'strat5_intensity-median',
    # height distribution
    'height_05-percentile', 'height_25-percentile', 'height_50-percentile',
    'height_75-percentile', 'height_95_percentile', 'height_max',
    # canopy / surface metrics
    'cover', 'potential_volume', 'stddev_height', 'surface_area_ratio',
    'surface_volume', 'surface_volume_ratio',
    # terrain metrics
    'aspect', 'elevation', 'overall_curvature', 'plan_curvature',
    'profile_curvature', 'slope', 'solar_radiation_index',
]

In [6]:
# Load every band of the lidar metrics raster as a masked array and keep its
# metadata and affine transform for later use.
with rasterio.open(LIDARMETS) as lidar_src:
    meta = lidar_src.meta
    transform = lidar_src.transform
    # Read at the raster's own resolution. (A half-resolution read using
    # out_shape with Resampling.average was tried here and left disabled.)
    lidar = lidar_src.read(masked=True)
    print(meta)
# plt.imshow(lidar[18,:,:])
# plt.show()

{'driver': 'GTiff', 'dtype': 'float32', 'nodata': -9999.0, 'width': 5701, 'height': 5251, 'count': 31, 'crs': CRS.from_epsg(6339), 'transform': Affine(20.0, 0.0, 565995.0,
       0.0, -20.0, 4770005.0)}


In [7]:
# Read the water raster resampled (nearest neighbour) onto the lidar grid.
#
# The target grid size is taken from the lidar array that is already in
# memory rather than from `lidar_src`: that dataset handle was closed when its
# `with` block ended, and the array shape also stays correct if the lidar
# raster is ever read at a reduced resolution.
lidar_rows, lidar_cols = lidar.shape[-2:]

with rasterio.open(WATER) as water_src:
    water = water_src.read(
        out_shape=(water_src.count, lidar_rows, lidar_cols),
        resampling=Resampling.nearest,
        masked=True)
    print(water_src.meta)
# plt.imshow(water[0,:,:])
# plt.show()

{'driver': 'GTiff', 'dtype': 'float32', 'nodata': -3.4028234663852886e+38, 'width': 11401, 'height': 10501, 'count': 1, 'crs': CRS.from_epsg(6339), 'transform': Affine(10.0, 0.0, 565995.0,
       0.0, -10.0, 4770004.999999999)}


In [8]:
# Names of the climate raster bands, in band order (name i labels band i when
# the feature table is assembled further down).
CLIMATE_COLS = [
    "MAT", "MWMT", "MCMT", "TD", "MAP", "MSP", "AHM", "SHM",
    "DD_0", "DD5", "DD_18", "DD18", "NFFD", "bFFP", "eFFP",
    "FFP", "PAS", "EMT", "EXT", "MAR", "Eref", "CMD", "RH",
]

# Read the climate raster resampled (nearest neighbour) onto the lidar grid.
#
# The target grid size is taken from the lidar array that is already in
# memory rather than from `lidar_src`: that dataset handle was closed when its
# `with` block ended, and the array shape also stays correct if the lidar
# raster is ever read at a reduced resolution.
#
# NOTE(review): the metadata recorded for this file reports 1050 x 1140 cells
# at 1000 m, a far larger footprint than the lidar grid (5701 x 5251 cells at
# 20 m). An out_shape read resamples the whole raster onto the target shape
# without comparing extents -- confirm the climate raster's georeferencing.
lidar_rows, lidar_cols = lidar.shape[-2:]

with rasterio.open(CLIMATE) as climate_src:
    climate = climate_src.read(
        out_shape=(climate_src.count, lidar_rows, lidar_cols),
        resampling=Resampling.nearest,
        masked=True)
    print(climate_src.meta)

# fig, axs = plt.subplots(6,4, figsize=(10,10), sharex=True, sharey=True)
# for i, name in enumerate(CLIMATE_COLS):
#     axs.ravel()[i].imshow(climate[i,:,:])
#     axs.ravel()[i].set_title(CLIMATE_COLS[i])
    
# fig.delaxes(axs.ravel()[-1])
# plt.tight_layout()

{'driver': 'GTiff', 'dtype': 'float32', 'nodata': -9999.0, 'width': 1050, 'height': 1140, 'count': 23, 'crs': CRS.from_epsg(6339), 'transform': Affine(1000.0, 0.0, 565995.0,
       0.0, -1000.0, 4770005.0)}


In [9]:
# Band-name lookup for the soils raster, indexed by the CSV's 'band' column.
SOIL_README = '../data/processed/soils/readMe_metaSoils.csv'
soil_readme = pd.read_csv(SOIL_README, index_col='band')

# Read the soils raster resampled (nearest neighbour) onto the lidar grid.
#
# The target grid size is taken from the lidar array that is already in
# memory rather than from `lidar_src`: that dataset handle was closed when its
# `with` block ended, and the array shape also stays correct if the lidar
# raster is ever read at a reduced resolution.
lidar_rows, lidar_cols = lidar.shape[-2:]

with rasterio.open(SOILS) as soil_src:
    soil = soil_src.read(
        out_shape=(soil_src.count, lidar_rows, lidar_cols),
        resampling=Resampling.nearest,
        masked=True)
    print(soil_src.meta)

# fig, axs = plt.subplots(2,5, figsize=(15,5))
# for i in range(soil_src.count):
#     axs.ravel()[i].imshow(soil[i,:,:])
#     axs.ravel()[i].set_title(soil_readme.loc[i+1].values[0])
# plt.tight_layout()

{'driver': 'GTiff', 'dtype': 'float32', 'nodata': -9999.0, 'width': 133, 'height': 123, 'count': 10, 'crs': CRS.from_epsg(6339), 'transform': Affine(860.5525535087121, 0.0, 565995.0,
       0.0, -860.5525535087121, 4770005.0)}


In [10]:
# Band-name lookup for the Landsat raster, indexed by the CSV's 'band' column.
README_LANDSAT = '../data/processed/landsat/readMe_metaLandsat.csv'
readme_landsat = pd.read_csv(README_LANDSAT, index_col='band')

# Read the Landsat raster resampled (nearest neighbour) onto the lidar grid.
#
# The target grid size is taken from the lidar array that is already in
# memory rather than from `lidar_src`: that dataset handle was closed when its
# `with` block ended, and the array shape also stays correct if the lidar
# raster is ever read at a reduced resolution.
lidar_rows, lidar_cols = lidar.shape[-2:]

with rasterio.open(LANDSAT) as landsat_src:
    landsat = landsat_src.read(
        out_shape=(landsat_src.count, lidar_rows, lidar_cols),
        resampling=Resampling.nearest,
        masked=True)
    print(landsat_src.meta)

# fig, axs = plt.subplots(3,4, figsize=(20,15))
# for i in range(landsat_src.count):
#     axs.ravel()[i].imshow(landsat[i,:,:])
#     axs.ravel()[i].set_title(readme_landsat.loc[i+1].values[0])
# plt.tight_layout()

{'driver': 'GTiff', 'dtype': 'float32', 'nodata': None, 'width': 3801, 'height': 3501, 'count': 12, 'crs': CRS.from_epsg(6339), 'transform': Affine(30.0, 0.0, 565995.0,
       0.0, -30.0, 4770005.0)}


In [11]:
# Read the potential-vegetation raster resampled (nearest neighbour) onto the
# lidar grid.
#
# The target grid size is taken from the lidar array that is already in
# memory rather than from `lidar_src`: that dataset handle was closed when its
# `with` block ended, and the array shape also stays correct if the lidar
# raster is ever read at a reduced resolution.
lidar_rows, lidar_cols = lidar.shape[-2:]

with rasterio.open(POTVEG) as potveg_src:
    potveg = potveg_src.read(
        out_shape=(potveg_src.count, lidar_rows, lidar_cols),
        resampling=Resampling.nearest,
        masked=True)
    print(potveg_src.meta)
# plt.imshow(potveg[0,:,:])
# plt.show()

{'driver': 'GTiff', 'dtype': 'int16', 'nodata': None, 'width': 3798, 'height': 3499, 'count': 1, 'crs': CRS.from_epsg(6339), 'transform': Affine(30.01879412478754, 0.0, 565995.0,
       0.0, -30.01879412478754, 4770005.0)}


In [12]:
# Assemble the per-pixel feature table: one row per raster cell, one column
# per band, every band flattened in the same order.
# join order = climate, water, cloudmets, gridsurf, topomets, soils, landsat, potveg
NODATA = -9999.0  # sentinel value replaced by NaN in the band columns
KEEP_SOILS = ['bulk_dens', 'soil_depth', 'pct_clay_surf', 'pct_rock_surf', 'pct_sand_surf']


def _add_band_columns(frame, names, bands, keep=None):
    """Copy raster bands into ``frame`` as flattened columns.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table that receives the columns; it is modified in place.
    names : sequence of str
        Band names, positionally aligned with the first axis of ``bands``.
        Each column is stored under ``name.lower().strip()``.
    bands : array with the band index on axis 0
        Band ``i`` is taken as ``bands[i, :, :]`` and flattened.
    keep : callable, optional
        Predicate applied to the raw (unnormalised) name. Bands whose name
        fails the predicate are skipped; the default keeps every band.

    Notes
    -----
    Names that have no matching band are reported and ignored instead of
    raising IndexError. The same normalised key is used for writing and for
    reading the column back, so a name padded with whitespace cannot cause a
    KeyError.
    """
    n_bands = bands.shape[0]
    if len(names) > n_bands:
        print('ignoring {} band name(s) with no matching band: {}'.format(
            len(names) - n_bands, list(names[n_bands:])))
    for i, name in enumerate(names[:n_bands]):
        if keep is not None and not keep(name):
            continue
        column = name.lower().strip()
        frame[column] = bands[i, :, :].flatten()
        frame[column] = frame[column].replace(NODATA, np.nan)


data = pd.DataFrame()

# Climate: every band except 'MAR'.
_add_band_columns(data, CLIMATE_COLS, climate, keep=lambda name: name != 'MAR')

# Water: single band.
_add_band_columns(data, ['distance_to_water_m'], water)

# Lidar metrics: all bands.
_add_band_columns(data, LIDAR_COLS, lidar)

# Soils: only the bands listed in KEEP_SOILS; names come from the readme table.
_add_band_columns(data, soil_readme.values.flatten(), soil,
                  keep=lambda name: name.strip() in KEEP_SOILS)

# Landsat: all bands; names come from the readme table.
_add_band_columns(data, readme_landsat.values.flatten(), landsat)

# Potential vegetation is stored as read, without the NODATA replacement.
data['pot_veg_type'] = potveg[0,:,:].flatten()

# Offset between 2019 and the year encoded at the end of ACQ_NAME.
# NOTE(review): 2019 is hard-coded; presumably the reference year of the
# downstream model -- confirm.
data['year_diff'] = 2019 - int(ACQ_NAME.split('_')[-1])

In [13]:
# Column groups of the assembled feature table (lower-case, as stored there).
# NOTE: this rebinds LIDAR_COLS and CLIMATE_COLS, which were defined earlier in
# the notebook with the raw band names; re-running an earlier cell after this
# one will see these values instead.
LIDAR_COLS = [
    'strat0_return-proportion',
    'strat1_return-proportion',
    'strat2_return-proportion',
    'strat3_return-proportion',
    'strat4_return-proportion',
    'strat5_return-proportion',
    'strat0_intensity-median',
    'strat1_intensity-median',
    'strat2_intensity-median',
    'strat3_intensity-median',
    'strat4_intensity-median',
    'strat5_intensity-median',
    'height_05-percentile',
    'height_25-percentile',
    'height_50-percentile',
    'height_75-percentile',
    'height_95_percentile',
    'height_max',
    'cover',
    'potential_volume',
    'stddev_height',
    'surface_area_ratio',
    'surface_volume',
    'surface_volume_ratio',
    'aspect',
    'elevation',
    'overall_curvature',
    'plan_curvature',
    'profile_curvature',
    'slope',
    'solar_radiation_index',
]

LANDSAT_COLS = [
    'green', 'blue', 'red', 'nir',
    'endvi', 'ndvi', 'savi',
    'swir1', 'swir2',
    'wetness', 'greenness', 'brightness',
]

CLIMATE_COLS = [
    'mat', 'mwmt', 'mcmt', 'td',
    'map', 'msp', 'ahm', 'shm',
    'dd_0', 'dd5', 'dd_18', 'dd18',
    'nffd', 'bffp', 'effp', 'ffp',
    'pas', 'emt', 'ext',
    'eref', 'cmd', 'rh',
]

SOIL_COLS = [
    'bulk_dens',
    'soil_depth',
    'pct_clay_surf',
    'pct_rock_surf',
    'pct_sand_surf',
]

In [14]:
# Drop every pixel that is missing a value in any of these columns.
required_cols = (
    ['elevation']
    + LANDSAT_COLS
    + ['mat']
    + SOIL_COLS
    + ['pot_veg_type', 'distance_to_water_m', 'year_diff']
)
complete_rows = data.dropna(subset=required_cols)

# Keep only pixels with canopy cover >= 10 AND 95th-percentile height >= 1.3;
# forest types do not need to be predicted for pixels failing either criterion.
enough_cover = complete_rows['cover'] >= 10
tall_enough = complete_rows['height_95_percentile'] >= 1.3
good_data = complete_rows.loc[enough_cover & tall_enough]

In [15]:
# Write the filtered feature table for this acquisition as CSV (the row index
# is written too, since to_csv is called with its defaults).
features_csv = '../data/processed/{}_features.csv'.format(ACQ_NAME)
good_data.to_csv(features_csv)