In [None]:
## Load h2o and do some predictions!

In [None]:
import pandas as pd
import numpy as np
import rasterio
import h2o
from h2o.estimators.gbm import H2OGradientBoostingEstimator
from matplotlib import pyplot as plt
%matplotlib inline

In [None]:
# Connect to an H2O cluster (h2o starts a local one if none is already running).
h2o.init()

In [None]:
# Acquisition name; the input and output file names below are all built from it.
ACQ_NAME = 'klamath_2010'
DATA_FILE = '../data/processed/{}_features.csv'.format(ACQ_NAME)
# Parse the feature CSV into an H2OFrame held on the cluster.
hf = h2o.import_file(DATA_FILE)

In [None]:
# Summarize the imported frame as a sanity check on the parse.
hf.describe()

In [None]:
# Column names of the feature table, grouped by data source.
#
# 'height_95_percentile' and 'cover' sit at 1-based positions 17 and 19 of
# LIDAR_COLS.
# NOTE(review): 'height_95_percentile' uses an underscore where the other height
# columns use a hyphen -- confirm it matches the CSV header.
LIDAR_COLS = [
    # return proportion, strat0-strat5
    'strat0_return-proportion', 'strat1_return-proportion', 'strat2_return-proportion',
    'strat3_return-proportion', 'strat4_return-proportion', 'strat5_return-proportion',
    # median intensity, strat0-strat5
    'strat0_intensity-median', 'strat1_intensity-median', 'strat2_intensity-median',
    'strat3_intensity-median', 'strat4_intensity-median', 'strat5_intensity-median',
    # height percentiles and maximum
    'height_05-percentile', 'height_25-percentile', 'height_50-percentile',
    'height_75-percentile', 'height_95_percentile', 'height_max',
    # cover, volume and surface metrics
    'cover', 'potential_volume', 'stddev_height',
    'surface_area_ratio', 'surface_volume', 'surface_volume_ratio',
    # aspect, elevation, curvature, slope and solar radiation
    'aspect', 'elevation',
    'overall_curvature', 'plan_curvature', 'profile_curvature',
    'slope', 'solar_radiation_index',
]

LANDSAT_COLS = [
    'green', 'blue', 'red', 'nir',
    'endvi', 'ndvi', 'savi',
    'swir1', 'swir2',
    'wetness', 'greenness', 'brightness',
]

CLIMATE_COLS = [
    'mat', 'mwmt', 'mcmt', 'td', 'map', 'msp', 'ahm', 'shm',
    'dd_0', 'dd5', 'dd_18', 'dd18',
    'nffd', 'bffp', 'effp', 'ffp', 'pas', 'emt',
    'ext', 'eref', 'cmd', 'rh',
]

SOIL_COLS = [
    'bulk_dens',
    'soil_depth',
    'pct_clay_surf',
    'pct_rock_surf',
    'pct_sand_surf',
]

In [None]:
# Set column types explicitly rather than relying on what the CSV parser inferred:
# 'eref' and every LiDAR column become numeric, 'pot_veg_type' becomes a factor (categorical).
hf['eref'] = hf['eref'].asnumeric()
hf['pot_veg_type'] = hf['pot_veg_type'].asfactor()
hf[LIDAR_COLS] = hf[LIDAR_COLS].asnumeric()

In [None]:
# Locations of the three saved models, one per predicted attribute.
FOREST_TYPE_MODEL = '../models/gbm_forest_type_rebalance/gbm_type_rebalance'
SIZE_CLASS_MODEL = '../models/gbm_size_class/gbm_size_class'
COVER_MODEL = '../models/gbm_cov_class/gbm_cov_class'

# Load each saved model into the running H2O cluster.
gbm_forest_type_model = h2o.load_model(FOREST_TYPE_MODEL)
gbm_size_class_model = h2o.load_model(SIZE_CLASS_MODEL)
gbm_cover_model = h2o.load_model(COVER_MODEL)

In [None]:
# Score every row of `hf` with the forest-type model; the 'predict' column of the result is used further down.
type_res = gbm_forest_type_model.predict(hf)

In [None]:
# Score every row of `hf` with the size-class model; the 'predict' column of the result is used further down.
size_res = gbm_size_class_model.predict(hf)

In [None]:
# Score every row of `hf` with the cover-class model; the 'predict' column of the result is used further down.
cover_res = gbm_cover_model.predict(hf)

In [None]:
# Keep only the 'predict' column of each result and pull it out of H2O
# as a local data frame.
type_preds, size_preds, cover_preds = (
    result['predict'].as_data_frame()
    for result in (type_res, size_res, cover_res)
)

In [None]:
# Integer code for each size-class label. Codes start at 1; 0 is the value
# used for pixels without a prediction when the raster is assembled.
size_classes = {
    'Nonstocked': 1,
    'Seedling-Sapling': 2,
    'Small': 3,
    'Medium': 4,
    'Large': 5,
    'Very Large': 6,
}
# One-column ('id') frame indexed by label, so labels can be translated with .loc.
size_lookup = pd.DataFrame(
    {'id': list(size_classes.values())},
    index=list(size_classes.keys()),
)
size_lookup.head()

In [None]:
# Integer code for each cover-class label. Codes start at 1; 0 is the value
# used for pixels without a prediction when the raster is assembled.
cover_classes = {
    'Sparse': 1,
    'Open': 2,
    'Moderate': 3,
    'Closed': 4,
}
# One-column ('id') frame indexed by label, so labels can be translated with .loc.
cover_lookup = pd.DataFrame(
    {'id': list(cover_classes.values())},
    index=list(cover_classes.keys()),
)
cover_lookup.head()

In [None]:
# Forest-type crosswalk: the file's own header row is replaced by the names
# 'id' and 'name', and the frame is indexed by 'name' for label lookups.
forest_type_lookup = pd.read_csv(
    '../data/processed/sppgrp_forest_type_crosswalk.csv',
    header=0,
    names=['id', 'name'],
    index_col='name',
)
# Shift every id up by one.
# NOTE(review): presumably so the lowest id does not collide with 0, which is
# the nodata value of the output raster -- confirm the ids in the CSV start at 0.
forest_type_lookup['id'] += 1
forest_type_lookup.head()

In [None]:
LIDARMETS = '../data/processed/rasters_for_modeling/{}_lidar_rescaled.tif'.format(ACQ_NAME)

# Bands 17 and 19 are read as 'height_95_percentile' and 'cover'; those are the
# 1-based positions of the two names in LIDAR_COLS. masked=True returns masked arrays.
with rasterio.open(LIDARMETS) as lidar_src:
    ht95, cover = (lidar_src.read(band, masked=True) for band in (17, 19))
    # Copy of the source metadata, reused as the template for the output raster.
    raster_meta = lidar_src.meta.copy()

In [None]:
# A pixel counts as non-forest when BOTH conditions hold: 95th-percentile
# height below 1.3 and cover below 10. Multiplying the two boolean arrays
# acts as an element-wise AND.
is_short = ht95 < 1.3
is_sparse = cover < 10
non_forest = is_short * is_sparse
plt.imshow(non_forest)

In [None]:
# Mask every pixel that is NOT flagged non-forest. Afterwards `non_forest.mask`
# is False only on the non-forest pixels; pixels that were already masked stay masked.
non_forest = np.ma.masked_where(~non_forest, non_forest)

In [None]:
# Pull column 'C1' out of H2O and use it as the row index. It is the key on
# which the predictions are later aligned with `data`'s integer index.
# NOTE(review): presumably 'C1' is the position of each row's pixel in the
# flattened raster -- confirm against the script that wrote the CSV.
good_data = hf['C1'].as_data_frame().set_index('C1')

In [None]:
# Column-less frame with one row per raster pixel (index 0 .. n_pixels-1, in
# flattened order). The pixel count comes from the band array read earlier
# instead of from `lidar_src`, whose `with` block has already closed the
# dataset, and no throw-away zeros array is allocated just to be dropped.
data = pd.DataFrame(index=pd.RangeIndex(ht95.size))

In [None]:
# Peek at the frame; it has no columns yet, only its integer row index.
data.head()

In [None]:
# Attach the predicted labels. `.values` strips the pandas index, so each
# prediction is matched to `good_data` purely by row position.
for column, preds in (
    ('forest_type_prediction', type_preds),
    ('size_class_prediction', size_preds),
    ('cover_class_prediction', cover_preds),
):
    good_data.loc[:, column] = preds.values

In [None]:
# Translate each predicted label into its integer id through the matching
# lookup table. `.loc` raises KeyError for a label missing from the table.
for label_col, id_col, lookup in (
    ('forest_type_prediction', 'forest_type_id', forest_type_lookup),
    ('size_class_prediction', 'size_class_id', size_lookup),
    ('cover_class_prediction', 'cover_class_id', cover_lookup),
):
    good_data[id_col] = lookup.loc[good_data[label_col]].values

In [None]:
# Show each predicted label next to the id it was translated to, as a visual check of the lookups.
good_data[['forest_type_prediction', 'forest_type_id', 
           'size_class_prediction', 'size_class_id',
           'cover_class_prediction', 'cover_class_id'
          ]].head()

In [None]:
# Copy the ids into the full-size frame. Assignment aligns on the index, so
# rows of `data` with no counterpart in `good_data` come out as NaN; those are
# set to 0 before the column is cast to int.
for id_col in ('forest_type_id', 'size_class_id', 'cover_class_id'):
    data[id_col] = good_data[id_col]
    data[id_col] = data[id_col].fillna(0).astype(int)

In [None]:
# Inspect the boolean mask: False marks the pixels that get zeroed in the output bands below.
non_forest.mask

In [None]:
# Fold each flat id column back into the 2-D raster grid.
# - The grid shape is taken from the band array read earlier rather than from
#   `lidar_src`, whose `with` block has already closed the dataset.
# - `.copy()` detaches the arrays from `data`: the in-place zeroing below would
#   otherwise write through into the DataFrame, or fail outright on the
#   read-only arrays pandas returns under Copy-on-Write.
raster_shape = ht95.shape
type_reshaped = data['forest_type_id'].values.reshape(raster_shape).copy()
size_reshaped = data['size_class_id'].values.reshape(raster_shape).copy()
cover_reshaped = data['cover_class_id'].values.reshape(raster_shape).copy()

# `non_forest.mask` is False exactly on the non-forest pixels, so its inverse
# selects them; they are set to 0 in every band.
non_forest_pixels = ~non_forest.mask
type_reshaped[non_forest_pixels] = 0
size_reshaped[non_forest_pixels] = 0
cover_reshaped[non_forest_pixels] = 0

# Side-by-side preview of the three prediction bands.
fig, axs = plt.subplots(1, 3, figsize=(10, 5))
for ax, band, title in zip(axs,
                           (type_reshaped, size_reshaped, cover_reshaped),
                           ('forest type', 'size class', 'cover class')):
    ax.imshow(band)
    ax.set_title(title)

plt.show()

In [None]:
# Adapt the metadata copied from the LiDAR raster to the output file:
# three uint8 bands, with 0 as the nodata value.
raster_meta.update({
    'dtype': 'uint8',
    'nodata': 0,
    'count': 3,
})
raster_meta

In [None]:
# Write the three prediction grids as bands 1-3 of a single GeoTIFF and label each band.
# NOTE(review): the uint8 cast wraps ids above 255 -- confirm the forest-type ids stay below that.
predictions_path = '../data/processed/predictions/{}_forest_type_predictions.tif'.format(ACQ_NAME)
output_bands = (
    (type_reshaped, 'forest_type'),
    (size_reshaped, 'size_class'),
    (cover_reshaped, 'cover_class'),
)
with rasterio.open(predictions_path, 'w', **raster_meta) as dst:
    for band_index, (band, description) in enumerate(output_bands, start=1):
        dst.write(band.astype(rasterio.uint8), band_index)
        dst.set_band_description(band_index, description)

In [None]:
# Read band 1 back from the file just written and display it as a check on
# the output; masked=True hides the nodata pixels.
written_path = '../data/processed/predictions/{}_forest_type_predictions.tif'.format(ACQ_NAME)
with rasterio.open(written_path, 'r') as src:
    img = src.read(1, masked=True)
plt.imshow(img)

In [None]:
# Uncomment to shut down the H2O cluster once the predictions have been written.
# h2o.cluster().shutdown()