In [1]:
import pandas as pd
import numpy as np
import joblib as jb
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from lightgbm import LGBMRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

In [2]:
path = '../data/generated/df_newimages_bands_downside.pkl.z'

In [3]:
df = jb.load(path)
df.head()

Unnamed: 0,x,y,z,b1,b2,b3,b4,b5,b6,b7,b8,b2b4,b3b4,ndwi15,ndwi24,ndwi53,cspmb7
0,233,1130,3.195862,0.1199,0.0866,0.0667,0.0464,0.049,0.0316,0.0283,0.0238,1.162614,1.094573,0.419775,0.302256,-0.152982,23.382784
1,233,1131,3.27303,0.1199,0.088,0.0668,0.0457,0.049,0.0316,0.0283,0.0237,1.171434,1.099318,0.419775,0.31638,-0.153713,23.382784
2,233,1132,3.299687,0.1199,0.0879,0.0666,0.0461,0.0488,0.0324,0.0281,0.0238,1.168473,1.096035,0.421458,0.31194,-0.154246,23.158824
3,233,1133,3.268182,0.1199,0.0882,0.0692,0.0452,0.0488,0.0324,0.0281,0.0232,1.175411,1.111754,0.421458,0.322339,-0.172881,23.158824
4,233,1134,3.278125,0.1196,0.0884,0.0677,0.0454,0.0489,0.0323,0.0285,0.0238,1.174645,1.104724,0.419585,0.321375,-0.161235,23.607309


# Only Bands baseline

In [4]:
only_bands_features = df[['b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'b7', 'b8']]
only_bands_target = df.z

In [5]:
X_train, X_val, y_train, y_val = train_test_split(only_bands_features, only_bands_target, test_size=0.3, random_state=42)

In [6]:
rf_onlybands_baseline = RandomForestRegressor()
lgbm_onlybands_baseline = LGBMRegressor()

rf_onlybands_baseline.fit(X_train, y_train)
lgbm_onlybands_baseline.fit(X_train, y_train)

LGBMRegressor()

In [7]:
rf_onlybands_baseline = rf_onlybands_baseline.predict(X_val)
lgbm_onlybands_baseline = lgbm_onlybands_baseline.predict(X_val)

In [8]:
print('RF Only Bands Metrics:')
print('R2 score:', r2_score(y_val, rf_onlybands_baseline))
print('MAE:', r2_score(y_val, rf_onlybands_baseline))
print('MSE:', mean_absolute_error(y_val, rf_onlybands_baseline))
print('RMSE:', np.sqrt(mean_squared_error(y_val, rf_onlybands_baseline)))
print()


print('LGBM Only Bands Metrics:')
print('R2 score:', r2_score(y_val, lgbm_onlybands_baseline))
print('MAE:', r2_score(y_val, lgbm_onlybands_baseline))
print('MSE:', mean_absolute_error(y_val, lgbm_onlybands_baseline))
print('RMSE:', np.sqrt(mean_squared_error(y_val, lgbm_onlybands_baseline)))

RF Only Bands Metrics:
R2 score: 0.7952936179948726
MAE: 0.7952936179948726
MSE: 1.37020071405047
RMSE: 2.0384463648697277

LGBM Only Bands Metrics:
R2 score: 0.7127882613393819
MAE: 0.7127882613393819
MSE: 1.7833939544397204
RMSE: 2.414541590867869


In [26]:
jb.dump(rf_baseline, '../data/models/rf_onlybands_baseline.pkl.z')
jb.dump(lgbm_baseline, '../data/models/lgbm_onlybands_baseline.pkl.z')

['../data/models/lgbm_onlybands_baseline.pkl.z']

# Bands, NDWI, Log Ratio and CSPM

In [18]:
'''
Used Features:
bands - (1 ~ 8)
ndwi - [(1,5), (2,4), (5,3)]
cspm - band 7
log ration - [(b2b4), (b3,b4)]
'''

'\nUsed Features:\nbands - (1 ~ 8)\nndwi - [(1,5), (2,4), (5,3)]\ncspm - band 7\nlog ration - [(b2b4), (b3,b4)]\n'

In [4]:
features = df.drop(['x','y','z'], axis=1)
target = df.z

X_train, X_val, y_train, y_val = train_test_split(features, target, test_size=0.3, random_state=42)

In [7]:
rf_baseline = RandomForestRegressor()
lgbm_baseline = LGBMRegressor()

rf_baseline.fit(X_train, y_train)
lgbm_baseline.fit(X_train, y_train)

LGBMRegressor()

In [8]:
rf_baseline_pred = rf_baseline.predict(X_val)
lgbm_baseline_pred = lgbm_baseline.predict(X_val)

In [15]:
print('RF Metrics:')
print('R2 score:', r2_score(y_val, rf_baseline_pred))
print('MAE:', r2_score(y_val, rf_baseline_pred))
print('MSE:', mean_absolute_error(y_val, rf_baseline_pred))
print('RMSE:', np.sqrt(mean_squared_error(y_val, rf_baseline_pred)))
print()


print('LGBM Metrics:')
print('R2 score:', r2_score(y_val, lgbm_baseline_pred))
print('MAE:', r2_score(y_val, lgbm_baseline_pred))
print('MSE:', mean_absolute_error(y_val, lgbm_baseline_pred))
print('RMSE:', np.sqrt(mean_squared_error(y_val, lgbm_baseline_pred)))

RF Metrics:
R2 score: 0.7669557119508013
MAE: 0.7669557119508013
MSE: 1.4650633059889226
RMSE: 2.174967786374046

LGBM Metrics:
R2 score: 0.7138790943384447
MAE: 0.7138790943384447
MSE: 1.7614190315809442
RMSE: 2.4099520025805754


In [17]:
jb.dump(rf_baseline, '../data/models/rf_baseline.pkl.z')
jb.dump(lgbm_baseline, '../data/models/lgbm_baseline.pkl.z')

['../data/models/lgbm_baseline.pkl.z']