In [35]:
import pandas as pd
import numpy as np
import joblib as jb
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns
from satellite_bathymetry.preprocessing import get_coord_from_pixel_pos, get_pixel_from_coord, ndwi, pixel_ndwi, pixel_log_ratio
from sklearn.preprocessing import PolynomialFeatures

In [30]:
from lightgbm import LGBMRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor
from xgboost import XGBRegressor

In [6]:
# Load the pre-generated dataset dump and preview its first rows.
# NOTE: joblib.load unpickles the file, so only load dumps from a trusted source.
path = './generated/dataset_dataframe.pkl.z'
df = jb.load(filename=path)
df.head(n=5)

Unnamed: 0,x,y,z,b1,b2,b3,b4,b5,b6,b7,b8,b2b4,b3b4,ndwi15,ndwi24,ndwi53,cspmb7
0,233,1130,3.195862,0.1199,0.0887,0.0692,0.0483,0.0518,0.0328,0.0315,0.0252,1.156761,1.092734,0.396622,0.29489,-0.143802,27.041417
1,233,1131,3.27303,0.1199,0.0886,0.0691,0.0484,0.0519,0.0335,0.0317,0.0254,1.155853,1.091779,0.395809,0.293431,-0.142149,27.274666
2,233,1132,3.299687,0.1199,0.0886,0.069,0.0485,0.0519,0.0336,0.032,0.0255,1.155238,1.090825,0.395809,0.292487,-0.141439,27.625527
3,233,1133,3.268182,0.1199,0.0885,0.0689,0.0484,0.0518,0.0336,0.0321,0.0256,1.155562,1.091031,0.396622,0.292915,-0.141674,27.742739
4,233,1134,3.278125,0.1199,0.0884,0.0688,0.0482,0.0517,0.0336,0.0321,0.0257,1.156505,1.091822,0.397436,0.29429,-0.141909,27.742739


# Random Split Data

In [7]:
# Model inputs: the eight band columns; the regression target is the `z` column.
columns = ['b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'b7', 'b8']

# Alternative feature/target selection, currently disabled:
#bands_cspm_features = df[['b1', 'b5', 'b6', 'cspmb7']]
#bands_cspm_target = df.z

# Random 70/30 train/validation split with a fixed seed so the split is repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    df[columns],
    df['z'],
    test_size=0.3,
    random_state=42,
)

# 1.0 - Random Forest

In [10]:
# Baseline: random forest on the raw band features with default hyper-parameters.
# The seed is fixed (same value as the train/validation split) because the
# forest's bootstrap sampling otherwise makes the scores differ on every run.
rf = RandomForestRegressor(random_state=42)
rf.fit(X_train, y_train)
p_rf = rf.predict(X_val)

# MSE is computed once and reused for the RMSE.
mse_rf = mean_squared_error(y_val, p_rf)

print('RF Bands:')
print('R2 score:', r2_score(y_val, p_rf))
print('MAE:', mean_absolute_error(y_val, p_rf))
print('MSE:', mse_rf)
print('RMSE:', np.sqrt(mse_rf))
# Bias = mean prediction minus mean target (positive: predictions are higher on average).
print('Bias:', p_rf.mean() - y_val.mean())

RF Bands:
R2 score: 0.9419715686335639
MAE: 0.5480589461346743
MSE: 1.1706659691714503
RMSE: 1.0819731832034702
Bias: 0.03416003787604627


# 2.0 - XGBoost

In [20]:
# Gradient-boosted trees (XGBoost) on the same band features, default hyper-parameters.
# XGBRegressor comes from the xgboost package and has to be imported in the
# imports cell; the seed matches the one used for the train/validation split.
xg = XGBRegressor(random_state=42)
xg.fit(X_train, y_train)
p_xg = xg.predict(X_val)

# MSE is computed once and reused for the RMSE.
mse_xg = mean_squared_error(y_val, p_xg)

print('XGBoost Bands:')
print('R2 score:', r2_score(y_val, p_xg))
print('Mean Absolute Error:', mean_absolute_error(y_val, p_xg))
print('Mean Squared Error:', mse_xg)
print('Root Mean Squared Error:', np.sqrt(mse_xg))
# Bias = mean prediction minus mean target (positive: predictions are higher on average).
print('Bias:', p_xg.mean() - y_val.mean())

XGBoost Bands:
R2 score: 0.9113324866743583
Mean Absolute Error: 0.8632484611379012
Mean Squared Error: 1.7887790170634044
Root Mean Squared Error: 1.3374524354396324
Bias: 0.04071945245236286


# 3.0 - LGBM

In [22]:
# LightGBM regressor with library defaults, trained on the training split and
# scored on the validation split.
lgbm = LGBMRegressor()
lgbm.fit(X_train, y_train)
p_lgbm = lgbm.predict(X_val)

# Collect the validation metrics first, then print them in a fixed order.
mse_lgbm = mean_squared_error(y_val, p_lgbm)
lgbm_report = (
    ('R2 score:', r2_score(y_val, p_lgbm)),
    ('Mean Absolute Error:', mean_absolute_error(y_val, p_lgbm)),
    ('Mean Squared Error:', mse_lgbm),
    ('Root Mean Squared Error:', np.sqrt(mse_lgbm)),
    # Bias = mean prediction minus mean target.
    ('Bias:', p_lgbm.mean() - y_val.mean()),
)

print('LGBM Bands:')
for metric_label, metric_value in lgbm_report:
    print(metric_label, metric_value)

LGBM Bands:
R2 score: 0.8620653369304733
Mean Absolute Error: 1.1711619251743357
Mean Squared Error: 2.782694831175862
Root Mean Squared Error: 1.6681411304730371
Bias: 0.03185720183167984


# 4.0 - MLPRegressor

In [32]:
# Multi-layer perceptron on the raw band features with default architecture.
# The seed is fixed (same value as the train/validation split) because weight
# initialisation and batch shuffling are random, so unseeded scores are not
# reproducible between runs.
# NOTE(review): the inputs are not standardised here even though StandardScaler
# is imported; MLPs are usually sensitive to feature scale, so scaling may
# improve this baseline -- confirm before comparing against the tree models.
mlp = MLPRegressor(random_state=42)
mlp.fit(X_train, y_train)
p_mlp = mlp.predict(X_val)

# MSE is computed once and reused for the RMSE.
mse_mlp = mean_squared_error(y_val, p_mlp)

print('MLP Bands:')
print('R2 score:', r2_score(y_val, p_mlp))
print('Mean Absolute Error:', mean_absolute_error(y_val, p_mlp))
print('Mean Squared Error:', mse_mlp)
print('Root Mean Squared Error:', np.sqrt(mse_mlp))
# Bias = mean prediction minus mean target (positive: predictions are higher on average).
print('Bias:', p_mlp.mean() - y_val.mean())



MLP Bands:
R2 score: 0.3973657381995642
Mean Absolute Error: 2.6802725945103587
Mean Squared Error: 12.157547697465134
Root Mean Squared Error: 3.4867675141117647
Bias: 0.10094407234150893


# 5.0 - Linear Regressor

In [34]:
# Ordinary least-squares baseline on the raw band features.
lr = LinearRegression().fit(X_train, y_train)
p_lr = lr.predict(X_val)

# Validation metrics, named individually before reporting.
r2_lr = r2_score(y_val, p_lr)
mae_lr = mean_absolute_error(y_val, p_lr)
mse_lr = mean_squared_error(y_val, p_lr)
rmse_lr = np.sqrt(mse_lr)
# Bias = mean prediction minus mean target.
bias_lr = p_lr.mean() - y_val.mean()

print('Linear Regression')
print('R2 score:', r2_lr)
print('Mean Absolute Error:', mae_lr)
print('Mean Squared Error:', mse_lr)
print('Root Mean Squared Error:', rmse_lr)
print('Bias:', bias_lr)

Linear Regression
R2 score: 0.47908435179078634
Mean Absolute Error: 2.48513147942932
Mean Squared Error: 10.508955830919374
Root Mean Squared Error: 3.2417519693707866
Bias: 0.042835625067619176


## 5.1 Polynomial Transform

In [47]:
# Degree-2 polynomial feature expansion, paired with a separate linear model
# that is fitted on the expanded features.
poly = PolynomialFeatures(degree=2)
lr_poly = LinearRegression()

In [48]:
# Fit the expansion on the training features only, then apply the same
# transformation to both splits.
poly.fit(X_train)
train_data = poly.transform(X_train)
val_data = poly.transform(X_val)

In [49]:
# Train the linear model on the polynomial features and score it on the
# transformed validation features.
lr_poly.fit(train_data, y_train)
p_lr_poly = lr_poly.predict(val_data)

# Gather the validation metrics keyed by their printed label.
mse_lr_poly = mean_squared_error(y_val, p_lr_poly)
poly_scores = {
    'R2 score:': r2_score(y_val, p_lr_poly),
    'Mean Absolute Error:': mean_absolute_error(y_val, p_lr_poly),
    'Mean Squared Error:': mse_lr_poly,
    'Root Mean Squared Error:': np.sqrt(mse_lr_poly),
    # Bias = mean prediction minus mean target.
    'Bias:': p_lr_poly.mean() - y_val.mean(),
}

print('Polynomial Regression Bands:')
for score_name, score in poly_scores.items():
    print(score_name, score)

Polynomial Regression Bands:
R2 score: 0.6249267461302317
Mean Absolute Error: 2.0092593661410283
Mean Squared Error: 7.566730375305487
Root Mean Squared Error: 2.7507690516118375
Bias: 0.029257142366680533
