In [1]:
import numpy as np
import pandas as pd

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import BaggingRegressor
from sklearn.ensemble import VotingRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import r2_score

from vecstack import stacking

from xgboost import XGBRegressor
from catboost import CatBoostRegressor
import lightgbm 

from scipy.stats import pearsonr


import math

import warnings
from joblib import dump, load

  from pandas import MultiIndex, Int64Index


In [2]:
# Read the survey table from the working directory and preview its first
# five rows (5 is the default row count of `head`).
data = pd.read_csv(filepath_or_buffer="wetSSN.csv")
data.head(n=5)

Unnamed: 0,FID,Eastings,Northings,No_of_Larv,LST,NDVI,NDMI
0,0,667759.1603,1410456.418,70,25.5909,0.134806,-0.051258
1,1,667970.2328,1410159.893,79,25.389299,0.188839,-0.042621
2,2,668100.4408,1410251.599,80,26.0898,0.289951,0.026311
3,3,668075.4149,1410344.91,75,26.104,0.14367,-0.032491
4,4,668321.5382,1409718.525,75,25.640499,0.197746,-0.105824


In [3]:
# Column to predict; every other column that is not an identifier or a
# coordinate is used as a feature.
target = "No_of_Larv"
y = data.loc[:, target]
X = data.drop(["FID", "Eastings", "Northings", target], axis="columns")

# Random 80/20 hold-out split with a fixed seed so the partition is the
# same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=7,
)

X_train.head(n=5)

Unnamed: 0,LST,NDVI,NDMI
79,25.621401,0.242078,-0.019283
386,23.2934,0.395142,0.244493
28,26.513901,0.220413,-0.085882
41,24.960699,0.548239,0.181547
206,26.8918,0.29077,0.067642


In [4]:
# Naive baseline: always predict the training-set mean of the target and
# measure how far off that constant guess is (MAE on the training set).
# Any real model should beat `mae_baseline`.
y_mean = y_train.mean()
y_pred_baseline = [y_mean] * len(y_train)
mae_baseline = mean_absolute_error(y_train, y_pred_baseline)

# Label the mean with the actual target column name rather than a label
# left over from a different dataset.
print(f"Mean {target}:", round(y_mean, 2))
print("Baseline MAE:", round(mae_baseline, 2))

Mean P2 Reading: 55.1
Baseline MAE: 28.58


In [5]:
# --- Level-1 (base) learners -------------------------------------------------
# Every learner is given an explicit seed so that re-running the notebook
# produces the same stacked features and the same final scores.
models = [

    RandomForestRegressor(random_state = 0, n_jobs = -1,
        criterion = "absolute_error", n_estimators = 250, min_samples_split = 8, min_samples_leaf = 2),

    # `random_state` is the documented seed parameter of the scikit-learn
    # style XGBoost wrapper (replaces the legacy `seed` alias).
    XGBRegressor(random_state = 0, n_jobs = -1, learning_rate = 0.1, n_estimators = 250, max_depth = 3),

    GradientBoostingRegressor(n_estimators = 250, learning_rate = 1.0, max_depth = 1, random_state = 0),

    # AdaBoost is stochastic; without `random_state` these two learners made
    # the whole stack non-reproducible.
    # NOTE(review): the SVR is fitted on unscaled features — confirm that is intended.
    AdaBoostRegressor(
        SVR(kernel = "poly", degree = 3, coef0 = 1, C=5), n_estimators = 250, learning_rate = 0.5,
        random_state = 0),

    AdaBoostRegressor(
        DecisionTreeRegressor(max_depth = 2), n_estimators = 250, learning_rate = 0.5,
        random_state = 0),

]

# --- Stacking ----------------------------------------------------------------
# `stacking` fits each base learner with 2-fold shuffled CV and returns one
# prediction column per learner for the train and test sets (S_train, S_test);
# per-fold MAE is printed because verbose = 2.
S_train, S_test = stacking(models, X_train, y_train, X_test, regression = True, metric = mean_absolute_error, n_folds = 2,
    shuffle = True, random_state = 0, verbose = 2)

# --- Level-2 (meta) learner --------------------------------------------------
# Trained on the base learners' predictions rather than on the raw features.
model = ExtraTreesRegressor(random_state = 0, n_jobs = -1,
        criterion = "absolute_error", n_estimators = 250, min_samples_split = 8, min_samples_leaf = 2)
model = model.fit(S_train, y_train)

y_pred = model.predict(S_test)

# --- Hold-out evaluation -----------------------------------------------------
# Compute each metric once and reuse it (RMSE is derived from the MSE).
mae = mean_absolute_error(y_test, y_pred)
mse = metrics.mean_squared_error(y_test, y_pred)

# Final prediction score (MAE on the hold-out set)
print('Final prediction score: [%.8f]' % mae)
print('MSE is:', mse)
print('RMSE is:', np.sqrt(mse))

task:         [regression]
metric:       [mean_absolute_error]
mode:         [oof_pred_bag]
n_models:     [5]

model  0:     [RandomForestRegressor]
    fold  0:  [24.98984783]
    fold  1:  [26.46688043]
    ----
    MEAN:     [25.72836413] + [0.73851630]
    FULL:     [25.72836413]

model  1:     [XGBRegressor]
    fold  0:  [29.83462469]
    fold  1:  [30.86045432]
    ----
    MEAN:     [30.34753951] + [0.51291481]
    FULL:     [30.34753951]

model  2:     [GradientBoostingRegressor]
    fold  0:  [30.34393449]
    fold  1:  [34.93639670]
    ----
    MEAN:     [32.64016559] + [2.29623110]
    FULL:     [32.64016559]

model  3:     [AdaBoostRegressor]
    fold  0:  [28.21187849]
    fold  1:  [28.30226333]
    ----
    MEAN:     [28.25707091] + [0.04519242]
    FULL:     [28.25707091]

model  4:     [AdaBoostRegressor]
    fold  0:  [32.16368565]
    fold  1:  [32.02206728]
    ----
    MEAN:     [32.09287646] + [0.07080919]
    FULL:     [32.09287646]

Final prediction score: [22