In [3]:
#instalação de pacotes necessários
#install.packages("e1071")
#install.packages("caret")
#install.packages("mlbench")
#install.packages("mice")
#install.packages("Metrics")
#install.packages("randomForest")
library(caret)
library(mlbench)
library(mice)
library(Metrics)
library("randomForest")


In [4]:
# Load the biomass dataset from CSV and preview its first rows
dados <- read.csv(file = "databases/5 - Biomassa - Dados.csv")
head(dados)

Unnamed: 0_level_0,dap,h,Me,biomassa
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>
1,6.4,5.0,1.04,7.07
2,7.3,5.0,1.04,10.3
3,7.8,5.5,1.04,13.9
4,9.2,7.6,1.04,18.61
5,9.9,8.2,1.04,30.97
6,10.6,8.7,1.04,46.44


In [5]:
# Split the data into training and test partitions (p = 0.8 goes to training).
# Note: no id column is dropped here; every column of `dados` is kept in both
# partitions.
set.seed(2034)
indice <- createDataPartition(y = dados$biomassa, p = 0.8, list = FALSE)

# Rows selected by the partition index form the training set; the rest, the test set
treino <- dados[indice, ]
teste <- dados[-indice, ]

In [6]:
#funcoes das metricas usadas


#' Standard error of the estimate (Syx)
#'
#' Computes sqrt(sum((observado - predito)^2) / (n - p)), where n is the
#' number of observations.
#'
#' @param observado Numeric vector of observed values.
#' @param predito Numeric vector of predicted values, same length as
#'   `observado`.
#' @param p Value subtracted from n in the denominator (degrees-of-freedom
#'   correction); must be smaller than the number of observations.
#' @return A single numeric value.
syx <- function(observado, predito, p) {
  n <- length(observado)
  # Guard against silent recycling of mismatched vectors and against a
  # zero or negative denominator, which would yield Inf/NaN.
  stopifnot(
    length(predito) == n,
    n > p
  )
  sqrt(sum((observado - predito)^2) / (n - p))
}

#' Coefficient of determination (R²)
#'
#' Computes 1 - SSE / SST, where SSE is the sum of squared differences
#' between predictions and observations, and SST is the sum of squared
#' deviations of the observations from their mean.
#'
#' @param observado Numeric vector of observed values.
#' @param predito Numeric vector of predicted values, same length as
#'   `observado`.
#' @return A single numeric value.
r2 <- function(observado, predito) {
  # Guard against silent recycling of mismatched vectors.
  stopifnot(length(observado) == length(predito))
  soma_residuos <- sum((predito - observado)^2)
  soma_total <- sum((observado - mean(observado))^2)
  1 - (soma_residuos / soma_total)
}

#' Compute and print regression metrics
#'
#' Calculates R², Syx, Pearson correlation, RMSE and MAE for a pair of
#' observed/predicted vectors, prints one metric per line, and returns the
#' values invisibly so callers can store or compare them.
#'
#' @param observado Numeric vector of observed values.
#' @param predito Numeric vector of predicted values.
#' @param p Degrees-of-freedom correction forwarded to `syx()`.
#' @return Invisibly, a named numeric vector with elements `R²`, `Syx`,
#'   `Pearson`, `RMSE` and `MAE`.
metricas <- function(observado, predito, p) {
  # Compute every metric before printing anything, so an error in any of
  # them produces no partial output.
  valores <- c(
    "R²" = r2(observado, predito),
    "Syx" = syx(observado, predito, p),
    "Pearson" = cor(observado, predito, method = "pearson"),
    "RMSE" = rmse(observado, predito),
    "MAE" = mae(observado, predito)
  )

  # Print as "<name>: <value> " followed by a newline, one metric per line
  for (nome in names(valores)) {
    cat(paste0(nome, ":"), valores[[nome]], "\n")
  }

  invisible(valores)
}


### KNN
---

In [7]:
# KNN regression: tune k over a fixed grid using caret's default resampling
# (no trControl is supplied), then evaluate on the test partition.
set.seed(2034)
tune_grid <- expand.grid(k = c(1, 3, 5, 7, 9, 10))
knn <- train(
  biomassa ~ .,
  data = treino,
  method = "knn",
  tuneGrid = tune_grid
)
knn

# Predict on the test partition
predito_knn <- predict(knn, newdata = teste)

# Report test metrics; p is the column count of `teste` minus one (the response)
metricas(
  observado = teste$biomassa,
  predito = predito_knn,
  p = ncol(teste) - 1
)


k-Nearest Neighbors 

240 samples
  3 predictor

No pre-processing
Resampling: Bootstrapped (25 reps) 
Summary of sample sizes: 240, 240, 240, 240, 240, 240, ... 
Resampling results across tuning parameters:

  k   RMSE      Rsquared   MAE     
   1  444.5298  0.8612377  128.7392
   3  393.6790  0.8785611  121.3048
   5  417.9987  0.8668940  122.5961
   7  436.2986  0.8564698  126.4328
   9  459.1026  0.8377583  130.1570
  10  470.2587  0.8297000  131.0792

RMSE was used to select the optimal model using the smallest value.
The final value used for the model was k = 3.

R²: 0.6579974 
Syx: 1448.066 
Pearson: 0.959401 
RMSE: 1411.4 
MAE: 233.0013 


### RNA com hold-out e cross-validation
---

In [8]:
# Neural network (nnet) regression using caret's default resampling and
# default tuning grid (no trControl/tuneGrid supplied), evaluated on the
# test partition. `linout` and `trace` are forwarded to the nnet fit.
# NOTE(review): the recorded run warns about missing values in resampled
# performance measures; predictors are passed unscaled here, which may be
# the cause. Confirm before relying on these results.
set.seed(2034)
rna <- train(
  biomassa ~ .,
  data = treino,
  method = "nnet",
  linout = TRUE,
  trace = FALSE
)
rna

# Predict on the test partition
predito_rna <- predict(rna, newdata = teste)

# Report test metrics; p is the column count of `teste` minus one (the response)
metricas(
  observado = teste$biomassa,
  predito = predito_rna,
  p = ncol(teste) - 1
)


"There were missing values in resampled performance measures."


Neural Network 

240 samples
  3 predictor

No pre-processing
Resampling: Bootstrapped (25 reps) 
Summary of sample sizes: 240, 240, 240, 240, 240, 240, ... 
Resampling results across tuning parameters:

  size  decay  RMSE      Rsquared   MAE     
  1     0e+00  931.9306  0.4592234  423.9288
  1     1e-04  933.1017  0.5895797  430.1406
  1     1e-01  642.6081  0.6613617  283.7577
  3     0e+00  899.1358  0.2790738  420.2962
  3     1e-04  842.0807  0.5535275  386.3472
  3     1e-01  545.2353  0.7531263  206.5807
  5     0e+00  940.5061  0.3108315  433.7675
  5     1e-04  933.3567  0.2002978  423.6672
  5     1e-01  632.4257  0.6337422  249.9103

RMSE was used to select the optimal model using the smallest value.
The final values used for the model were size = 3 and decay = 0.1.

R²: 0.7171674 
Syx: 1316.856 
Pearson: 0.9785522 
RMSE: 1283.513 
MAE: 281.0424 


In [9]:
# Neural network (nnet) regression tuned with 10-fold cross-validation,
# then evaluated on the test partition. Overwrites the previous `rna` and
# `predito_rna` objects.
set.seed(2034)
ctrl <- trainControl(method = "cv", number = 10)
rna <- train(
  biomassa ~ .,
  data = treino,
  method = "nnet",
  linout = TRUE,
  trace = FALSE,
  trControl = ctrl
)
rna

# Predict on the test partition
predito_rna <- predict(rna, newdata = teste)

# Report test metrics; p is the column count of `teste` minus one (the response)
metricas(
  observado = teste$biomassa,
  predito = predito_rna,
  p = ncol(teste) - 1
)


"There were missing values in resampled performance measures."


Neural Network 

240 samples
  3 predictor

No pre-processing
Resampling: Cross-Validated (10 fold) 
Summary of sample sizes: 216, 216, 216, 216, 216, 216, ... 
Resampling results across tuning parameters:

  size  decay  RMSE      Rsquared   MAE     
  1     0e+00  656.5198  0.9698593  378.2633
  1     1e-04  763.3930  0.3826326  437.2020
  1     1e-01  595.6204  0.8034714  293.2357
  3     0e+00  810.2057        NaN  443.3283
  3     1e-04  716.1820  0.4091068  389.5675
  3     1e-01  489.4655  0.8039717  226.3929
  5     0e+00  887.4377  0.1818292  450.8269
  5     1e-04  758.3014  0.4673458  408.7426
  5     1e-01  539.6661  0.8803846  212.4527

RMSE was used to select the optimal model using the smallest value.
The final values used for the model were size = 3 and decay = 0.1.

R²: 0.7689618 
Syx: 1190.188 
Pearson: 0.9845222 
RMSE: 1160.052 
MAE: 244.9104 


### SVM com hold-out e cross-validation
---

In [10]:
# Radial-kernel SVM regression: tune C and sigma over a fixed grid using
# caret's default resampling (no trControl is supplied), then evaluate on
# the test partition.
set.seed(2034)
tune_grid <- expand.grid(
  C = c(1, 2, 10, 50, 100),
  sigma = c(.021, .015, 0.2)
)

svm <- train(
  biomassa ~ .,
  data = treino,
  method = "svmRadial",
  tuneGrid = tune_grid
)
svm

# Predict on the test partition
predito_svm <- predict(svm, newdata = teste)

# Report test metrics; p is the column count of `teste` minus one (the response)
metricas(
  observado = teste$biomassa,
  predito = predito_svm,
  p = ncol(teste) - 1
)


Support Vector Machines with Radial Basis Function Kernel 

240 samples
  3 predictor

No pre-processing
Resampling: Bootstrapped (25 reps) 
Summary of sample sizes: 240, 240, 240, 240, 240, 240, ... 
Resampling results across tuning parameters:

  C    sigma  RMSE      Rsquared   MAE     
    1  0.015  453.6097  0.8755930  147.7669
    1  0.021  443.7960  0.8690152  143.4460
    1  0.200  539.2662  0.7458264  142.0678
    2  0.015  403.7552  0.8906118  137.1970
    2  0.021  403.4320  0.8828341  132.6235
    2  0.200  508.6916  0.7772066  141.2469
   10  0.015  358.1557  0.9053336  120.5798
   10  0.021  372.0385  0.8971864  121.4957
   10  0.200  503.2450  0.7839639  145.1076
   50  0.015  349.4730  0.9109133  117.9506
   50  0.021  354.5228  0.9071668  118.9860
   50  0.200  508.4078  0.7587782  150.0395
  100  0.015  345.0184  0.9126019  118.3573
  100  0.021  343.2457  0.9135753  118.9191
  100  0.200  512.6654  0.7501992  154.1307

RMSE was used to select the optimal model using the smallest value.
The final values used for the model were sigma = 0.021 and C = 100.

R²: 0.9622214 
Syx: 481.2785 
Pearson: 0.9981336 
RMSE: 469.0922 
MAE: 127.2661 


In [11]:
# Radial-kernel SVM regression: tune C and sigma over the same grid with
# 10-fold cross-validation, then evaluate on the test partition.
# Overwrites the previous `svm` and `predito_svm` objects.
set.seed(2034)
tune_grid <- expand.grid(
  C = c(1, 2, 10, 50, 100),
  sigma = c(.021, .015, 0.2)
)
ctrl <- trainControl(method = "cv", number = 10)

svm <- train(
  biomassa ~ .,
  data = treino,
  method = "svmRadial",
  tuneGrid = tune_grid,
  trControl = ctrl
)
svm

# Predict on the test partition
predito_svm <- predict(svm, newdata = teste)

# Report test metrics; p is the column count of `teste` minus one (the response)
metricas(
  observado = teste$biomassa,
  predito = predito_svm,
  p = ncol(teste) - 1
)


Support Vector Machines with Radial Basis Function Kernel 

240 samples
  3 predictor

No pre-processing
Resampling: Cross-Validated (10 fold) 
Summary of sample sizes: 216, 216, 216, 216, 216, 216, ... 
Resampling results across tuning parameters:

  C    sigma  RMSE      Rsquared   MAE     
    1  0.015  348.7853  0.9174187  155.6798
    1  0.021  335.2484  0.9216866  151.7858
    1  0.200  370.4957  0.9143780  145.0972
    2  0.015  314.9185  0.9260421  146.0075
    2  0.021  312.5556  0.9316658  140.0783
    2  0.200  345.3653  0.9256684  138.5301
   10  0.015  283.6193  0.9424587  122.7834
   10  0.021  294.4389  0.9451010  120.6915
   10  0.200  309.3795  0.9445610  132.0466
   50  0.015  305.1771  0.9391301  121.8567
   50  0.021  306.2220  0.9360425  125.2683
   50  0.200  326.4245  0.9360100  136.7059
  100  0.015  302.3510  0.9357033  123.7064
  100  0.021  278.3912  0.9443352  114.9312
  100  0.200  312.4530  0.9331025  136.3111

RMSE was used to select the optimal model using the smallest value.
The final values used for the model were sigma = 0.021 and C = 100.

R²: 0.9622214 
Syx: 481.2785 
Pearson: 0.9981336 
RMSE: 469.0922 
MAE: 127.2661 


### Random Forest com hold-out e cross-validation
---

In [12]:
# Random forest regression using caret's default resampling and default
# tuning grid (no trControl/tuneGrid supplied), evaluated on the test
# partition.
set.seed(2034)

rf <- train(
  biomassa ~ .,
  data = treino,
  method = "rf"
)
rf

# Predict on the test partition
predito_rf <- predict(rf, newdata = teste)

# Report test metrics; p is the column count of `teste` minus one (the response)
metricas(
  observado = teste$biomassa,
  predito = predito_rf,
  p = ncol(teste) - 1
)


note: only 2 unique complexity parameters in default grid. Truncating the grid to 2 .



Random Forest 

240 samples
  3 predictor

No pre-processing
Resampling: Bootstrapped (25 reps) 
Summary of sample sizes: 240, 240, 240, 240, 240, 240, ... 
Resampling results across tuning parameters:

  mtry  RMSE      Rsquared   MAE     
  2     351.5679  0.8983700  107.9245
  3     356.9337  0.8898555  112.3243

RMSE was used to select the optimal model using the smallest value.
The final value used for the model was mtry = 2.

R²: 0.6539241 
Syx: 1456.664 
Pearson: 0.9610971 
RMSE: 1419.78 
MAE: 237.9854 


In [13]:
# Random forest regression tuned with 10-fold cross-validation, then
# evaluated on the test partition. Overwrites the previous `rf` and
# `predito_rf` objects.
set.seed(2034)
ctrl <- trainControl(method = "cv", number = 10)
rf <- train(
  biomassa ~ .,
  data = treino,
  method = "rf",
  trControl = ctrl
)
rf

# Predict on the test partition
predito_rf <- predict(rf, newdata = teste)

# Report test metrics; p is the column count of `teste` minus one (the response)
metricas(
  observado = teste$biomassa,
  predito = predito_rf,
  p = ncol(teste) - 1
)


note: only 2 unique complexity parameters in default grid. Truncating the grid to 2 .



Random Forest 

240 samples
  3 predictor

No pre-processing
Resampling: Cross-Validated (10 fold) 
Summary of sample sizes: 216, 216, 216, 216, 216, 216, ... 
Resampling results across tuning parameters:

  mtry  RMSE      Rsquared   MAE     
  2     303.4196  0.9521672  111.5531
  3     307.9688  0.9537189  112.8936

RMSE was used to select the optimal model using the smallest value.
The final value used for the model was mtry = 2.

R²: 0.6527128 
Syx: 1459.211 
Pearson: 0.9577396 
RMSE: 1422.263 
MAE: 236.9633 
