In [1]:
!pip install pyspark

Defaulting to user installation because normal site-packages is not writeable


In [2]:
from datetime import datetime, date
import pandas as pd
import plotly.express as px

from pyspark.ml import Pipeline
from pyspark.sql import Row
from pyspark.sql.functions import *
from pyspark.sql import SparkSession
from pyspark.sql.types import DoubleType
from pyspark.ml.evaluation import MulticlassClassificationEvaluator
from pyspark.ml.feature import StringIndexer, VectorIndexer, VectorAssembler

### Modelos utilizados ###

from pyspark.ml.classification import GBTClassifier
from pyspark.ml.classification import DecisionTreeClassifier
from pyspark.ml.classification import RandomForestClassifier

spark = SparkSession.builder.getOrCreate()

df = spark.read.option("delimiter", ";").option("header", True).csv('Data/inmet_filtered_A401_H_2000-05-12_2023-05-16.csv')

dfTransformado = df.withColumn('PRECIPITACAO TOTAL, HORARIO(mm)', regexp_replace('PRECIPITACAO TOTAL, HORARIO(mm)', ',', '.').cast(DoubleType()))

dfTransformado = dfTransformado.drop(
           'PRESSAO ATMOSFERICA REDUZIDA NIVEL DO MAR, AUT(mB)',
           'PRESSAO ATMOSFERICA REDUZIDA NIVEL DO MAR, AUT(mB)',
           'PRESSAO ATMOSFERICA MAX.NA HORA ANT. (AUT)(mB)',
           'PRESSAO ATMOSFERICA MIN. NA HORA ANT. (AUT)(mB)',
           'PRESSAO ATMOSFERICA AO NIVEL DA ESTACAO, HORARIA(mB)',
           'TEMPERATURA DA CPU DA ESTACAO(°C)',
           'TEMPERATURA DO AR - BULBO SECO, HORARIA(°C)',
           'TEMPERATURA DO PONTO DE ORVALHO(°C)',
           'TEMPERATURA MAXIMA NA HORA ANT. (AUT)(°C)',
           'TEMPERATURA MINIMA NA HORA ANT. (AUT)(°C)',
           'TEMPERATURA ORVALHO MAX. NA HORA ANT. (AUT)(°C)',
           'TEMPERATURA ORVALHO MIN. NA HORA ANT. (AUT)(°C)',
           'TENSAO DA BATERIA DA ESTACAO(V)',
           'UMIDADE REL. MAX. NA HORA ANT. (AUT)(%)',
           'UMIDADE REL. MIN. NA HORA ANT. (AUT)(%)',
           'UMIDADE RELATIVA DO AR, HORARIA(%)',
           'VENTO, DIRECAO HORARIA (gr)(° (gr))',
           'VENTO, RAJADA MAXIMA(m/s)',
           'VENTO, VELOCIDADE HORARIA(m/s)',
           'Unnamed: 22',
           '_c22'
)

df = dfTransformado.withColumn("Inicio_Semana",date_sub(next_day(col("Data Medicao"),"sunday"),7))\
                    .groupBy("Inicio_Semana").agg\
                        (sum("PRECIPITACAO TOTAL, HORARIO(mm)").cast("float").alias("Total de Chuvas(mm)"),\
                         sum("PRECIPITACAO TOTAL, HORARIO(mm)").cast("float").alias("Média diária de chuvas(mm)"))\
                    .orderBy("Inicio_Semana")

df = df.select('Inicio_Semana','Total de Chuvas(mm)',col('Média diária de chuvas(mm)')/ 7 )

df = df.withColumnRenamed("(Média diária de chuvas(mm) / 7)","Média diária de chuvas(mm)")

df1 = df.withColumn('Semana_Ano',weekofyear(df.Inicio_Semana))
df1.show()

Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
23/06/27 11:39:10 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
[Stage 1:>                                                          (0 + 6) / 6]

+-------------+-------------------+--------------------------+----------+
|Inicio_Semana|Total de Chuvas(mm)|Média diária de chuvas(mm)|Semana_Ano|
+-------------+-------------------+--------------------------+----------+
|   2000-05-07|               null|                      null|        18|
|   2000-05-14|               null|                      null|        19|
|   2000-05-21|               null|                      null|        20|
|   2000-05-28|               null|                      null|        21|
|   2000-06-04|               null|                      null|        22|
|   2000-06-11|               null|                      null|        23|
|   2000-06-18|               null|                      null|        24|
|   2000-06-25|               null|                      null|        25|
|   2000-07-02|               null|                      null|        26|
|   2000-07-09|               null|                      null|        27|
|   2000-07-16|               null|   

                                                                                

In [3]:
inema = spark.read.option("header",True).csv("Data/inema_filtered_balneabilidade_farol_barra.csv")

inema = inema.withColumnRenamed("01/2007","numero_boletim").withColumnRenamed("Farol da Barra - SSA FB 100","ponto_codigo").withColumnRenamed("Indisponível","categoria")

#Farol da Barra - SSA FB 100 | Farol da Barra - SSA FB 200 | Porto da Barra - SSA PB 100 | Santa Maria - SSA SM 100

dfCompleto = df1.withColumn('Farol da Barra - SSA FB 100', when(rand() > 0.5, 'Imprópria').otherwise('Própria'))\
                .withColumn("Farol da Barra - SSA FB 200", when(rand() > 0.5, 'Imprópria').otherwise('Própria'))\
                .withColumn("Porto da Barra - SSA PB 100", when(rand() > 0.5, 'Imprópria').otherwise('Própria'))\
                .withColumn("Santa Maria - SSA SM 100", when(rand() > 0.5, 'Imprópria').otherwise('Própria'))

dfCompleto.show()

+-------------+-------------------+--------------------------+----------+---------------------------+---------------------------+---------------------------+------------------------+
|Inicio_Semana|Total de Chuvas(mm)|Média diária de chuvas(mm)|Semana_Ano|Farol da Barra - SSA FB 100|Farol da Barra - SSA FB 200|Porto da Barra - SSA PB 100|Santa Maria - SSA SM 100|
+-------------+-------------------+--------------------------+----------+---------------------------+---------------------------+---------------------------+------------------------+
|   2000-05-07|               null|                      null|        18|                    Própria|                    Própria|                  Imprópria|                 Própria|
|   2000-05-14|               null|                      null|        19|                  Imprópria|                  Imprópria|                  Imprópria|               Imprópria|
|   2000-05-21|               null|                      null|        20|            

In [4]:
### Descritiva dos dados ###

dfCompleto.describe().toPandas()

23/06/27 11:39:22 WARN package: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.


Unnamed: 0,summary,Total de Chuvas(mm),Média diária de chuvas(mm),Semana_Ano,Farol da Barra - SSA FB 100,Farol da Barra - SSA FB 200,Porto da Barra - SSA PB 100,Santa Maria - SSA SM 100
0,count,1115.0,1115.0,1202.0,1202,1202,1202,1202
1,mean,30.80591932758355,4.400845618226224,26.57487520798669,,,,
2,stddev,40.63941721415994,5.80563103059428,15.058303598679691,,,,
3,min,0.0,0.0,1.0,Imprópria,Imprópria,Imprópria,Imprópria
4,max,364.8,52.1142839704241,53.0,Própria,Própria,Própria,Própria


In [5]:
### Contabilizando os Nulls por coluna ###

dfCompleto.select([count(when(isnull(c), c)).alias(c) for c in dfCompleto.columns]).show()

+-------------+-------------------+--------------------------+----------+---------------------------+---------------------------+---------------------------+------------------------+
|Inicio_Semana|Total de Chuvas(mm)|Média diária de chuvas(mm)|Semana_Ano|Farol da Barra - SSA FB 100|Farol da Barra - SSA FB 200|Porto da Barra - SSA PB 100|Santa Maria - SSA SM 100|
+-------------+-------------------+--------------------------+----------+---------------------------+---------------------------+---------------------------+------------------------+
|            0|                 87|                        87|         0|                          0|                          0|                          0|                       0|
+-------------+-------------------+--------------------------+----------+---------------------------+---------------------------+---------------------------+------------------------+



In [6]:
### Removendo os Nulls ####

dfCompleto = dfCompleto.replace('?', None).dropna(how='any')

In [7]:
dfCompleto.show()

+-------------+-------------------+--------------------------+----------+---------------------------+---------------------------+---------------------------+------------------------+
|Inicio_Semana|Total de Chuvas(mm)|Média diária de chuvas(mm)|Semana_Ano|Farol da Barra - SSA FB 100|Farol da Barra - SSA FB 200|Porto da Barra - SSA PB 100|Santa Maria - SSA SM 100|
+-------------+-------------------+--------------------------+----------+---------------------------+---------------------------+---------------------------+------------------------+
|   2000-10-01|                0.0|                       0.0|        39|                  Imprópria|                  Imprópria|                  Imprópria|                 Própria|
|   2000-10-08|                0.2|      0.028571428997176036|        40|                  Imprópria|                  Imprópria|                  Imprópria|               Imprópria|
|   2000-10-15|                0.8|       0.11428571598870414|        41|            

In [8]:
### Transformando os valores qualitativos em numéricos => 0 = Própria | 1 = Imprópria | 2 = Indisponível ###

dfCompleto = StringIndexer(
    inputCol='Farol da Barra - SSA FB 100', 
    outputCol='Farol_100', 
    handleInvalid='keep').fit(dfCompleto).transform(dfCompleto)

dfCompleto = StringIndexer(
    inputCol='Farol da Barra - SSA FB 200', 
    outputCol='Farol_200', 
    handleInvalid='keep').fit(dfCompleto).transform(dfCompleto)

dfCompleto = StringIndexer(
    inputCol='Porto da Barra - SSA PB 100', 
    outputCol='Porto_100', 
    handleInvalid='keep').fit(dfCompleto).transform(dfCompleto)

dfCompleto = StringIndexer(
    inputCol='Santa Maria - SSA SM 100', 
    outputCol='Santa Maria_100', 
    handleInvalid='keep').fit(dfCompleto).transform(dfCompleto)

In [9]:
### Removendo as colunas após transformação ###

dfCompleto = dfCompleto.drop('Farol da Barra - SSA FB 100','Farol da Barra - SSA FB 200','Porto da Barra - SSA PB 100','Santa Maria - SSA SM 100')

In [10]:
dfCompleto.show()

+-------------+-------------------+--------------------------+----------+---------+---------+---------+---------------+
|Inicio_Semana|Total de Chuvas(mm)|Média diária de chuvas(mm)|Semana_Ano|Farol_100|Farol_200|Porto_100|Santa Maria_100|
+-------------+-------------------+--------------------------+----------+---------+---------+---------+---------------+
|   2000-10-01|                0.0|                       0.0|        39|      1.0|      0.0|      0.0|            0.0|
|   2000-10-08|                0.2|      0.028571428997176036|        40|      1.0|      0.0|      0.0|            1.0|
|   2000-10-15|                0.8|       0.11428571598870414|        41|      1.0|      1.0|      0.0|            0.0|
|   2000-10-22|                3.6|        0.5142857006617955|        42|      1.0|      1.0|      1.0|            1.0|
|   2000-10-29|                4.6|        0.6571428435189384|        43|      1.0|      0.0|      0.0|            0.0|
|   2000-11-05|               62.0|     

In [11]:
informacoes_necessarias = ['Total de Chuvas(mm)','Média diária de chuvas(mm)']

assembler = VectorAssembler(inputCols=informacoes_necessarias, outputCol='informacoes')

dataset = assembler.transform(dfCompleto)

In [12]:
dataset.show()

+-------------+-------------------+--------------------------+----------+---------+---------+---------+---------------+--------------------+
|Inicio_Semana|Total de Chuvas(mm)|Média diária de chuvas(mm)|Semana_Ano|Farol_100|Farol_200|Porto_100|Santa Maria_100|         informacoes|
+-------------+-------------------+--------------------------+----------+---------+---------+---------+---------------+--------------------+
|   2000-10-01|                0.0|                       0.0|        39|      1.0|      0.0|      0.0|            0.0|           (2,[],[])|
|   2000-10-08|                0.2|      0.028571428997176036|        40|      1.0|      0.0|      0.0|            1.0|[0.20000000298023...|
|   2000-10-15|                0.8|       0.11428571598870414|        41|      1.0|      1.0|      0.0|            0.0|[0.80000001192092...|
|   2000-10-22|                3.6|        0.5142857006617955|        42|      1.0|      1.0|      1.0|            1.0|[3.59999990463256...|
|   2000-10-2

In [13]:
### Dividindo o dataser em treino (80%) e teste (20%) ###

(treino, teste) = dataset.randomSplit([0.8,0.2])

In [14]:
### Definindo os modelos ###

gbt = GBTClassifier(labelCol="Farol_100", featuresCol="informacoes", maxIter=10)
dt = DecisionTreeClassifier(labelCol='Farol_100',featuresCol='informacoes')
rf = RandomForestClassifier(labelCol='Farol_100',featuresCol='informacoes',maxDepth=5)

In [15]:
### Treinando os modelos #### 

gbtModel = gbt.fit(treino) ### Gradient Boosted Tree Classifier

rfModel = rf.fit(treino) ### Random Forest Classifier

dtModel = dt.fit(treino) ### Desition Tree classfier

In [16]:
### Testando os modelos ###

gbtPredicao = gbtModel.transform(teste)

gbtPredicao.toPandas()

23/06/27 11:39:43 WARN InstanceBuilder: Failed to load implementation from:dev.ludovic.netlib.blas.JNIBLAS
23/06/27 11:39:43 WARN InstanceBuilder: Failed to load implementation from:dev.ludovic.netlib.blas.VectorBLAS


Unnamed: 0,Inicio_Semana,Total de Chuvas(mm),Média diária de chuvas(mm),Semana_Ano,Farol_100,Farol_200,Porto_100,Santa Maria_100,informacoes,rawPrediction,probability,prediction
0,2000-10-01,0.000000,0.000000,39,1.0,0.0,0.0,0.0,"(0.0, 0.0)","[-0.09873899443050625, 0.09873899443050625]","[0.4507903203754999, 0.5492096796245001]",1.0
1,2000-10-29,4.600000,0.657143,43,1.0,0.0,0.0,0.0,"[4.599999904632568, 0.6571428435189384]","[0.08057637580710855, -0.08057637580710855]","[0.5402012226908599, 0.4597987773091401]",0.0
2,2000-12-03,0.600000,0.085714,48,0.0,0.0,1.0,1.0,"[0.6000000238418579, 0.08571428912026542]","[-0.08475298044449227, 0.08475298044449227]","[0.45772468349194206, 0.5422753165080579]",1.0
3,2000-12-24,5.800000,0.828571,51,0.0,0.0,1.0,0.0,"[5.800000190734863, 0.8285714558192662]","[0.08057637580710855, -0.08057637580710855]","[0.5402012226908599, 0.4597987773091401]",0.0
4,2001-02-18,1.800000,0.257143,7,0.0,0.0,0.0,0.0,"[1.7999999523162842, 0.25714285033089773]","[0.06086312403516533, -0.06086312403516533]","[0.5303940415318293, 0.46960595846817066]",0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
226,2023-02-19,25.799999,3.685714,7,0.0,0.0,1.0,1.0,"[25.799999237060547, 3.6857141767229353]","[-0.021008324672785187, 0.021008324672785187]","[0.4894973827271608, 0.5105026172728392]",1.0
227,2023-03-19,48.400002,6.914286,11,0.0,0.0,1.0,1.0,"[48.400001525878906, 6.914285932268415]","[0.10334123948052018, -0.10334123948052018]","[0.5514874648202155, 0.4485125351797845]",0.0
228,2023-04-02,66.400002,9.485715,13,0.0,1.0,0.0,0.0,"[66.4000015258789, 9.485714503696986]","[-0.11696779372420123, 0.11696779372420123]","[0.4417813666797163, 0.5582186333202837]",1.0
229,2023-04-16,6.600000,0.942857,15,0.0,1.0,0.0,0.0,"[6.599999904632568, 0.9428571292332241]","[0.08057637580710855, -0.08057637580710855]","[0.5402012226908599, 0.4597987773091401]",0.0


In [17]:
rfPredicao = rfModel.transform(teste)

rfPredicao.toPandas()

Unnamed: 0,Inicio_Semana,Total de Chuvas(mm),Média diária de chuvas(mm),Semana_Ano,Farol_100,Farol_200,Porto_100,Santa Maria_100,informacoes,rawPrediction,probability,prediction
0,2000-10-01,0.000000,0.000000,39,1.0,0.0,0.0,0.0,"(0.0, 0.0)","[9.273650867205237, 10.72634913279476, 0.0]","[0.4636825433602619, 0.5363174566397381, 0.0]",1.0
1,2000-10-29,4.600000,0.657143,43,1.0,0.0,0.0,0.0,"[4.599999904632568, 0.6571428435189384]","[10.49010552864123, 9.509894471358768, 0.0]","[0.5245052764320615, 0.4754947235679384, 0.0]",0.0
2,2000-12-03,0.600000,0.085714,48,0.0,0.0,1.0,1.0,"[0.6000000238418579, 0.08571428912026542]","[9.920312059335256, 10.079687940664744, 0.0]","[0.4960156029667628, 0.5039843970332372, 0.0]",1.0
3,2000-12-24,5.800000,0.828571,51,0.0,0.0,1.0,0.0,"[5.800000190734863, 0.8285714558192662]","[10.921488856817339, 9.078511143182658, 0.0]","[0.546074442840867, 0.45392555715913296, 0.0]",0.0
4,2001-02-18,1.800000,0.257143,7,0.0,0.0,0.0,0.0,"[1.7999999523162842, 0.25714285033089773]","[10.451205990246619, 9.548794009753381, 0.0]","[0.522560299512331, 0.47743970048766904, 0.0]",0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
226,2023-02-19,25.799999,3.685714,7,0.0,0.0,1.0,1.0,"[25.799999237060547, 3.6857141767229353]","[9.506192098912507, 10.493807901087493, 0.0]","[0.47530960494562535, 0.5246903950543746, 0.0]",1.0
227,2023-03-19,48.400002,6.914286,11,0.0,0.0,1.0,1.0,"[48.400001525878906, 6.914285932268415]","[11.852542297252615, 8.147457702747385, 0.0]","[0.5926271148626308, 0.40737288513736925, 0.0]",0.0
228,2023-04-02,66.400002,9.485715,13,0.0,1.0,0.0,0.0,"[66.4000015258789, 9.485714503696986]","[8.630224746631855, 11.369775253368143, 0.0]","[0.43151123733159275, 0.5684887626684072, 0.0]",1.0
229,2023-04-16,6.600000,0.942857,15,0.0,1.0,0.0,0.0,"[6.599999904632568, 0.9428571292332241]","[10.921488856817339, 9.078511143182658, 0.0]","[0.546074442840867, 0.45392555715913296, 0.0]",0.0


In [18]:
dtPredicao = dtModel.transform(teste)

dtPredicao.toPandas()

Unnamed: 0,Inicio_Semana,Total de Chuvas(mm),Média diária de chuvas(mm),Semana_Ano,Farol_100,Farol_200,Porto_100,Santa Maria_100,informacoes,rawPrediction,probability,prediction
0,2000-10-01,0.000000,0.000000,39,1.0,0.0,0.0,0.0,"(0.0, 0.0)","[37.0, 43.0, 0.0]","[0.4625, 0.5375, 0.0]",1.0
1,2000-10-29,4.600000,0.657143,43,1.0,0.0,0.0,0.0,"[4.599999904632568, 0.6571428435189384]","[155.0, 125.0, 0.0]","[0.5535714285714286, 0.44642857142857145, 0.0]",0.0
2,2000-12-03,0.600000,0.085714,48,0.0,0.0,1.0,1.0,"[0.6000000238418579, 0.08571428912026542]","[17.0, 23.0, 0.0]","[0.425, 0.575, 0.0]",1.0
3,2000-12-24,5.800000,0.828571,51,0.0,0.0,1.0,0.0,"[5.800000190734863, 0.8285714558192662]","[155.0, 125.0, 0.0]","[0.5535714285714286, 0.44642857142857145, 0.0]",0.0
4,2001-02-18,1.800000,0.257143,7,0.0,0.0,0.0,0.0,"[1.7999999523162842, 0.25714285033089773]","[155.0, 125.0, 0.0]","[0.5535714285714286, 0.44642857142857145, 0.0]",0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
226,2023-02-19,25.799999,3.685714,7,0.0,0.0,1.0,1.0,"[25.799999237060547, 3.6857141767229353]","[204.0, 184.0, 0.0]","[0.5257731958762887, 0.4742268041237113, 0.0]",0.0
227,2023-03-19,48.400002,6.914286,11,0.0,0.0,1.0,1.0,"[48.400001525878906, 6.914285932268415]","[204.0, 184.0, 0.0]","[0.5257731958762887, 0.4742268041237113, 0.0]",0.0
228,2023-04-02,66.400002,9.485715,13,0.0,1.0,0.0,0.0,"[66.4000015258789, 9.485714503696986]","[204.0, 184.0, 0.0]","[0.5257731958762887, 0.4742268041237113, 0.0]",0.0
229,2023-04-16,6.600000,0.942857,15,0.0,1.0,0.0,0.0,"[6.599999904632568, 0.9428571292332241]","[155.0, 125.0, 0.0]","[0.5535714285714286, 0.44642857142857145, 0.0]",0.0


In [19]:
### Avaliando os modelos ### 

### Definindo os avaliadores ###

### Documentação => https://spark.apache.org/docs/latest/mllib-evaluation-metrics.html ###

### https://towardsdatascience.com/the-f1-score-bec2bbc38aa6 ###

acuracia = MulticlassClassificationEvaluator(labelCol='Farol_100',predictionCol='prediction',metricName='accuracy')
f1 = MulticlassClassificationEvaluator(labelCol='Farol_100',predictionCol='prediction',metricName='f1')
precisaoPonderada = MulticlassClassificationEvaluator(labelCol='Farol_100',predictionCol='prediction',metricName='weightedPrecision')
weightedRecall = MulticlassClassificationEvaluator(labelCol='Farol_100',predictionCol='prediction',metricName='weightedRecall')

### Resultados da Acurácia ###

gbtAcuracia = acuracia.evaluate(gbtPredicao)
rfAcuracia = acuracia.evaluate(rfPredicao)
dtAcuracia = acuracia.evaluate(dtPredicao)
print('Acurácia do teste Árvore de Decisão (Gradiente Boosting) = ', gbtAcuracia)
print('Acurácia do teste Árvore Aleatória = ', rfAcuracia)
print('Acurácia do teste Árvore de Decisão = ', dtAcuracia)

Acurácia do teste Árvore de Decisão (Gradiente Boosting) =  0.48484848484848486
Acurácia do teste Árvore Aleatória =  0.4935064935064935
Acurácia do teste Árvore de Decisão =  0.48484848484848486


In [20]:
### Resultados do F1 ###

gbtF1 = f1.evaluate(gbtPredicao)
rfF1 = f1.evaluate(rfPredicao)
dtF1 = f1.evaluate(dtPredicao)
print('F1 do teste Árvore de Decisão (Gradiente Boosting) = ', gbtF1)
print('F1 do teste Árvore Aleatória = ', rfF1)
print('F1 do teste Árvore de Decisão = ', dtF1)

F1 do teste Árvore de Decisão (Gradiente Boosting) =  0.4768945242439838
F1 do teste Árvore Aleatória =  0.48452573469292043
F1 do teste Árvore de Decisão =  0.44944288243257313


In [21]:
### Resultados do Precisão Ponderada ###

gbtPP = precisaoPonderada.evaluate(gbtPredicao)
rfPP = precisaoPonderada.evaluate(rfPredicao)
dtPP = precisaoPonderada.evaluate(dtPredicao)
print('Precisão Ponderada do teste Árvore de Decisão (Gradiente Boosting) = ', gbtPP)
print('Precisão Ponderada do teste Árvore Aleatória = ', rfPP)
print('Precisão Ponderada do teste Árvore de Decisão = ', dtPP)

Precisão Ponderada do teste Árvore de Decisão (Gradiente Boosting) =  0.48230146290491116
Precisão Ponderada do teste Árvore Aleatória =  0.49129106187929716
Precisão Ponderada do teste Árvore de Decisão =  0.4754440961337514


In [22]:
### Resultados do weightedRecall ###

### Recall é a razão entre o número de positivos verdadeiros (pv) e a soma dos positivos verdadeiros (pv) e falsos negativos (fn) => pv/(pv+fn)

gbtWR = weightedRecall.evaluate(gbtPredicao)
rfWR = weightedRecall.evaluate(rfPredicao)
dtWR = weightedRecall.evaluate(dtPredicao)
print('Recall do teste Árvore de Decisão (Gradiente Boosting) = ', gbtWR)
print('Recall do teste Árvore Aleatória = ', rfWR)
print('Recall do teste Árvore de Decisão = ', dtWR)

Recall do teste Árvore de Decisão (Gradiente Boosting) =  0.48484848484848486
Recall do teste Árvore Aleatória =  0.49350649350649356
Recall do teste Árvore de Decisão =  0.48484848484848486
