In [1]:
!pip install pyspark

Defaulting to user installation because normal site-packages is not writeable


In [23]:
from datetime import datetime, date
import pandas as pd
import plotly.express as px

from pyspark.ml import Pipeline
from pyspark.sql import Row
from pyspark.sql.functions import *
from pyspark.sql import SparkSession
from pyspark.sql.types import DoubleType
from pyspark.ml.evaluation import MulticlassClassificationEvaluator
from pyspark.ml.feature import StringIndexer, VectorIndexer, VectorAssembler

### Modelos utilizados ###

from pyspark.ml.classification import GBTClassifier
from pyspark.ml.classification import DecisionTreeClassifier
from pyspark.ml.classification import RandomForestClassifier

spark = SparkSession.builder.getOrCreate()

df = spark.read.option("delimiter", ";").option("header", True).csv('Data/inmet_filtered_A401_H_2000-05-12_2023-05-16.csv')

dfTransformado = df.withColumn('PRECIPITACAO TOTAL, HORARIO(mm)', regexp_replace('PRECIPITACAO TOTAL, HORARIO(mm)', ',', '.').cast(DoubleType()))

dfTransformado = dfTransformado.drop(
           'PRESSAO ATMOSFERICA REDUZIDA NIVEL DO MAR, AUT(mB)',
           'PRESSAO ATMOSFERICA REDUZIDA NIVEL DO MAR, AUT(mB)',
           'PRESSAO ATMOSFERICA MAX.NA HORA ANT. (AUT)(mB)',
           'PRESSAO ATMOSFERICA MIN. NA HORA ANT. (AUT)(mB)',
           'PRESSAO ATMOSFERICA AO NIVEL DA ESTACAO, HORARIA(mB)',
           'TEMPERATURA DA CPU DA ESTACAO(°C)',
           'TEMPERATURA DO AR - BULBO SECO, HORARIA(°C)',
           'TEMPERATURA DO PONTO DE ORVALHO(°C)',
           'TEMPERATURA MAXIMA NA HORA ANT. (AUT)(°C)',
           'TEMPERATURA MINIMA NA HORA ANT. (AUT)(°C)',
           'TEMPERATURA ORVALHO MAX. NA HORA ANT. (AUT)(°C)',
           'TEMPERATURA ORVALHO MIN. NA HORA ANT. (AUT)(°C)',
           'TENSAO DA BATERIA DA ESTACAO(V)',
           'UMIDADE REL. MAX. NA HORA ANT. (AUT)(%)',
           'UMIDADE REL. MIN. NA HORA ANT. (AUT)(%)',
           'UMIDADE RELATIVA DO AR, HORARIA(%)',
           'VENTO, DIRECAO HORARIA (gr)(° (gr))',
           'VENTO, RAJADA MAXIMA(m/s)',
           'VENTO, VELOCIDADE HORARIA(m/s)',
           'Unnamed: 22',
           '_c22'
)

df = dfTransformado.withColumn("Inicio_Semana",date_sub(next_day(col("Data Medicao"),"sunday"),7))\
                    .groupBy("Inicio_Semana").agg\
                        (sum("PRECIPITACAO TOTAL, HORARIO(mm)").cast("float").alias("Total de Chuvas(mm)"),\
                         sum("PRECIPITACAO TOTAL, HORARIO(mm)").cast("float").alias("Média diária de chuvas(mm)"))\
                    .orderBy("Inicio_Semana")

df = df.select('Inicio_Semana','Total de Chuvas(mm)',col('Média diária de chuvas(mm)')/ 7 )

df = df.withColumnRenamed("(Média diária de chuvas(mm) / 7)","Média diária de chuvas(mm)")

df1 = df.withColumn('Semana_Ano',weekofyear(df.Inicio_Semana))
df1.toPandas()

Unnamed: 0,Inicio_Semana,Total de Chuvas(mm),Média diária de chuvas(mm),Semana_Ano
0,2000-05-07,,,18
1,2000-05-14,,,19
2,2000-05-21,,,20
3,2000-05-28,,,21
4,2000-06-04,,,22
...,...,...,...,...
1197,2023-04-16,6.600000,0.942857,15
1198,2023-04-23,94.800003,13.542858,16
1199,2023-04-30,49.400002,7.057143,17
1200,2023-05-07,117.800003,16.828572,18


In [3]:
inema = spark.read.option("header",True).csv("Data/inema_filtered_balneabilidade_farol_barra.csv")

inema = inema.withColumnRenamed("01/2007","numero_boletim").withColumnRenamed("Farol da Barra - SSA FB 100","ponto_codigo").withColumnRenamed("Indisponível","categoria")

#Farol da Barra - SSA FB 100 | Farol da Barra - SSA FB 200 | Porto da Barra - SSA PB 100 | Santa Maria - SSA SM 100

dfCompleto = df1.withColumn('Farol da Barra - SSA FB 100', when(rand() > 0.5, 'Imprópria').otherwise('Própria'))\
                .withColumn("Farol da Barra - SSA FB 200", when(rand() > 0.5, 'Imprópria').otherwise('Própria'))\
                .withColumn("Porto da Barra - SSA PB 100", when(rand() > 0.5, 'Imprópria').otherwise('Própria'))\
                .withColumn("Santa Maria - SSA SM 100", when(rand() > 0.5, 'Imprópria').otherwise('Própria'))

dfCompleto.toPandas()

Unnamed: 0,Inicio_Semana,Total de Chuvas(mm),Média diária de chuvas(mm),Semana_Ano,Farol da Barra - SSA FB 100,Farol da Barra - SSA FB 200,Porto da Barra - SSA PB 100,Santa Maria - SSA SM 100
0,2000-05-07,,,18,Imprópria,Imprópria,Imprópria,Imprópria
1,2000-05-14,,,19,Imprópria,Própria,Imprópria,Própria
2,2000-05-21,,,20,Imprópria,Imprópria,Imprópria,Própria
3,2000-05-28,,,21,Própria,Imprópria,Própria,Própria
4,2000-06-04,,,22,Própria,Própria,Própria,Imprópria
...,...,...,...,...,...,...,...,...
1197,2023-04-16,6.600000,0.942857,15,Imprópria,Imprópria,Própria,Própria
1198,2023-04-23,94.800003,13.542858,16,Própria,Imprópria,Própria,Própria
1199,2023-04-30,49.400002,7.057143,17,Imprópria,Própria,Própria,Própria
1200,2023-05-07,117.800003,16.828572,18,Própria,Própria,Imprópria,Imprópria


In [24]:
### Descritiva dos dados ###

dfCompleto.describe().toPandas()

Unnamed: 0,summary,Total de Chuvas(mm),Média diária de chuvas(mm),Semana_Ano,Farol_100,Farol_200,Porto_100,Santa Maria_100
0,count,1115.0,1115.0,1115.0,1115.0,1115.0,1115.0,1115.0
1,mean,30.80591932758355,4.400845618226224,27.201793721973093,0.4986547085201793,0.4771300448430493,0.4762331838565022,0.4896860986547085
2,stddev,40.63941721415994,5.80563103059428,15.024862591119009,0.5002225555523119,0.499700822650452,0.4996589315827454,0.5001179305578493
3,min,0.0,0.0,1.0,0.0,0.0,0.0,0.0
4,max,364.8,52.1142839704241,53.0,1.0,1.0,1.0,1.0


In [5]:
### Contabilizando os Nulls por coluna ###

dfCompleto.select([count(when(isnull(c), c)).alias(c) for c in dfCompleto.columns]).show()

+-------------+-------------------+--------------------------+----------+---------------------------+---------------------------+---------------------------+------------------------+
|Inicio_Semana|Total de Chuvas(mm)|Média diária de chuvas(mm)|Semana_Ano|Farol da Barra - SSA FB 100|Farol da Barra - SSA FB 200|Porto da Barra - SSA PB 100|Santa Maria - SSA SM 100|
+-------------+-------------------+--------------------------+----------+---------------------------+---------------------------+---------------------------+------------------------+
|            0|                 87|                        87|         0|                          0|                          0|                          0|                       0|
+-------------+-------------------+--------------------------+----------+---------------------------+---------------------------+---------------------------+------------------------+



In [6]:
### Removendo os Nulls ####

dfCompleto = dfCompleto.replace('?', None).dropna(how='any')

In [7]:
dfCompleto.toPandas()

Unnamed: 0,Inicio_Semana,Total de Chuvas(mm),Média diária de chuvas(mm),Semana_Ano,Farol da Barra - SSA FB 100,Farol da Barra - SSA FB 200,Porto da Barra - SSA PB 100,Santa Maria - SSA SM 100
0,2000-10-01,0.000000,0.000000,39,Própria,Própria,Própria,Própria
1,2000-10-08,0.200000,0.028571,40,Imprópria,Imprópria,Imprópria,Imprópria
2,2000-10-15,0.800000,0.114286,41,Própria,Imprópria,Própria,Própria
3,2000-10-22,3.600000,0.514286,42,Imprópria,Própria,Imprópria,Própria
4,2000-10-29,4.600000,0.657143,43,Própria,Imprópria,Própria,Própria
...,...,...,...,...,...,...,...,...
1110,2023-04-16,6.600000,0.942857,15,Imprópria,Imprópria,Própria,Própria
1111,2023-04-23,94.800003,13.542858,16,Própria,Imprópria,Própria,Própria
1112,2023-04-30,49.400002,7.057143,17,Imprópria,Própria,Própria,Própria
1113,2023-05-07,117.800003,16.828572,18,Própria,Própria,Imprópria,Imprópria


In [8]:
### Transformando os valores qualitativos em numéricos => 0 = Própria | 1 = Imprópria | 2 = Indisponível ###

dfCompleto = StringIndexer(
    inputCol='Farol da Barra - SSA FB 100', 
    outputCol='Farol_100', 
    handleInvalid='keep').fit(dfCompleto).transform(dfCompleto)

dfCompleto = StringIndexer(
    inputCol='Farol da Barra - SSA FB 200', 
    outputCol='Farol_200', 
    handleInvalid='keep').fit(dfCompleto).transform(dfCompleto)

dfCompleto = StringIndexer(
    inputCol='Porto da Barra - SSA PB 100', 
    outputCol='Porto_100', 
    handleInvalid='keep').fit(dfCompleto).transform(dfCompleto)

dfCompleto = StringIndexer(
    inputCol='Santa Maria - SSA SM 100', 
    outputCol='Santa Maria_100', 
    handleInvalid='keep').fit(dfCompleto).transform(dfCompleto)

In [9]:
### Removendo as colunas após transformação ###

dfCompleto = dfCompleto.drop('Farol da Barra - SSA FB 100','Farol da Barra - SSA FB 200','Porto da Barra - SSA PB 100','Santa Maria - SSA SM 100')

In [10]:
dfCompleto.toPandas()

Unnamed: 0,Inicio_Semana,Total de Chuvas(mm),Média diária de chuvas(mm),Semana_Ano,Farol_100,Farol_200,Porto_100,Santa Maria_100
0,2000-10-01,0.000000,0.000000,39,1.0,0.0,0.0,1.0
1,2000-10-08,0.200000,0.028571,40,0.0,1.0,1.0,0.0
2,2000-10-15,0.800000,0.114286,41,1.0,1.0,0.0,1.0
3,2000-10-22,3.600000,0.514286,42,0.0,0.0,1.0,1.0
4,2000-10-29,4.600000,0.657143,43,1.0,1.0,0.0,1.0
...,...,...,...,...,...,...,...,...
1110,2023-04-16,6.600000,0.942857,15,0.0,1.0,0.0,1.0
1111,2023-04-23,94.800003,13.542858,16,1.0,1.0,0.0,1.0
1112,2023-04-30,49.400002,7.057143,17,0.0,0.0,0.0,1.0
1113,2023-05-07,117.800003,16.828572,18,1.0,0.0,1.0,0.0


In [11]:
informacoes_necessarias = ['Total de Chuvas(mm)','Média diária de chuvas(mm)']

assembler = VectorAssembler(inputCols=informacoes_necessarias, outputCol='informacoes')

dataset = assembler.transform(dfCompleto)

In [12]:
dataset.toPandas()

Unnamed: 0,Inicio_Semana,Total de Chuvas(mm),Média diária de chuvas(mm),Semana_Ano,Farol_100,Farol_200,Porto_100,Santa Maria_100,informacoes
0,2000-10-01,0.000000,0.000000,39,1.0,0.0,0.0,1.0,"(0.0, 0.0)"
1,2000-10-08,0.200000,0.028571,40,0.0,1.0,1.0,0.0,"[0.20000000298023224, 0.028571428997176036]"
2,2000-10-15,0.800000,0.114286,41,1.0,1.0,0.0,1.0,"[0.800000011920929, 0.11428571598870414]"
3,2000-10-22,3.600000,0.514286,42,0.0,0.0,1.0,1.0,"[3.5999999046325684, 0.5142857006617955]"
4,2000-10-29,4.600000,0.657143,43,1.0,1.0,0.0,1.0,"[4.599999904632568, 0.6571428435189384]"
...,...,...,...,...,...,...,...,...,...
1110,2023-04-16,6.600000,0.942857,15,0.0,1.0,0.0,1.0,"[6.599999904632568, 0.9428571292332241]"
1111,2023-04-23,94.800003,13.542858,16,1.0,1.0,0.0,1.0,"[94.80000305175781, 13.542857578822545]"
1112,2023-04-30,49.400002,7.057143,17,0.0,0.0,0.0,1.0,"[49.400001525878906, 7.057143075125558]"
1113,2023-05-07,117.800003,16.828572,18,1.0,0.0,1.0,0.0,"[117.80000305175781, 16.82857186453683]"


In [13]:
### Dividindo o dataser em treino (80%) e teste (20%) ###

(treino, teste) = dataset.randomSplit([0.8,0.2])

In [14]:
### Definindo os modelos ###

gbt = GBTClassifier(labelCol="Farol_100", featuresCol="informacoes", maxIter=10)
dt = DecisionTreeClassifier(labelCol='Farol_100',featuresCol='informacoes')
rf = RandomForestClassifier(labelCol='Farol_100',featuresCol='informacoes',maxDepth=5)

In [15]:
### Treinando os modelos #### 

gbtModel = gbt.fit(treino) ### Gradient Boosted Tree Classifier

rfModel = rf.fit(treino) ### Random Forest Classifier

dtModel = dt.fit(treino) ### Desition Tree classfier

In [16]:
### Testando os modelos ###

gbtPredicao = gbtModel.transform(teste)

gbtPredicao.toPandas()

23/06/27 12:02:43 WARN InstanceBuilder: Failed to load implementation from:dev.ludovic.netlib.blas.JNIBLAS
23/06/27 12:02:43 WARN InstanceBuilder: Failed to load implementation from:dev.ludovic.netlib.blas.VectorBLAS


Unnamed: 0,Inicio_Semana,Total de Chuvas(mm),Média diária de chuvas(mm),Semana_Ano,Farol_100,Farol_200,Porto_100,Santa Maria_100,informacoes,rawPrediction,probability,prediction
0,2000-11-26,2.800000,0.400000,47,0.0,0.0,1.0,1.0,"[2.799999952316284, 0.3999999931880406]","[-0.16375101143776685, 0.16375101143776685]","[0.41884854331340793, 0.5811514566865921]",1.0
1,2000-12-17,4.800000,0.685714,50,0.0,0.0,0.0,0.0,"[4.800000190734863, 0.6857143129621234]","[0.15779444170562337, -0.15779444170562337]","[0.5782488546866037, 0.4217511453133963]",0.0
2,2001-01-21,7.400000,1.057143,3,0.0,1.0,1.0,0.0,"[7.400000095367432, 1.057142870766776]","[-0.18302883992061175, 0.18302883992061175]","[0.40949396746562433, 0.5905060325343756]",1.0
3,2001-02-25,38.000000,5.428571,8,0.0,1.0,0.0,1.0,"[38.0, 5.428571428571429]","[0.15071693883105836, -0.15071693883105836]","[0.57479300265269, 0.42520699734731005]",0.0
4,2001-06-03,78.599998,11.228571,22,1.0,0.0,0.0,1.0,"[78.5999984741211, 11.228571210588727]","[0.31143949552051975, -0.31143949552051975]","[0.6508730466303491, 0.34912695336965094]",0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
186,2022-09-25,6.400000,0.914286,38,1.0,1.0,0.0,0.0,"[6.400000095367432, 0.9142857279096331]","[-0.27282756786375456, 0.27282756786375456]","[0.3668730369517842, 0.6331269630482158]",1.0
187,2023-01-01,84.400002,12.057143,52,1.0,0.0,0.0,1.0,"[84.4000015258789, 12.057143075125557]","[0.03431417502587721, -0.03431417502587721]","[0.517150356740013, 0.482849643259987]",0.0
188,2023-02-05,1.600000,0.228571,5,1.0,0.0,0.0,0.0,"[1.600000023841858, 0.22857143197740828]","[-0.20352210745497534, 0.20352210745497534]","[0.39962107428272914, 0.6003789257172709]",1.0
189,2023-02-26,17.000000,2.428571,8,1.0,1.0,1.0,1.0,"[17.0, 2.4285714285714284]","[-0.10996352322879088, 0.10996352322879088]","[0.44523878443398657, 0.5547612155660134]",1.0


In [17]:
rfPredicao = rfModel.transform(teste)

rfPredicao.toPandas()

Unnamed: 0,Inicio_Semana,Total de Chuvas(mm),Média diária de chuvas(mm),Semana_Ano,Farol_100,Farol_200,Porto_100,Santa Maria_100,informacoes,rawPrediction,probability,prediction
0,2000-11-26,2.800000,0.400000,47,0.0,0.0,1.0,1.0,"[2.799999952316284, 0.3999999931880406]","[8.535056450957931, 11.464943549042065, 0.0]","[0.42675282254789665, 0.5732471774521034, 0.0]",1.0
1,2000-12-17,4.800000,0.685714,50,0.0,0.0,0.0,0.0,"[4.800000190734863, 0.6857143129621234]","[10.49062370214451, 9.509376297855493, 0.0]","[0.5245311851072254, 0.47546881489277454, 0.0]",0.0
2,2001-01-21,7.400000,1.057143,3,0.0,1.0,1.0,0.0,"[7.400000095367432, 1.057142870766776]","[8.600761411798555, 11.399238588201445, 0.0]","[0.4300380705899277, 0.5699619294100723, 0.0]",1.0
3,2001-02-25,38.000000,5.428571,8,0.0,1.0,0.0,1.0,"[38.0, 5.428571428571429]","[11.142199817986208, 8.857800182013793, 0.0]","[0.5571099908993105, 0.44289000910068965, 0.0]",0.0
4,2001-06-03,78.599998,11.228571,22,1.0,0.0,0.0,1.0,"[78.5999984741211, 11.228571210588727]","[12.19007949436981, 7.809920505630192, 0.0]","[0.6095039747184905, 0.3904960252815096, 0.0]",0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
186,2022-09-25,6.400000,0.914286,38,1.0,1.0,0.0,0.0,"[6.400000095367432, 0.9142857279096331]","[8.585411603150249, 11.414588396849751, 0.0]","[0.4292705801575124, 0.5707294198424876, 0.0]",1.0
187,2023-01-01,84.400002,12.057143,52,1.0,0.0,0.0,1.0,"[84.4000015258789, 12.057143075125557]","[11.015545148230375, 8.984454851769625, 0.0]","[0.5507772574115187, 0.44922274258848127, 0.0]",0.0
188,2023-02-05,1.600000,0.228571,5,1.0,0.0,0.0,0.0,"[1.600000023841858, 0.22857143197740828]","[8.526470592372075, 11.473529407627925, 0.0]","[0.42632352961860376, 0.5736764703813962, 0.0]",1.0
189,2023-02-26,17.000000,2.428571,8,1.0,1.0,1.0,1.0,"[17.0, 2.4285714285714284]","[9.613086263503563, 10.386913736496439, 0.0]","[0.48065431317517815, 0.519345686824822, 0.0]",1.0


In [18]:
dtPredicao = dtModel.transform(teste)

dtPredicao.toPandas()

Unnamed: 0,Inicio_Semana,Total de Chuvas(mm),Média diária de chuvas(mm),Semana_Ano,Farol_100,Farol_200,Porto_100,Santa Maria_100,informacoes,rawPrediction,probability,prediction
0,2000-11-26,2.800000,0.400000,47,0.0,0.0,1.0,1.0,"[2.799999952316284, 0.3999999931880406]","[40.0, 55.0, 0.0]","[0.42105263157894735, 0.5789473684210527, 0.0]",1.0
1,2000-12-17,4.800000,0.685714,50,0.0,0.0,0.0,0.0,"[4.800000190734863, 0.6857143129621234]","[33.0, 24.0, 0.0]","[0.5789473684210527, 0.42105263157894735, 0.0]",0.0
2,2001-01-21,7.400000,1.057143,3,0.0,1.0,1.0,0.0,"[7.400000095367432, 1.057142870766776]","[34.0, 54.0, 0.0]","[0.38636363636363635, 0.6136363636363636, 0.0]",1.0
3,2001-02-25,38.000000,5.428571,8,0.0,1.0,0.0,1.0,"[38.0, 5.428571428571429]","[147.0, 142.0, 0.0]","[0.5086505190311419, 0.4913494809688581, 0.0]",0.0
4,2001-06-03,78.599998,11.228571,22,1.0,0.0,0.0,1.0,"[78.5999984741211, 11.228571210588727]","[82.0, 62.0, 0.0]","[0.5694444444444444, 0.4305555555555556, 0.0]",0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
186,2022-09-25,6.400000,0.914286,38,1.0,1.0,0.0,0.0,"[6.400000095367432, 0.9142857279096331]","[34.0, 54.0, 0.0]","[0.38636363636363635, 0.6136363636363636, 0.0]",1.0
187,2023-01-01,84.400002,12.057143,52,1.0,0.0,0.0,1.0,"[84.4000015258789, 12.057143075125557]","[82.0, 62.0, 0.0]","[0.5694444444444444, 0.4305555555555556, 0.0]",0.0
188,2023-02-05,1.600000,0.228571,5,1.0,0.0,0.0,0.0,"[1.600000023841858, 0.22857143197740828]","[40.0, 55.0, 0.0]","[0.42105263157894735, 0.5789473684210527, 0.0]",1.0
189,2023-02-26,17.000000,2.428571,8,1.0,1.0,1.0,1.0,"[17.0, 2.4285714285714284]","[39.0, 48.0, 0.0]","[0.4482758620689655, 0.5517241379310345, 0.0]",1.0


In [19]:
### Avaliando os modelos ### 

### Definindo os avaliadores ###

### Documentação => https://spark.apache.org/docs/latest/mllib-evaluation-metrics.html ###

### https://towardsdatascience.com/the-f1-score-bec2bbc38aa6 ###

acuracia = MulticlassClassificationEvaluator(labelCol='Farol_100',predictionCol='prediction',metricName='accuracy')
f1 = MulticlassClassificationEvaluator(labelCol='Farol_100',predictionCol='prediction',metricName='f1')
precisaoPonderada = MulticlassClassificationEvaluator(labelCol='Farol_100',predictionCol='prediction',metricName='weightedPrecision')
weightedRecall = MulticlassClassificationEvaluator(labelCol='Farol_100',predictionCol='prediction',metricName='weightedRecall')

### Resultados da Acurácia ###

gbtAcuracia = acuracia.evaluate(gbtPredicao)
rfAcuracia = acuracia.evaluate(rfPredicao)
dtAcuracia = acuracia.evaluate(dtPredicao)
print('Acurácia do teste Árvore de Decisão (Gradiente Boosting) = ', gbtAcuracia)
print('Acurácia do teste Árvore Aleatória = ', rfAcuracia)
print('Acurácia do teste Árvore de Decisão = ', dtAcuracia)

Acurácia do teste Árvore de Decisão (Gradiente Boosting) =  0.5497382198952879
Acurácia do teste Árvore Aleatória =  0.5497382198952879
Acurácia do teste Árvore de Decisão =  0.5130890052356021


In [20]:
### Resultados do F1 ###

gbtF1 = f1.evaluate(gbtPredicao)
rfF1 = f1.evaluate(rfPredicao)
dtF1 = f1.evaluate(dtPredicao)
print('F1 do teste Árvore de Decisão (Gradiente Boosting) = ', gbtF1)
print('F1 do teste Árvore Aleatória = ', rfF1)
print('F1 do teste Árvore de Decisão = ', dtF1)

F1 do teste Árvore de Decisão (Gradiente Boosting) =  0.54978759070451
F1 do teste Árvore Aleatória =  0.54978759070451
F1 do teste Árvore de Decisão =  0.49872019759943065


In [21]:
### Resultados do Precisão Ponderada ###

gbtPP = precisaoPonderada.evaluate(gbtPredicao)
rfPP = precisaoPonderada.evaluate(rfPredicao)
dtPP = precisaoPonderada.evaluate(dtPredicao)
print('Precisão Ponderada do teste Árvore de Decisão (Gradiente Boosting) = ', gbtPP)
print('Precisão Ponderada do teste Árvore Aleatória = ', rfPP)
print('Precisão Ponderada do teste Árvore de Decisão = ', dtPP)

Precisão Ponderada do teste Árvore de Decisão (Gradiente Boosting) =  0.5503193154556489
Precisão Ponderada do teste Árvore Aleatória =  0.5503193154556489
Precisão Ponderada do teste Árvore de Decisão =  0.5203606748109366


In [22]:
### Resultados do weightedRecall ###

### Recall é a razão entre o número de positivos verdadeiros (pv) e a soma dos positivos verdadeiros (pv) e falsos negativos (fn) => pv/(pv+fn)

gbtWR = weightedRecall.evaluate(gbtPredicao)
rfWR = weightedRecall.evaluate(rfPredicao)
dtWR = weightedRecall.evaluate(dtPredicao)
print('Recall do teste Árvore de Decisão (Gradiente Boosting) = ', gbtWR)
print('Recall do teste Árvore Aleatória = ', rfWR)
print('Recall do teste Árvore de Decisão = ', dtWR)

Recall do teste Árvore de Decisão (Gradiente Boosting) =  0.5497382198952879
Recall do teste Árvore Aleatória =  0.5497382198952879
Recall do teste Árvore de Decisão =  0.5130890052356021
