In [8]:
from pyspark.ml.classification import RandomForestClassificationModel, DecisionTreeClassificationModel, GBTClassificationModel
from pyspark.sql.types import StructType,StructField, StringType, IntegerType, FloatType
from pyspark.ml import Pipeline
from pyspark.sql import Row
from pyspark.sql.functions import *
from pyspark.sql import SparkSession
from pyspark.sql.types import DoubleType
from pyspark.ml.evaluation import MulticlassClassificationEvaluator
from pyspark.ml.feature import StringIndexer, VectorIndexer, VectorAssembler

# Create (or reuse) the Spark session that backs every DataFrame in this notebook.
spark = (
    SparkSession.builder
    .appName("ML Pipeline")
    .getOrCreate()
)

# Take the context from the session itself. `SparkContext` is never imported by
# name in this notebook, so `SparkContext.getOrCreate()` only works when that
# name happens to leak in from elsewhere (wildcard import, preloaded kernel).
sc = spark.sparkContext

# Load the three persisted classifiers from their saved model directories.
rfModel = RandomForestClassificationModel.load("models/rf")
dtModel = DecisionTreeClassificationModel.load("models/dt")
gbtModel = GBTClassificationModel.load("models/gbt")

In [9]:
# One sample row; the values line up positionally with the schema fields below.
data = [(60.50, 8.0)]

# Two nullable float columns, declared in the same order as the tuple above.
schema = (
    StructType()
    .add("Total de Chuvas(mm)", FloatType(), True)
    .add("Média diária de chuvas(mm)", FloatType(), True)
)

df = spark.createDataFrame(data, schema)
df.toPandas()

Unnamed: 0,Total de Chuvas(mm),Média diária de chuvas(mm)
0,60.5,8.0


In [10]:
# Columns that are packed into the single feature vector column.
informacoes_necessarias = ['Total de Chuvas(mm)', 'Média diária de chuvas(mm)']
assembler = VectorAssembler(inputCols=informacoes_necessarias, outputCol='informacoes')

# Drop a stale output column first so this cell can be re-run safely: without
# it, a second execution fails because `df` already contains 'informacoes'.
# `drop` is a no-op when the column is absent, so the first run is unchanged.
df = assembler.transform(df.drop(assembler.getOutputCol()))
df.toPandas()

Unnamed: 0,Total de Chuvas(mm),Média diária de chuvas(mm),informacoes
0,60.5,8.0,"[60.5, 8.0]"


In [4]:
# Score the assembled row with the loaded random forest model; the result keeps
# the input columns and adds rawPrediction, probability and prediction.
testeRF = rfModel.transform(df)
# Trailing expression: render the scored row as a pandas table.
testeRF.toPandas()

Unnamed: 0,Total de Chuvas(mm),Média diária de chuvas(mm),informacoes,rawPrediction,probability,prediction
0,60.5,8.0,"[60.5, 8.0]","[13.609434295118515, 6.390565704881484, 0.0, 0.0]","[0.6804717147559257, 0.31952828524407423, 0.0,...",0.0


In [5]:
# Score the assembled row with the loaded decision tree model; the result keeps
# the input columns and adds rawPrediction, probability and prediction.
testeDT = dtModel.transform(df)
# Trailing expression: render the scored row as a pandas table.
testeDT.toPandas()

Unnamed: 0,Total de Chuvas(mm),Média diária de chuvas(mm),informacoes,rawPrediction,probability,prediction
0,60.5,8.0,"[60.5, 8.0]","[67.0, 35.0, 0.0, 0.0]","[0.6568627450980392, 0.3431372549019608, 0.0, ...",0.0


In [6]:
# Score the assembled row with the loaded gradient-boosted trees model; the
# result keeps the input columns and adds rawPrediction, probability and prediction.
testeGBT = gbtModel.transform(df)
# Trailing expression: render the scored row as a pandas table.
testeGBT.toPandas()

Unnamed: 0,Total de Chuvas(mm),Média diária de chuvas(mm),informacoes,rawPrediction,probability,prediction
0,60.5,8.0,"[60.5, 8.0]","[0.48951153222269056, -0.48951153222269056]","[0.7269143279705572, 0.2730856720294428]",0.0


In [11]:
# Pull the predicted class index out of the single scored row.
saidaRF = testeRF.first()['prediction']

# Class 0.0 is reported as "Própria"; every other class as "Imprópria".
rotulo_rf = ": Própria" if saidaRF == 0.0 else ": Imprópria"
print("A previsão RF é", saidaRF, rotulo_rf)

A previsão RF é 0.0 : Própria


In [12]:
# Pull the predicted class index out of the single scored row.
saidaDT = testeDT.first()['prediction']

# Class 0.0 is reported as "Própria"; every other class as "Imprópria".
rotulo_dt = ": Própria" if saidaDT == 0.0 else ": Imprópria"
print("A previsão DT é", saidaDT, rotulo_dt)

A previsão DT é 0.0 : Própria


In [13]:
# Pull the predicted class index out of the single scored row.
saidaGBT = testeGBT.first()['prediction']

# Class 0.0 is reported as "Própria"; every other class as "Imprópria".
rotulo_gbt = ": Própria" if saidaGBT == 0.0 else ": Imprópria"
print("A previsão GBT é", saidaGBT, rotulo_gbt)

A previsão GBT é 0.0 : Própria
