In [4]:
from pyspark.sql import SparkSession
from pyspark.ml.feature import VectorAssembler,StringIndexer,OneHotEncoder
from pyspark.ml.classification import MultilayerPerceptronClassifier
from pyspark.ml.evaluation import MulticlassClassificationEvaluator
from pyspark.ml.tuning import CrossValidator,ParamGridBuilder

spark = SparkSession.builder.getOrCreate()
irisDF = spark.read.option('header','true').option('inferSchema','true').csv('../datasets/iris-dataset.txt')
indexer = StringIndexer(inputCol='class',outputCol='label')
indexerModel = indexer.fit(irisDF)
irisDF = indexerModel.transform(irisDF)
vec = VectorAssembler(inputCols=irisDF.columns[0:4],outputCol='features')
irisDF = vec.transform(irisDF)
#irisDF.show()

trainDF, testDF = irisDF.randomSplit([0.75,0.25],seed=123) 

mlpClassifier= MultilayerPerceptronClassifier()
eva = MulticlassClassificationEvaluator(metricName='accuracy')

myParams = ParamGridBuilder().addGrid(mlpClassifier.layers, [[4,1,3],[4,2,3],[4,4,3]]).build()

validator = CrossValidator(estimator=mlpClassifier,
                                 estimatorParamMaps=myParams,
                                 evaluator=eva,
                                 parallelism=4,
                                 numFolds=5
                                )

model = validator.fit(trainDF)
print('finished')

print("Layers : ",model.bestModel.getLayers())

22/05/15 23:03:38 WARN BlockManager: Block rdd_21_0 already exists on this machine; not re-adding it
22/05/15 23:03:38 WARN BlockManager: Block rdd_21_0 already exists on this machine; not re-adding it
22/05/15 23:03:42 WARN BlockManager: Block rdd_251_0 already exists on this machine; not re-adding it
22/05/15 23:03:43 ERROR StrongWolfeLineSearch: Encountered bad values in function evaluation. Decreasing step size to 0.5
22/05/15 23:03:46 ERROR StrongWolfeLineSearch: Encountered bad values in function evaluation. Decreasing step size to 0.5
22/05/15 23:03:46 ERROR StrongWolfeLineSearch: Encountered bad values in function evaluation. Decreasing step size to 0.25
22/05/15 23:03:46 ERROR StrongWolfeLineSearch: Encountered bad values in function evaluation. Decreasing step size to 0.125
22/05/15 23:03:46 ERROR StrongWolfeLineSearch: Encountered bad values in function evaluation. Decreasing step size to 0.0625
22/05/15 23:03:46 ERROR StrongWolfeLineSearch: Encountered bad values in functio

finished
Layers :  [4, 2, 3]


In [5]:
resultDF = model.transform(testDF)
accuracy = eva.evaluate(resultDF)
print("Accuracy : ", accuracy)

Accuracy :  0.9696969696969697


In [8]:
trainDF.printSchema()

root
 |-- sepal-length: double (nullable = true)
 |-- sepal-width: double (nullable = true)
 |-- petal-length: double (nullable = true)
 |-- petal-width: double (nullable = true)
 |-- class: string (nullable = true)
 |-- label: double (nullable = false)
 |-- features: vector (nullable = true)

