In [8]:
import findspark

#Finding Spark Module.
findspark.init()

from pyspark.mllib.tree import RandomForest, RandomForestModel
from pyspark import SparkContext
from pyspark.mllib.util import MLUtils

if __name__ == "__main__":
    
    #If SparkContext Is Already Set, Reset It
    sc.stop()
    
    #Initialising SparkContext
    sc = SparkContext(appName="RandomForrest")

    # Loading and parsing  data
    data = MLUtils.loadLibSVMFile(sc, 'RF_data.txt')
    
    # Data split into traning and testing (65:35)
    (training, test) = data.randomSplit([0.65, 0.35])

    #  Training the  RandomForest model.
    #  categoricalFeaturesInfo is empty because features are continuos
    
    classifier = RandomForest.trainClassifier(training, numClasses=2, categoricalFeaturesInfo={},
                                         numTrees=4, featureSubsetStrategy="auto",
                                         impurity='gini', maxDepth=4, maxBins=32)

    # Classify test data and check accuracy
    classifications = classifier.predict(test.map(lambda x: x.features))
    labelsAndClassifications = test.map(lambda lp: lp.label).zip(classifications)
    error = labelsAndClassifications.filter(
        lambda lp: lp[0] != lp[1]).count() / float(test.count())
    print('Error = ' + str(error))
    print('Learned classifier:')
    print(classifier.toDebugString())

    # Save and load model
    classifier.save(sc, "RFClassifier")
    loadModel = RandomForestModel.load(sc, "RFClassifier")
    # $example off$

Error = 0.05714285714285714
Learned classifier:
TreeEnsembleModel classifier with 4 trees

  Tree 0:
    If (feature 373 <= 11.5)
     If (feature 244 <= 200.5)
      Predict: 1.0
     Else (feature 244 > 200.5)
      Predict: 0.0
    Else (feature 373 > 11.5)
     Predict: 0.0
  Tree 1:
    If (feature 541 <= 20.0)
     If (feature 235 <= 90.0)
      Predict: 1.0
     Else (feature 235 > 90.0)
      If (feature 548 <= 153.5)
       Predict: 0.0
      Else (feature 548 > 153.5)
       Predict: 1.0
    Else (feature 541 > 20.0)
     If (feature 183 <= 3.0)
      Predict: 1.0
     Else (feature 183 > 3.0)
      If (feature 463 <= 2.0)
       Predict: 0.0
      Else (feature 463 > 2.0)
       Predict: 1.0
  Tree 2:
    If (feature 539 <= 3.0)
     If (feature 379 <= 29.0)
      If (feature 493 <= 1.5)
       Predict: 1.0
      Else (feature 493 > 1.5)
       Predict: 0.0
     Else (feature 379 > 29.0)
      Predict: 1.0
    Else (feature 539 > 3.0)
     Predict: 0.0
  Tree 3:
    If (feat