In [None]:
from pyspark.sql import SparkSession
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.classification import RandomForestClassifier
from pyspark.ml.evaluation import MulticlassClassificationEvaluator

# Obtain the Spark session for this job (getOrCreate is used, so an already
# active session is returned instead of building a second one)
spark = (
    SparkSession.builder
    .appName("Random Forest Classification")
    .getOrCreate()
)

# Assuming you have a DataFrame named 'data' containing your features and labels
# data = ...

# The classifier and evaluator further down refer to the label by the name
# "label", so fail early with a clear message if that column is absent.
if "label" not in data.columns:
    raise ValueError(
        "Expected a 'label' column in 'data'; found columns: "
        + ", ".join(data.columns)
    )

# Assemble every column except the label into a single "features" vector column.
# The features are selected by name rather than by dropping the last column
# positionally: if "label" is not the final column, a positional slice would leak
# the label into the feature vector and silently discard a real feature.
feature_columns = [column for column in data.columns if column != "label"]
vector_assembler = VectorAssembler(inputCols=feature_columns, outputCol="features")
data_assembled = vector_assembler.transform(data)

# Randomly partition the assembled rows with weights 0.8 (train) and 0.2 (test).
# An explicit seed (1234) is passed instead of leaving the split unseeded.
split_weights = [0.8, 0.2]
train_data, test_data = data_assembled.randomSplit(split_weights, seed=1234)

# Configure a 10-tree random forest that reads the assembled "features" column
# and is trained against the "label" column
rf_classifier = RandomForestClassifier(
    featuresCol="features",
    labelCol="label",
    numTrees=10,
)

# Fit the forest on the training split
model = rf_classifier.fit(train_data)

# Run the fitted model over the held-out test split
predictions = model.transform(test_data)

# Compute accuracy by comparing the "prediction" column against "label"
evaluator = MulticlassClassificationEvaluator(
    metricName="accuracy",
    labelCol="label",
    predictionCol="prediction",
)
accuracy = evaluator.evaluate(predictions)
print("Accuracy:", accuracy)

# Shut down the Spark session now that the accuracy has been reported.
# NOTE(review): this line is only reached if every step above succeeds; an
# exception earlier in the script leaves the session running.
spark.stop()
