In [21]:
from pyspark import SparkConf, SparkContext
from pyspark.ml.fpm import FPGrowth
from pyspark.sql import SQLContext

# Reuse the active SparkContext if the notebook already has one; otherwise
# start a new one named "FP-Growth". getOrCreate() expects a SparkConf (not an
# application-name string), so the name is supplied through the conf object.
# NOTE: when a context is already running, the conf passed here is ignored.
sc = SparkContext.getOrCreate(SparkConf().setAppName("FP-Growth"))

# SQLContext wraps the SparkContext and is used below to build DataFrames.
sqlCtx = SQLContext(sc)

In [22]:
# Toy input for FP-Growth: one row per transaction, pairing a transaction id
# with the list of item ids bought together in that transaction.
df = sqlCtx.createDataFrame(
    data=[
        (0, [1, 2, 5]),     # transaction 0
        (1, [1, 2, 3, 5]),  # transaction 1
        (2, [1, 2]),        # transaction 2
    ],
    schema=["id", "items"],  # column names
)

# Configure the FP-Growth estimator.
#
#   minSupport    -- smallest fraction of transactions an itemset must occur in
#                    to be reported as frequent. An item present in 3 of 5
#                    transactions has support 3/5 = 0.6.
#   minConfidence -- smallest confidence an association rule X => Y must reach
#                    to be generated. Confidence measures how often the rule
#                    holds: if X occurs in 4 transactions and X, Y occur
#                    together in only 2 of them, confidence(X => Y) = 2/4 = 0.5.
#                    This threshold only filters the generated rules; it has no
#                    effect on which itemsets are mined as frequent.
fpGrowth = FPGrowth(
    itemsCol="items",
    minSupport=0.5,
    minConfidence=0.6,
)

# Train the model on the transactions in df.
model = fpGrowth.fit(df)

# Show every itemset that met minSupport, together with its frequency.
model.freqItemsets.show()

# Show the association rules that met minConfidence.
model.associationRules.show()

+---------+----+
|    items|freq|
+---------+----+
|      [1]|   3|
|      [2]|   3|
|   [2, 1]|   3|
|      [5]|   2|
|   [5, 2]|   2|
|[5, 2, 1]|   2|
|   [5, 1]|   2|
+---------+----+

+----------+----------+------------------+----+------------------+
|antecedent|consequent|        confidence|lift|           support|
+----------+----------+------------------+----+------------------+
|    [5, 2]|       [1]|               1.0| 1.0|0.6666666666666666|
|    [2, 1]|       [5]|0.6666666666666666| 1.0|0.6666666666666666|
|    [5, 1]|       [2]|               1.0| 1.0|0.6666666666666666|
|       [5]|       [2]|               1.0| 1.0|0.6666666666666666|
|       [5]|       [1]|               1.0| 1.0|0.6666666666666666|
|       [1]|       [2]|               1.0| 1.0|               1.0|
|       [1]|       [5]|0.6666666666666666| 1.0|0.6666666666666666|
|       [2]|       [1]|               1.0| 1.0|               1.0|
|       [2]|       [5]|0.6666666666666666| 1.0|0.6666666666666666|
+-------