In [3]:
from pyspark.sql import SparkSession
from pyspark.ml.fpm import FPGrowth

In [4]:
# Obtain a session through SparkSession's builder (getOrCreate).
# NOTE: the variable is called `sqlCtx`, but the object assigned here comes
# from SparkSession.builder, not from a legacy SQLContext.
sqlCtx = SparkSession.builder.getOrCreate()

In [5]:
# Build the transaction table: one row per transaction, pairing a sequential
# id (0..8, generated by enumerate) with the list of items in that transaction.
df = sqlCtx.createDataFrame(
    list(enumerate([
        ["B", "A", "T"],        # transaction 0
        ["A", "C"],             # transaction 1
        ["A", "S"],             # transaction 2
        ["B", "A", "C"],        # transaction 3
        ["B", "S"],             # transaction 4
        ["A", "S"],             # transaction 5
        ["B", "S"],             # transaction 6
        ["B", "A", "S", "T"],   # transaction 7
        ["B", "A", "S"],        # transaction 8
    ])),
    schema=["id", "items"],     # column names
)

# Configure the FP-Growth frequent-pattern miner.
#
# minSupport
#     Minimum support an itemset needs in order to be reported as frequent.
#     Support is the fraction of transactions that contain the itemset: an
#     itemset found in 3 of 9 transactions has support 3/9, about 0.33.
#
# minConfidence
#     Minimum confidence required for an association rule to be generated.
#     Confidence measures how often a rule holds: if itemset X occurs in
#     4 transactions and X and Y occur together in only 2 of them, the rule
#     X => Y has confidence 2/4 = 0.5. This threshold does not influence
#     which itemsets are mined as frequent; it only filters the rules that
#     are derived from those frequent itemsets.
fpGrowth = FPGrowth(
    minConfidence=0.2,
    minSupport=0.2,
    itemsCol="items",
)

# Train the FP-Growth model on the transaction DataFrame.
model = fpGrowth.fit(df)

# Print the frequent itemsets together with their occurrence counts.
model.freqItemsets.show()

# Print the association rules generated from the frequent itemsets.
model.associationRules.show()

+---------+----+
|    items|freq|
+---------+----+
|      [A]|   7|
|      [B]|   6|
|   [B, A]|   4|
|      [S]|   6|
|   [S, B]|   4|
|[S, B, A]|   2|
|   [S, A]|   4|
|      [T]|   2|
|   [T, B]|   2|
|[T, B, A]|   2|
|   [T, A]|   2|
|      [C]|   2|
|   [C, A]|   2|
+---------+----+

+----------+----------+------------------+------------------+------------------+
|antecedent|consequent|        confidence|              lift|           support|
+----------+----------+------------------+------------------+------------------+
|    [S, B]|       [A]|               0.5|0.6428571428571428|0.2222222222222222|
|       [S]|       [B]|0.6666666666666666|               1.0|0.4444444444444444|
|       [S]|       [A]|0.6666666666666666|0.8571428571428571|0.4444444444444444|
|    [T, A]|       [B]|               1.0|               1.5|0.2222222222222222|
|    [B, A]|       [S]|               0.5|              0.75|0.2222222222222222|
|    [B, A]|       [T]|               0.5|              2.25|0