In [1]:
 from pyspark.sql import SparkSession
import os
# Point PySpark at the interpreter it should launch for Python workers.
# NOTE(review): this is an absolute, environment-specific path — confirm it
# exists wherever the notebook is run.
os.environ['PYSPARK_PYTHON'] = '/opt/conda/bin/python'

# Build the session, or reuse the active one if it already exists.
# NOTE(review): "spark.some.config.option" looks like a placeholder key —
# confirm whether it is actually needed.
spark = (
    SparkSession.builder
    .appName("Python Spark SQL basic example")
    .config("spark.some.config.option", "some-value")
    .getOrCreate()
)

In [2]:
# Read the sample people records with the SparkSession created in the first cell.
# The frame gets its own descriptive name so it is not silently replaced when a
# later cell binds `df` to a different DataFrame.
people_df = spark.read.json("people.json")

# Print the DataFrame to stdout; the sample file yields the columns `age` and
# `name` with three rows (one of them has a null age).
people_df.show()

+----+-------+
| age|   name|
+----+-------+
|null|Michael|
|  30|   Andy|
|  19| Justin|
+----+-------+



In [3]:
from pyspark.ml.fpm import FPGrowth

# Toy transaction table: one row per basket, `items` holds the basket contents.
df = spark.createDataFrame(
    data=[(0, [1, 2, 5]), (1, [1, 2, 3, 5]), (2, [1, 2])],
    schema=["id", "items"],
)


In [4]:
# Configure the FP-growth estimator; nothing is computed until `fit` is called.
fpGrowth = FPGrowth(
    itemsCol="items",    # column holding each basket's item list
    minSupport=0.5,      # support threshold for frequent itemsets
    minConfidence=0.6,   # confidence threshold for association rules
)


In [5]:
# Fit the configured estimator on the transaction table to obtain the model.
model = fpGrowth.fit(dataset=df)

In [6]:
# Frequent itemsets found by the model, with their frequencies.
frequent_itemsets = model.freqItemsets
frequent_itemsets.show()

# Association rules generated from those itemsets
# (antecedent, consequent, confidence, ...).
association_rules = model.associationRules
association_rules.show()

# `transform` checks each row's items against all association rules and
# summarizes the matching consequents in the `prediction` column.
predictions = model.transform(df)
predictions.show()

+---------+----+
|    items|freq|
+---------+----+
|      [5]|   2|
|   [5, 1]|   2|
|[5, 1, 2]|   2|
|   [5, 2]|   2|
|      [2]|   3|
|      [1]|   3|
|   [1, 2]|   3|
+---------+----+

+----------+----------+------------------+----+
|antecedent|consequent|        confidence|lift|
+----------+----------+------------------+----+
|       [5]|       [1]|               1.0| 1.0|
|       [5]|       [2]|               1.0| 1.0|
|    [1, 2]|       [5]|0.6666666666666666| 1.0|
|    [5, 2]|       [1]|               1.0| 1.0|
|    [5, 1]|       [2]|               1.0| 1.0|
|       [2]|       [5]|0.6666666666666666| 1.0|
|       [2]|       [1]|               1.0| 1.0|
|       [1]|       [5]|0.6666666666666666| 1.0|
|       [1]|       [2]|               1.0| 1.0|
+----------+----------+------------------+----+

+---+------------+----------+
| id|       items|prediction|
+---+------------+----------+
|  0|   [1, 2, 5]|        []|
|  1|[1, 2, 3, 5]|        []|
|  2|      [1, 2]|       [5]|
+---+--