In [None]:
# basic imports 

import os # OS e.g directory structure
import sys
import numpy as np # linear algebra
import scipy as sc  # scientific computing
import pandas as pd # data processing, file I/O
import seaborn as sns  # visualization
import matplotlib.pyplot as plt # visualization
import math
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Spark related imports

from pyspark.sql import SparkSession, Row
from pyspark.sql.functions import *
from pyspark.sql.types import *

from pyspark.ml.feature import StringIndexer
from pyspark.ml.fpm import FPGrowth
from pyspark.ml.fpm import PrefixSpan
from pyspark.ml.evaluation import RegressionEvaluator

In [None]:
from pyspark.sql import SparkSession

# Shared session for the whole notebook: getOrCreate() hands back an already
# active session when there is one instead of starting a second one.
spark = (
    SparkSession.builder
    .appName("abd_recommendation")
    .getOrCreate()
)

In [None]:
def fpGrowth(df, minSup, minConf):
    """Fit an FP-Growth model on a DataFrame of baskets.

    Args:
        df: Spark DataFrame whose ``items`` column holds the basket of each row.
        minSup: Minimum support, forwarded to ``FPGrowth`` as ``minSupport``.
        minConf: Minimum confidence, forwarded to ``FPGrowth`` as ``minConfidence``.

    Returns:
        The fitted model returned by ``FPGrowth.fit``.
    """
    # Fit the estimator configured right here. The earlier code called `.fit`
    # on an undefined name and raised NameError on every call. The local is
    # also not named `fpGrowth`, so it no longer shadows this function.
    estimator = FPGrowth(itemsCol="items", minSupport=minSup, minConfidence=minConf)
    return estimator.fit(df)

def modelShow(model, limit = 20):
    """Print the frequent itemsets and association rules of a fitted model.

    Args:
        model: Fitted FP-Growth model exposing ``freqItemsets`` and
            ``associationRules``.
        limit: Maximum number of rows printed for each of the two tables.

    Returns:
        None. Both tables are printed through ``DataFrame.show``.
    """
    # Frequent itemsets, most frequent first, without truncating cell contents.
    # `limit` is honoured here too; it used to apply to the rules table only.
    model.freqItemsets.orderBy(col("freq").desc()).show(limit, truncate=False)
    # Association rules, ordered by confidence and then by lift, both descending.
    model.associationRules.orderBy(col("confidence").desc(), col("lift").desc()).show(limit)


Read the parquet files produced by the data-preparation step

In [None]:
# Parquet outputs of the data-preparation step, loaded in a fixed order.
products, brands, categories = (
    spark.read.parquet(path) for path in ("products", "brands", "categories")
)

Creating the market-basket models for user vs. (products, brands, categories), purchased and viewed

In [None]:
# User-level inputs for the purchase models.
df_user_product, df_user_brand, df_user_category = (
    spark.read.parquet(path)
    for path in ("user_products", "user_brands", "user_categories")
)
# User-level inputs for the view models (the "_v" variants).
df_user_product_v, df_user_brand_v, df_user_category_v = (
    spark.read.parquet(path)
    for path in ("user_products_v", "user_brands_v", "user_categories_v")
)

In [None]:
# Purchases, per user: fit on products and print the resulting tables.
model_u_p = fpGrowth(df_user_product, minSup=0.001, minConf=0.001)
modelShow(model_u_p, limit=20)

In [None]:
# Purchases, per user: fit on brands and print the resulting tables.
model_u_b = fpGrowth(df_user_brand, minSup=0.001, minConf=0.001)
modelShow(model_u_b, limit=20)

In [None]:
# Purchases, per user: fit on categories and print the resulting tables.
model_u_c = fpGrowth(df_user_category, minSup=0.001, minConf=0.001)
modelShow(model_u_c, limit=20)

In [None]:
# Views, per user: fit on products and print the resulting tables.
model_u_p_v = fpGrowth(df_user_product_v, minSup=0.001, minConf=0.001)
modelShow(model_u_p_v, limit=20)

In [None]:
# Views, per user: fit on brands and print the resulting tables.
model_u_b_v = fpGrowth(df_user_brand_v, minSup=0.001, minConf=0.001)
modelShow(model_u_b_v, limit=20)

In [None]:
# Views, per user: fit on categories and print the resulting tables.
model_u_c_v = fpGrowth(df_user_category_v, minSup=0.001, minConf=0.001)
modelShow(model_u_c_v, limit=20)

Creating the market-basket models for session vs. (products, brands, categories), purchased and viewed

In [None]:
# Session-level inputs for the purchase models.
df_session_product, df_session_brand, df_session_category = (
    spark.read.parquet(path)
    for path in ("session_products", "session_brands", "session_categories")
)
# Session-level inputs for the view models (the "_v" variants).
df_session_product_v, df_session_brand_v, df_session_category_v = (
    spark.read.parquet(path)
    for path in ("session_products_v", "session_brands_v", "session_categories_v")
)

In [None]:
# Purchases, per session: fit on products and print the resulting tables.
model_s_p = fpGrowth(df_session_product, minSup=0.001, minConf=0.001)
modelShow(model_s_p, limit=20)

In [None]:
# Purchases, per session: fit on brands and print the resulting tables.
model_s_b = fpGrowth(df_session_brand, minSup=0.001, minConf=0.001)
modelShow(model_s_b, limit=20)

In [None]:
# session x category purchase
# Uses `df_session_category`, the frame loaded from "session_categories";
# the misspelled `df_session_cateogry` raised NameError.
model_s_c = fpGrowth(df_session_category, 0.001, 0.001)
modelShow(model_s_c)

In [None]:
# Views, per session: fit on products and print the resulting tables.
model_s_p_v = fpGrowth(df_session_product_v, minSup=0.001, minConf=0.001)
modelShow(model_s_p_v, limit=20)

In [None]:
# Views, per session: fit on brands and print the resulting tables.
model_s_b_v = fpGrowth(df_session_brand_v, minSup=0.001, minConf=0.001)
modelShow(model_s_b_v, limit=20)

In [None]:
# Views, per session: fit on categories and print the resulting tables.
model_s_c_v = fpGrowth(df_session_category_v, minSup=0.001, minConf=0.001)
modelShow(model_s_c_v, limit=20)

In [None]:
# Persist the association rules of every fitted model as parquet.
# mode="overwrite" replaces whatever an earlier run left at the same path.

# User-level models, purchases
model_u_p.associationRules.write.parquet("model_user_products", mode="overwrite")
model_u_b.associationRules.write.parquet("model_user_brands", mode="overwrite")
model_u_c.associationRules.write.parquet("model_user_categories", mode="overwrite")

# User-level models, views
model_u_p_v.associationRules.write.parquet("model_user_products_v", mode="overwrite")
model_u_b_v.associationRules.write.parquet("model_user_brands_v", mode="overwrite")
model_u_c_v.associationRules.write.parquet("model_user_categories_v", mode="overwrite")

# Session-level models, purchases
model_s_p.associationRules.write.parquet("model_session_products", mode="overwrite")
model_s_b.associationRules.write.parquet("model_session_brands", mode="overwrite")
model_s_c.associationRules.write.parquet("model_session_categories", mode="overwrite")

# Session-level models, views
model_s_p_v.associationRules.write.parquet("model_session_products_v", mode="overwrite")
model_s_b_v.associationRules.write.parquet("model_session_brands_v", mode="overwrite")
model_s_c_v.associationRules.write.parquet("model_session_categories_v", mode="overwrite")