# Generalized Sequential Pattern

In [1]:
%matplotlib inline
import os
import math
import pickle
import operator
import gsp as gsp
import numpy as np
import pandas as pd
from tqdm import tqdm
from fim import apriori
from datetime import datetime
import matplotlib.pyplot as plt

# Path template for figure files; '{}' is a str.format placeholder.
img = 'plots/figure_{}.png'
# Relative path template for input data; '{}' is filled with a file name
# via str.format (see load_dataset).
dataFolder = '../../data/{}'

In [2]:
def load_dataset():
    """Read 'customer_supermarket.csv' and return it as a cleaned DataFrame.

    The tab-separated file is read with its first column as index and exact
    duplicate rows are dropped. 'Sale' is converted to float, 'CustomerID' to
    the nullable 'Int64' dtype and 'BasketDate' to datetime. Rows without a
    CustomerID are removed from the returned frame.
    """
    csv_path = dataFolder.format('customer_supermarket.csv')
    raw = pd.read_csv(csv_path, sep='\t', index_col=0)
    df = raw.drop_duplicates()

    # 'Sale' is written with a decimal comma; swap it for a dot so the
    # column can be cast to float64.
    sale_text = df['Sale'].str.replace(",", ".")
    df['Sale'] = sale_text.astype(float)
    df['CustomerID'] = df['CustomerID'].astype('Int64')
    df['BasketDate'] = pd.to_datetime(df['BasketDate'])

    has_customer = df['CustomerID'].notna()
    return df[has_customer]

def load_customer_baskets():
    """Build, for every customer, the chronological sequence of their baskets.

    Returns:
        list[dict]: one entry per customer, in ascending CustomerID order.
        Each entry has the keys 'customer_id' and 'baskets'; 'baskets' is a
        list of baskets ordered by the earliest BasketDate of their rows, and
        every basket is the list of ProdID values it contains.
    """
    df = load_dataset()
    df = df[['CustomerID', 'ProdID', 'BasketID', 'BasketDate']]
    df = df.sort_values(by='BasketDate')

    # groupby sorts its result by the group keys, so grouping alone would
    # order each customer's baskets by BasketID and discard the date order
    # established above. Keep each basket's earliest date through the
    # aggregation and sort on it explicitly (BasketID only breaks ties).
    baskets = (
        df.groupby(['CustomerID', 'BasketID'])
        .agg({'ProdID': list, 'BasketDate': 'min'})
        .reset_index()
        .sort_values(by=['CustomerID', 'BasketDate', 'BasketID'])
    )

    # Rows keep their relative order inside each group, so every customer's
    # list of baskets stays chronological.
    sequences = baskets.groupby('CustomerID')['ProdID'].apply(list).reset_index()

    return [
        {'customer_id': row['CustomerID'], 'baskets': row['ProdID']}
        for _, row in sequences.iterrows()
    ]

def load_products():
    """Map every ProdID to its description(s).

    Returns:
        dict: ProdID -> the distinct ProdDescr values seen for that product,
        joined with ';' in order of first appearance in the dataset. Missing
        descriptions are skipped, so a product with none maps to ''.
    """
    df = load_dataset()[['ProdID', 'ProdDescr']]

    def join_unique(descriptions):
        # dict.fromkeys de-duplicates while keeping first-seen order. A set
        # would make the joined order (and therefore split(';')[0]) change
        # from one interpreter run to the next.
        present = (d for d in descriptions if not pd.isna(d))
        return ';'.join(dict.fromkeys(present))

    joined = df.groupby('ProdID')['ProdDescr'].agg(list).apply(join_unique)
    return joined.to_dict()

def load_transactions():
    """Return one record per (BasketID, CustomerID) pair.

    Each record is a dict with the keys 'basket_id', 'customer_id' and
    'products', where 'products' is the list of ProdID values in that basket.
    """
    columns = ['BasketID', 'ProdID', 'CustomerID']
    grouped = load_dataset()[columns].groupby(['BasketID', 'CustomerID'])
    per_basket = grouped['ProdID'].agg(list).reset_index()

    return [
        {
            'basket_id': record['BasketID'],
            'customer_id': record['CustomerID'],
            'products': record['ProdID'],
        }
        for _, record in per_basket.iterrows()
    ]

def get_grid_search(*args):
    """Expand value lists into a list of parameter dictionaries.

    The Cartesian product of the positional iterables is taken; the first
    three components of every combination are stored under the keys 'supp',
    'zmin' and 'conf'. Components beyond the third are ignored, and fewer
    than three iterables raise IndexError.
    """
    from itertools import product

    return [
        {'supp': combo[0], 'zmin': combo[1], 'conf': combo[2]}
        for combo in product(*args)
    ]

def save(rules, fname):
    """Pickle `rules` into the file `fname`, overwriting any existing file."""
    with open(fname, "wb") as handle:
        pickle.dump(rules, handle)
            
def read(fname):
    """Load and return the object pickled in the file `fname`.

    NOTE: unpickling can execute arbitrary code, so only use this on files
    from a trusted source.
    """
    with open(fname, "rb") as handle:
        return pickle.load(handle)

#### Map the rules and baskets with a unique id

#### Histogram of the rules' confidence

#### Histogram of the rules' lift

#### Extract the users that contain a basket in the premise of a rule

#### Rules sorted by Confidence

#### Rules explained

#### GSP

#### Load rules from file

In [7]:
# Product lookup (ProdID -> description string) used to label the sequences.
products = load_products()
# Pickle file expected to hold the previously mined sequences.
f_name = 'rules_200.pkl'
print(f"Number of products: {len(products.keys())}")

Number of products: 3684


In [8]:
# Use the stored sequences when the pickle is present; otherwise stay empty.
file_found = os.path.isfile(f_name)
if file_found:
    print('file exists!')
rules = read(f_name) if file_found else []
print(f"Number of rules: {len(rules)}")

# Keep only the entries whose first field holds more than one element.
filtered_rules = [candidate for candidate in rules if len(candidate[0]) > 1]
print(f"Number of filtered rules: {len(filtered_rules)}")

# Highest second field first (it is printed as "sup" below).
filtered_rules.sort(key=lambda entry: entry[1], reverse=True)

for rule in filtered_rules:
    # Take the first item of every element, then show the first description
    # stored for that product.
    sequence = [element[0] for element in rule[0]]
    names = [products[prod_id].split(';')[0] for prod_id in sequence]
    print(f"- Rule: {names}, sup={rule[1]}", end="\n")

file exists!
Number of rules: 402
Number of filtered rules: 32
- Rule: ['CREAM HANGING HEART T-LIGHT HOLDER', 'CREAM HANGING HEART T-LIGHT HOLDER'], sup=416
- Rule: ['REGENCY CAKESTAND 3 TIER', 'REGENCY CAKESTAND 3 TIER'], sup=363
- Rule: ['JUMBO BAG RED RETROSPOT', 'JUMBO BAG RED RETROSPOT'], sup=317
- Rule: ['ASSORTED COLOUR BIRD ORNAMENT', 'ASSORTED COLOUR BIRD ORNAMENT'], sup=294
- Rule: ['LUNCH BAG RED SPOTTY', 'LUNCH BAG RED SPOTTY'], sup=284
- Rule: ['PARTY BUNTING', 'PARTY BUNTING'], sup=282
- Rule: ['SET OF 3 CAKE TINS PANTRY DESIGN ', 'SET OF 3 CAKE TINS PANTRY DESIGN '], sup=249
- Rule: ['JUMBO BAG RED RETROSPOT', 'JUMBO BAG VINTAGE DOILY '], sup=237
- Rule: ['JUMBO BAG VINTAGE DOILY ', 'JUMBO BAG VINTAGE DOILY '], sup=236
- Rule: ['LUNCH BAG  BLACK SKULL.', 'LUNCH BAG  BLACK SKULL.'], sup=233
- Rule: ['LUNCH BAG SUKI DESIGN ', 'LUNCH BAG SUKI DESIGN '], sup=230
- Rule: ['CREAM HANGING HEART T-LIGHT HOLDER', 'CREAM HANGING HEART T-LIGHT HOLDER', 'CREAM HANGING HEART T-LIGHT 