In [61]:
#Mining Sequential Patterns Efficiently by Prefix-Projected Pattern Growth
def generateItems(dataset):
    return sorted(set ([item for sublist1 in dataset for sublist2 in sublist1 for item in sublist2]))

def generateItemSupports(dataset, ignoreFirstEvent=False, prefix=[]):
    result = defaultdict(int)
    for sequence in dataset:
        if ignoreFirstEvent:
            sequence = sequence[1:]
        cooccurringItems = set()
        for itemset in sequence:
            if all(x in itemset for x in prefix):
                for item in itemset:
                    if not item in prefix:
                        cooccurringItems.add(item)
        for item in cooccurringItems:
            result [item] += 1
    return sorted(result.items())

In [62]:
def projectDatabase(dataset, prefix, newEvent):
    projectedDB = []
    for sequence in dataset:
        seqProjected = projectSequence(sequence, prefix, newEvent)
        if not seqProjected is None:
            projectedDB.append(seqProjected)
    return projectedDB

In [63]:
def projectSequence(sequence, prefix, newEvent):
    result = None
    for i, itemset in enumerate(sequence):
        if result is None:
            if (not newEvent) or i > 0:
                if (all(x in itemset for x in prefix)):
                    result = [list(itemset)]
        else:
            result.append(copy.copy(itemset))
    return result

In [64]:
def prefixSpan(dataset, minSupport):
    result = []
    itemCounts = generateItemSupports(dataset)
    for item, count in itemCounts:
        if count >= minSupport:
            newPrefix = [[item]]
            result.append((newPrefix, count))
            result.extend(prefixSpanInternal(projectDatabase(dataset, [item], False), minSupport, newPrefix))
    return result

def prefixSpanInternal(dataset, minSupport, prevPrefixes=[]):
    result = []
    
    # Add a new item to the last element (==same time)
    itemCountSameEvent = generateItemSupports(dataset, False, prefix=prevPrefixes[-1])
    for item, count in itemCountSameEvent:
        if (count >= minSupport) and item > prevPrefixes[-1][-1]:
            newPrefix = copy.deepcopy(prevPrefixes)
            newPrefix[-1].append(item)
            result.append((newPrefix, count))
            result.extend(prefixSpanInternal(projectDatabase(dataset, newPrefix[-1], False), minSupport, newPrefix))
        
    # Add a new event to the prefix
    itemCountSubsequentEvents = generateItemSupports(dataset, True)
    for item, count in itemCountSubsequentEvents:
        if count >= minSupport:
            newPrefix = copy.deepcopy(prevPrefixes)
            newPrefix.append([item])
            result.append((newPrefix, count))
            result.extend(prefixSpanInternal(projectDatabase(dataset, [item], True), minSupport, newPrefix))
    return result

In [65]:
#import packages
import pandas as pd
import numpy as np
from collections import defaultdict
import copy

#load data for which the clicks have lead to buys
clicks_buy = pd.read_csv("./data/click_buy.csv")
clicks_buy = clicks_buy.values

a = []
for i in clicks_buy:
    for j in i:
        j = j.split(', ')
        a.append([j])
buy_sequences = a

#perform prefixspan sequence mining to extract the sequence rules
prefixSpan(buy_sequences, 2)

[([['214507447']], 2),
 ([['214507447', '214702925']], 2),
 ([['214508287']], 3),
 ([['214508563']], 4),
 ([['214508942']], 3),
 ([['214509000']], 2),
 ([['214509013']], 2),
 ([['214509013', '214821296']], 2),
 ([['214509084']], 4),
 ([['214509135']], 3),
 ([['214510044']], 3),
 ([['214510738']], 2),
 ([['214510757']], 2),
 ([['214510757', '214820255']], 2),
 ([['214510805']], 3),
 ([['214510805', '214510854']], 2),
 ([['214510854']], 2),
 ([['214511015']], 2),
 ([['214511015', '214835120']], 2),
 ([['214512122']], 2),
 ([['214512122', '214512137']], 2),
 ([['214512137']], 2),
 ([['214512152']], 2),
 ([['214512152', '214580790']], 2),
 ([['214512416']], 3),
 ([['214512425']], 7),
 ([['214512425', '214537967']], 3),
 ([['214512425', '214601670']], 3),
 ([['214512611']], 3),
 ([['214512611', '214602605']], 2),
 ([['214512611', '214680371']], 2),
 ([['214512611', '214835120']], 2),
 ([['214512671']], 2),
 ([['214513170']], 2),
 ([['214514233']], 2),
 ([['214514287']], 2),
 ([['214514287',

In [59]:
#load data for which the clicks have lead to no-buys
clicks_nobuy = pd.read_csv("./data/click_nobuy.csv")
clicks_nobuy = clicks_nobuy.values
b = []
for i in clicks_nobuy:
    for j in i:
        j = j.split(', ')
        b.append([j])
nobuy_sequences = b

#perform prefixspan sequence mining to extract the sequence rules
prefixSpan(nobuy_sequences, 10)

[([['214507331']], 28),
 ([['214508287']], 38),
 ([['214508287', '214684093']], 16),
 ([['214508379']], 11),
 ([['214508563']], 58),
 ([['214508852']], 16),
 ([['214508942']], 85),
 ([['214508942', '214840762']], 19),
 ([['214509000']], 55),
 ([['214509000', '214601049']], 10),
 ([['214509013']], 47),
 ([['214509013', '214840762']], 10),
 ([['214509035']], 13),
 ([['214509082']], 12),
 ([['214509084']], 88),
 ([['214509084', '214512431']], 20),
 ([['214509084', '214539845']], 17),
 ([['214509084', '214613893']], 19),
 ([['214509084', '214639305']], 19),
 ([['214509135']], 22),
 ([['214509152']], 11),
 ([['214510044']], 60),
 ([['214510044', '214582387']], 21),
 ([['214510738']], 17),
 ([['214510740']], 10),
 ([['214510757']], 10),
 ([['214510783']], 20),
 ([['214510783', '214820252']], 11),
 ([['214510802']], 10),
 ([['214510854']], 25),
 ([['214510854', '214672963']], 10),
 ([['214511015']], 12),
 ([['214511465']], 20),
 ([['214512416']], 29),
 ([['214512416', '214537967']], 19),
 ([[

In [105]:
#Recommendation for buy-data
import queue
df = pd.read_csv("./data/recom-data.csv")
q = queue.Queue()
a=[]
user_clicked = '214716926'
for i in df.rule:
    i = i.split(", ")
    if len(i) > 1:
        for j in i:
            if j == user_clicked:
                a.append(i[1])
                q.put(i[1:])
a = list(set(a))
print("Recommended item for user clicked/viewed item %s is:" %user_search, q.get())

Recommended item for user clicked/viewed item 214716926 is: ['214716928']


In [106]:
print("Recommended group of items for user clicked/viewed item %s is:" %user_search, a[::-1])

Recommended group of items for user clicked/viewed item 214716926 is: ['214716928', '214717003', '214716971', '214717007', '214716930', '214716986', '214717005']
