In [4]:
import numpy as np

# algorithm from https://www.geeksforgeeks.org/apriori-algorithm/

# check 
import pandas as pd 
from mlxtend.frequent_patterns import apriori, association_rules 

In [5]:
def encode(transactions):
    """
    Encode transactions into a numpy array.
    """
    # labels
    labels = set(item for transaction in transactions for item in transaction)
    # Mapping
    indexes = {item: index for index, item in enumerate(labels)}
    encoded = np.zeros((len(transactions), len(indexes)), dtype=int)
    
    for i, transaction in enumerate(transactions):
        for item in transaction:
            encoded[i, indexes[item]] = 1
    
    return encoded, labels

In [195]:
transactions = [
    ['bread', 'milk', 'butter'],
    ['bread', 'butter'],
    ['diaper', 'beer', 'cookies'],
    ['bread', 'milk', 'diaper', 'butter'],
    ['beer', 'diaper']
]

In [196]:

s = [[transactions[j][i], j] for j in range(len(transactions)) for i in range(len(transactions[j]))]
s

[['bread', 0],
 ['milk', 0],
 ['butter', 0],
 ['bread', 1],
 ['butter', 1],
 ['diaper', 2],
 ['beer', 2],
 ['cookies', 2],
 ['bread', 3],
 ['milk', 3],
 ['diaper', 3],
 ['butter', 3],
 ['beer', 4],
 ['diaper', 4]]

In [197]:
df = pd.DataFrame([[transactions[j][i], j, 1] for j in range(len(transactions)) for i in range(len(transactions[j]))], columns=['item', 'id', 'quontity'])
df

Unnamed: 0,item,id,quontity
0,bread,0,1
1,milk,0,1
2,butter,0,1
3,bread,1,1
4,butter,1,1
5,diaper,2,1
6,beer,2,1
7,cookies,2,1
8,bread,3,1
9,milk,3,1


In [198]:
df_encode = df.groupby(['id', "item"])["quontity"].sum().unstack().reset_index().fillna(0).set_index('id') > 0.5
df_encode

item,beer,bread,butter,cookies,diaper,milk
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,False,True,True,False,False,True
1,False,True,True,False,False,False
2,True,False,False,True,True,False
3,False,True,True,False,True,True
4,True,False,False,False,True,False


In [None]:
apriori(df_encode, min_support = 0.1, use_colnames = True, max_len=2, )

Unnamed: 0,support,itemsets
0,0.4,(beer)
1,0.6,(bread)
2,0.6,(butter)
3,0.2,(cookies)
4,0.6,(diaper)
5,0.4,(milk)
6,0.2,"(cookies, beer)"
7,0.4,"(diaper, beer)"
8,0.6,"(butter, bread)"
9,0.2,"(bread, diaper)"


In [200]:
my_apriori(transactions, min_support=0.5)

([['bread', 'butter'],
  ['milk', 'butter'],
  ['milk', 'bread'],
  ['beer', 'cookies'],
  ['beer', 'diaper']],
 [1.0, 1.0, 1.0, 0.5, 1.0])

In [None]:
enc1, lebels = encode(transactions)
print(enc1)
print(lebels)

[[0 0 1 1 0 0]
 [0 1 1 0 1 1]
 [1 0 0 1 1 1]
 [0 0 1 1 1 1]
 [1 0 1 1 1 0]]
{'cola', 'eggs', 'bread', 'milk', 'diaper', 'beer'}


array([[0, 0, 0, 0],
       [0, 0, 0, 0],
       [0, 0, 0, 0],
       [0, 0, 0, 0],
       [0, 0, 0, 0]])

In [None]:
def my_apriori(transactions, min_support = 0.6):
    """
    oh my oh my, apriori algorithm to find frequent continious itemsets.
    """

    encoded, labels = encode(transactions)
    labels = list(labels)

    len1 = [int(np.sum(encoded.T[i])) >= min_support * len(encoded) for i in range(encoded.shape[1])]
    numbers = [int(np.sum(encoded.T[i])) / len(encoded) for i in range(encoded.shape[1])]

    res = []
    num = []
    for i in range(len(len1)):
        if len1[i]:
            res.append(labels[i])
            num.append(numbers[i])
    
    len2 = np.zeros((encoded.shape[1], encoded.shape[1]), dtype=int)
    for i in range(encoded.shape[1]):
        for j in range(encoded.shape[0]):
            if encoded.T[i][j] == 1:
                for k in range(encoded.shape[1]):
                    len2[i][k] += encoded[j][k]
    
    len2 = np.float64(len2)
    for i in range (len(len2)):
        len2[i] /= len2[i][i]
    res2 = len2>= min_support
    comb = []
    num2 = []
    for i in range(len(labels)):
        for j in range(len(labels)):
            if res2[i][j] and i != j and len2[i][j] >= len2[j][i] :
                comb.append([labels[i], labels[j]])
                num2.append(float(round(len2[i][j], 2)))

    #return res, num
    return comb, num2

In [206]:
my_apriori(transactions)

([['butter', 'bread'],
  ['cookies', 'diaper'],
  ['cookies', 'beer'],
  ['bread', 'butter'],
  ['milk', 'butter'],
  ['milk', 'bread'],
  ['beer', 'diaper']],
 [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])

In [141]:
encode(transactions)[0].T

array([[0, 0, 1, 0, 1],
       [0, 1, 0, 0, 0],
       [1, 1, 0, 1, 1],
       [1, 0, 1, 1, 1],
       [0, 1, 1, 1, 1],
       [0, 1, 1, 1, 0]])

In [None]:
frequent_itemsets = apriori(transactions, min_support=0.6)

In [None]:

    # Find frequent itemsets with minimum support of 0.6 (60%)
    frequent_itemsets = apriori(transactions, min_support=0.6)
    
    # Print frequent itemsets
    print("Frequent Itemsets:")
    for length, itemsets in frequent_itemsets.items():
        print(f"\nItemsets of length {length}:")
        for itemset, count in itemsets.items():
            items = ', '.join(list(itemset))
            print(f"  {{{items}}} : {count}")
    
    # Generate association rules with minimum confidence of 0.7 (70%)
    rules = generate_association_rules(frequent_itemsets, min_confidence=0.7)
    
    # Print association rules
    print("\nAssociation Rules:")
    for antecedent, consequent, confidence, support in rules:
        ant_items = ', '.join(list(antecedent))
        cons_items = ', '.join(list(consequent))
        print(f"  {{{ant_items}}} -> {{{cons_items}}} (Confidence: {confidence:.2f}, Support: {support:.2f})")