In [15]:
import pandas as pd
import scipy
import numpy as np
import math

In [66]:
from itertools import combinations
from tqdm import tqdm

# 1. Support calculation
def calculate_support(itemset, transactions):
    """Return the fraction of transactions that contain every item of `itemset`.

    Parameters
    ----------
    itemset : iterable of hashable items
        The items that must all be present (typically a set).
    transactions : iterable of collections
        Each transaction may be a set, list, tuple, ... of items.

    Returns
    -------
    float
        Matching transactions divided by the total number of transactions,
        or 0.0 when there are no transactions at all (instead of raising
        ZeroDivisionError), mirroring how the confidence/lift helpers treat
        a zero denominator.
    """
    required = frozenset(itemset)
    total = 0
    matches = 0
    # Single pass that counts both totals, so `transactions` does not need len().
    for transaction in transactions:
        total += 1
        # issubset accepts any iterable, so non-set transactions also work.
        if required.issubset(transaction):
            matches += 1
    return matches / total if total else 0.0

# 2. Confidence calculation
def calculate_confidence(X, Y, transactions):
    """Return the confidence of the rule X -> Y: support(X | Y) / support(X).

    X and Y are combined with `|`, so they are expected to be sets.
    Returns 0 when support(X) is not positive.
    """
    antecedent_support = calculate_support(X, transactions)
    joint_support = calculate_support(X | Y, transactions)
    if antecedent_support > 0:
        return joint_support / antecedent_support
    return 0

# 3. Lift calculation
def calculate_lift(X, Y, transactions):
    """Return the lift of the rule X -> Y: confidence(X -> Y) / support(Y).

    Returns 0 when support(Y) is not positive.
    """
    rule_confidence = calculate_confidence(X, Y, transactions)
    consequent_support = calculate_support(Y, transactions)
    if consequent_support > 0:
        return rule_confidence / consequent_support
    return 0

# 4. Association-rule mining
def generate_rules(transactions, min_support=0.1, min_confidence=0.1, min_lift=1.0):
    """Enumerate association rules X -> {item} that pass all three thresholds.

    Itemsets are visited by increasing size. For every itemset whose support
    is at least `min_support`, each of its items is tried as a single-item
    consequent Y with the remaining items as antecedent X; the rule is kept
    when confidence >= `min_confidence` and lift >= `min_lift`.

    Because support can only shrink when items are added to an itemset, two
    shortcuts are taken that do not change the result:
      * a candidate is skipped without scanning the transactions when one of
        its (size - 1)-subsets did not reach `min_support`;
      * the search stops at the first size with no frequent itemset.
    Supports of frequent itemsets are cached and reused for confidence and
    lift instead of rescanning the transactions for every candidate rule.

    Parameters
    ----------
    transactions : sequence of sets
        Iterated several times, so it must be re-iterable (e.g. a list).
    min_support, min_confidence, min_lift : float
        Inclusive lower bounds for the three metrics.

    Returns
    -------
    list of dict
        One dict per accepted rule with keys "rule" (formatted as
        "{X} -> {Y}"), "support", "confidence" and "lift".
    """
    items = set(item for transaction in transactions for item in transaction)  # every distinct item
    results = []
    # Support of every itemset that met min_support so far, keyed by frozenset.
    frequent_support = {}

    # Walk itemsets size by size; tqdm reports one tick per size.
    for size in tqdm(range(1, len(items) + 1)):
        found_frequent = False

        for combo in combinations(items, size):
            itemset = set(combo)

            # Apriori pruning: an itemset with an infrequent subset cannot be frequent.
            if size > 1 and any(
                frozenset(itemset - {item}) not in frequent_support for item in itemset
            ):
                continue

            support = calculate_support(itemset, transactions)

            # Only itemsets that pass the support threshold are processed further.
            if not support >= min_support:
                continue

            found_frequent = True
            frequent_support[frozenset(itemset)] = support

            # A single item has an empty antecedent, so it yields no rule.
            if size == 1:
                continue

            # Try every item of the itemset as the consequent.
            for item in itemset:
                X = itemset - {item}
                Y = {item}

                # Both subsets are frequent (otherwise the itemset would have
                # been pruned), so their supports are already cached.
                support_X = frequent_support[frozenset(X)]
                support_Y = frequent_support[frozenset(Y)]
                confidence = support / support_X if support_X > 0 else 0
                lift = confidence / support_Y if support_Y > 0 else 0

                # Keep the rule only when confidence and lift both pass.
                if confidence >= min_confidence and lift >= min_lift:
                    results.append({
                        "rule": f"{X} -> {Y}",
                        "support": support,
                        "confidence": confidence,
                        "lift": lift
                    })

        # No frequent itemset of this size means none exists at any larger size.
        if not found_frequent:
            break

    return results

In [45]:
# Toy market-basket data: 20 transactions, each a set of purchased items.
transactions = [
    {"우유", "빵"},
    {"기저귀", "맥주"},
    {"빵", "콜라"},
    {"우유", "기저귀", "맥주"},
    {"맥주", "빵"},
    {"우유", "계란"},
    {"빵", "콜라", "계란"},
    {"우유", "기저귀"},
    {"우유", "맥주"},
    {"콜라", "맥주"},
    {"우유", "맥주", "계란"},
    {"빵", "계란"},
    {"우유", "콜라"},
    {"기저귀", "맥주"},
    {"빵", "기저귀", "콜라"},
    {"우유", "빵", "맥주"},
    {"우유", "콜라", "계란"},
    {"맥주", "콜라"},
    {"빵", "기저귀"},
    {"우유", "기저귀", "콜라"},
]
X = {"우유"}
Y = {"빵"}

# Metrics for the single rule X -> Y.
support = calculate_support(X | Y, transactions)
confidence = calculate_confidence(X, Y, transactions)
lift = calculate_lift(X, Y, transactions)

# One line per metric, rounded to two decimals.
print(
    f"Support({X | Y}): {support:.2f}",
    f"Confidence({X} -> {Y}): {confidence:.2f}",
    f"Lift({X} -> {Y}): {lift:.2f}",
    sep="\n",
)

Support({'빵', '우유'}): 0.10
Confidence({'우유'} -> {'빵'}): 0.20
Lift({'우유'} -> {'빵'}): 0.50


In [46]:
# Every distinct item that occurs in any transaction.
{item for transaction in transactions for item in transaction}

{'계란', '기저귀', '맥주', '빵', '우유', '콜라'}

In [65]:
# Step-by-step trace of the rule-generation loop on a small 6-transaction sample.
transactions = [
    {"우유", "빵"},
    {"기저귀", "맥주"},
    {"빵", "콜라"},
    {"우유", "기저귀", "맥주"},
    {"맥주", "빵"},
    {"우유", "계란"},
]
X = {"우유"}
Y = {"빵"}
min_support = 0.1
min_confidence = 0.1
min_lift = 1.0
items = {item for transaction in transactions for item in transaction}  # set of all items
results = []

print(f'items : {items}')

# Enumerate every possible itemset, smallest first, and compute its support.
for size in range(1, len(items) + 1):
    for itemset in map(set, combinations(items, size)):
        support = calculate_support(itemset, transactions)
        print(f'itemset : {itemset} / support : {support}')

        # Itemsets below the support threshold produce no rules.
        if support < min_support:
            continue

        # Try each item of the itemset as the consequent of a rule.
        for item in itemset:
            print(f'item : {item}')
            X = itemset - {item}
            Y = {item}
            print(f'X = {X} and Y = {Y}')

            # An empty antecedent (single-item itemset) is not a rule.
            if not X:
                continue

            confidence = calculate_confidence(X, Y, transactions)
            lift = calculate_lift(X, Y, transactions)

            # Keep the rule only when confidence and lift both pass.
            if confidence >= min_confidence and lift >= min_lift:
                results.append({
                    "rule": f"{X} -> {Y}",
                    "support": support,
                    "confidence": confidence,
                    "lift": lift
                })

    # Printed once per itemset size, showing the rules accumulated so far.
    print(results)

items : {'콜라', '우유', '기저귀', '계란', '빵', '맥주'}
itemset : {'콜라'} / support : 0.16666666666666666
item : 콜라
X = set() and Y = {'콜라'}
itemset : {'우유'} / support : 0.5
item : 우유
X = set() and Y = {'우유'}
itemset : {'기저귀'} / support : 0.3333333333333333
item : 기저귀
X = set() and Y = {'기저귀'}
itemset : {'계란'} / support : 0.16666666666666666
item : 계란
X = set() and Y = {'계란'}
itemset : {'빵'} / support : 0.5
item : 빵
X = set() and Y = {'빵'}
itemset : {'맥주'} / support : 0.5
item : 맥주
X = set() and Y = {'맥주'}
[]
itemset : {'콜라', '우유'} / support : 0.0
itemset : {'콜라', '기저귀'} / support : 0.0
itemset : {'콜라', '계란'} / support : 0.0
itemset : {'콜라', '빵'} / support : 0.16666666666666666
item : 콜라
X = {'빵'} and Y = {'콜라'}
item : 빵
X = {'콜라'} and Y = {'빵'}
itemset : {'콜라', '맥주'} / support : 0.0
itemset : {'기저귀', '우유'} / support : 0.16666666666666666
item : 기저귀
X = {'우유'} and Y = {'기저귀'}
item : 우유
X = {'기저귀'} and Y = {'우유'}
itemset : {'계란', '우유'} / support : 0.16666666666666666
item : 계란
X = {'우유'} and Y = {'

In [67]:
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori

# Five example transactions as plain lists; the last one lists 'Onion' twice.
dataset = [
    ["Milk", "Onion", "Nutmeg", "Kidney Beans", "Eggs", "Yogurt"],
    ["Dill", "Onion", "Nutmeg", "Kidney Beans", "Eggs", "Yogurt"],
    ["Milk", "Apple", "Kidney Beans", "Eggs"],
    ["Milk", "Unicorn", "Corn", "Kidney Beans", "Yogurt"],
    ["Corn", "Onion", "Onion", "Kidney Beans", "Ice cream", "Eggs"],
]

In [69]:
# Learn the item vocabulary from the transactions, then encode them as an array.
te = TransactionEncoder()
te.fit(dataset)
te_ary = te.transform(dataset)

In [72]:
# Wrap the encoded array in a DataFrame with one column per item name.
df = pd.DataFrame(data=te_ary, columns=te.columns_)

In [73]:
# Display the encoded transaction table (one boolean column per item).
df

Unnamed: 0,Apple,Corn,Dill,Eggs,Ice cream,Kidney Beans,Milk,Nutmeg,Onion,Unicorn,Yogurt
0,False,False,False,True,False,True,True,True,True,False,True
1,False,False,True,True,False,True,False,True,True,False,True
2,True,False,False,True,False,True,True,False,False,False,False
3,False,True,False,False,False,True,True,False,False,True,True
4,False,True,False,True,True,True,False,False,True,False,False


In [76]:
# Itemsets with support of at least 0.6, plus a column holding each itemset's size.
frequent_itemsets = apriori(df, min_support=0.6, use_colnames=True).assign(
    length=lambda frame: frame['itemsets'].apply(len)
)
frequent_itemsets

Unnamed: 0,support,itemsets,length
0,0.8,(Eggs),1
1,1.0,(Kidney Beans),1
2,0.6,(Milk),1
3,0.6,(Onion),1
4,0.6,(Yogurt),1
5,0.8,"(Kidney Beans, Eggs)",2
6,0.6,"(Onion, Eggs)",2
7,0.6,"(Kidney Beans, Milk)",2
8,0.6,"(Kidney Beans, Onion)",2
9,0.6,"(Kidney Beans, Yogurt)",2
