<a href="https://colab.research.google.com/github/BuczynskiRafal/ML/blob/main/unsupervised/12_apriori.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


# Import bibliotek


In [1]:
import pandas as pd
import numpy as np

# Wygenerowanie danych



In [2]:
data = {'produkty': ['chleb jajka mleko', 'mleko ser', 'chleb masło ser', 'chleb jajka']}

transactions = pd.DataFrame(data=data, index=[1, 2, 3, 4])
transactions

Unnamed: 0,produkty
1,chleb jajka mleko
2,mleko ser
3,chleb masło ser
4,chleb jajka


# Przygotowanie danych


In [3]:
# rozwinięcie kolumny do obiektu DataFrame
expand = transactions['produkty'].str.split(expand=True)
expand

Unnamed: 0,0,1,2
1,chleb,jajka,mleko
2,mleko,ser,
3,chleb,masło,ser
4,chleb,jajka,


In [5]:
# wydobycie nazw wszystkich produktów
products = []
for col in expand.columns:
    for product in expand[col].unique():
        if product is not None and product not in products:
            products.append(product)

products.sort()
print(products)

['chleb', 'jajka', 'masło', 'mleko', 'ser']


In [6]:
transactions_encoded = np.zeros((len(transactions), len(products)), dtype='int8')
transactions_encoded

array([[0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0]], dtype=int8)

In [7]:
# kodowanie 0-1
for row in zip(range(len(transactions)), transactions_encoded, expand.values):
    for idx, product in enumerate(products):
        if product in row[2]:
            transactions_encoded[row[0], idx] = 1

transactions_encoded

array([[1, 1, 0, 1, 0],
       [0, 0, 0, 1, 1],
       [1, 0, 1, 0, 1],
       [1, 1, 0, 0, 0]], dtype=int8)

In [8]:
transactions_encoded_df = pd.DataFrame(transactions_encoded, columns=products)
transactions_encoded_df

Unnamed: 0,chleb,jajka,masło,mleko,ser
0,1,1,0,1,0
1,0,0,0,1,1
2,1,0,1,0,1
3,1,1,0,0,0


# Algorytm Apriori


In [9]:
from mlxtend.frequent_patterns import apriori, association_rules

supports = apriori(df=transactions_encoded_df, min_support=0, use_colnames=True)
supports

Unnamed: 0,support,itemsets
0,0.75,(chleb)
1,0.5,(jajka)
2,0.25,(masło)
3,0.5,(mleko)
4,0.5,(ser)
5,0.5,"(chleb, jajka)"
6,0.25,"(chleb, masło)"
7,0.25,"(mleko, chleb)"
8,0.25,"(chleb, ser)"
9,0.0,"(jajka, masło)"


In [10]:
supports = apriori(transactions_encoded_df, min_support=0.3, use_colnames=True)
supports

Unnamed: 0,support,itemsets
0,0.75,(chleb)
1,0.5,(jajka)
2,0.5,(mleko)
3,0.5,(ser)
4,0.5,"(chleb, jajka)"


In [11]:
rules = association_rules(supports, metric='confidence', min_threshold=0.65)
rules = rules.iloc[:, [0, 1, 4, 5, 6]]
rules

Unnamed: 0,antecedents,consequents,support,confidence,lift
0,(chleb),(jajka),0.5,0.666667,1.333333
1,(jajka),(chleb),0.5,1.0,1.333333


In [12]:
rules.sort_values(by='lift', ascending=False)

Unnamed: 0,antecedents,consequents,support,confidence,lift
0,(chleb),(jajka),0.5,0.666667,1.333333
1,(jajka),(chleb),0.5,1.0,1.333333
