# Apriori

### Será analisada uma base de dados gerada manualmente para ilustrar a geração de regras de associação de dados por meio do algoritmo Apriori.

### Bibliotecas a serem utilizadas

In [1]:
from itertools import chain, combinations
import operator

### Função para obtenção de todas as combinações de itens

In [2]:
def subsets(itemset):
    """Return an iterator over every non-empty combination of ``itemset``.

    Combinations are produced as tuples, grouped by increasing size: first
    all 1-element combinations, then all 2-element ones, and so on up to the
    single combination that contains every element of ``itemset``.
    """
    sizes = range(1, len(itemset) + 1)
    return chain.from_iterable([combinations(itemset, size) for size in sizes])

### Função para obtenção de combinações de k-itens

In [3]:
def joinset(itemset, k):
    """Build the set of k-element unions obtainable from pairs in ``itemset``.

    Every ordered pair of members (including a member paired with itself) is
    united, and the union is kept only when it has exactly ``k`` elements.

    Args:
        itemset: iterable of set-like members (frozensets in this file).
        k: required size of each resulting union.

    Returns:
        A ``set`` holding the distinct unions of size ``k``.
    """
    return {
        left.union(right)
        for left in itemset
        for right in itemset
        if len(left.union(right)) == k
    }

### Função para determinar os candidatos a itemsets frequentes

In [4]:
def get_candidates(transaction_list, itemset, min_support):
    """Collect the itemsets of every size that reach ``min_support``.

    Starts from the 1-item sets in ``itemset`` and grows level by level:
    the survivors of one level are joined into sets one element larger,
    those are filtered by support, and the process stops at the first level
    that yields nothing.

    Args:
        transaction_list: sized collection of transactions.
        itemset: the initial 1-item sets.
        min_support: minimum support an itemset needs to be kept.

    Returns:
        A dict mapping each retained itemset to its support.
    """
    frequent = {}
    size = 1
    level = get_freq_itemset(transaction_list, itemset, min_support)
    while True:
        frequent.update(level)
        size += 1
        # Grow the survivors of the previous level by one element.
        grown = joinset(level, size)
        level = get_freq_itemset(transaction_list, grown, min_support)
        if not level:  # empty dict: nothing of this size is frequent
            return frequent

### Função para determinar os itens mais frequentes de acordo com o valor de suporte

In [5]:
def get_freq_itemset(transaction_list, itemset, min_support):
    """Compute the support of each itemset and keep the frequent ones.

    The support of an itemset is the number of transactions that contain it
    divided by the total number of transactions.

    Args:
        transaction_list: sized collection of transactions; each one is
            passed to ``issubset`` of a candidate.
        itemset: iterable of candidate itemsets.
        min_support: threshold; candidates with support below it are dropped.

    Returns:
        A dict mapping each retained itemset to its support.
    """
    total = len(transaction_list)
    frequent = {}
    for candidate in itemset:
        hits = sum(1 for row in transaction_list if candidate.issubset(row))
        support = hits / total
        if support >= min_support:
            frequent[candidate] = support
    return frequent

### Construção da lista de itemsets e transações

In [6]:
def itemset_from_data(data):
    """Split raw rows into the 1-item sets and the list of transactions.

    Args:
        data: iterable of rows, each row an iterable of hashable items.

    Returns:
        A tuple ``(itemset, transaction_list)`` where ``itemset`` is a set
        holding one single-element frozenset per distinct item, and
        ``transaction_list`` holds one frozenset per row, in row order.
    """
    itemset = set()
    transaction_list = []
    for row in data:
        transaction = frozenset(row)
        transaction_list.append(transaction)
        # Iterate the frozen transaction rather than ``row`` so rows that are
        # one-shot iterators still contribute their items. No membership
        # guard is needed: ``set.add`` already ignores duplicates, and
        # testing the raw item against a set of wrapped singletons never
        # matched ordinary items anyway.
        for item in transaction:
            itemset.add(frozenset([item]))
    return itemset, transaction_list

### Algoritmo Apriori

In [7]:
def apriori(data, min_support, min_confidence):
    """Run the Apriori pipeline and derive association rules.

    Prints the 1-item sets and the transactions built from ``data``, finds
    the itemsets that reach ``min_support``, and then, for every such
    itemset with more than one element, splits it into an antecedent and
    the remaining consequent. A rule is kept when

        confidence = support(itemset) / support(antecedent)

    is at least ``min_confidence``.

    Args:
        data: rows of items, as accepted by ``itemset_from_data``.
        min_support: minimum support for an itemset to be kept.
        min_confidence: minimum confidence for a rule to be kept.

    Returns:
        A tuple ``(rules, candidates)``: ``rules`` is a list of
        ``(antecedent, consequent, confidence)`` tuples and ``candidates``
        maps each retained itemset to its support.
    """
    # Item sets and transactions
    itemset, transaction_list = itemset_from_data(data)
    print('\n')
    print(f'Item Sets: \n \n{list(itemset)}')
    print('\n')
    print(f'Transactions: \n \n{list(transaction_list)}')

    # Generate the frequent itemsets
    candidates = get_candidates(transaction_list, itemset, min_support)

    rules = []
    for sets in candidates:
        if len(sets) <= 1:
            continue  # a rule needs both an antecedent and a consequent
        # antecedent | consequent always rebuilds ``sets``, so its support
        # can be looked up once per itemset.
        whole_support = float(candidates[sets])
        for chosen in subsets(sets):
            consequent = sets.difference(chosen)
            if not consequent:
                continue  # the antecedent took every item
            antecedent = frozenset(chosen)
            confidence = whole_support / candidates[antecedent]
            if confidence >= min_confidence:
                rules.append((antecedent, consequent, confidence))
    return rules, candidates

### Função para impressão dos resultados

In [8]:
def print_report(rules, candidates):
    """Print the frequent itemsets and the rules, each in ascending order.

    Args:
        rules: iterable of ``(antecedent, consequent, confidence)`` tuples;
            printed sorted by confidence.
        candidates: dict mapping itemsets to their support; printed sorted
            by support.

    Numeric values are rounded to four decimal places for display.
    """
    print('\n')
    print('---Frequent Itemsets---')
    print('[Itemset] | [Support]')
    by_support = sorted(candidates.items(), key=lambda pair: pair[1])
    for items, support in by_support:
        print(f'{tuple(items)} : {round(support, 4)}')

    print('\n')
    print('---Rules---')
    by_confidence = sorted(rules, key=operator.itemgetter(2))
    print('[Rule] | [Confidence]')
    for antecedent, consequent, confidence in by_confidence:
        print(f'{tuple(antecedent)} => {tuple(consequent)} : {round(confidence, 4)}')

### Função para leitura de dados CSV

In [9]:
def get_csv_data(filename):
    """Read a comma-separated text file into a list of rows.

    Leading and trailing whitespace is stripped from the whole file and from
    each line; each line is then split on ``','``. No quoting or escaping is
    interpreted.

    Args:
        filename: path of the file to read.

    Returns:
        A list with one list of string fields per line.
    """
    # The context manager closes the handle even if reading fails; the
    # previous version left the file open.
    with open(filename, 'r') as f:
        csv_data = f.read()
    return [row.strip().split(',') for row in csv_data.strip().split('\n')]

### Main()

In [10]:
# Load the transactions from the CSV file and echo them.
data = get_csv_data('simple_data.csv')
print('\n')
print('Leitura dos dados:')
print(data)

# Ask the user for both thresholds; each is parsed as a float.
# NOTE(review): the second prompt text contains the typo 'Confindence'.
print('\n')
min_support = float(input('Minimum Support: ')) # e.g. 0.6
min_confidence = float(input('Minimum Confindence: ')) # e.g. 0.7

# Mine the frequent itemsets and rules, then print the report.
rules, candidates = apriori(data, min_support, min_confidence)
print_report(rules, candidates)



Leitura dos dados:
[['Bread', 'Milk'], ['Bread', 'Diapers', 'Beer', 'Eggs'], ['Milk', 'Diapers', 'Beer', 'Cola'], ['Bread', 'Milk', 'Diapers', 'Beer'], ['Bread', 'Milk', 'Diapers', 'Cola'], ['Bread', 'Milk'], ['Bread', 'Cola', 'Beer', 'Milk'], ['Milk', 'Bread', 'Beer', 'Cola'], ['Bread', 'Milk', 'Diapers', 'Beer'], ['Bread', 'Beer', 'Diapers', 'Diapers']]




Minimum Support:  0.6
Minimum Confindence:  0.7




Item Sets: 
 
[frozenset({'Eggs'}), frozenset({'Cola'}), frozenset({'Beer'}), frozenset({'Diapers'}), frozenset({'Milk'}), frozenset({'Bread'})]


Transactions: 
 
[frozenset({'Bread', 'Milk'}), frozenset({'Bread', 'Diapers', 'Eggs', 'Beer'}), frozenset({'Milk', 'Diapers', 'Cola', 'Beer'}), frozenset({'Bread', 'Milk', 'Beer', 'Diapers'}), frozenset({'Bread', 'Milk', 'Cola', 'Diapers'}), frozenset({'Bread', 'Milk'}), frozenset({'Bread', 'Beer', 'Cola', 'Milk'}), frozenset({'Milk', 'Beer', 'Cola', 'Bread'}), frozenset({'Bread', 'Milk', 'Beer', 'Diapers'}), frozenset({'Bread', 'Beer', 'Diapers'})]


---Frequent Itemsets---
[Itemset] | [Support]
('Diapers',) : 0.6
('Bread', 'Beer') : 0.6
('Beer',) : 0.7
('Milk', 'Bread') : 0.7
('Milk',) : 0.8
('Bread',) : 0.9


---Rules---
[Rule] | [Confidence]
('Bread',) => ('Milk',) : 0.7778
('Beer',) => ('Bread',) : 0.8571
('Milk',) => ('Bread',) : 0.875
