In [9]:
# adaptado de: https://github.com/bcosta12/ml-demos/blob/master/201-apriori/AprioriAlgorithmIntro.ipynb

In [None]:
# importando as libs que irão ser usadas no projeto
from mlxtend.frequent_patterns import apriori
import pandas as pd

In [6]:
# Toy market-basket data: one row per transaction. The first column is the
# transaction ID and the remaining columns are 0/1 flags, one per product.
columns = ['ID', 'Cerveja', 'Fralda', 'Chiclete', 'Refrigerante', 'Snack']
dataset = [
    # IDs are sequential starting at 1, so they are generated rather than typed.
    [transaction_id, *flags]
    for transaction_id, flags in enumerate(
        [
            (1, 1, 1, 1, 0),
            (1, 1, 0, 0, 0),
            (1, 1, 1, 0, 1),
            (1, 1, 0, 1, 1),
            (0, 1, 0, 1, 0),
            (0, 1, 0, 0, 0),
            (0, 1, 0, 0, 0),
            (0, 0, 0, 1, 1),
            (0, 0, 0, 1, 1),
        ],
        start=1,
    )
]

In [12]:
# Assemble the transactions into a DataFrame whose columns are named by `columns`.
df = pd.DataFrame(data=dataset, columns=columns)
# Last expression of the cell: display the frame.
df

Unnamed: 0,ID,Cerveja,Fralda,Chiclete,Refrigerante,Snack
0,1,1,1,1,1,0
1,2,1,1,0,0,0
2,3,1,1,1,0,1
3,4,1,1,0,1,1
4,5,0,1,0,1,0
5,6,0,1,0,0,0
6,7,0,1,0,0,0
7,8,0,0,0,1,1
8,9,0,0,0,1,1


In [10]:
class Apriori:
    """Thin wrapper around ``mlxtend.frequent_patterns.apriori``.

    Stores a transactions DataFrame together with a minimum-support
    threshold, runs the frequent-itemset search on it, and filters the
    resulting table by itemset length and support.
    """

    # Default minimum support, used when the constructor receives no threshold.
    threshold = 0.5
    # Transactions DataFrame; replaced per instance by the constructor.
    df = None

    def __init__(self, df, threshold=None, transform_bol=False):
        """Apriori Constructor.
        :param pandas.DataFrame df: transactions dataset (1 or 0).
        :param float threshold: min_support to use; when None the class
            default (0.5) is kept.
        :param bool transform_bol: when True, convert the 1/0 flags to
            True/False. The conversion is stored on the instance only; the
            DataFrame passed in is left untouched.
        :raises TypeError: if ``df`` is not a pandas.DataFrame.
        """

        # Reject anything that is not a DataFrame before storing it.
        self._validate_df(df)

        # Keep the transactions and, when one was given, the minimum support.
        self.df = df
        if threshold is not None:
            self.threshold = threshold

        # Optionally turn the 1/0 flags into booleans.
        if transform_bol:
            self._transform_bol()


    def _validate_df(self, df=None):
        """Validate that df is a pandas.DataFrame.
        :param pandas.DataFrame df: transactions dataset (1 or 0).
        :return:
        :rtype: void
        :raises TypeError: if ``df`` is None or any other non-DataFrame object.
        """

        # TypeError is a subclass of Exception, so callers that catch the
        # generic Exception raised previously keep working.
        if not isinstance(df, pd.DataFrame):
            raise TypeError("df must be a valid pandas.DataFrame.")


    def _transform_bol(self):
        """Transform (1 or 0) dataset to (True or False).

        Every cell equal to 1 becomes True; every other value becomes False.
        :return:
        :rtype: void
        """

        # self.df is the very object the caller handed to the constructor, so
        # assigning column by column would silently rewrite the caller's data.
        # Comparing the whole frame builds a new boolean DataFrame instead.
        self.df = self.df.eq(1)


    def _apriori(self, use_colnames=False, max_len=None, count=True):
        """Call apriori mlxtend.frequent_patterns function.
        :param bool use_colnames: Flag to use columns name in final DataFrame.
        :param int max_len: Maximum length of itemsets generated.
        :param bool count: Flag to add a 'length' column holding the size of
            each itemset.
        :return: apriori DataFrame.
        :rtype: pandas.DataFrame
        """

        # Delegate the frequent-itemset search to mlxtend.
        apriori_df = apriori(self.df,
                             min_support=self.threshold,
                             use_colnames=use_colnames,
                             max_len=max_len)

        # Record how many items each itemset contains; filter() relies on it.
        if count:
            apriori_df['length'] = apriori_df['itemsets'].apply(len)

        return apriori_df


    def run(self, use_colnames=False, max_len=None, count=True):
        """Apriori Runner Function.
        :param bool use_colnames: Flag to use columns name in final DataFrame.
        :param int max_len: Maximum length of itemsets generated.
        :param bool count: Flag to count length of the itemsets.
        :return: apriori DataFrame.
        :rtype: pandas.DataFrame
        """

        # Public entry point: forwards every option unchanged to _apriori.
        return self._apriori(use_colnames=use_colnames,
                             max_len=max_len,
                             count=count)


    def filter(self, apriori_df, length, threshold):
        """Filter Apriori DataFrame by length and threshold.
        :param pandas.DataFrame apriori_df: Apriori DataFrame.
        :param int length: Length of itemsets required (exact match).
        :param float threshold: Minimum support required (inclusive).
        :return: apriori filtered DataFrame.
        :rtype: pandas.DataFrame
        :raises Exception: if ``apriori_df`` has no 'length' column.
        """

        # The 'length' column only exists when the search ran with count=True.
        if 'length' not in apriori_df.columns:
            raise Exception("apriori_df has no length. Please run the Apriori with count=True.")

        has_length = apriori_df['length'] == length
        has_support = apriori_df['support'] >= threshold
        return apriori_df[has_length & has_support]

In [13]:
# Run the Apriori algorithm.

# The transaction ID is not a product, so remove that column when it is present.
if 'ID' in df.columns:
    del df['ID']

# Build the runner with a minimum support of 0.4; transform_bol=True asks it
# to turn the 1/0 flags into booleans.
apriori_runner = Apriori(df=df, threshold=0.4, transform_bol=True)
# Mine the frequent itemsets, labelled with the column names.
apriori_df = apriori_runner.run(use_colnames=True)
# Last expression of the cell: display the result.
apriori_df

Unnamed: 0,support,itemsets,length
0,0.444444,(Cerveja),1
1,0.777778,(Fralda),1
2,0.555556,(Refrigerante),1
3,0.444444,(Snack),1
4,0.444444,"(Cerveja, Fralda)",2
