In [6]:
import pandas as pd 
import numpy as np 
from itertools import combinations
from itertools import permutations
import itertools

In [2]:
class Apriori:
    def __init__(self, df):
        self.transactions = []
        self.list_of_frequent_items = []
        for purchase in df.values:
            series = pd.Series(purchase)
            series = series.dropna()
            result = series.tolist()
            self.transactions.append(result)
        all_items = np.array(df.values).ravel()
        self.unique_items_in_dataset = (pd.Series(pd.unique(all_items)).dropna()).tolist()
        
    def calculate_support(self, itemset):
        itemset_count = 0
        total_transactions = len(self.transactions)

        for transaction in self.transactions:
            if all(item in transaction for item in itemset):
                itemset_count += 1

        support = itemset_count / total_transactions
        return support

    def calculate_confidence(self,Y, Z):
        support_YZ = self.calculate_support(Y + Z)
        support_Y = self.calculate_support(Y)

        confidence = support_YZ / support_Y
        return confidence
    
    def calculate_lift(self,Y, Z):
        confidence = self.calculate_confidence(Y, Z)
        support_Z = self.calculate_support(Z)

        lift = confidence / support_Z
        return lift
    
    def get_all_combs(self,fris, k):
        combinations_list = [list(x) for x in (combinations(fris, k))]
        return combinations_list
    
    def frequent_itemsets(self,combinations):
        frequent_items = []
        for item in combinations:
            if self.calculate_support(item) >= self.minimum_support:
                frequent_items.append(item)
        return frequent_items
    
    def get_and_prune(self,minimum_support):
        self.minimum_support = minimum_support
        k = 1
        fris = self.unique_items_in_dataset
        all_frequents = []
        while True:
            fris = self.frequent_itemsets(self.get_all_combs(fris, k))
            sfris = list(pd.unique(np.array(fris).flatten()))
            all_frequents.append(fris)
            fris = sfris
            k += 1
            if len(fris) == 0 :
                break
        self.all_frequents = all_frequents
        return all_frequents
    
    def generate_association_rules(self, min_confidence, min_lift):
        association_rules = []
        high_lifts_rules = []
        for itemset_list in self.all_frequents:
            for itemset in itemset_list:
                if len(itemset) > 1:
                    itemset_permutations = list(permutations(itemset, len(itemset)))

                    for permutation in itemset_permutations:
                        for i in range(1, len(permutation)):
                            antecedent = permutation[:i]
                            consequent = permutation[i:]

                            confidence = self.calculate_confidence(antecedent, consequent)
                            lift = self.calculate_lift(antecedent, consequent)
                        

                            if confidence >= min_confidence:
                                association_rules.append((antecedent, consequent, confidence, lift ))
                                if lift >= min_lift:
                                    high_lifts_rules.append((antecedent, consequent, confidence, lift))

        self.assosiation_rules = association_rules
        self.high_lifts_rules = high_lifts_rules                      
        return association_rules , high_lifts_rules
    
    def print_assosiation_rules(self):
        print("assossiation rules : ")
        for rule in self.assosiation_rules:
            antecedent = rule[0]
            consequent = rule[1]
            confidence = round(rule[2], 3)
            lift = round(rule[3], 3)
            print(str(antecedent)+" ---> " + str(consequent) + "|| confidence = " + str(confidence)+"  &  lift = " + str(lift))
        print("-"*200)
        print("strong assossiation_rules : ")
        for rule in self.high_lifts_rules:
            antecedent = rule[0]
            consequent = rule[1]
            confidence = round(rule[2], 3)
            lift = round(rule[3], 3)
            print(str(antecedent)+" ---> " + str(consequent) + "|| confidence = " + str(confidence)+ "  &  lift = " + str(lift))

In [3]:
datad = {
    "T1" : ["HotDogs", "Buns", "Ketchup"],
    "T2" : ["HotDogs", "Buns", np.nan],
    "T3" : ["HotDogs", "Coke", "Chips"],
    "T4" : ["Chips", "Coke", np.nan],
    "T5" : ["Chips", "Ketchup", np.nan],
    "T6" : ["HotDogs", "Coke", "Chips"]
}
data = pd.DataFrame(datad).T
data


Unnamed: 0,0,1,2
T1,HotDogs,Buns,Ketchup
T2,HotDogs,Buns,
T3,HotDogs,Coke,Chips
T4,Chips,Coke,
T5,Chips,Ketchup,
T6,HotDogs,Coke,Chips


In [4]:
a = Apriori(data)

In [13]:
frequent_items = a.get_and_prune(minimum_support=1/3)

In [6]:
a.generate_association_rules(0.6, 2)

([(('Buns',), ('HotDogs',), 1.0, 1.5),
  (('Coke',), ('HotDogs',), 0.6666666666666666, 1.0),
  (('Coke',), ('Chips',), 1.0, 1.5),
  (('Chips',), ('Coke',), 0.75, 1.5),
  (('HotDogs', 'Coke'), ('Chips',), 1.0, 1.5),
  (('HotDogs', 'Chips'), ('Coke',), 1.0, 2.0),
  (('Coke',), ('HotDogs', 'Chips'), 0.6666666666666666, 2.0),
  (('Coke', 'HotDogs'), ('Chips',), 1.0, 1.5),
  (('Coke',), ('Chips', 'HotDogs'), 0.6666666666666666, 2.0),
  (('Coke', 'Chips'), ('HotDogs',), 0.6666666666666666, 1.0),
  (('Chips', 'HotDogs'), ('Coke',), 1.0, 2.0),
  (('Chips', 'Coke'), ('HotDogs',), 0.6666666666666666, 1.0)],
 [(('HotDogs', 'Chips'), ('Coke',), 1.0, 2.0),
  (('Coke',), ('HotDogs', 'Chips'), 0.6666666666666666, 2.0),
  (('Coke',), ('Chips', 'HotDogs'), 0.6666666666666666, 2.0),
  (('Chips', 'HotDogs'), ('Coke',), 1.0, 2.0)])

In [7]:
a.print_assosiation_rules()

assossiation rules : 
('Buns',) ---> ('HotDogs',)|| confidence = 1.0  &  lift = 1.5
('Coke',) ---> ('HotDogs',)|| confidence = 0.667  &  lift = 1.0
('Coke',) ---> ('Chips',)|| confidence = 1.0  &  lift = 1.5
('Chips',) ---> ('Coke',)|| confidence = 0.75  &  lift = 1.5
('HotDogs', 'Coke') ---> ('Chips',)|| confidence = 1.0  &  lift = 1.5
('HotDogs', 'Chips') ---> ('Coke',)|| confidence = 1.0  &  lift = 2.0
('Coke',) ---> ('HotDogs', 'Chips')|| confidence = 0.667  &  lift = 2.0
('Coke', 'HotDogs') ---> ('Chips',)|| confidence = 1.0  &  lift = 1.5
('Coke',) ---> ('Chips', 'HotDogs')|| confidence = 0.667  &  lift = 2.0
('Coke', 'Chips') ---> ('HotDogs',)|| confidence = 0.667  &  lift = 1.0
('Chips', 'HotDogs') ---> ('Coke',)|| confidence = 1.0  &  lift = 2.0
('Chips', 'Coke') ---> ('HotDogs',)|| confidence = 0.667  &  lift = 1.0
-------------------------------------------------------------------------------------------------------------------------------------------------------------------

In [12]:
df = pd.DataFrame(pd.read_csv("./Market_Basket_Optimisation.csv", header=None))


In [9]:
ap = Apriori(df)

In [10]:
ap.get_and_prune(0.008)
ap.generate_association_rules(0.2, 3)
ap.print_assosiation_rules()

assossiation rules : 
('shrimp',) ---> ('mineral water',)|| confidence = 0.33  &  lift = 1.385
('shrimp',) ---> ('milk',)|| confidence = 0.246  &  lift = 1.9
('shrimp',) ---> ('frozen vegetables',)|| confidence = 0.233  &  lift = 2.447
('shrimp',) ---> ('spaghetti',)|| confidence = 0.297  &  lift = 1.704
('shrimp',) ---> ('chocolate',)|| confidence = 0.252  &  lift = 1.537
('avocado',) ---> ('mineral water',)|| confidence = 0.348  &  lift = 1.46
('cottage cheese',) ---> ('mineral water',)|| confidence = 0.301  &  lift = 1.264
('tomato juice',) ---> ('mineral water',)|| confidence = 0.316  &  lift = 1.325
('low fat yogurt',) ---> ('mineral water',)|| confidence = 0.314  &  lift = 1.316
('low fat yogurt',) ---> ('eggs',)|| confidence = 0.22  &  lift = 1.221
('green tea',) ---> ('mineral water',)|| confidence = 0.235  &  lift = 0.986
('burgers',) ---> ('green tea',)|| confidence = 0.2  &  lift = 1.516
('green tea',) ---> ('french fries',)|| confidence = 0.216  &  lift = 1.263
('green tea'

In [8]:
print("requirements : ")
print(pd.__version__)
print(np.__version__)


requirements : 
2.2.1
1.23.5
