In [3]:
import numpy as np
# Relative path of the transactions file; it is looked up from the notebook's working directory.
dataset_filename="affinity_dataset.txt"
# Parse the text file into a NumPy array.
X=np.loadtxt(dataset_filename)
# Preview only the first five rows instead of dumping the whole array.
print(X[:5])

[[0. 0. 1. 1. 1.]
 [1. 1. 0. 1. 0.]
 [1. 0. 1. 1. 0.]
 [0. 0. 1. 1. 1.]
 [0. 1. 0. 0. 1.]]


In [4]:
# Names of the features (items), in column order: features[i] labels column i of X.
features=["bread","milk","cheese","apples","bananas"]
print(features)

['bread', 'milk', 'cheese', 'apples', 'bananas']


In [5]:
# Let's find the support and confidence of the rule
# 'If a person buys apples, they are likely to buy bananas'.

# Number of samples (rows of X) in which apples were bought.
num_apple_purchases = 0

for sample in X:
    if sample[3] == 1:  # column 3 is "apples": this person bought apples
        num_apple_purchases+=1

print("{0} people bought Apples".format(num_apple_purchases))

# This count is how often the rule's premise occurs. It is the denominator of
# the confidence, not the rule's support.

36 people bought Apples


In [6]:
# Among the samples where apples were bought, count how often the rule
# "apples -> bananas" holds and how often it fails.
rule_valid=0
rule_invalid=0

for sample in X:
    if sample[3]==1:  # premise: apples were bought
        if sample[4]==1:  # conclusion: bananas were bought too
            rule_valid+=1 #Bought apple and banana
        else:
            rule_invalid+=1 #Bought apple but not banana

# num_apple_purchases is not defined in this cell; the cell that counts apple purchases must run first.
print("{0} rules are valid out of {1} cases where apple is bought".format(rule_valid,num_apple_purchases))
print("{0} rules are invalid out of {1} cases where apple is bought".format(rule_invalid,num_apple_purchases))
            

21 rules are valid out of 36 cases where apple is bought
15 rules are invalid out of 36 cases where apple is bought


In [7]:
# Now we have all the information needed to compute Support and Confidence
# Support: the number of samples in which the rule holds (a raw count, not a fraction).
support = rule_valid
# Confidence: the fraction of apple purchases that also include bananas.
# NOTE(review): this raises ZeroDivisionError if no sample contains apples.
confidence = rule_valid / num_apple_purchases
print("The support is {0} and the confidence is {1:.3f}.".format(support, confidence))
# Confidence can be thought of as a percentage using the following:
print("As a percentage, that is {0:.1f}%.".format(100 * confidence))

The support is 21 and the confidence is 0.583.
As a percentage, that is 58.3%.


In [8]:
# X.shape is (number of rows, number of columns): one row per sample, one column per feature.
n_samples,n_features=X.shape
print("The dataset has {0} samples and {1} features.".format(n_samples,n_features))

The dataset has 100 samples and 5 features.


In [9]:
from collections import defaultdict

# Counters for every single-premise rule "premise -> conclusion".
# Rule counters are keyed by (premise, conclusion) feature-index pairs.
valid_rules1 = defaultdict(int)    # premise bought and conclusion entry is 1
invalid_rules1 = defaultdict(int)  # premise bought but conclusion entry is not 1
num_occurances = defaultdict(int)  # keyed by premise index: samples in which the premise was bought

for sample in X:
    for premise in range(n_features):
        if sample[premise] == 0:
            continue #Item is not bought,so go for next item
        num_occurances[premise]+=1  #Item is bought

        for conclusion in range(n_features):
            if premise == conclusion: # skip the trivial rule x -> x
                continue

            if sample[conclusion] == 1:
                valid_rules1[(premise,conclusion)]+=1
            else:
                invalid_rules1[(premise,conclusion)]+=1


# Support is the raw count of valid occurrences; support1 is the same dict object as valid_rules1.
support1 = valid_rules1
# Confidence = valid count / number of samples containing the premise.
# Only rules that were valid at least once get an entry here; any other key
# reads as 0.0 through the defaultdict.
confidence1 = defaultdict(float)
for premise,conclusion in valid_rules1.keys():
    confidence1[(premise,conclusion)] = valid_rules1[(premise,conclusion)] / num_occurances[premise] 

In [98]:
# Print every single-premise rule that has a confidence entry, following the
# dict's iteration order. Keys are (premise, conclusion) feature indices.
for premise, conclusion in confidence1:
    # Translate the feature indices into readable item names.
    premise_name = features[premise]
    conclusion_name = features[conclusion]
    print("Rule: If a person buys {0} they will also buy {1}".format(premise_name, conclusion_name))
    print(" - Confidence: {0:.3f}".format(confidence1[(premise, conclusion)]))
    print(" - Support: {0}".format(support1[(premise, conclusion)]))
    print("")

Rule: If a person buys cheese they will also buy apples
 - Confidence: 0.610
 - Support: 25

Rule: If a person buys cheese they will also buy bananas
 - Confidence: 0.659
 - Support: 27

Rule: If a person buys apples they will also buy cheese
 - Confidence: 0.694
 - Support: 25

Rule: If a person buys apples they will also buy bananas
 - Confidence: 0.583
 - Support: 21

Rule: If a person buys bananas they will also buy cheese
 - Confidence: 0.458
 - Support: 27

Rule: If a person buys bananas they will also buy apples
 - Confidence: 0.356
 - Support: 21

Rule: If a person buys bread they will also buy milk
 - Confidence: 0.519
 - Support: 14

Rule: If a person buys bread they will also buy apples
 - Confidence: 0.185
 - Support: 5

Rule: If a person buys milk they will also buy bread
 - Confidence: 0.304
 - Support: 14

Rule: If a person buys milk they will also buy apples
 - Confidence: 0.196
 - Support: 9

Rule: If a person buys apples they will also buy bread
 - Confidence: 0.139
 

In [10]:
def print_rule(premise, conclusion, support1, confidence1, features):
    """Print one single-premise rule with its confidence and support.

    Args:
        premise: Index into ``features`` of the item that was bought.
        conclusion: Index into ``features`` of the item predicted to be bought.
        support1: Mapping from ``(premise, conclusion)`` to the support count.
        confidence1: Mapping from ``(premise, conclusion)`` to the confidence.
        features: Sequence of item names, indexed by feature number.

    The output is three lines followed by an empty line; nothing is returned.
    """
    rule_key = (premise, conclusion)
    if_item, then_item = features[premise], features[conclusion]
    print("Rule: If a person buys {0} they will also buy {1}".format(if_item, then_item))
    print(" - Confidence: {0:.3f}".format(confidence1[rule_key]))
    print(" - Support: {0}".format(support1[rule_key]))
    print("")

In [11]:
# Look up one specific rule by feature index: 3 -> 1, i.e. apples -> milk.
premise = 3
conclusion = 1
print_rule(premise, conclusion, support1, confidence1, features)

Rule: If a person buys apples they will also buy milk
 - Confidence: 0.250
 - Support: 9



In [12]:
# Mine every rule with two premises: (premise1, premise2) -> conclusion.
# Rule counters are keyed by (premise1, premise2, conclusion) feature indices.
valid_rules2 = defaultdict(int)     # both premises bought and conclusion entry is 1
invalid_rules2 = defaultdict(int)   # both premises bought but conclusion entry is not 1
num_occurances2 = defaultdict(int)  # keyed by (premise1, premise2): samples containing both premises

for sample in X:
    for premise1 in range(n_features):
        if sample[premise1] == 0:
            continue
        # premise2 ranges over all features again, so every pair is visited in
        # both orders: the (a, b) and (b, a) rules end up with identical counts.
        for premise2 in range(n_features):
            if sample[premise2] == 0 or premise1 == premise2:
                continue
            num_occurances2[(premise1,premise2)]+=1

            for conclusion in range(n_features):
                # The conclusion must be a third item, distinct from both premises.
                if conclusion == premise1 or conclusion == premise2:
                    continue
                if sample[conclusion]==1:
                    valid_rules2[(premise1,premise2,conclusion)]+=1
                else:
                    invalid_rules2[(premise1,premise2,conclusion)]+=1

# Support is the raw count of valid occurrences; support2 is the same dict object as valid_rules2.
support2=valid_rules2
# Only rules that were valid at least once get a confidence entry; any other
# key reads as 0.0 through the defaultdict.
confidence2 = defaultdict(float)

# Confidence = valid count / number of samples containing both premises.
for premise1,premise2,conclusion in valid_rules2.keys():
    confidence2[(premise1,premise2,conclusion)] = valid_rules2[(premise1,premise2,conclusion)] / num_occurances2[(premise1,premise2)] 
    

In [12]:
# Print every two-premise rule that has a confidence entry, following the
# dict's iteration order. Keys are (premise1, premise2, conclusion) feature indices.
for premise1,premise2, conclusion in confidence2:
    # Translate the feature indices into readable item names.
    premise_name1 = features[premise1]
    premise_name2 = features[premise2]
    conclusion_name = features[conclusion]
    print("Rule: If a person buys {0} and {1} they will also buy {2}".format(premise_name1, premise_name2, conclusion_name))
    print(" - Confidence: {0:.3f}".format(confidence2[(premise1,premise2, conclusion)]))
    print(" - Support: {0}".format(support2[(premise1,premise2,conclusion)]))
    print("")

Rule: If a person buys cheese and apples they will also buy bananas
 - Confidence: 0.680
 - Support: 17

Rule: If a person buys cheese and bananas they will also buy apples
 - Confidence: 0.630
 - Support: 17

Rule: If a person buys apples and cheese they will also buy bananas
 - Confidence: 0.680
 - Support: 17

Rule: If a person buys apples and bananas they will also buy cheese
 - Confidence: 0.810
 - Support: 17

Rule: If a person buys bananas and cheese they will also buy apples
 - Confidence: 0.630
 - Support: 17

Rule: If a person buys bananas and apples they will also buy cheese
 - Confidence: 0.810
 - Support: 17

Rule: If a person buys bread and milk they will also buy apples
 - Confidence: 0.286
 - Support: 4

Rule: If a person buys bread and apples they will also buy milk
 - Confidence: 0.800
 - Support: 4

Rule: If a person buys milk and bread they will also buy apples
 - Confidence: 0.286
 - Support: 4

Rule: If a person buys milk and apples they will also buy bread
 - Con

In [13]:
def print_rule2(premise1,premise2,conclusion,support2,confidence2,features):
    """Print one two-premise rule with its confidence and support.

    Args:
        premise1: Index into ``features`` of the first item bought.
        premise2: Index into ``features`` of the second item bought.
        conclusion: Index into ``features`` of the item predicted to be bought.
        support2: Mapping from ``(premise1, premise2, conclusion)`` to the support count.
        confidence2: Mapping from ``(premise1, premise2, conclusion)`` to the confidence.
        features: Sequence of item names, indexed by feature number.

    The output is three lines followed by an empty line; nothing is returned.
    """
    rule_key = (premise1, premise2, conclusion)
    first_item = features[premise1]
    second_item = features[premise2]
    then_item = features[conclusion]
    print("Rule: If a person buys {0} and {1} they will also buy {2}".format(first_item, second_item, then_item))
    print(" - Confidence: {0:.3f}".format(confidence2[rule_key]))
    print(" - Support: {0}".format(support2[rule_key]))
    print("")

In [23]:
# Look up one specific two-premise rule by feature index:
# (2, 1) -> 4, i.e. cheese and milk -> bananas.
premise1= 2
premise2 = 1
conclusion = 4
print_rule2(premise1,premise2,conclusion,support2,confidence2,features)

Rule: If a person buys cheese and milk they will also buy bananas
 - Confidence: 0.571
 - Support: 4



In [15]:
def count3(X):
    """Return the running total of the entries in ``X``.

    For a 0/1 purchase row this is the number of items bought. An empty
    iterable yields the integer 0.
    """
    total = 0
    for entry in X:
        total += entry
    return total

In [16]:
from pprint import pprint

# Mine every rule with three premises: (premise1, premise2, premise3) -> conclusion.
# Premise triples are generated in strictly increasing index order
# (premise1 < premise2 < premise3), so each item combination is counted once.
# Rule counters are keyed by (premise1, premise2, premise3, conclusion).
valid_rules3 = defaultdict(int)     # all three premises bought and conclusion entry is 1
invalid_rules3 = defaultdict(int)   # all three premises bought but conclusion entry is not 1
num_occurances3 = defaultdict(int)  # keyed by (premise1, premise2, premise3): samples containing all three

# No scratch variable named `sum` is bound in this cell: at notebook scope it
# would shadow the builtin `sum` for every cell that runs afterwards.
for sample in X:
    # A sample whose entries total less than 3 cannot contain three premises.
    if count3(sample) < 3:
        continue
    # premise1 stops at n_features - 3 so that two larger indices remain.
    for premise1 in range(n_features - 2):
        if sample[premise1] == 0:
            continue
        for premise2 in range(premise1 + 1, n_features):
            if sample[premise2] == 0:
                continue
            for premise3 in range(premise2 + 1, n_features):
                if sample[premise3] == 0:
                    continue
                num_occurances3[(premise1, premise2, premise3)] += 1

                for conclusion in range(n_features):
                    # The conclusion must be a fourth item, distinct from all premises.
                    if conclusion == premise1 or conclusion == premise2 or conclusion == premise3:
                        continue
                    if sample[conclusion] == 1:
                        valid_rules3[(premise1, premise2, premise3, conclusion)] += 1
                    else:
                        invalid_rules3[(premise1, premise2, premise3, conclusion)] += 1

# Support is the raw count of valid occurrences; support3 is the same dict object as valid_rules3.
support3 = valid_rules3
# Only rules that were valid at least once get a confidence entry; any other
# key reads as 0.0 through the defaultdict.
confidence3 = defaultdict(float)

# Confidence = valid count / number of samples containing all three premises.
for premise1, premise2, premise3, conclusion in valid_rules3.keys():
    confidence3[(premise1, premise2, premise3, conclusion)] = valid_rules3[(premise1, premise2, premise3, conclusion)] / num_occurances3[(premise1, premise2, premise3)]



In [17]:
def print_rule3(premise1,premise2,premise3,conclusion,support3,confidence3,features):
    """Print one three-premise rule with its confidence and support.

    Args:
        premise1: Index into ``features`` of the first item bought.
        premise2: Index into ``features`` of the second item bought.
        premise3: Index into ``features`` of the third item bought.
        conclusion: Index into ``features`` of the item predicted to be bought.
        support3: Mapping from ``(premise1, premise2, premise3, conclusion)``
            to the support count.
        confidence3: Mapping from the same key to the confidence.
        features: Sequence of item names, indexed by feature number.

    The output is three lines followed by an empty line; nothing is returned.
    """
    rule_key = (premise1, premise2, premise3, conclusion)
    premise_name1 = features[premise1]
    premise_name2 = features[premise2]
    premise_name3 = features[premise3]
    conclusion_name = features[conclusion]
    print("Rule: If a person buys {0} and {1} and {2} they will also buy {3}".format(premise_name1, premise_name2,premise_name3, conclusion_name))
    # Plain ".3f" (no space sign flag) so the line matches print_rule and
    # print_rule2 instead of printing a double space before the number.
    print(" - Confidence: {0:.3f}".format(confidence3[rule_key]))
    print(" - Support: {0}".format(support3[rule_key]))
    print("")

In [31]:
# Look up one specific three-premise rule by feature index:
# (0, 1, 2) -> 4, i.e. bread and milk and cheese -> bananas.
premise1= 0
premise2 = 1
premise3 = 2
conclusion = 4
print_rule3(premise1,premise2,premise3,conclusion,support3,confidence3,features)

Rule: If a person buys bread and milk and cheese they will also buy bananas
 - Confidence:  1.000
 - Support: 2



In [19]:
# Show every three-premise rule with its support count; each key is
# (premise1, premise2, premise3, conclusion) as feature indices.
pprint(list(valid_rules3.items()))

[((0, 1, 2, 4), 2),
 ((0, 1, 4, 2), 2),
 ((0, 2, 4, 1), 2),
 ((1, 2, 4, 0), 2),
 ((0, 1, 3, 4), 1),
 ((0, 1, 4, 3), 1),
 ((0, 3, 4, 1), 1),
 ((1, 3, 4, 0), 1),
 ((1, 2, 3, 4), 1),
 ((1, 2, 4, 3), 1),
 ((1, 3, 4, 2), 1),
 ((2, 3, 4, 1), 1)]


In [20]:
from operator import itemgetter
# Rank single-premise rules by support count, highest first. Each element is a
# ((premise, conclusion), count) pair and itemgetter(1) selects the count.
sorted_support1 = sorted(support1.items(), key=itemgetter(1), reverse=True)

In [21]:
# Print the (up to) five single-premise rules with the highest support.
# The range is capped at the number of rules found, so a dataset that yields
# fewer than five rules does not raise IndexError.
for index in range(min(5, len(sorted_support1))):
    print("Rule #{0}".format(index + 1))
    # Each element is ((premise, conclusion), support_count); only the key is needed.
    (premise, conclusion) = sorted_support1[index][0]
    print_rule(premise, conclusion, support1, confidence1, features)

Rule #1
Rule: If a person buys cheese they will also buy bananas
 - Confidence: 0.659
 - Support: 27

Rule #2
Rule: If a person buys bananas they will also buy cheese
 - Confidence: 0.458
 - Support: 27

Rule #3
Rule: If a person buys cheese they will also buy apples
 - Confidence: 0.610
 - Support: 25

Rule #4
Rule: If a person buys apples they will also buy cheese
 - Confidence: 0.694
 - Support: 25

Rule #5
Rule: If a person buys apples they will also buy bananas
 - Confidence: 0.583
 - Support: 21



In [25]:
# Rank two-premise rules by support count, highest first. Each element is a
# ((premise1, premise2, conclusion), count) pair and itemgetter(1) selects the count.
sorted_support2=sorted(support2.items(),key=itemgetter(1),reverse=True)

In [26]:
# Print the (up to) five two-premise rules with the highest support.
# The range is capped at the number of rules found, so a dataset that yields
# fewer than five rules does not raise IndexError.
for index in range(min(5, len(sorted_support2))):
    print("Rule #{0}".format(index + 1))
    # Each element is ((premise1, premise2, conclusion), support_count); only the key is needed.
    (premise1,premise2, conclusion) = sorted_support2[index][0]
    print_rule2(premise1,premise2, conclusion, support2, confidence2, features)

Rule #1
Rule: If a person buys cheese and apples they will also buy bananas
 - Confidence: 0.680
 - Support: 17

Rule #2
Rule: If a person buys cheese and bananas they will also buy apples
 - Confidence: 0.630
 - Support: 17

Rule #3
Rule: If a person buys apples and cheese they will also buy bananas
 - Confidence: 0.680
 - Support: 17

Rule #4
Rule: If a person buys apples and bananas they will also buy cheese
 - Confidence: 0.810
 - Support: 17

Rule #5
Rule: If a person buys bananas and cheese they will also buy apples
 - Confidence: 0.630
 - Support: 17



In [29]:
# Rank three-premise rules by support count, highest first. Each element is a
# ((premise1, premise2, premise3, conclusion), count) pair and itemgetter(1) selects the count.
sorted_support3=sorted(support3.items(),key=itemgetter(1),reverse=True)

In [30]:
# Print the (up to) five three-premise rules with the highest support.
# The range is capped at the number of rules found, so a dataset that yields
# fewer than five rules does not raise IndexError.
for index in range(min(5, len(sorted_support3))):
    print("Rule #{0}".format(index + 1))
    # Each element is ((premise1, premise2, premise3, conclusion), support_count); only the key is needed.
    (premise1,premise2,premise3, conclusion) = sorted_support3[index][0]
    print_rule3(premise1,premise2,premise3, conclusion, support3, confidence3, features)

Rule #1
Rule: If a person buys bread and milk and cheese they will also buy bananas
 - Confidence:  1.000
 - Support: 2

Rule #2
Rule: If a person buys bread and milk and bananas they will also buy cheese
 - Confidence:  0.286
 - Support: 2

Rule #3
Rule: If a person buys bread and cheese and bananas they will also buy milk
 - Confidence:  0.667
 - Support: 2

Rule #4
Rule: If a person buys milk and cheese and bananas they will also buy bread
 - Confidence:  0.500
 - Support: 2

Rule #5
Rule: If a person buys bread and milk and apples they will also buy bananas
 - Confidence:  0.250
 - Support: 1

