In [1]:
from mlxtend.frequent_patterns import apriori, association_rules
import pandas as pd
import numpy as np

In [2]:
dummy = np.array(["A", "B", "C", "D", "E", "F", "G"])

In [3]:
data = {
    "Transaction_ID" : [i for i in range(1, 11)],
    "Items": [list(np.random.choice(dummy, np.random.randint(3, 5))) for i in range(10)]
}

In [4]:
data

{'Transaction_ID': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
 'Items': [['E', 'E', 'E'],
  ['B', 'A', 'A', 'F'],
  ['E', 'C', 'A', 'B'],
  ['B', 'E', 'G', 'D'],
  ['C', 'G', 'D'],
  ['G', 'B', 'A', 'B'],
  ['G', 'E', 'E', 'A'],
  ['A', 'C', 'F'],
  ['F', 'E', 'G'],
  ['C', 'G', 'D', 'E']]}

In [5]:
df = pd.DataFrame(data)

In [6]:
df.head()

Unnamed: 0,Transaction_ID,Items
0,1,"[E, E, E]"
1,2,"[B, A, A, F]"
2,3,"[E, C, A, B]"
3,4,"[B, E, G, D]"
4,5,"[C, G, D]"


In [7]:
df["Items"] = df["Items"].apply(lambda x: " ".join(map(str, x)))

In [8]:
df.head()

Unnamed: 0,Transaction_ID,Items
0,1,E E E
1,2,B A A F
2,3,E C A B
3,4,B E G D
4,5,C G D


In [9]:
ohe = pd.get_dummies(df["Items"].str.split(expand=True).stack(), dtype=int)

In [10]:
ohe.reset_index(inplace=True)

In [11]:
ohe.head()

Unnamed: 0,level_0,level_1,A,B,C,D,E,F,G
0,0,0,0,0,0,0,1,0,0
1,0,1,0,0,0,0,1,0,0
2,0,2,0,0,0,0,1,0,0
3,1,0,0,1,0,0,0,0,0
4,1,1,1,0,0,0,0,0,0


In [12]:
ohe.drop("level_1", axis=1, inplace=True)

In [13]:
groped = ohe.groupby("level_0").max()

In [14]:
groped

Unnamed: 0_level_0,A,B,C,D,E,F,G
level_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0,0,0,0,1,0,0
1,1,1,0,0,0,1,0
2,1,1,1,0,1,0,0
3,0,1,0,1,1,0,1
4,0,0,1,1,0,0,1
5,1,1,0,0,0,0,1
6,1,0,0,0,1,0,1
7,1,0,1,0,0,1,0
8,0,0,0,0,1,1,1
9,0,0,1,1,1,0,1


In [15]:
fr_it = apriori(groped, min_support=0.2, use_colnames=True)



In [16]:
fr_it

Unnamed: 0,support,itemsets
0,0.5,(A)
1,0.4,(B)
2,0.4,(C)
3,0.3,(D)
4,0.6,(E)
5,0.3,(F)
6,0.6,(G)
7,0.3,"(B, A)"
8,0.2,"(A, C)"
9,0.2,"(E, A)"


In [17]:
asso_rule = association_rules(fr_it, metric="confidence", min_threshold=0.5)

In [18]:
asso_rule

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(B),(A),0.4,0.5,0.3,0.75,1.5,0.1,2.0,0.555556
1,(A),(B),0.5,0.4,0.3,0.6,1.5,0.1,1.5,0.666667
2,(C),(A),0.4,0.5,0.2,0.5,1.0,0.0,1.0,0.0
3,(F),(A),0.3,0.5,0.2,0.666667,1.333333,0.05,1.5,0.357143
4,(B),(E),0.4,0.6,0.2,0.5,0.833333,-0.04,0.8,-0.25
5,(B),(G),0.4,0.6,0.2,0.5,0.833333,-0.04,0.8,-0.25
6,(D),(C),0.3,0.4,0.2,0.666667,1.666667,0.08,1.8,0.571429
7,(C),(D),0.4,0.3,0.2,0.5,1.666667,0.08,1.4,0.666667
8,(C),(E),0.4,0.6,0.2,0.5,0.833333,-0.04,0.8,-0.25
9,(C),(G),0.4,0.6,0.2,0.5,0.833333,-0.04,0.8,-0.25


In [19]:
data1 = {
    "TID":[1,1,2,2,3,3,3,4,4,5,5,5],
    "Items":["A","D","A","C","A","B","C","A","B","D","B","C"]
}

In [20]:
df1 = pd.DataFrame(data1)

In [21]:
df1.head()

Unnamed: 0,TID,Items
0,1,A
1,1,D
2,2,A
3,2,C
4,3,A


In [22]:
ohe1 = pd.get_dummies(df1["Items"])

In [23]:
ohe1

Unnamed: 0,A,B,C,D
0,True,False,False,False
1,False,False,False,True
2,True,False,False,False
3,False,False,True,False
4,True,False,False,False
5,False,True,False,False
6,False,False,True,False
7,True,False,False,False
8,False,True,False,False
9,False,False,False,True


In [24]:
ohe1["TID"] = df1["TID"]

In [25]:
ohe1

Unnamed: 0,A,B,C,D,TID
0,True,False,False,False,1
1,False,False,False,True,1
2,True,False,False,False,2
3,False,False,True,False,2
4,True,False,False,False,3
5,False,True,False,False,3
6,False,False,True,False,3
7,True,False,False,False,4
8,False,True,False,False,4
9,False,False,False,True,5


In [26]:
groups = ohe1.groupby("TID").sum()

In [27]:
groups

Unnamed: 0_level_0,A,B,C,D
TID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1,0,0,1
2,1,0,1,0
3,1,1,1,0
4,1,1,0,0
5,0,1,1,1


In [28]:
freq_items1 = apriori(groups,min_support=0.2, use_colnames=True)



In [29]:
freq_items1

Unnamed: 0,support,itemsets
0,0.8,(A)
1,0.6,(B)
2,0.6,(C)
3,0.4,(D)
4,0.4,"(B, A)"
5,0.4,"(A, C)"
6,0.2,"(D, A)"
7,0.4,"(B, C)"
8,0.2,"(B, D)"
9,0.2,"(D, C)"


In [30]:
rules = association_rules(freq_items1, metric="confidence", min_threshold=0.5)

In [31]:
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(B),(A),0.6,0.8,0.4,0.666667,0.833333,-0.08,0.6,-0.333333
1,(A),(B),0.8,0.6,0.4,0.5,0.833333,-0.08,0.8,-0.5
2,(A),(C),0.8,0.6,0.4,0.5,0.833333,-0.08,0.8,-0.5
3,(C),(A),0.6,0.8,0.4,0.666667,0.833333,-0.08,0.6,-0.333333
4,(D),(A),0.4,0.8,0.2,0.5,0.625,-0.12,0.4,-0.5
5,(B),(C),0.6,0.6,0.4,0.666667,1.111111,0.04,1.2,0.25
6,(C),(B),0.6,0.6,0.4,0.666667,1.111111,0.04,1.2,0.25
7,(D),(B),0.4,0.6,0.2,0.5,0.833333,-0.04,0.8,-0.25
8,(D),(C),0.4,0.6,0.2,0.5,0.833333,-0.04,0.8,-0.25
9,"(B, A)",(C),0.4,0.6,0.2,0.5,0.833333,-0.04,0.8,-0.25
