<a href="https://colab.research.google.com/github/Samuela31/Data-Mining-and-Analysis-Laboratory/blob/main/Data_mining_lab_2_apriori.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Problem taken from- https://www.javatpoint.com/apriori-algorithm-in-machine-learning

In [None]:
import pandas as pd

df=pd.read_csv("apri.csv")
df

Unnamed: 0,TID,ITEMSETS
0,T1,"A, B"
1,T2,"B, D"
2,T3,"B, C"
3,T4,"A, B, D"
4,T5,"A, C"
5,T6,"B, C"
6,T7,"A, C"
7,T8,"A, B, C, E"
8,T9,"A, B, C"


In [None]:
li=list(df["ITEMSETS"])

min_support=2
min_confi=0.5

item_purchases = [item.split(', ') for item in li]
print(item_purchases)


[['A', 'B'], ['B', 'D'], ['B', 'C'], ['A', 'B', 'D'], ['A', 'C'], ['B', 'C'], ['A', 'C'], ['A', 'B', 'C', 'E'], ['A', 'B', 'C']]


In [None]:
#Step 1: Candidate list C1 and Frequency list L1
c1 = {}

for l in item_purchases:
    for i in l:
        if i in c1:
            c1[i] += 1
        else:
            c1[i] = 1

print("C1:",c1)

l1 = {key: value for key, value in c1.items() if value >= min_support}

print("L1:",l1)

C1: {'A': 6, 'B': 7, 'D': 2, 'C': 6, 'E': 1}
L1: {'A': 6, 'B': 7, 'D': 2, 'C': 6}


In [None]:
#Step 2: Candidate Generation C2 and L2
from itertools import combinations

c2 = {}
combinations_2 = list(combinations(l1.keys(), 2))

for combination in combinations_2:
    c2[combination] = sum(1 for purchase in item_purchases if set(combination).issubset(purchase))

print("C2:",c2)

l2 = {key: value for key, value in c2.items() if value >= min_support}

print("L2:",l2)

C2: {('A', 'B'): 4, ('A', 'D'): 1, ('A', 'C'): 4, ('B', 'D'): 2, ('B', 'C'): 4, ('D', 'C'): 0}
L2: {('A', 'B'): 4, ('A', 'C'): 4, ('B', 'D'): 2, ('B', 'C'): 4}


In [None]:
#Step 3: Candidate Generation C3 and L3
c3 = {}
combinations_2 = list(combinations(l1.keys(), 3))

for combination in combinations_2:
    c3[combination] = sum(1 for purchase in item_purchases if set(combination).issubset(purchase))

print("C3:",c3)

l3 = {key: value for key, value in c3.items() if value >= min_support}

print("L3:",l3)


C3: {('A', 'B', 'D'): 1, ('A', 'B', 'C'): 2, ('A', 'D', 'C'): 0, ('B', 'D', 'C'): 0}
L3: {('A', 'B', 'C'): 2}


In [None]:
#Step 4: Association rules for subsets
def calculate_support(itemset, item_purchases):
    count = 0
    for l in item_purchases:
        if set(itemset).issubset(l):
            count += 1
    return count / len(item_purchases)

for key, val in l3.items():
    for i in key:
        antecedent = tuple(set(key) - {i})
        consequence = tuple(antecedent + tuple(i))
        confidence = calculate_support(consequence, item_purchases) / calculate_support(antecedent, item_purchases)
        association_strength = "Strong" if confidence >=min_confi else "Weak"
        print(f"{antecedent} -> {i}\tSupport: {val}\tConfidence: {confidence:.2%}\tAssociation: {association_strength}")

for key, val in l3.items():
    for i in key:
        antecedent = tuple(set(key) - {i})
        consequence = tuple(tuple(i) + antecedent)
        confidence = calculate_support(consequence, item_purchases) / calculate_support(i, item_purchases)
        association_strength = "Strong" if confidence >=min_confi else "Weak"
        print(f"{i} -> {antecedent}\tSupport: {val}\tConfidence: {confidence:.2%}\tAssociation: {association_strength}")

('C', 'B') -> A	Support: 2	Confidence: 50.00%	Association: Strong
('C', 'A') -> B	Support: 2	Confidence: 50.00%	Association: Strong
('A', 'B') -> C	Support: 2	Confidence: 50.00%	Association: Strong
A -> ('C', 'B')	Support: 2	Confidence: 33.33%	Association: Weak
B -> ('C', 'A')	Support: 2	Confidence: 28.57%	Association: Weak
C -> ('A', 'B')	Support: 2	Confidence: 33.33%	Association: Weak


In [None]:
!pip install pandas mlxtend




In [None]:
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

df = pd.read_csv('apri.csv')

#Create a list of unique items in the dataset (set is used to prevent duplicate item, TID is ignored)
unique_items = list(set(item for sublist in df['ITEMSETS'].str.split(', ') for item in sublist))
print(unique_items)

#Create a one-hot encoded DataFrame
oht = pd.DataFrame(columns=unique_items)
print(oht)

for index, row in df.iterrows():
    itemset = row['ITEMSETS'].split(', ')
    oht.loc[index, itemset] = 1

oht.fillna(0, inplace=True)
print(oht)

#Find frequent itemsets using Apriori algorithm
frequent_itemsets = apriori(oht, min_support=0.2, use_colnames=True)

#Find association rules
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.5)

#Display frequent itemsets and association rules
print("\nFrequent Itemsets:")
print(frequent_itemsets)

print("\nAssociation Rules:")
print(rules)


['C', 'B', 'E', 'A', 'D']
Empty DataFrame
Columns: [C, B, E, A, D]
Index: []
   C  B  E  A  D
0  0  1  0  1  0
1  0  1  0  0  1
2  1  1  0  0  0
3  0  1  0  1  1
4  1  0  0  1  0
5  1  1  0  0  0
6  1  0  0  1  0
7  1  1  1  1  0
8  1  1  0  1  0

Frequent Itemsets:
    support   itemsets
0  0.666667        (C)
1  0.777778        (B)
2  0.666667        (A)
3  0.222222        (D)
4  0.444444     (C, B)
5  0.444444     (C, A)
6  0.444444     (A, B)
7  0.222222     (D, B)
8  0.222222  (C, A, B)

Association Rules:
  antecedents consequents  antecedent support  consequent support   support  \
0         (C)         (B)            0.666667            0.777778  0.444444   
1         (B)         (C)            0.777778            0.666667  0.444444   
2         (C)         (A)            0.666667            0.666667  0.444444   
3         (A)         (C)            0.666667            0.666667  0.444444   
4         (A)         (B)            0.666667            0.777778  0.444444   
5        

  and should_run_async(code)


In [None]:
!pip install apyori



In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
ds= pd.read_csv('Market_Basket_Optimisation.csv')

ds.fillna(0, inplace=True)
ds.head()

list_of_transactions = []
for i in range(0, 30):
    list_of_transactions.append([str(ds.values[i,j]) for j in range(0, 20)])

list_of_transactions = [[item for item in sublist if item != '0' and item != '0.0'] for sublist in list_of_transactions]

print(list_of_transactions)

[['burgers', 'meatballs', 'eggs'], ['chutney'], ['turkey', 'avocado'], ['mineral water', 'milk', 'energy bar', 'whole wheat rice', 'green tea'], ['low fat yogurt'], ['whole wheat pasta', 'french fries'], ['soup', 'light cream', 'shallot'], ['frozen vegetables', 'spaghetti', 'green tea'], ['french fries'], ['eggs', 'pet food'], ['cookies'], ['turkey', 'burgers', 'mineral water', 'eggs', 'cooking oil'], ['spaghetti', 'champagne', 'cookies'], ['mineral water', 'salmon'], ['mineral water'], ['shrimp', 'chocolate', 'chicken', 'honey', 'oil', 'cooking oil', 'low fat yogurt'], ['turkey', 'eggs'], ['turkey', 'fresh tuna', 'tomatoes', 'spaghetti', 'mineral water', 'black tea', 'salmon', 'eggs', 'chicken', 'extra dark chocolate'], ['meatballs', 'milk', 'honey', 'french fries', 'protein bar'], ['red wine', 'shrimp', 'pasta', 'pepper', 'eggs', 'chocolate', 'shampoo'], ['rice', 'sparkling water'], ['spaghetti', 'mineral water', 'ham', 'body spray', 'pancakes', 'green tea'], ['burgers', 'grated chee

In [None]:
from apyori import apriori

rules = apriori(list_of_transactions, min_support = 0.004, min_confidence = 0.2, min_lift = 3, min_length = 2)
results = list(rules)
print(results)

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [None]:
def inspect(results):
    lhs     =  [tuple(result [2] [0] [0]) [0] for result in results]
    rhs     =  [tuple(result [2] [0] [1]) [0] for result in results]
    supports = [result [1] for result in results]
    confidences = [result [2] [0] [2]   for result in results]
    lifts = [result [2] [0] [3]   for result in results]
    return list(zip(lhs,rhs,supports,confidences, lifts))
resultsinDataFrame = pd.DataFrame(inspect(results),columns = ['Left Hand Side', 'Right Hand Side', 'Support', 'Confidence', 'Lift'] )
resultsinDataFrame.head(10)

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
0,avocado,burgers,0.033333,0.333333,3.333333
1,avocado,fresh bread,0.033333,0.333333,10.0
2,avocado,grated cheese,0.033333,0.333333,10.0
3,avocado,honey,0.033333,0.333333,3.333333
4,avocado,parmesan cheese,0.033333,0.333333,10.0
5,avocado,pasta,0.033333,0.333333,5.0
6,avocado,shrimp,0.033333,0.333333,3.333333
7,avocado,soup,0.033333,0.333333,5.0
8,avocado,toothpaste,0.033333,0.333333,10.0
9,avocado,white wine,0.033333,0.333333,10.0
