In [34]:
!pip install mlxtend




In [35]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

from mlxtend.frequent_patterns import fpgrowth



In [36]:

df = pd.read_csv("/content/transactions.csv")



In [37]:
df.head()

Unnamed: 0,Transaction,Item1,Item2,Item3,Item4,Item5
0,T1,A,B,C,D,E
1,T2,A,C,D,E,F
2,T3,A,B,E,,
3,T4,A,C,F,G,H
4,T5,B,C,D,E,F


In [38]:
# Open the CSV file and read it line by line
with open('/content/transactions.csv', 'r') as file:
    transactions_list = [line.strip().split(',') for line in file.readlines()]

# Show the resulting list
print("🧾 Transactions as a list:")
for transaction in transactions_list:
    print(transaction)


🧾 Transactions as a list:
['Transaction', 'Item1', 'Item2', 'Item3', 'Item4', 'Item5']
['T1', 'A', 'B', 'C', 'D', 'E']
['T2', 'A', 'C', 'D', 'E', 'F']
['T3', 'A', 'B', 'E', '', '']
['T4', 'A', 'C', 'F', 'G', 'H']
['T5', 'B', 'C', 'D', 'E', 'F']


In [39]:



te = TransactionEncoder()
te_array = te.fit(transactions_list).transform(transactions_list)

# Convert to pandas DataFrame
df = pd.DataFrame(te_array, columns=te.columns_)

print("🧾 One-hot encoded transaction DataFrame:")
print(df.head())


🧾 One-hot encoded transaction DataFrame:
              A      B      C      D      E      F      G      H  Item1  \
0  False  False  False  False  False  False  False  False  False   True   
1  False   True   True   True   True   True  False  False  False  False   
2  False   True  False   True   True   True   True  False  False  False   
3   True   True   True  False  False   True  False  False  False  False   
4  False   True  False   True  False  False   True   True   True  False   

   Item2  Item3  Item4  Item5     T1     T2     T3     T4     T5  Transaction  
0   True   True   True   True  False  False  False  False  False         True  
1  False  False  False  False   True  False  False  False  False        False  
2  False  False  False  False  False   True  False  False  False        False  
3  False  False  False  False  False  False   True  False  False        False  
4  False  False  False  False  False  False  False   True  False        False  


In [40]:
# Minimum support value can be adjusted (e.g., 0.5 means 50% of transactions)
frequent_itemsets = apriori(df, min_support=0.5, use_colnames=True)

print("📊 Frequent Itemsets:")
print(frequent_itemsets)


📊 Frequent Itemsets:
     support   itemsets
0   0.666667        (A)
1   0.500000        (B)
2   0.666667        (C)
3   0.500000        (D)
4   0.666667        (E)
5   0.500000        (F)
6   0.500000     (A, C)
7   0.500000     (E, A)
8   0.500000     (E, B)
9   0.500000     (C, D)
10  0.500000     (E, C)
11  0.500000     (F, C)
12  0.500000     (E, D)
13  0.500000  (E, C, D)


In [41]:
# You can adjust the metric and threshold (e.g., "confidence", "lift", etc.)
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.7)

print(rules[['antecedents', 'consequents', 'support', 'confidence','lift']])

# print("🔗 Association Rules:")
# print(rules)


   antecedents consequents  support  confidence   lift
0          (A)         (C)      0.5        0.75  1.125
1          (C)         (A)      0.5        0.75  1.125
2          (E)         (A)      0.5        0.75  1.125
3          (A)         (E)      0.5        0.75  1.125
4          (E)         (B)      0.5        0.75  1.500
5          (B)         (E)      0.5        1.00  1.500
6          (C)         (D)      0.5        0.75  1.500
7          (D)         (C)      0.5        1.00  1.500
8          (E)         (C)      0.5        0.75  1.125
9          (C)         (E)      0.5        0.75  1.125
10         (F)         (C)      0.5        1.00  1.500
11         (C)         (F)      0.5        0.75  1.500
12         (E)         (D)      0.5        0.75  1.500
13         (D)         (E)      0.5        1.00  1.500
14      (E, C)         (D)      0.5        1.00  2.000
15      (E, D)         (C)      0.5        1.00  1.500
16      (C, D)         (E)      0.5        1.00  1.500
17        

In [42]:
# You can adjust the support as needed
frequent_itemsets_fp = fpgrowth(df, min_support=0.5, use_colnames=True)

print("🌲 Frequent Itemsets (FP-Growth):")
print(frequent_itemsets_fp)


🌲 Frequent Itemsets (FP-Growth):
     support   itemsets
0   0.666667        (E)
1   0.666667        (C)
2   0.666667        (A)
3   0.500000        (D)
4   0.500000        (B)
5   0.500000        (F)
6   0.500000     (E, C)
7   0.500000     (A, C)
8   0.500000     (E, A)
9   0.500000     (C, D)
10  0.500000     (E, D)
11  0.500000  (E, C, D)
12  0.500000     (E, B)
13  0.500000     (F, C)


In [44]:
rules_fp = association_rules(frequent_itemsets_fp, metric="confidence", min_threshold=0.7)

print(rules[['antecedents', 'consequents', 'support', 'confidence','lift']])

# print("🔗 Association Rules (FP-Growth):")
# print(rules_fp)


   antecedents consequents  support  confidence   lift
0          (A)         (C)      0.5        0.75  1.125
1          (C)         (A)      0.5        0.75  1.125
2          (E)         (A)      0.5        0.75  1.125
3          (A)         (E)      0.5        0.75  1.125
4          (E)         (B)      0.5        0.75  1.500
5          (B)         (E)      0.5        1.00  1.500
6          (C)         (D)      0.5        0.75  1.500
7          (D)         (C)      0.5        1.00  1.500
8          (E)         (C)      0.5        0.75  1.125
9          (C)         (E)      0.5        0.75  1.125
10         (F)         (C)      0.5        1.00  1.500
11         (C)         (F)      0.5        0.75  1.500
12         (E)         (D)      0.5        0.75  1.500
13         (D)         (E)      0.5        1.00  1.500
14      (E, C)         (D)      0.5        1.00  2.000
15      (E, D)         (C)      0.5        1.00  1.500
16      (C, D)         (E)      0.5        1.00  1.500
17        