In [1]:
# from itertools import combinations
# from collections import defaultdict

# def load_data(file_path):
#     """Load transactions from the retail dataset."""
#     with open(file_path, 'r') as file:
#         transactions = [line.strip().split() for line in file]
#     return transactions

# def get_item_support(transactions, itemsets, min_support):
#     """Calculate support for each itemset and filter those meeting min support."""
#     itemset_counts = defaultdict(int)
#     for transaction in transactions:
#         for itemset in itemsets:
#             if itemset.issubset(set(transaction)):
#                 itemset_counts[itemset] += 1

#     return {itemset: count for itemset, count in itemset_counts.items() if count >= min_support}

# def apriori(transactions, min_support):
#     """Apriori algorithm to find frequent itemsets."""
#     # Step 1: Find frequent 1-itemsets
#     items = {item for transaction in transactions for item in transaction}
#     itemsets = [frozenset([item]) for item in items]  # Use frozenset for immutability
#     frequent_itemsets = []
    
#     k = 1
#     while itemsets:
#         itemset_support = get_item_support(transactions, itemsets, min_support)
#         frequent_itemsets.extend([(frozenset(item), support) for item, support in itemset_support.items()])
        
#         # Generate candidates for the next level
#         k += 1
#         itemsets = [i.union(j) for i in itemset_support for j in itemset_support if len(i.union(j)) == k]
#         itemsets = list(set(itemsets))  # Remove duplicates
    
#     return frequent_itemsets

# # Main execution
# file_path = 'retail.txt'  # Replace with actual path
# transactions = load_data(file_path)

# # Run Apriori for varying minimum support counts
# min_support_values = [10, 20, 50, 100]  # Example values
# for min_support in min_support_values:
#     print(f"\nFrequent itemsets for minimum support count = {min_support}")
#     frequent_itemsets = apriori(transactions, min_support)
#     for itemset, support in frequent_itemsets:
#         print(f"Itemset: {itemset}, Support: {support}")


In [3]:
!pip install mlxtend

Collecting mlxtend
  Downloading mlxtend-0.23.3-py3-none-any.whl.metadata (7.3 kB)
Downloading mlxtend-0.23.3-py3-none-any.whl (1.4 MB)
   ---------------------------------------- 0.0/1.4 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.4 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.4 MB ? eta -:--:--
    --------------------------------------- 0.0/1.4 MB 435.7 kB/s eta 0:00:04
   -- ------------------------------------- 0.1/1.4 MB 744.7 kB/s eta 0:00:02
   ------------ --------------------------- 0.4/1.4 MB 2.7 MB/s eta 0:00:01
   -------------------------- ------------- 0.9/1.4 MB 4.4 MB/s eta 0:00:01
   ---------------------------------------  1.4/1.4 MB 5.4 MB/s eta 0:00:01
   ---------------------------------------  1.4/1.4 MB 5.4 MB/s eta 0:00:01
   ---------------------------------------- 1.4/1.4 MB 4.5 MB/s eta 0:00:00
Installing collected packages: mlxtend
Successfully installed mlxtend-0.23.3


In [5]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

# Step 1: Load the dataset
def load_large_dataset(file_path):
    """Load dataset into a list of transactions."""
    with open(file_path, 'r') as file:
        dataset = [line.strip().split() for line in file]
    return dataset

# Step 2: Preprocess dataset into one-hot encoded format
def preprocess_dataset(dataset):
    """Convert dataset into a one-hot encoded DataFrame."""
    te = TransactionEncoder()
    te_ary = te.fit(dataset).transform(dataset)
    return pd.DataFrame(te_ary, columns=te.columns_)

# Step 3: Apply Apriori
def find_frequent_itemsets(df, min_support):
    """Find frequent itemsets using the Apriori algorithm."""
    frequent_itemsets = apriori(df, min_support=min_support, use_colnames=True)
    return frequent_itemsets

# Step 4: Load and preprocess the dataset
file_path = 'retail.txt'  # Update with your actual dataset path
dataset = load_large_dataset(file_path)
df = preprocess_dataset(dataset)

# Step 5: Find frequent itemsets with varying minimum support values
min_support_values = [0.01, 0.02, 0.05]  # Adjust these values for your dataset
for min_support in min_support_values:
    print(f"\nFrequent itemsets with min_support = {min_support}:")
    frequent_itemsets = find_frequent_itemsets(df, min_support=min_support)
    print(frequent_itemsets)



Frequent itemsets with min_support = 0.01:
     support itemsets
0   0.017252   (33,1)
1   0.016935  (33,10)
2   0.017729   (33,2)
3   0.017071   (33,3)
4   0.017695   (33,4)
5   0.016560   (33,5)
6   0.016867   (33,6)
7   0.017581   (33,7)
8   0.016935   (33,8)
9   0.017411   (33,9)
10  0.016867   (39,1)
11  0.017751  (39,10)
12  0.017264   (39,2)
13  0.017797   (39,3)
14  0.018387   (39,4)
15  0.017457   (39,5)
16  0.017706   (39,6)
17  0.018001   (39,7)
18  0.017819   (39,8)
19  0.017853   (39,9)
20  0.057474   (40,1)
21  0.056952  (40,10)
22  0.056680   (40,2)
23  0.057020   (40,3)
24  0.057485   (40,4)
25  0.057769   (40,5)
26  0.058880   (40,6)
27  0.057122   (40,7)
28  0.056850   (40,8)
29  0.058563   (40,9)
30  0.016402   (42,1)
31  0.017354  (42,10)
32  0.016538   (42,2)
33  0.016640   (42,3)
34  0.016912   (42,4)
35  0.016810   (42,5)
36  0.017638   (42,6)
37  0.017525   (42,7)
38  0.016447   (42,8)
39  0.017252   (42,9)
40  0.047299   (49,1)
41  0.048207  (49,10)
42  0.0473

In [12]:
!pip install --upgrade mlxtend




In [26]:
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder
import pandas as pd


# Generate rules for varying confidence thresholds
for min_confidence in [0.6, 0.7, 0.8]:
    print(f"\nRules with Minimum Confidence = {min_confidence}")
    rules = association_rules(df, metric="confidence", min_threshold=min_confidence)
    print(rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']])



Rules with Minimum Confidence = 0.6


TypeError: association_rules() missing 1 required positional argument: 'num_itemsets'