In [2]:
%%time

import numpy as np
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

data = [['Milk', 'Bread', 'Butter'],
        ['Bread', 'Butter'],
        ['Milk', 'Bread', 'Butter'],
        ['Milk', 'Bread'],
        ['Milk', 'Juice'],
        ['Bread', 'Juice'],
        ['Milk', 'Bread', 'Juice'],
        ['Milk', 'Bread', 'Butter'],
        ['Milk', 'Juice'],
        ['Bread', 'Butter'],
        ['Milk', 'Juice'],
        ['Bread', 'Juice'],
        ['Bread', 'Butter'],
        ['Milk', 'Bread'],
        ['Bread', 'Juice']]

te = TransactionEncoder()
te_ary = te.fit(data).transform(data)
df = pd.DataFrame(te_ary, columns=te.columns_)

frequent_itemsets = apriori(df, min_support=0.2, use_colnames=True)
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.6)

print("Frequent Itemsets: ")
print(frequent_itemsets)
print("\nAssociation Rules: ")
print(rules)

Frequent Itemsets: 
    support               itemsets
0  0.800000                (Bread)
1  0.400000               (Butter)
2  0.466667                (Juice)
3  0.600000                 (Milk)
4  0.400000        (Bread, Butter)
5  0.266667         (Juice, Bread)
6  0.400000          (Milk, Bread)
7  0.200000         (Milk, Butter)
8  0.266667          (Juice, Milk)
9  0.200000  (Milk, Bread, Butter)

Association Rules: 
      antecedents consequents  antecedent support  consequent support  \
0        (Butter)     (Bread)                 0.4                 0.8   
1          (Milk)     (Bread)                 0.6                 0.8   
2  (Milk, Butter)     (Bread)                 0.2                 0.8   

   support  confidence      lift  leverage  conviction  zhangs_metric  
0      0.4    1.000000  1.250000      0.08         inf       0.333333  
1      0.4    0.666667  0.833333     -0.08         0.6      -0.333333  
2      0.2    1.000000  1.250000      0.04         inf       0.25

In [5]:
%%time

import random

# Function to generate transactions with varying unique items
def generate_transactions(num_transactions, max_items):
    items = ['Item_' + str(i) for i in range(1, max_items + 1)]
    transactions = []
    for _ in range(num_transactions):
        num_items = random.randint(1, max_items)
        transaction = random.sample(items, num_items)
        transactions.append(transaction)
    return transactions

# Generate transactions with varying unique items
num_transactions = 15  # same as the previous example
max_items = 3  # change the number of unique items here
data_varying = generate_transactions(num_transactions, max_items)

# Rest of the code remains the same as the previous example
te_varying = TransactionEncoder()
te_ary_varying = te_varying.fit(data_varying).transform(data_varying)
df_varying = pd.DataFrame(te_ary_varying, columns=te_varying.columns_)

frequent_itemsets_varying = apriori(df_varying, min_support=0.2, use_colnames=True)
rules_varying = association_rules(frequent_itemsets_varying, metric="confidence", min_threshold=0.6)

print("Frequent Itemsets with Varying Unique Items: ")
print(frequent_itemsets_varying)
print("\nAssociation Rules with Varying Unique Items: ")
print(rules_varying)
#By changing the value of max_items, you can generate a dataset with a different number of unique items. This modified code will still utilize the mlxtend library and produce the frequent itemsets and association rules for the updated dataset.

Frequent Itemsets with Varying Unique Items: 
    support                  itemsets
0  0.933333                  (Item_1)
1  0.600000                  (Item_2)
2  0.533333                  (Item_3)
3  0.533333          (Item_1, Item_2)
4  0.533333          (Item_1, Item_3)
5  0.400000          (Item_3, Item_2)
6  0.400000  (Item_1, Item_3, Item_2)

Association Rules with Varying Unique Items: 
        antecedents       consequents  antecedent support  consequent support  \
0          (Item_2)          (Item_1)            0.600000            0.933333   
1          (Item_3)          (Item_1)            0.533333            0.933333   
2          (Item_3)          (Item_2)            0.533333            0.600000   
3          (Item_2)          (Item_3)            0.600000            0.533333   
4  (Item_1, Item_3)          (Item_2)            0.533333            0.600000   
5  (Item_1, Item_2)          (Item_3)            0.533333            0.533333   
6  (Item_3, Item_2)          (Item_1

In [6]:
%%time

import random

# Function to generate transactions with varying unique items
def generate_transactions(num_transactions, max_items):
    items = ['Item_' + str(i) for i in range(1, max_items + 1)]
    transactions = []
    for _ in range(num_transactions):
        num_items = random.randint(1, max_items)
        transaction = random.sample(items, num_items)
        transactions.append(transaction)
    return transactions

# Generate transactions with varying unique items
num_transactions = 15  # same as the previous example
max_items = 8  # change the number of unique items here
data_varying = generate_transactions(num_transactions, max_items)

# Rest of the code remains the same as the previous example
te_varying = TransactionEncoder()
te_ary_varying = te_varying.fit(data_varying).transform(data_varying)
df_varying = pd.DataFrame(te_ary_varying, columns=te_varying.columns_)

frequent_itemsets_varying = apriori(df_varying, min_support=0.2, use_colnames=True)
rules_varying = association_rules(frequent_itemsets_varying, metric="confidence", min_threshold=0.6)

print("Frequent Itemsets with Varying Unique Items: ")
print(frequent_itemsets_varying)
print("\nAssociation Rules with Varying Unique Items: ")
print(rules_varying)
#By changing the value of max_items, you can generate a dataset with a different number of unique items. This modified code will still utilize the mlxtend library and produce the frequent itemsets and association rules for the updated dataset.

Frequent Itemsets with Varying Unique Items: 
      support                                           itemsets
0    0.600000                                           (Item_1)
1    0.600000                                           (Item_2)
2    0.533333                                           (Item_3)
3    0.600000                                           (Item_4)
4    0.800000                                           (Item_5)
..        ...                                                ...
250  0.200000  (Item_1, Item_6, Item_7, Item_3, Item_2, Item_...
251  0.266667  (Item_1, Item_6, Item_7, Item_4, Item_2, Item_...
252  0.266667  (Item_1, Item_6, Item_7, Item_3, Item_4, Item_...
253  0.200000  (Item_6, Item_7, Item_3, Item_4, Item_2, Item_...
254  0.200000  (Item_1, Item_6, Item_7, Item_3, Item_4, Item_...

[255 rows x 2 columns]

Association Rules with Varying Unique Items: 
           antecedents                                       consequents  \
0             (Item_1)     

In [4]:
%%time

import random

# Function to generate transactions with varying unique items
def generate_transactions(num_transactions, max_items):
    items = ['Item_' + str(i) for i in range(1, max_items + 1)]
    transactions = []
    for _ in range(num_transactions):
        num_items = random.randint(1, max_items)
        transaction = random.sample(items, num_items)
        transactions.append(transaction)
    return transactions

# Generate transactions with varying unique items
num_transactions = 15  # same as the previous example
max_items = 10  # change the number of unique items here
data_varying = generate_transactions(num_transactions, max_items)

# Rest of the code remains the same as the previous example
te_varying = TransactionEncoder()
te_ary_varying = te_varying.fit(data_varying).transform(data_varying)
df_varying = pd.DataFrame(te_ary_varying, columns=te_varying.columns_)

frequent_itemsets_varying = apriori(df_varying, min_support=0.2, use_colnames=True)
rules_varying = association_rules(frequent_itemsets_varying, metric="confidence", min_threshold=0.6)

print("Frequent Itemsets with Varying Unique Items: ")
print(frequent_itemsets_varying)
print("\nAssociation Rules with Varying Unique Items: ")
print(rules_varying)
#By changing the value of max_items, you can generate a dataset with a different number of unique items. This modified code will still utilize the mlxtend library and produce the frequent itemsets and association rules for the updated dataset.

Frequent Itemsets with Varying Unique Items: 
       support                                           itemsets
0     0.666667                                           (Item_1)
1     0.800000                                          (Item_10)
2     0.666667                                           (Item_2)
3     0.733333                                           (Item_3)
4     0.733333                                           (Item_4)
...        ...                                                ...
1018  0.200000  (Item_1, Item_6, Item_7, Item_9, Item_4, Item_...
1019  0.200000  (Item_1, Item_6, Item_7, Item_9, Item_3, Item_...
1020  0.200000  (Item_1, Item_6, Item_7, Item_9, Item_3, Item_...
1021  0.266667  (Item_6, Item_7, Item_9, Item_3, Item_4, Item_...
1022  0.200000  (Item_1, Item_6, Item_7, Item_9, Item_3, Item_...

[1023 rows x 2 columns]

Association Rules with Varying Unique Items: 
            antecedents                                        consequents  \
0           

In [8]:
%%time

from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules
import pandas as pd

# Sample dataset
data = [
    ['Action', 'Adventure', 'Sci-Fi'],
    ['Action', 'Drama', 'Romance'],
    ['Drama', 'Romance'],
    ['Action', 'Adventure', 'Drama', 'Sci-Fi'],
    ['Action', 'Romance'],
    ['Drama', 'Sci-Fi'],
    ['Drama', 'Romance'],
    ['Action', 'Adventure', 'Romance'],
    ['Drama', 'Romance'],
    ['Action', 'Drama', 'Sci-Fi'],
    ['Drama', 'Romance'],
    ['Action', 'Sci-Fi'],
    ['Drama', 'Romance'],
    ['Action', 'Adventure'],
    ['Drama', 'Romance']
]

# Convert the dataset to a one-hot encoded DataFrame
te = TransactionEncoder()
te_ary = te.fit(data).transform(data)
df = pd.DataFrame(te_ary, columns=te.columns_)

# Applying the Apriori algorithm
frequent_itemsets = apriori(df, min_support=0.2, use_colnames=True)

# Generating association rules
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.6)

# Printing the rules
print("Frequent Itemsets:\n", frequent_itemsets)
print("\nAssociation Rules:\n", rules)
#This code will help you find frequent itemsets and generate association rules based on the minimum support and confidence thresholds. Adjust the min_support and min_threshold parameters based on your dataset and requirements.

Frequent Itemsets:
      support             itemsets
0   0.533333             (Action)
1   0.266667          (Adventure)
2   0.666667              (Drama)
3   0.600000            (Romance)
4   0.333333             (Sci-Fi)
5   0.266667  (Action, Adventure)
6   0.200000      (Drama, Action)
7   0.200000    (Romance, Action)
8   0.266667     (Sci-Fi, Action)
9   0.466667     (Drama, Romance)
10  0.200000      (Drama, Sci-Fi)

Association Rules:
    antecedents consequents  antecedent support  consequent support   support  \
0  (Adventure)    (Action)            0.266667            0.533333  0.266667   
1     (Sci-Fi)    (Action)            0.333333            0.533333  0.266667   
2      (Drama)   (Romance)            0.666667            0.600000  0.466667   
3    (Romance)     (Drama)            0.600000            0.666667  0.466667   
4     (Sci-Fi)     (Drama)            0.333333            0.666667  0.200000   

   confidence      lift  leverage  conviction  zhangs_metric  
0    1.

In [9]:
%%time

import random

# Generating random transactions for the same set of items
num_transactions_list = [10, 15, 20, 25, 30]  # Varying number of transactions
items = ['Action', 'Adventure', 'Drama', 'Romance', 'Sci-Fi']  # Same set of unique items

data = []
for num_transactions in num_transactions_list:
    transactions = []
    for _ in range(num_transactions):
        random.shuffle(items)
        transactions.append(items.copy())
    data.append(transactions)

# Convert the dataset to a one-hot encoded DataFrame
te = TransactionEncoder()
te_ary_list = [te.fit_transform(transactions) for transactions in data]
df_list = [pd.DataFrame(te_ary, columns=te.columns_) for te_ary in te_ary_list]

# Applying the Apriori algorithm for each dataset
for idx, df in enumerate(df_list):
    print(f"Results for {num_transactions_list[idx]} transactions:")
    frequent_itemsets = apriori(df, min_support=0.2, use_colnames=True)
    rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.6)
    print("Frequent Itemsets:\n", frequent_itemsets)
    print("\nAssociation Rules:\n", rules)
    print("\n")

Results for 10 transactions:
Frequent Itemsets:
     support                                     itemsets
0       1.0                                     (Action)
1       1.0                                  (Adventure)
2       1.0                                      (Drama)
3       1.0                                    (Romance)
4       1.0                                     (Sci-Fi)
5       1.0                          (Action, Adventure)
6       1.0                              (Drama, Action)
7       1.0                            (Romance, Action)
8       1.0                             (Sci-Fi, Action)
9       1.0                           (Drama, Adventure)
10      1.0                         (Romance, Adventure)
11      1.0                          (Sci-Fi, Adventure)
12      1.0                             (Drama, Romance)
13      1.0                              (Drama, Sci-Fi)
14      1.0                            (Romance, Sci-Fi)
15      1.0                   (Drama, A

In [14]:
%%time

from itertools import chain, combinations
from collections import defaultdict

# Sample dataset
dataset = [
    ['Action', 'Adventure', 'Sci-Fi'],
    ['Action', 'Drama', 'Romance'],
    ['Drama', 'Romance'],
    ['Action', 'Adventure', 'Drama', 'Sci-Fi'],
    ['Action', 'Romance'],
    ['Drama', 'Sci-Fi'],
    ['Drama', 'Romance'],
    ['Action', 'Adventure', 'Romance'],
    ['Drama', 'Romance'],
    ['Action', 'Drama', 'Sci-Fi'],
    ['Drama', 'Romance'],
    ['Action', 'Sci-Fi'],
    ['Drama', 'Romance'],
    ['Action', 'Adventure'],
    ['Drama', 'Romance']
]

# Function to generate all possible itemsets
def get_all_itemsets(data):
    itemsets = defaultdict(int)
    for row in data:
        for i in range(1, len(row) + 1):
            for subset in combinations(row, i):
                itemsets[subset] += 1
    return itemsets

# Function to filter frequent itemsets based on minimum support
def filter_itemsets(itemsets, min_support):
    num_items = len(dataset)
    return {itemset: support for itemset, support in itemsets.items() if support / num_items >= min_support}

# Setting the minimum support
min_support = 0.2

# Getting all possible itemsets
all_itemsets = get_all_itemsets(dataset)

# Filtering frequent itemsets based on the minimum support
frequent_itemsets = filter_itemsets(all_itemsets, min_support)

# Printing the frequent itemsets
print("Frequent Itemsets:")
for itemset, support in frequent_itemsets.items():
    print(f"{itemset}: {support}")

Frequent Itemsets:
('Action',): 8
('Adventure',): 4
('Sci-Fi',): 5
('Action', 'Adventure'): 4
('Action', 'Sci-Fi'): 4
('Drama',): 10
('Romance',): 9
('Action', 'Drama'): 3
('Action', 'Romance'): 3
('Drama', 'Romance'): 7
('Drama', 'Sci-Fi'): 3
CPU times: total: 0 ns
Wall time: 212 µs


In [18]:
%%time

from itertools import chain, combinations
from collections import defaultdict

# Sample dataset
data_market = [['Milk', 'Bread', 'Butter'],
        ['Bread', 'Butter'],
        ['Milk', 'Bread', 'Butter'],
        ['Milk', 'Bread'],
        ['Milk', 'Juice'],
        ['Bread', 'Juice'],
        ['Milk', 'Bread', 'Juice'],
        ['Milk', 'Bread', 'Butter'],
        ['Milk', 'Juice'],
        ['Bread', 'Butter'],
        ['Milk', 'Juice'],
        ['Bread', 'Juice'],
        ['Bread', 'Butter'],
        ['Milk', 'Bread'],
        ['Bread', 'Juice']]

# Function to generate all possible itemsets
def get_all_itemsets(data_market):
    itemsets = defaultdict(int)
    for row in data_market:
        for i in range(1, len(row) + 1):
            for subset in combinations(row, i):
                itemsets[subset] += 1
    return itemsets

# Function to filter frequent itemsets based on minimum support
def filter_itemsets(itemsets, min_support):
    num_items = len(dataset)
    return {itemset: support for itemset, support in itemsets.items() if support / num_items >= min_support}

# Setting the minimum support
min_support = 0.2

# Getting all possible itemsets
all_itemsets = get_all_itemsets(data_market)

# Filtering frequent itemsets based on the minimum support
frequent_itemsets = filter_itemsets(all_itemsets, min_support)

# Printing the frequent itemsets
print("Frequent Itemsets:")
for itemset, support in frequent_itemsets.items():
    print(f"{itemset}: {support}")

Frequent Itemsets:
('Milk',): 9
('Bread',): 12
('Butter',): 6
('Milk', 'Bread'): 6
('Milk', 'Butter'): 3
('Bread', 'Butter'): 6
('Milk', 'Bread', 'Butter'): 3
('Juice',): 7
('Milk', 'Juice'): 4
('Bread', 'Juice'): 4
CPU times: total: 0 ns
Wall time: 0 ns
