In [1]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori

# Example market-basket data: five transactions, each a list of item names.
# Missing entries are written as None (Python's null object, not float NaN);
# they occur in the 1st, 4th and 5th transactions.
dataset = [
    ['Milk', 'Onion', None, 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Dill', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Milk', 'Apple', 'Kidney Beans', 'Eggs'],
    ['Milk', 'Unicorn', 'Corn', None, 'Yogurt'],
    ['Corn', 'Onion', None, 'Kidney Beans', 'Ice cream', 'Eggs']
]

In [2]:
# Remove missing entries from every transaction before encoding.
# pd.notna() is False for None as well as for float NaN / pd.NA, so both kinds
# of placeholder are dropped; an `is not None` test alone would let NaN through.
cleaned_dataset = [
    [item for item in transaction if pd.notna(item)]
    for transaction in dataset
]

# One-hot encode the cleaned transactions into a DataFrame whose columns are
# the item names learned by the encoder (te.columns_).
te = TransactionEncoder()
te.fit(cleaned_dataset)
te_ary = te.transform(cleaned_dataset)
df = pd.DataFrame(te_ary, columns=te.columns_)

# apriori takes a relative support, so the absolute support count of 2 is
# divided by the number of transactions (rows of df).
min_support_count = 2
frequent_itemsets_2 = apriori(
    df,
    min_support=min_support_count / len(df),
    use_colnames=True,
)

print("Frequent Itemsets with Support Count = 2:")
print(frequent_itemsets_2)

Frequent Itemsets with Support Count = 2:
    support                             itemsets
0       0.4                               (Corn)
1       0.8                               (Eggs)
2       0.8                       (Kidney Beans)
3       0.6                               (Milk)
4       0.6                              (Onion)
5       0.6                             (Yogurt)
6       0.8                 (Eggs, Kidney Beans)
7       0.4                         (Eggs, Milk)
8       0.6                        (Onion, Eggs)
9       0.4                       (Yogurt, Eggs)
10      0.4                 (Milk, Kidney Beans)
11      0.6                (Onion, Kidney Beans)
12      0.4               (Yogurt, Kidney Beans)
13      0.4                       (Yogurt, Milk)
14      0.4                      (Onion, Yogurt)
15      0.4           (Eggs, Milk, Kidney Beans)
16      0.6          (Onion, Eggs, Kidney Beans)
17      0.4         (Yogurt, Eggs, Kidney Beans)
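18      0.4                (Onion, Yogurt, Eggs)
19      0.4        (Onion, Yogurt, Kidney Beans)
20      0.4  (Onion, Yogurt, Eggs, Kidney Beans)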

Impact on Frequent Itemsets:

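•	If NaN is treated as an item, it appears in the frequent itemsets whenever it meets the support threshold. In this dataset the missing value occurs in 3 of the 5 transactions (support 0.6 ≥ 0.4), so it would be reported as frequent, both on its own and in combination with other items such as Eggs or Onion.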

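•	After removing NaN, only valid items are counted, so the result differs from the one obtained when NaN is treated as a distinct item: every itemset containing NaN disappears, while the supports of the remaining itemsets are unchanged because the number of transactions stays the same.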
