In [None]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from itertools import combinations

In [None]:
# Load the raw market-basket file without treating any row as a header.
# NOTE(review): the preview output below suggests the file is ';'-delimited with
# the item names in its first row; with the default separator and header=None
# every row (including that name row) loads as one string in column 0.
df = pd.read_csv('market.csv', header=None)

In [None]:
# Preview the first rows to check how the CSV was parsed.
df.head()

Unnamed: 0,0
0,Bread;Honey;Bacon;Toothpaste;Banana;Apple;Haze...
1,1;0;1;0;1;1;1;0;0;1;0;0;0;0;0;0;0;1;1;0;0;1
2,1;1;1;0;1;1;1;0;0;0;1;0;1;1;0;0;1;0;0;1;1;0
3,0;1;1;1;1;1;1;1;1;0;1;1;1;0;1;1;1;1;1;0;0;1
4,1;1;0;1;0;1;0;0;0;0;1;1;1;0;0;0;1;0;1;1;1;0


In [None]:
# (rows, columns) of the loaded frame.
df.shape


(465, 1)

In [None]:
# Summary statistics of the loaded frame (count / unique / top / freq for string data).
df.describe()

Unnamed: 0,0
count,465
unique,458
top,0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;0;0
freq,3


In [None]:
# Calculate support for itemsets
def calculate_support(itemset, dataset):
    """Return the fraction of transactions that contain every item of ``itemset``.

    Args:
        itemset: Iterable of hashable items (duplicates are ignored).
        dataset: Sized collection of transactions; each transaction must
            support the ``in`` operator (set, list, tuple, ...).

    Returns:
        float: matching transactions divided by ``len(dataset)``. An empty
        dataset yields ``0.0`` instead of raising ``ZeroDivisionError``.
    """
    total = len(dataset)
    if total == 0:
        # No transactions at all: nothing can be supported.
        return 0.0

    required = set(itemset)
    matches = sum(
        all(item in transaction for item in required)
        for transaction in dataset
    )
    return matches / total

# Generate candidate itemsets and their support
def _extend_frequent_itemsets(frequent):
    """Build the size-(k+1) candidates from the frequent size-k itemsets.

    ``frequent`` is a list of equal-length tuples whose items all follow one
    global item order and which is itself sorted by that order (this is how
    ``apriori`` produces each level). Two itemsets sharing their first k-1
    items are joined, and a candidate is kept only when every size-k subset
    of it is frequent as well.
    """
    if not frequent:
        return []

    known = set(frequent)
    size = len(frequent[0])
    candidates = []
    for i, head in enumerate(frequent):
        prefix = head[:-1]
        for j in range(i + 1, len(frequent)):
            other = frequent[j]
            if other[:-1] != prefix:
                # Itemsets with the same prefix are contiguous, so stop here.
                break
            candidate = head + (other[-1],)
            # Prune: a superset of an infrequent itemset cannot be frequent.
            if all(subset in known for subset in combinations(candidate, size)):
                candidates.append(candidate)
    return candidates


def apriori(dataset, min_support):
    """Find all itemsets whose support is at least ``min_support``.

    Works level by level: only itemsets whose subsets were all frequent at
    the previous level are counted, and the search stops at the first level
    that yields no frequent itemset. The set of itemsets returned is the
    same as exhaustive enumeration would give, without the exponential scan.

    Args:
        dataset: Iterable of transactions, each an iterable of hashable items.
        min_support: Minimum fraction of transactions (0..1) an itemset must
            appear in to be kept.

    Returns:
        list[tuple[tuple, float]]: ``(itemset, support)`` pairs ordered by
        itemset size. Items are sorted when they are mutually comparable, so
        the output order is stable between runs. An empty dataset gives ``[]``.
    """
    transactions = [frozenset(transaction) for transaction in dataset]
    if not transactions:
        return []
    total = len(transactions)

    unique_items = {item for transaction in transactions for item in transaction}
    try:
        ordered_items = sorted(unique_items)
    except TypeError:
        # Items of mixed, non-comparable types: keep the set's own order.
        ordered_items = list(unique_items)

    itemsets = []
    candidates = [(item,) for item in ordered_items]
    while candidates:
        frequent = []
        for candidate in candidates:
            members = frozenset(candidate)
            support = sum(members <= transaction for transaction in transactions) / total
            if support >= min_support:
                frequent.append((candidate, support))

        itemsets.extend(frequent)
        candidates = _extend_frequent_itemsets([itemset for itemset, _ in frequent])

    return itemsets

# Function to find association rules
def generate_rules(itemsets, dataset, min_confidence=0.5, min_lift=1.0):
    """Derive association rules X → Y from frequent itemsets.

    For every itemset, each non-empty proper subset is tried as the
    antecedent X with the remaining items as the consequent Y, so both
    ``a → b`` and ``b → a`` (and non-contiguous splits of larger itemsets)
    are considered.

    Args:
        itemsets: Iterable of ``(itemset_tuple, support)`` pairs, e.g. the
            output of ``apriori``.
        dataset: Transactions used to compute the support of any subset that
            is not already listed in ``itemsets``.
        min_confidence: Minimum ``support(X ∪ Y) / support(X)``.
        min_lift: Minimum ``confidence / support(Y)``.

    Returns:
        list[dict]: One dict per accepted rule with keys ``'Rule'``,
        ``'Support'``, ``'Confidence'`` and ``'Lift'``.
    """
    itemsets = list(itemsets)
    # Supports already known from the frequent-itemset pass; avoids rescanning
    # the dataset for every antecedent/consequent.
    known_support = {frozenset(itemset): support for itemset, support in itemsets}

    def support_of(items):
        key = frozenset(items)
        if key not in known_support:
            known_support[key] = calculate_support(items, dataset)
        return known_support[key]

    rules = []
    for itemset, support in itemsets:
        positions = range(len(itemset))
        # Generate all possible rules (X → Y)
        for size in range(1, len(itemset)):
            for chosen in combinations(positions, size):
                left = tuple(itemset[i] for i in chosen)
                right = tuple(itemset[i] for i in positions if i not in chosen)
                left_support = support_of(left)
                right_support = support_of(right)

                if not left_support or not right_support:
                    # Confidence / lift are undefined for zero-support sides.
                    continue

                # Calculate confidence and lift
                confidence = support / left_support
                lift = confidence / right_support

                # Check if the rule meets the minimum confidence and lift thresholds
                if confidence >= min_confidence and lift >= min_lift:
                    rules.append({
                        'Rule': f"{left} → {right}",
                        'Support': support,
                        'Confidence': confidence,
                        'Lift': lift
                    })

    return rules


In [None]:
# Convert the DataFrame into a list of transactions (list of sets).
# The file was loaded as a single string column: row 0 holds the ';'-separated
# item names and every later row holds the matching ';'-separated 0/1 flags.
# Using the raw rows directly would treat each whole line as one "item".
split_rows = df[0].astype(str).str.split(';')
item_names = [name.strip() for name in split_rows.iloc[0]]
transactions = [
    {name for name, flag in zip(item_names, flags) if flag.strip() == '1'}
    for flags in split_rows.iloc[1:]
]

# Apply Apriori to find frequent itemsets
min_support = 0.01
itemsets = apriori(transactions, min_support)

# Generate association rules
rules = generate_rules(itemsets, transactions, min_confidence=0.5, min_lift=1.0)

# Convert rules to DataFrame for easier analysis. Explicit columns keep the
# frame usable (e.g. sortable by 'Lift') even when no rule passes the thresholds.
rules_df = pd.DataFrame(rules, columns=['Rule', 'Support', 'Confidence', 'Lift'])


In [None]:
# Rank the frequent itemsets by support (highest first) and keep the first ten.
ranked_itemsets = sorted(itemsets, key=lambda pair: pair[1], reverse=True)
top_itemsets = ranked_itemsets[:10]

# Bar labels are the printed itemset tuples; bar lengths are their supports.
itemset_labels = [str(items) for items, _ in top_itemsets]
support_values = [value for _, value in top_itemsets]

# Horizontal bar chart of the ten best-supported itemsets.
fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(itemset_labels, support_values)
ax.set_xlabel('Support')
ax.set_title('Top 10 Itemsets by Support')
plt.show()


In [None]:
# Order the rules from highest to lowest lift and keep the first ten.
rules_sorted_by_lift = rules_df.sort_values(by='Lift', ascending=False)
top_rules_by_lift = rules_sorted_by_lift.iloc[:10]

# Horizontal bar chart: one bar per rule, bar length is the rule's lift.
fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(top_rules_by_lift['Rule'], top_rules_by_lift['Lift'])
ax.set_xlabel('Lift')
ax.set_title('Top 10 Rules by Lift')
plt.show()
