In [8]:
import pandas as pd
from itertools import combinations
from collections import defaultdict

# Load dataset
df = pd.read_excel("Online retail.xlsx")
df

Unnamed: 0,"shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil"
0,"burgers,meatballs,eggs"
1,chutney
2,"turkey,avocado"
3,"mineral water,milk,energy bar,whole wheat rice..."
4,low fat yogurt
...,...
7495,"butter,light mayo,fresh bread"
7496,"burgers,frozen vegetables,eggs,french fries,ma..."
7497,chicken
7498,"escalope,green tea"


In [9]:
# Preprocess data
df_trans = df.iloc[:, 0].str.split(",")  # Convert transactions into lists
transactions = df_trans.tolist()

In [10]:
# Function to calculate support
def get_support(transactions, itemset):
    count = sum(1 for transaction in transactions if itemset.issubset(transaction))
    return count / len(transactions)


In [11]:
# Set minimum support threshold
min_support = 0.02
itemsets = defaultdict(int)

In [12]:
# Count item occurrences
for transaction in transactions:
    for item in transaction:
        itemsets[frozenset([item])] += 1

In [13]:
# Filter frequent itemsets
num_transactions = len(transactions)
frequent_itemsets = {itemset: count / num_transactions for itemset, count in itemsets.items() if count / num_transactions >= min_support}


In [14]:
# Generate pairwise itemsets
pair_itemsets = defaultdict(int)
for transaction in transactions:
    for itemset in combinations(transaction, 2):
        pair_itemsets[frozenset(itemset)] += 1

frequent_pair_itemsets = {itemset: count / num_transactions for itemset, count in pair_itemsets.items() if count / num_transactions >= min_support}


In [15]:
# Function to generate association rules
def generate_association_rules(frequent_itemsets, transactions):
    rules = []
    for itemset in frequent_itemsets.keys():
        if len(itemset) < 2:
            continue
        for item in itemset:
            antecedent = frozenset([item])
            consequent = itemset - antecedent

            support_itemset = frequent_itemsets[itemset]
            support_antecedent = get_support(transactions, antecedent)
            support_consequent = get_support(transactions, consequent)

            confidence = support_itemset / support_antecedent
            lift = confidence / support_consequent if support_consequent > 0 else 0

            rules.append((antecedent, consequent, support_itemset, confidence, lift))

    return sorted(rules, key=lambda x: x[4], reverse=True)  # Sort by lift

In [16]:
# Generate and display association rules
association_rules = generate_association_rules(frequent_pair_itemsets, transactions)
for rule in association_rules[:5]:  # Display top 5 rules
    print(f"Rule: {rule[0]} -> {rule[1]}, Support: {rule[2]:.4f}, Confidence: {rule[3]:.4f}, Lift: {rule[4]:.4f}")

Rule: frozenset({'spaghetti'}) -> frozenset({'ground beef'}), Support: 0.0392, Confidence: 0.2251, Lift: 2.2909
Rule: frozenset({'ground beef'}) -> frozenset({'spaghetti'}), Support: 0.0392, Confidence: 0.3989, Lift: 2.2909
Rule: frozenset({'spaghetti'}) -> frozenset({'olive oil'}), Support: 0.0229, Confidence: 0.1317, Lift: 2.0035
Rule: frozenset({'olive oil'}) -> frozenset({'spaghetti'}), Support: 0.0229, Confidence: 0.3489, Lift: 2.0035
Rule: frozenset({'soup'}) -> frozenset({'mineral water'}), Support: 0.0231, Confidence: 0.4565, Lift: 1.9158


In [17]:
# --- Interview Questions and Answers ---
print("\nInterview Questions and Answers:")
print("\n1. What is lift and why is it important in Association rules?")
print("Lift measures how much more likely two items are purchased together than if they were independently bought.  A lift > 1 indicates a positive relationship. Higher lift means a stronger association, useful for recommendations and promotions.")

print("\n2. What is support and Confidence. How do you calculate them?")
print("Support: Th…sactions with both A and B) / (Number of transactions with A).")

print("\n3. What are some limitations or challenges of Association rules mining?")
print("Challenges include: 1) Large number of rules: Can be overwhelming, requires careful filtering. 2) Spurious relationships:  Chance correlations can appear. 3) Data sparsity: If items are rarely bought together, it's hard to find strong rules. 4) Setting thresholds: Requires experimentation to find appropriate support/confidence/lift values.")



Interview Questions and Answers:

1. What is lift and why is it important in Association rules?
Lift measures how much more likely two items are purchased together than if they were independently bought.  A lift > 1 indicates a positive relationship. Higher lift means a stronger association, useful for recommendations and promotions.

2. What is support and Confidence. How do you calculate them?
Support: Th…sactions with both A and B) / (Number of transactions with A).

3. What are some limitations or challenges of Association rules mining?
Challenges include: 1) Large number of rules: Can be overwhelming, requires careful filtering. 2) Spurious relationships:  Chance correlations can appear. 3) Data sparsity: If items are rarely bought together, it's hard to find strong rules. 4) Setting thresholds: Requires experimentation to find appropriate support/confidence/lift values.
