# MARKET BASKET ANALYSIS (APRIORI)
**Dataset: Market_Basket_Optimisation.csv**

# IMPORT LIBRARIES

In [None]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules
import matplotlib.pyplot as plt

# LOAD DATASET

In [None]:
# Requires the Google Colab runtime: the `google.colab` import is not
# available in a plain Python environment.
from google.colab import files
# NOTE(review): `uploaded` is not referenced by any later visible cell; the
# dataset is presumably read back from disk by filename afterwards -- confirm.
uploaded = files.upload()

In [None]:
# header=None: treat the first line of the CSV as data (a basket), not as
# column names, so no transaction is lost to the header.
data_path = "Market_Basket_Optimisation.csv"
df = pd.read_csv(filepath_or_buffer=data_path, header=None)

In [None]:
# Sanity check of the raw table: its (rows, columns) shape, then the
# leading rows.
raw_shape = df.shape
print("Shape of raw dataset:", raw_shape)
raw_preview = df.head()
print(raw_preview)

# CONVERT DATA TO TRANSACTION LIST

In [None]:
# Each row of `df` is one transaction. Build a list of item lists with the
# missing cells (NaN padding of short baskets) removed.
#
# Item names are stripped of surrounding whitespace: read_csv keeps any
# padding after the delimiter, so " asparagus" and "asparagus" would
# otherwise be encoded as two different items and split their support.
# Non-string cells are passed through unchanged.
transactions = [
    [
        item.strip() if isinstance(item, str) else item
        for item in row
        if pd.notna(item)
    ]
    # Plain tuples per row avoid building a Series for every df.iloc[i].
    for row in df.itertuples(index=False, name=None)
]

print("\nExample transaction:")
print(transactions[0])

# ONE-HOT ENCODING USING TransactionEncoder

In [None]:
# Fit the encoder on the baskets, then transform the same baskets into an
# array with one column per entry in `te.columns_`.
te = TransactionEncoder()
te.fit(transactions)
te_array = te.transform(transactions)

# Wrap the encoded array in a DataFrame labelled with the learned columns.
basket_df = pd.DataFrame(data=te_array, columns=te.columns_)
print("\nOne-hot encoded data (sample):")
print(basket_df.head())

# FREQUENT ITEMSETS USING APRIORI

In [None]:
# min_support is a fraction of all transactions: 0.01 keeps itemsets that
# occur in at least 1% of the baskets. Tune as needed.
frequent_itemsets = apriori(basket_df, min_support=0.01, use_colnames=True)

# Store the size of every itemset so later cells can filter by it.
itemset_sizes = frequent_itemsets["itemsets"].map(len)
frequent_itemsets["length"] = itemset_sizes

print("\nFrequent itemsets (first 10):")
# The ten rows shown are the ones with the highest support.
by_support = frequent_itemsets.sort_values("support", ascending=False)
print(by_support.head(10))

# TOP SINGLE ITEMS BY SUPPORT – SIMPLE PLOT

In [None]:
# Ten single-item itemsets with the highest support.
top_items = (
    frequent_itemsets[frequent_itemsets["length"] == 1]
    .sort_values("support", ascending=False)
    .head(10)
)

# Use the plain item names as tick labels. Casting the frozensets with
# astype(str) would label the bars "frozenset({'...'})" instead.
item_labels = [
    ", ".join(sorted(map(str, items))) for items in top_items["itemsets"]
]

plt.figure(figsize=(10, 5))
plt.bar(item_labels, top_items["support"])
plt.xticks(rotation=45, ha="right")
plt.title("Top 10 Items by Support")
plt.xlabel("Item")
plt.ylabel("Support")
plt.tight_layout()
plt.show()

# GENERATE ASSOCIATION RULES

In [None]:
# Rules are selected by `metric` (e.g. "confidence" or "lift");
# min_threshold is the minimum value that metric must reach.
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1.0)

# Columns shown in the preview.
rule_columns = ["antecedents", "consequents", "support", "confidence", "lift"]

print("\nAll rules (sample 10):")
print(rules[rule_columns].head(10))

# FILTER STRONG RULES (FOR PRESENTATION)

In [None]:
# Presentation thresholds: support of at least 1%, confidence of at least
# 30%, and lift strictly above 1.
meets_support = rules["support"] >= 0.01
meets_confidence = rules["confidence"] >= 0.3
positive_lift = rules["lift"] > 1

# Keep an independent copy of the matching rules, ordered so the highest
# lift (strongest relationship) comes first.
strong_rules = (
    rules[meets_support & meets_confidence & positive_lift]
    .copy()
    .sort_values("lift", ascending=False)
)

print("\nStrong rules (sorted by lift):")
report_columns = ["antecedents", "consequents", "support", "confidence", "lift"]
print(strong_rules[report_columns].head(20))

# NICE PRINTING OF RULES (HUMAN-READABLE)

In [None]:
# Print the top 20 strong rules as plain sentences with their metrics.
print("\n--- TOP ASSOCIATION RULES (Readable) ---")
for _, row in strong_rules.head(20).iterrows():
    # Set iteration order is arbitrary, so sort the item names to get the
    # same text on every run; str() keeps join() from failing on
    # non-string items.
    antecedents = ", ".join(sorted(map(str, row["antecedents"])))
    consequents = ", ".join(sorted(map(str, row["consequents"])))
    print(
        f"Rule: IF a customer buys [{antecedents}] "
        f"THEN they also buy [{consequents}]"
    )
    print(
        f" - Support: {row['support']:.3f} "
        f"| Confidence: {row['confidence']:.3f} "
        f"| Lift: {row['lift']:.3f}"
    )
    print("-" * 80)