In [19]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules
import requests

# -------------------------------
# Load dataset as list of transactions
# -------------------------------
url = "https://raw.githubusercontent.com/stedy/Machine-Learning-with-R-datasets/master/groceries.csv"
raw_data = requests.get(url).text.strip().split("\n")

transactions = [line.split(",") for line in raw_data]

# -------------------------------
# OPTION 1: Show first 10 raw transactions
# -------------------------------
print("üîπ First 10 transactions (raw list form):")
for i, t in enumerate(transactions[:10], 1):
    print(f"{i}: {t}")

print("\n" + "="*60 + "\n")

# -------------------------------
# OPTION 2: Convert to DataFrame (for a cleaner view)
# -------------------------------
df_view = pd.DataFrame({
    "TransactionID": range(1, len(transactions)+1),
    "Items": [", ".join(t) for t in transactions]
})

print("üîπ Dataset preview (first 10 rows):")
print(df_view.head(10))

print("\n" + "="*60 + "\n")

# -------------------------------
# One-hot encoding
# -------------------------------
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df = pd.DataFrame(te_ary, columns=te.columns_)

print("üîπ One-hot encoded dataset (first 5 rows):")
print(df.head())

print("\n" + "="*60 + "\n")

# -------------------------------
# Apply Apriori
# -------------------------------
frequent_itemsets = apriori(df, min_support=0.03, use_colnames=True)

print("üîπ Frequent Itemsets (min_support=0.03):")
print(frequent_itemsets.head(10))

print("\n" + "="*60 + "\n")

# -------------------------------
# Association Rules
# -------------------------------
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.4)

print("üîπ Association Rules (min_confidence=0.4):")
print(rules[['antecedents','consequents','support','confidence','lift']].head(10))


üîπ First 10 transactions (raw list form):
1: ['citrus fruit', 'semi-finished bread', 'margarine', 'ready soups']
2: ['tropical fruit', 'yogurt', 'coffee']
3: ['whole milk']
4: ['pip fruit', 'yogurt', 'cream cheese ', 'meat spreads']
5: ['other vegetables', 'whole milk', 'condensed milk', 'long life bakery product']
6: ['whole milk', 'butter', 'yogurt', 'rice', 'abrasive cleaner']
7: ['rolls/buns']
8: ['other vegetables', 'UHT-milk', 'rolls/buns', 'bottled beer', 'liquor (appetizer)']
9: ['pot plants']
10: ['whole milk', 'cereals']


üîπ Dataset preview (first 10 rows):
   TransactionID                                              Items
0              1  citrus fruit, semi-finished bread, margarine, ...
1              2                     tropical fruit, yogurt, coffee
2              3                                         whole milk
3              4     pip fruit, yogurt, cream cheese , meat spreads
4              5  other vegetables, whole milk, condensed milk, ...
5            

# **Notes:**

## **üìù Notes on Apriori Algorithm Project**

**üìå Dataset**

Source: Groceries dataset (9,835 transactions) from

[GitHub: stedy/Machine-Learning-with-R-datasets](https://github.com/stedy/Machine-Learning-with-R-datasets)

**Format:** Each row represents a transaction (items purchased together).

**Preprocessing:**

Loaded raw text ‚Üí split into list of items per transaction.

Converted transactions to a one-hot encoded DataFrame using TransactionEncoder.