In [31]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mlxtend.frequent_patterns import fpgrowth
from mlxtend.frequent_patterns import association_rules

print("--- STARTING MARKET BASKET ANALYSIS (FP-GROWTH) ---\n")

# 1. LOAD DATASET
# 'Member_number' is cast to string right after loading so it is handled as an
# identifier rather than as a number.
try:
    df = pd.read_csv('Groceries_dataset.csv')
    df['Member_number'] = df['Member_number'].astype(str)
    print("✓ Dataset Loaded Successfully")
    print(f"✓ Shape: {df.shape}")
except FileNotFoundError:
    print("Error: 'Groceries_dataset.csv' not found. Please upload the file.")
    # Re-raise instead of calling exit(): inside a notebook exit() ends the
    # kernel session, whereas an exception just fails this cell and stops a
    # "Run All" at the point where the data is missing.
    raise

# 2. PREPROCESSING (Transaction Matrix)
print("\n- Starting Preprocessing (Grouping by Transaction)...")

# A "transaction" is everything one member bought on one date.
transaction_keys = ['Member_number', 'Date']

# Step 1: count how often each item occurs within each transaction.
item_counts = df.groupby(transaction_keys + ['itemDescription'])['itemDescription'].count()

# Step 2: pivot the items into columns (one row per transaction); items that
# do not occur in a transaction are filled with 0.
basket = item_counts.unstack().reset_index().fillna(0).set_index(transaction_keys)

# Convert counts to Boolean (0 or 1)
# encode_units is the per-cell rule: a count of at least one becomes 1,
# anything else becomes 0.
def encode_units(x):
    """Return 1 when the count ``x`` is at least 1, otherwise 0."""
    if x >= 1:
        return 1
    return 0

# DataFrame.applymap is deprecated since pandas 2.1 in favour of DataFrame.map.
# Use whichever element-wise method the installed pandas provides, so the cell
# runs without a FutureWarning on new versions and still works on old ones.
_elementwise = basket.map if hasattr(basket, "map") else basket.applymap
basket_sets = _elementwise(encode_units).astype(bool)

print("✓ Data Transformed to Market Basket Format (One-Hot Encoded)")
print(f"✓ Transformed Matrix Shape: {basket_sets.shape}")
print(f"✓ Total Transactions: {basket_sets.shape[0]}")
print(f"✓ Total Unique Items: {basket_sets.shape[1]}")

# 3. TRAIN MODEL (FP-GROWTH)
print("\n- Training FP-Growth Model...")

# Minimum support of 0.001: an itemset is kept only if it appears in at least
# 0.1% of all transactions. For sparse grocery data, values between 0.001 and
# 0.005 are standard.
min_support = 0.001
frequent_itemsets = fpgrowth(basket_sets, use_colnames=True, min_support=min_support)

n_itemsets = len(frequent_itemsets)
print("✓ Frequent Itemsets Generated")
print(f"✓ Count of Frequent Itemsets: {n_itemsets}")

# 4. RULE GENERATION
# Rules are selected on lift rather than confidence, because confidence is
# usually low for supermarket data. Lift > 1 means the items appear together
# more often than they would if bought independently; a lift threshold of 1.1
# is applied here. The rules are ordered strongest (highest lift) first.
rules = (
    association_rules(frequent_itemsets, metric="lift", min_threshold=1.1)
    .sort_values(by='lift', ascending=False)
)

print(f"✓ Association Rules Generated: {len(rules)}")

# 5. EVALUATION
print("\n--- MODEL EVALUATION (Top 5 Rules) ---")

# antecedents/consequents are frozensets and can hold more than one item.
# Taking list(x)[0] showed a single, arbitrarily chosen member, so a rule such
# as {a, b} -> {c} was displayed as "a -> c". Join every member instead
# (sorted, so the text is stable between runs).
display_rules = rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']].copy()
for side in ('antecedents', 'consequents'):
    display_rules[side] = display_rules[side].apply(lambda items: ', '.join(sorted(items)))

print(display_rules.head(5).to_string(index=False))

print("\n--- STATS SUMMARY ---")
print(rules[['support', 'confidence', 'lift']].describe())

# =============================================================================
# PART 6: THE TA PRESENTATION DEMO (Interactive Recommendation)
# =============================================================================
print("\n========================================")
print("      TA PRESENTATION DEMO SYSTEM      ")
print("========================================")
print("Type an item to see recommendations (e.g., 'whole milk', 'yogurt', 'sausage')")

# Get list of all available items for reference
all_items = list(basket_sets.columns)

# Lower-cased name -> original column name, for a case-insensitive lookup.
# setdefault keeps the first column when two names differ only by case.
items_by_lowercase = {}
for item in all_items:
    items_by_lowercase.setdefault(item.lower(), item)

while True:
    # Reading input is handled separately from the lookup below: when no
    # interactive stdin exists (e.g. the notebook is executed headlessly),
    # input() fails on every call, and swallowing that in the generic handler
    # would make this loop spin forever.
    try:
        user_input = input("\nEnter Item Name (or 'exit'): ").strip().lower()
    except (EOFError, NotImplementedError):
        print("Interactive input is not available; leaving the demo.")
        break

    if user_input == 'exit':
        break

    try:
        # Check if item exists in dataset (case insensitive search)
        found_item = items_by_lowercase.get(user_input)
        if found_item is None:
            print(f"❌ Item '{user_input}' not found in database. Try: whole milk, rolls/buns, soda")
            continue

        # Filter rules where the input is part of the "Antecedent" (IF part).
        # Antecedents are frozensets, so this also matches rules in which the
        # item appears together with other items.
        recommendations = rules[rules['antecedents'].apply(lambda x: found_item in x)]

        if recommendations.empty:
            print(f"No strong rules found for '{found_item}'. It is bought independently.")
        else:
            print(f"✅ Customers who buy '{found_item}' also buy:")
            # Show top 3 results (rules are already sorted by lift)
            for _, row in recommendations.head(3).iterrows():
                # Show the complete consequent set, not just one arbitrary
                # member of the frozenset.
                consequent = ', '.join(sorted(row['consequents']))
                # When the rule needs further items besides the one typed in,
                # say so, otherwise the recommendation is misleading.
                other_items = sorted(row['antecedents'] - {found_item})
                context = f" [when bought together with {', '.join(other_items)}]" if other_items else ""
                lift_val = row['lift']
                print(f"   -> {consequent} (Lift: {lift_val:.2f}x more likely){context}")

    except Exception as e:
        # Keep the demo alive on unexpected lookup/formatting errors.
        print(f"Error: {e}")

--- STARTING MARKET BASKET ANALYSIS (FP-GROWTH) ---

✓ Dataset Loaded Successfully
✓ Shape: (38765, 3)

- Starting Preprocessing (Grouping by Transaction)...


  basket_sets = basket.applymap(encode_units).astype(bool)


✓ Data Transformed to Market Basket Format (One-Hot Encoded)
✓ Transformed Matrix Shape: (14963, 167)
✓ Total Transactions: 14963
✓ Total Unique Items: 167

- Training FP-Growth Model...
✓ Frequent Itemsets Generated
✓ Count of Frequent Itemsets: 750
✓ Association Rules Generated: 88

--- MODEL EVALUATION (Top 5 Rules) ---
 antecedents         consequents  support  confidence     lift
     sausage          whole milk 0.001470    0.024363 2.182917
  whole milk             sausage 0.001470    0.131737 2.182917
     sausage              yogurt 0.001470    0.164179 1.911760
      yogurt             sausage 0.001470    0.017121 1.911760
citrus fruit specialty chocolate 0.001403    0.026415 1.653762

--- STATS SUMMARY ---
         support  confidence       lift
count  88.000000   88.000000  88.000000
mean    0.001419    0.058946   1.376157
std     0.000481    0.047333   0.193843
min     0.001002    0.007194   1.203301
25%     0.001069    0.020712   1.244390
50%     0.001303    0.043568   1.3

In [24]:
#IMPORT LIBRARIES
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from mlxtend.frequent_patterns import fpgrowth
from mlxtend.frequent_patterns import association_rules

In [25]:
print("--- STARTING MARKET BASKET ANALYSIS (FP-GROWTH) ---\n")

# Load the raw purchase records; 'Member_number' is cast to string so it is
# handled as an identifier rather than as a number.
try:
    df = pd.read_csv('Groceries_dataset.csv')
    df['Member_number'] = df['Member_number'].astype(str)
    print("✓ Dataset Loaded Successfully")
    print(f"✓ Shape: {df.shape}")
except FileNotFoundError:
    print("Error: 'Groceries_dataset.csv' not found. Please upload the file.")
    # Re-raise instead of calling exit(): inside a notebook exit() ends the
    # kernel session, whereas an exception just fails this cell and stops a
    # "Run All" at the point where the data is missing.
    raise

--- STARTING MARKET BASKET ANALYSIS (FP-GROWTH) ---

✓ Dataset Loaded Successfully
✓ Shape: (38765, 3)


In [26]:
print("\n- Starting Preprocessing (Grouping by Transaction)...")

# A "transaction" is everything one member bought on one date.
transaction_keys = ['Member_number', 'Date']

# Count each item per transaction, then pivot items into columns; items that
# do not occur in a transaction are filled with 0.
item_counts = df.groupby(transaction_keys + ['itemDescription'])['itemDescription'].count()
basket = item_counts.unstack().reset_index().fillna(0).set_index(transaction_keys)

def encode_units(x):
    """Return 1 when the count ``x`` is at least 1, otherwise 0."""
    if x >= 1:
        return 1
    return 0

# DataFrame.applymap is deprecated since pandas 2.1 in favour of DataFrame.map.
# Use whichever element-wise method the installed pandas provides, so the cell
# runs without a FutureWarning on new versions and still works on old ones.
_elementwise = basket.map if hasattr(basket, "map") else basket.applymap
basket_sets = _elementwise(encode_units).astype(bool)

print("✓ Data Transformed to Market Basket Format (One-Hot Encoded)")
print(f"✓ Transformed Matrix Shape: {basket_sets.shape}")
print(f"✓ Total Transactions: {basket_sets.shape[0]}")
print(f"✓ Total Unique Items: {basket_sets.shape[1]}")


- Starting Preprocessing (Grouping by Transaction)...


  basket_sets = basket.applymap(encode_units).astype(bool)


✓ Data Transformed to Market Basket Format (One-Hot Encoded)
✓ Transformed Matrix Shape: (14963, 167)
✓ Total Transactions: 14963
✓ Total Unique Items: 167


In [27]:

print("\n- Training FP-Growth Model...")

# Keep itemsets that occur in at least 0.1% of all transactions.
min_support = 0.001
frequent_itemsets = fpgrowth(basket_sets, use_colnames=True, min_support=min_support)

n_itemsets = len(frequent_itemsets)
print("✓ Frequent Itemsets Generated")
print(f"✓ Count of Frequent Itemsets: {n_itemsets}")



- Training FP-Growth Model...
✓ Frequent Itemsets Generated
✓ Count of Frequent Itemsets: 750


In [28]:

# Derive rules using a lift threshold of 1.1 and order them strongest
# (highest lift) first.
rules = (
    association_rules(frequent_itemsets, metric="lift", min_threshold=1.1)
    .sort_values(by='lift', ascending=False)
)

print(f"✓ Association Rules Generated: {len(rules)}")

✓ Association Rules Generated: 142


In [29]:


print("\n--- MODEL EVALUATION (Top 5 Rules) ---")

# antecedents/consequents are frozensets and can hold more than one item.
# Taking list(x)[0] showed a single, arbitrarily chosen member, so a rule such
# as {a, b} -> {c} was displayed as "a -> c". Join every member instead
# (sorted, so the text is stable between runs).
display_rules = rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']].copy()
for side in ('antecedents', 'consequents'):
    display_rules[side] = display_rules[side].apply(lambda items: ', '.join(sorted(items)))

print(display_rules.head(5).to_string(index=False))

print("\n--- STATS SUMMARY ---")
print(rules[['support', 'confidence', 'lift']].describe())


--- MODEL EVALUATION (Top 5 Rules) ---
 antecedents         consequents  support  confidence     lift
     sausage          whole milk 0.001470    0.024363 2.182917
  whole milk             sausage 0.001470    0.131737 2.182917
     sausage              yogurt 0.001470    0.164179 1.911760
      yogurt             sausage 0.001470    0.017121 1.911760
citrus fruit specialty chocolate 0.001403    0.026415 1.653762

--- STATS SUMMARY ---
          support  confidence        lift
count  142.000000  142.000000  142.000000
mean     0.001483    0.056680    1.286441
std      0.000793    0.045302    0.191656
min      0.001002    0.006771    1.106100
25%      0.001069    0.022406    1.151307
50%      0.001337    0.042259    1.232030
75%      0.001470    0.079161    1.347267
max      0.005748    0.255814    2.182917


In [30]:
# STEP 8: CONCLUSION
print("\n Conclusion: ")
if not rules.empty:
    # The first row is reported as the strongest rule; this relies on `rules`
    # having been sorted by lift in descending order beforehand.
    best_rule = rules.iloc[0]
    # antecedents/consequents are frozensets that may contain several items.
    # list(...)[0] reported only one arbitrary member, which misstates
    # multi-item rules, so every member is joined (sorted for stable output).
    ant = ', '.join(sorted(best_rule['antecedents']))
    con = ', '.join(sorted(best_rule['consequents']))
    lift = best_rule['lift']

    print("The FP-Growth algorithm successfully identified shopping patterns.")
    print(f"Insight: The strongest relationship is between '{ant}' and '{con}'.")
    print(f"Lift: {lift:.4f}")
else:
    print("No significant rules found.")


 Conclusion: 
The FP-Growth algorithm successfully identified shopping patterns.
Insight: The strongest relationship is between 'sausage' and 'whole milk'.
Lift: 2.1829
