In [None]:
import pandas as pd
import numpy as np
import joblib
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from mlxtend.frequent_patterns import apriori, association_rules

# Load the raw sales export, parse the Date column, and derive the calendar
# fields (Month, Year) that the monthly aggregation below groups on.
df = (
    pd.read_csv('supermarket_sales.csv')
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .assign(
        Month=lambda frame: frame['Date'].dt.month,
        Year=lambda frame: frame['Date'].dt.year,
    )
)

# ----------------------------
# Model 1: Top-Selling Product Prediction
# ----------------------------

# Total quantity per (Year, Month, Product line), then reshaped so that each
# product line is a column and each row is one (Year, Month) pair.
monthly_sales = df.groupby(['Year', 'Month', 'Product line'], as_index=False)['Quantity'].sum()
pivot = pd.pivot_table(
    monthly_sales,
    index=['Year', 'Month'],
    columns='Product line',
    values='Quantity',
    fill_value=0,
)

# Features (X) are the Year/Month pairs taken from the pivot index;
# the target (y) is the full pivot, i.e. one regression target per product-line column.
X = pivot.index.to_frame().reset_index(drop=True)
y = pivot
product_columns = y.columns.tolist()  # column order is saved so predictions can be labelled

# Hold out 20% of the rows and fit on the remainder.
# NOTE: X_test / y_test are produced here but are not used anywhere in this cell.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)
model1 = LinearRegression().fit(X_train, y_train)

# Persist the fitted model together with the product column order as a single tuple
joblib.dump((model1, product_columns), 'model_top_products.pkl')
print("✅ model_top_products.pkl saved successfully!")

# ----------------------------
# Model 2: Combo Offer Generation (Apriori)
# ----------------------------

# Prepare basket data: one row per Invoice ID, one boolean column per product line,
# True when the invoice contains a positive quantity of that product line.
# A vectorised comparison is used instead of the element-wise DataFrame.applymap,
# which is deprecated in recent pandas releases and emits a FutureWarning.
basket = df.groupby(['Invoice ID', 'Product line'])['Quantity'].sum().unstack().fillna(0)
basket = basket > 0

print(f"Basket shape: {basket.shape}")
print(f"Sample rows:\n{basket.head()}")
print(f"Sum of each product sold in basket:\n{basket.sum()}")

# Generate frequent itemsets and association rules, keeping only rules with
# lift >= 1 and confidence >= 0.2
frequent_items = apriori(basket, min_support=0.01, use_colnames=True)
rules = association_rules(frequent_items, metric="lift", min_threshold=1)
rules = rules[rules['confidence'] >= 0.2]

print(f"Number of rules: {len(rules)}")
if rules.empty:
    print("⚠️ No combo rules generated. Generating fallback combos...")

    # Fallback combo generation: pair least-selling with top-selling products.
    # The per-product invoice counts are computed once and sorted in both directions.
    invoice_counts = basket.sum()
    top_products = invoice_counts.sort_values(ascending=False).index.tolist()
    least_products = invoice_counts.sort_values(ascending=True).index.tolist()

    # Cross the bottom 3 with the top 3, skipping a product paired with itself
    # (possible when there are fewer than six product lines).
    combos = [
        {'antecedents': {low}, 'consequents': {high}}
        for low in least_products[:3]  # bottom 3
        for high in top_products[:3]  # top 3
        if low != high
    ]

    # NOTE(review): these cells hold plain Python sets, whereas mlxtend's rules use
    # frozensets, so the two branches presumably serialise differently in the CSV —
    # confirm that whatever reads combo_offers.csv handles both forms.
    combo_df = pd.DataFrame(combos)
    combo_df.to_csv('combo_offers.csv', index=False)
    print("✅ Fallback combo_offers.csv created with dummy combos")
else:
    # Save the real generated combos
    combo_df = rules[['antecedents', 'consequents']]
    combo_df.to_csv('combo_offers.csv', index=False)
    print("✅ combo_offers.csv saved successfully!")


✅ model_top_products.pkl saved successfully!
Basket shape: (1000, 6)
Sample rows:
Product line  Electronic accessories  Fashion accessories  Food and beverages  \
Invoice ID                                                                      
101-17-6199                    False                False                True   
101-81-4070                    False                False               False   
102-06-2002                    False                False               False   
102-77-2261                    False                False               False   
105-10-6182                    False                 True               False   

Product line  Health and beauty  Home and lifestyle  Sports and travel  
Invoice ID                                                              
101-17-6199               False               False              False  
101-81-4070                True               False              False  
102-06-2002               False               False       

  basket = basket.applymap(lambda x: True if x > 0 else False)


In [9]:
# Diagnostic summary of the basket matrix: its shape, the first rows, and the
# per-product-line column sums. Each label/value pair is printed with print's
# default single-space separator.
for label, value in (
    ("Basket shape:", basket.shape),
    ("Sample rows:\n", basket.head()),
    ("Sum of each product sold in basket:\n", basket.sum()),
):
    print(label, value)


Basket shape: (1000, 6)
Sample rows:
 Product line  Electronic accessories  Fashion accessories  Food and beverages  \
Invoice ID                                                                      
101-17-6199                    False                False                True   
101-81-4070                    False                False               False   
102-06-2002                    False                False               False   
102-77-2261                    False                False               False   
105-10-6182                    False                 True               False   

Product line  Health and beauty  Home and lifestyle  Sports and travel  
Invoice ID                                                              
101-17-6199               False               False              False  
101-81-4070                True               False              False  
102-06-2002               False               False               True  
102-77-2261                Tr

In [11]:
# Count how many product lines each invoice contains.
# Any 'num_products' column left behind by a previous run of this cell is excluded
# from the row sum, so re-running the cell does not count the old total as an
# extra product (the original summed over every column, including its own result).
# NOTE(review): this still adds a non-boolean column to `basket`; drop it before
# passing `basket` to apriori again, which expects only 0/1 or boolean columns.
product_flags = basket.drop(columns='num_products', errors='ignore')
basket['num_products'] = product_flags.sum(axis=1)
print("Number of products per invoice distribution:")
print(basket['num_products'].value_counts())


Number of products per invoice distribution:
num_products
1    1000
Name: count, dtype: int64
