# Simulating main functions for different manipulations of customer and sales data

In [None]:
import warnings
warnings.filterwarnings('ignore')
import random

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.gridspec as gr

In [2]:
# Load express.csv into the master DataFrame and display it
master_df = pd.read_csv("express.csv")
master_df

In [4]:
# Remove credit/cancelled transactions.
# Take an explicit copy: columns of master_df are reassigned later
# (InvoiceDate is converted to datetime), and that should happen on an
# independent frame rather than on a slice of the raw data.
master_df = master_df[master_df["OnCredit"] == False].copy()  # noqa: E712
master_df

## Forecast Simulation

In [5]:
# Select the columns relevant for forecasting.
# Copy explicitly because columns are added to / converted on this frame
# in the following cells.
forecast_columns = ["InvoiceDate", "CustomerID", "InvoiceNo", "StockCode", "Quantity", "TotalPrice"]
forecast = master_df[forecast_columns].copy()
forecast

In [6]:
# Parse the invoice dates into datetime values and show the resulting dtype
parsed_invoice_dates = pd.to_datetime(forecast["InvoiceDate"])
forecast["InvoiceDate"] = parsed_invoice_dates
forecast["InvoiceDate"].dtype

In [7]:
# Store a datetime version of InvoiceDate in a separate "Date" column
date_values = pd.to_datetime(forecast["InvoiceDate"])
forecast["Date"] = date_values
forecast["Date"]

In [8]:
# Sum quantity and revenue per (invoice timestamp, product).
# The columns are selected with a list: selecting several columns with a
# bare tuple (['Quantity', "TotalPrice"] without the inner brackets) is
# rejected by pandas 2.x.
daily_sales = (
    forecast.groupby(["InvoiceDate", "StockCode"])[["Quantity", "TotalPrice"]]
    .sum()
    .reset_index()
)
daily_sales


In [9]:
# Break the invoice timestamp into day / month / year columns, then reduce
# InvoiceDate itself to a plain calendar date.
invoice_timestamp = daily_sales["InvoiceDate"]
for part_name, part_values in (
    ("Day", invoice_timestamp.dt.day),
    ("Month", invoice_timestamp.dt.month),
    ("Year", invoice_timestamp.dt.year),
):
    daily_sales[part_name] = part_values
daily_sales["InvoiceDate"] = invoice_timestamp.dt.date
daily_sales

In [9]:
# Re-index the daily sales by invoice date
nd = daily_sales.set_index(keys="InvoiceDate")
nd

In [10]:
#We can use this if we consider demand forecasting for individual products
# Group sales for each product per month
# monthly_sales = nd.groupby('StockCode').resample('M')['Quantity'].sum().reset_index()
# monthly_sales

In [11]:
# Calculate total sales of everything per day
daily_sales

In [10]:
# Total quantity sold per calendar day, across all products
quantity_per_day = daily_sales.groupby("InvoiceDate")["Quantity"].sum()
total_daily = quantity_per_day.reset_index()
total_daily

In [11]:
# Convert the per-day dates back to datetime so the .dt accessor can be used
daily_dates = pd.to_datetime(total_daily["InvoiceDate"])
total_daily["InvoiceDate"] = daily_dates
total_daily.dtypes

In [12]:
# Choose January 2011.
# Each comparison is parenthesised: `&` binds tighter than `==`, so without
# the parentheses the expression is parsed as month == (1 & (year == 2011)).
sale_dates = total_daily["InvoiceDate"]
is_january_2011 = (sale_dates.dt.month == 1) & (sale_dates.dt.year == 2011)
jan_sales = total_daily[is_january_2011]
jan_sales

In [13]:
# Save the January sample to disk (to_csv writes the DataFrame index as well by default)
jan_sales.to_csv("sample_sales_data.csv")

In [15]:
# Line chart of the total quantity sold on each day of January 2011
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(jan_sales['InvoiceDate'], jan_sales['Quantity'], marker='o', linestyle='-')
ax.set_title('Total Sales for January 2011')
ax.set_xlabel('Date')
ax.set_ylabel('Total Sales')
plt.xticks(rotation=45)  # tilt the date labels so they do not overlap
ax.grid(True)
fig.tight_layout()
plt.show();

## Peak/Lull Marking

In [16]:
# (Optional) Dummy data generation for testing purposes
import datetime

def generate_dummy_data(start_date='2024-03-01', end_date='2024-03-31'):
    """Generate dummy daily sales data with random peaks and dips.

    Args:
        start_date (str, optional): First day of the range (inclusive).
        end_date (str, optional): Last day of the range (inclusive).

    Returns:
        pd.DataFrame: One row per day with the columns 'InvoiceDate'
        (Timestamp) and 'Quantity' (int) - the columns find_promo_days reads.
    """
    dates = pd.date_range(start=start_date, end=end_date)
    # Work in floats so the random multipliers are not truncated step by step
    base_sales = np.random.randint(50, 150, size=len(dates)).astype(float)

    # Never ask for more peak/dip days than the range contains
    n_events = min(5, len(dates))
    for i in random.sample(range(len(dates)), k=n_events):
        base_sales[i] *= random.uniform(1.5, 2)  # Introduce random peaks
    for i in random.sample(range(len(dates)), k=n_events):
        base_sales[i] *= random.uniform(0.5, 0.8)  # Introduce random dips

    return pd.DataFrame({
        'InvoiceDate': dates,
        'Quantity': base_sales.round().astype(int),
    })

In [17]:
def proximity_calculator(days, proximity_days=3):
    """Groups closely spaced dates into promotional periods.

    Args:
        days (list): A list of 'Timestamp' objects, in any order. The list is
            not modified.
        proximity_days (int, optional): Largest gap, in days, between two
            consecutive dates for them to fall into the same period.
            Defaults to 3.

    Returns:
        list: A list of lists, where each sub-list represents a promotional
        period (every calendar day from the first to the last date of the
        group, as 'Timestamp' objects).
    """
    # Work on a sorted copy so the caller's list is left untouched
    ordered_days = sorted(days)
    periods = []
    if not ordered_days:
        return periods

    start_date = end_date = ordered_days[0]
    for day in ordered_days[1:]:
        if (day - end_date).days <= proximity_days:
            # Close enough to the running period: extend it
            end_date = day
        else:
            # Gap too large: close the running period and start a new one
            periods.append(pd.date_range(start=start_date, end=end_date, freq='D').tolist())
            start_date = end_date = day

    periods.append(pd.date_range(start=start_date, end=end_date, freq='D').tolist())
    return periods


In [18]:
def _group_days_by_proximity(days, proximity_days):
    """Merge dates whose gap is at most proximity_days into daily date ranges."""
    periods = []
    start = end = None
    for day in sorted(days):
        if start is None:
            start = end = day
        elif (day - end).days <= proximity_days:
            end = day
        else:
            periods.append(pd.date_range(start=start, end=end, freq='D').tolist())
            start = end = day
    if start is not None:
        periods.append(pd.date_range(start=start, end=end, freq='D').tolist())
    return periods


def find_promo_days(sales_data, peak_threshold=1.2, lull_threshold=0.5, num_promos=3, proximity_days=3):
    """Identifies peak and lull promotional periods based on thresholds of average sales volume,
    grouping close dates into extended periods.

    Args:
        sales_data (pd.DataFrame): DataFrame containing 'InvoiceDate' and 'Quantity' columns.
        peak_threshold (float, optional): Multiplier for avg. quantity to define a peak. Defaults to 1.2 (20% above average).
        lull_threshold (float, optional): Multiplier for avg. quantity to define a lull. Defaults to 0.5 (50% below average).
        num_promos (int, optional): Maximum number of peak days and of lull days that are kept. Defaults to 3.
        proximity_days (int, optional): The maximum number of days between dates to consider them part of the same promotional period. Defaults to 3.

    Returns:
        tuple: (peak_periods, lull_periods); each is a list of periods, and each
        period is a list of consecutive daily Timestamps.

    Raises:
        TypeError: If sales_data is not a pandas DataFrame.
    """

    if not isinstance(sales_data, pd.DataFrame):
        raise TypeError("sales_data must be a pandas DataFrame")

    # Calculate average sales volume
    avg_sales = sales_data['Quantity'].mean()

    # Apply thresholds to identify days within peak or lull zones
    peak_condition = sales_data['Quantity'] >= avg_sales * peak_threshold
    lull_condition = sales_data['Quantity'] <= avg_sales * lull_threshold

    # Keep only the num_promos most extreme days on each side
    peak_days = sales_data[peak_condition].nlargest(num_promos, 'Quantity')['InvoiceDate'].tolist()
    lull_days = sales_data[lull_condition].nsmallest(num_promos, 'Quantity')['InvoiceDate'].tolist()

    # Group the days here so that the proximity_days argument is actually honoured
    peak_periods = _group_days_by_proximity(peak_days, proximity_days)
    lull_periods = _group_days_by_proximity(lull_days, proximity_days)

    return peak_periods, lull_periods


In [19]:
# Peak and lull periods for the January sample
peak, tull = find_promo_days(sales_data=jan_sales, proximity_days=3)
tull


In [20]:
peak

In [21]:
import pandas as pd

def _group_days_by_proximity(days, proximity_days):
    """Merge dates whose gap is at most proximity_days into daily date ranges."""
    periods = []
    start = end = None
    for day in sorted(days):
        if start is None:
            start = end = day
        elif (day - end).days <= proximity_days:
            end = day
        else:
            periods.append(pd.date_range(start=start, end=end, freq='D').tolist())
            start = end = day
    if start is not None:
        periods.append(pd.date_range(start=start, end=end, freq='D').tolist())
    return periods


def find_promo_days(sales_data, peak_threshold=1.2, lull_threshold=0.5, num_promos=3, proximity_days=3):
    """Identifies peak and lull promotional periods based on thresholds of average sales volume,
    grouping close dates into extended periods.

    Args:
        sales_data (pd.DataFrame): DataFrame containing 'InvoiceDate' and 'Quantity' columns.
        peak_threshold (float, optional): Multiplier for avg. quantity to define a peak. Defaults to 1.2 (20% above average).
        lull_threshold (float, optional): Multiplier for avg. quantity to define a lull. Defaults to 0.5 (50% below average).
        num_promos (int, optional): Maximum number of peak days and of lull days that are kept. Defaults to 3.
        proximity_days (int, optional): The maximum number of days between dates to consider them part of the same promotional period. Defaults to 3.

    Returns:
        pd.DataFrame: DataFrame containing 'Date' and 'Promotion Type' columns;
        peak rows ('Peak') come first, followed by lull rows ('Lull').

    Raises:
        TypeError: If sales_data is not a pandas DataFrame.
    """

    if not isinstance(sales_data, pd.DataFrame):
        raise TypeError("sales_data must be a pandas DataFrame")

    # Calculate average sales volume
    avg_sales = sales_data['Quantity'].mean()

    # Apply thresholds to identify days within peak or lull zones
    peak_condition = sales_data['Quantity'] >= avg_sales * peak_threshold
    lull_condition = sales_data['Quantity'] <= avg_sales * lull_threshold

    # Keep only the num_promos most extreme days on each side
    peak_days = sales_data[peak_condition].nlargest(num_promos, 'Quantity')['InvoiceDate'].tolist()
    lull_days = sales_data[lull_condition].nsmallest(num_promos, 'Quantity')['InvoiceDate'].tolist()

    # Group the days here so that the proximity_days argument is actually honoured
    peak_periods = _group_days_by_proximity(peak_days, proximity_days)
    lull_periods = _group_days_by_proximity(lull_days, proximity_days)

    # Flatten the periods into one row per promotional day
    peak_df = pd.DataFrame({'Date': [day for period in peak_periods for day in period],
                            'Promotion Type': 'Peak'})
    lull_df = pd.DataFrame({'Date': [day for period in lull_periods for day in period],
                            'Promotion Type': 'Lull'})

    # Concatenate peak and lull DataFrames
    promo_df = pd.concat([peak_df, lull_df], ignore_index=True)

    return promo_df


In [22]:
# Promotion calendar for the January sample
result = find_promo_days(sales_data=jan_sales, proximity_days=3)
result

## Segmentation 

In [23]:
master_df

In [24]:
# Total spend and total quantity per customer
customer = master_df.groupby("CustomerID", as_index=False).agg(
    TotalPrice=("TotalPrice", "sum"),
    Quantity=("Quantity", "sum"),
)
customer

In [25]:
# Frequency = number of transaction rows (InvoiceNo entries) per customer.
# The counts are attached by CustomerID instead of by row position, so the
# result does not depend on both frames happening to share the same order.
invoice_counts = master_df.groupby('CustomerID')['InvoiceNo'].count()
customer["Frequency"] = customer["CustomerID"].map(invoice_counts)
customer

In [26]:
# Monetary = mean TotalPrice per transaction row for each customer.
# Attached by CustomerID instead of by row position.
mean_spend = master_df.groupby('CustomerID')['TotalPrice'].mean()
customer['Monetary'] = customer['CustomerID'].map(mean_spend)
customer

In [27]:
# Parse the invoice dates of the master data into datetime values
invoice_timestamps = pd.to_datetime(master_df["InvoiceDate"])
master_df["InvoiceDate"] = invoice_timestamps
master_df.dtypes

In [28]:
# Recency = days between a customer's last purchase and the most recent
# purchase in the whole data set. Attached by CustomerID, not by row position.
rfm_data = master_df.groupby('CustomerID')['InvoiceDate'].max().reset_index()
last_purchase = rfm_data.set_index('CustomerID')['InvoiceDate']
days_since_last_purchase = (last_purchase.max() - last_purchase).dt.days
customer['Recency'] = customer['CustomerID'].map(days_since_last_purchase)
customer

In [29]:
customer

In [30]:
# Score every dimension so that 1 is the best tercile, which is how the
# segment_customers functions interpret the scores (e.g. 1-1-1 = Champions).
# Recency: fewest days since the last purchase -> 1.
customer['R_score'] = pd.qcut(customer['Recency'], q=3, labels=[1, 2, 3])
# Frequency / Monetary: the highest values must score 1, so the labels are
# reversed relative to the ascending quantile bins.
customer['F_score'] = pd.qcut(customer['Frequency'], q=3, labels=[3, 2, 1])
customer['M_score'] = pd.qcut(customer['Monetary'], q=3, labels=[3, 2, 1])
customer

In [31]:
# Build the three-character RFM code (R, F, M in that order) and drop the
# individual score columns afterwards.
score_columns = ['R_score', 'F_score', 'M_score']
score_text = customer[score_columns].astype(str)
customer['RFM'] = score_text.agg(''.join, axis=1)
customer.drop(columns=score_columns, inplace=True)
customer

In [32]:
#Assign Segments

In [33]:
customer["RFM"].unique()

In [34]:
def segment_customers(rfm_column):
    """Segments customers into broad and subsegments based on RFM scores.

    Args:
        rfm_column (pd.Series): Series containing three-character RFM codes
            (recency, frequency, monetary), each character a digit.

    Returns:
        tuple: A tuple containing two pandas Series of equal length, one for
        broad segment ('HighValue', 'Nurture' or 'Risk') and one for subsegment.
    """
    broad_segments = []
    subsegments = []

    for rfm in rfm_column:
        recency = int(rfm[0])
        frequency = int(rfm[1])
        monetary = int(rfm[2])

        # Classify into High-Value Segments
        if recency == 1 and frequency in [1, 2] and monetary in [1, 2]:
            broad_segments.append('HighValue')
            if frequency == 1 and monetary == 1:
                subsegments.append('Champions')
            elif frequency == 1 and monetary == 2:
                subsegments.append('LoyalCustomers')
            elif frequency == 2 and monetary == 1:
                subsegments.append('BigSpenders')
            else:
                # frequency == 2 and monetary == 2: always append a label so
                # both result lists stay the same length and rows stay aligned.
                # NOTE(review): label chosen as a placeholder - confirm naming.
                subsegments.append('PotentialLoyalists')

        # Classify into Segments to Nurture
        elif recency == 1 and frequency in [3] and monetary in [1, 2]:
            broad_segments.append('Nurture')
            subsegments.append('NewCustomer')
        elif recency == 2 and frequency in [1, 2] and monetary in [1, 2]:
            broad_segments.append('Nurture')
            subsegments.append('Promising')
        elif recency == 2 and frequency in [3] and monetary in [2]:
            broad_segments.append('Nurture')
            subsegments.append('NeedsAttention')

        # Classify into At-Risk Segments
        else:
            broad_segments.append('Risk')
            if recency == 1 and frequency in [3] and monetary in [3]:
                subsegments.append("CantLoseThem")
            elif recency == 2 and frequency in [3] and monetary in [3]:
                subsegments.append('AboutToSleep')
            else:
                subsegments.append('LostCustomers')

    return pd.Series(broad_segments, name='Broad Segment'), pd.Series(subsegments, name='Subsegment')


In [None]:
def segment_customers(rfm_column):
    """Map RFM codes to a broad segment and a subsegment for a general retail store.

    Args:
        rfm_column (pd.Series): Series containing three-character RFM codes.

    Returns:
        tuple: Two pandas Series, the broad segment ('High Value', 'Nurture',
        'Risk' or 'Unknown') and the subsegment, in input order.
    """
    # (recency, frequency, monetary) -> subsegment label, per broad segment
    high_value_segments = {
        (1, 1, 1): 'Loyal Champions', (1, 1, 2): 'Frequent Spenders', (1, 1, 3): 'Rising Stars',
        (1, 2, 1): 'Recent Big Spenders', (1, 2, 2): 'Frequent Spenders', (1, 2, 3): 'Rising Stars',
        (1, 3, 1): 'Rekindled Spenders', (1, 3, 2): 'Needs Attention', (1, 3, 3): 'Value Seekers',
        (2, 3, 1): 'Big Ticket Buyers'
    }
    nurture_segments = {
        (2, 2, 2): 'Occasional Spenders', (2, 2, 3): 'Value Seekers', (2, 3, 2): 'Sleeping Giants',
        (2, 3, 3): 'Value Seekers', (1, 3, 3): 'Needs Attention',
        (2, 1, 2): 'Win-Back Target', (2, 1, 3): 'Win-Back Target',
        (2, 2, 1): 'Potential Upscale'
    }
    risk_segments = {
        (3, 1, 1): 'Lost Loyalists', (3, 1, 2): 'Fading Interest', (3, 1, 3): 'One-Time Buyers',
        (3, 2, 1): 'At-Risk Customers', (3, 2, 2): 'Fading Interest', (3, 2, 3): 'One-Time Buyers',
        (3, 3, 1): 'Window Shoppers', (3, 3, 2): 'Window Shoppers', (3, 3, 3): 'One-Time Buyers',
        (2, 1, 1): 'At-Risk Customers'
    }

    # One combined lookup. setdefault keeps the first mapping when a code is
    # listed in more than one table, so High Value wins over Nurture over Risk
    # (this matters for (1, 3, 3), which appears in two tables).
    lookup = {}
    labelled_tables = (
        ('High Value', high_value_segments),
        ('Nurture', nurture_segments),
        ('Risk', risk_segments),
    )
    for broad_label, table in labelled_tables:
        for code, sub_label in table.items():
            lookup.setdefault(code, (broad_label, sub_label))

    unknown = ('Unknown', 'Unknown')
    assignments = [
        lookup.get((int(rfm[0]), int(rfm[1]), int(rfm[2])), unknown)
        for rfm in rfm_column
    ]
    broad_segments = [broad for broad, _ in assignments]
    subsegments = [sub for _, sub in assignments]

    return pd.Series(broad_segments, name='Broad Segment'), pd.Series(subsegments, name='Subsegment')


In [35]:
# Attach the broad segment and the subsegment of every customer
broad_segment, subsegment = segment_customers(customer['RFM'])
customer["Segment"] = broad_segment
customer["Subsegment"] = subsegment
customer

In [36]:
# One customer frame per broad segment
customer_segment = customer['Segment']
high_value_df = customer[customer_segment == 'HighValue']
risk_df = customer[customer_segment == 'Risk']
nurture_df = customer[customer_segment == 'Nurture']
high_value_df

In [37]:
def segment_customers(rfm_column):
    """Segments customers into broad and subsegments based on RFM scores.

    Every (recency, frequency, monetary) code belongs to at most one broad
    segment and is stored together with its subsegment label, so codes and
    labels cannot drift out of step.

    Args:
        rfm_column (pd.Series): Series containing three-character RFM codes.

    Returns:
        tuple: A tuple containing two pandas Series, one for broad segment
        ('High Value', 'Nurture', 'Risk' or 'Unknown') and one for subsegment.
    """
    # Recent customers with high or medium frequency
    high_value_segments = {
        (1, 1, 1): 'Champions', (1, 1, 2): 'Loyalists', (1, 1, 3): 'High Potential',
        (1, 2, 1): 'Big Ticket', (1, 2, 2): 'Regular Spenders', (1, 2, 3): 'Emerging Loyalists',
    }
    nurture_segments = {
        (1, 3, 3): 'Needs a Spark', (2, 1, 3): 'Upscale Focus', (2, 2, 2): 'Consistent Spender',
        (2, 2, 3): 'Potential Upscale', (2, 3, 2): 'Win-Back Target', (2, 3, 3): 'Casual Shopper',
    }
    risk_segments = {
        (1, 3, 1): 'Wake-Up Call', (1, 3, 2): 'Slipping Away', (2, 3, 1): 'Dormant Upscale',
        (3, 1, 1): 'One-offs', (3, 1, 2): 'One-offs', (3, 1, 3): 'One-offs',
        (3, 2, 1): 'Sporadic', (3, 2, 2): 'Sporadic', (3, 2, 3): 'Sporadic',
        (3, 3, 1): 'Lost Cause', (3, 3, 2): 'Lost Cause', (3, 3, 3): 'Lost Cause',
    }

    # Single lookup table: code -> (broad segment, subsegment)
    lookup = {}
    for broad_label, table in (
        ('High Value', high_value_segments),
        ('Nurture', nurture_segments),
        ('Risk', risk_segments),
    ):
        for code, sub_label in table.items():
            lookup[code] = (broad_label, sub_label)

    broad_segments = []
    subsegments = []
    for rfm in rfm_column:
        code = (int(rfm[0]), int(rfm[1]), int(rfm[2]))
        # Codes that are not listed above are reported as 'Unknown'
        broad_label, sub_label = lookup.get(code, ('Unknown', 'Unknown'))
        broad_segments.append(broad_label)
        subsegments.append(sub_label)

    return pd.Series(broad_segments, name='Broad Segment'), pd.Series(subsegments, name='Subsegment')


In [38]:
# Transaction-level columns that will be joined with the customer segments
transaction_columns = ["CustomerID", "InvoiceNo", "StockCode"]
segmented = master_df[transaction_columns]
segmented

In [39]:
# Customer -> segment lookup table.
# Copy explicitly because the CustomerID column of this frame is reassigned
# in the next cell.
customers = customer[["CustomerID", "Segment"]].copy()
customers

In [40]:
# Cast the customer ids to object dtype.
# NOTE: astype(object) only changes the column dtype; the ids themselves are
# not converted to strings.
customers["CustomerID"] = customers["CustomerID"].astype(object)
# segmented["CustomerID"] = segmented["CustomerID"].astype(object)


In [41]:
segmented

In [42]:
master_df[["CustomerID","InvoiceNo"]]

In [43]:
# Left join: keep every transaction row and add the customer's segment
transaction_df = segmented.merge(customers, on="CustomerID", how="left")
transaction_df

In [44]:
# Split the transactions by broad segment so offers can be built per segment
transaction_segment = transaction_df['Segment']
high_value_df = transaction_df[transaction_segment == 'HighValue']
risk_df = transaction_df[transaction_segment == 'Risk']
nurture_df = transaction_df[transaction_segment == 'Nurture']
high_value_df

## Promotional Offers

In [2]:
# Aggregate Transactions
def aggregate_transactions(df):
    """Collect the distinct stock codes of every (InvoiceNo, CustomerID) pair.

    Args:
        df (pd.DataFrame): Frame with 'InvoiceNo', 'CustomerID' and 'StockCode' columns.

    Returns:
        pd.DataFrame: Indexed by (InvoiceNo, CustomerID) with one 'StockCode'
        column holding a list of unique codes (in no particular order).
    """
    def unique_codes(codes):
        # Going through a set removes duplicates; the resulting order is arbitrary
        return list(set(codes))

    baskets = df.groupby(["InvoiceNo", "CustomerID"])
    return baskets.agg({"StockCode": unique_codes})

In [3]:
from fpgrowth_py import fpgrowth

#Generating  Association Rules
def get_rules(df):
    """Mine association rules from the baskets of ``df`` with FP-Growth.

    Uses a minimum support ratio of 0.01 and a minimum confidence of 0.8 and
    prints the number of rules found.

    Args:
        df (pd.DataFrame): Transactions accepted by aggregate_transactions.

    Returns:
        pd.DataFrame: Columns 'basket', 'next_product' and 'proba', sorted by
        'proba' in descending order.
    """
    baskets = aggregate_transactions(df)['StockCode'].values
    _, mined_rules = fpgrowth(baskets, minSupRatio=0.01, minConf=0.8)
    print('Number of rules generated : ', len(mined_rules))

    rule_table = pd.DataFrame(mined_rules, columns=['basket', 'next_product', 'proba'])
    return rule_table.sort_values(by='proba', ascending=False)
    

In [48]:
# Get rules for the high-value segment (the transactions in high_value_df)
h_assoc = get_rules(high_value_df)
h_assoc

In [4]:
#Get frequent item sets
def get_most_frequent_itemsets(df):
    """Return the frequent itemsets (bundles of at least two products) found in ``df``.

    Args:
        df (pd.DataFrame): Transactions accepted by aggregate_transactions.

    Returns:
        pd.DataFrame: One 'Frequent Itemset' column; every value is a sorted
        list of stock codes with at least two entries, without duplicates.
    """
    # Group by InvoiceNo and CustomerID and aggregate unique StockCodes into lists
    hbasket = aggregate_transactions(df)

    # Run FP-Growth algorithm to find frequent itemsets
    freqItemSet, _ = fpgrowth(hbasket['StockCode'].values, minSupRatio=0.01, minConf=0.7)

    # De-duplicate on sorted tuples: lists are unhashable, so calling
    # drop_duplicates on a column of lists raises a TypeError.
    # dict.fromkeys keeps the first-seen order.
    unique_itemsets = dict.fromkeys(tuple(sorted(itemset)) for itemset in freqItemSet)

    # Keep real bundles only, i.e. filter out itemsets with less than 2 items
    bundles = [list(itemset) for itemset in unique_itemsets if len(itemset) >= 2]

    return pd.DataFrame({'Frequent Itemset': bundles})


In [50]:
# xg = get_most_frequent_itemsets(nurture_df)
# xg

In [51]:
#Get Buy One,Get One Discounted Bundles
#Returns product bundles
def get_bogd_bundles(df):
    """Mine high-confidence rules used for 'buy one, get one discounted' bundles.

    Runs FP-Growth with a minimum support ratio of 0.01 and a minimum
    confidence of 0.9 and prints the number of rules found.

    Args:
        df (pd.DataFrame): Transactions accepted by aggregate_transactions.

    Returns:
        pd.DataFrame: Columns 'basket', 'next_product' and 'proba', sorted by
        'proba' in descending order.
    """
    baskets = aggregate_transactions(df)['StockCode'].values
    _, mined_rules = fpgrowth(baskets, minSupRatio=0.01, minConf=0.9)
    print('Number of rules generated : ', len(mined_rules))

    bundle_table = pd.DataFrame(mined_rules, columns=['basket', 'next_product', 'proba'])
    return bundle_table.sort_values(by='proba', ascending=False)
    

In [52]:
# Buy One,Get One Discounted
def bogd(product_bundles, order):
    """Return the products an order qualifies to get at a discount.

    Args:
        product_bundles (pd.DataFrame): Rules with 'basket' and 'next_product'
            columns, each holding a collection of item codes.
        order (iterable): Item codes contained in the order.

    Returns:
        list: The 'next_product' items of every rule whose basket is fully
        contained in the order; an empty list (after printing a notice) when
        no rule matches.
    """
    def basket_is_in_order(basket):
        return set(basket).issubset(set(order))

    # Rules whose whole basket is part of the order
    eligible = product_bundles[product_bundles['basket'].apply(basket_is_in_order)]
    if eligible.empty:
        print("Order Not Eligible For Discount")
        return []

    # Flatten the consequents of all matching rules into one list
    recommended_item_codes = []
    for consequent in eligible['next_product'].tolist():
        recommended_item_codes.extend(consequent)
    return recommended_item_codes

In [53]:
# Mine discount bundles for the high-value segment
gx = get_bogd_bundles(df=high_value_df)
gx

In [54]:
# Try the discount lookup on a sample order
order = {'21080', '45373', '21086', '67262', '63773'}
bogd(product_bundles=gx, order=order)

In [4]:
#Fixed amount discount
def apply_fixed_discount(total_amount, fixed_discount):
    """
    Subtract a fixed amount from the purchase total, never going below zero.

    Parameters:
    - total_amount (float): Total purchase amount before discount.
    - fixed_discount (float): Fixed discount amount to be subtracted.

    Returns:
    - discounted_amount (float): Amount left to pay; 0 when the discount
      exceeds the total.
    """
    floor = 0
    remaining = total_amount - fixed_discount
    # Clamp at the floor so the customer is never owed money
    return floor if floor > remaining else remaining


In [56]:
#Bundled Discounts
def apply_bundle_discount(bundle, discount_df):
    """
    Apply discounts to each item in a product bundle and return the final price.

    Parameters:
    - bundle (list): List of product IDs in the bundle.
    - discount_df (DataFrame): DataFrame with 'ProductID', 'Price' and
      'Discount' columns; 'Discount' is a fraction (0.1 means 10% off).

    Returns:
    - final_price (float): Sum of the discounted prices of all bundle items
      found in discount_df. Items that are not listed have no known price,
      so they are reported and left out of the total.
    """
    final_price = 0

    for item in bundle:
        # One lookup gives both the price and the discount of the item
        matches = discount_df.loc[discount_df['ProductID'] == item, ['Price', 'Discount']]

        if matches.empty:
            print(f"Item {item} not found in discount table; it is excluded from the bundle total.")
            continue

        # Use the first matching row if the product is listed more than once
        original_price, item_discount = matches.iloc[0]
        final_price += original_price * (1 - item_discount)

    return final_price


In [14]:
#Test Bundled Discount
# Dummy Discount Data
# Define the discount data
# Sample rows: (product id, unit price, discount as a fraction of the price)
_discount_rows = [
    ('20725', 5.0, 0.1),
    ('20727', 8.0, 0.2),
    ('22383', 10.0, 0.15),
    ('20728', 7.0, 0.1),
    ('85099B', 12.0, 0.25),
    ('23209', 9.0, 0.2),
    ('23203', 6.0, 0.15),
    ('23170', 15.0, 0.1),
    ('23171', 20.0, 0.2),
    ('23172', 25.0, 0.15),
]
discount_data = {
    'ProductID': [row[0] for row in _discount_rows],
    'Price': [row[1] for row in _discount_rows],
    'Discount': [row[2] for row in _discount_rows],
}

# Create the discount DataFrame
discount_df = pd.DataFrame(discount_data)
discount_df


In [15]:
# Save the sample discount table to disk (to_csv writes the DataFrame index as well by default)
discount_df.to_csv("discount_sample_data.csv")

In [58]:
# # Example product bundle
# bundle = xg["Frequent Itemset"][107]
# 
# # Calculate final price of the bundle after applying discounts
# final_price = apply_bundle_discount(bundle, discount_df)
# print("Final price of the bundle after applying discounts:", final_price)

In [2]:
# Get Tiered Discount
def calculate_tiered_discount(total_price, tiers):
    """Return the price after applying the discount of the highest tier reached.

    Args:
        total_price (float): Purchase total before discount.
        tiers (list): Dicts with 'min_amount' (threshold the total must reach)
            and 'discount' (percentage, e.g. 5 for 5%). May be given in any
            order; an empty list means no discount.

    Returns:
        float: total_price reduced by the percentage of the highest tier whose
        'min_amount' does not exceed total_price.
    """
    discount = 0
    # Sort by threshold so the early exit below is valid for any input order
    for tier in sorted(tiers, key=lambda t: t['min_amount']):
        if total_price < tier['min_amount']:
            break
        discount = tier['discount']
    return total_price - (total_price * (discount / 100))




In [3]:
# Example tiers as (minimum amount, discount in percent):
#   $0 and above   -> 0%
#   $101 and above -> 5%
#   $201 and above -> 10%
_tier_rows = [(0, 0), (101, 5), (201, 10)]
tiers = [
    {'min_amount': min_amount, 'discount': discount}
    for min_amount, discount in _tier_rows
]



In [5]:
# Example usage: price to pay for a $150 purchase under the example tiers
total_price = 150
discounted_amount = calculate_tiered_discount(total_price=total_price, tiers=tiers)
print(f"Discounted Amount: ${discounted_amount}")

In [61]:
# Loyalty Points
def calculate_loyalty_points(total_price, points_per_dollar):
    """Return the loyalty points earned for a purchase.

    Args:
        total_price (float): Purchase total.
        points_per_dollar (float): Points granted per unit of currency spent.

    Returns:
        float: total_price multiplied by points_per_dollar.
    """
    earned_points = total_price * points_per_dollar
    return earned_points

In [62]:
# Example usage: points earned on a $150 purchase
total_price = 150
points_per_dollar = 2  # Assume 2 points per dollar spent
loyalty_points = calculate_loyalty_points(
    total_price=total_price,
    points_per_dollar=points_per_dollar,
)
print(f"Loyalty Points Earned: {loyalty_points}")


In [64]:
def get_bundles(df):
    """Mine association rules and multi-item frequent itemsets with their support.

    Runs FP-Growth with a minimum support ratio of 0.01 and a minimum
    confidence of 0.9 and prints the number of rules found.

    Args:
        df (pd.DataFrame): Transactions accepted by aggregate_transactions.

    Returns:
        tuple: (associations, itemsets).
            associations has the columns 'basket', 'next_product' and 'proba',
            sorted by 'proba' in descending order.
            itemsets has the columns 'itemset' and 'support' (share of baskets
            containing the whole itemset), restricted to itemsets with more
            than one item and sorted by 'support' in descending order.
    """
    hbasket = aggregate_transactions(df)
    freqItemSet, rules = fpgrowth(hbasket['StockCode'].values, minSupRatio=0.01, minConf=0.9)
    print('Number of rules generated: ', len(rules))

    associations = pd.DataFrame(rules, columns=['basket', 'next_product', 'proba'])
    associations = associations.sort_values(by='proba', ascending=False)

    # Convert every basket to a set once instead of once per itemset
    basket_sets = [set(basket) for basket in hbasket['StockCode']]
    n_baskets = len(hbasket)

    # Drop single-item itemsets before computing support; remember the original
    # position so the resulting index matches the position in freqItemSet.
    multi_item = [
        (position, itemset)
        for position, itemset in enumerate(freqItemSet)
        if len(itemset) > 1
    ]
    supports = [
        sum(1 for basket in basket_sets if set(itemset).issubset(basket)) / n_baskets
        for _, itemset in multi_item
    ]
    itemsets = pd.DataFrame(
        {'itemset': [itemset for _, itemset in multi_item], 'support': supports},
        index=[position for position, _ in multi_item],
    )
    itemsets = itemsets.sort_values(by='support', ascending=False)  # Sort itemsets by support

    return associations, itemsets


In [71]:
# Function to generate random profit margins
def generate_profit_margin(unit_price):
    """Return a random profit margin between 5% and 20% of ``unit_price``.

    Args:
        unit_price (float): Price of a single unit.

    Returns:
        float: unit_price multiplied by a rate drawn uniformly from [0.05, 0.20).
    """
    margin_rate = np.random.uniform(0.05, 0.20)
    return unit_price * margin_rate