# Basket Analysis

Step 1: Import Libraries

In [49]:
from pathlib import Path

import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

Step 2: Import Data

In [50]:
def import_data(data_dir='.'):
    """Load the four CSV extracts and join them into one line-item table.

    Parameters
    ----------
    data_dir : str or path-like, default '.'
        Directory containing ``orders.csv``, ``customers.csv``,
        ``products.csv`` and ``sales.csv``. The default reads from the
        current working directory, so ``import_data()`` behaves as before.

    Returns
    -------
    pandas.DataFrame
        ``sales.csv`` left-joined to orders (on ``order_id``), then to
        products (on ``product_id``), then to customers (on
        ``customer_id``). Columns whose names collide during a merge get
        pandas' default ``_x`` (left) / ``_y`` (right) suffixes, e.g.
        ``quantity_x`` / ``quantity_y``.
    """
    data_dir = Path(data_dir)

    # Load data
    customers_df = pd.read_csv(data_dir / 'customers.csv')
    orders_df = pd.read_csv(data_dir / 'orders.csv')
    products_df = pd.read_csv(data_dir / 'products.csv')
    sales_df = pd.read_csv(data_dir / 'sales.csv')

    # Left joins keep every sales row even when a lookup key has no match.
    # NOTE(review): assumes order_id / product_id / customer_id are unique in
    # their lookup tables; duplicates there would multiply sales rows.
    sales_orders_df = pd.merge(sales_df, orders_df, on='order_id', how='left')
    sales_orders_products_df = pd.merge(sales_orders_df, products_df, on='product_id', how='left')
    full_data_df = pd.merge(sales_orders_products_df, customers_df, on='customer_id', how='left')
    return full_data_df

In [51]:
data = import_data()
# End the cell on the expression so Jupyter renders the preview as a rich
# table instead of the wrapped plain text that print() produces.
data.head()

   sales_id  order_id  product_id  price_per_unit  quantity_x  total_price  \
0         0         1         218             106           2          212   
1         1         1         481             118           1          118   
2         2         1           2              96           3          288   
3         3         1        1002             106           2          212   
4         4         1         691             113           3          339   

   customer_id  payment order_date delivery_date  ... quantity_y  \
0           64    30811  2021-8-30    2021-09-24  ...         44   
1           64    30811  2021-8-30    2021-09-24  ...         62   
2           64    30811  2021-8-30    2021-09-24  ...         54   
3           64    30811  2021-8-30    2021-09-24  ...         52   
4           64    30811  2021-8-30    2021-09-24  ...         53   

                                   description     customer_name       gender  \
0   A orange coloured, L sized, Chambray 

Step 3: Perform Basket Analysis

In [52]:
def basket_analysis(data, min_support=0.01, min_lift=1):
    """Mine association rules between products bought in the same order.

    Parameters
    ----------
    data : pandas.DataFrame
        Line-item table with at least ``order_id``, ``product_name`` and
        ``quantity_x`` columns.
    min_support : float, default 0.01
        Minimum support passed to ``apriori``.
    min_lift : float, default 1
        Minimum lift a rule must reach to be kept.

    Returns
    -------
    pandas.DataFrame
        The frame returned by ``association_rules``, sorted in descending
        order by lift, then confidence, then support.
    """
    # Order x product matrix of summed quantities; order/product pairs that
    # never occur are filled with 0 directly by unstack.
    quantities = (
        data.groupby(['order_id', 'product_name'])['quantity_x']
        .sum()
        .unstack(fill_value=0)
    )

    # Keep the one-hot matrix as real booleans: apriori expects a bool frame
    # and warns about (and is slower on) 0/1 integer input.
    basket_sets = quantities > 0

    # Frequent itemsets at or above the requested support
    frequent_itemsets = apriori(basket_sets, min_support=min_support, use_colnames=True)

    # Association rules filtered on lift
    rules = association_rules(frequent_itemsets, metric="lift", min_threshold=min_lift)

    # Strongest rules first
    sorted_rules = rules.sort_values(by=['lift', 'confidence', 'support'], ascending=False)

    return sorted_rules

In [13]:
result = basket_analysis(data)
# End the cell on the expression so Jupyter renders the top rules as a rich
# table instead of the wrapped plain text that print() produces.
result.head()

                 antecedents            consequents  antecedent support  \
591    (Oxford Cloth, Denim)              (Cropped)            0.038268   
594                (Cropped)  (Oxford Cloth, Denim)            0.124874   
593           (Oxford Cloth)       (Cropped, Denim)            0.136959   
592         (Cropped, Denim)         (Oxford Cloth)            0.038268   
590  (Oxford Cloth, Cropped)                (Denim)            0.022155   

     consequent support  support  confidence      lift  leverage  conviction  \
591            0.124874  0.01007    0.263158  2.107385  0.005292    1.187671   
594            0.038268  0.01007    0.080645  2.107385  0.005292    1.046095   
593            0.038268  0.01007    0.073529  1.921440  0.004829    1.038060   
592            0.136959  0.01007    0.263158  1.921440  0.004829    1.171270   
590            0.238671  0.01007    0.454545  1.904488  0.004783    1.395770   

     zhangs_metric  
591       0.546387  
594       0.600460  
593  



Step 4: Save the results to a new CSV file so they can be visualised

In [53]:
# Write the sorted rules to disk, leaving out the DataFrame's row index.
result.to_csv(path_or_buf='sorted_association_rules.csv', index=False)