In [24]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

In [32]:
# Load the data
data = pd.read_csv('Updated_Pizza_Sales.csv')

In [34]:
# Ensure order_date is in datetime format
data['order_date'] = pd.to_datetime(data['order_date'])

# Group by order_id to form transactions
transactions = data.groupby('order_id')['pizza_name'].apply(list).reset_index()

# Use the transactions to create a list of lists for the apriori algorithm
transaction_list = transactions['pizza_name'].tolist()

# Print a few transactions for verification
print(transaction_list[:5])


[['The Hawaiian Pizza'], ['The Classic Deluxe Pizza', 'The Five Cheese Pizza', 'The Italian Supreme Pizza', 'The Mexicana Pizza', 'The Thai Chicken Pizza'], ['The Italian Supreme Pizza', 'The Prosciutto and Arugula Pizza'], ['The Italian Supreme Pizza'], ['The Italian Supreme Pizza']]


In [35]:
# Initialize the transaction encoder
te = TransactionEncoder()
te_ary = te.fit(transaction_list).transform(transaction_list)
df = pd.DataFrame(te_ary, columns=te.columns_)

# Display the first few rows of the encoded dataframe
print(df.head())


   The Barbecue Chicken Pizza  The Big Meat Pizza  The Brie Carre Pizza  \
0                       False               False                 False   
1                       False               False                 False   
2                       False               False                 False   
3                       False               False                 False   
4                       False               False                 False   

   The Calabrese Pizza  The California Chicken Pizza  \
0                False                         False   
1                False                         False   
2                False                         False   
3                False                         False   
4                False                         False   

   The Chicken Alfredo Pizza  The Chicken Pesto Pizza  \
0                      False                    False   
1                      False                    False   
2                      False             

In [41]:
from mlxtend.frequent_patterns import apriori, association_rules

# Apply the Apriori algorithm with a lower min_support to capture more itemsets
frequent_itemsets = apriori(df, min_support=0.005, use_colnames=True)

# Display the frequent itemsets to ensure they include multiple items
print(frequent_itemsets)

      support                                           itemsets
0    0.106464                       (The Barbecue Chicken Pizza)
1    0.084824                               (The Big Meat Pizza)
2    0.022482                             (The Brie Carre Pizza)
3    0.042998                              (The Calabrese Pizza)
4    0.102904                     (The California Chicken Pizza)
..        ...                                                ...
257  0.005386  (The Spinach and Feta Pizza, The Spicy Italian...
258  0.009040  (The Thai Chicken Pizza, The Spicy Italian Pizza)
259  0.006417  (The Vegetables + Vegetables Pizza, The Spicy ...
260  0.008337  (The Spinach and Feta Pizza, The Thai Chicken ...
261  0.007775  (The Thai Chicken Pizza, The Vegetables + Vege...

[262 rows x 2 columns]


In [42]:
# Generate association rules
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)

# Display the rules to check for completeness
print(rules)

                             antecedents                          consequents  \
0                   (The Big Meat Pizza)         (The Barbecue Chicken Pizza)   
1           (The Barbecue Chicken Pizza)                 (The Big Meat Pizza)   
2         (The California Chicken Pizza)         (The Barbecue Chicken Pizza)   
3           (The Barbecue Chicken Pizza)       (The California Chicken Pizza)   
4            (The Chicken Alfredo Pizza)         (The Barbecue Chicken Pizza)   
..                                   ...                                  ...   
409            (The Spicy Italian Pizza)  (The Vegetables + Vegetables Pizza)   
410         (The Spinach and Feta Pizza)             (The Thai Chicken Pizza)   
411             (The Thai Chicken Pizza)         (The Spinach and Feta Pizza)   
412             (The Thai Chicken Pizza)  (The Vegetables + Vegetables Pizza)   
413  (The Vegetables + Vegetables Pizza)             (The Thai Chicken Pizza)   

     antecedent support  co

In [49]:
# Sort the rules by support in descending order to get the most frequently ordered pairs
top_10_pairs_by_support = rules.sort_values(by='support', ascending=False).head(10)

# Display the top 10 pairs by support
print("Top 10 most frequently ordered pairs:")
print(top_10_pairs_by_support[['support', 'antecedents', 'consequents']])


Top 10 most frequently ordered pairs:
      support                     antecedents                     consequents
274  0.012646            (The Hawaiian Pizza)        (The Thai Chicken Pizza)
275  0.012646        (The Thai Chicken Pizza)            (The Hawaiian Pizza)
27   0.012600    (The Barbecue Chicken Pizza)           (The Pepperoni Pizza)
26   0.012600           (The Pepperoni Pizza)    (The Barbecue Chicken Pizza)
256  0.012131            (The Hawaiian Pizza)           (The Pepperoni Pizza)
257  0.012131           (The Pepperoni Pizza)            (The Hawaiian Pizza)
368  0.012037           (The Pepperoni Pizza)        (The Thai Chicken Pizza)
369  0.012037        (The Thai Chicken Pizza)           (The Pepperoni Pizza)
106  0.011991  (The California Chicken Pizza)           (The Pepperoni Pizza)
107  0.011991           (The Pepperoni Pizza)  (The California Chicken Pizza)


In [47]:
from itertools import combinations
from collections import Counter

# Generate pairs of items from each transaction
pair_counter = Counter()

for transaction in transaction_list:
    transaction_pairs = combinations(sorted(transaction), 2)  # Generate pairs of items
    pair_counter.update(transaction_pairs)

# Convert the counter to a DataFrame for easier analysis
pair_counts = pd.DataFrame(pair_counter.items(), columns=['pair', 'count'])
pair_counts['item1'] = pair_counts['pair'].apply(lambda x: x[0])
pair_counts['item2'] = pair_counts['pair'].apply(lambda x: x[1])
pair_counts.drop(columns='pair', inplace=True)

# Sort the DataFrame by count in descending order to find the most frequently ordered pairs
pair_counts.sort_values(by='count', ascending=False, inplace=True)

# Display the top 10 most frequently ordered pairs
print("Top 10 most frequently ordered pairs by count:")
print(pair_counts.head(10))


Top 10 most frequently ordered pairs by count:
     count                         item1                         item2
128    319            The Hawaiian Pizza        The Thai Chicken Pizza
40     308    The Barbecue Chicken Pizza           The Pepperoni Pizza
118    299           The Pepperoni Pizza        The Thai Chicken Pizza
214    299            The Hawaiian Pizza           The Pepperoni Pizza
216    296      The Classic Deluxe Pizza            The Hawaiian Pizza
258    295      The Classic Deluxe Pizza           The Pepperoni Pizza
255    294  The California Chicken Pizza            The Hawaiian Pizza
42     292  The California Chicken Pizza           The Pepperoni Pizza
168    292    The Barbecue Chicken Pizza            The Hawaiian Pizza
39     287    The Barbecue Chicken Pizza  The California Chicken Pizza
