In [1]:
# Import Libraries
import csv
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
from scipy.special import comb
from itertools import combinations, permutations
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

# Supressing Warnings
import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter(action='ignore', category=FutureWarning)

In [2]:
# Load the Online retail dataset from Excel
raw_data = pd.read_excel("Online retail.xlsx", header=None).iloc[:, 0]

In [3]:
# Checking for missing values
raw_data.isna().sum()

0

In [4]:
# Checking for duplicate values
raw_data.duplicated().sum()

2325

In [5]:
raw_data[raw_data.duplicated(keep=False)]

2                              chutney
3                       turkey,avocado
5                       low fat yogurt
6       whole wheat pasta,french fries
9                         french fries
                     ...              
7491                     herb & pepper
7492                chocolate,escalope
7495               pancakes,light mayo
7498                           chicken
7499                escalope,green tea
Name: 0, Length: 2713, dtype: object

In [6]:
# Deleting duplicate values
raw_data.drop_duplicates(inplace=True)

In [7]:
# Split each transaction string into a list of items
transactions = [transaction.split(',') for transaction in raw_data]

# Get all unique items from the dataset
unique_items = sorted(set(item for sublist in transactions for item in sublist))

# Create an empty DataFrame with columns for each unique item
df = pd.DataFrame(0, index=range(len(transactions)), columns=unique_items)

# Populate the DataFrame with binary values indicating item presence
for i, transaction in enumerate(transactions):
    for item in transaction:
        df.at[i, item] = 1

# Display the DataFrame
print(df)

       asparagus  almonds  antioxydant juice  asparagus  avocado  babies food  \
0              0        1                  1          0        1            0   
1              0        0                  0          0        0            0   
2              0        0                  0          0        0            0   
3              0        0                  0          0        1            0   
4              0        0                  0          0        0            0   
...          ...      ...                ...        ...      ...          ...   
5171           0        0                  0          0        0            0   
5172           0        0                  0          0        0            0   
5173           0        0                  0          0        0            0   
5174           0        0                  0          0        0            0   
5175           0        0                  0          0        0            0   

      bacon  barbecue sauce

In [8]:
df.columns

Index([' asparagus', 'almonds', 'antioxydant juice', 'asparagus', 'avocado',
       'babies food', 'bacon', 'barbecue sauce', 'black tea', 'blueberries',
       ...
       'turkey', 'vegetables mix', 'water spray', 'white wine',
       'whole weat flour', 'whole wheat pasta', 'whole wheat rice', 'yams',
       'yogurt cake', 'zucchini'],
      dtype='object', length=120)

In [9]:
# Convert transactions to one-hot encoded format
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df = pd.DataFrame(te_ary, columns=te.columns_)

# Apply Apriori algorithm to find frequent itemsets
frequent_itemsets = apriori(df, min_support=0.05, use_colnames=True)

# Generate association rules
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1.2)

# Display the generated rules
print(rules)

            antecedents          consequents  antecedent support  \
0       (mineral water)  (frozen vegetables)            0.299845   
1   (frozen vegetables)      (mineral water)            0.129830   
2         (ground beef)      (mineral water)            0.135819   
3       (mineral water)        (ground beef)            0.299845   
4         (ground beef)          (spaghetti)            0.135819   
5           (spaghetti)        (ground beef)            0.229521   
6       (mineral water)               (milk)            0.299845   
7                (milk)      (mineral water)            0.170015   
8           (spaghetti)               (milk)            0.229521   
9                (milk)          (spaghetti)            0.170015   
10          (spaghetti)      (mineral water)            0.229521   
11      (mineral water)          (spaghetti)            0.299845   

    consequent support   support  confidence      lift  leverage  conviction  \
0             0.129830  0.050425   

In [12]:
rules.sort_values('lift',ascending = False)[0:20]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
4,(ground beef),(spaghetti),0.135819,0.229521,0.055835,0.411095,1.791102,0.024661,1.308326,0.511102
5,(spaghetti),(ground beef),0.229521,0.135819,0.055835,0.243266,1.791102,0.024661,1.141988,0.57326
2,(ground beef),(mineral water),0.135819,0.299845,0.058733,0.432432,1.442184,0.018008,1.233606,0.354795
3,(mineral water),(ground beef),0.299845,0.135819,0.058733,0.195876,1.442184,0.018008,1.074686,0.437914
6,(mineral water),(milk),0.299845,0.170015,0.067813,0.22616,1.330231,0.016835,1.072553,0.354566
7,(milk),(mineral water),0.170015,0.299845,0.067813,0.398864,1.330231,0.016835,1.164718,0.299103
0,(mineral water),(frozen vegetables),0.299845,0.12983,0.050425,0.16817,1.29531,0.011496,1.046091,0.32562
1,(frozen vegetables),(mineral water),0.12983,0.299845,0.050425,0.388393,1.29531,0.011496,1.144778,0.262
8,(spaghetti),(milk),0.229521,0.170015,0.050039,0.218013,1.282316,0.011017,1.06138,0.285745
9,(milk),(spaghetti),0.170015,0.229521,0.050039,0.294318,1.282316,0.011017,1.091822,0.265259


# Conclusion
## Insight and interpretation:

### 1.Ground Beef and Spaghetti:
Rule 4: Customers who purchase ground beef are likely to also purchase spaghetti.

Rule 5: Customers who purchase spaghetti are likely to also purchase ground beef.
##### Interpretation: Ground beef and spaghetti are frequently purchased together, indicating that customers may be buying ingredients for a spaghetti bolognese or other pasta dishes.
##### ______________________________________________________________________________________________________________________________
### 2.Ground Beef and Mineral Water:
Rule 2: Customers who purchase ground beef are likely to also purchase mineral water.

Rule 3: Customers who purchase mineral water are likely to also purchase ground beef.
##### Interpretation: Ground beef and mineral water have a moderate association. This could suggest that customers buying ground beef are also purchasing beverages like mineral water to accompany their meals.
##### ______________________________________________________________________________________________________________________________
### 3.Mineral Water and Milk:
Rule 6: Customers who purchase mineral water are likely to also purchase milk.

Rule 7: Customers who purchase milk are likely to also purchase mineral water.
##### Interpretation: Mineral water and milk are associated, indicating that customers may be buying both to meet their beverage needs.
##### ______________________________________________________________________________________________________________________________
### 4.Mineral Water and Frozen Vegetables:
Rule 0: Customers who purchase mineral water are likely to also purchase frozen vegetables.

Rule 1: Customers who purchase frozen vegetables are likely to also purchase mineral water.
##### Interpretation: There is a moderate association between mineral water and frozen vegetables, suggesting that customers buying one are likely to buy the other, possibly for healthy meal options or cooking ingredients.
##### ______________________________________________________________________________________________________________________________
### 5.Spaghetti and Milk:
Rule 8: Customers who purchase spaghetti are likely to also purchase milk.

Rule 9: Customers who purchase milk are likely to also purchase spaghetti.
##### Interpretation: Spaghetti and milk are moderately associated, indicating that customers may be buying milk as a beverage or ingredient for pasta dishes.
##### ______________________________________________________________________________________________________________________________
### 6.Mineral Water and Spaghetti:
Rule 11: Customers who purchase mineral water are likely to also purchase spaghetti.

Rule 10: Customers who purchase spaghetti are likely to also purchase mineral water.
##### Interpretation: Mineral water and spaghetti are strongly associated, suggesting that customers buying one are highly likely to buy the other, possibly as part of a meal combination.

# ____________________________________________________________________
##### Understanding these associations can help retailers optimize product placement and promotions. For example, placing spaghetti and mineral water together or offering discounts on chocolate when purchasing mineral water could potentially increase sales and customer satisfaction.
##### Additionally, retailers can use these insights for targeted marketing campaigns, such as sending personalized offers to customers who frequently purchase these combinations.