# IMPORTING REQUIRED LIBRARIES

In [56]:
import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
import warnings
warnings.simplefilter("ignore")

# LOADING THE DATASET

In [57]:
dt = pd.read_csv(r"C:\Users\HP\Downloads\archive (11)\MBA_Apriori.csv")
print(dt.head())

   BillNo                             Itemname  Quantity              Date  \
0  536365   WHITE HANGING HEART T-LIGHT HOLDER         6  01-12-2010 08:26   
1  536365                  WHITE METAL LANTERN         6  01-12-2010 08:26   
2  536365       CREAM CUPID HEARTS COAT HANGER         8  01-12-2010 08:26   
3  536365  KNITTED UNION FLAG HOT WATER BOTTLE         6  01-12-2010 08:26   
4  536365       RED WOOLLY HOTTIE WHITE HEART.         6  01-12-2010 08:26   

   Price  CustomerID         Country  
0   2.55     17850.0  United Kingdom  
1   3.39     17850.0  United Kingdom  
2   2.75     17850.0  United Kingdom  
3   3.39     17850.0  United Kingdom  
4   3.39     17850.0  United Kingdom  


# DATA PREPROCESSING

In [58]:
#shape of the data
print(dt.shape)

#removing spaces fromt the beginning and ending.
dt["Itemname"] = dt["Itemname"].str.strip()
dt["BillNo"] = dt["BillNo"].astype('str')
print(dt.shape)

(522064, 7)
(522064, 7)


# ANALYSIS OF NUMBER OF RECORDS FOR EACH COUNTRY

In [59]:
dt["Country"].value_counts()
#united kingdom is Holding the most number of records

Country
United Kingdom          487622
Germany                   9042
France                    8408
Spain                     2485
Netherlands               2363
Belgium                   2031
Switzerland               1967
Portugal                  1501
Australia                 1185
Norway                    1072
Italy                      758
Sweden                     451
Unspecified                446
Austria                    398
Poland                     330
Japan                      321
Israel                     295
Hong Kong                  284
Singapore                  222
Iceland                    182
USA                        179
Greece                     145
Malta                      112
United Arab Emirates        68
RSA                         58
Lebanon                     45
Lithuania                   35
Brazil                      32
Bahrain                     18
Saudi Arabia                 9
Name: count, dtype: int64

# GROUPING ALL TRANSACTIONS OF FRANCE

In [60]:
basket = (dt[dt["Country"]=="France"].groupby(["BillNo","Itemname"])["Quantity"].sum().unstack().reset_index().fillna(0).set_index("BillNo"))
print(basket.head(10))

Itemname  10 COLOUR SPACEBOY PEN  12 COLOURED PARTY BALLOONS  \
BillNo                                                         
536370                       0.0                         0.0   
536852                       0.0                         0.0   
536974                       0.0                         0.0   
537065                       0.0                         0.0   
537463                       0.0                         0.0   
537468                      24.0                         0.0   
537693                       0.0                         0.0   
537897                       0.0                         0.0   
537967                       0.0                         0.0   
538008                       0.0                         0.0   

Itemname  12 EGG HOUSE PAINTED WOOD  12 MESSAGE CARDS WITH ENVELOPES  \
BillNo                                                                 
536370                          0.0                              0.0   
536852         

# ENCODING DATA TO BINARY DIGITS

In [61]:
def encode_to_bits(x):
    if x<=0:
        return 0
    return 1

basket = basket.applymap(encode_to_bits)
print(basket.head(10))

Itemname  10 COLOUR SPACEBOY PEN  12 COLOURED PARTY BALLOONS  \
BillNo                                                         
536370                         0                           0   
536852                         0                           0   
536974                         0                           0   
537065                         0                           0   
537463                         0                           0   
537468                         1                           0   
537693                         0                           0   
537897                         0                           0   
537967                         0                           0   
538008                         0                           0   

Itemname  12 EGG HOUSE PAINTED WOOD  12 MESSAGE CARDS WITH ENVELOPES  \
BillNo                                                                 
536370                            0                                0   
536852         

# TRAINING MODEL

In [62]:
#generating frequent itemsets
freq_itemsets = apriori(basket, min_support = 0.07, use_colnames=True)
print(freq_itemsets)

     support                                           itemsets
0   0.071429                      (4 TRADITIONAL SPINNING TOPS)
1   0.096939                       (ALARM CLOCK BAKELIKE GREEN)
2   0.102041                        (ALARM CLOCK BAKELIKE PINK)
3   0.094388                         (ALARM CLOCK BAKELIKE RED)
4   0.081633                     (BAKING SET 9 PIECE RETROSPOT)
5   0.071429                     (CHILDRENS CUTLERY DOLLY GIRL)
6   0.099490                             (DOLLY GIRL LUNCH BOX)
7   0.096939                          (JUMBO BAG RED RETROSPOT)
8   0.076531                       (JUMBO BAG WOODLAND ANIMALS)
9   0.125000                           (LUNCH BAG APPLE DESIGN)
10  0.084184                      (LUNCH BAG DOLLY GIRL DESIGN)
11  0.153061                          (LUNCH BAG RED RETROSPOT)
12  0.119898                        (LUNCH BAG SPACEBOY DESIGN)
13  0.117347                               (LUNCH BAG WOODLAND)
14  0.142857                 (LUNCH BOX 

In [63]:
#generating rules
assoc_rules = association_rules(freq_itemsets, metric="lift", min_threshold=1)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
print(assoc_rules.head(100))

                                          antecedents  \
0                         (ALARM CLOCK BAKELIKE PINK)   
1                        (ALARM CLOCK BAKELIKE GREEN)   
2                        (ALARM CLOCK BAKELIKE GREEN)   
3                          (ALARM CLOCK BAKELIKE RED)   
4                                           (POSTAGE)   
5                        (ALARM CLOCK BAKELIKE GREEN)   
6                         (ALARM CLOCK BAKELIKE PINK)   
7                          (ALARM CLOCK BAKELIKE RED)   
8                         (ALARM CLOCK BAKELIKE PINK)   
9                                           (POSTAGE)   
10                                          (POSTAGE)   
11                         (ALARM CLOCK BAKELIKE RED)   
12                             (DOLLY GIRL LUNCH BOX)   
13                               (SPACEBOY LUNCH BOX)   
14                                          (POSTAGE)   
15                          (JUMBO BAG RED RETROSPOT)   
16                             

# RECOMMENDATIONS USING THE RULES

In [65]:
#Consider First Rules
prod1 = input("Enter the name of Product A: ")
prod2 = input("Enter the name of Product B: ")

# Filter association rules for the specific product pair
specific_rule = assoc_rules[
    (assoc_rules['antecedents'] == frozenset({prod1})) & 
    (assoc_rules['consequents'] == frozenset({prod2}))
]

# Extract and print confidence, support, and lift
confidence = specific_rule.iloc[0]['confidence']
support = specific_rule.iloc[0]['support']
lift = specific_rule.iloc[0]['lift']

# Print the result
print("Out of ",basket[prod1].sum(), " who bought ", prod1, ",",basket[prod2].sum()," people bought",prod2)
print(f"Product A: {prod1}")
print(f"Product B: {prod2}")
print(f"Confidence: {confidence:.2f}")
print(f"Support: {support:.2f}")
print(f"Lift: {lift:.2f}")


Enter the name of Product A:  SET/6 RED SPOTTY PAPER CUPS
Enter the name of Product B:  SET/6 RED SPOTTY PAPER PLATES


Out of  54  who bought  SET/6 RED SPOTTY PAPER CUPS , 50  people bought SET/6 RED SPOTTY PAPER PLATES
Product A: SET/6 RED SPOTTY PAPER CUPS
Product B: SET/6 RED SPOTTY PAPER PLATES
Confidence: 0.89
Support: 0.12
Lift: 6.97


# FILTERING RULES

In [66]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
print(assoc_rules[(assoc_rules["lift"]>=3) & (assoc_rules["confidence"]>=0.3)])

                                           antecedents  \
0                          (ALARM CLOCK BAKELIKE PINK)   
1                         (ALARM CLOCK BAKELIKE GREEN)   
2                         (ALARM CLOCK BAKELIKE GREEN)   
3                           (ALARM CLOCK BAKELIKE RED)   
6                          (ALARM CLOCK BAKELIKE PINK)   
7                           (ALARM CLOCK BAKELIKE RED)   
12                              (DOLLY GIRL LUNCH BOX)   
13                                (SPACEBOY LUNCH BOX)   
34                          (PLASTERS IN TIN SPACEBOY)   
35                     (PLASTERS IN TIN CIRCUS PARADE)   
36                  (PLASTERS IN TIN WOODLAND ANIMALS)   
37                     (PLASTERS IN TIN CIRCUS PARADE)   
40                  (PLASTERS IN TIN WOODLAND ANIMALS)   
41                          (PLASTERS IN TIN SPACEBOY)   
72                (SET/20 RED RETROSPOT PAPER NAPKINS)   
73                       (SET/6 RED SPOTTY PAPER CUPS)   
74            