In [19]:
# Langkah 1 : Mengimpor Library yang diperlukan
import numpy as np
import pandas as pd

from mlxtend.frequent_patterns import apriori, association_rules

In [20]:
# Step 2: Load and explore the data
# Load the data from the Excel workbook into a DataFrame
data = pd.read_excel('Online_Retail.xlsx')
# Preview the first rows
data.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom


In [21]:
# Explore the columns of the data
data.columns

Index(['InvoiceNo', 'StockCode', 'Description', 'Quantity', 'InvoiceDate',
       'UnitPrice', 'CustomerID', 'Country'],
      dtype='object')

In [22]:
# Explore the distinct regions in which transactions took place
data['Country'].unique()

array(['United Kingdom', 'France', 'Australia', 'Netherlands', 'Germany',
       'Norway', 'EIRE', 'Switzerland', 'Spain', 'Poland', 'Portugal',
       'Italy', 'Belgium', 'Lithuania', 'Japan', 'Iceland',
       'Channel Islands', 'Denmark', 'Cyprus', 'Sweden', 'Austria',
       'Israel', 'Finland', 'Bahrain', 'Greece', 'Hong Kong', 'Singapore',
       'Lebanon', 'United Arab Emirates', 'Saudi Arabia',
       'Czech Republic', 'Canada', 'Unspecified', 'Brazil', 'USA',
       'European Community', 'Malta', 'RSA'], dtype=object)

In [23]:
# Step 3: Clean the data
# Strip surrounding whitespace from the product descriptions
data['Description'] = data['Description'].str.strip()
# Invoice numbers are matched as text below, so store them as strings
data['InvoiceNo'] = data['InvoiceNo'].astype(str)

# Drop every transaction that was made on credit
# (identified by a 'C' in the invoice number)
is_credit = data['InvoiceNo'].str.contains('C')
data = data.loc[~is_credit]

In [25]:
# Step 4: Split the data by the region of the transaction
def build_basket(transactions, country):
    """Build the invoice-by-product quantity table for one country.

    Parameters
    ----------
    transactions : pandas.DataFrame
        Transaction lines with at least the columns ``Country``,
        ``InvoiceNo``, ``Description`` and ``Quantity``.
    country : str
        Value of the ``Country`` column to keep.

    Returns
    -------
    pandas.DataFrame
        One row per invoice (index ``InvoiceNo``), one column per product
        description; each cell is the summed ``Quantity`` of that product on
        that invoice, with 0 where the product is absent from the invoice.
    """
    return (transactions[transactions['Country'] == country]
            .groupby(['InvoiceNo', 'Description'])['Quantity']
            .sum()
            # unstack() already leaves 'InvoiceNo' as the index, so no
            # reset_index()/set_index() round trip is needed.
            .unstack()
            .fillna(0))


# Transactions made in France
basket_France = build_basket(data, "France")

# Transactions made in the United Kingdom
basket_UK = build_basket(data, "United Kingdom")

# Transactions made in Portugal
basket_Por = build_basket(data, "Portugal")

# Transactions made in Sweden
basket_Sweden = build_basket(data, "Sweden")

In [26]:
# Step 5: Encode the data with "hot encoding"
# Define the "hot encoding" function that puts the data into the 0/1 form
# expected by the frequent-pattern library
def hot_encode(x):
    """Map a quantity to a 0/1 presence flag.

    Parameters
    ----------
    x : int or float
        Quantity of a product on an invoice.

    Returns
    -------
    int
        1 if ``x`` is positive, otherwise 0.

    Notes
    -----
    Every input yields 0 or 1. Values strictly between 0 and 1 count as
    present (1); NaN compares false against 0 and therefore maps to 0.
    Without this, such inputs would fall through and return ``None``.
    """
    return 1 if x > 0 else 0

In [29]:
# Encode the datasets
def encode_basket(basket):
    """Apply ``hot_encode`` to every cell of ``basket`` and return the result.

    ``DataFrame.applymap`` is deprecated since pandas 2.1 in favour of
    ``DataFrame.map``; the element-wise method that exists in the installed
    pandas is used.
    """
    # Test the class rather than the instance: on older pandas an instance
    # lookup of ``map`` could resolve to a product column with that name.
    if hasattr(pd.DataFrame, 'map'):
        return basket.map(hot_encode)
    return basket.applymap(hot_encode)


basket_France = encode_basket(basket_France)
basket_UK = encode_basket(basket_UK)
basket_Por = encode_basket(basket_Por)
basket_Sweden = encode_basket(basket_Sweden)

# This cell has always left `basket_encoded` bound to the last encoded basket;
# keep that binding in case a later cell still reads it.
basket_encoded = basket_Sweden

In [31]:
# Step 6: Build the models and analyse the results
# 1) France
# Mine the frequent itemsets (minimum support 0.05)
frq_items = apriori(basket_France, use_colnames=True, min_support=0.05)

# Collect the inferred rules (lift threshold 1) in a dataframe, ranked by
# confidence and then lift, both descending
rules = (
    association_rules(frq_items, metric="lift", min_threshold=1)
    .sort_values(by=['confidence', 'lift'], ascending=False)
)
print(rules.head())

                                           antecedents  ... conviction
45                        (JUMBO BAG WOODLAND ANIMALS)  ...        inf
259  (PLASTERS IN TIN CIRCUS PARADE, RED TOADSTOOL ...  ...        inf
272  (RED TOADSTOOL LED NIGHT LIGHT, PLASTERS IN TI...  ...        inf
300  (SET/20 RED RETROSPOT PAPER NAPKINS, SET/6 RED...  ...  34.897959
301  (SET/20 RED RETROSPOT PAPER NAPKINS, SET/6 RED...  ...  34.489796

[5 rows x 9 columns]


In [32]:
# 2) United Kingdom
# Frequent itemsets (minimum support 0.01), then rules with lift threshold 1,
# ranked by confidence and then lift, both descending
frq_items = apriori(basket_UK, use_colnames=True, min_support=0.01)
rules = (
    association_rules(frq_items, metric="lift", min_threshold=1)
    .sort_values(by=['confidence', 'lift'], ascending=False)
)
print(rules.head())

                                       antecedents  ... conviction
116           (BEADED CRYSTAL HEART PINK ON STICK)  ...  39.637371
2018  (SUKI  SHOULDER BAG, JAM MAKING SET PRINTED)  ...  26.096206
2295         (HERB MARKER MINT, HERB MARKER THYME)  ...  21.947227
2300   (HERB MARKER PARSLEY, HERB MARKER ROSEMARY)  ...  20.444951
2301      (HERB MARKER PARSLEY, HERB MARKER THYME)  ...  20.443842

[5 rows x 9 columns]


In [33]:
# 3) Portugal
# Frequent itemsets (minimum support 0.05), then rules with lift threshold 1,
# ranked by confidence and then lift, both descending
frq_items = apriori(basket_Por, use_colnames=True, min_support=0.05)
rules = (
    association_rules(frq_items, metric="lift", min_threshold=1)
    .sort_values(by=['confidence', 'lift'], ascending=False)
)
print(rules.head())

                             antecedents  ... conviction
1170    (SET 12 COLOUR PENCILS SPACEBOY)  ...        inf
1171  (SET 12 COLOUR PENCILS DOLLY GIRL)  ...        inf
1172  (SET 12 COLOUR PENCILS DOLLY GIRL)  ...        inf
1173  (SET OF 4 KNICK KNACK TINS LONDON)  ...        inf
1174  (SET 12 COLOUR PENCILS DOLLY GIRL)  ...        inf

[5 rows x 9 columns]


In [34]:
# 4) Sweden
# Frequent itemsets (minimum support 0.05), then rules with lift threshold 1,
# ranked by confidence and then lift, both descending
frq_items = apriori(basket_Sweden, use_colnames=True, min_support=0.05)
rules = (
    association_rules(frq_items, metric="lift", min_threshold=1)
    .sort_values(by=['confidence', 'lift'], ascending=False)
)
print(rules.head())

                           antecedents  ... conviction
0        (PACK OF 72 SKULL CAKE CASES)  ...        inf
1        (12 PENCILS SMALL TUBE SKULL)  ...        inf
4              (36 DOILIES DOLLY GIRL)  ...        inf
5       (ASSORTED BOTTLE TOP  MAGNETS)  ...        inf
180  (CHILDRENS CUTLERY CIRCUS PARADE)  ...        inf

[5 rows x 9 columns]
