In [1]:
# !pip install --upgrade jupyter_client -qq

In [2]:
# ==============================
# STEP 1: Import library
# ==============================
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
import warnings
import os
# Silence warning output for the session: DeprecationWarning first, then
# every warning category (the second filter already covers the first).
warnings.simplefilter("ignore", DeprecationWarning)
warnings.simplefilter("ignore")
# Presumably read by the pydevd debugger to skip its file-validation
# check — confirm against the debugger version in use.
os.environ.update({"PYDEVD_DISABLE_FILE_VALIDATION": "1"})

In [3]:
import pandas as pd
from mlxtend.frequent_patterns import fpgrowth, association_rules

# Load data
# Read the CSV behind the Google Drive share link straight into a
# DataFrame (needs network access at run time).
url = "https://drive.google.com/uc?id=1BVdO9Pw8wncVsz12xS0c3UE_F4wStr0g"
df = pd.read_csv(filepath_or_buffer=url)

In [4]:
# Check data: preview the first rows, then summarize columns and dtypes.
print(df.head())
# DataFrame.info() writes its report to stdout itself and returns None,
# so wrapping it in print() would add a stray "None" line after the summary.
df.info()

  order_id product_code                     product_name  quantity  \
0   493410      TEST001          This is a test product.         5   
1  C493411        21539          RETRO SPOTS BUTTER DISH        -1   
2   493412      TEST001          This is a test product.         5   
3   493413        21724  PANDA AND BUNNIES STICKER SHEET         1   
4   493413        84578   ELEPHANT TOY WITH BLUE T-SHIRT         1   

            order_date  price  customer_id  
0  2010-01-04 09:24:00   4.50      12346.0  
1  2010-01-04 09:43:00   4.25      14590.0  
2  2010-01-04 09:53:00   4.50      12346.0  
3  2010-01-04 09:54:00   0.85          NaN  
4  2010-01-04 09:54:00   3.75          NaN  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 461773 entries, 0 to 461772
Data columns (total 7 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   order_id      461773 non-null  object 
 1   product_code  461773 non-null  object 
 2   product_name  4590

In [5]:
# Use only order_id & product_name, one row per (order, product) pair, so a
# product listed several times in the same order is counted once.
# NOTE(review): rows such as order_id "C493411" (quantity -1) and the
# "This is a test product." entries visible in the preview are still kept
# here — confirm whether they should be excluded before mining.
basket_columns = ['order_id', 'product_name']
df = df.loc[:, basket_columns].drop_duplicates()

In [6]:
# Drop products that appear too rarely (saves RAM): keep only products found
# in more than MIN_PRODUCT_ROWS (order, product) rows.
MIN_PRODUCT_ROWS = 50
product_freq = df['product_name'].value_counts()
valid_products = product_freq[product_freq > MIN_PRODUCT_ROWS].index
df = df[df['product_name'].isin(valid_products)]

# Sample order_ids (optional, for testing so the kernel does not crash).
# Series.sample raises ValueError when asked for more rows than exist, which
# happens on a smaller dataset or when this cell is re-run on the already
# sampled frame — so cap the request at the number of distinct orders left.
MAX_SAMPLED_ORDERS = 20000
unique_orders = df['order_id'].drop_duplicates()
sample_orders = unique_orders.sample(
    min(MAX_SAMPLED_ORDERS, len(unique_orders)), random_state=42
)
df = df[df['order_id'].isin(sample_orders)]

In [7]:
# Transform to basket format: one row per order, one column per product,
# True where the order contains the product. The matrix stays boolean
# instead of being cast back to a wider integer dtype: it is smaller in
# memory and is the input type mlxtend's frequent-pattern functions expect.
basket = pd.crosstab(df['order_id'], df['product_name']).astype(bool)

print("Basket shape:", basket.shape)

# FP-Growth (more memory-efficient than Apriori): keep itemsets whose
# support is at least 0.01 of the orders in the basket matrix.
frequent_itemsets = fpgrowth(basket, min_support=0.01, use_colnames=True)
print(frequent_itemsets.sort_values(by="support", ascending=False).head())

Basket shape: (20000, 2024)
     support                              itemsets
131  0.14250  (WHITE HANGING HEART T-LIGHT HOLDER)
448  0.09605            (REGENCY CAKESTAND 3 TIER)
1    0.07055      (STRAWBERRY CERAMIC TRINKET BOX)
208  0.05940       (ASSORTED COLOUR BIRD ORNAMENT)
132  0.05805            (HOME BUILDING BLOCK WORD)


In [8]:
# Generate rules: derive association rules from the frequent itemsets using
# lift with a threshold of 1, ordered from highest confidence downward.
rules = association_rules(
    frequent_itemsets, metric="lift", min_threshold=1
).sort_values(by="confidence", ascending=False)

# Columns shown in the preview of the top rules.
rule_columns = ['antecedents', 'consequents', 'support', 'confidence', 'lift']
print("Association Rules:")
print(rules[rule_columns].head())

Association Rules:
                                           antecedents  \
596                     (POPPY'S PLAYHOUSE LIVINGROOM)   
594                        (POPPY'S PLAYHOUSE BEDROOM)   
568                   (PINK REGENCY TEACUP AND SAUCER)   
599                     (POPPY'S PLAYHOUSE LIVINGROOM)   
250  (SWEETHEART CERAMIC TRINKET BOX, WHITE HANGING...   

                           consequents  support  confidence       lift  
596        (POPPY'S PLAYHOUSE KITCHEN)  0.01115    0.884921  54.123586  
594        (POPPY'S PLAYHOUSE KITCHEN)  0.01275    0.855705  52.336679  
568  (GREEN REGENCY TEACUP AND SAUCER)  0.01005    0.844538  47.049460  
599        (POPPY'S PLAYHOUSE BEDROOM)  0.01060    0.841270  56.461063  
250   (STRAWBERRY CERAMIC TRINKET BOX)  0.01260    0.837209  11.866893  


In [9]:
# Narrate the first five rules of `rules` as plain sentences, one per rule.
top_rules = rules.head(5)
for antecedents, consequents, confidence, lift in zip(
    top_rules['antecedents'],
    top_rules['consequents'],
    top_rules['confidence'],
    top_rules['lift'],
):
    # Each itemset is converted to a list so it prints as ['A', 'B'].
    print(f"Jika orang membeli {list(antecedents)}, "
          f"maka kemungkinan besar mereka juga membeli {list(consequents)} "
          f"(confidence={confidence:.2f}, lift={lift:.2f})")

Jika orang membeli ["POPPY'S PLAYHOUSE LIVINGROOM"], maka kemungkinan besar mereka juga membeli ["POPPY'S PLAYHOUSE KITCHEN"] (confidence=0.88, lift=54.12)
Jika orang membeli ["POPPY'S PLAYHOUSE BEDROOM"], maka kemungkinan besar mereka juga membeli ["POPPY'S PLAYHOUSE KITCHEN"] (confidence=0.86, lift=52.34)
Jika orang membeli ['PINK REGENCY TEACUP AND SAUCER'], maka kemungkinan besar mereka juga membeli ['GREEN REGENCY TEACUP AND SAUCER'] (confidence=0.84, lift=47.05)
Jika orang membeli ["POPPY'S PLAYHOUSE LIVINGROOM"], maka kemungkinan besar mereka juga membeli ["POPPY'S PLAYHOUSE BEDROOM"] (confidence=0.84, lift=56.46)
Jika orang membeli ['SWEETHEART CERAMIC TRINKET BOX', 'WHITE HANGING HEART T-LIGHT HOLDER'], maka kemungkinan besar mereka juga membeli ['STRAWBERRY CERAMIC TRINKET BOX'] (confidence=0.84, lift=11.87)
