## 1. Import Libraries

In [25]:
import sys
sys.path.append('../src')

import pandas as pd
import numpy as np
import networkx as nx
from apriori_library import DataCleaner, FPGrowthMiner, WeightedAssociationRulesMiner

print("✓ Các thư viện đã được import thành công")

✓ Các thư viện đã được import thành công


## 2. Load Dữ Liệu Gốc và Luật Đã Sinh

In [26]:
# Load the cleaned source data (already contains the TotalPrice column).
cleaned_data_path = '../data/processed/cleaned_uk_data.csv'
# pandas reported "Columns (0) have mixed types" for this file; column 0 is
# InvoiceNo. Reading it explicitly as str keeps every invoice id in a single
# representation, so the same invoice cannot end up stored both as an int and
# as a str (which would distort nunique() and any grouping by invoice).
cleaned_data = pd.read_csv(cleaned_data_path, dtype={'InvoiceNo': str})
cleaned_data['InvoiceDate'] = pd.to_datetime(cleaned_data['InvoiceDate'])

# Quick sanity summary of what was loaded.
print(f"Kích thước dữ liệu gốc: {cleaned_data.shape}")
print(f"\nCác cột: {list(cleaned_data.columns)}")
print(f"\nThông tin giao dịch:")
print(f"  - Tổng giao dịch: {cleaned_data['InvoiceNo'].nunique():,}")
print(f"  - Tổng sản phẩm: {cleaned_data['StockCode'].nunique():,}")
print(f"  - Tổng giá trị (TotalPrice): £{cleaned_data['TotalPrice'].sum():,.2f}")
print(f"\nMẫu dữ liệu:")
print(cleaned_data.head())

Kích thước dữ liệu gốc: (485123, 11)

Các cột: ['InvoiceNo', 'StockCode', 'Description', 'Quantity', 'InvoiceDate', 'UnitPrice', 'CustomerID', 'Country', 'TotalPrice', 'DayOfWeek', 'HourOfDay']

Thông tin giao dịch:
  - Tổng giao dịch: 18,021
  - Tổng sản phẩm: 3,916
  - Tổng giá trị (TotalPrice): £9,025,222.08

Mẫu dữ liệu:
  InvoiceNo StockCode                          Description  Quantity  \
0    536365    85123A   WHITE HANGING HEART T-LIGHT HOLDER         6   
1    536365     71053                  WHITE METAL LANTERN         6   
2    536365    84406B       CREAM CUPID HEARTS COAT HANGER         8   
3    536365    84029G  KNITTED UNION FLAG HOT WATER BOTTLE         6   
4    536365    84029E       RED WOOLLY HOTTIE WHITE HEART.         6   

          InvoiceDate  UnitPrice CustomerID         Country  TotalPrice  \
0 2010-12-01 08:26:00       2.55     017850  United Kingdom       15.30   
1 2010-12-01 08:26:00       3.39     017850  United Kingdom       20.34   
2 2010-12-01 08


Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



In [27]:
# Read the rules previously generated by FP-Growth (output of Q2 or Q3).
rules_path = '../data/processed/rules_fpgrowth_filtered.csv'
rules_df = pd.read_csv(filepath_or_buffer=rules_path)

# Convert the antecedents/consequents columns from their CSV string form back to frozenset (when needed).
def parse_frozenset(s):
    """Convert one serialized rule side back into a ``frozenset``.

    The value is parsed with ``ast.literal_eval`` rather than ``eval`` so that
    text coming from the CSV file can never execute code.

    Args:
        s: A cell value from the rules table: a string such as
            ``"frozenset({'A', 'B'})"`` or ``"{'A', 'B'}"``, a missing value,
            or an object that is already a frozenset.

    Returns:
        frozenset: The parsed items. Missing values give an empty frozenset;
        text that is not a Python collection literal is kept as a single item.
    """
    import ast  # local import: keeps this cell independent of the import cell

    if isinstance(s, frozenset):
        return s
    if not isinstance(s, str):
        # NaN/None -> empty set; any other non-string scalar -> single item.
        return frozenset() if pd.isna(s) else frozenset([s])

    text = s.strip()
    # literal_eval cannot evaluate the call "frozenset(...)", so unwrap it and
    # parse only the literal inside the parentheses.
    if text.startswith('frozenset(') and text.endswith(')'):
        text = text[len('frozenset('):-1].strip()
        if not text:
            return frozenset()

    try:
        parsed = ast.literal_eval(text)
        if isinstance(parsed, (set, frozenset, list, tuple)):
            return frozenset(parsed)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # Not a Python literal (e.g. a bare item name): treat as one item.
        return frozenset([s])

    if isinstance(parsed, str):
        # A single quoted item name, e.g. "'HERB MARKER THYME'".
        return frozenset([parsed])
    # Any other literal (number, dict, ...) is kept verbatim as one item.
    return frozenset([s])

# Parse each rule side that is present and stored with object dtype.
for rule_side in ('antecedents', 'consequents'):
    side_needs_parsing = (
        rule_side in rules_df.columns
        and rules_df[rule_side].dtype == 'object'
    )
    if side_needs_parsing:
        rules_df[rule_side] = rules_df[rule_side].apply(parse_frozenset)

# Short summary of the loaded rules.
rule_count = len(rules_df)
rule_columns = list(rules_df.columns)
print(f"Số luật đã tải: {rule_count}")
print(f"\nCác cột trong rules_df: {rule_columns}")
print("\nMẫu luật:")
print(rules_df.head())

Số luật đã tải: 1794

Các cột trong rules_df: ['antecedents', 'consequents', 'antecedent support', 'consequent support', 'support', 'confidence', 'lift', 'representativity', 'leverage', 'conviction', 'zhangs_metric', 'jaccard', 'certainty', 'kulczynski', 'antecedents_str', 'consequents_str', 'rule_str']

Mẫu luật:
                                   antecedents             consequents  \
0  (HERB MARKER ROSEMARY, HERB MARKER PARSLEY)     (HERB MARKER THYME)   
1        (HERB MARKER MINT, HERB MARKER THYME)  (HERB MARKER ROSEMARY)   
2        (HERB MARKER MINT, HERB MARKER THYME)   (HERB MARKER PARSLEY)   
3     (HERB MARKER THYME, HERB MARKER PARSLEY)  (HERB MARKER ROSEMARY)   
4       (HERB MARKER THYME, HERB MARKER BASIL)  (HERB MARKER ROSEMARY)   

   antecedent support  consequent support   support  confidence       lift  \
0            0.011487            0.012763  0.010932    0.951691  74.567045   
1            0.011098            0.012818  0.010599    0.955000  74.502403   
2    

## 3. Tính Toán Weighted Metrics

In [28]:
# Build the miner from the mined rules and the cleaned transactions.
weighted_miner = WeightedAssociationRulesMiner(rules_df, cleaned_data)

# Compute the weighted metrics for every rule.
print("Đang tính toán weighted metrics...")
weighted_rules = weighted_miner.get_weighted_rules()

# Columns shown in the preview: rule text, plain metrics, weighted metrics.
preview_columns = [
    'antecedents_str', 'consequents_str',
    'support', 'confidence', 'lift',
    'weighted_support', 'weighted_confidence', 'weighted_lift',
]

print("\n✓ Weighted metrics đã được tính toán")
print("\nCác cột trong weighted_rules:")
print(weighted_rules.columns.tolist())
print("\nMẫu weighted rules:")
print(weighted_rules[preview_columns].head(10))

Đang tính toán weighted metrics...

✓ Weighted metrics đã được tính toán

Các cột trong weighted_rules:
['antecedents', 'consequents', 'antecedent support', 'consequent support', 'support', 'confidence', 'lift', 'representativity', 'leverage', 'conviction', 'zhangs_metric', 'jaccard', 'certainty', 'kulczynski', 'antecedents_str', 'consequents_str', 'rule_str', 'weighted_support', 'weighted_confidence', 'weighted_lift', 'transactions_with_both', 'transactions_with_antecedent']

Mẫu weighted rules:
                        antecedents_str consequents_str   support  confidence  \
466              RED SPOTTY BISCUIT TIN  DOTCOM POSTAGE  0.010710    0.569322   
573    SET 3 RETROSPOT TEA,COFFEE,SUGAR  DOTCOM POSTAGE  0.010432    0.501333   
302      ROTATING LEAVES T-LIGHT HOLDER  DOTCOM POSTAGE  0.011098    0.668896   
314                  SUKI  SHOULDER BAG  DOTCOM POSTAGE  0.018645    0.662722   
247  CHARLIE+LOLA PINK HOT WATER BOTTLE  DOTCOM POSTAGE  0.011931    0.704918   
556        S

## 4. So Sánh Weighted vs Standard Rules

In [29]:
# Compare the unweighted metrics with their weighted counterparts.
comparison = weighted_miner.compare_weighted_vs_unweighted()

print("Bảng So Sánh Weighted vs Unweighted:")
print(comparison.head(10))

print("\n\nThống kê sự khác biệt (%):\n")

# One row per percent-difference column, one column per summary statistic.
diff_columns = ['support_diff_pct', 'confidence_diff_pct', 'lift_diff_pct']
summary_methods = {'Mean': 'mean', 'Std': 'std', 'Min': 'min', 'Max': 'max'}

stats_payload = {'Chỉ số': list(diff_columns)}
for stat_label, method_name in summary_methods.items():
    stats_payload[stat_label] = [
        getattr(comparison[diff_col], method_name)() for diff_col in diff_columns
    ]

stats_comparison = pd.DataFrame(stats_payload)
print(stats_comparison.round(2))

Bảng So Sánh Weighted vs Unweighted:
                        antecedents_str consequents_str   support  confidence  \
466              RED SPOTTY BISCUIT TIN  DOTCOM POSTAGE  0.010710    0.569322   
573    SET 3 RETROSPOT TEA,COFFEE,SUGAR  DOTCOM POSTAGE  0.010432    0.501333   
302      ROTATING LEAVES T-LIGHT HOLDER  DOTCOM POSTAGE  0.011098    0.668896   
314                  SUKI  SHOULDER BAG  DOTCOM POSTAGE  0.018645    0.662722   
247  CHARLIE+LOLA PINK HOT WATER BOTTLE  DOTCOM POSTAGE  0.011931    0.704918   
556        SMALL HEART MEASURING SPOONS  DOTCOM POSTAGE  0.012652    0.508929   
781              TOY TIDY PINK POLKADOT  DOTCOM POSTAGE  0.010210    0.410714   
511                     JUMBO BAG TOYS   DOTCOM POSTAGE  0.015149    0.539526   
328     JUMBO BAG CHARLIE AND LOLA TOYS  DOTCOM POSTAGE  0.010654    0.650847   
791                 BATHROOM METAL SIGN  DOTCOM POSTAGE  0.013984    0.408428   

          lift  weighted_support  weighted_confidence  weighted_lift  \

## 5. Ranking Change Analysis

In [30]:
# Ranking-change analysis: compare the top rules by lift with the top rules by weighted_lift.
TOP_N = 10

top10_unweighted = (
    weighted_rules.nlargest(TOP_N, 'lift')[['antecedents_str', 'consequents_str', 'lift']]
    .reset_index(drop=True)
)
# Rank from the actual row count: nlargest returns fewer than TOP_N rows when
# fewer rules exist, and a fixed range(1, 11) would then fail on assignment.
top10_unweighted['rank_unweighted'] = range(1, len(top10_unweighted) + 1)

top10_weighted = (
    weighted_rules.nlargest(TOP_N, 'weighted_lift')[['antecedents_str', 'consequents_str', 'weighted_lift']]
    .reset_index(drop=True)
)
top10_weighted['rank_weighted'] = range(1, len(top10_weighted) + 1)

print("\n=== TOP 10 Rules theo UNWEIGHTED LIFT ===")
print(top10_unweighted)

print("\n\n=== TOP 10 Rules theo WEIGHTED LIFT ===")
print(top10_weighted)

print("\n\n=== NHẬN XÉT: ===")
# A rule is identified by its (antecedents, consequents) text pair.
common_rules = set(zip(top10_unweighted['antecedents_str'], top10_unweighted['consequents_str'])) & \
               set(zip(top10_weighted['antecedents_str'], top10_weighted['consequents_str']))
print(f"Số luật xuất hiện trong cả 2 top-10: {len(common_rules)}")

# Word the conclusion according to the measured overlap instead of always
# claiming a significant change.
compared_rules = min(len(top10_unweighted), len(top10_weighted))
if compared_rules and len(common_rules) == compared_rules:
    print("Hai top-10 trùng nhau hoàn toàn: trọng số giao dịch không làm thay đổi nhóm luật dẫn đầu.")
elif len(common_rules) * 2 >= compared_rules:
    # At least half of the leaders are shared: only a partial reshuffle.
    print("Phần lớn luật dẫn đầu được giữ nguyên: trọng số giao dịch chỉ thay đổi một phần xếp hạng.")
else:
    print("Điều này cho thấy trọng số giao dịch đã thay đổi đáng kể xếp hạng của các luật.")


=== TOP 10 Rules theo UNWEIGHTED LIFT ===
                             antecedents_str       consequents_str       lift  \
0  HERB MARKER PARSLEY, HERB MARKER ROSEMARY     HERB MARKER THYME  74.567045   
1        HERB MARKER MINT, HERB MARKER THYME  HERB MARKER ROSEMARY  74.502403   
2        HERB MARKER MINT, HERB MARKER THYME   HERB MARKER PARSLEY  74.297105   
3     HERB MARKER PARSLEY, HERB MARKER THYME  HERB MARKER ROSEMARY  74.244244   
4       HERB MARKER BASIL, HERB MARKER THYME  HERB MARKER ROSEMARY  74.169983   
5    HERB MARKER BASIL, HERB MARKER ROSEMARY     HERB MARKER THYME  73.407619   
6     HERB MARKER MINT, HERB MARKER ROSEMARY     HERB MARKER THYME  73.001294   
7     HERB MARKER MINT, HERB MARKER ROSEMARY   HERB MARKER PARSLEY  72.870539   
8                        HERB MARKER PARSLEY   HERB MARKER CHIVES   72.809761   
9                        HERB MARKER CHIVES    HERB MARKER PARSLEY  72.809761   

   rank_unweighted  
0                1  
1                2  
2 

In [39]:
# Visualizations using Plotly (interactive, works reliably)
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# ========== CHART 1: Top 15 Rules - Support, Confidence, Lift Comparison ==========
print("="*80)
print("VISUALIZATION 1: Top 15 Rules Comparison (Unweighted vs Weighted Metrics)")
print("="*80)

# The 15 rules with the highest weighted lift; every panel below shows these rows.
top15_rules = weighted_rules.nlargest(15, 'weighted_lift').copy()
top15_rules['rule_short'] = (top15_rules['antecedents_str'].str[:12] + ' → ' +
                              top15_rules['consequents_str'].str[:12])

# Compact x-axis labels R1..Rn, in the same order as top15_rules.
rules_labels = [f"R{i+1}" for i in range(len(top15_rules))]

# Create subplots: 2x2 grid
fig = make_subplots(
    rows=2, cols=2,
    subplot_titles=('Support Comparison', 'Confidence Comparison',
                    'Lift Comparison', 'Lift % Change Impact'),
    specs=[[{'type': 'bar'}, {'type': 'bar'}],
           [{'type': 'bar'}, {'type': 'bar'}]]
)

# Panels 1-3: unweighted vs weighted bars for support, confidence and lift.
# Only the first panel contributes legend entries; the colours are shared.
metric_panels = [
    # (row, col, unweighted column, weighted column, legend suffix, show legend)
    (1, 1, 'support', 'weighted_support', 'Support', True),
    (1, 2, 'confidence', 'weighted_confidence', 'Conf', False),
    (2, 1, 'lift', 'weighted_lift', 'Lift', False),
]
for panel_row, panel_col, plain_col, weighted_col, legend_suffix, in_legend in metric_panels:
    fig.add_trace(
        go.Bar(x=rules_labels, y=top15_rules[plain_col], name=f'Unweighted {legend_suffix}',
               marker_color='steelblue', showlegend=in_legend),
        row=panel_row, col=panel_col
    )
    fig.add_trace(
        go.Bar(x=rules_labels, y=top15_rules[weighted_col], name=f'Weighted {legend_suffix}',
               marker_color='coral', showlegend=in_legend),
        row=panel_row, col=panel_col
    )

# Panel 4: Lift % change.
# Select the comparison rows by the index of top15_rules so each bar belongs to
# the rule its R-label refers to. Taking .head(15) would only be correct if the
# helper happened to return rows already ordered by weighted_lift.
comparison_all = weighted_miner.compare_weighted_vs_unweighted()
comparison_top15 = comparison_all.loc[top15_rules.index].copy()
colors = ['red' if x < 0 else 'green' for x in comparison_top15['lift_diff_pct']]

fig.add_trace(
    go.Bar(x=rules_labels, y=comparison_top15['lift_diff_pct'], marker_color=colors,
           name='Lift % Change', showlegend=False),
    row=2, col=2
)

# Axis titles: every panel shares the x title, y titles differ per panel.
y_axis_titles = {
    (1, 1): 'Support Value',
    (1, 2): 'Confidence Value',
    (2, 1): 'Lift Value',
    (2, 2): '% Change',
}
for (panel_row, panel_col), y_title in y_axis_titles.items():
    fig.update_xaxes(title_text='Rules', row=panel_row, col=panel_col)
    fig.update_yaxes(title_text=y_title, row=panel_row, col=panel_col)

fig.update_layout(height=900, width=1400, title_text="Q4 Weighted Association Rules - Metrics Comparison",
                  barmode='group', hovermode='x unified')
fig.show()

print("\n✓ Interactive visualization displayed above\n")

# ========== TABLE 1: Top 15 Rules Details ==========
banner_line = "=" * 80
print(banner_line)
print("TABLE 1: Top 15 Rules - Detailed Metrics")
print(banner_line)

# Source column -> short header shown in the table.
table1_headers = {
    'rule_short': 'Rule',
    'support': 'Unw_Supp',
    'weighted_support': 'W_Supp',
    'confidence': 'Unw_Conf',
    'weighted_confidence': 'W_Conf',
    'lift': 'Unw_Lift',
    'weighted_lift': 'W_Lift',
}

display_top15 = top15_rules[list(table1_headers.keys())].copy()
display_top15.columns = list(table1_headers.values())

# Render every numeric column as text with 4 decimals (the rule label is skipped).
for col in display_top15.columns[1:]:
    display_top15[col] = display_top15[col].apply('{:.4f}'.format)

display(display_top15.reset_index(drop=True))

# ========== CHART 2: Percent Change Heatmap ==========
print("\n\n" + "="*80)
print("VISUALIZATION 2: Percent Change Heatmap (Top 12 Rules)")
print("="*80)

# Rank explicitly by weighted_lift so "Top 12" does not depend on whatever row
# order the helper returns; .head(12) alone would silently pick arbitrary rules
# if that order ever changed.
comparison_subset = (
    weighted_miner.compare_weighted_vs_unweighted()
    .nlargest(12, 'weighted_lift')
    .copy()
)
# Matrix of shape (rules, 3): one row per rule, one column per metric.
heatmap_data = comparison_subset[[
    'support_diff_pct', 'confidence_diff_pct', 'lift_diff_pct'
]].values

rule_labels_12 = [f'Rule {i+1}' for i in range(heatmap_data.shape[0])]

fig = go.Figure(data=go.Heatmap(
    z=heatmap_data,
    x=['Support %', 'Confidence %', 'Lift %'],
    y=rule_labels_12,
    colorscale='RdBu',
    zmid=0,  # centre the diverging colour scale on "no change"
    text=heatmap_data.round(1),
    texttemplate='%{text:.1f}%',
    colorbar=dict(title='% Change')
))

fig.update_layout(
    title='Percent Change: Weighted vs Unweighted (Top 12 Rules)',
    xaxis_title='Metrics',
    yaxis_title='Rules',
    height=500,
    width=800
)

fig.show()

print("\n✓ Heatmap displayed above\n")

# ========== TABLE 2: Percent Change Details ==========
table2_rule = "=" * 80
print(table2_rule)
print("TABLE 2: Percent Change Analysis (Top 12 Rules)")
print(table2_rule)

# Source column -> header shown in the table.
table2_headers = {
    'antecedents_str': 'Antecedents',
    'consequents_str': 'Consequents',
    'support_diff_pct': 'Supp_Chg%',
    'confidence_diff_pct': 'Conf_Chg%',
    'lift_diff_pct': 'Lift_Chg%',
}

display_comparison = comparison_subset[list(table2_headers.keys())].copy()
display_comparison.columns = list(table2_headers.values())

# Format the three percent-change columns as text with two decimals and a % sign.
for col in ('Supp_Chg%', 'Conf_Chg%', 'Lift_Chg%'):
    display_comparison[col] = display_comparison[col].apply('{:.2f}%'.format)

display(display_comparison.reset_index(drop=True))

# ========== CHART 3: Summary Statistics Comparison ==========
print("\n\n" + "="*80)
print("VISUALIZATION 3: Summary Statistics Comparison (All Rules)")
print("="*80)

# Mean of each metric over all rules, plain vs weighted.
metrics_names = ['Mean Support', 'Mean Confidence', 'Mean Lift']
plain_metric_columns = ['support', 'confidence', 'lift']
unweighted_vals = [weighted_rules[name].mean() for name in plain_metric_columns]
weighted_vals = [weighted_rules[f'weighted_{name}'].mean() for name in plain_metric_columns]

# Two bar series grouped side by side per metric.
fig = go.Figure()
fig.add_trace(go.Bar(x=metrics_names, y=unweighted_vals, name='Unweighted', marker_color='steelblue'))
fig.add_trace(go.Bar(x=metrics_names, y=weighted_vals, name='Weighted', marker_color='coral'))

fig.update_layout(
    title='Summary Statistics: Unweighted vs Weighted (All Rules)',
    xaxis_title='Metrics',
    yaxis_title='Mean Value',
    barmode='group',
    height=500,
    width=900,
    hovermode='x unified'
)

fig.show()

print("\n✓ Summary statistics chart displayed above\n")

# ========== TABLE 3: Summary Statistics ==========
print("="*80)
print("TABLE 3: Summary Statistics - All Rules")
print("="*80)

# Series methods evaluated per column, in the same order as the 'Metric' labels.
stat_method_names = ['mean', 'median', 'min', 'max', 'std']

# Table header -> column of weighted_rules it summarises.
stat_sources = {
    'Unw_Support': 'support',
    'W_Support': 'weighted_support',
    'Unw_Confidence': 'confidence',
    'W_Confidence': 'weighted_confidence',
    'Unw_Lift': 'lift',
    'W_Lift': 'weighted_lift',
}

stats_data = {'Metric': ['Mean', 'Median', 'Min', 'Max', 'Std Dev']}
for stat_header, source_column in stat_sources.items():
    source_values = weighted_rules[source_column]
    # Each statistic is rendered as text with 4 decimals.
    stats_data[stat_header] = [
        f"{getattr(source_values, method_name)():.4f}"
        for method_name in stat_method_names
    ]

stats_df = pd.DataFrame(stats_data)
display(stats_df)

print("\n✓ All visualizations and tables displayed successfully")

VISUALIZATION 1: Top 15 Rules Comparison (Unweighted vs Weighted Metrics)


VISUALIZATION 1: Top 15 Rules Comparison (Unweighted vs Weighted Metrics)



✓ Interactive visualization displayed above

TABLE 1: Top 15 Rules - Detailed Metrics


Unnamed: 0,Rule,Unw_Supp,W_Supp,Unw_Conf,W_Conf,Unw_Lift,W_Lift
0,RED SPOTTY B → DOTCOM POSTA,0.0107,0.0096,0.5693,0.1789,14.5322,1.2545
1,SET 3 RETROS → DOTCOM POSTA,0.0104,0.0087,0.5013,0.1608,12.7968,1.1275
2,ROTATING LEA → DOTCOM POSTA,0.0111,0.0116,0.6689,0.1472,17.0739,1.0324
3,SUKI SHOULD → DOTCOM POSTA,0.0186,0.0161,0.6627,0.1464,16.9163,1.0268
4,CHARLIE+LOLA → DOTCOM POSTA,0.0119,0.0127,0.7049,0.1424,17.9934,0.9985
5,SMALL HEART → DOTCOM POSTA,0.0127,0.0112,0.5089,0.1392,12.9907,0.976
6,TOY TIDY PIN → DOTCOM POSTA,0.0102,0.0086,0.4107,0.1359,10.4837,0.9531
7,JUMBO BAG TO → DOTCOM POSTA,0.0151,0.009,0.5395,0.1312,13.7717,0.9198
8,JUMBO BAG CH → DOTCOM POSTA,0.0107,0.0054,0.6508,0.1311,16.6132,0.9197
9,BATHROOM MET → DOTCOM POSTA,0.014,0.0118,0.4084,0.1273,10.4253,0.8925




VISUALIZATION 2: Percent Change Heatmap (Top 12 Rules)


VISUALIZATION 1: Top 15 Rules Comparison (Unweighted vs Weighted Metrics)



✓ Interactive visualization displayed above

TABLE 1: Top 15 Rules - Detailed Metrics


Unnamed: 0,Rule,Unw_Supp,W_Supp,Unw_Conf,W_Conf,Unw_Lift,W_Lift
0,RED SPOTTY B → DOTCOM POSTA,0.0107,0.0096,0.5693,0.1789,14.5322,1.2545
1,SET 3 RETROS → DOTCOM POSTA,0.0104,0.0087,0.5013,0.1608,12.7968,1.1275
2,ROTATING LEA → DOTCOM POSTA,0.0111,0.0116,0.6689,0.1472,17.0739,1.0324
3,SUKI SHOULD → DOTCOM POSTA,0.0186,0.0161,0.6627,0.1464,16.9163,1.0268
4,CHARLIE+LOLA → DOTCOM POSTA,0.0119,0.0127,0.7049,0.1424,17.9934,0.9985
5,SMALL HEART → DOTCOM POSTA,0.0127,0.0112,0.5089,0.1392,12.9907,0.976
6,TOY TIDY PIN → DOTCOM POSTA,0.0102,0.0086,0.4107,0.1359,10.4837,0.9531
7,JUMBO BAG TO → DOTCOM POSTA,0.0151,0.009,0.5395,0.1312,13.7717,0.9198
8,JUMBO BAG CH → DOTCOM POSTA,0.0107,0.0054,0.6508,0.1311,16.6132,0.9197
9,BATHROOM MET → DOTCOM POSTA,0.014,0.0118,0.4084,0.1273,10.4253,0.8925




VISUALIZATION 2: Percent Change Heatmap (Top 12 Rules)



✓ Heatmap displayed above

TABLE 2: Percent Change Analysis (Top 12 Rules)


Unnamed: 0,Antecedents,Consequents,Supp_Chg%,Conf_Chg%,Lift_Chg%
0,RED SPOTTY BISCUIT TIN,DOTCOM POSTAGE,-10.31%,-68.58%,-91.37%
1,"SET 3 RETROSPOT TEA,COFFEE,SUGAR",DOTCOM POSTAGE,-16.92%,-67.93%,-91.19%
2,ROTATING LEAVES T-LIGHT HOLDER,DOTCOM POSTAGE,4.17%,-77.99%,-93.95%
3,SUKI SHOULDER BAG,DOTCOM POSTAGE,-13.56%,-77.91%,-93.93%
4,CHARLIE+LOLA PINK HOT WATER BOTTLE,DOTCOM POSTAGE,6.62%,-79.80%,-94.45%
5,SMALL HEART MEASURING SPOONS,DOTCOM POSTAGE,-11.28%,-72.65%,-92.49%
6,TOY TIDY PINK POLKADOT,DOTCOM POSTAGE,-15.44%,-66.91%,-90.91%
7,JUMBO BAG TOYS,DOTCOM POSTAGE,-40.45%,-75.69%,-93.32%
8,JUMBO BAG CHARLIE AND LOLA TOYS,DOTCOM POSTAGE,-49.66%,-79.85%,-94.46%
9,BATHROOM METAL SIGN,DOTCOM POSTAGE,-15.55%,-68.84%,-91.44%


VISUALIZATION 1: Top 15 Rules Comparison (Unweighted vs Weighted Metrics)



✓ Interactive visualization displayed above

TABLE 1: Top 15 Rules - Detailed Metrics


Unnamed: 0,Rule,Unw_Supp,W_Supp,Unw_Conf,W_Conf,Unw_Lift,W_Lift
0,RED SPOTTY B → DOTCOM POSTA,0.0107,0.0096,0.5693,0.1789,14.5322,1.2545
1,SET 3 RETROS → DOTCOM POSTA,0.0104,0.0087,0.5013,0.1608,12.7968,1.1275
2,ROTATING LEA → DOTCOM POSTA,0.0111,0.0116,0.6689,0.1472,17.0739,1.0324
3,SUKI SHOULD → DOTCOM POSTA,0.0186,0.0161,0.6627,0.1464,16.9163,1.0268
4,CHARLIE+LOLA → DOTCOM POSTA,0.0119,0.0127,0.7049,0.1424,17.9934,0.9985
5,SMALL HEART → DOTCOM POSTA,0.0127,0.0112,0.5089,0.1392,12.9907,0.976
6,TOY TIDY PIN → DOTCOM POSTA,0.0102,0.0086,0.4107,0.1359,10.4837,0.9531
7,JUMBO BAG TO → DOTCOM POSTA,0.0151,0.009,0.5395,0.1312,13.7717,0.9198
8,JUMBO BAG CH → DOTCOM POSTA,0.0107,0.0054,0.6508,0.1311,16.6132,0.9197
9,BATHROOM MET → DOTCOM POSTA,0.014,0.0118,0.4084,0.1273,10.4253,0.8925




VISUALIZATION 2: Percent Change Heatmap (Top 12 Rules)



✓ Heatmap displayed above

TABLE 2: Percent Change Analysis (Top 12 Rules)


Unnamed: 0,Antecedents,Consequents,Supp_Chg%,Conf_Chg%,Lift_Chg%
0,RED SPOTTY BISCUIT TIN,DOTCOM POSTAGE,-10.31%,-68.58%,-91.37%
1,"SET 3 RETROSPOT TEA,COFFEE,SUGAR",DOTCOM POSTAGE,-16.92%,-67.93%,-91.19%
2,ROTATING LEAVES T-LIGHT HOLDER,DOTCOM POSTAGE,4.17%,-77.99%,-93.95%
3,SUKI SHOULDER BAG,DOTCOM POSTAGE,-13.56%,-77.91%,-93.93%
4,CHARLIE+LOLA PINK HOT WATER BOTTLE,DOTCOM POSTAGE,6.62%,-79.80%,-94.45%
5,SMALL HEART MEASURING SPOONS,DOTCOM POSTAGE,-11.28%,-72.65%,-92.49%
6,TOY TIDY PINK POLKADOT,DOTCOM POSTAGE,-15.44%,-66.91%,-90.91%
7,JUMBO BAG TOYS,DOTCOM POSTAGE,-40.45%,-75.69%,-93.32%
8,JUMBO BAG CHARLIE AND LOLA TOYS,DOTCOM POSTAGE,-49.66%,-79.85%,-94.46%
9,BATHROOM METAL SIGN,DOTCOM POSTAGE,-15.55%,-68.84%,-91.44%




VISUALIZATION 3: Summary Statistics Comparison (All Rules)



✓ Summary statistics chart displayed above

TABLE 3: Summary Statistics - All Rules


VISUALIZATION 1: Top 15 Rules Comparison (Unweighted vs Weighted Metrics)



✓ Interactive visualization displayed above

TABLE 1: Top 15 Rules - Detailed Metrics


Unnamed: 0,Rule,Unw_Supp,W_Supp,Unw_Conf,W_Conf,Unw_Lift,W_Lift
0,RED SPOTTY B → DOTCOM POSTA,0.0107,0.0096,0.5693,0.1789,14.5322,1.2545
1,SET 3 RETROS → DOTCOM POSTA,0.0104,0.0087,0.5013,0.1608,12.7968,1.1275
2,ROTATING LEA → DOTCOM POSTA,0.0111,0.0116,0.6689,0.1472,17.0739,1.0324
3,SUKI SHOULD → DOTCOM POSTA,0.0186,0.0161,0.6627,0.1464,16.9163,1.0268
4,CHARLIE+LOLA → DOTCOM POSTA,0.0119,0.0127,0.7049,0.1424,17.9934,0.9985
5,SMALL HEART → DOTCOM POSTA,0.0127,0.0112,0.5089,0.1392,12.9907,0.976
6,TOY TIDY PIN → DOTCOM POSTA,0.0102,0.0086,0.4107,0.1359,10.4837,0.9531
7,JUMBO BAG TO → DOTCOM POSTA,0.0151,0.009,0.5395,0.1312,13.7717,0.9198
8,JUMBO BAG CH → DOTCOM POSTA,0.0107,0.0054,0.6508,0.1311,16.6132,0.9197
9,BATHROOM MET → DOTCOM POSTA,0.014,0.0118,0.4084,0.1273,10.4253,0.8925




VISUALIZATION 2: Percent Change Heatmap (Top 12 Rules)



✓ Heatmap displayed above

TABLE 2: Percent Change Analysis (Top 12 Rules)


Unnamed: 0,Antecedents,Consequents,Supp_Chg%,Conf_Chg%,Lift_Chg%
0,RED SPOTTY BISCUIT TIN,DOTCOM POSTAGE,-10.31%,-68.58%,-91.37%
1,"SET 3 RETROSPOT TEA,COFFEE,SUGAR",DOTCOM POSTAGE,-16.92%,-67.93%,-91.19%
2,ROTATING LEAVES T-LIGHT HOLDER,DOTCOM POSTAGE,4.17%,-77.99%,-93.95%
3,SUKI SHOULDER BAG,DOTCOM POSTAGE,-13.56%,-77.91%,-93.93%
4,CHARLIE+LOLA PINK HOT WATER BOTTLE,DOTCOM POSTAGE,6.62%,-79.80%,-94.45%
5,SMALL HEART MEASURING SPOONS,DOTCOM POSTAGE,-11.28%,-72.65%,-92.49%
6,TOY TIDY PINK POLKADOT,DOTCOM POSTAGE,-15.44%,-66.91%,-90.91%
7,JUMBO BAG TOYS,DOTCOM POSTAGE,-40.45%,-75.69%,-93.32%
8,JUMBO BAG CHARLIE AND LOLA TOYS,DOTCOM POSTAGE,-49.66%,-79.85%,-94.46%
9,BATHROOM METAL SIGN,DOTCOM POSTAGE,-15.55%,-68.84%,-91.44%




VISUALIZATION 3: Summary Statistics Comparison (All Rules)



✓ Summary statistics chart displayed above

TABLE 3: Summary Statistics - All Rules


Unnamed: 0,Metric,Unw_Support,W_Support,Unw_Confidence,W_Confidence,Unw_Lift,W_Lift
0,Mean,0.0138,0.0018,0.5352,0.0128,13.5702,0.1055
1,Median,0.0123,0.001,0.5135,0.0072,9.7275,0.0652
2,Min,0.01,0.0001,0.3002,0.0003,2.5078,0.0029
3,Max,0.0436,0.0211,0.9757,0.1789,74.567,1.2545
4,Std Dev,0.0045,0.0031,0.1607,0.0215,12.613,0.1531



✓ All visualizations and tables displayed successfully


## 6. Visualizations: Weight Impact

## 7. Business Insights từ Weighted Rules

In [35]:
# NOTE(review): `insights` is not defined in any cell visible in this notebook;
# it must be created by an earlier cell before this one runs - TODO confirm it
# still exists, otherwise Restart & Run All fails here with NameError.
wide_rule = "=" * 100

print(wide_rule)
print("BUSINESS INSIGHTS TỪ WEIGHTED ASSOCIATION RULES")
print(wide_rule)

# Full text of every insight, one block per insight.
for insight in insights:
    print(
        f"\n📊 INSIGHT {insight['number']}: {insight['title']}",
        f"\n   Luật: {insight['rule']}",
        f"   Chỉ số: {insight['metrics']}",
        f"\n   📝 Giải thích: {insight['insight']}",
        f"\n   {insight['recommendation']}",
        "\n" + "-" * 100,
        sep="\n",
    )

# Create summary table of insights
print("\n\n" + wide_rule)
print("SUMMARY TABLE: Key Insights")
print(wide_rule)

# Rule text is cut to 50 characters (plus '...') and only the first
# comma-separated metric is kept, so the table stays narrow.
insights_summary_data = [
    {
        '#': insight['number'],
        'Title': insight['title'],
        'Rule': insight['rule'] if len(insight['rule']) <= 50 else insight['rule'][:50] + '...',
        'Key Metric': insight['metrics'].split(',')[0],
    }
    for insight in insights
]

insights_summary_df = pd.DataFrame(insights_summary_data)
print(insights_summary_df.to_string(index=False))

print("\n\n✓ Table-based visualizations completed successfully")
print(f"✓ Total insights extracted: {len(insights)}")
print("✓ All results displayed in notebook (no HTML/PNG exports)")

BUSINESS INSIGHTS TỪ WEIGHTED ASSOCIATION RULES

📊 INSIGHT 1: Sản phẩm cao cấp/ Giá cao thường bán kèm

   Luật: RED SPOTTY BISCUIT TIN → DOTCOM POSTAGE
   Chỉ số: Weighted Lift: 1.254, Weighted Confidence: 0.179

   📝 Giải thích: Khách hàng mua RED SPOTTY BISCUIT TIN thường có xu hướng mua thêm DOTCOM POSTAGE với tỷ lệ 17.9% (từ góc độ giá trị giao dịch). Weighted lift 1.25 cho thấy sự kết hợp này là 1.25x so với kỳ vọng ngẫu nhiên.

   📌 Hành động: Trưng bày chung hoặc bundle combo RED SPOTTY BISCUIT TIN + DOTCOM POSTAGE; Tạo khuyến mãi (cross-selling) khi khách mua RED SPOTTY BISCUIT TIN.

----------------------------------------------------------------------------------------------------

📊 INSIGHT 2: Kết hợp phổ biến & Có giá trị cao

   Luật: GREEN REGENCY TEACUP AND SAUCER, JUMBO BAG RED RETROSPOT → DOTCOM POSTAGE
   Chỉ số: Weighted Support: 0.021, Weighted Confidence: 0.076

   📝 Giải thích: Kết hợp GREEN REGENCY TEACUP AND SAUCER, JUMBO BAG RED RETROSPOT + DOTCOM POSTAGE chiế