In [51]:
import pandas as pd

# Read the strong-pairs table from the results folder and preview it.
pairs_csv = '../outputs/results/strong_pairs.csv'
pairs = pd.read_csv(pairs_csv)

n_pairs = len(pairs)
print(f"Total pairs: {n_pairs}")
pairs.head(10)

Total pairs: 545


Unnamed: 0,antecedents,consequents,support,confidence,lift
0,FELTCRAFT CUSHION OWL,FELTCRAFT CUSHION RABBIT,0.016692,0.53528,18.396318
1,FELTCRAFT CUSHION RABBIT,FELTCRAFT CUSHION OWL,0.016692,0.573664,18.396318
2,GREEN REGENCY TEACUP AND SAUCER,PINK REGENCY TEACUP AND SAUCER,0.029514,0.634067,17.972045
3,PINK REGENCY TEACUP AND SAUCER,GREEN REGENCY TEACUP AND SAUCER,0.029514,0.836559,17.972045
4,CHARLOTTE BAG PINK POLKADOT,RED RETROSPOT CHARLOTTE BAG,0.022193,0.717791,17.29523
5,RED RETROSPOT CHARLOTTE BAG,CHARLOTTE BAG PINK POLKADOT,0.022193,0.534735,17.29523
6,ROSES REGENCY TEACUP AND SAUCER,PINK REGENCY TEACUP AND SAUCER,0.027807,0.582671,16.515274
7,PINK REGENCY TEACUP AND SAUCER,ROSES REGENCY TEACUP AND SAUCER,0.027807,0.788172,16.515274
8,GREEN REGENCY TEACUP AND SAUCER,ROSES REGENCY TEACUP AND SAUCER,0.03547,0.762021,15.967312
9,ROSES REGENCY TEACUP AND SAUCER,GREEN REGENCY TEACUP AND SAUCER,0.03547,0.743243,15.967312


## Remove bags

In [52]:
# Flag a pair when either side mentions "BAG" (case-insensitive substring match;
# missing names count as "no match"), then keep only the unflagged pairs.
ant_has_bag = pairs['antecedents'].str.contains('BAG', case=False, na=False)
cons_has_bag = pairs['consequents'].str.contains('BAG', case=False, na=False)
pairs_no_bags = pairs[~(ant_has_bag | cons_has_bag)].copy()

n_kept = len(pairs_no_bags)
print(f"After removing bags: {n_kept} pairs")
print(f"Removed: {len(pairs) - n_kept} bag pairs")
pairs_no_bags.head(15)

After removing bags: 305 pairs
Removed: 240 bag pairs


Unnamed: 0,antecedents,consequents,support,confidence,lift
0,FELTCRAFT CUSHION OWL,FELTCRAFT CUSHION RABBIT,0.016692,0.53528,18.396318
1,FELTCRAFT CUSHION RABBIT,FELTCRAFT CUSHION OWL,0.016692,0.573664,18.396318
2,GREEN REGENCY TEACUP AND SAUCER,PINK REGENCY TEACUP AND SAUCER,0.029514,0.634067,17.972045
3,PINK REGENCY TEACUP AND SAUCER,GREEN REGENCY TEACUP AND SAUCER,0.029514,0.836559,17.972045
6,ROSES REGENCY TEACUP AND SAUCER,PINK REGENCY TEACUP AND SAUCER,0.027807,0.582671,16.515274
7,PINK REGENCY TEACUP AND SAUCER,ROSES REGENCY TEACUP AND SAUCER,0.027807,0.788172,16.515274
8,GREEN REGENCY TEACUP AND SAUCER,ROSES REGENCY TEACUP AND SAUCER,0.03547,0.762021,15.967312
9,ROSES REGENCY TEACUP AND SAUCER,GREEN REGENCY TEACUP AND SAUCER,0.03547,0.743243,15.967312
10,RED HARMONICA IN BOX,BLUE HARMONICA IN BOX,0.015099,0.476077,15.322806
11,BLUE HARMONICA IN BOX,RED HARMONICA IN BOX,0.015099,0.485958,15.322806


## Categorization attempt #43

In [53]:
# Ordered (category, keywords) rules. The first rule with a matching keyword wins,
# so 'LUNCH BOX' is claimed by 'Kids' and 'HARMONICA ... IN BOX' by
# 'Clothing & Accessories' before the generic 'BOX' rule is reached.
_CATEGORY_RULES = (
    ('Food & Dining', ('TEACUP', 'SAUCER', 'CAKESTAND', 'CAKE CASES',
                       'NAPKINS', 'BAKING', 'KITCHEN SCALES')),
    ('Kids', ('LUNCH BOX',)),
    ('Home Decor', ('ALARM CLOCK', 'CUSHION', 'PILLOW',
                    'METAL SIGN', 'FRAME', 'TLIGHT', 'T LIGHT',
                    'BUILDING BLOCK WORD', 'WICKER HEART',
                    'PAPER CHAIN', 'CHRISTMAS KIT')),
    ('Clothing & Accessories', ('HAND WARMER', 'HOT WATER BOTTLE',
                                'PARASOL', 'UMBRELLA', 'HARMONICA')),
    # 'BOX' already covers 'TRINKET BOX', so a separate check for it is unnecessary.
    ('Storage', ('BOX',)),
    ('Entertainment', ('CARD GAME', 'SNAP')),
)


def categorize_product(text):
    """Map a product description to a coarse category by keyword.

    Matching is a case-insensitive substring test against the rules in
    ``_CATEGORY_RULES``, evaluated top to bottom; the first hit decides.

    Args:
        text: Product description. Non-string values (e.g. ``None`` or NaN from
            a missing cell) are accepted and fall into 'Other' instead of
            raising ``AttributeError``.

    Returns:
        The category name, or 'Other' when no keyword matches.
    """
    if not isinstance(text, str):
        return 'Other'

    text_upper = text.upper()
    for category, keywords in _CATEGORY_RULES:
        if any(keyword in text_upper for keyword in keywords):
            return category
    return 'Other'

# Categorize both sides of every pair with the same keyword rules.
for side, source_col in (('ant', 'antecedents'), ('cons', 'consequents')):
    pairs_no_bags[f'category_{side}'] = pairs_no_bags[source_col].apply(categorize_product)

# A pair is a "matching set" when both items land in the same category;
# the antecedent's category is used as the pair's primary category.
pairs_no_bags['is_matching_set'] = pairs_no_bags['category_ant'].eq(pairs_no_bags['category_cons'])
pairs_no_bags['primary_category'] = pairs_no_bags['category_ant']

def create_pair_id(row):
    """Build a direction-independent key for a pair.

    The antecedent and consequent are put in sorted order and joined with '|',
    so A -> B and B -> A yield the same id.
    """
    first, second = sorted((row['antecedents'], row['consequents']))
    return f"{first}|{second}"

# Tag every rule with its direction-independent pair key.
pairs_no_bags['pair_id'] = pairs_no_bags.apply(create_pair_id, axis=1)

# Keep one row per pair. In the table preview above, A -> B and B -> A carry the
# same lift, so they tie in this sort. The default sort algorithm does not
# guarantee the order of ties, which would leave it unspecified which direction
# (and therefore which confidence / primary_category) survives drop_duplicates.
# mergesort is stable: ties keep their current row order, so the direction
# listed first is the one retained.
pairs_no_bags = (
    pairs_no_bags
    .sort_values('lift', ascending=False, kind='mergesort')
    .drop_duplicates('pair_id')
    .copy()
)


print(pairs_no_bags['primary_category'].value_counts())
print(f"\nMatching sets: {pairs_no_bags['is_matching_set'].sum()}")
print(f"Cross-category: {(~pairs_no_bags['is_matching_set']).sum()}")

primary_category
Other                     84
Home Decor                60
Food & Dining             41
Clothing & Accessories    39
Storage                    8
Kids                       4
Entertainment              1
Name: count, dtype: int64

Matching sets: 153
Cross-category: 84


## Curate top 10 with diversity (removed weak cross-category additions)

In [54]:
# Lift threshold for a cross-category pair to count as "strong". It is applied
# to the selection itself so that the count printed below describes exactly the
# rows held in cross_category.
MIN_CROSS_LIFT = 3

cross_category = pairs_no_bags[
    (~pairs_no_bags['is_matching_set']) & (pairs_no_bags['lift'] > MIN_CROSS_LIFT)
].sort_values('lift', ascending=False)

print(f"Cross-category pairs with lift > {MIN_CROSS_LIFT}: {len(cross_category)}")
if cross_category.empty:
    print("No strong cross-category pairs found.")
else:
    print("\nTop cross-category bundles:")
    # Show the ten highest-lift cross-category pairs with their two categories.
    for idx, row in cross_category.head(10).iterrows():
        print(f"  {row['antecedents']} + {row['consequents']}")
        print(f"    Lift: {row['lift']:.2f} | {row['category_ant']} + {row['category_cons']}\n")

Cross-category pairs with lift > 3: 84

Top cross-category bundles:
  RED WOOLLY HOTTIE WHITE HEART + KNITTED UNION FLAG HOT WATER BOTTLE
    Lift: 8.78 | Other + Clothing & Accessories

  PACK OF 12 LONDON TISSUES + LUNCH BOX I LOVE LONDON
    Lift: 8.75 | Other + Kids

  60 CAKE CASES VINTAGE CHRISTMAS + PAPER CHAIN KIT VINTAGE CHRISTMAS
    Lift: 8.56 | Food & Dining + Home Decor

  SET OF 20 VINTAGE CHRISTMAS NAPKINS + PAPER CHAIN KIT VINTAGE CHRISTMAS
    Lift: 8.23 | Food & Dining + Home Decor

  CHOCOLATE THIS WAY METAL SIGN + HAND OVER THE CHOCOLATE SIGN
    Lift: 7.77 | Home Decor + Other

  COOK WITH WINE METAL SIGN + HAND OVER THE CHOCOLATE SIGN
    Lift: 7.45 | Home Decor + Other

  WOODEN BOX OF DOMINOES + VINTAGE HEADS AND TAILS CARD GAME
    Lift: 7.15 | Storage + Entertainment

  WOODEN BOX OF DOMINOES + VINTAGE SNAP CARDS
    Lift: 6.90 | Storage + Entertainment

  GIN TONIC DIET METAL SIGN + HAND OVER THE CHOCOLATE SIGN
    Lift: 6.61 | Home Decor + Other

  NO SINGIN

In [55]:
# Selection budget and fallback thresholds.
MAX_BUNDLES = 10            # total bundles to collect
MAX_STRONG_CROSS = 3        # cross-category pairs taken from the top of cross_category
MIN_CROSS = 2               # below this many, fall back to the relaxed search
WEAK_MIN_LIFT = 2           # relaxed search: minimum lift (exclusive)
WEAK_MIN_CONFIDENCE = 0.4   # relaxed search: minimum confidence (exclusive)


def _pair_summary(row):
    """Format '<antecedent> + <consequent> (Lift: x.xx)', names cut to 40 characters."""
    return f"{row['antecedents'][:40]} + {row['consequents'][:40]} (Lift: {row['lift']:.2f})"


selected_bundles = []
used_categories = set()
selected_ids = set()  # index labels of the rows already placed in selected_bundles

# Step 1: take the highest-lift cross-category pairs.
cross_added = 0
for idx, row in cross_category.head(MAX_STRONG_CROSS).iterrows():
    selected_bundles.append(row)
    selected_ids.add(idx)
    cross_added += 1
    print(f"Added CROSS-CATEGORY: {_pair_summary(row)}")
    print(f"  {row['category_ant']} + {row['category_cons']}")

# Step 2: if too few were found, look again with relaxed lift/confidence limits.
if cross_added < MIN_CROSS:
    print(f"\n Only found {cross_added} strong cross-category pairs. Searching with lower thresholds...")

    is_cross = ~pairs_no_bags['is_matching_set']
    lift_ok = pairs_no_bags['lift'] > WEAK_MIN_LIFT
    confidence_ok = pairs_no_bags['confidence'] > WEAK_MIN_CONFIDENCE
    weaker_cross = pairs_no_bags[is_cross & lift_ok & confidence_ok].sort_values('lift', ascending=False)

    for idx, row in weaker_cross.head(3).iterrows():
        if cross_added >= MIN_CROSS:
            break
        if idx in selected_ids:
            continue  # already picked in step 1
        selected_bundles.append(row)
        selected_ids.add(idx)
        cross_added += 1
        print(f"Added CROSS-CATEGORY (weaker): {_pair_summary(row)}")
        print(f"  {row['category_ant']} + {row['category_cons']}")


# Step 3: fill the remaining slots with the best matching set of each category,
# walking the matching sets from highest to lowest lift.
matching_sets = pairs_no_bags[pairs_no_bags['is_matching_set']].sort_values('lift', ascending=False)

for idx, row in matching_sets.iterrows():
    if len(selected_bundles) >= MAX_BUNDLES:
        break

    category = row['primary_category']
    if category in used_categories:
        continue  # one matching set per category

    selected_bundles.append(row)
    used_categories.add(category)
    print(f"Added from {category}: {_pair_summary(row)}")

Added CROSS-CATEGORY: RED WOOLLY HOTTIE WHITE HEART + KNITTED UNION FLAG HOT WATER BOTTLE (Lift: 8.78)
  Other + Clothing & Accessories
Added CROSS-CATEGORY: PACK OF 12 LONDON TISSUES + LUNCH BOX I LOVE LONDON (Lift: 8.75)
  Other + Kids
Added CROSS-CATEGORY: 60 CAKE CASES VINTAGE CHRISTMAS + PAPER CHAIN KIT VINTAGE CHRISTMAS (Lift: 8.56)
  Food & Dining + Home Decor
Added from Home Decor: FELTCRAFT CUSHION OWL + FELTCRAFT CUSHION RABBIT (Lift: 18.40)
Added from Food & Dining: GREEN REGENCY TEACUP AND SAUCER + PINK REGENCY TEACUP AND SAUCER (Lift: 17.97)
Added from Clothing & Accessories: RED HARMONICA IN BOX + BLUE HARMONICA IN BOX (Lift: 15.32)
Added from Kids: DOLLY GIRL LUNCH BOX + SPACEBOY LUNCH BOX (Lift: 14.98)
Added from Other: PLASTERS IN TIN CIRCUS PARADE + PLASTERS IN TIN SPACEBOY (Lift: 12.70)
Added from Storage: ROUND SNACK BOXES SET OF4 WOODLAND + ROUND SNACK BOXES SET OF 4 FRUITS (Lift: 12.19)
Added from Entertainment: VINTAGE HEADS AND TAILS CARD GAME + VINTAGE SNAP CAR

## Create Final Top 10

In [56]:
# Stack the selected rows into a frame and cap it at the first ten.
top_10 = pd.DataFrame(selected_bundles).iloc[:10].copy()

def create_bundle_name(row):
    """Return a display name for a product pair.

    Rules are tried in order and the first match wins. Every keyword rule is
    direction-independent: swapping antecedent and consequent gives the same
    name.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) providing string
            'antecedents' and 'consequents' plus a boolean 'is_matching_set'.

    Returns:
        The bundle name. Pairs matching no keyword rule get
        'Cross-Category Bundle' when 'is_matching_set' is false and
        'Matching Gift Set' otherwise.
    """
    ant = row['antecedents'].upper()
    cons = row['consequents'].upper()
    items = (ant, cons)

    def either(keyword):
        # True when at least one of the two items mentions the keyword.
        return any(keyword in item for item in items)

    def both(keyword):
        # True when both items mention the keyword.
        return all(keyword in item for item in items)

    if either('CUSHION'):
        return 'Feltcraft Cushion Duo'
    if either('TEACUP'):
        return 'Regency Tea Set'
    if either('HARMONICA'):
        return 'Musical Harmonica Set'
    # Requires a lunch box on both sides, so a lunch box paired with something
    # else can still reach the London rule below.
    if both('LUNCH BOX'):
        return 'Character Lunch Box Pair'
    if either('PLASTERS'):
        return 'Novelty Plasters Duo'
    if either('SNACK BOXES'):
        return 'Snack Box Collection'
    if either('CARD GAME') or either('SNAP'):
        return 'Vintage Card Game Bundle'
    # Both keywords are checked on both sides, so the name does not depend on
    # which item is the antecedent.
    if either('HOTTIE') or either('HOT WATER BOTTLE'):
        return 'Cozy Hot Water Bottle Set'
    # A Christmas cake-cases or Christmas paper-chain item on either side.
    if any('CHRISTMAS' in item and ('CAKE CASES' in item or 'PAPER CHAIN' in item)
           for item in items):
        return 'Festive Baking Bundle'
    # A London item on one side with a lunch box on the other, in either order.
    if ('LONDON' in ant and 'LUNCH BOX' in cons) or ('LONDON' in cons and 'LUNCH BOX' in ant):
        return 'London-Themed Gift Duo'
    if not row['is_matching_set']:
        return 'Cross-Category Bundle'
    return 'Matching Gift Set'

# Give every selected bundle its display name.
top_10['bundle_name'] = top_10.apply(create_bundle_name, axis=1)

# Remove the literal text 'Home Decor - ' from the category label. regex=False
# makes this a plain substring replacement, independent of the default that the
# installed pandas version uses for the `regex` argument.
# NOTE(review): none of the category labels shown in this notebook's output
# contain that text, so this looks like a pass-through today - confirm whether
# it is still needed.
top_10['category_display'] = top_10['primary_category'].str.replace('Home Decor - ', '', regex=False)

print("FINAL TOP 10 CURATED BUNDLES COMPLETE")

FINAL TOP 10 CURATED BUNDLES COMPLETE


## Export for graphs

In [58]:
# Columns written to the poster file, in output order; 'category_display' is
# published under the shorter name 'category'.
export_columns = ['bundle_name', 'category_display', 'antecedents', 'consequents',
                  'support', 'confidence', 'lift', 'is_matching_set']

export = (
    top_10[export_columns]
    .rename(columns={'category_display': 'category'})
    .sort_values('lift', ascending=False)
    .reset_index(drop=True)
)

export.to_csv('../outputs/results/final_bundles_for_poster.csv', index=False)

# Short summary of what was exported.
matching_flags = export['is_matching_set']
print(f"  - {matching_flags.sum()} matching sets")
print(f"  - {(~matching_flags).sum()} cross-category bundles")
print(f"  - Lift range: {export['lift'].min():.2f} - {export['lift'].max():.2f}")

  - 7 matching sets
  - 3 cross-category bundles
  - Lift range: 8.56 - 18.40
