In [19]:
import pandas as pd

In [20]:
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

In [21]:
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

In [22]:
df = pd.read_csv("baskets.csv")

In [23]:
df.head()

Unnamed: 0,order_id,basket
0,536365,"WHITE HANGING HEART T-LIGHT HOLDER,WHITE METAL..."
1,536366,"HAND WARMER UNION JACK,HAND WARMER RED POLKA DOT"
2,536367,"ASSORTED COLOUR BIRD ORNAMENT,POPPY'S PLAYHOUS..."
3,536368,"JAM MAKING SET WITH JARS,RED COAT RACK PARIS F..."
4,536369,BATH BUILDING BLOCK WORD


In [24]:
df['basket'] = df['basket'].apply(lambda x: x.split(','))
df.head()

Unnamed: 0,order_id,basket
0,536365,"[WHITE HANGING HEART T-LIGHT HOLDER, WHITE MET..."
1,536366,"[HAND WARMER UNION JACK, HAND WARMER RED POLKA..."
2,536367,"[ASSORTED COLOUR BIRD ORNAMENT, POPPY'S PLAYHO..."
3,536368,"[JAM MAKING SET WITH JARS, RED COAT RACK PARIS..."
4,536369,[BATH BUILDING BLOCK WORD]


In [25]:
# One hot encode transasctions
te = TransactionEncoder()
te_array = te.fit(df['basket']).transform(df['basket'])

basket_df = pd.DataFrame(te_array, columns=te.columns_)
basket_df.head()

Unnamed: 0,Unnamed: 1,SET 2 TEA TOWELS I LOVE LONDON,3 PIECE SPACEBOY COOKIE CUTTER SET,3 STRIPEY MICE FELTCRAFT,3 TIER CAKE TIN GREEN AND CREAM,3 TIER CAKE TIN RED AND CREAM,5 HOOK HANGER MAGIC TOADSTOOL,60 TEATIME FAIRY CAKE CASES,72 SWEETHEART FAIRY CAKE CASES,AIRLINE LOUNGE,...,WOOD BLACK BOARD ANT WHITE FINISH,WOOD S/3 CABINET ANT WHITE FINISH,WOODEN BOX OF DOMINOES,WOODEN FRAME ANTIQUE WHITE,WOODEN OWLS LIGHT GARLAND,WOODEN PICTURE FRAME WHITE FINISH,YELLOW BREAKFAST CUP AND SAUCER,YELLOW COAT RACK PARIS FASHION,YOU'RE CONFUSING ME METAL SIGN,ZINC WILLIE WINKIE CANDLE STICK
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,True,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [28]:
# Generating frequent details
frequent_itemsets = apriori(
    basket_df,
    min_support=0.05,
    use_colnames=True,
    max_len=2
)

In [29]:
frequent_itemsets.sort_values(by='support', ascending=False).head()

Unnamed: 0,support,itemsets
51,0.162162,(WHITE HANGING HEART T-LIGHT HOLDER)
16,0.135135,(HAND WARMER UNION JACK)
44,0.135135,(SET 7 BABUSHKA NESTING BOXES)
28,0.135135,(KNITTED UNION FLAG HOT WATER BOTTLE)
120,0.135135,"(WHITE HANGING HEART T-LIGHT HOLDER, KNITTED U..."


In [30]:
# Generating Association Rules
rules = association_rules(
    frequent_itemsets,
    metric="lift",
    min_threshold=1
)

rules.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(PACK OF 60 PINK PAISLEY CAKE CASES),(60 TEATIME FAIRY CAKE CASES),0.054054,0.081081,0.054054,1.0,12.333333,1.0,0.049671,inf,0.971429,0.666667,1.0,0.833333
1,(60 TEATIME FAIRY CAKE CASES),(PACK OF 60 PINK PAISLEY CAKE CASES),0.081081,0.054054,0.054054,0.666667,12.333333,1.0,0.049671,2.837838,1.0,0.666667,0.647619,0.833333
2,(PACK OF 72 RETROSPOT CAKE CASES),(60 TEATIME FAIRY CAKE CASES),0.081081,0.081081,0.054054,0.666667,8.222222,1.0,0.04748,2.756757,0.955882,0.5,0.637255,0.666667
3,(60 TEATIME FAIRY CAKE CASES),(PACK OF 72 RETROSPOT CAKE CASES),0.081081,0.081081,0.054054,0.666667,8.222222,1.0,0.04748,2.756757,0.955882,0.5,0.637255,0.666667
4,(ALARM CLOCK BAKELIKE RED ),(ALARM CLOCK BAKELIKE GREEN),0.081081,0.108108,0.081081,1.0,9.25,1.0,0.072316,inf,0.970588,0.75,1.0,0.875


In [31]:
# Top 5 Association Rules
top5 = rules.sort_values(
    by=['lift','support'],
    ascending=False
).head(5)

top5[['antecedents','consequents','support','confidence','lift']]

Unnamed: 0,antecedents,consequents,support,confidence,lift
96,(HAND WARMER OWL DESIGN),(HAND WARMER RED RETROSPOT),0.054054,1.0,18.5
97,(HAND WARMER RED RETROSPOT),(HAND WARMER OWL DESIGN),0.054054,1.0,18.5
108,(LOVE BUILDING BLOCK WORD),(HOME BUILDING BLOCK WORD),0.054054,1.0,18.5
109,(HOME BUILDING BLOCK WORD),(LOVE BUILDING BLOCK WORD),0.054054,1.0,18.5
48,(RETRO COFFEE MUGS ASSORTED),(EDWARDIAN PARASOL RED),0.081081,1.0,12.333333
