### Import Libraries

In [1]:
#!pip install mlxtend

In [2]:
import pandas as pd
from mlxtend.frequent_patterns import apriori,association_rules

import warnings
warnings.filterwarnings('ignore')

### Import Data

In [3]:
online_retail=pd.read_excel('Online Retail.xlsx')
online_retail.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom


### Data Understanding

In [4]:
online_retail.shape

(541909, 8)

In [5]:
online_retail.isna().sum()

InvoiceNo           0
StockCode           0
Description      1454
Quantity            0
InvoiceDate         0
UnitPrice           0
CustomerID     135080
Country             0
dtype: int64

In [6]:
online_retail.dtypes

InvoiceNo              object
StockCode              object
Description            object
Quantity                int64
InvoiceDate    datetime64[ns]
UnitPrice             float64
CustomerID            float64
Country                object
dtype: object

### Data Preparation

In [7]:
online_retail.drop('CustomerID',axis=1,inplace=True)

In [8]:
online_retail.dropna(inplace=True)

In [9]:
online_retail.isna().sum()

InvoiceNo      0
StockCode      0
Description    0
Quantity       0
InvoiceDate    0
UnitPrice      0
Country        0
dtype: int64

In [10]:
france_data=online_retail[online_retail['Country']=='France']
france_data.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,Country
26,536370,22728,ALARM CLOCK BAKELIKE PINK,24,2010-12-01 08:45:00,3.75,France
27,536370,22727,ALARM CLOCK BAKELIKE RED,24,2010-12-01 08:45:00,3.75,France
28,536370,22726,ALARM CLOCK BAKELIKE GREEN,12,2010-12-01 08:45:00,3.75,France
29,536370,21724,PANDA AND BUNNIES STICKER SHEET,12,2010-12-01 08:45:00,0.85,France
30,536370,21883,STARS GIFT TAPE,24,2010-12-01 08:45:00,0.65,France


In [11]:
france_data_pivot=pd.pivot_table(data=france_data,values='Quantity',index='InvoiceNo',columns='Description').fillna(0)
france_data_pivot.head()

Description,50'S CHRISTMAS GIFT BAG LARGE,DOLLY GIRL BEAKER,I LOVE LONDON MINI BACKPACK,NINE DRAWER OFFICE TIDY,SET 2 TEA TOWELS I LOVE LONDON,SPACEBOY BABY GIFT SET,TRELLIS COAT RACK,10 COLOUR SPACEBOY PEN,12 COLOURED PARTY BALLOONS,12 EGG HOUSE PAINTED WOOD,...,WRAP VINTAGE PETALS DESIGN,YELLOW COAT RACK PARIS FASHION,YELLOW GIANT GARDEN THERMOMETER,YELLOW SHARK HELICOPTER,ZINC STAR T-LIGHT HOLDER,ZINC FOLKART SLEIGH BELLS,ZINC HERB GARDEN CONTAINER,ZINC METAL HEART DECORATION,ZINC T-LIGHT HOLDER STAR LARGE,ZINC T-LIGHT HOLDER STARS SMALL
InvoiceNo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
536370,0.0,0.0,0.0,0.0,24.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
536852,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
536974,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
537065,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
537463,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [12]:
france_data_encoded=france_data_pivot.applymap(lambda x: 1 if x>0 else 0)
france_data_encoded.head()

Description,50'S CHRISTMAS GIFT BAG LARGE,DOLLY GIRL BEAKER,I LOVE LONDON MINI BACKPACK,NINE DRAWER OFFICE TIDY,SET 2 TEA TOWELS I LOVE LONDON,SPACEBOY BABY GIFT SET,TRELLIS COAT RACK,10 COLOUR SPACEBOY PEN,12 COLOURED PARTY BALLOONS,12 EGG HOUSE PAINTED WOOD,...,WRAP VINTAGE PETALS DESIGN,YELLOW COAT RACK PARIS FASHION,YELLOW GIANT GARDEN THERMOMETER,YELLOW SHARK HELICOPTER,ZINC STAR T-LIGHT HOLDER,ZINC FOLKART SLEIGH BELLS,ZINC HERB GARDEN CONTAINER,ZINC METAL HEART DECORATION,ZINC T-LIGHT HOLDER STAR LARGE,ZINC T-LIGHT HOLDER STARS SMALL
InvoiceNo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
536370,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
536852,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
536974,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
537065,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
537463,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Model Building // Data Mining

In [13]:
frequent_itemsets=apriori(df=france_data_encoded,min_support=0.03,use_colnames=True)
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.034707,( SPACEBOY BABY GIFT SET)
1,0.030369,(3 PIECE SPACEBOY COOKIE CUTTER SET)
2,0.039046,(36 PENCILS TUBE RED RETROSPOT)
3,0.060738,(4 TRADITIONAL SPINNING TOPS)
4,0.082430,(ALARM CLOCK BAKELIKE GREEN)
...,...,...
479,0.030369,"(SET/6 RED SPOTTY PAPER PLATES, SET/6 RED SPOT..."
480,0.030369,"(SET/6 RED SPOTTY PAPER PLATES, POSTAGE, SET/6..."
481,0.032538,"(SET/6 RED SPOTTY PAPER PLATES, SET/6 RED SPOT..."
482,0.045553,"(PLASTERS IN TIN WOODLAND ANIMALS, PLASTERS IN..."


In [14]:
best_associates=association_rules(df=frequent_itemsets,metric='confidence',min_threshold=0.8)
best_associates

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(ALARM CLOCK BAKELIKE RED ),(ALARM CLOCK BAKELIKE GREEN),0.080260,0.082430,0.067245,0.837838,10.164296,0.060629,5.658351
1,(ALARM CLOCK BAKELIKE GREEN),(ALARM CLOCK BAKELIKE RED ),0.082430,0.080260,0.067245,0.815789,10.164296,0.060629,4.992873
2,(ALARM CLOCK BAKELIKE GREEN),(POSTAGE),0.082430,0.650759,0.071584,0.868421,1.334474,0.017942,2.654230
3,(ALARM CLOCK BAKELIKE ORANGE),(ALARM CLOCK BAKELIKE RED ),0.036876,0.080260,0.030369,0.823529,10.260731,0.027409,5.211858
4,(ALARM CLOCK BAKELIKE ORANGE),(POSTAGE),0.036876,0.650759,0.032538,0.882353,1.355882,0.008540,2.968547
...,...,...,...,...,...,...,...,...,...
274,"(POSTAGE, SET/6 RED SPOTTY PAPER PLATES, SET/2...",(SET/6 RED SPOTTY PAPER CUPS),0.071584,0.117137,0.069414,0.969697,8.278339,0.061029,29.134490
275,"(SET/6 RED SPOTTY PAPER CUPS, SET/6 RED SPOTTY...",(POSTAGE),0.084599,0.650759,0.069414,0.820513,1.260855,0.014361,1.945770
276,"(POSTAGE, SET/6 RED SPOTTY PAPER CUPS, SET/20 ...",(SET/6 RED SPOTTY PAPER PLATES),0.071584,0.108460,0.069414,0.969697,8.940606,0.061650,29.420824
277,"(SET/6 RED SPOTTY PAPER PLATES, SET/20 RED RET...","(POSTAGE, SET/6 RED SPOTTY PAPER CUPS)",0.086768,0.099783,0.069414,0.800000,8.017391,0.060756,4.501085


### Here, we got the Best associates by removing redundant rules.

In [15]:
best_associates.to_csv('best_associates.csv')

### End !!!