In [1]:
import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

In [2]:
# Load the dataset from the CSV file into a DataFrame and preview the first rows.
data = pd.read_csv('OnlineRetail.csv')
data.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,12/1/2010 8:26,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,12/1/2010 8:26,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,12/1/2010 8:26,3.39,17850.0,United Kingdom


In [3]:
# Display the column names to inspect which features the dataset contains.
data.columns

Index(['InvoiceNo', 'StockCode', 'Description', 'Quantity', 'InvoiceDate',
       'UnitPrice', 'CustomerID', 'Country'],
      dtype='object')

In [4]:
# Inspect the distinct transaction locations (countries) present in the dataset.
data['Country'].unique()

array(['United Kingdom', 'France', 'Australia', 'Netherlands', 'Germany',
       'Norway', 'EIRE', 'Switzerland', 'Spain', 'Poland', 'Portugal',
       'Italy', 'Belgium', 'Lithuania', 'Japan', 'Iceland',
       'Channel Islands', 'Denmark', 'Cyprus', 'Sweden', 'Austria',
       'Israel', 'Finland', 'Bahrain', 'Greece', 'Hong Kong', 'Singapore',
       'Lebanon', 'United Arab Emirates', 'Saudi Arabia',
       'Czech Republic', 'Canada', 'Unspecified', 'Brazil', 'USA',
       'European Community', 'Malta', 'RSA'], dtype=object)

In [5]:
# Data cleaning, expressed as a single non-mutating chain:
#   1. trim surrounding whitespace from the product descriptions,
#   2. drop the rows that have no invoice number,
#   3. store the invoice number as a string.
data = (
    data
    .assign(Description=data['Description'].str.strip())
    .dropna(axis=0, subset=['InvoiceNo'])
    .assign(InvoiceNo=lambda frame: frame['InvoiceNo'].astype('str'))
)

# Discard all credit transactions (invoice numbers containing 'C').
data = data[~data['InvoiceNo'].str.contains('C')]

In [6]:
def build_basket(transactions, country):
    """Build the invoice-by-product quantity matrix for one country.

    Parameters
    ----------
    transactions : pandas.DataFrame
        Transaction rows; the 'Country', 'InvoiceNo', 'Description' and
        'Quantity' columns are read.
    country : str
        Value of the 'Country' column whose rows are kept.

    Returns
    -------
    pandas.DataFrame
        One row per 'InvoiceNo' (used as the index) and one column per
        'Description'. Each cell holds the summed 'Quantity' of that product
        on that invoice, with 0 where the product does not appear.
    """
    return (transactions[transactions['Country'] == country]
            .groupby(['InvoiceNo', 'Description'])['Quantity']
            .sum().unstack().reset_index().fillna(0)
            .set_index('InvoiceNo'))


# Transactions made in France
basket_France = build_basket(data, "France")

# Transactions made in the United Kingdom
basket_UK = build_basket(data, "United Kingdom")

# Transactions made in Portugal
basket_Por = build_basket(data, "Portugal")

# Transactions made in Sweden
basket_Sweden = build_basket(data, "Sweden")

In [7]:
# To make the data numeric, the basket quantities are binarized with a
# hot-encoding step (similar in spirit to a label encoder).
def hot_encode(x):
    """Map a basket quantity to a 0/1 presence flag.

    Parameters
    ----------
    x : number
        Quantity stored in one cell of a basket matrix.

    Returns
    -------
    int
        1 when ``x >= 1``, otherwise 0.

    Notes
    -----
    Values strictly between 0 and 1, and NaN, previously fell through both
    conditions and yielded ``None``. They now map to 0, so the function
    always returns an integer flag.
    """
    # Any comparison with NaN is False, so NaN also lands in the 0 branch.
    return 1 if x >= 1 else 0
 
# Replace every basket with its element-wise 0/1 encoding.
basket_France = basket_France.applymap(hot_encode)
basket_UK = basket_UK.applymap(hot_encode)
basket_Por = basket_Por.applymap(hot_encode)
basket_Sweden = basket_Sweden.applymap(hot_encode)

# Keep the `basket_encoded` name bound to the last encoded basket, as before.
basket_encoded = basket_Sweden

In [9]:
# France
# Build the model: frequent itemsets with a minimum support of 0.05.
frq_items = apriori(basket_France, use_colnames=True, min_support=0.05)

# Collect the association rules (lift threshold 1) in a DataFrame, ordered by
# descending confidence and then descending lift.
rules = (
    association_rules(frq_items, metric="lift", min_threshold=1)
    .sort_values(by=['confidence', 'lift'], ascending=[False, False])
)
print(rules.head())

                                           antecedents  \
44                        (JUMBO BAG WOODLAND ANIMALS)   
258  (PLASTERS IN TIN CIRCUS PARADE, RED TOADSTOOL ...   
270  (PLASTERS IN TIN WOODLAND ANIMALS, RED TOADSTO...   
302  (SET/6 RED SPOTTY PAPER CUPS, SET/20 RED RETRO...   
301  (SET/6 RED SPOTTY PAPER PLATES, SET/20 RED RET...   

                         consequents  antecedent support  consequent support  \
44                         (POSTAGE)            0.076531            0.765306   
258                        (POSTAGE)            0.051020            0.765306   
270                        (POSTAGE)            0.053571            0.765306   
302  (SET/6 RED SPOTTY PAPER PLATES)            0.102041            0.127551   
301    (SET/6 RED SPOTTY PAPER CUPS)            0.102041            0.137755   

      support  confidence      lift  leverage  conviction  
44   0.076531       1.000  1.306667  0.017961         inf  
258  0.051020       1.000  1.306667  0.011974     