In [None]:
!pip install openpyxl

## ASSOCIATION RULE LEARNING (BİRLİKTELİK KURALI ÖĞRENİMİ)

### 1. Veri Ön İşleme

In [None]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

In [None]:
# Read the "Year 2010-2011" sheet of the workbook. The raw frame stays in `df_`
# so a later cell can start again from an untouched copy; all work happens on `df`.
df_ = pd.read_excel("../input/online-retail-dataset/online_retail_II.xlsx",sheet_name="Year 2010-2011")
df = df_.copy()

In [None]:
df.head()

In [None]:
df.describe().T 

In [None]:
df.isnull().sum()

In [None]:
df.shape

In [None]:
def retail_data_prep(dataframe):
    """Return a cleaned copy of the retail transactions.

    Rows are removed when they contain any missing value, when the invoice
    number contains the letter "C" (presumably cancellations - confirm against
    the dataset description), or when Quantity or Price is not positive.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain the "Invoice", "Quantity" and "Price" columns.

    Returns
    -------
    pandas.DataFrame
        A new, independent frame; the input frame is left unmodified.
    """
    # dropna() without inplace=True: cleaning must not alter the caller's frame.
    cleaned = dataframe.dropna()
    # astype(str) lets the .str accessor work even when Invoice holds non-string values.
    has_c = cleaned["Invoice"].astype(str).str.contains("C", na=False)
    keep = ~has_c & (cleaned["Quantity"] > 0) & (cleaned["Price"] > 0)
    # .copy() so callers receive a frame they can modify without pandas copy warnings.
    return cleaned[keep].copy()

In [None]:
# Run the cleaning step on the working copy and preview the first rows.
df = retail_data_prep(df)
df.head()

In [None]:
def outlier_thresholds(dataframe, variable, low_quantile=0.01, up_quantile=0.99):
    """Compute lower and upper outlier limits for one column.

    The limits are placed 1.5 times the inter-quantile range below the lower
    quantile and above the upper quantile.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame holding the column.
    variable : str
        Name of the numeric column to analyse.
    low_quantile, up_quantile : float, optional
        Quantiles delimiting the range. The defaults (0.01 and 0.99) reproduce
        the values that were previously hard-coded.

    Returns
    -------
    tuple
        ``(low_limit, up_limit)``.
    """
    lower_value = dataframe[variable].quantile(low_quantile)
    upper_value = dataframe[variable].quantile(up_quantile)
    interquantile_range = upper_value - lower_value
    up_limit = upper_value + 1.5 * interquantile_range
    low_limit = lower_value - 1.5 * interquantile_range
    return low_limit, up_limit

In [None]:
def replace_with_thresholds(dataframe, variable):
    """Cap one column of ``dataframe`` at its outlier limits, in place.

    Values below the lower limit returned by ``outlier_thresholds`` are set to
    that limit, values above the upper limit are set to the upper limit.
    Nothing is returned; the column of the passed frame is replaced.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame to modify.
    variable : str
        Name of the numeric column to cap.
    """
    low_limit, up_limit = outlier_thresholds(dataframe, variable)
    # clip() builds a correctly typed result column. Writing the float limits
    # into an integer column through a masked .loc assignment relies on silent
    # upcasting, which pandas has deprecated.
    dataframe[variable] = dataframe[variable].clip(lower=low_limit, upper=up_limit)

In [None]:
def retail_data_prep(dataframe):
    """Return a cleaned, outlier-capped copy of the retail transactions.

    Rows are removed when they contain any missing value, when the invoice
    number contains the letter "C" (presumably cancellations - confirm against
    the dataset description), or when Quantity or Price is not positive.
    Afterwards Quantity and Price are capped with ``replace_with_thresholds``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain the "Invoice", "Quantity" and "Price" columns.

    Returns
    -------
    pandas.DataFrame
        A new, independent frame; the input frame is left unmodified.
    """
    # dropna() without inplace=True: cleaning must not alter the caller's frame.
    cleaned = dataframe.dropna()
    # astype(str) lets the .str accessor work even when Invoice holds non-string values.
    has_c = cleaned["Invoice"].astype(str).str.contains("C", na=False)
    keep = ~has_c & (cleaned["Quantity"] > 0) & (cleaned["Price"] > 0)
    # Explicit copy: replace_with_thresholds writes into the frame it receives,
    # which must not be a slice of another frame.
    cleaned = cleaned[keep].copy()
    replace_with_thresholds(cleaned, "Quantity")
    replace_with_thresholds(cleaned, "Price")
    return cleaned

In [None]:
# Run the preparation again with the redefined retail_data_prep, which also caps
# Quantity and Price, then count the missing values left in each column.
df = retail_data_prep(df)
df.isnull().sum()

In [None]:
df.describe().T

### 2. ARL Veri Yapısını Hazırlama (Invoice-Product Matrix)

In [None]:
df.head()

In [None]:
# Restrict the analysis to the rows whose Country is France.
df_fr = df[df['Country'] == "France"]

# Total quantity per (invoice, product description) pair - first 20 rows.
df_fr.groupby(['Invoice', 'Description']).agg({"Quantity": "sum"}).head(20)

In [None]:
df_fr.groupby(['Invoice', 'Description']).agg({"Quantity": "sum"}).unstack().iloc[0:5, 0:5]

In [None]:
df_fr.groupby(['Invoice', 'Description']).agg({"Quantity": "sum"}).unstack().fillna(0).iloc[0:5, 0:5]

In [None]:
# Binary invoice x StockCode matrix preview: 1 when the invoice contains the product.
# The vectorised comparison replaces the element-wise applymap (deprecated in
# pandas 2.1); missing combinations compare as False, so no fillna is needed.
df_fr.groupby(['Invoice', 'StockCode']). \
    agg({"Quantity": "sum"}). \
    unstack(). \
    gt(0). \
    astype("int64").iloc[0:5, 0:5]

In [None]:
def create_invoice_product_df(dataframe, id=False):
    """Build a binary invoice x product matrix.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain "Invoice", "Quantity" and the product column in use.
    id : bool, optional
        When true the columns are "StockCode" values, otherwise
        "Description" values. (The name shadows the builtin but is kept
        because callers pass it by keyword.)

    Returns
    -------
    pandas.DataFrame
        One row per invoice, one column per product; a cell is 1 when the
        summed quantity of that product on that invoice is positive, else 0.
    """
    product_column = "StockCode" if id else "Description"
    quantities = dataframe.groupby(["Invoice", product_column])["Quantity"].sum().unstack()
    # Vectorised replacement for applymap (deprecated in pandas 2.1). Invoice/product
    # pairs that never occur are NaN after unstack and compare as False, hence 0.
    return quantities.gt(0).astype("int64")

In [None]:
# Invoice x product matrix for France, with product descriptions as columns
# (id defaults to False).
fr_inv_pro_df = create_invoice_product_df(df_fr)
fr_inv_pro_df

In [None]:
# Rebuild the matrix with StockCode columns; this overwrites the description-based
# version and is the frame handed to apriori further down.
fr_inv_pro_df = create_invoice_product_df(df_fr, id=True)
fr_inv_pro_df

In [None]:
def check_id(dataframe, stock_code):
    """Print the description of the first row whose StockCode equals ``stock_code``.

    The description is printed as a one-element list; nothing is returned.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain the "StockCode" and "Description" columns.
    stock_code
        Value compared against the "StockCode" column.

    Raises
    ------
    IndexError
        If no row has the given stock code.
    """
    matches = dataframe.loc[dataframe["StockCode"] == stock_code, "Description"]
    if matches.empty:
        # Same exception type as an out-of-range lookup, but with a message that
        # names the missing code.
        raise IndexError("StockCode {!r} not found in the given dataframe".format(stock_code))
    product_name = matches.iloc[:1].tolist()
    print(product_name)

In [None]:
check_id(df_fr, 10120)

### 3. Birliktelik Kurallarının Çıkarılması

In [None]:
# Mine frequent itemsets from the invoice x StockCode matrix. min_support=0.01 is
# the minimum share of invoices an itemset must appear in; use_colnames=True labels
# the itemsets with the column names instead of column positions.
frequent_itemsets = apriori(fr_inv_pro_df,
                            min_support=0.01,
                            use_colnames=True)

In [None]:
frequent_itemsets.sort_values("support", ascending=False)

In [None]:
# Derive association rules from the frequent itemsets, keeping every rule whose
# support reaches the 0.01 threshold.
rules = association_rules(frequent_itemsets,
                          metric="support",
                          min_threshold=0.01)

In [None]:
# Keep only rules with support above 0.05, confidence above 0.1 and lift above 5.
rules[(rules["support"]>0.05) & (rules["confidence"]>0.1) & (rules["lift"]>5)]

In [None]:
check_id(df_fr, 21086)

In [None]:
# Rules with support > 0.05, confidence > 0.1 and lift > 5, highest confidence first.
rules[(rules["support"]>0.05) & (rules["confidence"]>0.1) & (rules["lift"]>5)]. \
sort_values("confidence", ascending=False)

### 4. Çalışmanın Scriptini Hazırlama

In [None]:
def outlier_thresholds(dataframe, variable, low_quantile=0.01, up_quantile=0.99):
    """Compute lower and upper outlier limits for one column.

    The limits are placed 1.5 times the inter-quantile range below the lower
    quantile and above the upper quantile.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame holding the column.
    variable : str
        Name of the numeric column to analyse.
    low_quantile, up_quantile : float, optional
        Quantiles delimiting the range. The defaults (0.01 and 0.99) reproduce
        the values that were previously hard-coded.

    Returns
    -------
    tuple
        ``(low_limit, up_limit)``.
    """
    lower_value = dataframe[variable].quantile(low_quantile)
    upper_value = dataframe[variable].quantile(up_quantile)
    interquantile_range = upper_value - lower_value
    up_limit = upper_value + 1.5 * interquantile_range
    low_limit = lower_value - 1.5 * interquantile_range
    return low_limit, up_limit

In [None]:
def replace_with_thresholds(dataframe, variable):
    """Cap one column of ``dataframe`` at its outlier limits, in place.

    Values below the lower limit returned by ``outlier_thresholds`` are set to
    that limit, values above the upper limit are set to the upper limit.
    Nothing is returned; the column of the passed frame is replaced.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame to modify.
    variable : str
        Name of the numeric column to cap.
    """
    low_limit, up_limit = outlier_thresholds(dataframe, variable)
    # clip() builds a correctly typed result column. Writing the float limits
    # into an integer column through a masked .loc assignment relies on silent
    # upcasting, which pandas has deprecated.
    dataframe[variable] = dataframe[variable].clip(lower=low_limit, upper=up_limit)

In [None]:
def retail_data_prep(dataframe):
    """Return a cleaned, outlier-capped copy of the retail transactions.

    Rows are removed when they contain any missing value, when the invoice
    number contains the letter "C" (presumably cancellations - confirm against
    the dataset description), or when Quantity or Price is not positive.
    Afterwards Quantity and Price are capped with ``replace_with_thresholds``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain the "Invoice", "Quantity" and "Price" columns.

    Returns
    -------
    pandas.DataFrame
        A new, independent frame; the input frame is left unmodified.
    """
    # dropna() without inplace=True: cleaning must not alter the caller's frame.
    cleaned = dataframe.dropna()
    # astype(str) matches the earlier definitions in this notebook; without it the
    # .str accessor raises when Invoice holds only numeric values.
    has_c = cleaned["Invoice"].astype(str).str.contains("C", na=False)
    keep = ~has_c & (cleaned["Quantity"] > 0) & (cleaned["Price"] > 0)
    # Explicit copy: replace_with_thresholds writes into the frame it receives,
    # which must not be a slice of another frame.
    cleaned = cleaned[keep].copy()
    replace_with_thresholds(cleaned, "Quantity")
    replace_with_thresholds(cleaned, "Price")
    return cleaned

In [None]:
def create_invoice_product_df(dataframe, id=False):
    """Build a binary invoice x product matrix.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain "Invoice", "Quantity" and the product column in use.
    id : bool, optional
        When true the columns are "StockCode" values, otherwise
        "Description" values. (The name shadows the builtin but is kept
        because callers pass it by keyword.)

    Returns
    -------
    pandas.DataFrame
        One row per invoice, one column per product; a cell is 1 when the
        summed quantity of that product on that invoice is positive, else 0.
    """
    product_column = "StockCode" if id else "Description"
    quantities = dataframe.groupby(["Invoice", product_column])["Quantity"].sum().unstack()
    # Vectorised replacement for applymap (deprecated in pandas 2.1). Invoice/product
    # pairs that never occur are NaN after unstack and compare as False, hence 0.
    return quantities.gt(0).astype("int64")

In [None]:
def check_id(dataframe, stock_code):
    """Print the description of the first row whose StockCode equals ``stock_code``.

    The description is printed as a one-element list; nothing is returned.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain the "StockCode" and "Description" columns.
    stock_code
        Value compared against the "StockCode" column.

    Raises
    ------
    IndexError
        If no row has the given stock code.
    """
    matches = dataframe.loc[dataframe["StockCode"] == stock_code, "Description"]
    if matches.empty:
        # Same exception type as an out-of-range lookup, but with a message that
        # names the missing code.
        raise IndexError("StockCode {!r} not found in the given dataframe".format(stock_code))
    product_name = matches.iloc[:1].tolist()
    print(product_name)

In [None]:
def create_rules(dataframe, id=True, country="France", min_support=0.01,
                 metric="support", min_threshold=0.01):
    """Build association rules for the transactions of one country.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Prepared transactions; must contain the "Country" column and the
        columns required by ``create_invoice_product_df``.
    id : bool, optional
        Forwarded to ``create_invoice_product_df``: StockCode columns when
        true, Description columns otherwise.
    country : str, optional
        Only rows whose "Country" equals this value are used.
    min_support : float, optional
        Minimum support passed to ``apriori``.
    metric : str, optional
        Metric passed to ``association_rules``.
    min_threshold : float, optional
        Threshold for ``metric`` passed to ``association_rules``.

    The three trailing defaults reproduce the values that were previously
    hard-coded, so existing calls behave as before.

    Returns
    -------
    pandas.DataFrame
        The frame produced by ``association_rules``.
    """
    country_df = dataframe[dataframe['Country'] == country]
    basket = create_invoice_product_df(country_df, id)
    # The matrix holds only 0/1, so the bool cast keeps the same information;
    # mlxtend warns about non-bool input frames.
    frequent_itemsets = apriori(basket.astype(bool), min_support=min_support, use_colnames=True)
    rules = association_rules(frequent_itemsets, metric=metric, min_threshold=min_threshold)
    return rules

In [None]:
df = df_.copy()

In [None]:
df = retail_data_prep(df) # data preprocessing is delegated to retail_data_prep
rules = create_rules(df) # and this call produces the association rules

In [None]:
# Rules from create_rules with support > 0.05, confidence > 0.1 and lift > 5,
# highest confidence first.
rules[(rules["support"]>0.05) & (rules["confidence"]>0.1) & (rules["lift"]>5)]. \
sort_values("confidence", ascending=False)

### 5. Sepet Aşamasındaki Kullanıcılara Ürün Önerisinde Bulunmak

In [None]:
# Example:
# Example product id in the user's basket: 22492

product_id = 22492
check_id(df, product_id)

In [None]:
sorted_rules = rules.sort_values("lift", ascending=False)

In [None]:
# For every rule (visited in sorted_rules order) whose antecedent contains
# product_id, take the first item of that rule's consequent.
recommendation_list = [
    list(consequent)[0]
    for antecedent, consequent in zip(sorted_rules["antecedents"], sorted_rules["consequents"])
    if product_id in antecedent
]

In [None]:
recommendation_list[0:3]

In [None]:
check_id(df, 22326)

In [None]:
def arl_recommender(rules_df, product_id, rec_count=1):
    """Recommend products that co-occur with ``product_id`` according to the rules.

    Rules are visited from highest to lowest lift. Every rule whose antecedent
    contains ``product_id`` contributes all items of its consequent; an item
    already collected from a stronger rule is not added a second time, so the
    result never repeats a product.

    Parameters
    ----------
    rules_df : pandas.DataFrame
        Must contain "antecedents" and "consequents" (iterables of product
        ids) and a "lift" column.
    product_id
        Product to find companions for.
    rec_count : int, optional
        Maximum number of recommendations to return.

    Returns
    -------
    list
        At most ``rec_count`` distinct product ids; empty when no rule's
        antecedent contains ``product_id``.
    """
    sorted_rules = rules_df.sort_values("lift", ascending=False)
    recommendation_list = []
    already_added = set()
    # Iterate both columns together instead of a positional iloc lookup per rule.
    for antecedent, consequent in zip(sorted_rules["antecedents"], sorted_rules["consequents"]):
        if product_id not in antecedent:
            continue
        for item in consequent:
            if item not in already_added:
                already_added.add(item)
                recommendation_list.append(item)

    return recommendation_list[0:rec_count]

In [None]:
arl_recommender(rules, 22492, 1)

In [None]:
arl_recommender(rules, 22492, 2)

In [None]:
arl_recommender(rules, 22492, 3)