##############################################################
# CLTV Prediction with BG-NBD and Gamma-Gamma
##############################################################


In [1]:
# import libraries
import datetime as dt
import pandas as pd
import matplotlib.pyplot as plt
from lifetimes import BetaGeoFitter
from lifetimes import GammaGammaFitter
from lifetimes.plotting import plot_period_transactions
from sklearn.preprocessing import MinMaxScaler

In [None]:

# Load the data.
# NOTE(review): `df` is rebound from 'online_retail_II.xlsx' in a later cell,
# so this frame is replaced before any analysis — confirm this cell is still needed.
df = pd.read_excel('Online Retail.xlsx')
df = df.copy()
df.head(2).T

In [2]:
# Compute the bounds used to trim (cap) outliers.
def outlier_thresholds(dataframe, variable, q_low=0.01, q_high=0.99, whisker=1.5):
    """Return the ``(low_limit, up_limit)`` outlier bounds for one column.

    The bounds follow the usual "quantile +/- whisker * spread" rule, but the
    defaults use the 1st and 99th percentiles rather than the quartiles, so
    only very extreme values fall outside them.

    Args:
        dataframe: Frame holding the column.
        variable: Name of the column to analyse.
        q_low: Lower quantile (default 0.01).
        q_high: Upper quantile (default 0.99).
        whisker: Multiplier applied to ``q_high - q_low`` spread (default 1.5).

    Returns:
        Tuple ``(low_limit, up_limit)``.
    """
    lower_q = dataframe[variable].quantile(q_low)
    upper_q = dataframe[variable].quantile(q_high)
    spread = upper_q - lower_q
    low_limit = lower_q - whisker * spread
    up_limit = upper_q + whisker * spread
    return low_limit, up_limit

In [3]:
def replace_with_thresholds(dataframe, variable):
    """Cap values of ``variable`` that exceed the upper outlier limit, in place.

    The upper limit comes from ``outlier_thresholds``. Only the upper side is
    capped; values below the lower limit are left untouched (the lower-side
    cap was disabled in the original code and stays disabled here).

    Args:
        dataframe: Frame to modify; its ``variable`` column is reassigned.
        variable: Name of the numeric column to cap.
    """
    _, up_limit = outlier_thresholds(dataframe, variable)
    # clip() builds the capped column in one step instead of writing a float
    # limit into a masked slice of a possibly integer column, which newer
    # pandas versions reject as an incompatible-dtype assignment.
    dataframe[variable] = dataframe[variable].clip(upper=up_limit)

In [None]:
# Load the "Year 2010-2011" sheet; df_ keeps the frame as loaded and all
# further work happens on the copy bound to df.
df_ = pd.read_excel('online_retail_II.xlsx',
                    sheet_name="Year 2010-2011")
df = df_.copy()
df.shape

In [None]:
# Preview the first rows of the loaded frame.
df.head()

In [None]:
# Print the column summary of the loaded frame.
df.info()

In [None]:
# Descriptive statistics, transposed so each column becomes a row.
df.describe().T

In [None]:
# Drop rows with any missing value, then keep only rows whose Invoice does
# not contain "C" and whose Quantity is positive.
# NOTE(review): "C" invoices are presumably cancellations — confirm.
df = df.dropna()
has_c_in_invoice = df["Invoice"].str.contains("C", na=False)
positive_quantity = df["Quantity"] > 0
df = df[~has_c_in_invoice & positive_quantity]

In [None]:
# Select the UK customers.
# .copy() makes the filtered frame independent, so the in-place edits done
# on df afterwards (outlier capping, new TotalPrice column) do not raise
# SettingWithCopyWarning.
df = df[df["Country"].str.contains("United Kingdom")].copy()
df["Country"].head()

In [None]:
# Cap extreme values in the two columns that are multiplied into TotalPrice.
for column in ("Quantity", "Price"):
    replace_with_thresholds(df, column)

In [None]:
# Descriptive statistics again, to inspect the frame after filtering and capping.
df.describe().T

In [None]:
# Preview the first rows of the cleaned frame.
df.head()

In [None]:
# Line total for each row: quantity times unit price.
df["TotalPrice"] = df["Quantity"] * df["Price"]

In [None]:
# Latest invoice timestamp; the value is not assigned to anything here.
df["InvoiceDate"].max()
# Fixed analysis date used as "today" when computing each customer's T.
# NOTE(review): presumably chosen to fall shortly after the last invoice —
# confirm against the max computed above.
today_date = dt.datetime(2011, 12, 11)

#############################################
# RFM Table
#############################################

In [None]:
# Recency is dynamic and specific to each customer (measured between that
# customer's own first and last purchase, not against a global date).
# Per-customer aggregates:
#   InvoiceDate -> days between first and last purchase, and days from the
#                  first purchase to today_date
#   Invoice     -> number of distinct invoices
#   TotalPrice  -> total spend
rfm = df.groupby('Customer ID').agg({'InvoiceDate': [lambda date: (date.max() - date.min()).days,
                                                     lambda date: (today_date - date.min()).days],# tenure (T)
                                     'Invoice': lambda num: num.nunique(),
                                     'TotalPrice': lambda TotalPrice: TotalPrice.sum()})

# Drop the outer column level produced by the dict-style aggregation.
rfm.columns = rfm.columns.droplevel(0)
rfm.head()

In [None]:
## Name the four aggregates: recency (days), T (days), frequency, monetary
rfm.columns = ['recency_cltv_p', 'T', 'frequency', 'monetary']

In [None]:
## simplified monetary_avg: total spend divided by the number of invoices
avg_spend = rfm["monetary"].div(rfm["frequency"])
rfm = rfm.assign(monetary=avg_spend).rename(columns={"monetary": "monetary_avg"})

## Computing WEEKLY RECENCY and WEEKLY T for BG-NBD

In [None]:
## recency and T converted from days to weeks
rfm = rfm.assign(
    recency_weekly_p=rfm["recency_cltv_p"] / 7,
    T_weekly=rfm["T"] / 7,
)
rfm.head()

In [None]:
# CHECK: keep only customers with a positive average spend.
rfm = rfm[rfm["monetary_avg"] > 0]

In [None]:
## keep customers with frequency > 1
# NOTE(review): lifetimes documents `frequency` as the number of repeat
# purchases; here it is the count of distinct invoices — confirm the
# intended definition.
# .copy() makes the filtered frame independent, so the column assignments
# below and in later cells do not raise SettingWithCopyWarning.
rfm = rfm[(rfm['frequency'] > 1)].copy()
rfm["frequency"] = rfm["frequency"].astype(int)

In [None]:
# Expected sales are estimated with this model.
# pip install lifetimes

bgf = BetaGeoFitter(penalizer_coef=0.001)

# Fit on frequency plus the weekly recency and weekly T columns.
bgf.fit(rfm['frequency'],
        rfm['recency_weekly_p'],
        rfm['T_weekly'])

In [None]:
################################################################
# Who are the 10 customers we expect the most purchases from within 1 week?
################################################################

bgf.conditional_expected_number_of_purchases_up_to_time(1,
                                                        rfm['frequency'],
                                                        rfm['recency_weekly_p'],
                                                        rfm['T_weekly']).sort_values(ascending=False).head(10)


In [None]:
# Expected number of purchases for a horizon of 1 (week).
# NOTE: this column is overwritten with the 4-week prediction in a later cell.
rfm["expected_number_of_purchases"] = bgf.predict(1,
                                                  rfm['frequency'],
                                                  rfm['recency_weekly_p'],
                                                  rfm['T_weekly'])

rfm.head()

In [None]:
################################################################
# Who are the 10 customers we expect the most purchases from within 1 month?
################################################################


# The result of this first expression is not stored; the same prediction is
# computed again below and saved on rfm.
bgf.predict(4,# number of weeks
            rfm['frequency'],
            rfm['recency_weekly_p'],
            rfm['T_weekly']).sort_values(ascending=False).head(10)

rfm["expected_number_of_purchases"] = bgf.predict(4,
                                                  rfm['frequency'],
                                                  rfm['recency_weekly_p'],
                                                  rfm['T_weekly'])

# Top 20 rows by the stored 4-week prediction.
rfm.sort_values("expected_number_of_purchases", ascending=False).head(20)

In [None]:
################################################################
# What is the expected number of sales for the whole company in 1 month?
################################################################

# Sum of the 4-week predictions over all customers in rfm.
bgf.predict(4,
            rfm['frequency'],
            rfm['recency_weekly_p'],
            rfm['T_weekly']).sum()

In [None]:
################################################################
# What is the expected number of sales for the whole company in 3 months?
################################################################
# Horizon is 4 * 3 = 12 weeks, summed over all customers in rfm.
bgf.predict(4 * 3,
            rfm['frequency'],
            rfm['recency_weekly_p'],
            rfm['T_weekly']).sum()

In [None]:
################################################################
# Evaluating the prediction results
################################################################
# Plot produced by lifetimes' plot_period_transactions for the fitted model.
plot_period_transactions(bgf)
plt.show()

In [None]:
##############################################################
# 3. Building the GAMMA-GAMMA model
# computing the expected profitability
##############################################################


ggf = GammaGammaFitter(penalizer_coef=0.01)
ggf.fit(rfm['frequency'], rfm['monetary_avg'])

# Expected average profit for the first 10 customers (not ranked yet)
ggf.conditional_expected_average_profit(rfm['frequency'],
                                        rfm['monetary_avg']).head(10)
# the same values sorted, highest first:
ggf.conditional_expected_average_profit(rfm['frequency'],
                                        rfm['monetary_avg']).sort_values(ascending=False).head(10)

# Store the expected average profit on rfm.
rfm["expected_average_profit"] = ggf.conditional_expected_average_profit(rfm['frequency'],
                                                                         rfm['monetary_avg'])

# Top 20 rows by expected average profit.
rfm.sort_values("expected_average_profit", ascending=False).head(20)

##############################################################
# 4. Computing CLTV with the BG-NBD and GG models.
##############################################################

In [None]:
# CLTV combining the fitted BG-NBD (bgf) and Gamma-Gamma (ggf) models.
cltv = ggf.customer_lifetime_value(bgf,
                                   rfm['frequency'],
                                   rfm['recency_weekly_p'],
                                   rfm['T_weekly'],
                                   rfm['monetary_avg'],
                                   time=6,  # 6 months
                                   freq="W",  # frequency unit of T
                                   discount_rate=0.01)

cltv.head()

In [None]:
# Dimensions of the CLTV result.
cltv.shape

In [None]:
# Turn the index into a regular column, then list the rows with the highest clv.
cltv = cltv.reset_index()
cltv.sort_values(by="clv", ascending=False).head()

In [None]:
# Attach the clv values to the RFM table, matching on "Customer ID";
# how="left" keeps every row of rfm.
rfm_cltv_final = rfm.merge(cltv, on="Customer ID", how="left")
rfm_cltv_final.head()

In [None]:
# Top 20 rows by clv.
rfm_cltv_final.sort_values(by="clv", ascending=False).head(20)

In [None]:
# 1-month and 12-month CLTV calculation for the 2010-2011 UK customers.
cltv = ggf.customer_lifetime_value(bgf,
                                   rfm['frequency'],
                                   rfm['recency_weekly_p'],
                                   rfm['T_weekly'],
                                   rfm['monetary_avg'],
                                   time=1,  # 1 month
                                   freq="W",  # frequency unit of T
                                   discount_rate=0.01)

cltv.head()

In [None]:
# Turn the index into a regular column, then list the 12 rows with the highest clv.
cltv = cltv.reset_index()
cltv.sort_values(by="clv", ascending=False).head(12)

In [None]:
# Attach the 1-month clv values to the RFM table, matching on "Customer ID".
# NOTE: rfm_cltv_final is rebound by the later merges in this file.
rfm_cltv_final = rfm.merge(cltv, on="Customer ID", how="left")
rfm_cltv_final.head()

In [None]:
# Same CLTV calculation with a 12-month horizon.
cltv = ggf.customer_lifetime_value(bgf,
                                   rfm['frequency'],
                                   rfm['recency_weekly_p'],
                                   rfm['T_weekly'],
                                   rfm['monetary_avg'],
                                   time=12,  # 12 months
                                   freq="W",  # frequency unit of T
                                   discount_rate=0.01)

cltv.head()

In [None]:
# Dimensions of the 12-month CLTV result.
cltv.shape

In [None]:
# Turn the index into a regular column, then list the 12 rows with the highest clv.
cltv = cltv.reset_index()
cltv.sort_values(by="clv", ascending=False).head(12)

In [None]:
# Attach the 12-month clv values to the RFM table, matching on "Customer ID".
rfm_cltv_final = rfm.merge(cltv, on="Customer ID", how="left")
rfm_cltv_final.head()

In [None]:
# Split all 2010-2011 UK customers into 3 segments based on their 6-month CLTV.

In [None]:
# Recompute the 6-month CLTV; cltv was rebound to other horizons above.
cltv = ggf.customer_lifetime_value(bgf,
                                   rfm['frequency'],
                                   rfm['recency_weekly_p'],
                                   rfm['T_weekly'],
                                   rfm['monetary_avg'],
                                   time=6,  # 6 months
                                   freq="W",  # frequency unit of T
                                   discount_rate=0.01)

cltv.head()

In [None]:
# Number of rows in the CLTV result.
cltv.shape[0]

In [None]:
# Turn the index into a regular column, then list the rows with the highest clv.
cltv = cltv.reset_index()
cltv.sort_values(by="clv", ascending=False).head()

In [None]:
# Attach the 6-month clv values to the RFM table, matching on "Customer ID".
rfm_cltv_final = rfm.merge(cltv, on="Customer ID", how="left")
rfm_cltv_final.head()

In [None]:
# Cut clv into three quantile-based bins; labels run from lowest ("C") to
# highest ("A") clv.
rfm_cltv_final["segment"] = pd.qcut(rfm_cltv_final["clv"], 3, labels=["C", "B", "A"])
# Order rows from highest to lowest clv.
rfm_cltv_final = rfm_cltv_final.sort_values(by="clv", ascending=False)
# NOTE(review): reset_index() without drop=True keeps the previous index as
# an extra column — confirm that is intended.
rfm_cltv_final=rfm_cltv_final.reset_index()
rfm_cltv_final.head()

In [None]:
# Number of rows in the final table.
len(rfm_cltv_final)

In [None]:
# Customers in the top 20 percent by CLTV

In [None]:
# 20% of the row count of the final table.
(rfm_cltv_final.shape[0] *0.20)

In [None]:
# Flag the top 20% of rows. The frame is sorted by clv in descending order
# earlier in the file, so the first rows are the highest-clv customers.
# The cutoff is derived from the row count (fraction truncated to a whole
# number of rows) instead of the previously hard-coded 515.
top_n = int(len(rfm_cltv_final) * 0.20)
rfm_cltv_final["top_flag"] = 0
# Single positional write on the frame itself; the chained form
# df["col"].iloc[...] = value is not guaranteed to update the frame.
rfm_cltv_final.iloc[:top_n, rfm_cltv_final.columns.get_loc("top_flag")] = 1

In [None]:
# Preview the final table, including the top_flag column.
rfm_cltv_final.head()