In [65]:
######################################################################
# BG-NBD ve Gamma-Gamma ile CLTV Prediction
######################################################################

In [66]:
#####################################################################
# İş Problemi (Business Problem)
######################################################################

In [67]:
# FLO satış ve pazarlama faaliyetleri için roadmap belirlemek istemektedir.
# Şirketin orta uzun vadeli plan yapabilmesi için var olan müşterilerin gelecekte şirkete sağlayacakları potansiyel değerin tahmin edilmesi gerekmektedir.

In [68]:
######################################################################
# Veri Seti Hikayesi
######################################################################

In [69]:
# Veri seti son alışverişlerini 2020-2021 yıllarında OmniChannel(hem online hem offline alışveriş yapan) olarak yapan müşterilerin geçmiş alışveriş davranışlarından
# elde edilen bilgilerden oluşmaktadır.

In [70]:
# master_id : Eşsiz müşteri numarası
# order_channel : Alışveriş yapan platforma ait hangi kanalın kullanıldığı (Android, ios, Desktop, Mobile, Offline)
# last_order_channel : En son alışverişin yapıldığı kanal
# first_order_date : Müşterinin yaptığı ilk alışveriş tarihi
# last_order_date : Müşterinin yaptığı son alışveriş tarihi
# last_order_date_online : Müşterinin online platformda yaptığı son alışveriş tarihi
# last_order_date_offline : Müşterinin offline platformda yaptığı son alışveriş tarihi
# order_num_total_ever_online : Müşterinin online platformda yaptığı toplam alışveriş sayısı
# order_num_total_ever_offline : Müşterinin offline'da yaptığı toplam alışveriş sayısı
# customer_value_total_ever_offline : Müşterinin offline alışverişlerinde ödediği toplam ücret
# customer_value_total_ever_online : Müşterinin online alışverişlerinde ödediği toplam ücret
# interested_in_categories_12 : Müşterinin son 12 ayda alışveriş yaptığı kategorilerin listesi

In [71]:
! pip install lifetimes



In [72]:
import pandas as pd
import datetime as dt
from lifetimes import BetaGeoFitter
from lifetimes import GammaGammaFitter
from sklearn.preprocessing import MinMaxScaler
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.float_format', lambda x: '%.2f' % x)
pd.options.mode.chained_assignment = None

In [73]:
# Load the raw FLO customer data set.
# NOTE(review): the absolute /content path looks Colab-specific — adjust it when running elsewhere.
df_ = pd.read_csv("/content/flo_data_20k.csv")

In [74]:
df = df_.copy()

In [75]:
df.head()

Unnamed: 0,master_id,order_channel,last_order_channel,first_order_date,last_order_date,last_order_date_online,last_order_date_offline,order_num_total_ever_online,order_num_total_ever_offline,customer_value_total_ever_offline,customer_value_total_ever_online,interested_in_categories_12
0,cc294636-19f0-11eb-8d74-000d3a38a36f,Android App,Offline,2020-10-30,2021-02-26,2021-02-21,2021-02-26,4.0,1.0,139.99,799.38,[KADIN]
1,f431bd5a-ab7b-11e9-a2fc-000d3a38a36f,Android App,Mobile,2017-02-08,2021-02-16,2021-02-16,2020-01-10,19.0,2.0,159.97,1853.58,"[ERKEK, COCUK, KADIN, AKTIFSPOR]"
2,69b69676-1a40-11ea-941b-000d3a38a36f,Android App,Android App,2019-11-27,2020-11-27,2020-11-27,2019-12-01,3.0,2.0,189.97,395.35,"[ERKEK, KADIN]"
3,1854e56c-491f-11eb-806e-000d3a38a36f,Android App,Android App,2021-01-06,2021-01-17,2021-01-17,2021-01-06,1.0,1.0,39.99,81.98,"[AKTIFCOCUK, COCUK]"
4,d6ea1074-f1f5-11e9-9346-000d3a38a36f,Desktop,Desktop,2019-08-03,2021-03-07,2021-03-07,2019-08-03,1.0,1.0,49.99,159.99,[AKTIFSPOR]


In [76]:
# CLTV hesaplanırken frequency değerlerinin int olması gerekir. Bu nedenle alt ve üst limitleri round() ile yuvarlayalım.

In [77]:
def outlier_thresholds(dataframe, variable, lower_quantile=0.01, upper_quantile=0.99):
  """Compute lower and upper outlier limits for one column.

  The limits follow the IQR-style rule ``quantile -/+ 1.5 * range`` but are
  based on wide quantiles (1% and 99% by default, not the 25%/75% quartiles),
  so only very extreme values fall outside them.

  Parameters
  ----------
  dataframe : pandas.DataFrame
    Frame that contains the column.
  variable : str
    Name of the numeric column to analyse.
  lower_quantile : float, optional
    Quantile used as the lower anchor (default 0.01).
  upper_quantile : float, optional
    Quantile used as the upper anchor (default 0.99).

  Returns
  -------
  tuple
    ``(low_limit, up_limit)``.

  Raises
  ------
  ValueError
    If the quantiles do not satisfy ``0 <= lower_quantile < upper_quantile <= 1``.
  """
  if not 0 <= lower_quantile < upper_quantile <= 1:
    raise ValueError("quantiles must satisfy 0 <= lower_quantile < upper_quantile <= 1")

  lower_anchor = dataframe[variable].quantile(lower_quantile)
  upper_anchor = dataframe[variable].quantile(upper_quantile)
  # Width of the band between the two anchors; 1.5x of it is added on each side.
  interquantile_range = upper_anchor - lower_anchor
  up_limit = upper_anchor + 1.5 * interquantile_range
  low_limit = lower_anchor - 1.5 * interquantile_range
  return low_limit, up_limit

In [78]:
def replace_with_thresholds(dataframe, variable):
  """Cap the outliers of one column in place.

  Values below the lower limit are replaced with the rounded lower limit and
  values above the upper limit with the rounded upper limit. The limits come
  from ``outlier_thresholds``. The frame is modified in place; nothing is
  returned.
  """
  floor_value, ceiling_value = outlier_thresholds(dataframe, variable)

  # Handle the low side first, then re-read the column for the high side.
  too_low = dataframe[variable] < floor_value
  dataframe.loc[too_low, variable] = round(floor_value, 0)

  too_high = dataframe[variable] > ceiling_value
  dataframe.loc[too_high, variable] = round(ceiling_value, 0)

In [79]:
# "order_num_total_ever_online", "order_num_total_ever_offline", "customer_value_total_ever_offline",
# "customer_value_total_ever_online" değişkenlerinin aykırı değerleri varsa baskılayalım.

In [80]:
# Cap extreme values in the order-count and spend columns.
# replace_with_thresholds writes into df directly, so df is modified in place.
columns = ["order_num_total_ever_online", "order_num_total_ever_offline", "customer_value_total_ever_offline","customer_value_total_ever_online"]
for col in columns:
  replace_with_thresholds(df, col)

In [81]:
# Omnichannel müşterilerin hem online'dan hem de offline platformlardan alışveriş yaptığını ifade etmektedir. Her bir müşterinin toplam
# alışveriş sayısı ve harcaması için yeni değişkenler oluşturalım.

In [82]:
df["Order_num_total"] = df["order_num_total_ever_online"] + df["order_num_total_ever_offline"]
df["Customer_value_total"] = df["customer_value_total_ever_offline"] + df["customer_value_total_ever_online"]

In [83]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19945 entries, 0 to 19944
Data columns (total 14 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   master_id                          19945 non-null  object 
 1   order_channel                      19945 non-null  object 
 2   last_order_channel                 19945 non-null  object 
 3   first_order_date                   19945 non-null  object 
 4   last_order_date                    19945 non-null  object 
 5   last_order_date_online             19945 non-null  object 
 6   last_order_date_offline            19945 non-null  object 
 7   order_num_total_ever_online        19945 non-null  float64
 8   order_num_total_ever_offline       19945 non-null  float64
 9   customer_value_total_ever_offline  19945 non-null  float64
 10  customer_value_total_ever_online   19945 non-null  float64
 11  interested_in_categories_12        19945 non-null  obj

In [84]:
# Convert every column whose name contains "date" from object to datetime.
date_columns = df.columns[df.columns.str.contains("date")]
df[date_columns] = df[date_columns].apply(pd.to_datetime)

In [85]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19945 entries, 0 to 19944
Data columns (total 14 columns):
 #   Column                             Non-Null Count  Dtype         
---  ------                             --------------  -----         
 0   master_id                          19945 non-null  object        
 1   order_channel                      19945 non-null  object        
 2   last_order_channel                 19945 non-null  object        
 3   first_order_date                   19945 non-null  datetime64[ns]
 4   last_order_date                    19945 non-null  datetime64[ns]
 5   last_order_date_online             19945 non-null  datetime64[ns]
 6   last_order_date_offline            19945 non-null  datetime64[ns]
 7   order_num_total_ever_online        19945 non-null  float64       
 8   order_num_total_ever_offline       19945 non-null  float64       
 9   customer_value_total_ever_offline  19945 non-null  float64       
 10  customer_value_total_ever_online  

In [86]:
# Veri setindeki en son alışverişin yapıldığı tarihten 2 gün sonrasını analiz tarihi olarak alalım.

In [87]:
df["last_order_date"].max()

Timestamp('2021-05-30 00:00:00')

In [88]:
# Analysis date: two days after the most recent purchase in the data set.
# Derived from the data instead of being hard-coded, so it stays correct if the data changes.
today_date = df["last_order_date"].max() + dt.timedelta(days=2)

In [89]:
df.head()

Unnamed: 0,master_id,order_channel,last_order_channel,first_order_date,last_order_date,last_order_date_online,last_order_date_offline,order_num_total_ever_online,order_num_total_ever_offline,customer_value_total_ever_offline,customer_value_total_ever_online,interested_in_categories_12,Order_num_total,Customer_value_total
0,cc294636-19f0-11eb-8d74-000d3a38a36f,Android App,Offline,2020-10-30,2021-02-26,2021-02-21,2021-02-26,4.0,1.0,139.99,799.38,[KADIN],5.0,939.37
1,f431bd5a-ab7b-11e9-a2fc-000d3a38a36f,Android App,Mobile,2017-02-08,2021-02-16,2021-02-16,2020-01-10,19.0,2.0,159.97,1853.58,"[ERKEK, COCUK, KADIN, AKTIFSPOR]",21.0,2013.55
2,69b69676-1a40-11ea-941b-000d3a38a36f,Android App,Android App,2019-11-27,2020-11-27,2020-11-27,2019-12-01,3.0,2.0,189.97,395.35,"[ERKEK, KADIN]",5.0,585.32
3,1854e56c-491f-11eb-806e-000d3a38a36f,Android App,Android App,2021-01-06,2021-01-17,2021-01-17,2021-01-06,1.0,1.0,39.99,81.98,"[AKTIFCOCUK, COCUK]",2.0,121.97
4,d6ea1074-f1f5-11e9-9346-000d3a38a36f,Desktop,Desktop,2019-08-03,2021-03-07,2021-03-07,2019-08-03,1.0,1.0,49.99,159.99,[AKTIFSPOR],2.0,209.98


In [90]:
# recency : Son satın alma üzerinden geçen zaman. Haftalık. (kullanıcı özelinde).
# Bu recency değeri analiz tarihine göre değil müşterinin kendi içinde son satın alması ile ilk satın alması arasındaki farkı ifade etmektedir.
# T : Müşterinin yaşı. Haftalık. (analiz tarihinden ne kadar süre önce ilk satın alma yapılmış)
# frequency : Tekrar eden toplam satın alma sayısı (frequency>1)
# monetary : Satın alma başına ortalama kazanç

In [91]:
# Assemble the per-customer inputs for the BG/NBD and Gamma-Gamma models.
# Recency and T are day counts divided by 7, i.e. expressed in weeks.
cltv_df = pd.DataFrame({
    "Customer_id": df["master_id"],
    "Recency_cltv_weekly": (df["last_order_date"] - df["first_order_date"]).dt.days / 7,
    "T_weekly": (today_date - df["first_order_date"]).dt.days / 7,
    "Frequency": df["Order_num_total"],
    "Monetary_cltv_avg": df["Customer_value_total"] / df["Order_num_total"],
})

In [92]:
cltv_df.head()

Unnamed: 0,Customer_id,Recency_cltv_weekly,T_weekly,Frequency,Monetary_cltv_avg
0,cc294636-19f0-11eb-8d74-000d3a38a36f,17.0,30.57,5.0,187.87
1,f431bd5a-ab7b-11e9-a2fc-000d3a38a36f,209.86,224.86,21.0,95.88
2,69b69676-1a40-11ea-941b-000d3a38a36f,52.29,78.86,5.0,117.06
3,1854e56c-491f-11eb-806e-000d3a38a36f,1.57,20.86,2.0,60.98
4,d6ea1074-f1f5-11e9-9346-000d3a38a36f,83.14,95.43,2.0,104.99


In [93]:
# BG/NBD modelini fit edelim.

In [None]:
bgf = BetaGeoFitter(penalizer_coef=0.001)  # penalizer_coef is the penalty coefficient applied to the model parameters while they are being estimated.

In [94]:
bgf.fit(cltv_df["Frequency"],
        cltv_df["Recency_cltv_weekly"],
        cltv_df["T_weekly"])

  result = getattr(ufunc, method)(*inputs, **kwargs)


<lifetimes.BetaGeoFitter: fitted with 19945 subjects, a: 0.00, alpha: 76.17, b: 0.00, r: 3.66>

In [95]:
# Predict the expected number of purchases per customer over the next 3 months
# and store it in cltv_df as exp_sales_3_month.
# The horizon is 4*3 = 12 periods; periods are weeks because recency and T are in weeks.
cltv_df["exp_sales_3_month"] = bgf.predict(4*3,
                                           cltv_df["Frequency"],
                                           cltv_df["Recency_cltv_weekly"],
                                           cltv_df["T_weekly"])

In [96]:
# Predict the expected number of purchases per customer over the next 6 months
# and store it in cltv_df as exp_sales_6_month.
# The horizon is 4*6 = 24 periods; periods are weeks because recency and T are in weeks.
cltv_df["exp_sales_6_month"] = bgf.predict(4*6,
                                           cltv_df["Frequency"],
                                           cltv_df["Recency_cltv_weekly"],
                                           cltv_df["T_weekly"])

In [97]:
# 3. ay ve 6. aydaki en çok satın alım gerçekleştirecek 10 kişiyi inceleyelim.

In [98]:
cltv_df.sort_values("exp_sales_3_month", ascending=False)[0:10]

Unnamed: 0,Customer_id,Recency_cltv_weekly,T_weekly,Frequency,Monetary_cltv_avg,exp_sales_3_month,exp_sales_6_month
7330,a4d534a2-5b1b-11eb-8dbd-000d3a38a36f,62.71,67.29,52.0,166.22,4.66,9.31
15611,4a7e875e-e6ce-11ea-8f44-000d3a38a36f,39.71,40.0,29.0,165.3,3.37,6.75
8328,1902bf80-0035-11eb-8341-000d3a38a36f,28.86,33.29,25.0,97.44,3.14,6.28
19538,55d54d9e-8ac7-11ea-8ec0-000d3a38a36f,52.57,58.71,31.0,228.53,3.08,6.17
14373,f00ad516-c4f4-11ea-98f7-000d3a38a36f,38.0,46.43,27.0,141.35,3.0,6.0
10489,7af5cd16-b100-11e9-9757-000d3a38a36f,103.14,111.86,43.0,157.11,2.98,5.96
4315,d5ef8058-a5c6-11e9-a2fc-000d3a38a36f,133.14,147.14,49.0,161.85,2.83,5.66
6756,27310582-6362-11ea-a6dc-000d3a38a36f,62.71,64.14,29.0,168.88,2.79,5.59
6666,53fe00d4-7b7a-11eb-960b-000d3a38a36f,9.71,13.0,17.0,259.87,2.78,5.56
10536,e143b6fa-d6f8-11e9-93bc-000d3a38a36f,104.57,113.43,40.0,176.2,2.76,5.53


In [99]:
cltv_df.sort_values("exp_sales_6_month", ascending=False)[0:10]

Unnamed: 0,Customer_id,Recency_cltv_weekly,T_weekly,Frequency,Monetary_cltv_avg,exp_sales_3_month,exp_sales_6_month
7330,a4d534a2-5b1b-11eb-8dbd-000d3a38a36f,62.71,67.29,52.0,166.22,4.66,9.31
15611,4a7e875e-e6ce-11ea-8f44-000d3a38a36f,39.71,40.0,29.0,165.3,3.37,6.75
8328,1902bf80-0035-11eb-8341-000d3a38a36f,28.86,33.29,25.0,97.44,3.14,6.28
19538,55d54d9e-8ac7-11ea-8ec0-000d3a38a36f,52.57,58.71,31.0,228.53,3.08,6.17
14373,f00ad516-c4f4-11ea-98f7-000d3a38a36f,38.0,46.43,27.0,141.35,3.0,6.0
10489,7af5cd16-b100-11e9-9757-000d3a38a36f,103.14,111.86,43.0,157.11,2.98,5.96
4315,d5ef8058-a5c6-11e9-a2fc-000d3a38a36f,133.14,147.14,49.0,161.85,2.83,5.66
6756,27310582-6362-11ea-a6dc-000d3a38a36f,62.71,64.14,29.0,168.88,2.79,5.59
6666,53fe00d4-7b7a-11eb-960b-000d3a38a36f,9.71,13.0,17.0,259.87,2.78,5.56
10536,e143b6fa-d6f8-11e9-93bc-000d3a38a36f,104.57,113.43,40.0,176.2,2.76,5.53


In [100]:
# Gamma-Gamma modelini fit edelim. Müşterilerin ortalama bırakacakları değeri tahminleyip exp_average_value olarak
# cltv dataframe'ine ekleyelim.

In [101]:
ggf = GammaGammaFitter(penalizer_coef=0.01)

In [102]:
ggf.fit(cltv_df["Frequency"], cltv_df["Monetary_cltv_avg"])

<lifetimes.GammaGammaFitter: fitted with 19945 subjects, p: 4.15, q: 0.47, v: 4.08>

In [None]:
cltv_df["exp_average_value"] = ggf.conditional_expected_average_profit(cltv_df["Frequency"],
                                        cltv_df["Monetary_cltv_avg"])

In [103]:
cltv_df.head()

Unnamed: 0,Customer_id,Recency_cltv_weekly,T_weekly,Frequency,Monetary_cltv_avg,exp_sales_3_month,exp_sales_6_month
0,cc294636-19f0-11eb-8d74-000d3a38a36f,17.0,30.57,5.0,187.87,0.97,1.95
1,f431bd5a-ab7b-11e9-a2fc-000d3a38a36f,209.86,224.86,21.0,95.88,0.98,1.97
2,69b69676-1a40-11ea-941b-000d3a38a36f,52.29,78.86,5.0,117.06,0.67,1.34
3,1854e56c-491f-11eb-806e-000d3a38a36f,1.57,20.86,2.0,60.98,0.7,1.4
4,d6ea1074-f1f5-11e9-9346-000d3a38a36f,83.14,95.43,2.0,104.99,0.4,0.79


In [104]:
# Compute the 6-month CLTV by combining the fitted BG/NBD model (bgf) with the
# fitted Gamma-Gamma model (ggf). The result is a per-customer series.
cltv = ggf.customer_lifetime_value(bgf,
                                   cltv_df["Frequency"],
                                   cltv_df["Recency_cltv_weekly"],
                                   cltv_df["T_weekly"],
                                   cltv_df["Monetary_cltv_avg"],
                                   time=6,  # prediction horizon, in months
                                   freq="W",  # unit of the recency/T inputs: weeks
                                   discount_rate=0.01)  # per the lifetimes docs, a monthly discount rate for future value (not a product discount) — confirm

In [105]:
cltv_df["cltv"] = cltv
cltv_df.head()

Unnamed: 0,Customer_id,Recency_cltv_weekly,T_weekly,Frequency,Monetary_cltv_avg,exp_sales_3_month,exp_sales_6_month,cltv
0,cc294636-19f0-11eb-8d74-000d3a38a36f,17.0,30.57,5.0,187.87,0.97,1.95,395.73
1,f431bd5a-ab7b-11e9-a2fc-000d3a38a36f,209.86,224.86,21.0,95.88,0.98,1.97,199.43
2,69b69676-1a40-11ea-941b-000d3a38a36f,52.29,78.86,5.0,117.06,0.67,1.34,170.22
3,1854e56c-491f-11eb-806e-000d3a38a36f,1.57,20.86,2.0,60.98,0.7,1.4,98.95
4,d6ea1074-f1f5-11e9-9346-000d3a38a36f,83.14,95.43,2.0,104.99,0.4,0.79,95.01


In [106]:
# CLTV (Customer Lifetime Value) değerine göre veriyi büyükten küçüğe sıralayıp en yüksek 10 müşteriyi seçelim
cltv_df.sort_values("cltv", ascending=False)[:10]

Unnamed: 0,Customer_id,Recency_cltv_weekly,T_weekly,Frequency,Monetary_cltv_avg,exp_sales_3_month,exp_sales_6_month,cltv
9055,47a642fe-975b-11eb-8c2a-000d3a38a36f,2.86,7.86,4.0,1401.8,1.09,2.19,3327.78
13880,7137a5c0-7aad-11ea-8f20-000d3a38a36f,6.14,13.14,11.0,758.09,1.97,3.94,3172.39
17323,f59053e2-a503-11e9-a2fc-000d3a38a36f,51.71,101.0,7.0,1106.47,0.72,1.44,1708.98
12438,625f40a2-5bd2-11ea-98b0-000d3a38a36f,74.29,74.57,16.0,501.87,1.57,3.13,1662.61
7330,a4d534a2-5b1b-11eb-8dbd-000d3a38a36f,62.71,67.29,52.0,166.22,4.66,9.31,1628.89
8868,9ce6e520-89b0-11ea-a6e7-000d3a38a36f,3.43,34.43,8.0,601.23,1.27,2.53,1623.81
6402,851de3b4-8f0c-11eb-8cb8-000d3a38a36f,8.29,9.43,2.0,862.69,0.79,1.59,1538.86
6666,53fe00d4-7b7a-11eb-960b-000d3a38a36f,9.71,13.0,17.0,259.87,2.78,5.56,1529.23
19538,55d54d9e-8ac7-11ea-8ec0-000d3a38a36f,52.57,58.71,31.0,228.53,3.08,6.17,1485.82
14858,031b2954-6d28-11eb-99c4-000d3a38a36f,14.86,15.57,3.0,743.59,0.87,1.74,1423.0


In [107]:
# Adım1:  6 aylık CLTV'ye göre tüm müşterilerinizi 4 gruba (segmente) ayırınız ve grup isimlerini veri setine ekleyiniz.

In [109]:
# Split customers into four quantile-based CLTV groups.
# Labels are given in ascending order, so "D" is the lowest-CLTV group and "A" the highest.
cltv_df["cltv_segment"] = pd.qcut(cltv_df["cltv"], 4, labels=["D", "C", "B", "A"])
cltv_df.head(10)

Unnamed: 0,Customer_id,Recency_cltv_weekly,T_weekly,Frequency,Monetary_cltv_avg,exp_sales_3_month,exp_sales_6_month,cltv,cltv_segment
0,cc294636-19f0-11eb-8d74-000d3a38a36f,17.0,30.57,5.0,187.87,0.97,1.95,395.73,A
1,f431bd5a-ab7b-11e9-a2fc-000d3a38a36f,209.86,224.86,21.0,95.88,0.98,1.97,199.43,B
2,69b69676-1a40-11ea-941b-000d3a38a36f,52.29,78.86,5.0,117.06,0.67,1.34,170.22,B
3,1854e56c-491f-11eb-806e-000d3a38a36f,1.57,20.86,2.0,60.98,0.7,1.4,98.95,D
4,d6ea1074-f1f5-11e9-9346-000d3a38a36f,83.14,95.43,2.0,104.99,0.4,0.79,95.01,D
5,e585280e-aae1-11e9-a2fc-000d3a38a36f,120.86,132.29,3.0,66.95,0.38,0.77,57.43,D
6,c445e4ee-6242-11ea-9d1a-000d3a38a36f,32.57,64.86,4.0,93.98,0.65,1.3,134.28,C
7,3f1b4dc8-8a7d-11ea-8ec0-000d3a38a36f,12.71,54.57,2.0,81.81,0.52,1.04,97.7,D
8,cfbda69e-5b4f-11ea-aca7-000d3a38a36f,58.43,70.71,5.0,210.94,0.71,1.42,322.73,A
9,1143f032-440d-11ea-8b43-000d3a38a36f,61.71,96.0,2.0,82.98,0.39,0.79,75.22,D


In [110]:
# Tüm bu süreci fonksiyonlaştıralım.

In [111]:
def create_cltv_df(df, analysis_date=None):
  """Build the 6-month CLTV table (BG/NBD + Gamma-Gamma) from raw customer data.

  The input frame is copied first, so the caller's data is left untouched.

  Parameters
  ----------
  df : pandas.DataFrame
    Customer data with the columns "master_id", "first_order_date",
    "last_order_date", "order_num_total_ever_online",
    "order_num_total_ever_offline", "customer_value_total_ever_offline" and
    "customer_value_total_ever_online".
  analysis_date : datetime-like, optional
    Reference date used to compute customer age (T). When omitted, it is set
    to two days after the latest "last_order_date" in the data.

  Returns
  -------
  pandas.DataFrame
    One row per customer with more than one order, with the columns
    Customer_id, Recency_cltv_weekly, T_weekly, Frequency, Monetary_cltv_avg,
    exp_sales_3_month, exp_sales_6_month, exp_average_value, cltv and
    cltv_segment.
  """
  # Work on a copy so outlier capping and the helper columns never leak into the caller's frame.
  df = df.copy()

  # --- Data preparation ---
  columns = ["order_num_total_ever_online", "order_num_total_ever_offline", "customer_value_total_ever_offline","customer_value_total_ever_online"]
  for col in columns:
    replace_with_thresholds(df, col)

  df["Order_num_total"] = df["order_num_total_ever_online"] + df["order_num_total_ever_offline"]
  df["Customer_value_total"] = df["customer_value_total_ever_offline"] + df["customer_value_total_ever_online"]

  # Keep only customers with non-zero spend AND non-zero order count.
  # (The previous expression `~(a) | (b)` negated only the first condition and
  # therefore kept zero-order rows, which then divide by zero below.)
  has_activity = (df["Customer_value_total"] != 0) & (df["Order_num_total"] != 0)
  df = df.loc[has_activity].copy()

  date_columns = df.columns[df.columns.str.contains("date")]
  df[date_columns] = df[date_columns].apply(pd.to_datetime)

  # --- CLTV input structure ---
  if analysis_date is None:
    # Two days after the most recent purchase in the data.
    analysis_date = df["last_order_date"].max() + dt.timedelta(days=2)

  cltv_df = pd.DataFrame()
  cltv_df["Customer_id"] = df["master_id"]
  # Recency is the customer's own span between first and last purchase, in weeks.
  cltv_df["Recency_cltv_weekly"] = (df["last_order_date"] - df["first_order_date"]).dt.days / 7
  # T is the customer's age at the analysis date, in weeks.
  cltv_df["T_weekly"] = (analysis_date - df["first_order_date"]).dt.days / 7
  # NOTE(review): lifetimes documents `frequency` as the number of REPEAT purchases;
  # the total order count is passed here — confirm this is intended.
  cltv_df["Frequency"] = df["Order_num_total"]
  cltv_df["Monetary_cltv_avg"] = df["Customer_value_total"] / df["Order_num_total"]
  cltv_df = cltv_df[cltv_df["Frequency"] > 1].copy()

  # --- BG/NBD model: expected number of purchases ---
  bgf = BetaGeoFitter(penalizer_coef=0.001)
  bgf.fit(cltv_df["Frequency"],
          cltv_df["Recency_cltv_weekly"],
          cltv_df["T_weekly"])
  # Horizons are given in weeks: 3 months ~ 12 weeks, 6 months ~ 24 weeks.
  cltv_df["exp_sales_3_month"] = bgf.predict(4*3,
                                             cltv_df["Frequency"],
                                             cltv_df["Recency_cltv_weekly"],
                                             cltv_df["T_weekly"])
  cltv_df["exp_sales_6_month"] = bgf.predict(4*6,
                                             cltv_df["Frequency"],
                                             cltv_df["Recency_cltv_weekly"],
                                             cltv_df["T_weekly"])

  # --- Gamma-Gamma model: expected average order value ---
  ggf = GammaGammaFitter(penalizer_coef=0.01)
  ggf.fit(cltv_df["Frequency"], cltv_df["Monetary_cltv_avg"])
  cltv_df["exp_average_value"] = ggf.conditional_expected_average_profit(cltv_df["Frequency"],
                                                                         cltv_df["Monetary_cltv_avg"])

  # --- 6-month CLTV ---
  cltv = ggf.customer_lifetime_value(bgf,
                                     cltv_df["Frequency"],
                                     cltv_df["Recency_cltv_weekly"],
                                     cltv_df["T_weekly"],
                                     cltv_df["Monetary_cltv_avg"],
                                     time=6,              # prediction horizon, in months
                                     freq="W",            # unit of the recency/T inputs: weeks
                                     discount_rate=0.01)  # monthly discount rate for future value (per lifetimes docs), not a product discount
  cltv_df["cltv"] = cltv

  # --- Segmentation: four quantile groups, "D" = lowest CLTV, "A" = highest ---
  cltv_df["cltv_segment"] = pd.qcut(cltv_df["cltv"], 4, labels=["D", "C", "B", "A"])

  return cltv_df

In [112]:
cltv_df = create_cltv_df(df)

  result = getattr(ufunc, method)(*inputs, **kwargs)


In [113]:
cltv_df.head(20)

Unnamed: 0,Customer_id,Recency_cltv_weekly,T_weekly,Frequency,Monetary_cltv_avg,exp_sales_3_month,exp_sales_6_month,exp_average_value,cltv,cltv_segment
0,cc294636-19f0-11eb-8d74-000d3a38a36f,17.0,30.57,5.0,187.87,0.97,1.95,193.63,395.73,A
1,f431bd5a-ab7b-11e9-a2fc-000d3a38a36f,209.86,224.86,21.0,95.88,0.98,1.97,96.67,199.43,B
2,69b69676-1a40-11ea-941b-000d3a38a36f,52.29,78.86,5.0,117.06,0.67,1.34,120.97,170.22,B
3,1854e56c-491f-11eb-806e-000d3a38a36f,1.57,20.86,2.0,60.98,0.7,1.4,67.32,98.95,D
4,d6ea1074-f1f5-11e9-9346-000d3a38a36f,83.14,95.43,2.0,104.99,0.4,0.79,114.33,95.01,D
5,e585280e-aae1-11e9-a2fc-000d3a38a36f,120.86,132.29,3.0,66.95,0.38,0.77,71.35,57.43,D
6,c445e4ee-6242-11ea-9d1a-000d3a38a36f,32.57,64.86,4.0,93.98,0.65,1.3,98.13,134.28,C
7,3f1b4dc8-8a7d-11ea-8ec0-000d3a38a36f,12.71,54.57,2.0,81.81,0.52,1.04,89.57,97.7,D
8,cfbda69e-5b4f-11ea-aca7-000d3a38a36f,58.43,70.71,5.0,210.94,0.71,1.42,217.3,322.73,A
9,1143f032-440d-11ea-8b43-000d3a38a36f,61.71,96.0,2.0,82.98,0.39,0.79,90.81,75.22,D


In [114]:
# Bu çıktıdan şunları çıkartabiliriz:
# 1- A Segmentindeki müşterilere özel sadakat programları ve kişiselleştirilmiş teklifler sunarak onları elde tutmalıyız.
# 2- B ve C segmentindeki müşterileri A segmentine taşımak için ek indirimler ve teşvik programları uygulanabilir.
# 3- D segmentindeki müşterilere yönelik özel kampanyalar düzenlenerek müşteri kaybı önlenebilir.
# 4- 3 ve 6 aylık satış tahminleri kullanılarak pazarlama stratejileri optimize edilebilir.