# FLO RFM ANALYSIS

In [213]:
import pandas as pd
import datetime as dt

pd.set_option('display.max_columns', None)
pd.set_option('display.float_format', lambda x: '%.2f' % x)

In [214]:
flo_data_ = pd.read_csv("./flo_data_20k.csv")

# TASK 1

## Step 1: Copy the data frame

In [215]:
flodf = flo_data_.copy()
flodf.head()

Unnamed: 0,master_id,order_channel,last_order_channel,first_order_date,last_order_date,last_order_date_online,last_order_date_offline,order_num_total_ever_online,order_num_total_ever_offline,customer_value_total_ever_offline,customer_value_total_ever_online,interested_in_categories_12
0,cc294636-19f0-11eb-8d74-000d3a38a36f,Android App,Offline,2020-10-30,2021-02-26,2021-02-21,2021-02-26,4.0,1.0,139.99,799.38,[KADIN]
1,f431bd5a-ab7b-11e9-a2fc-000d3a38a36f,Android App,Mobile,2017-02-08,2021-02-16,2021-02-16,2020-01-10,19.0,2.0,159.97,1853.58,"[ERKEK, COCUK, KADIN, AKTIFSPOR]"
2,69b69676-1a40-11ea-941b-000d3a38a36f,Android App,Android App,2019-11-27,2020-11-27,2020-11-27,2019-12-01,3.0,2.0,189.97,395.35,"[ERKEK, KADIN]"
3,1854e56c-491f-11eb-806e-000d3a38a36f,Android App,Android App,2021-01-06,2021-01-17,2021-01-17,2021-01-06,1.0,1.0,39.99,81.98,"[AKTIFCOCUK, COCUK]"
4,d6ea1074-f1f5-11e9-9346-000d3a38a36f,Desktop,Desktop,2019-08-03,2021-03-07,2021-03-07,2019-08-03,1.0,1.0,49.99,159.99,[AKTIFSPOR]


## Step 2: Understand the data

In [216]:
def observe_dataframe_basic(df):
    print("--------------------- First 10 Observation -------------------------")
    print(df.head(10))
    print("--------------------- Features -------------------------------------")
    print(df.columns)
    print("--------------------- Statistics -----------------------------------")
    print(df.describe([0, 0.05, 0.50, 0.95, 0.99, 1]).T)
    print("----------------------- NA -----------------------------------------")
    print(df.isnull().sum())
    print("----------------------- Types,Shape --------------------------------")
    print(df.dtypes) # df.info() can be used too
    print(df.shape)

In [217]:
observe_dataframe_basic(flodf)

--------------------- First 10 Observation -------------------------
                              master_id order_channel last_order_channel  \
0  cc294636-19f0-11eb-8d74-000d3a38a36f   Android App            Offline   
1  f431bd5a-ab7b-11e9-a2fc-000d3a38a36f   Android App             Mobile   
2  69b69676-1a40-11ea-941b-000d3a38a36f   Android App        Android App   
3  1854e56c-491f-11eb-806e-000d3a38a36f   Android App        Android App   
4  d6ea1074-f1f5-11e9-9346-000d3a38a36f       Desktop            Desktop   
5  e585280e-aae1-11e9-a2fc-000d3a38a36f       Desktop            Offline   
6  c445e4ee-6242-11ea-9d1a-000d3a38a36f   Android App        Android App   
7  3f1b4dc8-8a7d-11ea-8ec0-000d3a38a36f        Mobile            Offline   
8  cfbda69e-5b4f-11ea-aca7-000d3a38a36f   Android App        Android App   
9  1143f032-440d-11ea-8b43-000d3a38a36f        Mobile             Mobile   

  first_order_date last_order_date last_order_date_online  \
0       2020-10-30      2021-02-2

## Step 3: Create Total_Shopping_Count and Total_Shopping_Price

* For omnichannel customers which is they buy from online and offline platforms we are going to calculate the total amount of the shopping count and the total amount of the money that has spent.

In [218]:
flodf["master_id"].nunique() # Every customer has an unique id !!

19945

In [219]:
flodf["order_num_total_ever_omnichannel"] = flodf["order_num_total_ever_online"] + flodf["order_num_total_ever_offline"]
flodf.head()

Unnamed: 0,master_id,order_channel,last_order_channel,first_order_date,last_order_date,last_order_date_online,last_order_date_offline,order_num_total_ever_online,order_num_total_ever_offline,customer_value_total_ever_offline,customer_value_total_ever_online,interested_in_categories_12,order_num_total_ever_omnichannel
0,cc294636-19f0-11eb-8d74-000d3a38a36f,Android App,Offline,2020-10-30,2021-02-26,2021-02-21,2021-02-26,4.0,1.0,139.99,799.38,[KADIN],5.0
1,f431bd5a-ab7b-11e9-a2fc-000d3a38a36f,Android App,Mobile,2017-02-08,2021-02-16,2021-02-16,2020-01-10,19.0,2.0,159.97,1853.58,"[ERKEK, COCUK, KADIN, AKTIFSPOR]",21.0
2,69b69676-1a40-11ea-941b-000d3a38a36f,Android App,Android App,2019-11-27,2020-11-27,2020-11-27,2019-12-01,3.0,2.0,189.97,395.35,"[ERKEK, KADIN]",5.0
3,1854e56c-491f-11eb-806e-000d3a38a36f,Android App,Android App,2021-01-06,2021-01-17,2021-01-17,2021-01-06,1.0,1.0,39.99,81.98,"[AKTIFCOCUK, COCUK]",2.0
4,d6ea1074-f1f5-11e9-9346-000d3a38a36f,Desktop,Desktop,2019-08-03,2021-03-07,2021-03-07,2019-08-03,1.0,1.0,49.99,159.99,[AKTIFSPOR],2.0


In [220]:
flodf["customer_value_total_ever_omnichannel"] = flodf["customer_value_total_ever_online"] + flodf["customer_value_total_ever_offline"]
flodf.head()

Unnamed: 0,master_id,order_channel,last_order_channel,first_order_date,last_order_date,last_order_date_online,last_order_date_offline,order_num_total_ever_online,order_num_total_ever_offline,customer_value_total_ever_offline,customer_value_total_ever_online,interested_in_categories_12,order_num_total_ever_omnichannel,customer_value_total_ever_omnichannel
0,cc294636-19f0-11eb-8d74-000d3a38a36f,Android App,Offline,2020-10-30,2021-02-26,2021-02-21,2021-02-26,4.0,1.0,139.99,799.38,[KADIN],5.0,939.37
1,f431bd5a-ab7b-11e9-a2fc-000d3a38a36f,Android App,Mobile,2017-02-08,2021-02-16,2021-02-16,2020-01-10,19.0,2.0,159.97,1853.58,"[ERKEK, COCUK, KADIN, AKTIFSPOR]",21.0,2013.55
2,69b69676-1a40-11ea-941b-000d3a38a36f,Android App,Android App,2019-11-27,2020-11-27,2020-11-27,2019-12-01,3.0,2.0,189.97,395.35,"[ERKEK, KADIN]",5.0,585.32
3,1854e56c-491f-11eb-806e-000d3a38a36f,Android App,Android App,2021-01-06,2021-01-17,2021-01-17,2021-01-06,1.0,1.0,39.99,81.98,"[AKTIFCOCUK, COCUK]",2.0,121.97
4,d6ea1074-f1f5-11e9-9346-000d3a38a36f,Desktop,Desktop,2019-08-03,2021-03-07,2021-03-07,2019-08-03,1.0,1.0,49.99,159.99,[AKTIFSPOR],2.0,209.98


## Step 4: Look at the variable types and make type conversion for dates

In [221]:
date_columns = [column for column in flodf if "date" in column]
#date_colums = df.columns[df.columns.str.contains("date")]
date_columns

['first_order_date',
 'last_order_date',
 'last_order_date_online',
 'last_order_date_offline']

In [222]:
# First mehod to do:
for column in date_columns:
    flodf[column] = pd.to_datetime(flodf[column])

In [223]:
# As a second method:
#flodf[date_columns].apply(pd.to_datetime)

In [224]:
flodf.dtypes

master_id                                        object
order_channel                                    object
last_order_channel                               object
first_order_date                         datetime64[ns]
last_order_date                          datetime64[ns]
last_order_date_online                   datetime64[ns]
last_order_date_offline                  datetime64[ns]
order_num_total_ever_online                     float64
order_num_total_ever_offline                    float64
customer_value_total_ever_offline               float64
customer_value_total_ever_online                float64
interested_in_categories_12                      object
order_num_total_ever_omnichannel                float64
customer_value_total_ever_omnichannel           float64
dtype: object

## Step 5: Look at the distrubiton of "number of customers", "total amount of product", "total amount of customer value"

In [225]:
print(flodf["master_id"].describe([0, 0.05, 0.50, 0.95, 0.99, 1]).T)

count                                    19945
unique                                   19945
top       cc294636-19f0-11eb-8d74-000d3a38a36f
freq                                         1
Name: master_id, dtype: object


In [226]:
flodf.groupby("order_channel").agg({"master_id":"count",
                             "order_num_total_ever_omnichannel":"sum",
                             "customer_value_total_ever_omnichannel":"sum"})

Unnamed: 0_level_0,master_id,order_num_total_ever_omnichannel,customer_value_total_ever_omnichannel
order_channel,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Android App,9495,52269.0,7819062.76
Desktop,2735,10920.0,1610321.46
Ios App,2833,15351.0,2525999.93
Mobile,4882,21679.0,3028183.16


In [227]:
flodf.sort_values("customer_value_total_ever_omnichannel",ascending=False).head(10)

Unnamed: 0,master_id,order_channel,last_order_channel,first_order_date,last_order_date,last_order_date_online,last_order_date_offline,order_num_total_ever_online,order_num_total_ever_offline,customer_value_total_ever_offline,customer_value_total_ever_online,interested_in_categories_12,order_num_total_ever_omnichannel,customer_value_total_ever_omnichannel
11150,5d1c466a-9cfd-11e9-9897-000d3a38a36f,Android App,Desktop,2013-10-11,2021-04-30,2021-04-30,2020-12-24,200.0,2.0,684.97,45220.13,"[AKTIFCOCUK, ERKEK, COCUK, KADIN, AKTIFSPOR]",202.0,45905.1
4315,d5ef8058-a5c6-11e9-a2fc-000d3a38a36f,Android App,Android App,2018-08-06,2021-02-23,2021-02-23,2020-07-06,67.0,1.0,130.49,36687.8,"[AKTIFCOCUK, ERKEK, KADIN, AKTIFSPOR]",68.0,36818.29
7613,73fd19aa-9e37-11e9-9897-000d3a38a36f,Ios App,Offline,2014-01-14,2021-05-18,2021-01-30,2021-05-18,81.0,1.0,1263.76,32654.34,"[ERKEK, COCUK, KADIN, AKTIFSPOR]",82.0,33918.1
13880,7137a5c0-7aad-11ea-8f20-000d3a38a36f,Ios App,Offline,2021-03-01,2021-04-13,2021-03-18,2021-04-13,10.0,1.0,538.94,30688.47,"[ERKEK, KADIN, AKTIFSPOR]",11.0,31227.41
9055,47a642fe-975b-11eb-8c2a-000d3a38a36f,Android App,Offline,2021-04-07,2021-04-27,2021-04-07,2021-04-27,1.0,3.0,18119.14,2587.2,[AKTIFSPOR],4.0,20706.34
7330,a4d534a2-5b1b-11eb-8dbd-000d3a38a36f,Desktop,Desktop,2020-02-16,2021-04-30,2021-04-30,2020-12-18,66.0,4.0,843.68,17599.89,"[ERKEK, KADIN, AKTIFSPOR]",70.0,18443.57
8068,d696c654-2633-11ea-8e1c-000d3a38a36f,Ios App,Ios App,2017-05-10,2021-04-13,2021-04-13,2019-08-15,69.0,1.0,82.48,16836.09,"[AKTIFCOCUK, ERKEK, COCUK, KADIN, AKTIFSPOR]",70.0,16918.57
163,fef57ffa-aae6-11e9-a2fc-000d3a38a36f,Mobile,Desktop,2016-11-08,2021-05-12,2021-05-12,2020-07-09,36.0,1.0,180.73,12545.37,"[ERKEK, AKTIFSPOR]",37.0,12726.1
7223,cba59206-9dd1-11e9-9897-000d3a38a36f,Android App,Android App,2013-02-21,2021-05-09,2021-05-09,2020-01-25,130.0,1.0,49.99,12232.25,"[AKTIFCOCUK, ERKEK, KADIN, AKTIFSPOR]",131.0,12282.24
18767,fc0ce7a4-9d87-11e9-9897-000d3a38a36f,Desktop,Desktop,2018-11-24,2020-11-11,2020-11-11,2019-12-06,18.0,2.0,64.97,12038.18,"[ERKEK, KADIN]",20.0,12103.15


In [228]:
flodf.sort_values("order_num_total_ever_omnichannel",ascending=False).head(10)

Unnamed: 0,master_id,order_channel,last_order_channel,first_order_date,last_order_date,last_order_date_online,last_order_date_offline,order_num_total_ever_online,order_num_total_ever_offline,customer_value_total_ever_offline,customer_value_total_ever_online,interested_in_categories_12,order_num_total_ever_omnichannel,customer_value_total_ever_omnichannel
11150,5d1c466a-9cfd-11e9-9897-000d3a38a36f,Android App,Desktop,2013-10-11,2021-04-30,2021-04-30,2020-12-24,200.0,2.0,684.97,45220.13,"[AKTIFCOCUK, ERKEK, COCUK, KADIN, AKTIFSPOR]",202.0,45905.1
7223,cba59206-9dd1-11e9-9897-000d3a38a36f,Android App,Android App,2013-02-21,2021-05-09,2021-05-09,2020-01-25,130.0,1.0,49.99,12232.25,"[AKTIFCOCUK, ERKEK, KADIN, AKTIFSPOR]",131.0,12282.24
8783,a57f4302-b1a8-11e9-89fa-000d3a38a36f,Android App,Offline,2019-08-07,2020-11-04,2020-09-07,2020-11-04,2.0,109.0,10239.46,143.98,"[AKTIFCOCUK, ERKEK, COCUK, KADIN, AKTIFSPOR]",111.0,10383.44
2619,fdbe8304-a7ab-11e9-a2fc-000d3a38a36f,Android App,Offline,2018-10-18,2020-06-30,2018-10-18,2020-06-30,1.0,87.0,8432.25,139.98,[ERKEK],88.0,8572.23
6322,329968c6-a0e2-11e9-a2fc-000d3a38a36f,Ios App,Ios App,2019-02-14,2021-04-05,2021-04-05,2020-02-17,2.0,81.0,3997.55,242.81,[ERKEK],83.0,4240.36
7613,73fd19aa-9e37-11e9-9897-000d3a38a36f,Ios App,Offline,2014-01-14,2021-05-18,2021-01-30,2021-05-18,81.0,1.0,1263.76,32654.34,"[ERKEK, COCUK, KADIN, AKTIFSPOR]",82.0,33918.1
9347,44d032ee-a0d4-11e9-a2fc-000d3a38a36f,Mobile,Mobile,2019-02-11,2021-02-11,2021-02-11,2020-12-24,3.0,74.0,4640.77,543.28,"[KADIN, AKTIFSPOR]",77.0,5184.05
10954,b27e241a-a901-11e9-a2fc-000d3a38a36f,Mobile,Mobile,2015-09-12,2021-04-01,2021-04-01,2019-10-08,72.0,3.0,292.93,5004.95,"[AKTIFCOCUK, ERKEK, KADIN, AKTIFSPOR]",75.0,5297.88
8068,d696c654-2633-11ea-8e1c-000d3a38a36f,Ios App,Ios App,2017-05-10,2021-04-13,2021-04-13,2019-08-15,69.0,1.0,82.48,16836.09,"[AKTIFCOCUK, ERKEK, COCUK, KADIN, AKTIFSPOR]",70.0,16918.57
7330,a4d534a2-5b1b-11eb-8dbd-000d3a38a36f,Desktop,Desktop,2020-02-16,2021-04-30,2021-04-30,2020-12-18,66.0,4.0,843.68,17599.89,"[ERKEK, KADIN, AKTIFSPOR]",70.0,18443.57


## Step 6: Functionize the data preprocessing

In [229]:
def convert_to_datetime(df):
    date_columns = [column for column in df if "date" in column]
    for column in date_columns:
        df[column] = pd.to_datetime(df[column])

In [230]:
def preprocess(df,sample_size=10):
    observe_dataframe_basic(df)
    df["order_num_total_ever_omnichannel"] = df["order_num_total_ever_online"] + df["order_num_total_ever_offline"]
    df["customer_value_total_ever_omnichannel"] = df["customer_value_total_ever_online"] + df["customer_value_total_ever_offline"]
    convert_to_datetime(df)
    observe_dataframe_basic(df)
    return df

In [231]:
df = pd.read_csv("./flo_data_20k.csv")
df_flo = preprocess(df)

--------------------- First 10 Observation -------------------------
                              master_id order_channel last_order_channel  \
0  cc294636-19f0-11eb-8d74-000d3a38a36f   Android App            Offline   
1  f431bd5a-ab7b-11e9-a2fc-000d3a38a36f   Android App             Mobile   
2  69b69676-1a40-11ea-941b-000d3a38a36f   Android App        Android App   
3  1854e56c-491f-11eb-806e-000d3a38a36f   Android App        Android App   
4  d6ea1074-f1f5-11e9-9346-000d3a38a36f       Desktop            Desktop   
5  e585280e-aae1-11e9-a2fc-000d3a38a36f       Desktop            Offline   
6  c445e4ee-6242-11ea-9d1a-000d3a38a36f   Android App        Android App   
7  3f1b4dc8-8a7d-11ea-8ec0-000d3a38a36f        Mobile            Offline   
8  cfbda69e-5b4f-11ea-aca7-000d3a38a36f   Android App        Android App   
9  1143f032-440d-11ea-8b43-000d3a38a36f        Mobile             Mobile   

  first_order_date last_order_date last_order_date_online  \
0       2020-10-30      2021-02-2

# TASK 2

## Step 1: What is recency, frequency and monetary for this problem?

1. Recency: last_order_date - first_order_date
2. Frequency: order_number_ever_total_omnichannel
3. Monetary: customer_value_total_ever_omnichannel

## Step 2: Calculate rfm (recency,frequency and monetary metrics)

In [232]:
flodf.columns

Index(['master_id', 'order_channel', 'last_order_channel', 'first_order_date',
       'last_order_date', 'last_order_date_online', 'last_order_date_offline',
       'order_num_total_ever_online', 'order_num_total_ever_offline',
       'customer_value_total_ever_offline', 'customer_value_total_ever_online',
       'interested_in_categories_12', 'order_num_total_ever_omnichannel',
       'customer_value_total_ever_omnichannel'],
      dtype='object')

In [233]:
flodf.head()

Unnamed: 0,master_id,order_channel,last_order_channel,first_order_date,last_order_date,last_order_date_online,last_order_date_offline,order_num_total_ever_online,order_num_total_ever_offline,customer_value_total_ever_offline,customer_value_total_ever_online,interested_in_categories_12,order_num_total_ever_omnichannel,customer_value_total_ever_omnichannel
0,cc294636-19f0-11eb-8d74-000d3a38a36f,Android App,Offline,2020-10-30,2021-02-26,2021-02-21,2021-02-26,4.0,1.0,139.99,799.38,[KADIN],5.0,939.37
1,f431bd5a-ab7b-11e9-a2fc-000d3a38a36f,Android App,Mobile,2017-02-08,2021-02-16,2021-02-16,2020-01-10,19.0,2.0,159.97,1853.58,"[ERKEK, COCUK, KADIN, AKTIFSPOR]",21.0,2013.55
2,69b69676-1a40-11ea-941b-000d3a38a36f,Android App,Android App,2019-11-27,2020-11-27,2020-11-27,2019-12-01,3.0,2.0,189.97,395.35,"[ERKEK, KADIN]",5.0,585.32
3,1854e56c-491f-11eb-806e-000d3a38a36f,Android App,Android App,2021-01-06,2021-01-17,2021-01-17,2021-01-06,1.0,1.0,39.99,81.98,"[AKTIFCOCUK, COCUK]",2.0,121.97
4,d6ea1074-f1f5-11e9-9346-000d3a38a36f,Desktop,Desktop,2019-08-03,2021-03-07,2021-03-07,2019-08-03,1.0,1.0,49.99,159.99,[AKTIFSPOR],2.0,209.98


In [234]:
last_date = max(flodf["last_order_date"])
last_date

Timestamp('2021-05-30 00:00:00')

Today date will be calculated as 2 days after the last date.

In [235]:
analysis_date = dt.datetime(2021, 6, 1)
analysis_date

datetime.datetime(2021, 6, 1, 0, 0)

In [236]:
rfm = flodf.groupby('master_id').agg({'last_order_date': lambda last_order_date: (analysis_date - last_order_date),
                                     'order_num_total_ever_omnichannel': lambda order_num_total_ever_omnichannel: order_num_total_ever_omnichannel,
                                     'customer_value_total_ever_omnichannel': lambda customer_value_total_ever_omnichannel: customer_value_total_ever_omnichannel})

In [237]:
rfm.tail()

Unnamed: 0_level_0,last_order_date,order_num_total_ever_omnichannel,customer_value_total_ever_omnichannel
master_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
fff1db94-afd9-11ea-b736-000d3a38a36f,183 days,5.0,796.87
fff4736a-60a4-11ea-8dd8-000d3a38a36f,67 days,2.0,318.87
fffacd34-ae14-11e9-a2fc-000d3a38a36f,257 days,6.0,983.92
fffacecc-ddc3-11e9-a848-000d3a38a36f,132 days,9.0,1362.2
fffe4b30-18e0-11ea-9213-000d3a38a36f,321 days,4.0,524.69


In [238]:
rfm.columns = ['recency', 'frequency', 'monetary']
rfm.head()


Unnamed: 0_level_0,recency,frequency,monetary
master_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
00016786-2f5a-11ea-bb80-000d3a38a36f,10 days,5.0,776.07
00034aaa-a838-11e9-a2fc-000d3a38a36f,298 days,3.0,269.47
000be838-85df-11ea-a90b-000d3a38a36f,213 days,4.0,722.69
000c1fe2-a8b7-11ea-8479-000d3a38a36f,27 days,7.0,874.16
000f5e3e-9dde-11ea-80cd-000d3a38a36f,20 days,7.0,1620.33


In [239]:
rfm.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
recency,19945.0,134 days 11:00:02.346452746,103 days 06:44:51.245382632,2 days 00:00:00,43 days 00:00:00,111 days 00:00:00,202 days 00:00:00,367 days 00:00:00
frequency,19945.0,5.02,4.74,2.00,3.00,4.00,6.00,202.00
monetary,19945.0,751.24,895.40,44.98,339.98,545.27,897.78,45905.10


# TASK 3

## Step 1: Calculate RFM score 

In [240]:
rfm["recency_score"] = pd.qcut(rfm['recency'], 5, labels=[5, 4, 3, 2, 1])
rfm["frequency_score"] = pd.qcut(rfm['frequency'].rank(method="first"), 5, labels=[1, 2, 3, 4, 5])
rfm["monetary_score"] = pd.qcut(rfm['monetary'], 5, labels=[1, 2, 3, 4, 5])

In [241]:
rfm

Unnamed: 0_level_0,recency,frequency,monetary,recency_score,frequency_score,monetary_score
master_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
00016786-2f5a-11ea-bb80-000d3a38a36f,10 days,5.00,776.07,5,4,4
00034aaa-a838-11e9-a2fc-000d3a38a36f,298 days,3.00,269.47,1,2,1
000be838-85df-11ea-a90b-000d3a38a36f,213 days,4.00,722.69,2,3,4
000c1fe2-a8b7-11ea-8479-000d3a38a36f,27 days,7.00,874.16,5,4,4
000f5e3e-9dde-11ea-80cd-000d3a38a36f,20 days,7.00,1620.33,5,4,5
...,...,...,...,...,...,...
fff1db94-afd9-11ea-b736-000d3a38a36f,183 days,5.00,796.87,2,4,4
fff4736a-60a4-11ea-8dd8-000d3a38a36f,67 days,2.00,318.87,4,2,2
fffacd34-ae14-11e9-a2fc-000d3a38a36f,257 days,6.00,983.92,1,4,4
fffacecc-ddc3-11e9-a848-000d3a38a36f,132 days,9.00,1362.20,3,5,5


In [242]:
rfm["RF_SCORE"] = (rfm['recency_score'].astype(str) +
                    rfm['frequency_score'].astype(str))

In [243]:
rfm.head(10)

Unnamed: 0_level_0,recency,frequency,monetary,recency_score,frequency_score,monetary_score,RF_SCORE
master_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
00016786-2f5a-11ea-bb80-000d3a38a36f,10 days,5.0,776.07,5,4,4,54
00034aaa-a838-11e9-a2fc-000d3a38a36f,298 days,3.0,269.47,1,2,1,12
000be838-85df-11ea-a90b-000d3a38a36f,213 days,4.0,722.69,2,3,4,23
000c1fe2-a8b7-11ea-8479-000d3a38a36f,27 days,7.0,874.16,5,4,4,54
000f5e3e-9dde-11ea-80cd-000d3a38a36f,20 days,7.0,1620.33,5,4,5,54
00136ce2-a562-11e9-a2fc-000d3a38a36f,203 days,2.0,359.45,2,1,2,21
00142f9a-7af6-11eb-8460-000d3a38a36f,25 days,3.0,404.94,5,2,2,52
0014778a-5b11-11ea-9a2c-000d3a38a36f,26 days,3.0,727.43,5,2,4,52
0018c6aa-ab6c-11e9-a2fc-000d3a38a36f,126 days,2.0,317.91,3,1,2,31
0022f41e-5597-11eb-9e65-000d3a38a36f,12 days,2.0,154.98,5,1,1,51


# TASK 4

## Step 1:  Make Segmenation On RF Score

In [244]:
seg_map = {
    r'[1-2][1-2]': 'hibernating',
    r'[1-2][3-4]': 'at_Risk',
    r'[1-2]5': 'cant_loose',
    r'3[1-2]': 'about_to_sleep',
    r'33': 'need_attention',
    r'[3-4][4-5]': 'loyal_customers',
    r'41': 'promising',
    r'51': 'new_customers',
    r'[4-5][2-3]': 'potential_loyalists',
    r'5[4-5]': 'champions'
}

In [245]:
rfm['segment'] = rfm['RF_SCORE'].replace(seg_map, regex=True)

In [246]:
rfm.reset_index(inplace=True)

# TASK 5 Action Timeee :)

## Step 1: Get the mean of  recency,frequency,monetary values for each segment

In [247]:
rfm[["segment", "master_id"]].groupby("segment").agg(["count"])

Unnamed: 0_level_0,master_id
Unnamed: 0_level_1,count
segment,Unnamed: 1_level_2
about_to_sleep,1643
at_Risk,3152
cant_loose,1194
champions,1920
hibernating,3589
loyal_customers,3375
need_attention,806
new_customers,673
potential_loyalists,2925
promising,668


In [248]:
rfm[["segment", "recency", "frequency", "monetary"]].groupby("segment").agg(["mean"])

Unnamed: 0_level_0,recency,frequency,monetary
Unnamed: 0_level_1,mean,mean,mean
segment,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
about_to_sleep,114 days 00:45:34.510042604,2.41,361.65
at_Risk,242 days 07:53:45.380710660,4.47,648.33
cant_loose,235 days 03:49:08.743718592,10.72,1481.65
champions,17 days 03:24:45,8.97,1410.71
hibernating,247 days 10:13:52.543884092,2.39,362.58
loyal_customers,82 days 13:23:24.800000,8.36,1216.26
need_attention,113 days 00:53:35.880893300,3.74,553.44
new_customers,17 days 23:25:45.913818722,2.0,344.05
potential_loyalists,36 days 20:52:25.846153846,3.31,533.74
promising,58 days 16:40:14.371257485,2.0,334.15


## Step 2: There are 2 cases for a spesific profile. Look for the profiles and be sure to save the customer id's that is suitable for a spesific profile to a csv file with the help of RFM.

### A) There is a brand new woman shoes that comes to FLO. This brand's products is generally higher than the mean of the general customers expectations. Because of that, FLO wants to be specilized to make commercials for this new brand to the customers that may be intersted in getting this brand's shoes. FLO is looking for the customers in "champions" and "loyal customers" section also has bought anything from Woman(Kadın) category to contact spesifically. Make sure to save a CSV file which includes all the customers that FLO want spesifically.

In [249]:
flodf.head()

Unnamed: 0,master_id,order_channel,last_order_channel,first_order_date,last_order_date,last_order_date_online,last_order_date_offline,order_num_total_ever_online,order_num_total_ever_offline,customer_value_total_ever_offline,customer_value_total_ever_online,interested_in_categories_12,order_num_total_ever_omnichannel,customer_value_total_ever_omnichannel
0,cc294636-19f0-11eb-8d74-000d3a38a36f,Android App,Offline,2020-10-30,2021-02-26,2021-02-21,2021-02-26,4.0,1.0,139.99,799.38,[KADIN],5.0,939.37
1,f431bd5a-ab7b-11e9-a2fc-000d3a38a36f,Android App,Mobile,2017-02-08,2021-02-16,2021-02-16,2020-01-10,19.0,2.0,159.97,1853.58,"[ERKEK, COCUK, KADIN, AKTIFSPOR]",21.0,2013.55
2,69b69676-1a40-11ea-941b-000d3a38a36f,Android App,Android App,2019-11-27,2020-11-27,2020-11-27,2019-12-01,3.0,2.0,189.97,395.35,"[ERKEK, KADIN]",5.0,585.32
3,1854e56c-491f-11eb-806e-000d3a38a36f,Android App,Android App,2021-01-06,2021-01-17,2021-01-17,2021-01-06,1.0,1.0,39.99,81.98,"[AKTIFCOCUK, COCUK]",2.0,121.97
4,d6ea1074-f1f5-11e9-9346-000d3a38a36f,Desktop,Desktop,2019-08-03,2021-03-07,2021-03-07,2019-08-03,1.0,1.0,49.99,159.99,[AKTIFSPOR],2.0,209.98


In [250]:
flodf["interested_in_categories_12"]

0                                 [KADIN]
1        [ERKEK, COCUK, KADIN, AKTIFSPOR]
2                          [ERKEK, KADIN]
3                     [AKTIFCOCUK, COCUK]
4                             [AKTIFSPOR]
                       ...               
19940                  [ERKEK, AKTIFSPOR]
19941                         [AKTIFSPOR]
19942                         [AKTIFSPOR]
19943                  [ERKEK, AKTIFSPOR]
19944                  [KADIN, AKTIFSPOR]
Name: interested_in_categories_12, Length: 19945, dtype: object

In [251]:
customers_in_woman_sector = [row for index,row in flodf.iterrows() if "KADIN" in row["interested_in_categories_12"]]

In [252]:
customer_woman_sector_df = pd.DataFrame(customers_in_woman_sector)
customer_woman_sector_df["interested_in_categories_12"].tail()

19926           [KADIN, AKTIFSPOR]
19932    [ERKEK, KADIN, AKTIFSPOR]
19934               [ERKEK, KADIN]
19937               [COCUK, KADIN]
19944           [KADIN, AKTIFSPOR]
Name: interested_in_categories_12, dtype: object

In [253]:
rfm.head()

Unnamed: 0,master_id,recency,frequency,monetary,recency_score,frequency_score,monetary_score,RF_SCORE,segment
0,00016786-2f5a-11ea-bb80-000d3a38a36f,10 days,5.0,776.07,5,4,4,54,champions
1,00034aaa-a838-11e9-a2fc-000d3a38a36f,298 days,3.0,269.47,1,2,1,12,hibernating
2,000be838-85df-11ea-a90b-000d3a38a36f,213 days,4.0,722.69,2,3,4,23,at_Risk
3,000c1fe2-a8b7-11ea-8479-000d3a38a36f,27 days,7.0,874.16,5,4,4,54,champions
4,000f5e3e-9dde-11ea-80cd-000d3a38a36f,20 days,7.0,1620.33,5,4,5,54,champions


In [254]:
rfm.set_index(rfm["master_id"],inplace=True)
rfm.drop(['master_id'],axis=1,inplace=True)

In [255]:
customer_woman_sector_df.set_index(customer_woman_sector_df["master_id"],inplace=True)
customer_woman_sector_df.drop(['master_id'],axis=1,inplace=True)

In [256]:
customers_speciliazed = pd.merge(rfm, customer_woman_sector_df, left_index=True, right_index=True)
customers_speciliazed["interested_in_categories_12"].value_counts()

[KADIN]                                         2158
[KADIN, AKTIFSPOR]                              1352
[ERKEK, KADIN]                                   848
[ERKEK, KADIN, AKTIFSPOR]                        775
[COCUK, KADIN]                                   443
[COCUK, KADIN, AKTIFSPOR]                        241
[AKTIFCOCUK, ERKEK, COCUK, KADIN, AKTIFSPOR]     223
[ERKEK, COCUK, KADIN, AKTIFSPOR]                 213
[AKTIFCOCUK, COCUK, KADIN]                       213
[AKTIFCOCUK, KADIN]                              210
[ERKEK, COCUK, KADIN]                            204
[AKTIFCOCUK, COCUK, KADIN, AKTIFSPOR]            203
[AKTIFCOCUK, KADIN, AKTIFSPOR]                   184
[AKTIFCOCUK, ERKEK, KADIN, AKTIFSPOR]            132
[AKTIFCOCUK, ERKEK, COCUK, KADIN]                115
[AKTIFCOCUK, ERKEK, KADIN]                        89
Name: interested_in_categories_12, dtype: int64

In [257]:
result_df = pd.DataFrame()
result_df = customers_speciliazed[(customers_speciliazed["segment"] == "champions") | (customers_speciliazed["segment"] == "loyal_customers")]

In [258]:
result_df["segment"].value_counts()

loyal_customers    1591
champions           896
Name: segment, dtype: int64

In [259]:
result_df["interested_in_categories_12"].value_counts()

[KADIN, AKTIFSPOR]                              411
[ERKEK, KADIN, AKTIFSPOR]                       353
[KADIN]                                         331
[ERKEK, KADIN]                                  209
[AKTIFCOCUK, ERKEK, COCUK, KADIN, AKTIFSPOR]    169
[AKTIFCOCUK, COCUK, KADIN, AKTIFSPOR]           130
[ERKEK, COCUK, KADIN, AKTIFSPOR]                129
[COCUK, KADIN, AKTIFSPOR]                       120
[COCUK, KADIN]                                  119
[AKTIFCOCUK, COCUK, KADIN]                       96
[AKTIFCOCUK, ERKEK, KADIN, AKTIFSPOR]            83
[AKTIFCOCUK, KADIN]                              82
[ERKEK, COCUK, KADIN]                            82
[AKTIFCOCUK, KADIN, AKTIFSPOR]                   80
[AKTIFCOCUK, ERKEK, COCUK, KADIN]                60
[AKTIFCOCUK, ERKEK, KADIN]                       33
Name: interested_in_categories_12, dtype: int64

In [260]:
# Without merging:
#rfm[rfm["master_id"].isin(customer_women_sector["master_id"]) & rfm["segment"].isin(["champions","loyal_customers"])]["master_id"]

In [261]:
new_df = pd.DataFrame()
new_df["special_customer_id_for_new_brand"] = result_df.index
new_df.to_csv("special_brand_customers.csv",index=False)

### B) For man and children (ERKEK and COCUK) productions there is gonna be %40 percentage of sale. This sale focus on the customers who has interest in the categories that has mentioned and the customers was active back then and the customers should not be lost by FLO. The main target population is sleepy and new customers. FLO wants to get these customers id in a csv file

In [262]:
flodf.head()

Unnamed: 0,master_id,order_channel,last_order_channel,first_order_date,last_order_date,last_order_date_online,last_order_date_offline,order_num_total_ever_online,order_num_total_ever_offline,customer_value_total_ever_offline,customer_value_total_ever_online,interested_in_categories_12,order_num_total_ever_omnichannel,customer_value_total_ever_omnichannel
0,cc294636-19f0-11eb-8d74-000d3a38a36f,Android App,Offline,2020-10-30,2021-02-26,2021-02-21,2021-02-26,4.0,1.0,139.99,799.38,[KADIN],5.0,939.37
1,f431bd5a-ab7b-11e9-a2fc-000d3a38a36f,Android App,Mobile,2017-02-08,2021-02-16,2021-02-16,2020-01-10,19.0,2.0,159.97,1853.58,"[ERKEK, COCUK, KADIN, AKTIFSPOR]",21.0,2013.55
2,69b69676-1a40-11ea-941b-000d3a38a36f,Android App,Android App,2019-11-27,2020-11-27,2020-11-27,2019-12-01,3.0,2.0,189.97,395.35,"[ERKEK, KADIN]",5.0,585.32
3,1854e56c-491f-11eb-806e-000d3a38a36f,Android App,Android App,2021-01-06,2021-01-17,2021-01-17,2021-01-06,1.0,1.0,39.99,81.98,"[AKTIFCOCUK, COCUK]",2.0,121.97
4,d6ea1074-f1f5-11e9-9346-000d3a38a36f,Desktop,Desktop,2019-08-03,2021-03-07,2021-03-07,2019-08-03,1.0,1.0,49.99,159.99,[AKTIFSPOR],2.0,209.98


In [263]:
customers_manChild_sector_df = [row for index,row in flodf.iterrows() if ("ERKEK" in row["interested_in_categories_12"]) or ("COCUK" in row["interested_in_categories_12"]) ]

In [264]:
customers_manChild_sector_df = pd.DataFrame(customers_manChild_sector_df)
customers_manChild_sector_df.head()

Unnamed: 0,master_id,order_channel,last_order_channel,first_order_date,last_order_date,last_order_date_online,last_order_date_offline,order_num_total_ever_online,order_num_total_ever_offline,customer_value_total_ever_offline,customer_value_total_ever_online,interested_in_categories_12,order_num_total_ever_omnichannel,customer_value_total_ever_omnichannel
1,f431bd5a-ab7b-11e9-a2fc-000d3a38a36f,Android App,Mobile,2017-02-08,2021-02-16,2021-02-16,2020-01-10,19.0,2.0,159.97,1853.58,"[ERKEK, COCUK, KADIN, AKTIFSPOR]",21.0,2013.55
2,69b69676-1a40-11ea-941b-000d3a38a36f,Android App,Android App,2019-11-27,2020-11-27,2020-11-27,2019-12-01,3.0,2.0,189.97,395.35,"[ERKEK, KADIN]",5.0,585.32
3,1854e56c-491f-11eb-806e-000d3a38a36f,Android App,Android App,2021-01-06,2021-01-17,2021-01-17,2021-01-06,1.0,1.0,39.99,81.98,"[AKTIFCOCUK, COCUK]",2.0,121.97
7,3f1b4dc8-8a7d-11ea-8ec0-000d3a38a36f,Mobile,Offline,2020-05-15,2020-08-12,2020-05-15,2020-08-12,1.0,1.0,49.99,113.64,[COCUK],2.0,163.63
8,cfbda69e-5b4f-11ea-aca7-000d3a38a36f,Android App,Android App,2020-01-23,2021-03-07,2021-03-07,2020-01-25,3.0,2.0,120.48,934.21,"[ERKEK, COCUK, KADIN]",5.0,1054.69


In [265]:
customers_manChild_sector_df["interested_in_categories_12"].value_counts()

[ERKEK]                                         1973
[ERKEK, AKTIFSPOR]                              1178
[ERKEK, KADIN]                                   848
[COCUK]                                          836
[ERKEK, KADIN, AKTIFSPOR]                        775
[AKTIFCOCUK]                                     679
[COCUK, KADIN]                                   443
[AKTIFCOCUK, COCUK]                              349
[COCUK, AKTIFSPOR]                               317
[AKTIFCOCUK, AKTIFSPOR]                          317
[COCUK, KADIN, AKTIFSPOR]                        241
[AKTIFCOCUK, ERKEK, COCUK, KADIN, AKTIFSPOR]     223
[ERKEK, COCUK]                                   215
[AKTIFCOCUK, COCUK, KADIN]                       213
[ERKEK, COCUK, KADIN, AKTIFSPOR]                 213
[AKTIFCOCUK, KADIN]                              210
[ERKEK, COCUK, KADIN]                            204
[AKTIFCOCUK, COCUK, KADIN, AKTIFSPOR]            203
[AKTIFCOCUK, COCUK, AKTIFSPOR]                

In [266]:
customers_manChild_sector_df.set_index(customers_manChild_sector_df["master_id"],inplace=True)
customers_manChild_sector_df.drop(['master_id'],axis=1,inplace=True)

In [267]:
flodf.set_index(flodf["master_id"],inplace=True)
flodf.drop(['master_id'],axis=1,inplace=True)

In [268]:
customers_for_sales = pd.merge(rfm, customers_manChild_sector_df, left_index=True, right_index=True)
customers_for_sales["interested_in_categories_12"].value_counts()

[ERKEK]                                         1973
[ERKEK, AKTIFSPOR]                              1178
[ERKEK, KADIN]                                   848
[COCUK]                                          836
[ERKEK, KADIN, AKTIFSPOR]                        775
[AKTIFCOCUK]                                     679
[COCUK, KADIN]                                   443
[AKTIFCOCUK, COCUK]                              349
[AKTIFCOCUK, AKTIFSPOR]                          317
[COCUK, AKTIFSPOR]                               317
[COCUK, KADIN, AKTIFSPOR]                        241
[AKTIFCOCUK, ERKEK, COCUK, KADIN, AKTIFSPOR]     223
[ERKEK, COCUK]                                   215
[AKTIFCOCUK, COCUK, KADIN]                       213
[ERKEK, COCUK, KADIN, AKTIFSPOR]                 213
[AKTIFCOCUK, KADIN]                              210
[ERKEK, COCUK, KADIN]                            204
[AKTIFCOCUK, COCUK, KADIN, AKTIFSPOR]            203
[AKTIFCOCUK, COCUK, AKTIFSPOR]                

In [269]:
customers_for_sales.head()

Unnamed: 0_level_0,recency,frequency,monetary,recency_score,frequency_score,monetary_score,RF_SCORE,segment,order_channel,last_order_channel,first_order_date,last_order_date,last_order_date_online,last_order_date_offline,order_num_total_ever_online,order_num_total_ever_offline,customer_value_total_ever_offline,customer_value_total_ever_online,interested_in_categories_12,order_num_total_ever_omnichannel,customer_value_total_ever_omnichannel
master_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
00016786-2f5a-11ea-bb80-000d3a38a36f,10 days,5.0,776.07,5,4,4,54,champions,Mobile,Mobile,2019-11-19,2021-05-22,2021-05-22,2020-10-05,2.0,3.0,427.19,348.88,"[ERKEK, COCUK, AKTIFSPOR]",5.0,776.07
00034aaa-a838-11e9-a2fc-000d3a38a36f,298 days,3.0,269.47,1,2,1,12,hibernating,Desktop,Offline,2015-11-09,2020-08-07,2015-11-09,2020-08-07,1.0,2.0,150.48,118.99,"[ERKEK, KADIN]",3.0,269.47
000be838-85df-11ea-a90b-000d3a38a36f,213 days,4.0,722.69,2,3,4,23,at_Risk,Android App,Offline,2020-04-16,2020-10-31,2020-04-23,2020-10-31,3.0,1.0,365.72,356.97,"[AKTIFCOCUK, AKTIFSPOR]",4.0,722.69
000c1fe2-a8b7-11ea-8479-000d3a38a36f,27 days,7.0,874.16,5,4,4,54,champions,Android App,Android App,2020-06-07,2021-05-05,2021-05-05,2021-01-29,3.0,4.0,531.19,342.97,"[AKTIFCOCUK, ERKEK, COCUK, KADIN, AKTIFSPOR]",7.0,874.16
000f5e3e-9dde-11ea-80cd-000d3a38a36f,20 days,7.0,1620.33,5,4,5,54,champions,Android App,Android App,2020-07-13,2021-05-12,2021-05-12,2021-03-13,5.0,2.0,299.98,1320.35,"[ERKEK, AKTIFSPOR]",7.0,1620.33


In [270]:
sale_df = pd.DataFrame()
sale_df = customers_for_sales[(customers_for_sales["segment"].isin(["cant_loose","atrisk","hibernating","new_customers"]))]

In [271]:
sale_df.head()

Unnamed: 0_level_0,recency,frequency,monetary,recency_score,frequency_score,monetary_score,RF_SCORE,segment,order_channel,last_order_channel,first_order_date,last_order_date,last_order_date_online,last_order_date_offline,order_num_total_ever_online,order_num_total_ever_offline,customer_value_total_ever_offline,customer_value_total_ever_online,interested_in_categories_12,order_num_total_ever_omnichannel,customer_value_total_ever_omnichannel
master_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
00034aaa-a838-11e9-a2fc-000d3a38a36f,298 days,3.0,269.47,1,2,1,12,hibernating,Desktop,Offline,2015-11-09,2020-08-07,2015-11-09,2020-08-07,1.0,2.0,150.48,118.99,"[ERKEK, KADIN]",3.0,269.47
00263f1a-210a-11ea-b50a-000d3a38a36f,362 days,3.0,501.94,1,2,3,12,hibernating,Android App,Android App,2019-08-04,2020-06-04,2020-06-04,2019-09-26,1.0,2.0,309.95,191.99,[ERKEK],3.0,501.94
0033f078-7359-11ea-92d0-000d3a38a36f,360 days,2.0,153.98,1,1,1,11,hibernating,Android App,Android App,2019-12-08,2020-06-06,2020-06-06,2019-12-08,1.0,1.0,99.99,53.99,[ERKEK],2.0,153.98
006bb20e-a9a8-11e9-a2fc-000d3a38a36f,188 days,3.0,620.41,2,2,3,22,hibernating,Android App,Offline,2017-01-14,2020-11-25,2020-04-28,2020-11-25,2.0,1.0,214.46,405.95,"[ERKEK, AKTIFSPOR]",3.0,620.41
007cdfe4-1f54-11ea-87bf-000d3a38a36f,355 days,8.0,778.88,1,5,4,15,cant_loose,Android App,Android App,2019-11-21,2020-06-11,2020-06-11,2020-03-05,2.0,6.0,618.9,159.98,[ERKEK],8.0,778.88


In [272]:
sale_df["segment"].value_counts()

hibernating      1785
cant_loose        814
new_customers     171
Name: segment, dtype: int64

In [273]:
sale_df["interested_in_categories_12"].value_counts()

[ERKEK]                                         740
[ERKEK, AKTIFSPOR]                              284
[ERKEK, KADIN]                                  266
[COCUK]                                         245
[AKTIFCOCUK]                                    199
[ERKEK, KADIN, AKTIFSPOR]                       154
[COCUK, KADIN]                                  127
[AKTIFCOCUK, COCUK]                              84
[COCUK, AKTIFSPOR]                               69
[AKTIFCOCUK, AKTIFSPOR]                          66
[ERKEK, COCUK]                                   48
[ERKEK, COCUK, KADIN]                            44
[AKTIFCOCUK, COCUK, KADIN]                       41
[ERKEK, COCUK, KADIN, AKTIFSPOR]                 41
[COCUK, KADIN, AKTIFSPOR]                        37
[AKTIFCOCUK, KADIN]                              37
[AKTIFCOCUK, COCUK, AKTIFSPOR]                   32
[AKTIFCOCUK, KADIN, AKTIFSPOR]                   30
[AKTIFCOCUK, COCUK, KADIN, AKTIFSPOR]            29
[AKTIFCOCUK,

In [274]:
save_df = pd.DataFrame()
save_df["customer_id_for_sales"] = sale_df.index
save_df.to_csv("sale_maleOrChildren_customers.csv",index=False)

# BONUS: Functionize

In [283]:
def create_rfm(dataframe):
    # PREPROCESS
    dataframe["order_num_total"] = dataframe["order_num_total_ever_online"]+dataframe["order_num_total_ever_offline"]
    dataframe["customer_value_total"] =  dataframe["customer_value_total_ever_online"]+ dataframe["customer_value_total_ever_offline"]
    date_columns = dataframe.columns[dataframe.columns.str.contains("date")]
    dataframe[date_columns] = dataframe[date_columns].apply(pd.to_datetime)
    #RFM
    dataframe["last_order_date"].max() # 2021-50-30
    analysis_date = dt.datetime(2021,6,1)
    rfm = pd.DataFrame()
    rfm["customer_id"] = dataframe["master_id"]
    rfm["recency"] = (analysis_date - dataframe["last_order_date"]).astype('timedelta64[D]')
    rfm["frequency"] = dataframe["order_num_total"]
    rfm["monetary"] = dataframe["customer_value_total"]
    #RF vand RFM scores
    rfm["recency_score"] = pd.qcut(rfm['recency'],5,labels=[5,4,3,2,1])
    rfm["frequency_score"] = pd.qcut(rfm['frequency'].rank(method="first"),5,labels=[1,2,3,4,5])
    rfm["monetary_score"] = pd.qcut(rfm['monetary'],5,labels=[1,2,3,4,5])
    rfm["RF_SCORE"] = (rfm["recency_score"].astype(str) + rfm["frequency_score"].astype(str))
    rfm["RFM_SCORE"] = (rfm["recency_score"].astype(str) + rfm["frequency_score"].astype(str) + rfm["monetary_score"].astype(str))
    #SEGMENTATION
    seg_map = {
        r'[1-2][1-2]': 'hibernating',
        r'[1-2][3-4]': 'at_Risk',
        r'[1-2]5': 'cant_loose',
        r'3[1-2]': 'about_to_sleep',
        r'33': 'need_attention',
        r'[3-4][4-5]': 'loyal_customers',
        r'41': 'promising',
        r'51': 'new_customers',
        r'[4-5][2-3]': 'potential_loyalists',
        r'5[4-5]': 'champions'
    }
    rfm['segment'] = rfm["RF_SCORE"].replace(seg_map,regex=True)

    return rfm[["customer_id","RF_SCORE","RFM_SCORE","segment"]]


In [284]:
flo_data_.columns

Index(['master_id', 'order_channel', 'last_order_channel', 'first_order_date',
       'last_order_date', 'last_order_date_online', 'last_order_date_offline',
       'order_num_total_ever_online', 'order_num_total_ever_offline',
       'customer_value_total_ever_offline', 'customer_value_total_ever_online',
       'interested_in_categories_12', 'order_num_total',
       'customer_value_total'],
      dtype='object')

In [285]:
rfm_df = create_rfm(flo_data_)

In [286]:
rfm_df

Unnamed: 0,customer_id,RF_SCORE,RFM_SCORE,segment
0,cc294636-19f0-11eb-8d74-000d3a38a36f,34,344,loyal_customers
1,f431bd5a-ab7b-11e9-a2fc-000d3a38a36f,35,355,loyal_customers
2,69b69676-1a40-11ea-941b-000d3a38a36f,24,243,at_Risk
3,1854e56c-491f-11eb-806e-000d3a38a36f,31,311,about_to_sleep
4,d6ea1074-f1f5-11e9-9346-000d3a38a36f,31,311,about_to_sleep
...,...,...,...,...
19940,727e2b6e-ddd4-11e9-a848-000d3a38a36f,13,132,at_Risk
19941,25cd53d4-61bf-11ea-8dd8-000d3a38a36f,22,222,hibernating
19942,8aea4c2a-d6fc-11e9-93bc-000d3a38a36f,53,533,potential_loyalists
19943,e50bb46c-ff30-11e9-a5e8-000d3a38a36f,34,344,loyal_customers
