In [1]:
# Importing libraries for analysis
import numpy as np
import pandas as pd
import datetime
from dateutil import relativedelta
import calendar
from operator import attrgetter

In [2]:
# Downloading datasets for analysis
customers = pd.read_csv('https://getfile.dokpub.com/yandex/get/https://disk.yandex.ru/d/hfXo-PVswob2Pw')
orders = pd.read_csv('https://getfile.dokpub.com/yandex/get/https://disk.yandex.ru/d/gcSKaZgvvyYDeg')
items = pd.read_csv('https://getfile.dokpub.com/yandex/get/https://disk.yandex.ru/d/4yKVQbEMJoUU6A')

* customers - таблица с уникальными индетификаторами пользователей
* orders - таблица заказов
* items - таблица товарных позиций, входящие в заказ

In [3]:
customers.head()

Unnamed: 0,customer_id,customer_unique_id,customer_zip_code_prefix,customer_city,customer_state
0,06b8999e2fba1a1fbc88172c00ba8bc7,861eff4711a542e4b93843c6dd7febb0,14409,franca,SP
1,18955e83d337fd6b2def6b18a428ac77,290c77bc529b7ac935b93aa66c333dc3,9790,sao bernardo do campo,SP
2,4e7b3e00288586ebd08712fdd0374a03,060e732b5b29e8181a18229c7b0b2b5e,1151,sao paulo,SP
3,b2b6027bc5c5109e529d4dc6358b12c3,259dac757896d24d7702b9acbbff3f3c,8775,mogi das cruzes,SP
4,4f2d8ab171c80ec8364f7c12e35b23ad,345ecd01c38d18a9036ed96c73b8d066,13056,campinas,SP


In [4]:
orders.head()

Unnamed: 0,order_id,customer_id,order_status,order_purchase_timestamp,order_approved_at,order_delivered_carrier_date,order_delivered_customer_date,order_estimated_delivery_date
0,e481f51cbdc54678b7cc49136f2d6af7,9ef432eb6251297304e76186b10a928d,delivered,2017-10-02 10:56:33,2017-10-02 11:07:15,2017-10-04 19:55:00,2017-10-10 21:25:13,2017-10-18 00:00:00
1,53cdb2fc8bc7dce0b6741e2150273451,b0830fb4747a6c6d20dea0b8c802d7ef,delivered,2018-07-24 20:41:37,2018-07-26 03:24:27,2018-07-26 14:31:00,2018-08-07 15:27:45,2018-08-13 00:00:00
2,47770eb9100c2d0c44946d9cf07ec65d,41ce2a54c0b03bf3443c3d931a367089,delivered,2018-08-08 08:38:49,2018-08-08 08:55:23,2018-08-08 13:50:00,2018-08-17 18:06:29,2018-09-04 00:00:00
3,949d5b44dbf5de918fe9c16f97b45f8a,f88197465ea7920adcdbec7375364d82,delivered,2017-11-18 19:28:06,2017-11-18 19:45:59,2017-11-22 13:39:59,2017-12-02 00:28:42,2017-12-15 00:00:00
4,ad21c59c0840e6cb83a9ceb5573f8159,8ab97904e6daea8866dbdbc4fb7aad2c,delivered,2018-02-13 21:18:39,2018-02-13 22:20:29,2018-02-14 19:46:34,2018-02-16 18:17:02,2018-02-26 00:00:00


In [5]:
items.head()

Unnamed: 0,order_id,order_item_id,product_id,seller_id,shipping_limit_date,price,freight_value
0,00010242fe8c5a6d1ba2dd792cb16214,1,4244733e06e7ecb4970a6e2683c13e61,48436dade18ac8b2bce089ec2a041202,2017-09-19 09:45:35,58.9,13.29
1,00018f77f2f0320c557190d7a144bdd3,1,e5f2d52b802189ee658865ca93d83a8f,dd7ddc04e1b6c2c614352b383efe2d36,2017-05-03 11:05:13,239.9,19.93
2,000229ec398224ef6ca0657da4fc703e,1,c777355d18b72b67abbeef9df44fd0fd,5b51032eddd242adc84c38acab88f23d,2018-01-18 14:48:30,199.0,17.87
3,00024acbcdf0a6daa1e931b038114c75,1,7634da152a4610f1595efa32f14722fc,9d7a1d34a5052409006425275ba1c2b4,2018-08-15 10:10:18,12.99,12.79
4,00042b26cf59d7ce69dfabb4e55b4fd9,1,ac6c3623068f30de03045865e4e10089,df560393f3a51e74553ab94004ba5c87,2017-02-13 13:57:51,199.9,18.14


### Finding the number of users who made only one purchase

In [6]:
# Merging two dataframes
df1 = orders.merge(customers)

In [7]:
# Replacing all NaN values by zero
df1 = df1.fillna(0)

# Filtering of unpaid orders
df1 = df1.query('order_approved_at != 0')

In [8]:
# Grouping purchases by paid orders
df1 = df1.groupby('order_approved_at', as_index=False)\
            .agg({'order_id': 'count'})\
            .rename(columns={'order_id': 'order_count'})

In [9]:
# Finding users who have made only one purchase
single_order = df1.query('order_count == 1').shape[0]
print('The data contains {} users who have made only one purchase'.format(single_order))

The data contains 83688 users who have made only one purchase


### Determining the average number of non-deliverable orders per month

In [10]:
# Formatting the date in database
orders.order_approved_at = pd.to_datetime(orders.order_approved_at)

In [11]:
# Finding the earliest date
start_order_date = orders.order_approved_at.min()

# Finding the latest date
end_order_date = orders.order_approved_at.max()

In [12]:
# Determining the number of months contained in the dataset
month_count = round((end_order_date - start_order_date).days / 30)

print('The dataset has {} months'.format(month_count))

The dataset has 24 months


In [13]:
#Let's see the number of orders and information on all order statuses except 'delivered'
orders.query("order_status != 'delivered'")\
    .order_status\
    .value_counts()

shipped        1107
canceled        625
unavailable     609
invoiced        314
processing      301
created           5
approved          2
Name: order_status, dtype: int64

##### *The database contains a large number of undelivered orders to the customers for various reasons. Let's take a closer look at all the reasons for non-delivery of items.*

In [14]:
# Creating a function to determine the average value for different reasons
def check_details(status):
    return round(orders.query("order_status == @status")\
                        .order_status\
                        .count() / month_count, 2)

In [15]:
# Shipment of items from warehouse
status_shipped = check_details('shipped')
print(f"""The average number of orders per month that were not delivered due to the fact 
that the products are shipped from the warehouse and the items are in transit to the recipient = {status_shipped} orders.""")

# Canceled orders
status_canceled = check_details('canceled')
print(f"""\nAverage number of orders per month that were not delivered 
because the recipient canceled the order = {status_canceled} orders.""")

# Unavailable orders
status_unavailable = check_details('unavailable')
print(f"""\nAverage number of orders per month that were not delivered 
because the order is now unavailable = {status_unavailable} orders.""")

# Invoiced orders
status_invoiced = check_details('invoiced')
print(f"""\nAverage number of orders per month that were not delivered 
due to waiting for customer order formation = {status_invoiced} orders.""")

# Processing the assembly of items
status_processing = check_details('processing')
print(f"""\nAverage number of orders per month that were not delivered 
because a customer's order was in processing = {status_processing} orders.""")

# Created orders
status_created = check_details('created')
print(f"""\nAverage number of orders per month that were not delivered
because a customer just created an order = {status_created} orders.""")

# Approved orders
status_approved = check_details('approved')
print(f"""\nAverage number of orders per month that were not delivered
due to customers confirmation = {status_approved} orders.""")

The average number of orders per month that were not delivered due to the fact 
that the products are shipped from the warehouse and the items are in transit to the recipient = 46.12 orders.

Average number of orders per month that were not delivered 
because the recipient canceled the order = 26.04 orders.

Average number of orders per month that were not delivered 
because the order is now unavailable = 25.38 orders.

Average number of orders per month that were not delivered 
due to waiting for customer order formation = 13.08 orders.

Average number of orders per month that were not delivered 
because a customer's order was in processing = 12.54 orders.

Average number of orders per month that were not delivered
because a customer just created an order = 0.21 orders.

Average number of orders per month that were not delivered
due to customers confirmation = 0.08 orders.


### Determing the favorite day of the week on which a product is most often purchased

In [16]:
# Merging databases and leaving columns 'product_id' and 'order_purchase_timestamp'
df2 = items.merge(orders)[['product_id', 'order_purchase_timestamp']]

df2.head()

Unnamed: 0,product_id,order_purchase_timestamp
0,4244733e06e7ecb4970a6e2683c13e61,2017-09-13 08:59:02
1,e5f2d52b802189ee658865ca93d83a8f,2017-04-26 10:53:06
2,c777355d18b72b67abbeef9df44fd0fd,2018-01-14 14:33:31
3,7634da152a4610f1595efa32f14722fc,2018-08-08 10:00:35
4,ac6c3623068f30de03045865e4e10089,2017-02-04 13:57:51


In [17]:
# Creating a function to convert the date to the format of the day of the week
def get_weekdays(d):
    return datetime.datetime.strptime(d, '%Y-%m-%d %H:%M:%S').strftime('%A')

In [18]:
# Adding a new column with the day of the week of the purchase
df2['weekday'] = df2.order_purchase_timestamp.apply(get_weekdays)

df2.head()

Unnamed: 0,product_id,order_purchase_timestamp,weekday
0,4244733e06e7ecb4970a6e2683c13e61,2017-09-13 08:59:02,Wednesday
1,e5f2d52b802189ee658865ca93d83a8f,2017-04-26 10:53:06,Wednesday
2,c777355d18b72b67abbeef9df44fd0fd,2018-01-14 14:33:31,Sunday
3,7634da152a4610f1595efa32f14722fc,2018-08-08 10:00:35,Wednesday
4,ac6c3623068f30de03045865e4e10089,2017-02-04 13:57:51,Saturday


In [19]:
# Creating a dataframe with a count of the number of orders of each item
df3 = df2.groupby(['product_id', 'weekday'], as_index=False)\
        .agg({'order_purchase_timestamp': 'count'})

df3.head()

Unnamed: 0,product_id,weekday,order_purchase_timestamp
0,00066f42aeeb9f3007548bb9d3f33c38,Sunday,1
1,00088930e925c41fd95ebfe695fd2655,Tuesday,1
2,0009406fd7479715e4bef61dd91f2462,Thursday,1
3,000b8f95fcb9e0096488278317764d19,Friday,1
4,000b8f95fcb9e0096488278317764d19,Wednesday,1


In [20]:
# Creating a dataframe with a count of the maximum number of orders of each item
df4 = df3.groupby('product_id', as_index=False)\
        .agg({'order_purchase_timestamp': 'max'})

df4.head()

Unnamed: 0,product_id,order_purchase_timestamp
0,00066f42aeeb9f3007548bb9d3f33c38,1
1,00088930e925c41fd95ebfe695fd2655,1
2,0009406fd7479715e4bef61dd91f2462,1
3,000b8f95fcb9e0096488278317764d19,1
4,000d9be29b5207b54e86aa1b1ac54872,1


In [21]:
# Merging the two dataframes to determine the most common purchase day of the week
df5 = df4.merge(df3)[['product_id', 'weekday']]

df5

Unnamed: 0,product_id,weekday
0,00066f42aeeb9f3007548bb9d3f33c38,Sunday
1,00088930e925c41fd95ebfe695fd2655,Tuesday
2,0009406fd7479715e4bef61dd91f2462,Thursday
3,000b8f95fcb9e0096488278317764d19,Friday
4,000b8f95fcb9e0096488278317764d19,Wednesday
...,...,...
42700,fff6177642830a9a94a0f2cba5e476d1,Sunday
42701,fff81cc3158d2725c0655ab9ba0f712c,Monday
42702,fff9553ac224cec9d15d49f5a263411f,Friday
42703,fffdb2d0ec8d6a61f0a0a0db3f25b441,Tuesday


##### *Some products have several popular days to buy*

### Finding the average number of purchases per week for each customer

In [22]:
# Merging databases for analysis
orders_customers = orders\
                    .merge(customers)[['customer_id'
                                       , 'customer_unique_id'
                                       , 'order_purchase_timestamp']]

orders_customers.head()

Unnamed: 0,customer_id,customer_unique_id,order_purchase_timestamp
0,9ef432eb6251297304e76186b10a928d,7c396fd4830fd04220f754e42b4e5bff,2017-10-02 10:56:33
1,b0830fb4747a6c6d20dea0b8c802d7ef,af07308b275d755c9edb36a90c618231,2018-07-24 20:41:37
2,41ce2a54c0b03bf3443c3d931a367089,3a653a41f6f9fc3d2a113cf8398680e8,2018-08-08 08:38:49
3,f88197465ea7920adcdbec7375364d82,7c142cf63193a1473d2e66489a9ae977,2017-11-18 19:28:06
4,8ab97904e6daea8866dbdbc4fb7aad2c,72632f0f9dd73dfee390c9b22eb56dd6,2018-02-13 21:18:39


In [23]:
# Formatting the 'order_purchase_timestamp' into a date, leaving the year and month
orders_customers['order_purchase_timestamp'] = pd.to_datetime(orders_customers.order_purchase_timestamp)
orders_customers.order_purchase_timestamp = orders_customers.order_purchase_timestamp\
                                                            .apply(lambda d: d.strftime('%Y-%m'))

orders_customers.head()

Unnamed: 0,customer_id,customer_unique_id,order_purchase_timestamp
0,9ef432eb6251297304e76186b10a928d,7c396fd4830fd04220f754e42b4e5bff,2017-10
1,b0830fb4747a6c6d20dea0b8c802d7ef,af07308b275d755c9edb36a90c618231,2018-07
2,41ce2a54c0b03bf3443c3d931a367089,3a653a41f6f9fc3d2a113cf8398680e8,2018-08
3,f88197465ea7920adcdbec7375364d82,7c142cf63193a1473d2e66489a9ae977,2017-11
4,8ab97904e6daea8866dbdbc4fb7aad2c,72632f0f9dd73dfee390c9b22eb56dd6,2018-02


In [24]:
# Counting the number of unique purchases for each user in each month.
df6 = orders_customers.groupby(['customer_unique_id', 'order_purchase_timestamp'], as_index=False)\
                    .agg({'customer_id': 'count'})\
                    .rename(columns={'customer_id': 'purchases'})\
                    .sort_values('purchases', ascending=False)

df6.head()

Unnamed: 0,customer_unique_id,order_purchase_timestamp,purchases
7325,12f5d6e1cbf93dafd9dcc19095df0b3d,2017-01,6
50622,83e7958a94bd7f74a9414d8782f87628,2017-01,4
76793,c8460e4251689ba205045f3ea17884a1,2018-08,4
67755,b08fab27d47a1eb6deda07bfd965ad43,2017-09,4
62196,a239b8e2fbce33780f1f1912e2ee5275,2017-02,4


In [25]:
# Creating a function to find the number of weeks in a month
def get_number_week(d):
    year, month = d.split('-')
    return calendar.monthrange(int(year), int(month))[1]

In [26]:
# Creating a column for the number of weeks in the corresponding month
df6['count_week'] = df6.order_purchase_timestamp\
                        .apply(get_number_week)\
                        .div(7)\
                        .round(2)

In [27]:
# Calculating the average number of purchases per user per week
df6['purchases_per_week'] = df6.purchases\
                                .div(df6.count_week)\
                                .round(2)

df6

Unnamed: 0,customer_unique_id,order_purchase_timestamp,purchases,count_week,purchases_per_week
7325,12f5d6e1cbf93dafd9dcc19095df0b3d,2017-01,6,4.43,1.35
50622,83e7958a94bd7f74a9414d8782f87628,2017-01,4,4.43,0.90
76793,c8460e4251689ba205045f3ea17884a1,2018-08,4,4.43,0.90
67755,b08fab27d47a1eb6deda07bfd965ad43,2017-09,4,4.29,0.93
62196,a239b8e2fbce33780f1f1912e2ee5275,2017-02,4,4.00,1.00
...,...,...,...,...,...
32869,55a08a9545274dc36a0c5804389217d4,2017-03,1,4.43,0.23
32868,55a03f067a142cdde9f79f95a161b257,2018-01,1,4.43,0.23
32867,559df1e714566692202143c82bf531ca,2018-03,1,4.43,0.23
32866,559c99ed25c0bfe03e2e037c3205e9b6,2017-09,1,4.29,0.23


##### *The average number of purchases for each customers is **0.23** items per week*

### Performing a Cohort Analysis of the data and find the highest Retention Rate in the third month

In [28]:
# Users who made a purchase within one month will be selected as a cohort
orders.order_purchase_timestamp = pd.to_datetime(orders.order_purchase_timestamp)

In [29]:
# Creating the dataframe necessary for analysis
df7 = orders.merge(customers)[['customer_unique_id', 'order_purchase_timestamp']]

df7.head()

Unnamed: 0,customer_unique_id,order_purchase_timestamp
0,7c396fd4830fd04220f754e42b4e5bff,2017-10-02 10:56:33
1,af07308b275d755c9edb36a90c618231,2018-07-24 20:41:37
2,3a653a41f6f9fc3d2a113cf8398680e8,2018-08-08 08:38:49
3,7c142cf63193a1473d2e66489a9ae977,2017-11-18 19:28:06
4,72632f0f9dd73dfee390c9b22eb56dd6,2018-02-13 21:18:39


In [30]:
# Defining the month in which the product was purchased by the user
df7['month'] = df7.order_purchase_timestamp\
                    .dt\
                    .to_period('M')

In [31]:
# Determining the month of the very first purchase made by the user
df7['cohort'] = df7.groupby('customer_unique_id')['order_purchase_timestamp']\
                    .transform('min')\
                    .dt\
                    .to_period('M')

In [32]:
df7.head()

Unnamed: 0,customer_unique_id,order_purchase_timestamp,month,cohort
0,7c396fd4830fd04220f754e42b4e5bff,2017-10-02 10:56:33,2017-10,2017-09
1,af07308b275d755c9edb36a90c618231,2018-07-24 20:41:37,2018-07,2018-07
2,3a653a41f6f9fc3d2a113cf8398680e8,2018-08-08 08:38:49,2018-08,2018-08
3,7c142cf63193a1473d2e66489a9ae977,2017-11-18 19:28:06,2017-11,2017-11
4,72632f0f9dd73dfee390c9b22eb56dd6,2018-02-13 21:18:39,2018-02,2018-02


In [33]:
# Creating a new dataframe for the cohort
df_cohort = df7.groupby(['cohort', 'month'], as_index=False)\
                .agg(num_customers=('customer_unique_id', 'nunique'))  # number of users in a cohort

# Determining after how many months a second purchase was made
df_cohort['num_period'] = df_cohort.month\
                                    .sub(df_cohort.cohort)\
                                    .apply(attrgetter('n'))

In [34]:
# Creating a pivot table
cohort_pivot = df_cohort.pivot(index='cohort'
                              , columns='num_period'
                              , values='num_customers')

cohort_pivot

num_period,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,19,20
cohort,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2016-09,4.0,,,,,,,,,,,,,,,,,,,
2016-10,321.0,,,,,,1.0,,,1.0,,1.0,,1.0,,1.0,,1.0,2.0,2.0
2016-12,1.0,1.0,,,,,,,,,,,,,,,,,,
2017-01,764.0,3.0,2.0,1.0,3.0,1.0,4.0,1.0,1.0,,3.0,1.0,6.0,3.0,1.0,1.0,2.0,3.0,1.0,
2017-02,1752.0,4.0,5.0,2.0,7.0,2.0,4.0,3.0,3.0,4.0,2.0,5.0,3.0,3.0,2.0,1.0,1.0,4.0,,
2017-03,2636.0,13.0,10.0,10.0,9.0,4.0,4.0,8.0,9.0,2.0,10.0,4.0,6.0,3.0,4.0,6.0,2.0,4.0,,
2017-04,2352.0,14.0,5.0,4.0,8.0,6.0,8.0,7.0,7.0,4.0,6.0,2.0,2.0,1.0,2.0,2.0,5.0,,,
2017-05,3596.0,18.0,18.0,14.0,11.0,12.0,15.0,6.0,9.0,11.0,9.0,12.0,9.0,1.0,7.0,9.0,,,,
2017-06,3139.0,15.0,11.0,13.0,8.0,12.0,12.0,7.0,4.0,7.0,10.0,11.0,5.0,4.0,6.0,,,,,
2017-07,3894.0,20.0,14.0,10.0,11.0,8.0,12.0,4.0,7.0,10.0,9.0,12.0,5.0,10.0,,,,,,


In [35]:
# Determining the Retention Rate
size = cohort_pivot.iloc[:, 0]  # first cohort value (number of users)
retention = cohort_pivot.div(size, axis='index')

retention

num_period,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,19,20
cohort,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2016-09,1.0,,,,,,,,,,,,,,,,,,,
2016-10,1.0,,,,,,0.003115,,,0.003115,,0.003115,,0.003115,,0.003115,,0.003115,0.006231,0.006231
2016-12,1.0,1.0,,,,,,,,,,,,,,,,,,
2017-01,1.0,0.003927,0.002618,0.001309,0.003927,0.001309,0.005236,0.001309,0.001309,,0.003927,0.001309,0.007853,0.003927,0.001309,0.001309,0.002618,0.003927,0.001309,
2017-02,1.0,0.002283,0.002854,0.001142,0.003995,0.001142,0.002283,0.001712,0.001712,0.002283,0.001142,0.002854,0.001712,0.001712,0.001142,0.000571,0.000571,0.002283,,
2017-03,1.0,0.004932,0.003794,0.003794,0.003414,0.001517,0.001517,0.003035,0.003414,0.000759,0.003794,0.001517,0.002276,0.001138,0.001517,0.002276,0.000759,0.001517,,
2017-04,1.0,0.005952,0.002126,0.001701,0.003401,0.002551,0.003401,0.002976,0.002976,0.001701,0.002551,0.00085,0.00085,0.000425,0.00085,0.00085,0.002126,,,
2017-05,1.0,0.005006,0.005006,0.003893,0.003059,0.003337,0.004171,0.001669,0.002503,0.003059,0.002503,0.003337,0.002503,0.000278,0.001947,0.002503,,,,
2017-06,1.0,0.004779,0.003504,0.004141,0.002549,0.003823,0.003823,0.00223,0.001274,0.00223,0.003186,0.003504,0.001593,0.001274,0.001911,,,,,
2017-07,1.0,0.005136,0.003595,0.002568,0.002825,0.002054,0.003082,0.001027,0.001798,0.002568,0.002311,0.003082,0.001284,0.002568,,,,,,


In [36]:
# Next, for clarity, I apply a style, add symbols and translate values ​​into percentages
retention_style = (retention.style
                    .set_caption('User retention by cohort')  # add signature
                    .background_gradient(cmap='winter')  # coloring cells by columns
                    .highlight_null('black')  # make the background black for NaN values
                    .format("{:.2%}", na_rep=""))  # format float to percentage, get rid of NaN values

retention_style

num_period,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,19,20
cohort,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2016-09,100.00%,,,,,,,,,,,,,,,,,,,
2016-10,100.00%,,,,,,0.31%,,,0.31%,,0.31%,,0.31%,,0.31%,,0.31%,0.62%,0.62%
2016-12,100.00%,100.00%,,,,,,,,,,,,,,,,,,
2017-01,100.00%,0.39%,0.26%,0.13%,0.39%,0.13%,0.52%,0.13%,0.13%,,0.39%,0.13%,0.79%,0.39%,0.13%,0.13%,0.26%,0.39%,0.13%,
2017-02,100.00%,0.23%,0.29%,0.11%,0.40%,0.11%,0.23%,0.17%,0.17%,0.23%,0.11%,0.29%,0.17%,0.17%,0.11%,0.06%,0.06%,0.23%,,
2017-03,100.00%,0.49%,0.38%,0.38%,0.34%,0.15%,0.15%,0.30%,0.34%,0.08%,0.38%,0.15%,0.23%,0.11%,0.15%,0.23%,0.08%,0.15%,,
2017-04,100.00%,0.60%,0.21%,0.17%,0.34%,0.26%,0.34%,0.30%,0.30%,0.17%,0.26%,0.09%,0.09%,0.04%,0.09%,0.09%,0.21%,,,
2017-05,100.00%,0.50%,0.50%,0.39%,0.31%,0.33%,0.42%,0.17%,0.25%,0.31%,0.25%,0.33%,0.25%,0.03%,0.19%,0.25%,,,,
2017-06,100.00%,0.48%,0.35%,0.41%,0.25%,0.38%,0.38%,0.22%,0.13%,0.22%,0.32%,0.35%,0.16%,0.13%,0.19%,,,,,
2017-07,100.00%,0.51%,0.36%,0.26%,0.28%,0.21%,0.31%,0.10%,0.18%,0.26%,0.23%,0.31%,0.13%,0.26%,,,,,,


##### *As can be seen from the Cohort Analysis, the Retention Rate in the third month of 2017-06 is 0.41%.*