In [6]:
# Customer retention analysis using a cohort-based methodology.

import pandas as pd
from operator import attrgetter


In [None]:
# Read the cleaned transactions export and convert InvoiceDate from text to
# datetimes, since the later cells use the .dt accessor on that column.
df = pd.read_csv("../outputs/clean_retail_data.csv").assign(
    InvoiceDate=lambda frame: pd.to_datetime(frame["InvoiceDate"])
)


In [8]:
# Tag every row with two monthly periods:
#   CohortMonth - month of the earliest invoice seen for the row's CustomerID
#   OrderMonth  - month of the row's own invoice
first_purchase = df.groupby("CustomerID")["InvoiceDate"].transform("min")
df["CohortMonth"] = first_purchase.dt.to_period("M")
df["OrderMonth"] = df["InvoiceDate"].dt.to_period("M")


In [9]:
# Cohort index: how many months after the cohort month each order happened.
# Subtracting the two monthly period columns gives one offset object per row;
# its `n` attribute is the whole-month count (0 means the cohort month itself).
month_offsets = df["OrderMonth"].sub(df["CohortMonth"])
df["CohortIndex"] = month_offsets.map(attrgetter("n"))


In [10]:
# Retention matrix.
# Step 1: count distinct customers per (cohort month, months since cohort).
cohort_pivot = pd.pivot_table(
    df,
    values="CustomerID",
    index="CohortMonth",
    columns="CohortIndex",
    aggfunc="nunique",
)

# Step 2: scale every row by its first column so each cell becomes the
# fraction of that first-column count still active in the given month.
cohort_sizes = cohort_pivot.iloc[:, 0]
retention = cohort_pivot.div(cohort_sizes, axis="index")

retention.head()


CohortIndex,0,1,2,3,4,5,6,7,8,9,...,15,16,17,18,19,20,21,22,23,24
CohortMonth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2009-12,1.0,0.35288,0.334031,0.425131,0.380105,0.359162,0.376963,0.342408,0.336126,0.362304,...,0.302618,0.262827,0.302618,0.282723,0.259686,0.255497,0.315183,0.304712,0.40733,0.196859
2010-01,1.0,0.206266,0.310705,0.305483,0.263708,0.300261,0.258486,0.229765,0.279373,0.318538,...,0.151436,0.234987,0.198433,0.185379,0.195822,0.24282,0.193211,0.245431,0.057441,
2010-02,1.0,0.237968,0.224599,0.291444,0.245989,0.200535,0.192513,0.286096,0.254011,0.275401,...,0.200535,0.160428,0.163102,0.144385,0.229947,0.229947,0.163102,0.058824,,
2010-03,1.0,0.189616,0.230248,0.241535,0.232506,0.20316,0.24605,0.302483,0.275395,0.108352,...,0.1693,0.173815,0.155756,0.176072,0.200903,0.21219,0.079007,,,
2010-04,1.0,0.193878,0.193878,0.163265,0.183673,0.22449,0.27551,0.261905,0.105442,0.108844,...,0.156463,0.139456,0.14966,0.180272,0.22449,0.057823,,,,
