In [87]:
import pandas as pd

# Toy ledger, one inner list per transaction:
# [transaction_id, customer_id, transaction_date (ISO string), amount].
data = [
    [1, 101, '2023-05-01', 100],
    [2, 101, '2023-05-02', 150],
    [3, 101, '2023-05-03', 200],
    [4, 102, '2023-05-01', 50],
    [5, 102, '2023-05-03', 100],
    [6, 102, '2023-05-04', 200],
    [7, 105, '2023-05-01', 100],
    [8, 105, '2023-05-02', 150],
    [9, 105, '2023-05-03', 200],
    [10, 105, '2023-05-04', 300],
    [11, 105, '2023-05-12', 250],
    [12, 105, '2023-05-13', 260],
    [13, 105, '2023-05-14', 270],
]

# Build the frame and pin the dtypes in one chained expression: nullable
# integers for ids/amounts, real timestamps for the date strings.
transactions = pd.DataFrame(
    data,
    columns=['transaction_id', 'customer_id', 'transaction_date', 'amount'],
).astype(
    dict(
        transaction_id='Int64',
        customer_id='Int64',
        transaction_date='datetime64[ns]',
        amount='Int64',
    )
)

display(transactions)

Unnamed: 0,transaction_id,customer_id,transaction_date,amount
0,1,101,2023-05-01,100
1,2,101,2023-05-02,150
2,3,101,2023-05-03,200
3,4,102,2023-05-01,50
4,5,102,2023-05-03,100
5,6,102,2023-05-04,200
6,7,105,2023-05-01,100
7,8,105,2023-05-02,150
8,9,105,2023-05-03,200
9,10,105,2023-05-04,300


In [88]:
# Self-join on customer_id: each transaction is paired with every transaction
# of the same customer (itself included). Columns coming from the right-hand
# copy receive the '_t2' suffix; the left-hand names are kept as they are.
df = pd.merge(
    transactions,
    transactions,
    how='left',
    on='customer_id',
    suffixes=['', '_t2'],
)
print(df.shape)
display(df)

(67, 7)


Unnamed: 0,transaction_id,customer_id,transaction_date,amount,transaction_id_t2,transaction_date_t2,amount_t2
0,1,101,2023-05-01,100,1,2023-05-01,100
1,1,101,2023-05-01,100,2,2023-05-02,150
2,1,101,2023-05-01,100,3,2023-05-03,200
3,2,101,2023-05-02,150,1,2023-05-01,100
4,2,101,2023-05-02,150,2,2023-05-02,150
...,...,...,...,...,...,...,...
62,13,105,2023-05-14,270,9,2023-05-03,200
63,13,105,2023-05-14,270,10,2023-05-04,300
64,13,105,2023-05-14,270,11,2023-05-12,250
65,13,105,2023-05-14,270,12,2023-05-13,260


In [89]:
# Signed gap, in whole days, from the left-hand transaction to its paired
# '_t2' transaction (positive when the '_t2' one happened later).
df["day_dif"] = df["transaction_date_t2"].sub(df["transaction_date"]).dt.days
display(df)

Unnamed: 0,transaction_id,customer_id,transaction_date,amount,transaction_id_t2,transaction_date_t2,amount_t2,day_dif
0,1,101,2023-05-01,100,1,2023-05-01,100,0
1,1,101,2023-05-01,100,2,2023-05-02,150,1
2,1,101,2023-05-01,100,3,2023-05-03,200,2
3,2,101,2023-05-02,150,1,2023-05-01,100,-1
4,2,101,2023-05-02,150,2,2023-05-02,150,0
...,...,...,...,...,...,...,...,...
62,13,105,2023-05-14,270,9,2023-05-03,200,-11
63,13,105,2023-05-14,270,10,2023-05-04,300,-10
64,13,105,2023-05-14,270,11,2023-05-12,250,-2
65,13,105,2023-05-14,270,12,2023-05-13,260,-1


In [90]:
# Keep only the pairs whose '_t2' transaction has a strictly larger amount
# and falls exactly one day after the left-hand transaction.
df = df.loc[(df["amount_t2"] > df["amount"]) & df["day_dif"].eq(1)]
display(df)

Unnamed: 0,transaction_id,customer_id,transaction_date,amount,transaction_id_t2,transaction_date_t2,amount_t2,day_dif
1,1,101,2023-05-01,100,2,2023-05-02,150,1
5,2,101,2023-05-02,150,3,2023-05-03,200,1
14,5,102,2023-05-03,100,6,2023-05-04,200,1
19,7,105,2023-05-01,100,8,2023-05-02,150,1
27,8,105,2023-05-02,150,9,2023-05-03,200,1
35,9,105,2023-05-03,200,10,2023-05-04,300,1
51,11,105,2023-05-12,250,12,2023-05-13,260,1
59,12,105,2023-05-13,260,13,2023-05-14,270,1


In [91]:
# From here on only the customer and the two dates of each pair are used.
df = df.drop(
    ['transaction_id', 'amount', 'transaction_id_t2', 'amount_t2', 'day_dif'],
    axis='columns',
)
# Dense rank of each pair's start date within its customer (float column).
df['rank'] = (
    df.groupby('customer_id')['transaction_date']
    .rank(method='dense')
)
display(df)

Unnamed: 0,customer_id,transaction_date,transaction_date_t2,rank
1,101,2023-05-01,2023-05-02,1.0
5,101,2023-05-02,2023-05-03,2.0
14,102,2023-05-03,2023-05-04,1.0
19,105,2023-05-01,2023-05-02,1.0
27,105,2023-05-02,2023-05-03,2.0
35,105,2023-05-03,2023-05-04,3.0
51,105,2023-05-12,2023-05-13,4.0
59,105,2023-05-13,2023-05-14,5.0


In [92]:
# Re-express the per-customer rank as a duration of that many days, giving a
# timedelta column that can take part in date arithmetic.
# 'D' is the day-unit spelling listed in the pandas `to_timedelta`
# documentation; it yields the same values as the lowercase alias.
df['rank_in_days'] = pd.to_timedelta(df['rank'], unit='D')
display(df)

Unnamed: 0,customer_id,transaction_date,transaction_date_t2,rank,rank_in_days
1,101,2023-05-01,2023-05-02,1.0,1 days
5,101,2023-05-02,2023-05-03,2.0,2 days
14,102,2023-05-03,2023-05-04,1.0,1 days
19,105,2023-05-01,2023-05-02,1.0,1 days
27,105,2023-05-02,2023-05-03,2.0,2 days
35,105,2023-05-03,2023-05-04,3.0,3 days
51,105,2023-05-12,2023-05-13,4.0,4 days
59,105,2023-05-13,2023-05-14,5.0,5 days


In [93]:
# The numeric rank is no longer needed once its timedelta form exists.
df = df.drop('rank', axis='columns')
# Shift every start date back by its rank. Within one customer, rows whose
# start dates advance by exactly one day per rank step land on the same
# rank_date, so rank_date labels each run of consecutive days.
df['rank_date'] = df['transaction_date'].sub(df['rank_in_days'])
display(df)

Unnamed: 0,customer_id,transaction_date,transaction_date_t2,rank_in_days,rank_date
1,101,2023-05-01,2023-05-02,1 days,2023-04-30
5,101,2023-05-02,2023-05-03,2 days,2023-04-30
14,102,2023-05-03,2023-05-04,1 days,2023-05-02
19,105,2023-05-01,2023-05-02,1 days,2023-04-30
27,105,2023-05-02,2023-05-03,2 days,2023-04-30
35,105,2023-05-03,2023-05-04,3 days,2023-04-30
51,105,2023-05-12,2023-05-13,4 days,2023-05-08
59,105,2023-05-13,2023-05-14,5 days,2023-05-08


In [94]:
# Narrow to the run label (customer_id + rank_date) and the pair's two dates,
# in that column order.
df = df.loc[
    :,
    ['customer_id', 'rank_date', 'transaction_date', 'transaction_date_t2'],
]
display(df)

Unnamed: 0,customer_id,rank_date,transaction_date,transaction_date_t2
1,101,2023-04-30,2023-05-01,2023-05-02
5,101,2023-04-30,2023-05-02,2023-05-03
14,102,2023-05-02,2023-05-03,2023-05-04
19,105,2023-04-30,2023-05-01,2023-05-02
27,105,2023-04-30,2023-05-02,2023-05-03
35,105,2023-04-30,2023-05-03,2023-05-04
51,105,2023-05-08,2023-05-12,2023-05-13
59,105,2023-05-08,2023-05-13,2023-05-14


In [95]:
# Collapse each (customer_id, rank_date) run into a single row:
#   consecutive_start - earliest start date among the run's pairs
#   consecutive_end   - latest '_t2' date among the run's pairs
#   count             - number of pairs in the run
df = (
    df.groupby(['customer_id', 'rank_date'])
    .agg(
        consecutive_start=pd.NamedAgg(column='transaction_date', aggfunc='min'),
        consecutive_end=pd.NamedAgg(column='transaction_date_t2', aggfunc='max'),
        count=pd.NamedAgg(column='customer_id', aggfunc='count'),
    )
    .reset_index()
)
display(df)

Unnamed: 0,customer_id,rank_date,consecutive_start,consecutive_end,count
0,101,2023-04-30,2023-05-01,2023-05-03,2
1,102,2023-05-02,2023-05-03,2023-05-04,1
2,105,2023-04-30,2023-05-01,2023-05-04,3
3,105,2023-05-08,2023-05-12,2023-05-14,2


In [96]:
# Keep runs made of at least two day-to-day pairs and expose only the
# customer together with the run's first and last date.
df = df.loc[
    df["count"].ge(2),
    ['customer_id', 'consecutive_start', 'consecutive_end'],
]
display(df)

Unnamed: 0,customer_id,consecutive_start,consecutive_end
0,101,2023-05-01,2023-05-03
2,105,2023-05-01,2023-05-04
3,105,2023-05-12,2023-05-14
