In [60]:
import pandas as pd
import numpy as np

In [61]:
# Load the transaction-level dataset used as input for the RFM computation
df_rfm = pd.read_csv(filepath_or_buffer="../../data/df_rfm.csv")

# Preview the first five rows to sanity-check the columns
df_rfm.head(n=5)

Unnamed: 0,Invoice,StockCode,Description,Quantity,InvoiceDate,Price,Customer_ID,Country,TotalSales
0,489434,22041,"RECORD_FRAME_7""_SINGLE_SIZE",48,2009-12-01 07:45:00,2.1,13085,United_Kingdom,100.8
1,489434,21232,STRAWBERRY_CERAMIC_TRINKET_BOX,24,2009-12-01 07:45:00,1.25,13085,United_Kingdom,30.0
2,489434,22064,PINK_DOUGHNUT_TRINKET_POT,24,2009-12-01 07:45:00,1.65,13085,United_Kingdom,39.6
3,489434,21871,SAVE_THE_PLANET_MUG,24,2009-12-01 07:45:00,1.25,13085,United_Kingdom,30.0
4,489435,22350,CAT_BOWL,12,2009-12-01 07:46:00,2.55,13085,United_Kingdom,30.6


In [62]:
# Parse InvoiceDate into real datetimes so date arithmetic works
df_rfm['InvoiceDate'] = pd.to_datetime(df_rfm['InvoiceDate'])

# Reference point for recency: one day after the most recent transaction,
# so the latest purchase gets a recency of 1 day rather than 0
snapshot_date = df_rfm['InvoiceDate'].max() + pd.Timedelta(days=1)

# Per-transaction age in whole days (kept as a column on df_rfm)
df_rfm['Recency'] = (snapshot_date - df_rfm['InvoiceDate']).dt.days

# Collapse to one row per customer. Named aggregation gives the output
# columns their final names directly:
#   Frequency     -> number of distinct invoices
#   MonetaryValue -> total of TotalSales
#   Recency       -> age in days of the most recent transaction
rfm = df_rfm.groupby('Customer_ID').agg(
    Frequency=('Invoice', 'nunique'),
    MonetaryValue=('TotalSales', 'sum'),
    Recency=('Recency', 'min'),
)

# Preview the first ten customers of the RFM table
rfm.head(10)

Unnamed: 0_level_0,Frequency,MonetaryValue,Recency
Customer_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
12346,1,3.25,647
12347,8,3866.34,2
12348,5,1658.4,75
12349,3,2174.17,19
12350,1,258.0,310
12352,9,1245.09,36
12353,2,347.06,204
12354,1,810.8,232
12355,2,627.21,214
12356,5,3035.0,246


In [63]:
# Select every transaction belonging to customer ID 12347
df_client = df_rfm.loc[df_rfm['Customer_ID'].eq(12347)]


In [64]:
# Count the distinct invoices of the selected customer (missing values are
# ignored); this can be compared with the Frequency value in the RFM table
df_client['Invoice'].nunique(dropna=True)

8

In [65]:
# Order the transactions from newest to oldest so the latest date in the
# dataset appears in the first row
df_rfm.sort_values('InvoiceDate', ascending=False)

Unnamed: 0,Invoice,StockCode,Description,Quantity,InvoiceDate,Price,Customer_ID,Country,TotalSales,Recency
611792,581587,22138,BAKING_SET_9_PIECE_RETROSPOT,3,2011-12-09 12:50:00,4.95,12680,France,14.85,1
611785,581587,22367,CHILDRENS_APRON_SPACEBOY_DESIGN,8,2011-12-09 12:50:00,1.95,12680,France,15.60,1
611778,581587,22631,CIRCUS_PARADE_LUNCH_BOX,12,2011-12-09 12:50:00,1.95,12680,France,23.40,1
611779,581587,22556,PLASTERS_IN_TIN_CIRCUS_PARADE,12,2011-12-09 12:50:00,1.65,12680,France,19.80,1
611781,581587,22728,ALARM_CLOCK_BAKELIKE_PINK,4,2011-12-09 12:50:00,3.75,12680,France,15.00,1
...,...,...,...,...,...,...,...,...,...,...
4,489435,22350,CAT_BOWL,12,2009-12-01 07:46:00,2.55,13085,United_Kingdom,30.60,739
1,489434,21232,STRAWBERRY_CERAMIC_TRINKET_BOX,24,2009-12-01 07:45:00,1.25,13085,United_Kingdom,30.00,739
3,489434,21871,SAVE_THE_PLANET_MUG,24,2009-12-01 07:45:00,1.25,13085,United_Kingdom,30.00,739
2,489434,22064,PINK_DOUGHNUT_TRINKET_POT,24,2009-12-01 07:45:00,1.65,13085,United_Kingdom,39.60,739


In [66]:
# Display the per-customer RFM table (Frequency, MonetaryValue, Recency)
rfm

Unnamed: 0_level_0,Frequency,MonetaryValue,Recency
Customer_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
12346,1,3.25,647
12347,8,3866.34,2
12348,5,1658.40,75
12349,3,2174.17,19
12350,1,258.00,310
...,...,...,...
18283,22,2373.85,4
18284,1,391.28,432
18285,1,275.40,661
18286,2,766.59,477


In [67]:
# Display the per-customer RFM table; it is indexed by Customer_ID
rfm


Unnamed: 0_level_0,Frequency,MonetaryValue,Recency
Customer_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
12346,1,3.25,647
12347,8,3866.34,2
12348,5,1658.40,75
12349,3,2174.17,19
12350,1,258.00,310
...,...,...,...
18283,22,2373.85,4
18284,1,391.28,432
18285,1,275.40,661
18286,2,766.59,477


In [68]:
# Display the transaction-level frame, which now carries the added Recency column
df_rfm

Unnamed: 0,Invoice,StockCode,Description,Quantity,InvoiceDate,Price,Customer_ID,Country,TotalSales,Recency
0,489434,22041,"RECORD_FRAME_7""_SINGLE_SIZE",48,2009-12-01 07:45:00,2.10,13085,United_Kingdom,100.80,739
1,489434,21232,STRAWBERRY_CERAMIC_TRINKET_BOX,24,2009-12-01 07:45:00,1.25,13085,United_Kingdom,30.00,739
2,489434,22064,PINK_DOUGHNUT_TRINKET_POT,24,2009-12-01 07:45:00,1.65,13085,United_Kingdom,39.60,739
3,489434,21871,SAVE_THE_PLANET_MUG,24,2009-12-01 07:45:00,1.25,13085,United_Kingdom,30.00,739
4,489435,22350,CAT_BOWL,12,2009-12-01 07:46:00,2.55,13085,United_Kingdom,30.60,739
...,...,...,...,...,...,...,...,...,...,...
611788,581587,22613,PACK_OF_20_SPACEBOY_NAPKINS,12,2011-12-09 12:50:00,0.85,12680,France,10.20,1
611789,581587,22899,CHILDREN'S_APRON_DOLLY_GIRL,6,2011-12-09 12:50:00,2.10,12680,France,12.60,1
611790,581587,23254,CHILDRENS_CUTLERY_DOLLY_GIRL,4,2011-12-09 12:50:00,4.15,12680,France,16.60,1
611791,581587,23255,CHILDRENS_CUTLERY_CIRCUS_PARADE,4,2011-12-09 12:50:00,4.15,12680,France,16.60,1


In [69]:
# Write the RFM table to CSV.
# `index` is a boolean switch in DataFrame.to_csv; the header written for the
# index column is set with `index_label`. The previous call passed the string
# 'Customer_ID' as `index`, which only worked because a non-empty string is
# truthy. Keep the index and label it explicitly.
rfm.to_csv("../../data/rfm_firstmodel.csv", index=True, index_label='Customer_ID')