In [1]:
import pandas as pd
from datetime import timedelta

In [2]:
# Read CSV with Decimals as comma:
df = pd.read_csv("./Inputs/data FR (MAY 22 - APR 23).csv", decimal=',')

In [3]:
df.head(3)

Unnamed: 0,Company (code+name),Year,Delivery note date,Customer code,Customer name,Segment code (customer),Segment name (customer),Global CoFX Amount (net),Sales order number,Order origin
0,050 - FLUIDRA COMMERCIAL FRANCE SAS,2022,2022-05-01 00:00:00,830400,AATE,14,POOL SPECIALIST-Retailers,349.13,6183514,25
1,050 - FLUIDRA COMMERCIAL FRANCE SAS,2022,2022-05-02 00:00:00,64372,LS TECHNIQUE MEDITERRANEE,14,POOL SPECIALIST-Retailers,70.62,2108967,25
2,050 - FLUIDRA COMMERCIAL FRANCE SAS,2022,2022-05-02 00:00:00,120478,EAU PISCINES SERVICES ROUERGUE,15,POOL SPECIALIST-Instal/Builder,144.04,4441899,25


In [4]:
# Renaming Columns:
df = df.rename(columns={'Delivery note date': 'Purchase Date',
                        'Customer Name': 'Customer name', 
                        'Global CoFX Amount (net)': 'Global Amount'})

In [5]:
df["Customer name"].unique()

array(['AATE', 'LS TECHNIQUE MEDITERRANEE',
       'EAU PISCINES SERVICES ROUERGUE', ...,
       'FONTENEAU PAYSAGISTE SARL - EAU SHOP', 'VENDEE SPA - OASIS',
       'EMYG AQUACULTURE'], dtype=object)

In [6]:
df["Customer name"] = df["Customer name"].str.replace(r' - 100', '')
df["Customer name"] = df["Customer name"].str.replace(r'(?!-)[^\w\s]', '', regex=True)
df["Customer name"] = df["Customer name"].str.replace('Ã‘', 'A')
df["Customer name"] = df["Customer name"].str.replace('Ã“', 'A')
df["Customer name"] = df["Customer name"].str.replace(r'[^\x00-\x7f]', '', regex=True)
df["Customer name"] = df["Customer name"].str.strip()

In [7]:
df2 = df[['Purchase Date', 'Customer name', 'Global Amount']]
df2

Unnamed: 0,Purchase Date,Customer name,Global Amount
0,2022-05-01 00:00:00,AATE,349.13
1,2022-05-02 00:00:00,LS TECHNIQUE MEDITERRANEE,70.62
2,2022-05-02 00:00:00,EAU PISCINES SERVICES ROUERGUE,144.04
3,2022-05-02 00:00:00,ESPACO,543.92
4,2022-05-02 00:00:00,VERT MARINE 17,555.62
...,...,...,...
6787,2023-04-28 00:00:00,SWIMMING COOL VENDEE,291.50
6788,2023-04-28 00:00:00,ARROLIMOUSIN,4213.08
6789,2023-04-28 00:00:00,MB5 PISCINE,120.60
6790,2023-04-29 00:00:00,PISCINE PLUS JUVIGNAC - FUSION,1050.60


In [8]:
df2["Purchase Date"] = pd.to_datetime(df2["Purchase Date"])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2["Purchase Date"] = pd.to_datetime(df2["Purchase Date"])


In [9]:
df2.dtypes

Purchase Date    datetime64[ns]
Customer name            object
Global Amount           float64
dtype: object

In [10]:
snapshot_date = df2['Purchase Date'].max() + timedelta(days=1)

In [11]:
df3 = df2.groupby('Customer name').agg({'Global Amount': ['sum','count'], 'Purchase Date': lambda x: (snapshot_date - x.max()).days}).reset_index()

In [12]:
df3.head(3)

Unnamed: 0_level_0,Customer name,Global Amount,Global Amount,Purchase Date
Unnamed: 0_level_1,Unnamed: 1_level_1,sum,count,<lambda>
0,2 ROUES MOTOCULTURE OUTIMAG 0255,1291.78,4,284
1,2 T SAS,526.07,2,3
2,27 SARL,1247.27,7,4


In [13]:
df3.columns

MultiIndex([('Customer name',         ''),
            ('Global Amount',      'sum'),
            ('Global Amount',    'count'),
            ('Purchase Date', '<lambda>')],
           )

In [14]:
df4 = df3.rename(columns={'': 'Customer Name',
                          "sum": "Monetary value",
                          'count': 'Frequency',
                          '<lambda>': 'Recency'})

In [15]:
# Dropping 1st level Index:
df4.columns = df4.columns.droplevel(0)

In [16]:
df4.head(3)

Unnamed: 0,Customer Name,Monetary value,Frequency,Recency
0,2 ROUES MOTOCULTURE OUTIMAG 0255,1291.78,4,284
1,2 T SAS,526.07,2,3
2,27 SARL,1247.27,7,4


In [17]:
df4.to_excel("./outputs/RFM_FR_2023.xlsx", index=False)