In [1]:
import pandas as pd
from datetime import timedelta

In [2]:
# Config:
month = 'Jun23'
file_name = 'France_Jan_Jun_23'

In [3]:
# Read CSV with Decimals as comma:
df = pd.read_csv(f"./Inputs/{month}/{file_name}.csv", decimal=',')

In [4]:
df.head(3)

Unnamed: 0,Company (code+name),Year,Delivery note date,Customer code,Customer name,Segment code (customer),Segment name (customer),Global CoFX Amount (net),Sales order number,Order origin
0,050 - FLUIDRA COMMERCIAL FRANCE SAS,2023,2023-01-02 00:00:00,357487,PISCINE EVOLUTION,14,POOL SPECIALIST-Retailers,-12.11,7121783.0,25
1,050 - FLUIDRA COMMERCIAL FRANCE SAS,2023,2023-01-03 00:00:00,60061,PIACENTI PATRICK,14,POOL SPECIALIST-Retailers,1327.25,2112990.0,25
2,050 - FLUIDRA COMMERCIAL FRANCE SAS,2023,2023-01-03 00:00:00,170067,UP-LR,14,POOL SPECIALIST-Retailers,39.68,1419377.0,25


In [5]:
# Renaming Columns:
df = df.rename(columns={'Delivery note date': 'Purchase Date',
                        'Customer Name': 'Customer name', 
                        'Global CoFX Amount (net)': 'Global Amount'})

In [6]:
df["Customer name"].unique()

array(['PISCINE EVOLUTION', 'PIACENTI PATRICK', 'UP-LR', ...,
       'BPJ PISCINE - FUSION', 'IDEAL PISCINE & SPA - HYDRO SUD',
       'ATP PISCINES'], dtype=object)

In [7]:
df["Customer name"] = df["Customer name"].str.replace(r' - 100', '')
df["Customer name"] = df["Customer name"].str.replace(r'(?!-)[^\w\s]', '', regex=True)
df["Customer name"] = df["Customer name"].str.replace('Ã‘', 'A')
df["Customer name"] = df["Customer name"].str.replace('Ã“', 'A')
df["Customer name"] = df["Customer name"].str.replace(r'[^\x00-\x7f]', '', regex=True)
df["Customer name"] = df["Customer name"].str.strip()

In [8]:
df2 = df[['Purchase Date', 'Customer name', 'Global Amount']]
df2

Unnamed: 0,Purchase Date,Customer name,Global Amount
0,2023-01-02 00:00:00,PISCINE EVOLUTION,-12.11
1,2023-01-03 00:00:00,PIACENTI PATRICK,1327.25
2,2023-01-03 00:00:00,UP-LR,39.68
3,2023-01-03 00:00:00,MES PISCINES BLAYE,678.43
4,2023-01-03 00:00:00,AQUITAINE RENOVATION PISCINE,759.35
...,...,...,...
5007,2023-06-30 00:00:00,OLOISIRSOSERVICES SARL,717.20
5008,2023-06-30 00:00:00,LEPERE PISCINES,163.35
5009,2023-06-30 00:00:00,GUILLAUME THOMAZEAU,389.29
5010,2023-06-30 00:00:00,OPULSION,-251.37


In [9]:
df2["Purchase Date"] = pd.to_datetime(df2["Purchase Date"])
df2['Global Amount'] = pd.to_numeric(df2['Global Amount'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2["Purchase Date"] = pd.to_datetime(df2["Purchase Date"])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['Global Amount'] = pd.to_numeric(df2['Global Amount'])


In [10]:
df2.dtypes

Purchase Date    datetime64[ns]
Customer name            object
Global Amount           float64
dtype: object

In [11]:
snapshot_date = df2['Purchase Date'].max() + timedelta(days=1)

In [12]:
df3 = df2.groupby('Customer name').agg({'Global Amount': ['sum','count'], 'Purchase Date': lambda x: (snapshot_date - x.max()).days}).reset_index()

In [13]:
df3.head(3)

Unnamed: 0_level_0,Customer name,Global Amount,Global Amount,Purchase Date
Unnamed: 0_level_1,Unnamed: 1_level_1,sum,count,<lambda>
0,123SPA,1370.4,1,24
1,2 T SAS,11.74,3,37
2,27 SARL,475.85,4,31


In [14]:
df3.columns

MultiIndex([('Customer name',         ''),
            ('Global Amount',      'sum'),
            ('Global Amount',    'count'),
            ('Purchase Date', '<lambda>')],
           )

In [15]:
df3

Unnamed: 0_level_0,Customer name,Global Amount,Global Amount,Purchase Date
Unnamed: 0_level_1,Unnamed: 1_level_1,sum,count,<lambda>
0,123SPA,1370.40,1,24
1,2 T SAS,11.74,3,37
2,27 SARL,475.85,4,31
3,3C CLIM - ORLEANS,5596.05,1,30
4,3E,263.05,2,79
...,...,...,...,...
1064,WATTIER FLORENT,658.00,1,59
1065,WEST GARDEN PISCINES LIGNE CLAIRE,482.38,2,30
1066,YACONO ALAIN AQUASERVICES66,1491.50,2,66
1067,YGPISCINES - YANNICK GROSEILLER,1035.40,3,4


In [16]:
df4 = df3.rename(columns={'': 'Customer Name',
                          "sum": "Monetary value",
                          'count': 'Frequency',
                          '<lambda>': 'Recency'})

In [17]:
df4

Unnamed: 0_level_0,Customer name,Global Amount,Global Amount,Purchase Date
Unnamed: 0_level_1,Customer Name,Monetary value,Frequency,Recency
0,123SPA,1370.40,1,24
1,2 T SAS,11.74,3,37
2,27 SARL,475.85,4,31
3,3C CLIM - ORLEANS,5596.05,1,30
4,3E,263.05,2,79
...,...,...,...,...
1064,WATTIER FLORENT,658.00,1,59
1065,WEST GARDEN PISCINES LIGNE CLAIRE,482.38,2,30
1066,YACONO ALAIN AQUASERVICES66,1491.50,2,66
1067,YGPISCINES - YANNICK GROSEILLER,1035.40,3,4


In [18]:
# Dropping 1st level Index:
df4.columns = df4.columns.droplevel(0)

In [19]:
df4.head(3)

Unnamed: 0,Customer Name,Monetary value,Frequency,Recency
0,123SPA,1370.4,1,24
1,2 T SAS,11.74,3,37
2,27 SARL,475.85,4,31


In [20]:
df4.to_excel(f"./{month}/FR/RFM_FR_{month}.xlsx", index=False)