In [2]:
import pandas as pd
from datetime import timedelta

In [3]:
# Read CSV with Decimals as comma:
df = pd.read_csv("./Inputs/data ES (MAY 22 - APR 23).csv", decimal=',')

In [4]:
df.head(3)

Unnamed: 0,Company (code+name),Year,Delivery note date,Customer code,Customer name,Segment code (customer),Segment name (customer),Global CoFX Amount (net),Sales order number,Order origin
0,002 - FLUIDRA COMERCIAL ESPAÑA SAU,2022,2022-05-01 00:00:00,107066,"NOMAR R.B., S.L.",17,POOL SPECIALIST-IntDealer/BtoC,654.94,224840,25
1,002 - FLUIDRA COMERCIAL ESPAÑA SAU,2022,2022-05-02 00:00:00,4068,"COSAGUA,S.L.",15,POOL SPECIALIST-Instal/Builder,1328.85,220329,25
2,002 - FLUIDRA COMERCIAL ESPAÑA SAU,2022,2022-05-02 00:00:00,7638,"SUCESORES DE MORENO, S.L.",15,POOL SPECIALIST-Instal/Builder,331.41,234692,25


In [5]:
# Renaming Columns:
df = df.rename(columns={'Delivery note date': 'Purchase Date',
                        'Customer Name': 'Customer name', 
                        'Global CoFX Amount (net)': 'Global Amount'})

In [6]:
df["Customer name"].unique()

array(['NOMAR R.B., S.L.', 'COSAGUA,S.L.', 'SUCESORES DE MORENO, S.L.',
       'COPIL, S.L.', 'POOLPLUS S.L. (ON LINE)',
       'HERMANOS MEDINA UNGUETTI S.L.', 'SUAREP Y LLADO, S.A.',
       'SANEAMIENTOS AVILA, S.A.U.',
       'INSTALACIONES DE CONTROL BACTERIOLOGICO',
       'ACCESORIOS Y TECNICAS DEL AGUA S.L.', 'TUNEU MUNT, S.L.',
       'AGROREPUESTOS LEVANTE S.L', 'ES-QUIMER XXI, S.L.',
       'GLOBAL BLUE INK S.L.U.', 'TIENDA-PISCINAS  S.L.',
       'POOLS CONSULTING & WATER S.L.',
       'SUMINISTROS Y PINTURAS EL TRINI S.L', 'JARDINS TECNICS, C.B.',
       'SANEAMIENTOS LUENGO, S.L.', 'SAYPOOL ADRYSS S.L',
       'REVOLUTION STAR. S.L.', 'WASSER POOL S.L.',
       'CONSTRUCCIONES DEPORTIVAS PROPOOL, S.L.', 'VECOBAY S.L.',
       'GUNITEC MALUMAR, S.L.', 'TAUMAU FACILITY SERVICES, S.L.',
       'CIRILO LOPEZ SERRANO, S.L.', 'COMERCIAL GALAN, S.A.',
       'FONTANERIA FERNANDEZ LUNA S.L.', 'APARISI FERRER, JOSEFA',
       'PRODUCTOS LC LA CORBERANA S.L.',
       'PISCIVAL TECNO

In [7]:
df["Customer name"] = df["Customer name"].str.replace(r' - 100', '')
df["Customer name"] = df["Customer name"].str.replace(r'(?!-)[^\w\s]', '', regex=True)
df["Customer name"] = df["Customer name"].str.replace('Ã‘', 'A')
df["Customer name"] = df["Customer name"].str.replace('Ã“', 'A')
df["Customer name"] = df["Customer name"].str.replace(r'[^\x00-\x7f]', '', regex=True)
df["Customer name"] = df["Customer name"].str.strip()

In [8]:
df2 = df[['Purchase Date', 'Customer name', 'Global Amount']]
df2

Unnamed: 0,Purchase Date,Customer name,Global Amount
0,2022-05-01 00:00:00,NOMAR RB SL,654.94
1,2022-05-02 00:00:00,COSAGUASL,1328.85
2,2022-05-02 00:00:00,SUCESORES DE MORENO SL,331.41
3,2022-05-02 00:00:00,SUCESORES DE MORENO SL,322.96
4,2022-05-02 00:00:00,COPIL SL,13.16
...,...,...,...
23181,2023-04-29 00:00:00,PISCINES DOMESL,3846.41
23182,2023-04-29 00:00:00,PISCINES DOMESL,892.10
23183,2023-04-29 00:00:00,PISCINES DOMESL,829.40
23184,2023-04-29 00:00:00,FONT-MECO SL,908.60


In [9]:
df2["Purchase Date"] = pd.to_datetime(df2["Purchase Date"])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2["Purchase Date"] = pd.to_datetime(df2["Purchase Date"])


In [10]:
df2.dtypes

Purchase Date    datetime64[ns]
Customer name            object
Global Amount           float64
dtype: object

In [11]:
snapshot_date = df2['Purchase Date'].max() + timedelta(days=1)

In [12]:
df3 = df2.groupby('Customer name').agg({'Global Amount': ['sum','count'], 'Purchase Date': lambda x: (snapshot_date - x.max()).days}).reset_index()

In [13]:
df3.head(3)

Unnamed: 0_level_0,Customer name,Global Amount,Global Amount,Purchase Date
Unnamed: 0_level_1,Unnamed: 1_level_1,sum,count,<lambda>
0,A M INFINITY POOLS SL,14463.9,1,111
1,A CAMPIA DE LEMOS SL,7677.6,1,38
2,A PALLISER SL,3784.63,8,9


In [14]:
df3.columns

MultiIndex([('Customer name',         ''),
            ('Global Amount',      'sum'),
            ('Global Amount',    'count'),
            ('Purchase Date', '<lambda>')],
           )

In [15]:
df4 = df3.rename(columns={'': 'Customer Name',
                          "sum": "Monetary value",
                          'count': 'Frequency',
                          '<lambda>': 'Recency'})

In [16]:
# Dropping 1st level Index:
df4.columns = df4.columns.droplevel(0)

In [17]:
df4.head(3)

Unnamed: 0,Customer Name,Monetary value,Frequency,Recency
0,A M INFINITY POOLS SL,14463.9,1,111
1,A CAMPIA DE LEMOS SL,7677.6,1,38
2,A PALLISER SL,3784.63,8,9


In [18]:
df4.to_excel("./outputs/RFM_ES_2023.xlsx", index=False)