In [1]:
import pandas as pd
from datetime import timedelta

In [2]:
# Config:
month = 'Jun23'
file_name = 'Spain_Jan_Jun_23'

In [3]:
# Read CSV with Decimals as comma:
df = pd.read_csv(f"./Inputs/{month}/{file_name}.csv", decimal=',')

In [4]:
df.head(3)

Unnamed: 0,Company (code+name),Year,Delivery note date,Customer code,Customer name,Segment code (customer),Segment name (customer),Global CoFX Amount (net),Sales order number,Order origin
0,002 - FLUIDRA COMERCIAL ESPAÑA SAU,2023,2023-01-02 00:00:00,101822,SUMINISTROS DE FONTANERIA CASTELLON S.L.,42,HVAC/CONSTRUCT.-Whosale&Retail,639.95,461091,25
1,002 - FLUIDRA COMERCIAL ESPAÑA SAU,2023,2023-01-02 00:00:00,109055,"CLIMAPOOL INSULAR, S.L.U. (LAS PALMAS)",14,POOL SPECIALIST-Retailers,6077.0,459851,25
2,002 - FLUIDRA COMERCIAL ESPAÑA SAU,2023,2023-01-02 00:00:00,109233,"M.M. 2004 SERDEMA, S.L.",15,POOL SPECIALIST-Instal/Builder,1399.25,460979,25


In [5]:
# Renaming Columns:
df = df.rename(columns={'Delivery note date': 'Purchase Date',
                        'Customer Name': 'Customer name', 
                        'Global CoFX Amount (net)': 'Global Amount'})

In [6]:
df["Customer name"].unique()

array(['SUMINISTROS DE FONTANERIA CASTELLON S.L.',
       'CLIMAPOOL INSULAR, S.L.U. (LAS PALMAS)',
       'M.M. 2004 SERDEMA, S.L.', 'ITESA PROYECTOS E INSTALACIONES S.L.',
       'NOVA RECAL SERVICIOS Y MANTENIMIENTO S.L',
       'SUPPLAID SUBMINISTRE ASSISTIT, S.L.', 'TU PISCINA EN CASA, S.L.',
       'FENIX PISCINAS , S.C.', 'PISCINAS DEL ESTRECHO S.L.',
       'PLASTINIK, S.L.', 'NOVELEC ANTAS S.L.',
       'FERRETERIA FERROMAR, S.L.', 'CEMENTOS BENIDORM S.A.',
       'APARISI FERRER, JOSEFA', 'IRRIAGRO S.COOP.V.L.',
       'GONZALEZ LOPEZ, C.B.', 'CODISNA, S.L.',
       'GISPERT, DEPURACION DE AGUAS, S.A.',
       'HIDRO-TARRACO, S.A (PROV. 17050)', 'GARCIA MASIDE, JOSE',
       'POOLPLUS S.L. (ON LINE)', 'QUIMICOS LOS ABUELOS, S.L.',
       'SANEAMIENTOS RIGA, S.L.', 'MANEIG PISCINES, S.L.',
       'ESPAI INTEGRAL DE PISCINES, S.L.(GIRONA)', 'NOMAR R.B., S.L.',
       'ARROMEL S.L.', 'RIVAS PEÑA,JESUS JUAN', 'LIZASO VERGARA, S.L.',
       'FERRETERIA LAS NAVES, S.L.', 'TIENDA-PI

In [7]:
df["Customer name"] = df["Customer name"].str.replace(r' - 100', '')
df["Customer name"] = df["Customer name"].str.replace(r'(?!-)[^\w\s]', '', regex=True)
df["Customer name"] = df["Customer name"].str.replace('Ã‘', 'A')
df["Customer name"] = df["Customer name"].str.replace('Ã“', 'A')
df["Customer name"] = df["Customer name"].str.replace(r'[^\x00-\x7f]', '', regex=True)
df["Customer name"] = df["Customer name"].str.strip()

In [8]:
df2 = df[['Purchase Date', 'Customer name', 'Global Amount']]
df2

Unnamed: 0,Purchase Date,Customer name,Global Amount
0,2023-01-02 00:00:00,SUMINISTROS DE FONTANERIA CASTELLON SL,639.95
1,2023-01-02 00:00:00,CLIMAPOOL INSULAR SLU LAS PALMAS,6077.00
2,2023-01-02 00:00:00,MM 2004 SERDEMA SL,1399.25
3,2023-01-02 00:00:00,ITESA PROYECTOS E INSTALACIONES SL,237.51
4,2023-01-02 00:00:00,NOVA RECAL SERVICIOS Y MANTENIMIENTO SL,793.10
...,...,...,...
16035,2023-06-30 00:00:00,SANEAMIENTOS AXARQUIA SL,72.90
16036,2023-06-30 00:00:00,FIBRAGUA ISLEA SL,593.78
16037,2023-06-30 00:00:00,FUTURQUIMIA SL,383.55
16038,2023-06-30 00:00:00,AC PISCINA REFORMA Y JARDIN SL,53.84


In [9]:
df2["Purchase Date"] = pd.to_datetime(df2["Purchase Date"])
df2['Global Amount'] = pd.to_numeric(df2['Global Amount'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2["Purchase Date"] = pd.to_datetime(df2["Purchase Date"])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['Global Amount'] = pd.to_numeric(df2['Global Amount'])


In [10]:
df2.dtypes

Purchase Date    datetime64[ns]
Customer name            object
Global Amount           float64
dtype: object

In [11]:
snapshot_date = df2['Purchase Date'].max() + timedelta(days=1)

In [12]:
df3 = df2.groupby('Customer name').agg({'Global Amount': ['sum','count'], 'Purchase Date': lambda x: (snapshot_date - x.max()).days}).reset_index()

In [13]:
df3.head(3)

Unnamed: 0_level_0,Customer name,Global Amount,Global Amount,Purchase Date
Unnamed: 0_level_1,Unnamed: 1_level_1,sum,count,<lambda>
0,A M INFINITY POOLS SL,14463.9,1,173
1,A CAMPIA DE LEMOS SL,7677.6,1,100
2,A PALLISER SL,2362.59,7,29


In [14]:
df3.columns

MultiIndex([('Customer name',         ''),
            ('Global Amount',      'sum'),
            ('Global Amount',    'count'),
            ('Purchase Date', '<lambda>')],
           )

In [15]:
df4 = df3.rename(columns={'': 'Customer Name',
                          "sum": "Monetary value",
                          'count': 'Frequency',
                          '<lambda>': 'Recency'})

In [16]:
# Dropping 1st level Index:
df4.columns = df4.columns.droplevel(0)

In [17]:
df4.head(3)

Unnamed: 0,Customer Name,Monetary value,Frequency,Recency
0,A M INFINITY POOLS SL,14463.9,1,173
1,A CAMPIA DE LEMOS SL,7677.6,1,100
2,A PALLISER SL,2362.59,7,29


In [18]:
df4.to_excel(f"./{month}/ES/RFM_ES_{month}.xlsx", index=False)