### Method combining global and local percentile scores

In [12]:
import pandas as pd
import numpy as np

In [13]:
# Read the semicolon-delimited unified dataset and preview its first five rows.
# NOTE(review): a later cell converts decimal commas to dots by hand; if every
# numeric column uses decimal commas, read_csv(decimal=',') could parse them
# directly — confirm against the raw file before changing.
dataset = pd.read_csv(filepath_or_buffer="../dataset_unified.csv", delimiter=";")
dataset.head(n=5)

Unnamed: 0,company,date,quarter,country,ROA,ROE,debt_to_equity,current_ratio,net_margin,revenue_growth,cash_ratio,inflation_YoY,gdp_growth_rate,interest_rate
0,Banco Santander,2024-09-30,2024-Q3,Spain,,,1615,33,,62414,,217,80,365
1,Banco Santander,2024-06-30,2024-Q2,Spain,63.0,109.0,1623,23,906.0,-8712,23.0,346,80,425
2,Banco Santander,2024-03-31,2024-Q1,Spain,6.0,1043.0,1614,24,87.0,-6185,24.0,314,100,450
3,Banco Santander,2023-12-31,2023-Q4,Spain,59.0,1035.0,1624,35,886.0,221927,34.0,327,70,450
4,Banco Santander,2023-09-30,2023-Q3,Spain,55.0,968.0,1666,31,1044.0,201,31.0,282,70,450


In [14]:
# Indicator columns that need to be converted to numeric values.
indicators_to_clean = [
    'ROA',
    'ROE',
    'net_margin',
    'current_ratio',
    'cash_ratio',
    'debt_to_equity',
    'revenue_growth',
    'inflation_YoY',
    'gdp_growth_rate',
    'interest_rate',
]

# STEP 1: normalise every indicator to a numeric column in a single chain:
#   text -> decimal comma replaced by a dot -> literal 'nan' restored to NaN -> number.
# Anything that still cannot be parsed becomes NaN (errors='coerce').
for col in indicators_to_clean:
    dataset[col] = (
        dataset[col]
        .astype(str)
        .str.replace(',', '.', regex=False)
        .replace('nan', np.nan)
        .pipe(pd.to_numeric, errors='coerce')
    )

# Sanity check: show the resulting dtype of each cleaned column.
print(dataset.dtypes.loc[indicators_to_clean])

ROA                float64
ROE                float64
net_margin         float64
current_ratio      float64
cash_ratio         float64
debt_to_equity     float64
revenue_growth     float64
inflation_YoY      float64
gdp_growth_rate    float64
interest_rate      float64
dtype: object


In [15]:
def compute_local_percentile(df, column, group_col='company'):
    """Return the percentile rank of ``column`` within each group of ``group_col``.

    The rank is "local": every value is compared only with the other values of
    the same group (by default, the same company), not with the whole dataset.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing both ``group_col`` and ``column``.
    column : str
        Name of the column to rank.
    group_col : str, default 'company'
        Name of the column that defines the groups. The default keeps the
        original per-company behaviour.

    Returns
    -------
    pandas.Series
        Series aligned on ``df.index`` holding normalised ranks
        (rank / number of non-missing values in the group). With the pandas
        defaults used here, tied values share their average rank and missing
        values stay NaN.

    Raises
    ------
    KeyError
        If ``group_col`` or ``column`` is not a column of ``df``.
    """
    return df.groupby(group_col)[column].rank(pct=True)

# Add a within-company percentile column named '<indicator>_pct' for each key
# indicator. The tuple order fixes the order in which the columns are created.
for indicator in (
    'ROA',
    'ROE',
    'net_margin',
    'current_ratio',
    'cash_ratio',
    'debt_to_equity',
):
    dataset[indicator + '_pct'] = compute_local_percentile(dataset, indicator)


In [16]:
# Each composite score is a row-wise mean of percentile columns. DataFrame.mean
# skips NaN by default, so a row with some inputs missing is scored from the
# inputs that are present, and is NaN only when every input is missing.

# Score 1: profitability — mean of the ROA, ROE and net-margin percentiles.
dataset['score_profitability'] = (
    dataset.loc[:, ['ROA_pct', 'ROE_pct', 'net_margin_pct']].mean(axis='columns')
)

# Score 2: liquidity — mean of the current-ratio and cash-ratio percentiles.
dataset['score_liquidity'] = (
    dataset.loc[:, ['current_ratio_pct', 'cash_ratio_pct']].mean(axis='columns')
)

# Score 3: solvency — a lower debt_to_equity is better, so the percentile is inverted.
dataset['score_solvency'] = 1 - dataset.loc[:, 'debt_to_equity_pct']

# Score 4: leverage-adjusted profitability — mean of the ROE percentile and the
# inverted debt percentile (kept as its own helper column, 'inv_debt_pct').
dataset['inv_debt_pct'] = 1 - dataset.loc[:, 'debt_to_equity_pct']
dataset['score_leverage_adjusted'] = (
    dataset.loc[:, ['ROE_pct', 'inv_debt_pct']].mean(axis='columns')
)


In [17]:
# Identifier columns followed by the four composite scores.
cols = [
    'company',
    'quarter',
    'score_profitability',
    'score_liquidity',
    'score_solvency',
    'score_leverage_adjusted',
]
# Print the first ten rows restricted to those columns.
print(dataset.head(10).loc[:, cols])


           company  quarter  score_profitability  score_liquidity  \
0  Banco Santander  2024-Q3                  NaN         0.255319   
1  Banco Santander  2024-Q2             0.764493         0.064524   
2  Banco Santander  2024-Q1             0.717391         0.129047   
3  Banco Santander  2023-Q4             0.717391         0.415241   
4  Banco Santander  2023-Q3             0.760870         0.236702   
5  Banco Santander  2023-Q2             0.717391         0.129047   
6  Banco Santander  2023-Q1             0.663043         0.129047   
7  Banco Santander  2022-Q4             0.735507         0.404602   
8  Banco Santander  2022-Q3             0.887681         0.545444   
9  Banco Santander  2022-Q2             0.847826         0.604764   

   score_solvency  score_leverage_adjusted  
0        0.170213                 0.170213  
1        0.148936                 0.574468  
2        0.191489                 0.584875  
3        0.127660                 0.542091  
4        0.0638