### Method which combined global/local percentil score 

In [2]:
import pandas as pd
import numpy as np

In [3]:
dataset1=pd.read_csv("../dataset_unified.csv", sep=";")
dataset1.head()

Unnamed: 0,company,date,quarter,country,ROA,ROE,debt_to_equity,current_ratio,net_margin,revenue_growth,cash_ratio,inflation_YoY,gdp_growth_rate,interest_rate
0,Banco Santander,2024-09-30,2024-Q3,Spain,,,1615,33,,62414,,217,80,365
1,Banco Santander,2024-06-30,2024-Q2,Spain,63.0,109.0,1623,23,906.0,-8712,23.0,346,80,425
2,Banco Santander,2024-03-31,2024-Q1,Spain,6.0,1043.0,1614,24,87.0,-6185,24.0,314,100,450
3,Banco Santander,2023-12-31,2023-Q4,Spain,59.0,1035.0,1624,35,886.0,221927,34.0,327,70,450
4,Banco Santander,2023-09-30,2023-Q3,Spain,55.0,968.0,1666,31,1044.0,201,31.0,282,70,450


In [4]:
indicators_to_clean = [
    'ROA', 'ROE', 'net_margin', 'current_ratio',
    'cash_ratio', 'debt_to_equity', 'revenue_growth',
    'inflation_YoY', 'gdp_growth_rate', 'interest_rate'
]

# STEP 1: Clean all comma-based numbers → dots, then convert to numeric
for col in indicators_to_clean:
    dataset1[col] = (
        dataset1[col]
        .astype(str)
        .str.replace(',', '.', regex=False)     # Convert commas to dots
        .replace('nan', np.nan)                 # Replace string 'nan' with real NaN
    )
    dataset1[col] = pd.to_numeric(dataset1[col], errors='coerce')  # Convert to float

print(dataset1[indicators_to_clean].dtypes)

ROA                float64
ROE                float64
net_margin         float64
current_ratio      float64
cash_ratio         float64
debt_to_equity     float64
revenue_growth     float64
inflation_YoY      float64
gdp_growth_rate    float64
interest_rate      float64
dtype: object


In [5]:

def compute_local_percentile(df, column):
    return df.groupby('company')[column].rank(pct=True)

dataset1['ROA_pct'] = compute_local_percentile(dataset1, 'ROA')
dataset1['ROE_pct'] = compute_local_percentile(dataset1, 'ROE')
dataset1['net_margin_pct'] = compute_local_percentile(dataset1, 'net_margin')
dataset1['current_ratio_pct'] = compute_local_percentile(dataset1, 'current_ratio')
dataset1['cash_ratio_pct'] = compute_local_percentile(dataset1, 'cash_ratio')
dataset1['debt_to_equity_pct'] = compute_local_percentile(dataset1, 'debt_to_equity')


In [6]:
# profitability
dataset1['score_profitability_local'] = dataset1[['ROA_pct', 'ROE_pct', 'net_margin_pct']].mean(axis=1)

# liquidity
dataset1['score_liquidity_local'] = dataset1[['current_ratio_pct', 'cash_ratio_pct']].mean(axis=1)

# Solvency
dataset1['score_solvency_local'] = 1 - dataset1['debt_to_equity_pct']

# Leverage Adjusted profitability
dataset1['inv_debt_pct'] = 1 - dataset1['debt_to_equity_pct']
dataset1['score_leverage_adjusted_local'] = dataset1[['ROE_pct', 'inv_debt_pct']].mean(axis=1)


In [7]:
cols = ['company', 'quarter', 'score_profitability_local', 'score_liquidity_local', 'score_solvency_local', 'score_leverage_adjusted_local']
print(dataset1[cols].head())


           company  quarter  score_profitability_local  score_liquidity_local  \
0  Banco Santander  2024-Q3                        NaN               0.255319   
1  Banco Santander  2024-Q2                   0.764493               0.064524   
2  Banco Santander  2024-Q1                   0.717391               0.129047   
3  Banco Santander  2023-Q4                   0.717391               0.415241   
4  Banco Santander  2023-Q3                   0.760870               0.236702   

   score_solvency_local  score_leverage_adjusted_local  
0              0.170213                       0.170213  
1              0.148936                       0.574468  
2              0.191489                       0.584875  
3              0.127660                       0.542091  
4              0.063830                       0.488437  


In [8]:
def compute_global_percentile(df, column):
    return df.groupby('quarter')[column].rank(pct=True)


# Appliquer le percentile global
dataset1['ROA_pct_global'] = compute_global_percentile(dataset1, 'ROA')
dataset1['ROE_pct_global'] = compute_global_percentile(dataset1, 'ROE')
dataset1['net_margin_pct_global'] = compute_global_percentile(dataset1, 'net_margin')
dataset1['current_ratio_pct_global'] = compute_global_percentile(dataset1, 'current_ratio')
dataset1['cash_ratio_pct_global'] = compute_global_percentile(dataset1, 'cash_ratio')
dataset1['debt_to_equity_pct_global'] = compute_global_percentile(dataset1, 'debt_to_equity')

dataset1['debt_to_equity_pct_global'] = 1- dataset1['debt_to_equity_pct_global']  # Inverser pour la solvabilité

In [9]:
# Score global
dataset1['score_profitability_global']= dataset1[['ROA_pct_global','ROE_pct_global','net_margin_pct_global']].mean(axis=1)
dataset1['score_liquidity_global'] = dataset1[['current_ratio_pct_global','cash_ratio_pct_global']].mean(axis=1)
dataset1['score_solvency_global'] = dataset1['debt_to_equity_pct_global']
dataset1['score_leverage_adjusted_global'] = dataset1[['ROE_pct_global', 'debt_to_equity_pct_global']].mean(axis=1)



In [10]:
print(dataset1.columns.tolist())


['company', 'date', 'quarter', 'country', 'ROA', 'ROE', 'debt_to_equity', 'current_ratio', 'net_margin', 'revenue_growth', 'cash_ratio', 'inflation_YoY', 'gdp_growth_rate', 'interest_rate', 'ROA_pct', 'ROE_pct', 'net_margin_pct', 'current_ratio_pct', 'cash_ratio_pct', 'debt_to_equity_pct', 'score_profitability_local', 'score_liquidity_local', 'score_solvency_local', 'inv_debt_pct', 'score_leverage_adjusted_local', 'ROA_pct_global', 'ROE_pct_global', 'net_margin_pct_global', 'current_ratio_pct_global', 'cash_ratio_pct_global', 'debt_to_equity_pct_global', 'score_profitability_global', 'score_liquidity_global', 'score_solvency_global', 'score_leverage_adjusted_global']


In [11]:
# Check global scores for one quarter
dataset1[dataset1['quarter'] == '2022-Q1'][['company', 'score_profitability_global']]


Unnamed: 0,company,score_profitability_global
10,Banco Santander,0.533333
56,BNP Paribas,0.333333
102,Crédit Agricole,0.666667
131,HSBC,0.533333
173,JP Morgan Chase,0.933333


In [12]:
cols=['company', 'quarter', 'score_profitabilty_global', 'score_liquidity_global', 'score_solvency_global', 'score_leverage_adjusted_global']
print(dataset1[cols].head(10))

KeyError: "['score_profitabilty_global'] not in index"

In [None]:
# Remplace 'dataset1' par le nom de ta DataFrame si différent
dataset1.to_csv("dataset1_complet.csv", index=False)


In [None]:
dataset1.loc[dataset1['score_solvency_global'].idxmax()]


company                           JP Morgan Chase
date                                   2016-06-30
quarter                                   2016-Q2
country                                       USA
ROA                                          0.92
ROE                                        0.0892
debt_to_equity                               8.77
current_ratio                                1.03
net_margin                                 0.2183
revenue_growth                              0.011
cash_ratio                                   0.53
inflation_YoY                              0.0105
gdp_growth_rate                              0.01
interest_rate                              0.0037
ROA_pct                                  0.413462
ROE_pct                                  0.230769
net_margin_pct                           0.596154
current_ratio_pct                        0.942308
cash_ratio_pct                           0.740385
debt_to_equity_pct                       0.057692


In [None]:
print(dataset1.columns.tolist())


NameError: name 'dataset1' is not defined

In [None]:
import pandas as pd
import ipywidgets as widgets
from IPython.display import display, clear_output

# Définir les seuils
thresholds = {
    'low': 0.1,
    'high': 0.9
}

# Classer le percentile
def flag_percentile(p):
    if pd.isna(p): return 'Missing'
    elif p <= thresholds['low']: return 'Bottom 10%'
    elif p >= thresholds['high']: return 'Top 10%'
    else: return 'Middle'

# Appliquer flags
for col in ['score_profitability', 'score_liquidity', 'score_solvency', 'score_leverage_adjusted']:
    dataset1[f'{col}_global_flag'] = dataset1[f'{col}_global'].apply(flag_percentile)
    dataset1[f'{col}_local_flag'] = dataset1[f'{col}_local'].apply(flag_percentile)

# Revenue growth : alerte simple
def revenue_alert(x):
    if pd.isna(x): return "Unknown"
    elif x < -0.1: return "Rev ↓"
    elif x > 0.1: return "Rev ↑"
    else: return "Stable"

dataset1['revenue_alert'] = dataset1['revenue_growth'].apply(revenue_alert)

# Résumé alerte
def alert_summary(row):
    reds, greens, stables = [], [], []
    for col, name in [
        ('score_profitability', 'Profitability'),
        ('score_liquidity', 'Liquidity'),
        ('score_solvency', 'Solvency'),
        ('score_leverage_adjusted', 'Adj. Leverage')
    ]:
        val = row.get(col)
        if pd.isna(val): continue
        if val < thresholds['low']:
            reds.append(name)
        elif val > thresholds['high']:
            greens.append(name)
        else:
            stables.append(name)
    parts = []
    if reds:
        parts.append("Red (" + ", ".join(reds) + ")")
    if greens:
        parts.append("Green (" + ", ".join(greens) + ")")
    if not reds and not greens and stables:
        parts.append("Stable (" + ", ".join(stables) + ")")
    if not parts:
        parts.append("Missing")
    if row['revenue_alert'] in ['Rev ↓', 'Rev ↑']:
        parts.append(row['revenue_alert'])
    return ", ".join(parts)

dataset1['Alert Summary'] = dataset1.apply(alert_summary, axis=1)

# Statut global
def global_status(row):
    indicators = ['score_profitability', 'score_liquidity', 'score_solvency', 'score_leverage_adjusted']
    red = sum(row[col] < thresholds['low'] for col in indicators if pd.notna(row[col]))
    green = sum(row[col] > thresholds['high'] for col in indicators if pd.notna(row[col]))
    leverage = row['score_leverage_adjusted']
    rev = row['revenue_alert']

    if leverage is not None and leverage < thresholds['low']:
        return "Leveraged Risk"
    if leverage is not None and leverage > thresholds['high'] and red == 0 and rev == "Rev ↑":
        return "Excellent Health"
    if leverage is not None and leverage > thresholds['high'] and red == 0:
        return "Strong Capital Efficiency"
    if red >= 3:
        return "Critical Risk"
    if red == 2:
        return "Danger"
    if green >= 2 and red == 0 and leverage <= thresholds['high']:
        return "Strong"
    if green >= 2 and red <= 1 or (green >= 1 and red == 0):
        return "Good signal"
    if red == green and red > 0:
        return "Mixed Risk"
    if red == 1 and green == 0:
        return "Caution"
    if all(0.2 <= row[col] <= 0.8 for col in indicators if pd.notna(row[col])):
        return "Stable"
    return "Watch"

dataset1["Overall Status"] = dataset1.apply(global_status, axis=1)

# Widget pour sélection de société
dropdown = widgets.Dropdown(
    options=sorted(dataset1['company'].dropna().unique()),
    description='Company:',
    value=dataset1['company'].dropna().unique()[0]
)

output = widgets.Output()

def display_summary(change):
    with output:
        clear_output()
        df = dataset1[dataset1['company'] == change['new']].copy()
        df = df[[
            'quarter',
            'score_profitability', 'score_profitability_local_flag', 'score_profitability_global_flag',
            'score_liquidity', 'score_liquidity_local_flag', 'score_liquidity_global_flag',
            'score_solvency', 'score_solvency_local_flag', 'score_solvency_global_flag',
            'score_leverage_adjusted', 'score_leverage_adjusted_local_flag', 'score_leverage_adjusted_global_flag',
            'revenue_alert', 'Alert Summary', 'Overall Status'
        ]].reset_index(drop=True)
        display(df)

# Lancer
dropdown.observe(display_summary, names='value')
display(dropdown, output)
display_summary({'new': dropdown.value})


KeyError: 'score_profitability'

In [None]:
# Thresholds
low_threshold = 0.1
high_threshold = 0.9

# Helper: flag anomalies
def flag_anomalies(df, score_col, suffix):
    df[f'is_low_{suffix}'] = df[score_col] < low_threshold
    df[f'is_high_{suffix}'] = df[score_col] > high_threshold
    return df

# Apply to each score type
for score_type in ['profitability', 'liquidity', 'solvency', 'leverage_adjusted']:
    dataset1 = flag_anomalies(dataset1, f'score_{score_type}_local', f'{score_type}_local')
    dataset1 = flag_anomalies(dataset1, f'score_{score_type}_global', f'{score_type}_global')

# Combined anomaly detection (low local + low global)
for score_type in ['profitability', 'liquidity', 'solvency', 'leverage_adjusted']:
    dataset1[f'anomaly_low_{score_type}'] = (
        dataset1[f'is_low_{score_type}_local'] & dataset1[f'is_low_{score_type}_global']
    )

# Optional: trend over time (score delta vs previous quarter)
dataset1 = dataset1.sort_values(by=['company', 'quarter'])
for score_type in ['profitability', 'liquidity', 'solvency', 'leverage_adjusted']:
    dataset1[f'trend_{score_type}_local'] = dataset1.groupby('company')[f'score_{score_type}_local'].diff()
    dataset1[f'trend_{score_type}_global'] = dataset1.groupby('company')[f'score_{score_type}_global'].diff()


In [None]:
#bon



DeltaGenerator()

SyntaxError: invalid syntax (507122745.py, line 1)