# Computation of MAE, RMSE, Pearson correlation, and SNR for BPM time series.


In [30]:
import pandas as pd
import numpy as np
import zipfile
import os, re

# Path to the archive
models_zip_path = 'Models_13.zip'

# Check file existence
if not os.path.isfile(models_zip_path):
    raise FileNotFoundError(f'The file {models_zip_path} was not found. Upload it or change the path.')

# Extraction directory
extract_dir = 'models_13'
os.makedirs(extract_dir, exist_ok=True)

# Extract files
with zipfile.ZipFile(models_zip_path, 'r') as zf:
    zf.extractall(extract_dir)

# List extracted CSV files
csv_files = [f for f in os.listdir(extract_dir) if f.lower().endswith('.csv')]
print('Extracted CSV files:', csv_files)

Extracted CSV files: []


In [31]:
if 'extract_dir' not in globals():
    extract_dir = "/content/Models_extracted"

# Recursive search for a file containing certain keywords
def find_csv_by_keywords(base_dir, must_contain, must_not_contain=None):
    mc = [w.lower() for w in must_contain]
    mnc = [w.lower() for w in (must_not_contain or [])]
    for root, _, files in os.walk(base_dir):
        for f in files:
            if not f.lower().endswith(".csv"):
                continue
            name = f.lower()
            if all(w in name for w in mc) and all(w not in name for w in mnc):
                return os.path.join(root, f)
    return None

# Separator detection + read a column
def read_series_auto(path, one_based_col, label):
    # auto-detect the separator (;, , \t, ...)
    df = pd.read_csv(path, header=None, sep=None, engine='python')
    ncol = df.shape[1]
    idx = one_based_col - 1
    if idx >= ncol:  # if the requested column doesn't exist, take the last one
        idx = ncol - 1
    s = pd.to_numeric(df.iloc[:, idx], errors="coerce")

    # if too many NaNs, choose the most "numeric" column
    if s.notna().mean() < 0.5:
        num = df.apply(pd.to_numeric, errors="coerce")
        best_col = num.notna().sum().idxmax()
        s = num.iloc[:, best_col]
        print(f"[{label}] Requested column missing/noisy -> using column {best_col+1} (1-based).")
    print(f"[{label}] file: {os.path.basename(path)} | columns: {ncol} | valid values: {int(s.notna().sum())}")
    return s.dropna().to_numpy(dtype=float)

# File discovery (lenient on names)
paths = {
    'GT': find_csv_by_keywords(extract_dir, must_contain=['gtdump']),
    'POS': find_csv_by_keywords(extract_dir, must_contain=['pos'], must_not_contain=['deepphys']),
    'GREEN': find_csv_by_keywords(extract_dir, must_contain=['green']),
    'DeepPhys': find_csv_by_keywords(extract_dir, must_contain=['deepphys'], must_not_contain=['enh']),
    'DeepPhysEnhanced': (
        find_csv_by_keywords(extract_dir, must_contain=['deepphys','enh'], must_not_contain=['relu']) or
        find_csv_by_keywords(extract_dir, must_contain=['enhanced','deepphys'], must_not_contain=['relu'])
    ),
    'DeepPhysEnhancedReLu': (
        find_csv_by_keywords(extract_dir, must_contain=['deepphys','enh', 'relu']) or
        find_csv_by_keywords(extract_dir, must_contain=['enhanced','deepphys', 'relu'])
    ),
}

# Existence check
missing = [k for k,v in paths.items() if v is None]
if missing:
    print(" Files not found for:", ", ".join(missing))
    print("Check the names in the archive or adjust the keywords above.")
    # We proceed even if only non-GT are missing -> if GT is missing, stop
    if paths['GT'] is None:
        raise FileNotFoundError("gtdump.csv not found (GT key).")

# Read series (GT col 2, methods col 4)
series = {}
series['GT'] = read_series_auto(paths['GT'], one_based_col=2, label='GT')

for key in ['POS','GREEN','DeepPhys','DeepPhysEnhanced','DeepPhysEnhancedReLu']:
    if paths.get(key):
        series[key] = read_series_auto(paths[key], one_based_col=4, label=key)

# Align to the minimal common length
min_len = min(len(arr) for arr in series.values())
for k in list(series.keys()):
    series[k] = series[k][:min_len]

print("Common length used:", min_len)
print("Loaded series:", ", ".join(series.keys()))

[GT] file: gtdump_13.csv | columns: 4 | valid values: 7304
[POS] file: resultats_fc_POS_13.csv | columns: 4 | valid values: 7350
[GREEN] file: resultats_fc_GREEN_13.csv | columns: 4 | valid values: 7350
[DeepPhys] file: resultats_fc_DeepPhys_13.csv | columns: 4 | valid values: 7348
[DeepPhysEnhanced] file: resultats_fc_DeepPhysEnhanced_13.csv | columns: 4 | valid values: 7348
[DeepPhysEnhancedReLu] file: resultats_fc_DeepPhysEnhancedReLu_13.csv | columns: 4 | valid values: 7348
Common length used: 7304
Loaded series: GT, POS, GREEN, DeepPhys, DeepPhysEnhanced, DeepPhysEnhancedReLu


In [28]:
def mae(y_true, y_pred):
    "Calcul de la mean absolute error."
    return np.mean(np.abs(y_true - y_pred))

def rmse(y_true, y_pred):
    "Calcul de la root mean square error."
    return np.sqrt(np.mean((y_true - y_pred) ** 2))

def pearson_corr(y_true, y_pred):
    "Corrélation de Pearson entre deux vecteurs."
    if np.all(y_true == y_true[0]) or np.all(y_pred == y_pred[0]):
        return np.nan
    return np.corrcoef(y_true, y_pred)[0, 1]

def snr_db(y_true, y_pred):
    "SNR en décibels basé sur la puissance du signal et du bruit."
    signal_power = np.sum(y_true ** 2)
    noise_power = np.sum((y_true - y_pred) ** 2)
    if noise_power == 0:
        return np.inf
    return 10 * np.log10(signal_power / noise_power)

In [29]:
# file: analysis/save_results.py
methods = ['POS', 'GREEN', 'DeepPhys', 'DeepPhysEnhanced','DeepPhysEnhancedReLu']
y_true = series['GT']
results = []

for m in methods:
    y_pred = series[m]
    results.append({
        'Method': m,
        'MAE': mae(y_true, y_pred),
        'RMSE': rmse(y_true, y_pred),
        'Pearson Correlation': pearson_corr(y_true, y_pred),
        'SNR (dB)': snr_db(y_true, y_pred),
    })

import pandas as pd
results_df = pd.DataFrame(results)

# CSV save (English filename). Why: preserve accented-character encoding.
results_df.to_csv('evaluation_results_13.csv', index=False, encoding='utf-8-sig')
print("Results saved to evaluation_results.csv")

results_df

Results saved to evaluation_results.csv


Unnamed: 0,Method,MAE,RMSE,Pearson Correlation,SNR (dB)
0,POS,49.03364,54.235355,-0.024323,6.509099
1,GREEN,32.930506,45.10591,-0.154367,8.11008
2,DeepPhys,29.70994,36.12997,0.179519,10.037397
3,DeepPhysEnhanced,44.02968,52.6912,-0.208529,6.759987
4,DeepPhysEnhancedReLu,47.730418,54.372055,-0.16306,6.487234
