# 导入包和函数

In [None]:
import os
import sys
from multiprocessing import Pool, cpu_count
import numpy as np
import torch

# Seed the PyTorch and NumPy global random number generators with a fixed
# value so that repeated executions start from the same random state.
torch.manual_seed(756)
np.random.seed(756)


# Append the parent of the current working directory to the module search
# path. Presumably this is where the project-local `mfdnn` and `utils`
# modules imported below live -- confirm against the repository layout.
# NOTE: this depends on the directory the notebook is launched from, not on
# the location of this file.
current_dir = os.getcwd()
parent_dir = os.path.dirname(current_dir)
sys.path.append(parent_dir)


from mfdnn import *
from utils import *

# 基础设置

In [None]:
# Re-seed both RNGs at the top of this cell so that re-running from here
# starts from the same random state as a fresh run.
torch.manual_seed(756)
np.random.seed(756)

# Directory (relative to the working directory) that holds the
# "Xlist_T{T}_n{n}.npy" / "ylist_T{T}_n{n}.npy" files loaded by the main loop.
data_path = "Simulation"

# Simulation settings to process; 'T' and 'n' are only used to build the
# data file names and the result keys.
configurations = [
    {'T': 16, 'n': 200},
    {'T': 16, 'n': 400}, 
    {'T': 32, 'n': 200},
    {'T': 32, 'n': 400}
]

# Candidate grids for the `lam1` and `lam2` arguments of MFDNN; every
# (lam1, lam2) pair is fitted during hyperparameter selection.
# NOTE(review): presumably regularisation strengths -- confirm in mfdnn.
lam1_values = [0.5, 1, 1.5, 2, 2.5, 3]
lam2_values = [0, 0.001, 0.01, 0.1, 0.5, 1]

# Fixed settings forwarded to MFDNN (and 'num_basis' also to MFDNN_predict).
model_params = {
    'num_basis': (5, 5),
    'layer_sizes': [64, 64],
    'epochs': 100,
    'val_ratio': 0.25,
    'patience': 10
}

# Known relevant covariates per response, used to score variable selection.
# Keys are 0-based response indices and values are sets of 0-based covariate
# indices; the trailing comments list the same sets with 1-based numbering.
ground_truth = {
    0: {0, 1},      # y1: {1,2}
    1: {1, 4, 5},   # y2: {2,5,6}
    2: {0, 2, 3, 5}, # y3: {1,3,4,6}
    3: {0, 1},      # y4: {1,2}
    4: {1, 4, 5},   # y5: {2,5,6}
    5: {0, 2, 3, 5}  # y6: {1,3,4,6}
}

# Threshold epsilon: a covariate counts as selected when its norm is
# strictly greater than this value.
epsilon = 0.01

# 辅助函数

In [None]:

def calculate_selection_metrics(l21_norms, true_vars, epsilon=0.01, p=6):
    """Score a variable-selection result against the known relevant variables.

    A variable counts as selected when its entry in ``l21_norms`` is strictly
    greater than ``epsilon``.

    Args:
        l21_norms: Iterable with one norm per candidate variable; the position
            of each entry is used as the variable index.
        true_vars: Indices of the truly relevant variables. Any iterable is
            accepted (set, list, tuple, ...); it is converted to a set so the
            set algebra below works regardless of the container type.
        epsilon: Selection threshold applied to every norm.
        p: Unused; kept so that existing callers passing it keep working.

    Returns:
        A tuple ``(f1_score, perfect_selection, selected_vars)`` where
        ``f1_score`` is ``2*TP / (2*TP + FP + FN)`` (``0.0`` when that
        denominator is zero), ``perfect_selection`` is ``1.0`` if the selected
        set equals the true set and ``0.0`` otherwise, and ``selected_vars``
        is the set of selected indices.
    """
    # Normalise to a set: a list/tuple would make `&` and `-` raise TypeError
    # and would make the equality check below always fail.
    true_vars = set(true_vars)
    selected_vars = {idx for idx, norm in enumerate(l21_norms) if norm > epsilon}

    true_positive = len(selected_vars & true_vars)
    false_positive = len(selected_vars - true_vars)
    false_negative = len(true_vars - selected_vars)

    denominator = 2 * true_positive + false_positive + false_negative
    f1_score = (2 * true_positive) / denominator if denominator > 0 else 0.0

    perfect_selection = 1.0 if selected_vars == true_vars else 0.0

    return f1_score, perfect_selection, selected_vars

def select_best_hyperparameters(X_train, y_train, true_vars, p, domain_range, lam1_values, lam2_values, model_params, epsilon=0.01):
    """Grid-search ``lam1`` x ``lam2`` and return the best fitted candidate.

    One MFDNN model is fitted for every ``(lam1, lam2)`` pair. Candidates are
    ranked by the F1 score of their variable selection against ``true_vars``
    (so the choice relies on knowing the ground truth); ties are broken by the
    lowest loss, with the earliest grid position winning remaining ties.

    Args:
        X_train: Functional covariates, passed to MFDNN as ``func_cov``.
        y_train: Training responses, passed to MFDNN as ``resp``.
        true_vars: Indices of the truly relevant covariates.
        p: Passed to MFDNN as ``p`` and to ``calculate_selection_metrics``.
        domain_range: Passed through to MFDNN unchanged.
        lam1_values: Candidate values for MFDNN's ``lam1`` argument.
        lam2_values: Candidate values for MFDNN's ``lam2`` argument.
        model_params: Dict providing ``num_basis``, ``layer_sizes``,
            ``epochs``, ``val_ratio`` and ``patience``.
        epsilon: Selection threshold forwarded to
            ``calculate_selection_metrics``.

    Returns:
        ``(lam1, lam2, candidate)`` where ``candidate`` is a dict with keys
        ``model``, ``lam1``, ``lam2``, ``f1_score``, ``mse``,
        ``selected_vars``, ``y_mean``, ``y_std`` and ``perfect_selection``.
        If no grid point could be fitted (or the grid is empty), a fallback
        with ``model=None``, ``lam1=1``, ``lam2=0.1``, ``f1_score=0`` and
        ``mse=inf`` is returned.
    """
    import warnings  # local import: this block must not rely on file-level imports

    # Mean/std of the full training response, stored so predictions can be
    # mapped back to the original scale later.
    # NOTE(review): assumes MFDNN(std_resp=True) standardises with these same
    # statistics -- confirm against mfdnn.
    y_train_mean = np.mean(y_train)
    y_train_std = np.std(y_train)

    # Only successful fits are recorded, so a failed grid point can never be
    # picked (previously it caused a KeyError whenever the best F1 was 0).
    candidates = []

    for lam1 in lam1_values:
        for lam2 in lam2_values:
            try:
                train_losses, val_losses, model, l21 = MFDNN(
                    p=p, resp=y_train, func_cov=X_train,
                    num_basis=model_params['num_basis'],
                    layer_sizes=model_params['layer_sizes'],
                    domain_range=domain_range,
                    epochs=model_params['epochs'],
                    val_ratio=model_params['val_ratio'],
                    patience=model_params['patience'],
                    lam1=lam1, lam2=lam2, std_resp=True
                )

                # Best validation loss; if no validation losses were
                # recorded, use the mean of the last 10 training losses.
                mse = float(min(val_losses) if len(val_losses) > 0 else np.mean(train_losses[-10:]))

                f1_score, perfect_selection, selected_vars = calculate_selection_metrics(l21, true_vars, epsilon, p)

                candidates.append({
                    'model': model, 'lam1': lam1, 'lam2': lam2,
                    'f1_score': f1_score, 'mse': mse,
                    'selected_vars': list(selected_vars),
                    'y_mean': y_train_mean, 'y_std': y_train_std,
                    'perfect_selection': perfect_selection
                })
            except Exception as exc:
                # Best effort: one failing grid point must not abort the
                # search, but the failure is reported instead of hidden.
                warnings.warn(
                    f"MFDNN fit failed for lam1={lam1}, lam2={lam2}: {exc!r}",
                    RuntimeWarning,
                    stacklevel=2,
                )

    if not candidates:
        # Nothing could be fitted: return default hyperparameters without a model.
        default_lam1 = 1
        default_lam2 = 0.1
        best_candidate = {
            'model': None, 'lam1': default_lam1, 'lam2': default_lam2,
            'f1_score': 0, 'mse': np.inf, 'selected_vars': [],
            'y_mean': y_train_mean, 'y_std': y_train_std, 'perfect_selection': 0
        }
        return best_candidate['lam1'], best_candidate['lam2'], best_candidate

    # Highest F1 first, then lowest loss; `min` keeps the earliest grid point
    # among exact ties because candidates are stored in grid order.
    best_f1 = max(candidate['f1_score'] for candidate in candidates)
    best_candidate = min(
        (candidate for candidate in candidates if candidate['f1_score'] == best_f1),
        key=lambda candidate: candidate['mse'],
    )

    return best_candidate['lam1'], best_candidate['lam2'], best_candidate


def evaluate_on_test_set(best_candidate, X_test, y_test, p, domain_range, model_params):
    """Compute the normalised test RMSE of the selected candidate.

    Predictions from ``MFDNN_predict`` are mapped back to the original
    response scale with the candidate's stored ``y_mean`` / ``y_std`` and
    compared with ``y_test``. The RMSE is divided by the standard deviation
    of ``y_test``.

    Args:
        best_candidate: Dict with keys ``model``, ``y_mean``, ``y_std`` and
            ``f1_score`` (as returned by ``select_best_hyperparameters``).
        X_test: Test covariates, passed to ``MFDNN_predict``.
        y_test: Test responses; flattened to 1-D before comparison.
        p: Passed to ``MFDNN_predict``.
        domain_range: Passed to ``MFDNN_predict``.
        model_params: Dict providing ``num_basis``.

    Returns:
        ``(test_nrmse, f1_score)``. ``test_nrmse`` is ``inf`` when the
        candidate has no model, when prediction fails, or when ``y_test`` has
        zero standard deviation. ``f1_score`` is the candidate's stored
        score, returned unchanged.
    """
    import warnings  # local import: this block must not rely on file-level imports

    f1_score = best_candidate['f1_score']

    # A candidate without a fitted model cannot be evaluated.
    if best_candidate.get('model') is None:
        return np.inf, f1_score

    try:
        y_mean = best_candidate['y_mean']
        y_std = best_candidate['y_std']

        predictions = MFDNN_predict(p, best_candidate['model'], X_test, model_params['num_basis'], domain_range)
        # .cpu() is a no-op for CPU tensors and is required before .numpy()
        # when the tensor lives on another device.
        # NOTE(review): assumes the model output is standardised with the
        # same y_mean / y_std stored on the candidate -- confirm in mfdnn.
        predictions = predictions.detach().cpu().numpy().reshape(-1) * y_std + y_mean

        # Flatten the targets too: a column-shaped y_test would otherwise
        # broadcast against the 1-D predictions into an N x N matrix.
        targets = np.asarray(y_test).reshape(-1)

        test_mse = np.mean((predictions - targets) ** 2)
        test_rmse = np.sqrt(test_mse)
        target_std = np.std(targets)
        test_nrmse = test_rmse / target_std if target_std > 0 else np.inf

        return test_nrmse, f1_score
    except Exception as exc:
        # Best effort: report the failure and mark this evaluation as inf.
        warnings.warn(f"Test-set evaluation failed: {exc!r}", RuntimeWarning, stacklevel=2)
        return np.inf, f1_score


# 50次循环

## 50次循环运行

In [None]:
# Main experiment: for every configuration, run `frun` replicates; in each
# replicate, tune and evaluate one model per response, then aggregate the
# per-response results over the replicates.

# Number of replicates processed per configuration.
frun = 50
# Despite its name, this maps "T{T}_n{n}" -> {response key -> summary
# statistics}; it is filled at the end of each configuration.
best_hyperparams = {}  

for config in configurations:
    T = config['T']
    n = config['n']
    
    print(f"\n{'='*50}")
    print(f"Processing T={T}, n={n} over {frun} runs")
    print(f"{'='*50}")
    
    # Load the simulated covariates and responses for this configuration.
    # NOTE(review): allow_pickle=True unpickles arbitrary objects; only load
    # files from a trusted source.
    Xlist = np.load(os.path.join(data_path, f"Xlist_T{T}_n{n}.npy"), allow_pickle=True)
    ylist = np.load(os.path.join(data_path, f"ylist_T{T}_n{n}.npy"), allow_pickle=True)
    
    # Per-response accumulators over the replicates; keys are 'y1'..'y6'.
    # 'selection_counts' has one counter per covariate index (6 hard-coded).
    all_results = {f'y{i+1}': {
        'test_nrmse': [],        
        'test_f1': [], 
        'perfect_match': [],  
        'selection_counts': [0]*6
    } for i in range(6)}
    
    for run_idx in range(frun):
        # Progress message every 10th replicate.
        if run_idx % 10 == 0:
            print(f"  Run {run_idx+1}/{frun}")
        
        # X is unpacked as a 4-D array: axis 0 has length p, axis 1 has
        # length N and is the axis that is split into train/test below.
        # NOTE(review): presumably p = number of covariates, N = samples and
        # (T1, T2) = grid sizes -- confirm against the data generator.
        X = np.array(Xlist[run_idx])
        p, N, T1, T2 = X.shape
        
        # First half along axis 1 is used for training, the rest for testing.
        split_idx = N // 2
        X_train = X[:, :split_idx, :, :]
        X_test = X[:, split_idx:, :, :]
        
        # One [[0, 0], [1, 1]] entry per covariate, forwarded to MFDNN and
        # MFDNN_predict.
        # NOTE(review): presumably the unit-square domain -- confirm in mfdnn.
        domain_range = [[[0, 0], [1, 1]] for _ in range(p)]
        
        for y_index in range(6):
            # Responses are split at the same index as the covariates.
            y_full = np.array(ylist[run_idx][y_index])
            y_train = y_full[:split_idx]
            y_test = y_full[split_idx:]
            
            true_vars = ground_truth[y_index]
            
            # Fit the whole (lam1, lam2) grid on the training half and keep
            # the best candidate.
            lam1, lam2, best_candidate = select_best_hyperparameters(
                X_train, y_train, true_vars, p, domain_range, 
                lam1_values, lam2_values, model_params, epsilon
            )

            # Score the chosen candidate on the held-out half.
            test_nrmse, test_f1 = evaluate_on_test_set(
                best_candidate, X_test, y_test, p, domain_range, model_params
            )

            # Exact-recovery indicator: selected set equals the true set.
            selected_vars = set(best_candidate['selected_vars'])
            is_perfect_match = 1 if selected_vars == true_vars else 0
 
            all_results[f'y{y_index+1}']['test_nrmse'].append(test_nrmse)
            all_results[f'y{y_index+1}']['test_f1'].append(test_f1)
            all_results[f'y{y_index+1}']['perfect_match'].append(is_perfect_match) 
   
            # Count how often each covariate index was selected.
            for var_idx in selected_vars:
                all_results[f'y{y_index+1}']['selection_counts'][var_idx] += 1
            

    # Summarise the replicates of this configuration per response.
    config_results = {}
    for y_index in range(6):
        y_key = f'y{y_index+1}'
        # NOTE(review): evaluate_on_test_set returns inf on failure; a single
        # inf makes the mean inf and the std NaN.
        test_nrmse_mean = np.mean(all_results[y_key]['test_nrmse'])
        test_nrmse_std = np.std(all_results[y_key]['test_nrmse'])
        test_f1_mean = np.mean(all_results[y_key]['test_f1'])
        test_f1_std = np.std(all_results[y_key]['test_f1'])
        

        # Number and share of replicates with exact recovery.
        perfect_match_count = sum(all_results[y_key]['perfect_match'])
        perfect_match_rate = perfect_match_count / frun
        
        # Selection frequency of every covariate over the replicates.
        selection_freq = [count / frun for count in all_results[y_key]['selection_counts']]
        
        config_results[y_key] = {
            'test_nrmse_mean': test_nrmse_mean,
            'test_nrmse_std': test_nrmse_std,
            'test_f1_mean': test_f1_mean,
            'test_f1_std': test_f1_std,
            'perfect_match_count': perfect_match_count,  
            'perfect_match_rate': perfect_match_rate,   
            'selection_counts': all_results[y_key]['selection_counts'],
            'selection_freq': selection_freq
        }
        
    

    # Store the summaries under a key such as "T16_n200".
    config_key = f"T{T}_n{n}"
    best_hyperparams[config_key] = config_results




Processing T=16, n=200 over 50 runs
  Run 1/50
  Run 11/50
  Run 21/50
  Run 31/50
  Run 41/50

Processing T=16, n=400 over 50 runs
  Run 1/50
  Run 11/50
  Run 21/50
  Run 31/50
  Run 41/50

Processing T=32, n=200 over 50 runs
  Run 1/50
  Run 11/50
  Run 21/50
  Run 31/50
  Run 41/50

Processing T=32, n=400 over 50 runs
  Run 1/50
  Run 11/50
  Run 21/50
  Run 31/50
  Run 41/50


## 50次循环结果输出

In [None]:
import pandas as pd


# Flatten the nested {configuration -> response -> metrics} summary into one
# table row per (configuration, response) pair and save it as a CSV file.
rows = []

for config_key, results in best_hyperparams.items():
    for y_key, metrics in results.items():
        perfect_match_label = f"{metrics['perfect_match_count']}/{frun}"
        row = dict(
            Configuration=config_key,
            Response=y_key,
            NRMSE_Mean=metrics["test_nrmse_mean"],
            NRMSE_Std=metrics.get("test_nrmse_std", 0.0),
            F1_Mean=metrics["test_f1_mean"],
            F1_Std=metrics.get("test_f1_std", 0.0),
            Perfect_Match=perfect_match_label,
        )

        # Columns X1..X6: how many replicates selected each covariate.
        counts = metrics["selection_counts"]
        for var_idx in range(6):
            row[f"X{var_idx + 1}"] = counts[var_idx]

        rows.append(row)

# The "utf-8-sig" encoding writes the file with a leading byte-order mark.
csv_path = "mfdnn_results_summary.csv"
df_results = pd.DataFrame(rows)
df_results.to_csv(csv_path, index=False, encoding="utf-8-sig")