Import Dependencies

In [None]:
import math
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
import pickle

from sklearn.preprocessing import StandardScaler, MinMaxScaler, FunctionTransformer, RobustScaler
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.pipeline import Pipeline
from sklearn.base import clone
from sklearn.decomposition import NMF

from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR

In [None]:
def ensure_nonnegative(X):
    """Return ``X`` with every value below zero raised to zero.

    Values that are already >= 0 are passed through unchanged. The pipelines
    in this file wrap this function in a ``FunctionTransformer`` placed
    directly in front of an ``NMF`` step.
    """
    floor = 0
    return np.clip(X, floor, None)

def shift_function(X):
    """Translate ``X`` so that it contains no negative values.

    If the smallest entry of ``X`` (taken over the whole array, not per
    column) is negative, every entry is moved up by that amount plus 1e-6, so
    the new minimum is 1e-6. Otherwise ``X`` is returned as-is (same object).

    NOTE: the offset is derived from the array passed to *this* call, so two
    different arrays (e.g. a training batch and a test batch) can be shifted
    by different amounts.
    """
    lowest = np.min(X)
    if lowest < 0:
        return X - lowest + 1e-6
    return X

def results(pipe, x_train, x_test, y_train_scaled, y_test_scaled, y_scaler):
    """Fit ``pipe`` and score it on the test set in original target units.

    Args:
        pipe: estimator/pipeline exposing ``fit`` and ``predict``; it is
            fitted in place.
        x_train, x_test: feature matrices for fitting and evaluation.
        y_train_scaled, y_test_scaled: targets already transformed by
            ``y_scaler``.
        y_scaler: fitted scaler whose ``inverse_transform`` maps scaled
            targets (and predictions) back to the original units.

    Returns:
        ``(rmse, r2)`` computed between the inverse-transformed test targets
        and the inverse-transformed predictions.
    """
    pipe.fit(x_train, y_train_scaled)

    # Undo the target scaling on both sides so the metrics are comparable
    # across models that use different target scalers.
    predicted = y_scaler.inverse_transform(pipe.predict(x_test))
    actual = y_scaler.inverse_transform(y_test_scaled)

    mse = mean_squared_error(actual, predicted)
    return np.sqrt(mse), r2_score(actual, predicted)

# Load the feature matrix, the target matrix and the per-row subject labels.
# The cells below index x and y with a boolean mask built from subject_ids,
# so the three files are expected to have the same number of rows.
x = np.loadtxt("X_all.csv", delimiter=',')
y = np.loadtxt("Y_all.csv", delimiter=',')
subject_ids = np.loadtxt("subject_ids.csv", delimiter=",").astype(int)

# Per-subject metric store: one list per column, one entry appended per subject.
# Prefix "ns_" = pipeline without an NMF step, "s_" = pipeline with an NMF step.
subject_results = {
    column: []
    for column in (
        'subject', 'n_train', 'n_test',
        'ns_lr_rmse', 'ns_lr_r2',
        's_lr_rmse', 's_lr_r2',
        'ns_rf_rmse', 'ns_rf_r2',
        's_rf_rmse', 's_rf_r2',
        'ns_svm_rmse', 'ns_svm_r2',
        's_svm_rmse', 's_svm_r2',
    )
}

Baseline Metrics

In [None]:
# Hyper-parameters shared by both random-forest pipelines and both SVR pipelines.
rf_kwargs = dict(
    max_features='sqrt',
    max_depth=7,
    min_samples_split=15,
    min_samples_leaf=8,
    n_estimators=300,
    random_state=42,
    n_jobs=-1,
)
svr_kwargs = dict(C=10, epsilon=0.1, kernel='linear')

# Fit and score the six baseline pipelines separately for every subject.
for subject in np.unique(subject_ids):
    print(f"Subject {subject}")

    # Keep only this subject's rows, then hold out 20% for testing.
    mask = subject_ids == subject
    x_subject, y_subject = x[mask], y[mask]

    x_train, x_test, y_train, y_test = train_test_split(
        x_subject, y_subject, test_size=0.2, random_state=42
    )

    # Raw-unit copies; y_train / y_test are overwritten with scaled values below.
    y_train_original, y_test_original = y_train.copy(), y_test.copy()

    # Linear-regression targets are standardised ...
    lr_y_scaler = StandardScaler()
    lr_y_train = lr_y_scaler.fit_transform(y_train)
    lr_y_test = lr_y_scaler.transform(y_test)

    # ... while RF / SVR targets are robust-scaled. From here on y_train and
    # y_test hold the robust-scaled targets.
    y_scaler = RobustScaler()
    y_train = y_scaler.fit_transform(y_train)
    y_test = y_scaler.transform(y_test)

    # Non-Synergy LinearRegression
    ns_lr_pipe = Pipeline([
        ('scaler', StandardScaler()),
        ('model', MultiOutputRegressor(LinearRegression())),
    ])
    ns_lr_rmse, ns_lr_r2 = results(
        ns_lr_pipe, x_train, x_test, lr_y_train, lr_y_test, lr_y_scaler
    )

    # Synergy LinearRegression (NMF rank 30); standardised features are
    # shifted to be non-negative before NMF.
    s_lr_pipe = Pipeline([
        ('scaler', StandardScaler()),
        ('shift', FunctionTransformer(shift_function)),
        ('nmf', NMF(n_components=30, random_state=42, max_iter=5000, init='nndsvda')),
        ('model', MultiOutputRegressor(LinearRegression())),
    ])
    s_lr_rmse, s_lr_r2 = results(
        s_lr_pipe, x_train, x_test, lr_y_train, lr_y_test, lr_y_scaler
    )

    # Non-Synergy RandomForestRegressor
    ns_rf_pipe = Pipeline([
        ('scaler', MinMaxScaler()),
        ('model', RandomForestRegressor(**rf_kwargs)),
    ])
    ns_rf_rmse, ns_rf_r2 = results(
        ns_rf_pipe, x_train, x_test, y_train, y_test, y_scaler
    )

    # Synergy RandomForestRegressor (NMF rank 40); the clip step guards NMF
    # against negative values.
    s_rf_pipe = Pipeline([
        ('scaler', MinMaxScaler(feature_range=(0, 1))),
        ('clip', FunctionTransformer(ensure_nonnegative)),
        ('nmf', NMF(n_components=40, random_state=42, max_iter=5000, init='nndsvda')),
        ('model', RandomForestRegressor(**rf_kwargs)),
    ])
    s_rf_rmse, s_rf_r2 = results(
        s_rf_pipe, x_train, x_test, y_train, y_test, y_scaler
    )

    # Non-Synergy SupportVectorRegression
    ns_svm_pipe = Pipeline([
        ('scaler', StandardScaler()),
        ('model', MultiOutputRegressor(SVR(**svr_kwargs))),
    ])
    ns_svm_rmse, ns_svm_r2 = results(
        ns_svm_pipe, x_train, x_test, y_train, y_test, y_scaler
    )

    # Synergy SupportVectorRegression (NMF rank 35, default NMF init)
    s_svm_pipe = Pipeline([
        ('scaler', MinMaxScaler()),
        ('clip', FunctionTransformer(ensure_nonnegative)),
        ('nmf', NMF(n_components=35, random_state=42, max_iter=5000)),
        ('model', MultiOutputRegressor(SVR(**svr_kwargs))),
    ])
    s_svm_rmse, s_svm_r2 = results(
        s_svm_pipe, x_train, x_test, y_train, y_test, y_scaler
    )

    # Record this subject's numbers, one value per result column.
    row = {
        'subject': subject,
        'n_train': len(x_train),
        'n_test': len(x_test),
        'ns_lr_rmse': ns_lr_rmse, 'ns_lr_r2': ns_lr_r2,
        's_lr_rmse': s_lr_rmse, 's_lr_r2': s_lr_r2,
        'ns_rf_rmse': ns_rf_rmse, 'ns_rf_r2': ns_rf_r2,
        's_rf_rmse': s_rf_rmse, 's_rf_r2': s_rf_r2,
        'ns_svm_rmse': ns_svm_rmse, 'ns_svm_r2': ns_svm_r2,
        's_svm_rmse': s_svm_rmse, 's_svm_r2': s_svm_r2,
    }
    for column, value in row.items():
        subject_results[column].append(value)

banner = "=" * 80
print("\n" + banner)
print("SUMMARY: AVERAGE ACROSS ALL SUBJECTS")
print(banner)

# One row per subject, one column per recorded metric.
results_df = pd.DataFrame(subject_results)

# (display name, RMSE column, R2 column) for each evaluated pipeline.
model_columns = [
    ('NS LinearRegression', 'ns_lr_rmse', 'ns_lr_r2'),
    ('S LinearRegression', 's_lr_rmse', 's_lr_r2'),
    ('NS RandomForest', 'ns_rf_rmse', 'ns_rf_r2'),
    ('S RandomForest', 's_rf_rmse', 's_rf_r2'),
    ('NS SupportVectorMachine', 'ns_svm_rmse', 'ns_svm_r2'),
    ('S SupportVectorMachine', 's_svm_rmse', 's_svm_r2'),
]

# Mean and (pandas default, sample) standard deviation across subjects.
summary_data = []
for model_name, rmse_col, r2_col in model_columns:
    rmse_values = results_df[rmse_col]
    r2_values = results_df[r2_col]
    summary_data.append([
        model_name,
        rmse_values.mean(),
        rmse_values.std(),
        r2_values.mean(),
        r2_values.std(),
    ])

summary_df = pd.DataFrame(
    summary_data,
    columns=['Model', 'Mean RMSE', 'Std RMSE', 'Mean R2', 'Std R2'],
)

print(summary_df)

Optimal Synergy Components

In [None]:
# NMF ranks evaluated for every synergy pipeline.
n_components_range = [5, 10, 15, 20, 25, 30, 35, 40, 45, 50]

# Number of repeated predict() calls averaged for the inference-time figure.
n_inference_repeats = 10

# One dict per (subject, model, n_components) experiment.
sweep_results = []

# Loop through each subject
for subject in np.unique(subject_ids):
    print(f"\n{'='*60}")
    print(f"Processing Subject {subject}")
    print(f"{'='*60}")

    # Get subject-specific data
    mask = subject_ids == subject
    x_subject = x[mask]
    y_subject = y[mask]

    x_train, x_test, y_train, y_test = train_test_split(
        x_subject, y_subject, test_size=0.2, random_state=42
    )

    # Scale y data: StandardScaler targets for linear regression,
    # RobustScaler targets for RF / SVR. y_train / y_test stay in raw units.
    lr_y_scaler = StandardScaler()
    lr_y_train = lr_y_scaler.fit_transform(y_train.copy())
    lr_y_test = lr_y_scaler.transform(y_test.copy())

    y_scaler = RobustScaler()
    y_train_scaled = y_scaler.fit_transform(y_train.copy())
    y_test_scaled = y_scaler.transform(y_test.copy())

    # Define base pipelines. The n_components given here is a placeholder:
    # it is overridden for every sweep point below.
    base_s_lr_pipe = Pipeline([
        ('scaler', StandardScaler()),
        ('shift', FunctionTransformer(shift_function)),
        ('nmf', NMF(n_components=48, random_state=42, max_iter=5000, init='nndsvda')),
        ('model', MultiOutputRegressor(LinearRegression()))
    ])

    base_s_rf_pipe = Pipeline([
        ('scaler', MinMaxScaler(feature_range=(0, 1))),
        ('clip', FunctionTransformer(ensure_nonnegative)),
        ('nmf', NMF(n_components=48, random_state=42, max_iter=5000, init='nndsvda')),
        ('model', RandomForestRegressor(
            max_features='sqrt', max_depth=7,
            min_samples_split=15, min_samples_leaf=8,
            n_estimators=300, random_state=42, n_jobs=-1)),
    ])

    # NOTE(review): the SVR kernel here is 'rbf' while the baseline-metrics
    # cell uses kernel='linear' -- confirm which one is intended.
    base_s_svm_pipe = Pipeline([
        ('scaler', MinMaxScaler()),
        ('clip', FunctionTransformer(ensure_nonnegative)),
        ('nmf', NMF(n_components=40, random_state=42, max_iter=5000)),
        ('model', MultiOutputRegressor(SVR(C=10, epsilon=0.1, kernel='rbf'))),
    ])

    # Models config with proper y data and scalers
    synergy_models = {
        'S LinearRegression': {
            'base_pipe': base_s_lr_pipe,
            'y_train': lr_y_train,
            'y_test': lr_y_test,
            'y_scaler': lr_y_scaler
        },
        'S RandomForestRegressor': {
            'base_pipe': base_s_rf_pipe,
            'y_train': y_train_scaled,
            'y_test': y_test_scaled,
            'y_scaler': y_scaler
        },
        'S SupportVectorRegression': {
            'base_pipe': base_s_svm_pipe,
            'y_train': y_train_scaled,
            'y_test': y_test_scaled,
            'y_scaler': y_scaler
        }
    }

    # Sweep through models and components
    for model_name, model_config in synergy_models.items():
        print(f"\n  Sweeping {model_name}")

        # Test targets in original units do not depend on n_components,
        # so compute them once per model.
        y_test_original = model_config['y_scaler'].inverse_transform(model_config['y_test'])

        for n_comp in n_components_range:
            print(f"    N_Components: {n_comp}...", end=" ")

            # Fresh, unfitted copy of the pipeline with the NMF rank under test.
            pipe = clone(model_config['base_pipe'])
            pipe.set_params(nmf__n_components=n_comp)

            # Train. perf_counter() is the high-resolution monotonic clock
            # meant for measuring elapsed time.
            train_start = time.perf_counter()
            pipe.fit(x_train, model_config['y_train'])
            train_time = time.perf_counter() - train_start

            # Inference timing: average over repeated predictions of the test set.
            inference_times = []
            for _ in range(n_inference_repeats):
                inf_start = time.perf_counter()
                y_pred = pipe.predict(x_test)
                inference_times.append(time.perf_counter() - inf_start)
            avg_inference_time = np.mean(inference_times)

            # Transform predictions back to original scale
            y_pred_original = model_config['y_scaler'].inverse_transform(y_pred)

            # Calculate metrics
            rmse = np.sqrt(mean_squared_error(y_test_original, y_pred_original))
            r2 = r2_score(y_test_original, y_pred_original)

            # Model size: length of the pickled fitted pipeline, in MiB.
            model_bytes = pickle.dumps(pipe)
            model_size_mb = len(model_bytes) / (1024 * 1024)

            sweep_results.append({
                'Subject': subject,
                'Model': model_name,
                'n_components': n_comp,
                'rmse': rmse,
                'r2': r2,
                'train_time_s': train_time,
                'inference_time_s': avg_inference_time,
                'model_size_mb': model_size_mb
            })

            print(f"RMSE={rmse:.4f}, R²={r2:.4f}")

# Create DataFrame with all subjects (one row per experiment)
sweep_df = pd.DataFrame(sweep_results)

# Derive the subject count from the data instead of assuming a fixed number.
n_subjects = sweep_df['Subject'].nunique()

print("\n" + "="*80)
print("RAW SWEEP DATA (ALL SUBJECTS)")
print("="*80)
print(sweep_df.head(20))

# Calculate averages (and sample std) across subjects for every
# (model, n_components) combination.
sweep_avg = sweep_df.groupby(['Model', 'n_components']).agg({
    'rmse': ['mean', 'std'],
    'r2': ['mean', 'std'],
    'train_time_s': ['mean', 'std'],
    'inference_time_s': ['mean', 'std'],
    'model_size_mb': ['mean', 'std']
}).reset_index()

# Flatten the two-level column index: ('rmse', 'mean') -> 'rmse_mean',
# ('Model', '') -> 'Model'.
sweep_avg.columns = ['_'.join(col).strip('_') for col in sweep_avg.columns.values]

print("\n" + "="*80)
print(f"AVERAGED SWEEP RESULTS (ACROSS ALL {n_subjects} SUBJECTS)")
print("="*80)
print(sweep_avg)

# Save results
sweep_df.to_csv('component_sweep_all_subjects.csv', index=False)
sweep_avg.to_csv('component_sweep_averaged.csv', index=False)

print("\n✓ Component sweep complete!")
runs_per_subject = len(sweep_df) // n_subjects if n_subjects else 0
print(f"  Total experiments: {len(sweep_df)} ({runs_per_subject} per subject × {n_subjects} subjects)")

Model Robustness

In [None]:
def gaussian_noise(signal, snr_db, rng=None):
    """Return ``signal`` plus zero-mean Gaussian noise at a target SNR.

    The signal power is the mean of the squared values over the *whole*
    array, so a single noise variance is used for every element.

    Args:
        signal: array-like of numbers; converted to a float ndarray.
        snr_db: desired signal-to-noise ratio in decibels
            (noise_power = signal_power / 10**(snr_db / 10)).
        rng: optional ``numpy.random.Generator`` (or any object with a
            compatible ``normal`` method) used to draw the noise. When
            omitted, NumPy's global random state is used, which keeps the
            two-argument call behaving as before.

    Returns:
        A new float ndarray with the same shape as ``signal``.
    """
    signal = np.asarray(signal, dtype=float)
    signal_power = np.mean(signal ** 2)
    noise_power = signal_power / (10 ** (snr_db / 10))

    # Fall back to the module-level (global-state) sampler for legacy callers.
    sampler = np.random if rng is None else rng
    noise = sampler.normal(0, np.sqrt(noise_power), signal.shape)
    return signal + noise

# SNR levels (dB) applied to the test features, from almost clean to very noisy.
snr_list = [100, 80, 60, 40, 20]
# Display names; the order must match pipe_list built inside the loop below.
model_names = ['NS RandomForestRegressor', 'S RandomForestRegressor', 
               'NS SupportVectorRegression', 'S SupportVectorRegression']

# Store results for ALL subjects
all_noise_results = []

# Every model in this notebook is seeded with 42; seed the noise source too so
# that the robustness numbers are reproducible from run to run.
np.random.seed(42)

# Loop through each subject
for subject in np.unique(subject_ids):
    print(f"\n{'='*50}")
    print(f"Testing Noise Robustness - Subject {subject}")
    print('='*50)
    
    # Get subject-specific data
    mask = subject_ids == subject
    x_subject = x[mask]
    y_subject = y[mask]

    x_train, x_test, y_train, y_test = train_test_split(
        x_subject, y_subject, test_size=0.2, random_state=42
    )

    # Scale y data (using RobustScaler for RF and SVM)
    y_scaler = RobustScaler()
    y_train_scaled = y_scaler.fit_transform(y_train.copy())
    y_test_scaled = y_scaler.transform(y_test.copy())

    # Clean test targets in original units; identical for every SNR and model.
    y_test_original = y_scaler.inverse_transform(y_test_scaled)

    # Define fresh pipelines for this subject
    ns_rf_pipe = Pipeline([
        ('scaler', MinMaxScaler()),
        ('model', RandomForestRegressor(
            max_features='sqrt', max_depth=7,
            min_samples_split=15, min_samples_leaf=8,
            n_estimators=300, random_state=42, n_jobs=-1)),
    ])

    s_rf_pipe = Pipeline([
        ('scaler', MinMaxScaler(feature_range=(0, 1))),
        ('clip', FunctionTransformer(ensure_nonnegative)),
        ('nmf', NMF(n_components=40, random_state=42, max_iter=5000, init='nndsvda')),
        ('model', RandomForestRegressor(
            max_features='sqrt', max_depth=7,
            min_samples_split=15, min_samples_leaf=8,
            n_estimators=300, random_state=42, n_jobs=-1)),
    ])

    ns_svm_pipe = Pipeline([
        ('scaler', StandardScaler()),
        ('model', MultiOutputRegressor(SVR(C=10, epsilon=0.1, kernel='rbf'))),
    ])

    s_svm_pipe = Pipeline([
        ('scaler', MinMaxScaler()),
        ('clip', FunctionTransformer(ensure_nonnegative)),
        ('nmf', NMF(n_components=35, random_state=42, max_iter=5000)),
        ('model', MultiOutputRegressor(SVR(C=10, epsilon=0.1, kernel='rbf'))),
    ])

    pipe_list = [ns_rf_pipe, s_rf_pipe, ns_svm_pipe, s_svm_pipe]

    # Fit all pipelines on THIS subject's clean training data
    for pipe in pipe_list:
        pipe.fit(x_train, y_train_scaled)

    # Test with different noise levels. The noisy test set is drawn once per
    # SNR and shared by all four models, so they see the same corruption.
    for snr_db in snr_list:
        print(f"  Testing SNR = {snr_db} dB")
        x_test_noise = gaussian_noise(x_test, snr_db)

        for pipe, model_name in zip(pipe_list, model_names):
            y_pred_scaled = pipe.predict(x_test_noise)
            
            # Transform back to original scale
            y_pred_original = y_scaler.inverse_transform(y_pred_scaled)
            
            # Calculate RMSE and R² on original scale
            rmse_noise = np.sqrt(mean_squared_error(y_test_original, y_pred_original))
            r2_noise = r2_score(y_test_original, y_pred_original)
            
            all_noise_results.append({
                'Subject': subject,
                'SNR (dB)': snr_db,
                'Model': model_name,
                'RMSE': rmse_noise,
                'R2': r2_noise
            })

# Create DataFrame with ALL subjects (one row per subject / SNR / model)
noise_df_all = pd.DataFrame(all_noise_results)

# Derive the subject count from the data instead of assuming a fixed number.
n_subjects = noise_df_all['Subject'].nunique()

print("\n" + "="*80)
print("RAW NOISE DATA (ALL SUBJECTS)")
print("="*80)
print(noise_df_all)

# Calculate AVERAGE (and sample std) across all subjects
noise_df = noise_df_all.groupby(['SNR (dB)', 'Model']).agg({
    'RMSE': ['mean', 'std'],
    'R2': ['mean', 'std']
}).reset_index()

# Flatten column names; the order follows the group keys and then the agg
# spec above (RMSE mean/std, R2 mean/std).
noise_df.columns = ['SNR (dB)', 'Model', 'RMSE', 'RMSE_std', 'R2', 'R2_std']

print("\n" + "="*80)
print(f"AVERAGED NOISE ROBUSTNESS (ACROSS ALL {n_subjects} SUBJECTS)")
print("="*80)
print(noise_df)

# Verify all models are present
print("\nUnique models in noise_df:", noise_df['Model'].unique())

Training Efficiency and Sample Complexity

In [None]:
# Setup
train_sizes = [0.10, 0.25, 0.50, 0.75, 1.00]  # Fractions of each subject's training split
n_components_list = [5, 20, 35, 50]           # NMF ranks evaluated at every training size

# One dict per (subject, model, training size, n_components) experiment.
all_training_results = []
print("Starting Training Efficiency Analysis...")

for subject in np.unique(subject_ids):
    print(f"\n{'='*80}")
    print(f"Training Efficiency Analysis - Subject {subject}")
    print(f"{'='*80}")
    
    # Get subject-specific data
    mask = subject_ids == subject
    x_subject = x[mask]
    y_subject = y[mask]

    x_train, x_test, y_train, y_test = train_test_split(
        x_subject, y_subject, test_size=0.2, random_state=42
    )

    # Scale y data: StandardScaler targets for linear regression,
    # RobustScaler targets for RF / SVR.
    lr_y_scaler = StandardScaler()
    lr_y_train = lr_y_scaler.fit_transform(y_train.copy())
    lr_y_test = lr_y_scaler.transform(y_test.copy())

    y_scaler = RobustScaler()
    y_train_scaled = y_scaler.fit_transform(y_train.copy())
    y_test_scaled = y_scaler.transform(y_test.copy())

    # Define pipelines for this subject. The n_components given here is a
    # placeholder: it is overridden for every experiment below.
    s_lr_pipe = Pipeline([
        ('scaler', StandardScaler()),
        ('shift', FunctionTransformer(shift_function)),
        ('nmf', NMF(n_components=48, random_state=42, max_iter=5000, init='nndsvda')),
        ('model', MultiOutputRegressor(LinearRegression()))
    ])

    s_rf_pipe = Pipeline([
        ('scaler', MinMaxScaler(feature_range=(0, 1))),
        ('clip', FunctionTransformer(ensure_nonnegative)),
        ('nmf', NMF(n_components=48, random_state=42, max_iter=5000, init='nndsvda')),
        ('model', RandomForestRegressor(
            max_features='sqrt', max_depth=7,
            min_samples_split=15, min_samples_leaf=8,
            n_estimators=300, random_state=42, n_jobs=-1)),
    ])

    s_svm_pipe = Pipeline([
        ('scaler', MinMaxScaler()),
        ('clip', FunctionTransformer(ensure_nonnegative)),
        ('nmf', NMF(n_components=40, random_state=42, max_iter=5000)),
        ('model', MultiOutputRegressor(SVR(C=10, epsilon=0.1, kernel='rbf'))),
    ])

    # Models to test, each paired with the targets and scaler it must use
    synergy_models = {
        'S LinearRegression': {'pipe': s_lr_pipe, 'y_train': lr_y_train, 'y_test': lr_y_test, 'scaler': lr_y_scaler},
        'S RandomForestRegressor': {'pipe': s_rf_pipe, 'y_train': y_train_scaled, 'y_test': y_test_scaled, 'scaler': y_scaler},
        'S SupportVectorRegression': {'pipe': s_svm_pipe, 'y_train': y_train_scaled, 'y_test': y_test_scaled, 'scaler': y_scaler}
    }

    for model_name, model_config in synergy_models.items():
        print(f"\n{'='*60}")
        print(f"  Starting model: {model_name}")
        print(f"{'='*60}")
        
        base_pipe = model_config['pipe']
        y_train_full = model_config['y_train']
        y_test_use = model_config['y_test']
        scaler = model_config['scaler']

        # Test targets in original units. They must be inverse-transformed
        # from the targets that belong to THIS model's scaler (y_test_use);
        # feeding the RobustScaler-scaled targets to the linear-regression
        # StandardScaler would yield wrong reference values.
        y_test_original = scaler.inverse_transform(y_test_use)
        
        for train_size_pct in train_sizes:

            # Leading slice of the (already shuffled) training split.
            n_samples = int(len(x_train) * train_size_pct)
            x_train_subset = x_train[:n_samples]
            y_train_subset = y_train_full[:n_samples]
            
            print(f"\n  Training Size: {train_size_pct*100:.0f}% ({n_samples} samples)")
            
            for n_comp in n_components_list:
                print(f"    n_components = {n_comp}...", end=" ")
                
                # Fresh, unfitted copy of the pipeline with the NMF rank under test.
                # NOTE(review): for very small subsets n_comp may exceed the
                # number of training rows -- confirm NMF accepts that here.
                pipe = clone(base_pipe)
                pipe.set_params(nmf__n_components=n_comp)
                
                # Train. perf_counter() is the high-resolution monotonic
                # clock meant for measuring elapsed time.
                train_start = time.perf_counter()
                pipe.fit(x_train_subset, y_train_subset)
                train_time = time.perf_counter() - train_start
                
                # Predict on the full held-out test set
                y_pred_scaled = pipe.predict(x_test)
                
                # Transform back to original scale
                y_pred_original = scaler.inverse_transform(y_pred_scaled)
                
                # Metrics on original scale
                rmse = np.sqrt(mean_squared_error(y_test_original, y_pred_original))
                r2 = r2_score(y_test_original, y_pred_original)
                
                all_training_results.append({
                    'Subject': subject,
                    'Model': model_name,
                    'Train_Size_Pct': train_size_pct,
                    'Train_Size_Samples': n_samples,
                    'n_components': n_comp,
                    'RMSE': rmse,
                    'R2': r2,
                    'Train_Time': train_time
                })
                
                print(f"RMSE={rmse:.4f}, R²={r2:.4f}, Time={train_time:.2f}s") 

# One row per (subject, model, training size, n_components) experiment.
training_df_all = pd.DataFrame(all_training_results)

# Derive the subject count from the data instead of assuming a fixed number.
n_subjects = training_df_all['Subject'].nunique()

print("\n" + "="*80)
print("RAW TRAINING DATA (ALL SUBJECTS)")
print("="*80)
print(training_df_all.head(20))

# Calculate AVERAGE (and sample std) across all subjects
training_df = training_df_all.groupby(['Model', 'Train_Size_Pct', 'n_components']).agg({
    'Train_Size_Samples': 'mean',
    'RMSE': ['mean', 'std'],
    'R2': ['mean', 'std'],
    'Train_Time': ['mean', 'std']
}).reset_index()

# Flatten column names; the order follows the three group keys and then the
# agg spec above (1 + 2 + 2 + 2 aggregated columns).
training_df.columns = ['Model', 'Train_Size_Pct', 'n_components', 
                       'Train_Size_Samples', 
                       'RMSE', 'RMSE_std', 
                       'R2', 'R2_std', 
                       'Train_Time', 'Train_Time_std']

print("\n" + "="*80)
print(f"AVERAGED TRAINING EFFICIENCY (ACROSS ALL {n_subjects} SUBJECTS)")
print("="*80)
print(training_df)

Computational Performance

In [None]:
# Build the train/test split and the target scalers explicitly instead of
# relying on variables left over from whichever cell ran last. After the
# sweep / noise / training-efficiency cells, `y_train` and `y_test` hold RAW
# targets, and raw targets must not be passed to `results` together with
# `y_scaler` (predictions would be inverse-transformed without ever having
# been scaled).
#
# The last subject is used because that is the subject the preceding
# per-subject loops finish on; the split parameters match those loops.
perf_subject = np.unique(subject_ids)[-1]
perf_mask = subject_ids == perf_subject

x_train, x_test, y_train_raw, y_test_raw = train_test_split(
    x[perf_mask], y[perf_mask], test_size=0.2, random_state=42
)

# StandardScaler targets for linear regression.
lr_y_scaler = StandardScaler()
lr_y_train = lr_y_scaler.fit_transform(y_train_raw)
lr_y_test = lr_y_scaler.transform(y_test_raw)

# RobustScaler targets for RF / SVR. As in the baseline-metrics cell,
# y_train / y_test hold the SCALED targets from here on.
y_scaler = RobustScaler()
y_train = y_scaler.fit_transform(y_train_raw)
y_test = y_scaler.transform(y_test_raw)

# Non-Synergy LinearRegression
ns_lr_pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('model', MultiOutputRegressor(LinearRegression()))
])
ns_lr_rmse, ns_lr_r2 = results(ns_lr_pipe, x_train, x_test, lr_y_train, lr_y_test, lr_y_scaler)

# Synergy LinearRegression
# The 'shift' step uses shift_function, like every other synergy
# linear-regression pipeline in this notebook: clipping standardised features
# at zero would discard every below-mean value before NMF.
s_lr_pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('shift', FunctionTransformer(shift_function)),
    ('nmf', NMF(n_components = 48, random_state = 42, max_iter = 5000, init='nndsvda')),
    ('model', MultiOutputRegressor(LinearRegression()))
])
s_lr_rmse, s_lr_r2 = results(s_lr_pipe, x_train, x_test, lr_y_train, lr_y_test, lr_y_scaler)

# Non-Synergy RandomForestRegressor
ns_rf_pipe = Pipeline([
    ('scaler', MinMaxScaler()),
    ('model', RandomForestRegressor(
        max_features = 'sqrt',
        max_depth = 7,
        min_samples_split = 15,
        min_samples_leaf = 8,
        n_estimators = 300,
        random_state = 42,
        n_jobs=-1)),
])
ns_rf_rmse, ns_rf_r2 = results(ns_rf_pipe, x_train, x_test, y_train, y_test, y_scaler)

# Synergy RandomForestRegressor
s_rf_pipe = Pipeline([
    ('scaler', MinMaxScaler(feature_range = (0, 1))),
    ('clip', FunctionTransformer(ensure_nonnegative)),
    ('nmf', NMF(n_components = 48, random_state = 42, max_iter = 5000, init='nndsvda')),
    ('model', RandomForestRegressor(
        max_features = 'sqrt',
        max_depth = 7,
        min_samples_split = 15,
        min_samples_leaf = 8,
        n_estimators = 300,
        random_state = 42,
        n_jobs=-1)),
])
s_rf_rmse, s_rf_r2 = results(s_rf_pipe, x_train, x_test, y_train, y_test, y_scaler)

# Non-Synergy SupportVectorRegression
# NOTE(review): kernel='rbf' here versus kernel='linear' in the
# baseline-metrics cell -- confirm which one is intended.
ns_svm_pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('model', MultiOutputRegressor(SVR(C = 10, epsilon = 0.1, kernel = 'rbf'))),
])
ns_svm_rmse, ns_svm_r2 = results(ns_svm_pipe, x_train, x_test, y_train, y_test, y_scaler)

# Synergy SupportVectorRegression
s_svm_pipe = Pipeline([
    ('scaler', MinMaxScaler()),
    ('clip', FunctionTransformer(ensure_nonnegative)),
    ('nmf', NMF(n_components = 40, random_state = 42, max_iter = 5000)),
    ('model', MultiOutputRegressor(SVR(C = 10, epsilon = 0.1, kernel = 'rbf'))),
])
s_svm_rmse, s_svm_r2 = results(s_svm_pipe, x_train, x_test, y_train, y_test, y_scaler)