In [None]:
from mainv3 import SystemDesign

In [None]:
# System configurations.
#
# Each configuration has:
#   - 'system_columns': the column names passed to SystemDesign as `system_columns`
#   - 'extra_fitted_points': the value passed to SystemDesign as `extra_fitted_points`
#   - 'name': a label that becomes part of the saved model file name, so it
#     MUST be unique across every configuration placed in `systems`.
#
# Naming pattern used below: 'system_<columns>_<extra_fitted_points>', where
# <columns> has one 's' per solvent column and a 't' for temperature, and is
# omitted when there are no system columns.

# --- solvent_1 + solvent_2 + temperature ---
system_5 = {
    'system_columns': ['solvent_1', 'solvent_2', 'temperature'],
    'extra_fitted_points': 1,
    'name': 'system_sst_1',
}

system_6 = {
    'system_columns': ['solvent_1', 'solvent_2', 'temperature'],
    'extra_fitted_points': 2,
    'name': 'system_sst_2',
}

# --- solvent_1 + solvent_2 ---
system_7 = {
    'system_columns': ['solvent_1', 'solvent_2'],
    'extra_fitted_points': 1,
    'name': 'system_ss_1',
}

system_8 = {
    'system_columns': ['solvent_1', 'solvent_2'],
    'extra_fitted_points': 2,
    'name': 'system_ss_2',
}

# --- solvent_1 only ---
system_9 = {
    'system_columns': ['solvent_1'],
    'extra_fitted_points': 1,
    'name': 'system_s_1',
}

system_10 = {
    'system_columns': ['solvent_1'],
    'extra_fitted_points': 2,
    'name': 'system_s_2',
}

# --- no system columns ---
system_11 = {
    'system_columns': [],
    'extra_fitted_points': 1,
    'name': 'system_1',
}

system_12 = {
    'system_columns': [],
    'extra_fitted_points': 2,
    # Was 'system_1', which collided with system_11 and made both
    # configurations write to the same model file.
    'name': 'system_2',
}

# NOTE(review): system_13 is defined but not included in `systems` below —
# confirm whether that omission is intentional.
system_13 = {
    'system_columns': [],
    'extra_fitted_points': 3,
    'name': 'system_3',
}

# --- variants with no extra fitted points ---
system_14 = {
    'system_columns': ['solvent_1', 'solvent_2', 'temperature'],
    'extra_fitted_points': 0,
    'name': 'system_sst_0',
}

system_15 = {
    'system_columns': ['solvent_1', 'solvent_2'],
    'extra_fitted_points': 0,
    'name': 'system_ss_0',
}

system_16 = {
    'system_columns': ['solvent_1'],
    'extra_fitted_points': 0,
    'name': 'system_s_0',
}

system_17 = {
    'system_columns': [],
    'extra_fitted_points': 0,
    'name': 'system_0',
}

# Configurations that are actually trained.
systems = [
    system_5,
    system_6,
    system_7,
    system_8,
    system_9,
    system_10,
    system_11,
    system_12,
    system_14,
    system_15,
    system_16,
    system_17,
]

# Guard against two configurations sharing a name (and therefore a file name).
assert len({cfg['name'] for cfg in systems}) == len(systems), \
    "system names must be unique: they are used to build model file names"

from vae_model import VAEWithFeatureSelection
from neural_network_model import NeuralNetworkWithFeatureSelection
from xgb_model import XGBoostWithFeatureSelection
from itertools import product


# Model families to train: a short label paired with the class that is later
# passed on as `model_class`.
models = [
    {'name': label, 'model_class': model_cls}
    for label, model_cls in (
        ('vae', VAEWithFeatureSelection),
        ('nn', NeuralNetworkWithFeatureSelection),
        ('xgb', XGBoostWithFeatureSelection),
    )
]

# Feature-count settings; the label is derived from the count.
n_features = [
    {'name': f'{count}_features', 'n_features': count}
    for count in (10, 50, 500)
]

# Full grid: every system combined with every model and every feature count.
systemPermutations = [
    dict(zip(('system', 'model', 'n_features'), combo))
    for combo in product(systems, models, n_features)
]

# Displayed as the cell output: total number of training runs in the grid.
len(systemPermutations)

In [None]:
import time
import os

def _model_specific_params(model_name):
    """Return a fresh dict of the fixed training kwargs for a supported model name."""
    params_by_model = {
        'vae': {
            'epochs': 1000,
            'batch_size': 32,
            'latent_dim': 16,
            'kl_weight': 0.001
        },
        'nn': {
            'epochs': 1000,
            'batch_size': 32
        },
        'xgb': {
            'n_estimators': 200,
            'max_depth': 6
        },
    }
    if model_name not in params_by_model:
        supported = ", ".join(sorted(params_by_model))
        raise ValueError(
            f"Unsupported model name {model_name!r}; expected one of: {supported}"
        )
    return params_by_model[model_name]


def train_and_save_system(system_permutation, raw_data_path='curve_fit_results_x_is_7.csv', base_save_path="../../output/models/"):
    """
    Train, evaluate and save one model for a single system permutation.

    Args:
        system_permutation (dict): Dictionary with the keys 'system', 'model'
            and 'n_features'. 'system' must provide 'name', 'system_columns'
            and 'extra_fitted_points'; 'model' must provide 'name' and
            'model_class'; 'n_features' must provide 'name' and 'n_features'.
        raw_data_path (str): Path to the raw data file, passed to SystemDesign.
        base_save_path (str): Base directory for saving models.

    Returns:
        tuple: (system, model_name) where `system` is the SystemDesign instance
        and `model_name` is the file name (ending in '.pkl') it was saved under.

    Raises:
        ValueError: If the model name is not one of 'vae', 'nn' or 'xgb'.
    """
    # Extract configuration
    system_config = system_permutation['system']
    model_config = system_permutation['model']
    n_features_config = system_permutation['n_features']

    # Resolve model-specific parameters first so that an unsupported model
    # name fails immediately, before any SystemDesign object is constructed.
    model_params = _model_specific_params(model_config['name'])

    print(f"Training {model_config['name']} model with {system_config['name']} using {n_features_config['name']}")

    # Create system
    system = SystemDesign(
        system_columns=system_config['system_columns'],
        raw_data_path=raw_data_path,
        extra_fitted_points=system_config['extra_fitted_points'],
        target_columns=['J0', 'J1', 'J2']
    )

    # Common parameters for all models
    common_params = {
        'feature_selection_method': 'random_forest',
        'n_features': n_features_config['n_features'],
        'keep_prefixes': ['solvent_1_pure', 'solvent_2_pure', 'system', 'solubility_', 'temperature'],
        'verbose': 1,
        'optimize_hyperparams': True,
        'n_calls': 100
    }

    # Train the model
    system.train_model(
        model_class=model_config['model_class'],
        **common_params,
        **model_params
    )

    # Evaluate the model
    system.evaluate_model()

    # Construct save path; os.path.join keeps the path correct whether or not
    # base_save_path ends with a separator.
    model_name = f"{model_config['name']}_{system_config['name']}_{n_features_config['name']}.pkl"
    save_path = os.path.join(base_save_path, model_name)

    # Save the model
    system.save(save_path)

    return system, model_name

def run_all_systems(system_permutations, start_idx=0, max_systems=None, raw_data_path='curve_fit_results_x_is_7.csv', 
                   base_save_path="../../output/models/"):
    """
    Run and save all systems in the provided list of system permutations.

    A failure in one permutation is reported and does not stop the run; the
    remaining permutations are still attempted.

    Args:
        system_permutations (list): List of system permutation configurations
        start_idx (int): Index to start from (for resuming interrupted runs)
        max_systems (int): Maximum number of systems to train (None = all)
        raw_data_path (str): Path to the raw data file
        base_save_path (str): Base directory for saving models

    Returns:
        list: File names of the models that were trained and saved successfully.
    """

    # Create the output directory itself. Taking os.path.dirname() here would
    # create only the parent when the path has no trailing separator, and
    # would fail on a bare directory name. An empty path means the current
    # directory, which needs no creation.
    if base_save_path:
        os.makedirs(base_save_path, exist_ok=True)

    end_idx = len(system_permutations) if max_systems is None else min(start_idx + max_systems, len(system_permutations))
    total_time = 0
    trained_models = []
    failures = []  # (1-based position, error message) for each failed permutation

    for i, system_permutation in enumerate(system_permutations[start_idx:end_idx], start=start_idx):
        print(f"\nTraining system {i+1}/{end_idx} ({i+1}/{len(system_permutations)} total)")
        print("-" * 50)

        start_time = time.time()
        try:
            _, model_name = train_and_save_system(system_permutation, raw_data_path, base_save_path)
            elapsed = time.time() - start_time
            total_time += elapsed
            trained_models.append(model_name)

            # total_time only accumulates successful runs, so average over the
            # number of successes rather than the number of attempts.
            avg_time = total_time / len(trained_models)
            remaining = (end_idx - i - 1) * avg_time

            print(f"Training completed in {elapsed:.2f} seconds")
            print(f"Estimated time remaining: {remaining/60:.2f} minutes")
        except Exception as e:
            # Best-effort: report the failure and continue with the next one.
            print(f"Error training system: {str(e)}")
            failures.append((i + 1, str(e)))

        print("-" * 50)

    if failures:
        print(f"\n{len(failures)} system(s) failed:")
        for position, message in failures:
            print(f"  system {position}: {message}")

    return trained_models

In [None]:
# Train every permutation in the grid, starting from the first one; the list
# returned by run_all_systems is shown as the cell output.
run_all_systems(
    system_permutations=systemPermutations,
    start_idx=0,
    max_systems=None,
    raw_data_path='curve_fit_results_x_is_7.csv',
    base_save_path="../../output/models/",
)