In [15]:
from typing import List
import yaml
from MDRMF import Dataset, MoleculeLoader, Featurizer

class Experimenter:
    """Run the experiments described in a YAML configuration file.

    The file is parsed once, at construction time. The parsed result is kept
    in ``self.experiments`` as a list wrapping one list of entries, where
    each entry is a mapping such as ``{'Experiment': {...}}``.
    """

    def __init__(self, config_file: str):
        """Remember the configuration path and load it.

        Args:
            config_file: Path to the YAML file describing the experiments.
        """
        self.config_file = config_file
        self.experiments = self._load_config()

    def _load_config(self) -> List[List[dict]]:
        """Parse ``self.config_file`` with ``yaml.safe_load``.

        Returns:
            ``[entries]``, where ``entries`` is the top-level content of the
            file (a lone top-level mapping is wrapped in a list), or ``[]``
            when the file is empty or is not valid YAML.
        """
        # Explicit encoding so parsing does not depend on the platform locale.
        with open(self.config_file, 'r', encoding='utf-8') as stream:
            try:
                config = yaml.safe_load(stream)
            except yaml.YAMLError as exc:
                # Best effort: report the problem and run nothing.
                print(exc)
                return []

        # An empty document parses to None; wrapping it would make
        # conduct_all_experiments fail when it iterates the entries.
        if config is None:
            return []

        # If there is only one top-level mapping, make it into a list.
        if isinstance(config, dict):
            config = [config]

        # The extra nesting is what conduct_all_experiments iterates over.
        return [config]

    def conduct_all_experiments(self):
        """Dispatch every entry of the loaded configuration.

        Every key of every entry is inspected, so a mapping that carries
        several sections is handled in full and an empty mapping is simply
        skipped. Only ``'Experiment'`` sections do any work at the moment;
        unknown keys are ignored.
        """
        for config in self.experiments:
            for experiment in config:
                for key, value in experiment.items():
                    if key == 'Experiment':
                        self.conduct_experiment(value)
                    elif key == 'Dataset':
                        # TODO: handle 'Dataset' sections.
                        pass
                    elif key == 'Parallelize_experiments':
                        # TODO: handle 'Parallelize_experiments' sections.
                        pass

    def conduct_experiment(self, exp_config: dict):
        """Prepare the datasets for one experiment and run its replicates.

        Args:
            exp_config: Settings of a single experiment. ``'name'`` and
                ``'replicate'`` are always read. A ``'dataset'`` key (passed
                to ``Dataset.load``) takes precedence over a ``'data'`` key;
                when ``'data'`` is used, a ``'featurizer'`` section is
                required as well.

        Raises:
            KeyError: If a required key is missing from ``exp_config``.
        """
        # NOTE: the datasets are prepared but not consumed yet; the replicate
        # loop below currently only reports progress.
        if 'dataset' in exp_config:
            dataset_file = exp_config['dataset']
            dataset_model = Dataset.load(dataset_file)
            dataset_eval = Dataset.load(dataset_file)
        elif 'data' in exp_config:
            dataset_model, dataset_eval = self._build_datasets(exp_config)

        name = exp_config['name']
        for replicate in range(1, exp_config['replicate'] + 1):
            print(f"Running Experiment {name} replicate {replicate}")

    def _build_datasets(self, exp_config: dict):
        """Load raw data, featurize it and build the model/eval datasets.

        The model dataset is also saved to ``dataset_<name>.pkl``.

        NOTE(review): the featurizer output is used as ``X``. The previous
        code computed the features, discarded them and passed the raw SMILES
        column instead - confirm that ``Dataset`` expects the features.

        Args:
            exp_config: Experiment settings containing the ``'data'``,
                ``'featurizer'`` and ``'name'`` keys.

        Returns:
            A ``(dataset_model, dataset_eval)`` tuple of two separate
            ``Dataset`` instances built from the same inputs.
        """
        data_conf = exp_config['data']
        datafile = data_conf['datafile']
        smiles_col = data_conf['SMILES_col']
        scores_col = data_conf['scores_col']
        ids_col = data_conf['ids_col']

        data = MoleculeLoader(datafile, smiles_col, scores_col).df

        # Everything in the featurizer section except 'name' is forwarded
        # to the featurizer as keyword arguments.
        feat = Featurizer(data)
        feat_config = exp_config['featurizer']
        feat_params = {
            key: value for key, value in feat_config.items() if key != 'name'
        }
        features = feat.featurize(feat_config['name'], **feat_params)

        # Read the targets and ids after featurizing, as the original did.
        X = features
        y = data[scores_col]
        ids_data = data[ids_col]

        dataset_model = Dataset(X=X, y=y, ids=ids_data)
        dataset_eval = Dataset(X=X, y=y, ids=ids_data)

        dataset_model.save(f"dataset_{exp_config['name']}.pkl")

        return dataset_model, dataset_eval
            

In [18]:
# Build the experiment runner; the YAML configuration is read on construction.
exp = Experimenter(config_file="test.yaml")

In [20]:
# Walk the loaded configuration and run each 'Experiment' entry it contains.
exp.conduct_all_experiments()

Running Experiment Exp01 replicate 1
Running Experiment Exp01 replicate 2
Running Experiment Exp01 replicate 3
Running Experiment Exp01 replicate 4
Running Experiment Exp01 replicate 5
Running Experiment Exp02 replicate 1
Running Experiment Exp02 replicate 2
Running Experiment Exp02 replicate 3
Running Experiment Exp02 replicate 4
Running Experiment Exp02 replicate 5
