File for loading data given a text string

In [1]:
import pandas as pd

The following file will be used to hold summary statistics of running the optimiser various times with different configurations.

In [2]:
output_file = ''
output_file += 'filename,mean yield,standard deviation,average loss, worst-case loss\n'

In [3]:
def update_output_file_with_data(filename, output_file, max_yield=100.0):
    """
    Update the output file with the data present in filename.
    
    
    -------
    Parameters:
    
    filename: string
        Path to the file with the data. Format of file should be as follows:
        seed,maximum_observed_yield
        124142,99.4
        092402,96.8
        ...
        
    output_file: string
        Current state of internal output file. Format described above
    
    max_yield: float
        The maximum observed yield across all data points for this experiment.
        Used in calculating average loss and worst-case loss
    """
    
    yields_df = pd.read_csv(filename)
    
    # Don't care about the seed column, only interested in the yields
    yields_df = yields_df[['maximum observed yield']].copy()
    
    # Mean
    average_yield = yields_df['maximum observed yield'].mean()
    
    # Standard deviation
    std_yield = yields_df['maximum observed yield'].std()
    
    # Average loss
    # This will be the average of {max_yield - yield}
    # But that's simply max_yield - the average yield!
    
    average_loss = max_yield - average_yield
    
    # Worst-case loss
    # This is the maximum of max_yield - yield
    # But that's simply max_yield - min(yield)
    
    worst_loss = max_yield - yields_df['maximum observed yield'].min()
    
    # Now add to the file, and return
    
    output_file += f"{filename},{average_yield:.2f},{std_yield:.2f},{average_loss:.2f},{worst_loss:.2f}\n"
    return output_file

Now let's run it on all the desired files

In [4]:
SUZUKI_MAX = 100.0
ARYL_AMINATION_MAX = 99.99999
DIRECT_ARYLATION_MAX = 100.0

suzuki_files = [
    'suzuki_random_3_24_50',
    'suzuki_random_3_51_50',
    'suzuki_random_5_25_50',
    'suzuki_random_5_50_100',
    'suzuki_random_8_24_50',
    'suzuki_random_10_50_50',
]

aryl_amination_files = [
    ''
]

output_file = update_output_file_with_data(filename, output_file, max_yield=100.0)

filename,mean yield,standard deviation,average loss, worst-case loss

filename,mean yield,standard deviation,average loss, worst-case loss
suzuki_random_5_50_100.csv,98.78,1.08,1.22,5.67

