# Import the necessary libraries and modules.

In [4]:
import numpy as np
import GPyOpt
import pandas as pd
from ml_models import OptimizationModel

# Update experiment_data DataFrame after each batch.

In [5]:
def _update_experiment_data(self, wellnames, recipes, Experiment_result, GP_Prediction):
    '''
    Append one batch of results to self.experiment_data, one row per well.  
    The column names used here match the ones the experiment_data frame is created
    with ("Recipes", "Wellnames", "Experiment_result", "GP_Prediction"), so the concat
    extends the existing columns instead of adding differently spelled ones.  
    params:  
        list<str> wellnames: one name per well in the batch  
        array-like recipes: the recipes that were executed, one entry/row per well  
        array-like Experiment_result: the measured result, one entry/row per well  
        array-like GP_Prediction: the model output produced after this batch, one
            entry/row per well  
    Postconditions:  
        self.experiment_data has been replaced by a new frame holding the old rows
        followed by the rows of this batch (index renumbered)  
    '''
    def _per_well(values):
        # pandas refuses 2-D arrays as a dict column, so hand it one entry per well:
        # an (n, 1) array becomes n scalars, any other 2-D+ array becomes n row-arrays
        if isinstance(values, np.ndarray) and values.ndim > 1:
            if values.ndim == 2 and values.shape[1] == 1:
                return values[:, 0]
            return list(values)
        return values

    new_data = pd.DataFrame({
        'Recipes': _per_well(recipes),
        'Wellnames': wellnames,
        'Experiment_result': _per_well(Experiment_result),
        'GP_Prediction': _per_well(GP_Prediction),
    })
    self.experiment_data = pd.concat([self.experiment_data, new_data], ignore_index=True)

# Dataframe we will be updating.

In [6]:
# Results table: starts with no rows and only the four column labels below.
experiment_data = pd.DataFrame(
    columns=[
        "Recipes",
        "Wellnames",
        "Experiment_result",
        "GP_Prediction",
    ],
)

# Define the Branin function

The Branin function (look up its graph and equation for reference) has three global minima. We use it to test our optimization model: the acquisition function should explore the space and exploit these global minima.

The Branin function acts as our objective function here. Ideally, our GP model will learn the shape of the Branin function by sampling and evaluating different points (the acquisition function balances exploitation against exploration).
Ideally, it will find the global minima.

In [1]:
def branin(x):
    '''
    Evaluate the Branin test function  
        f(x1, x2) = a*(x2 - b*x1**2 + c*x1 - r)**2 + s*(1 - t)*cos(x1) + s  
    with the recommended constants a=1, b=5.1/(4*pi**2), c=5/pi, r=6, s=10,
    t=1/(8*pi). Its three global minima have the value 10/(8*pi) ~= 0.397887 and lie at
    (-pi, 12.275), (pi, 2.275) and (3*pi, 2.475).  
    params:  
        array-like x: either a single point of shape (2,) or a batch of points of shape
            (n, 2). Column 0 is x1 and column 1 is x2. Lists are accepted.  
    returns:  
        np.ndarray: shape (n, 1), one function value per input point  
    '''
    # promote a single point / plain list to an (n, 2)-style batch so the column
    # indexing below works for every accepted input
    points = np.atleast_2d(np.asarray(x, dtype=float))
    x1 = points[:, 0]
    x2 = points[:, 1]

    # The recommended values of the parameters
    a = 1
    b = 5.1 / (4 * np.pi**2)
    c = 5 / np.pi
    r = 6
    s = 10
    t = 1 / (8 * np.pi)

    # The expression of the function
    values = a * (x2 - b * x1**2 + c * x1 - r)**2 + s * (1 - t) * np.cos(x1) + s
    return values.reshape(-1, 1)

# Training loop.

In [None]:
import random

def generate_shaha(index):
    '''
    Build a name of the form "shaha<index><dddd>", where <dddd> is a random integer
    between 0 and 9999 (inclusive) zero-padded to four digits.  
    params:  
        index: value inserted right after the "shaha" prefix  
    returns:  
        str: the generated name  
    '''
    suffix = random.randint(0, 9999)
    return "shaha{}{:04d}".format(index, suffix)

In [None]:
# batch_size is hard-coded to 5.
# y_shape is hard-coded to 2, assuming we are fixing 3 reagents and varying 2 of them.
batch_size = 5
y_shape = 2


def run(self, model, objective=None):
    '''
    Closed-loop training against a synthetic objective instead of real scan data.  
    Every batch of recipes is evaluated exactly once with the objective, and the model
    is always trained on recipes paired with the results of those same recipes.  
    params:  
        model: must provide train(recipes, scan_data), predict() and a boolean
            attribute quit. predict() output is used as the next batch of recipes.  
        callable objective: maps a (batch_size, y_shape) array of recipes to the
            "scan data" for that batch. Defaults to the module level branin function.  
    Preconditions:  
        self provides generate_shaha(i) and
        _update_experiment_data(wellnames, recipes, scan_data, gp_prediction)  
    Postconditions:  
        self.batch_num holds the number of batches that were evaluated.  
        One call to self._update_experiment_data was made per prediction, recording the
        batch that led to the prediction together with the prediction itself. The batch
        trained last has no following prediction and is therefore not recorded.  
    '''
    if objective is None:
        # resolved at call time so the default follows the module level definition
        objective = branin

    # define variables.
    self.batch_num = 0
    recipes = np.ones((batch_size, y_shape)) * 3.1415e-2

    # RUN FIRST RECIPE.
    print('<<controller>> executing batch {}'.format(self.batch_num))
    # generate new wellnames for this batch
    wellnames = [self.generate_shaha(i) for i in range(recipes.shape[0])]
    # use the objective to generate "scan data"
    scan_data = objective(recipes)
    model.train(recipes, scan_data)
    self.batch_num += 1

    # ENTER LOOP TO TRAIN RECIPES.
    while not model.quit:
        # the model has seen every evaluated batch, so ask it for the next round
        gp_prediction = model.predict()
        # record the batch just completed together with the prediction it produced
        self._update_experiment_data(wellnames, recipes, scan_data, gp_prediction)
        recipes = gp_prediction

        print('<<controller>> executing batch {}'.format(self.batch_num))
        # generate new wellnames for this batch
        wellnames = [self.generate_shaha(i) for i in range(recipes.shape[0])]
        # evaluate the new recipes BEFORE training so the pair handed to train matches
        scan_data = objective(recipes)
        model.train(recipes, scan_data)
        self.batch_num += 1

    return
    

In [None]:
def launch_auto(serveraddr, rxn_sheet_name, use_cache, simulate, no_sim, no_pr):
    '''
    Main function to launch an auto scientist that designs its own experiments.  
    Asks for the sheet name when none is given, builds the controller and the
    optimization model, optionally runs a simulation, and finally asks whether the
    protocol should be run on the robot.  
    params:  
        serveraddr: forwarded to AutoContr  
        str rxn_sheet_name: sheet to use; prompted for on stdin when falsy  
        use_cache: forwarded to AutoContr as use_cache  
        simulate: forwarded to run_protocol as simulate  
        bool no_sim: when truthy, the simulation step is skipped  
        no_pr: forwarded to run_simulation and run_protocol as no_pr  
    '''
    if not rxn_sheet_name:
        rxn_sheet_name = input('<<controller>> please input the sheet name ')
    hostname = socket.gethostname()
    controller_ip = socket.gethostbyname(hostname)
    auto = AutoContr(rxn_sheet_name, controller_ip, serveraddr, use_cache=use_cache)

    #note shorter iterations for testing
    target_values = np.loadtxt("test_target_1.csv", delimiter=',', dtype=float)
    final_spectra = target_values.reshape(1, -1)  # single row

    print(auto.rxn_df.describe())
    print(auto.rxn_df.head(20))
    print(auto.rxn_df)

    y_shape = auto.y_shape  # number of reagents to learn on
    print("starting with y_shape:", y_shape)
    reagent_info = auto.robo_params['reagent_df']

    # Generate bounds for each reagent, assuming concentrations range from 0 to 1
    bounds = []
    for reagent_num in range(1, y_shape + 1):
        bounds.append({
            'name': f'reagent_{reagent_num}_conc',
            'type': 'continuous',
            'domain': (0, 1),
        })

    #TODO get reagent info
    # NOTE(review): fixed_reagents is not assigned in this function or anywhere in the
    # visible code; unless it exists at module scope this line raises NameError - confirm.
    model = OptimizationModel(bounds, final_spectra, reagent_info, fixed_reagents)

    if not no_sim:
        # the simulation is launched without the model built above
        auto.run_simulation(no_pr=no_pr)

    answer = input('would you like to run on robot and pr? [yn] ')
    if answer.lower() == 'y':
        auto.run_protocol(simulate=simulate, model=model, no_pr=no_pr)

In [None]:
def run_simulation(self,model=None,no_pr=False):
    '''
    runs a full simulation of the protocol on local machine
    Temporarily overwrites self.server_ip with loopback and self.simulate with True.
    Both are restored when the function exits, including when the simulation raises.  
    params:
        MLModel model: the model to use when training and predicting. When None, a
            DummyMLModel is used  
        bool no_pr: forwarded to self._run  
    Returns:  
        bool: True once the simulation ran to completion (failures surface as
            exceptions, not as False)  
    '''
    #cache some things before you overwrite them for the simulation
    stored_server_ip = self.server_ip
    stored_simulate = self.simulate
    self.server_ip = '127.0.0.1'
    self.simulate = True
    try:
        # identity check: a model with a custom/array-like __eq__ must not be
        # mistaken for "no model given"
        if model is None:
            #you're simulating with a dummy model.
            print('<<controller>> running with dummy ml')
            model = DummyMLModel(self.reagent_order.shape[0], max_iters=2)
        print('<<controller>> ENTERING SIMULATION')
        port = 50000
        #launch an eve server in background for simulation purposes
        b = threading.Barrier(2,timeout=20)
        eve_thread = threading.Thread(target=launch_eve_server, kwargs={'my_ip':'','barrier':b},name='eve_thread')
        eve_thread.start()
        #wait on the barrier shared with the eve thread before connecting
        b.wait()
        self._run(port, True, model, no_pr)

        #collect the eve thread
        eve_thread.join()
    finally:
        #restore changed vars, even if the simulation failed part way through
        self.server_ip = stored_server_ip
        self.simulate = stored_simulate
    print('<<controller>> EXITING SIMULATION')
    return True

def run_protocol(self, model=None, simulate=False, port=50000, no_pr=False):
    '''
    The real deal. Input a port if you choose and protocol will be run  
    params:  
        MLModel model: the model to use when training and predicting. When None, a
            DummyMLModel is used  
        bool simulate: (this should never be used in normal operation. It is for debugging
            on the robot)  
        int port: port forwarded to self._run  
        bool no_pr: if True, will not use the plate reader even if possible to simulate
    NOTE: the simulate here is a little different than running run_simulation(). This simulate
        is sent to the robot to tell it to simulate the reaction, but that is all. The other
        simulate changes some things about how code is run from the controller
    '''
    print('<<controller>> RUNNING')
    # identity check: a model with a custom/array-like __eq__ must not be mistaken
    # for "no model given"
    if model is None:
        #you're running with a dummy model.
        print('<<controller>> running with dummy ml')
        model = DummyMLModel(self.reagent_order.shape[0], max_iters=2)
    self._run(port, simulate, model, no_pr)
    print('<<controller>> EXITING')

In [None]:
def _run(self, port, simulate, model, no_pr):
    '''
    private function to run; called by both run_simulation and run_protocol.  
    Connects to the server, runs one seed batch, then keeps running batches until
    model.quit is set, and finally closes the connection and shuts down self.pr.  
    params:  
        int port: port to connect to on self.server_ip  
        simulate: forwarded to self._init_pr and self.init_robot  
        model: must provide generate_seed_rxns(), train(), predict() and quit  
        no_pr: forwarded to self._init_pr  
    '''
    def newest_scan_filename():
        # keep the rows whose op is 'scan' or 'scan_until_complete'; reset_index moves
        # the original row labels into an 'index' column, and the scan_filename of the
        # row with the largest such label is returned
        ops = self.rxn_df['op']
        scan_rows = self.rxn_df[(ops == 'scan') | (ops == 'scan_until_complete')].reset_index()
        #TODO filenames is empty. dunno why
        return scan_rows.loc[scan_rows['index'].idxmax(), 'scan_filename']

    def fresh_wellnames(count):
        # one newly generated wellname per recipe row
        return [self._generate_wellname() for _ in range(count)]

    self.batch_num = 0 #used internally for unique filenames
    self.well_count = 0 #used internally for unique wellnames
    self._init_pr(simulate, no_pr)

    #create a connection
    raw_sock = socket.socket(socket.AF_INET)
    raw_sock.connect((self.server_ip, port))
    wrapped_sock = BufferedSocket(raw_sock, maxsize=1e9, timeout=None)
    print("<<controller>> connected")
    self.portal = Armchair(wrapped_sock, 'controller', 'Armchair_Logs', buffsize=4)
    self.init_robot(simulate)

    seed_rxns = model.generate_seed_rxns()
    recipes = self.duplicate_list_elements(seed_rxns, self.num_duplicates)

    #do the first one. There is no data to train on yet, so no training up front
    print(f'<<controller>> executing batch {self.batch_num}')
    wellnames = fresh_wellnames(recipes.shape[0])
    #plan and execute a reaction with duplicates.
    self._create_samples(wellnames, recipes)
    #pull in the scan data
    scan_data = self._get_sample_data(wellnames, newest_scan_filename())
    model.train(recipes, scan_data.T.to_numpy())
    #this is different because we don't want to use untrained model to generate predictions
    seed_rxns = model.generate_seed_rxns()
    recipes = self.duplicate_list_elements(seed_rxns, self.num_duplicates)
    self.batch_num += 1

    #enter iterative while loop now that we have data
    while not model.quit:
        # NOTE(review): at this point recipes is the batch about to be executed while
        # scan_data still belongs to the previous batch - confirm this pairing is intended
        model.train(recipes, scan_data.T.to_numpy())      # temp: added experiment_result.
        print(f'<<controller>> executing batch {self.batch_num}')
        wellnames = fresh_wellnames(recipes.shape[0])
        # plan and execute a reaction with duplicate reactions.
        # NOTE(review): the seed batch calls _create_samples with two arguments, this call
        # passes self.num_duplicates as a third - confirm against its signature
        self._create_samples(wellnames, recipes, self.num_duplicates)
        #pull in the scan data
        scan_data = self._get_sample_data(wellnames, newest_scan_filename())
        #generate the predictions for the next round
        gp_prediction = model.predict()
        self.batch_num += 1
        # update our experiment data TODO.
        self._update_experiment_data(wellnames, recipes, scan_data, gp_prediction)
        # the prediction becomes the next batch of recipes
        recipes = self.duplicate_list_elements(gp_prediction, self.num_duplicates)

    self.close_connection()
    self.pr.shutdown()