# BiDir-GRCO

In [None]:
import pandas as pd
import warnings
from sklearn.exceptions import ConvergenceWarning
import numpy as np
import os
import pickle
import math
import random
# NOTE(review): THRESHOLD is not referenced anywhere in the visible code —
# confirm it is unused before relying on or removing it.
THRESHOLD = 10

# Suppress scikit-learn ConvergenceWarning messages for the whole process.
warnings.filterwarnings('ignore', category=ConvergenceWarning)

class RegretAlgorithm:
    """Base class for multi-armed bandit strategies.

    Tracks, per arm, how many times it was pulled (``counts``) and the
    running empirical mean of its rewards (``emp_means``).  Subclasses
    override ``select_next_arm`` (the base version returns ``None``).
    """

    def __init__(self, n_arms, counts=None, emp_means=None):
        """Initialise the per-arm statistics.

        Args:
            n_arms: number of arms.
            counts: optional initial pull counts; a falsy value (None or an
                empty list) means "all zeros".  The list is stored as given,
                not copied.
            emp_means: optional initial empirical means, handled like
                ``counts``.
        """
        self.counts = counts if counts else [0 for col in range(n_arms)]
        self.emp_means = emp_means if emp_means else [0.0 for col in range(n_arms)]
        self.ranking = []
        return

    def __str__(self):
        # __str__ must return a str; the previous ``return None`` made
        # ``str(instance)`` raise TypeError.
        return type(self).__name__

    def reset(self, n_arms):
        """Discard all statistics and start again with ``n_arms`` arms."""
        self.counts = [0 for col in range(n_arms)]
        self.emp_means = [0.0 for col in range(n_arms)]
        self.ranking = []
        return

    def select_next_arm(self):
        """Return the index of the arm to pull next (no-op in the base class)."""
        pass

    def update(self, chosen_arm, reward):
        """Record ``reward`` for ``chosen_arm``.

        Increments the pull count, folds the reward into the arm's running
        mean, and recomputes ``ranking``.
        """
        self.counts[chosen_arm] = self.counts[chosen_arm] + 1
        n = self.counts[chosen_arm]
        value = self.emp_means[chosen_arm]
        # Incremental mean: new = ((n-1)/n) * old + (1/n) * reward.
        new_value = ((n - 1) / float(n)) * value + (1 / float(n)) * reward
        self.emp_means[chosen_arm] = new_value
        # Arm indices ordered by ascending pull count.
        self.ranking = list(np.arange(len(self.counts))[np.argsort(self.counts)])
        return

class UCB1(RegretAlgorithm):
    """UCB1 strategy: empirical mean plus a sqrt(2*ln(t+1)/n) bonus per arm."""

    def __init__(self, n_arms, counts=None, emp_means=None, ucbs=None, batch=False):
        RegretAlgorithm.__init__(self, n_arms, counts, emp_means)
        # Upper confidence bounds; recomputed on every update().
        self.ucbs = ucbs if ucbs else [0.0 for _ in range(n_arms)]
        self.batch = batch

    def __str__(self):
        return 'ucb1'

    def reset(self, n_arms):
        """Clear the base statistics and zero every confidence bound."""
        RegretAlgorithm.reset(self, n_arms)
        self.ucbs = [0.0 for _ in range(n_arms)]

    def select_next_arm(self):
        """Pick the first never-pulled arm, otherwise the arm with the largest UCB.

        In batch mode the never-pulled check is skipped.
        """
        if self.batch:
            return np.argmax(self.ucbs)
        for arm, pulls in enumerate(self.counts):
            if pulls == 0:
                return arm
        return np.argmax(self.ucbs)

    def update(self, chosen_arm, reward):
        """Record the reward, then refresh every arm's upper confidence bound."""
        RegretAlgorithm.update(self, chosen_arm, reward)
        log_term = 2 * math.log(sum(self.counts) + 1)
        # The 1e-7 keeps the division finite for arms that were never pulled.
        bonuses = [math.sqrt(log_term / float(pulls + 1e-7)) for pulls in self.counts]
        self.ucbs = [mean + bonus for mean, bonus in zip(self.emp_means, bonuses)]
    
class BayesUCBGaussian(UCB1):
    """UCB variant whose bonus is c * assumed_sd / sqrt(n) for each arm."""

    def __init__(self, n_arms, counts=None, emp_means=None, ucbs=None, c=2, assumed_sd=0.25, batch=False):
        UCB1.__init__(self, n_arms, counts, emp_means, ucbs, batch)
        self.c = c  # multiplier applied to the per-arm standard error
        self.assumed_sd = assumed_sd

    def __str__(self):
        return f'bayes_ucb_gaussian_c={self.c}_assumed_sd={self.assumed_sd}'

    def update(self, chosen_arm, reward):
        """Record the reward and recompute mean + c * sd / sqrt(count) per arm."""
        RegretAlgorithm.update(self, chosen_arm, reward)
        scale = self.c * self.assumed_sd
        # 1e-7 avoids dividing by zero for arms with no pulls yet.
        widths = [scale / math.sqrt(pulls + 1e-7) for pulls in self.counts]
        self.ucbs = [mean + width for mean, width in zip(self.emp_means, widths)]



from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C
def create_model_ohe(save_dir="model/"):
    """Pickle an unfitted Gaussian-process regressor to ``<save_dir>/model.pkl``.

    The directory is created if it does not exist.
    """
    os.makedirs(save_dir, exist_ok=True)
    gp_kernel = C(1.0, (1e-7, 1e7)) * RBF(1.0, (1e-8, 1e8))
    regressor = GaussianProcessRegressor(kernel=gp_kernel, n_restarts_optimizer=20)
    with open(os.path.join(save_dir, "model.pkl"), "wb") as handle:
        pickle.dump(regressor, handle)

from sklearn.kernel_ridge import KernelRidge
def create_model_kr(save_dir="model/"):
    """Pickle an unfitted RBF kernel-ridge regressor to ``<save_dir>/model.pkl``.

    The directory is created if it does not exist.
    """
    os.makedirs(save_dir, exist_ok=True)
    regressor = KernelRidge(kernel='rbf', alpha=1.0)
    with open(os.path.join(save_dir, "model.pkl"), "wb") as handle:
        pickle.dump(regressor, handle)

from sklearn.tree import DecisionTreeRegressor
def create_model_dcr(save_dir="model/"):
    """Pickle an unfitted decision-tree regressor to ``<save_dir>/model.pkl``.

    The directory is created if it does not exist.
    """
    os.makedirs(save_dir, exist_ok=True)
    regressor = DecisionTreeRegressor()
    with open(os.path.join(save_dir, "model.pkl"), "wb") as handle:
        pickle.dump(regressor, handle)

from sklearn.svm import SVR
def create_model_svm(save_dir="model/"):
    """Pickle an unfitted support-vector regressor to ``<save_dir>/model.pkl``.

    The directory is created if it does not exist.
    """
    os.makedirs(save_dir, exist_ok=True)
    regressor = SVR()
    with open(os.path.join(save_dir, "model.pkl"), "wb") as handle:
        pickle.dump(regressor, handle)

from sklearn.ensemble import RandomForestRegressor
def create_model_rmr(save_dir="model/"):
    """Pickle an unfitted random-forest regressor to ``<save_dir>/model.pkl``.

    The directory is created if it does not exist.
    """
    os.makedirs(save_dir, exist_ok=True)
    regressor = RandomForestRegressor()
    with open(os.path.join(save_dir, "model.pkl"), "wb") as handle:
        pickle.dump(regressor, handle)

from sklearn.neighbors import KNeighborsRegressor
def create_model_knn(save_dir="model/"):
    """Pickle an unfitted 1-nearest-neighbour regressor to ``<save_dir>/model.pkl``.

    The directory is created if it does not exist.
    """
    os.makedirs(save_dir, exist_ok=True)
    regressor = KNeighborsRegressor(n_neighbors=1)
    with open(os.path.join(save_dir, "model.pkl"), "wb") as handle:
        pickle.dump(regressor, handle)

def load_model_ohe(save_dir="model/"):
    """Unpickle and return the object stored in ``<save_dir>/model.pkl``."""
    with open(os.path.join(save_dir, "model.pkl"), "rb") as handle:
        return pickle.load(handle)

def save_model_ohe(model, save_dir="model/"):
    """Pickle ``model`` to ``<save_dir>/model.pkl``, overwriting any existing file.

    Args:
        model: any picklable object (the fitted regressor in this script).
        save_dir: target directory; created if missing so that saving works
            even when no ``create_model_*`` helper ran first.
    """
    # An empty save_dir means "current directory"; os.makedirs('') would raise.
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
    model_path = os.path.join(save_dir, "model.pkl")
    with open(model_path, "wb") as f:
        pickle.dump(model, f)
    return

def fit_model_ohe(ohe_dict, substrate, record_data, save_dir="model/"):
    """Refit the stored model on every recorded result for ``substrate``.

    Loads the model from ``save_dir``, trains it on the encodings of
    (substrate, condition) pairs that have a recorded (non-None) value and an
    entry in ``ohe_dict``, saves it back, and returns it.

    Raises:
        ValueError: if no usable training sample exists for ``substrate``.
    """
    model = load_model_ohe(save_dir)
    features, targets = [], []
    for condition, per_substrate in record_data.items():
        observed = per_substrate[substrate]
        if observed is None:
            continue
        encoding = ohe_dict.get((substrate, condition))
        if encoding is None:
            continue
        features.append(encoding)
        targets.append(observed)
    if not features or not targets:
        raise ValueError(f"No valid data found for substrate '{substrate}'.")
    model.fit(np.array(features), np.array(targets))
    save_model_ohe(model, save_dir)
    return model

def generate_mordred(Substrate_list, arms_list):
    """Build Mordred feature vectors for every (substrate, arm) pair.

    Returns a dict keyed by ``(substrate, arm)`` whose value is the substrate
    descriptor row followed by the arm descriptor row, both read from CSV.
    """
    substrate_table = pd.read_csv(r'C:\Users\Administrator\Desktop\supply data\C–N Cross-Coupling\additive_substrate_Mordred.csv')
    arm_table = pd.read_csv(r'C:\Users\Administrator\Desktop\supply data\C–N Cross-Coupling\base_ligand_Mordred.csv')

    def descriptor_row(table, key_column, key):
        # First matching row, with the first column dropped
        # (presumably the identifier column — confirm against the CSV).
        matches = table[table[key_column] == key]
        return matches.iloc[:, 1:].values[0]

    encoded = {}
    for s in Substrate_list:
        for a in arms_list:
            encoded[(s, a)] = np.concatenate((
                descriptor_row(substrate_table, 'additive_substrate', s),
                descriptor_row(arm_table, 'base_ligand', a),
            ))
    return encoded

def generate_morgan(Substrate_list, arms_list):
    """Build Morgan feature vectors for every (substrate, arm) pair.

    Returns a dict keyed by ``(substrate, arm)`` whose value is the substrate
    descriptor row followed by the arm descriptor row, both read from CSV.
    """
    substrate_table = pd.read_csv(r'C:\Users\Administrator\Desktop\supply data\C–N Cross-Coupling\additive_substrate_Morgan.csv')
    arm_table = pd.read_csv(r'C:\Users\Administrator\Desktop\supply data\C–N Cross-Coupling\base_ligand_Morgan.csv')

    def descriptor_row(table, key_column, key):
        # First matching row, with the first column dropped
        # (presumably the identifier column — confirm against the CSV).
        matches = table[table[key_column] == key]
        return matches.iloc[:, 1:].values[0]

    encoded = {}
    for s in Substrate_list:
        for a in arms_list:
            encoded[(s, a)] = np.concatenate((
                descriptor_row(substrate_table, 'additive_substrate', s),
                descriptor_row(arm_table, 'base_ligand', a),
            ))
    return encoded

def generate_CM(Substrate_list, arms_list):
    """Build "CM" feature vectors for every (substrate, arm) pair.

    Returns a dict keyed by ``(substrate, arm)`` whose value is the substrate
    descriptor row followed by the arm descriptor row, both read from CSV.
    """
    substrate_table = pd.read_csv(r'C:\Users\Administrator\Desktop\supply data\C–N Cross-Coupling\additive_substrate_CM.csv')
    arm_table = pd.read_csv(r'C:\Users\Administrator\Desktop\supply data\C–N Cross-Coupling\base_ligand_CM.csv')

    def descriptor_row(table, key_column, key):
        # First matching row, with the first column dropped
        # (presumably the identifier column — confirm against the CSV).
        matches = table[table[key_column] == key]
        return matches.iloc[:, 1:].values[0]

    encoded = {}
    for s in Substrate_list:
        for a in arms_list:
            encoded[(s, a)] = np.concatenate((
                descriptor_row(substrate_table, 'additive_substrate', s),
                descriptor_row(arm_table, 'base_ligand', a),
            ))
    return encoded

def generate_EI(Substrate_list, arms_list):
    """Build "EI" feature vectors for every (substrate, arm) pair.

    Returns a dict keyed by ``(substrate, arm)`` whose value is the substrate
    descriptor row followed by the arm descriptor row, both read from CSV.
    """
    substrate_table = pd.read_csv(r'C:\Users\Administrator\Desktop\supply data\C–N Cross-Coupling\additive_substrate_EI.csv')
    arm_table = pd.read_csv(r'C:\Users\Administrator\Desktop\supply data\C–N Cross-Coupling\base_ligand_EI.csv')

    def descriptor_row(table, key_column, key):
        # First matching row, with the first column dropped
        # (presumably the identifier column — confirm against the CSV).
        matches = table[table[key_column] == key]
        return matches.iloc[:, 1:].values[0]

    encoded = {}
    for s in Substrate_list:
        for a in arms_list:
            encoded[(s, a)] = np.concatenate((
                descriptor_row(substrate_table, 'additive_substrate', s),
                descriptor_row(arm_table, 'base_ligand', a),
            ))
    return encoded

def get_field(data,arm,substrate):
    """Return the value stored at ``data[arm][substrate]``."""
    return data[arm][substrate]

def recode_experiment(record_data,arm,substrate,field):
    """Store ``field`` at ``record_data[arm][substrate]`` in place and return the same dict."""
    arm_results = record_data[arm]
    arm_results[substrate] = field
    return record_data

def generate_ohe(Substrate_list, arms_list):
    """Build a one-hot vector for every (substrate, arm) pair.

    Each vector has ``len(Substrate_list) * len(arms_list)`` entries with a
    single 1 at ``substrate_index * len(arms_list) + arm_index``.

    Args:
        Substrate_list: iterable of hashable substrate identifiers.
        arms_list: iterable of hashable arm (condition) identifiers.

    Returns:
        dict mapping ``(substrate, arm)`` to a 1-D numpy array.
    """
    Substrate_list = list(Substrate_list)
    arms_list = list(arms_list)
    n_arms = len(arms_list)
    width = len(Substrate_list) * n_arms

    # Position maps replace a list.index() scan per pair. setdefault keeps
    # the FIRST occurrence, which is what list.index() returned for duplicates.
    substrate_pos = {}
    for position, s in enumerate(Substrate_list):
        substrate_pos.setdefault(s, position)
    arm_pos = {}
    for position, a in enumerate(arms_list):
        arm_pos.setdefault(a, position)

    ohe_dict = {}
    for s, s_index in substrate_pos.items():
        for a, a_index in arm_pos.items():
            ohe = np.zeros(width)
            ohe[s_index * n_arms + a_index] = 1
            ohe_dict[(s, a)] = ohe
    return ohe_dict

def select_substrate_ohe(horizon,arm, Substrate_list, ohe_dict, record_data,save_dir="model/"):
    """Choose the substrate to test next under condition ``arm``.

    While ``horizon <= 30`` a substrate with no recorded result under ANY
    condition is picked uniformly at random.  Otherwise — or when no such
    substrate is left during that phase — the saved model predicts a value
    for every substrate not yet tested under ``arm`` and the highest
    prediction wins, with ties broken at random.

    Previously, running out of untrained substrates during the first phase
    returned None, which made the caller fail on ``data[condition][None]``;
    that case now falls through to the model-guided branch.

    Args:
        horizon: zero-based round index.
        arm: the condition chosen for this round (a key of ``record_data``).
        Substrate_list: iterable of candidate substrates.
        ohe_dict: maps ``(substrate, arm)`` to a 1-D feature array.
        record_data: ``record_data[condition][substrate]`` holds the observed
            value, or None if that pair has not been tested.
        save_dir: directory holding ``model.pkl``.

    Returns:
        The selected substrate, or None if every substrate has already been
        tested under ``arm``.
    """
    if horizon <= 30:
        untrained_substrates = [
            substrate for substrate in Substrate_list
            if all(record_data[condition].get(substrate) is None for condition in record_data)
        ]
        if untrained_substrates:
            return random.choice(untrained_substrates)
        # Nothing left to explore blindly: use the model below.

    model = load_model_ohe(save_dir)
    available_predictions = []
    for substrate in Substrate_list:
        if record_data[arm].get(substrate) is not None:
            continue  # already tested under this condition
        X = ohe_dict[(substrate, arm)].reshape(1, -1)
        available_predictions.append((substrate, model.predict(X)[0]))

    if not available_predictions:
        return None
    best_value = max(available_predictions, key=lambda item: item[1])[1]
    candidates = [substrate for substrate, pred in available_predictions if pred == best_value]
    return random.choice(candidates)

def data_anlyse():
    """Load the C–N coupling dataset and index yields by arm and substrate.

    Yields are divided by 100.  Arm names and substrate names are
    '+'-joined pairs that are split into (base, ligand) and
    (additive, substrate id) to locate the matching row.

    Returns:
        ``(data, arms_list, substrate_list)`` where ``data[arm][substrate]``
        is the yield of the first matching row, and the two lists are the
        'base_ligand' and 'additive_substrate' columns of their CSV files.
    """
    yields = pd.read_csv(r'C:\Users\Administrator\Desktop\supply data\C–N Cross-Coupling\cn-processed.csv')
    yields['yield'] = yields['yield'] / 100
    arm_frame = pd.read_csv(r'C:\Users\Administrator\Desktop\supply data\C–N Cross-Coupling\base_ligand.csv')
    substrate_frame = pd.read_csv(r'C:\Users\Administrator\Desktop\supply data\C–N Cross-Coupling\additive_substrate.csv')
    arms_list = arm_frame['base_ligand']
    substrate_list = substrate_frame['additive_substrate']

    data = {}
    for arm in arms_list:
        per_arm = data[arm] = {}
        for sub in substrate_list:
            base, ligand = arm.split('+')
            additive, substrate_id = sub.split('+')
            row_mask = (
                (yields['base_name'] == base) &
                (yields['ligand_name'] == ligand) &
                (yields['additive_id'] == additive) &
                (yields['substrate_id'] == substrate_id)
            )
            per_arm[sub] = yields[row_mask]['yield'].values[0]
    return data, arms_list, substrate_list

def run_simulation(model,enconding,num_sims, horizons, arms_list, Substrate_list,data):
    """Run the bandit-plus-surrogate-model simulation and return its history.

    Every simulation empties ``model/``, pickles a fresh regressor there and
    starts a new BayesUCBGaussian bandit over ``arms_list``.  In each round
    the bandit picks a condition, ``select_substrate_ohe`` picks a substrate,
    the yield is looked up in ``data``, the bandit is updated and the
    regressor is refitted on that substrate's recorded results.

    Args:
        model: one of 'Gaussian_process', 'decision_tree', 'svm',
            'random_forest', 'Knn'.
        enconding: one of 'mordred', 'EI', 'CM', 'One-hot', 'Morgan'.
        num_sims: number of independent simulations.
        horizons: rounds per simulation.
        arms_list: conditions, indexable by integer position.
        Substrate_list: candidate substrates.
        data: ``data[condition][substrate]`` gives the yield.

    Returns:
        A list of dicts with keys 'sim', 'horizon', 'condition', 'substrate'
        and 'yield', one per round over all simulations.

    Raises:
        ValueError: for an unknown encoding or model name.  Unknown model
            names used to pass silently and fail later when ``model.pkl``
            could not be found.
    """
    encoders = {
        'mordred': generate_mordred,
        'EI': generate_EI,
        'CM': generate_CM,
        'One-hot': generate_ohe,
        'Morgan': generate_morgan,
    }
    model_factories = {
        'Gaussian_process': create_model_ohe,
        'decision_tree': create_model_dcr,
        'svm': create_model_svm,
        'random_forest': create_model_rmr,
        'Knn': create_model_knn,
    }
    if enconding not in encoders:
        raise ValueError("Unknown encoding type")
    if model not in model_factories:
        raise ValueError(f"Unknown model type: {model!r}")

    ohe_dict = encoders[enconding](Substrate_list, arms_list)
    create_model = model_factories[model]

    results = []
    save_dir = "model/"
    for sim in range(num_sims):
        # Start every simulation from an empty model directory.
        if os.path.exists(save_dir):
            for f in os.listdir(save_dir):
                os.remove(os.path.join(save_dir, f))
                print('model delete success')
        create_model(save_dir)

        record_data = {condition: {substrate: None for substrate in Substrate_list} for condition in arms_list}
        algo = BayesUCBGaussian(len(arms_list))
        algo.reset(len(arms_list))
        history = []
        for horizon in range(horizons):
            arm = algo.select_next_arm()
            condition = arms_list[arm]
            substrate = select_substrate_ohe(horizon, condition, Substrate_list, ohe_dict, record_data, save_dir=save_dir)
            field = get_field(data, condition, substrate)
            record_data = recode_experiment(record_data, condition, substrate, field)
            history.append({
                'sim': sim + 1,
                'horizon': horizon + 1,
                'condition': condition,
                'substrate': substrate,
                'yield': field
            })
            algo.update(arm, field)
            fit_model_ohe(ohe_dict, substrate, record_data, save_dir=save_dir)
        results.extend(history)
        print(f"Simulation {sim + 1} completed.")
    return results

if __name__ == '__main__':
    # Run every (model, encoding) combination and write one CSV per run.
    data,arms_list,Substrate_list=data_anlyse()
    num=500
    encodings=['mordred','One-hot','CM','EI','Morgan']

    models=['svm','decision_tree','Gaussian_process','random_forest','Knn']

    path=r'C:\Users\Administrator\Desktop\supply data\Basic experiment\C–N Cross-Coupling\results'
    # Create the output directory up front so a long run cannot fail at write time.
    os.makedirs(path, exist_ok=True)

    for model in models:
        for enconding in encodings:
            history = run_simulation(
                model=model,
                enconding=enconding,
                num_sims=num,
                horizons=100,
                arms_list=arms_list,
                Substrate_list=Substrate_list,
                data=data,
            )
            history_df = pd.DataFrame(history)
            # os.path.join replaces the f-string that contained the invalid
            # escape sequence '\{'.
            history_df.to_csv(os.path.join(path, f'{model}_history_{num}_{enconding}.csv'), index=False)

# MAB

In [None]:
import pandas as pd
import warnings
from sklearn.exceptions import ConvergenceWarning
import numpy as np
import os
import pickle
import math
import random
# NOTE(review): THRESHOLD is not referenced anywhere in the visible code —
# confirm it is unused before relying on or removing it.
THRESHOLD = 10
# `beta` is used by BayesUCBBeta below.
# NOTE(review): `norm` appears unused in the visible code — confirm.
from scipy.stats import beta, norm

# Suppress scikit-learn ConvergenceWarning messages for the whole process.
warnings.filterwarnings('ignore', category=ConvergenceWarning)

class RegretAlgorithm:
    """Base class for multi-armed bandit strategies.

    Tracks, per arm, how many times it was pulled (``counts``) and the
    running empirical mean of its rewards (``emp_means``).  Subclasses
    override ``select_next_arm`` (the base version returns ``None``).
    """

    def __init__(self, n_arms, counts=None, emp_means=None):
        """Initialise the per-arm statistics.

        Args:
            n_arms: number of arms.
            counts: optional initial pull counts; a falsy value (None or an
                empty list) means "all zeros".  The list is stored as given,
                not copied.
            emp_means: optional initial empirical means, handled like
                ``counts``.
        """
        self.counts = counts if counts else [0 for col in range(n_arms)]
        self.emp_means = emp_means if emp_means else [0.0 for col in range(n_arms)]
        self.ranking = []  # filled by update(): arm indices by ascending pull count
        return

    def __str__(self):
        # __str__ must return a str; the previous ``return None`` made
        # ``str(instance)`` raise TypeError.
        return type(self).__name__

    def reset(self, n_arms):
        """Discard all statistics and start again with ``n_arms`` arms."""
        self.counts = [0 for col in range(n_arms)]
        self.emp_means = [0.0 for col in range(n_arms)]
        self.ranking = []
        return

    def select_next_arm(self):
        """Return the index of the arm to pull next (no-op in the base class)."""
        pass

    def update(self, chosen_arm, reward):
        """Record ``reward`` for ``chosen_arm``.

        Increments the pull count, folds the reward into the arm's running
        mean, and recomputes ``ranking``.
        """
        self.counts[chosen_arm] = self.counts[chosen_arm] + 1

        n = self.counts[chosen_arm]
        value = self.emp_means[chosen_arm]
        # Incremental mean: new = ((n-1)/n) * old + (1/n) * reward.
        new_value = ((n - 1) / float(n)) * value + (1 / float(n)) * reward
        self.emp_means[chosen_arm] = new_value

        # Arm indices ordered by ascending pull count.
        self.ranking = list(np.arange(len(self.counts))[np.argsort(self.counts)])
        return

class ETC(RegretAlgorithm):
    """Explore-then-commit: pull every arm ``explore_limit`` times, then stick with the best."""

    def __init__(self, n_arms, counts=None, emp_means=None, explore_limit=1):
        RegretAlgorithm.__init__(self, n_arms, counts, emp_means)
        self.limit = explore_limit  # pulls per arm during exploration
        self.best_arm = -1  # -1 means "not committed yet"

    def __str__(self):
        return 'etc'

    def reset(self, n_arms):
        """Clear the statistics and forget the committed arm."""
        RegretAlgorithm.reset(self, n_arms)
        self.best_arm = -1

    def select_next_arm(self):
        """Return the least-pulled arm while exploring, else the committed arm."""
        exploration_budget = self.limit * len(self.counts)
        if sum(self.counts) == exploration_budget:
            # Exploration has just finished: commit to the best empirical mean.
            self.best_arm = np.argmax(self.emp_means)
        return np.argmin(self.counts) if self.best_arm == -1 else self.best_arm

class Random(RegretAlgorithm):
    """Baseline that picks an arm uniformly at random on every round."""

    def __str__(self):
        return 'random'

    def select_next_arm(self):
        n_arms = len(self.emp_means)
        return random.randrange(n_arms)

class Exploit(RegretAlgorithm):
    """Pure-greedy strategy with optimistic initial means.

    Every arm starts with an empirical mean of 2.0 so that each arm is
    selected at least once before its real mean takes over (the first update
    of an arm replaces the 2.0 with the observed reward).
    """

    def __init__(self, n_arms, counts=None, emp_means=None):
        RegretAlgorithm.__init__(self, n_arms, counts, emp_means)
        # set all initial emp_means to 2.0, so all arms are at least selected once
        self.emp_means = emp_means if emp_means else [2.0 for col in range(n_arms)]
        return

    def __str__(self):
        return 'exploit'

    def reset(self, n_arms):
        """Clear the statistics and restore the optimistic 2.0 starting means.

        Without this override the inherited reset() zeroed the means, so a
        reset right after construction silently removed the guarantee that
        every arm is tried once.
        """
        RegretAlgorithm.reset(self, n_arms)
        self.emp_means = [2.0 for col in range(n_arms)]
        return

    def select_next_arm(self):
        """Return an arm with the highest empirical mean, ties broken at random."""
        return np.random.choice(np.flatnonzero(np.array(self.emp_means) == max(self.emp_means)))

class EpsilonGreedy(RegretAlgorithm):
    """Epsilon-greedy: usually exploit the best mean, otherwise pick a random arm."""

    def __init__(self, n_arms, epsilon, counts=None, emp_means=None):
        RegretAlgorithm.__init__(self, n_arms, counts, emp_means)
        self.epsilon = epsilon

    def __str__(self):
        return f'eps_greedy_{self.epsilon}'

    def select_next_arm(self):
        """Exploit when the random draw exceeds epsilon, else explore uniformly."""
        if random.random() > self.epsilon:
            means = np.array(self.emp_means)
            best_arms = np.flatnonzero(means == max(self.emp_means))
            # Ties between equally good arms are broken at random.
            return np.random.choice(best_arms)
        return random.randrange(len(self.emp_means))


class Pursuit(RegretAlgorithm):
    """Pursuit strategy: sample arms from probabilities pulled toward the current best arm."""

    def __init__(self, n_arms, lr, counts=None, emp_means=None, probs=None):
        RegretAlgorithm.__init__(self, n_arms, counts, emp_means)
        self.lr = lr  # step size of the probability update
        self.probs = probs if probs else [float(1/n_arms) for _ in range(n_arms)]

    def __str__(self):
        return f'pursuit_{self.lr}'

    def reset(self, n_arms):
        """Clear the statistics and return to uniform selection probabilities."""
        RegretAlgorithm.reset(self, n_arms)
        self.probs = [float(1/n_arms) for _ in range(n_arms)]

    def select_next_arm(self):
        """Draw one arm index according to the current probabilities."""
        arm_indices = np.arange(len(self.emp_means))
        return random.choices(arm_indices, weights=self.probs, k=1)[0]

    def update(self, chosen_arm, reward):
        """Record the reward, then move probability mass toward the best arm."""
        RegretAlgorithm.update(self, chosen_arm, reward)

        # Probabilities stay untouched while the empirical means sum to zero.
        if np.sum(self.emp_means) == 0:
            return

        leader = np.argmax(self.emp_means)
        for ii in range(len(self.counts)):
            # The leading arm moves toward 1, every other arm toward 0.
            target = 1 if ii == leader else 0
            current_prob = self.probs[ii]
            self.probs[ii] = current_prob + self.lr*(target-current_prob)


class UCB1(RegretAlgorithm):
    """UCB1 strategy: empirical mean plus a sqrt(2*ln(t+1)/n) bonus per arm."""

    def __init__(self, n_arms, counts=None, emp_means=None, ucbs=None, batch=False):
        RegretAlgorithm.__init__(self, n_arms, counts, emp_means)
        # Upper confidence bounds, derived from means and counts in update().
        self.ucbs = ucbs if ucbs else [0.0 for _ in range(n_arms)]
        self.batch = batch

    def __str__(self):
        return 'ucb1'

    def reset(self, n_arms):
        """Clear the base statistics and zero every confidence bound."""
        RegretAlgorithm.reset(self, n_arms)
        self.ucbs = [0.0 for _ in range(n_arms)]

    def select_next_arm(self):
        """Pick the first never-pulled arm, otherwise the arm with the largest UCB.

        In batch mode the never-pulled check is skipped.
        """
        if self.batch:
            return np.argmax(self.ucbs)
        for arm, pulls in enumerate(self.counts):
            if pulls == 0:
                return arm
        return np.argmax(self.ucbs)

    def update(self, chosen_arm, reward):
        """Record the reward, then refresh every arm's upper confidence bound."""
        RegretAlgorithm.update(self, chosen_arm, reward)
        log_term = 2 * math.log(sum(self.counts) + 1)
        # The 1e-7 keeps the division finite for arms that were never pulled.
        bonuses = [math.sqrt(log_term / float(pulls + 1e-7)) for pulls in self.counts]
        self.ucbs = [mean + bonus for mean, bonus in zip(self.emp_means, bonuses)]


class BayesUCBBeta(UCB1):
    """UCB with a Beta posterior per arm: posterior mean plus c posterior standard deviations."""

    def __init__(self, n_arms, counts=None, emp_means=None, ucbs=None, alphas=None, betas=None, c=2, batch=False):
        UCB1.__init__(self, n_arms, counts, emp_means, ucbs, batch)
        # Beta(1, 1) for every arm unless the caller supplies parameters.
        self.alphas = alphas if alphas else [1.0 for _ in range(n_arms)]
        self.betas = betas if betas else [1.0 for _ in range(n_arms)]
        self.c = c  # number of standard deviations added to the mean

    def __str__(self):
        return f'bayes_ucb_beta_c={self.c}'

    def reset(self, n_arms):
        """Clear the statistics and restore Beta(1, 1) for every arm."""
        UCB1.reset(self, n_arms)
        self.alphas = [1.0 for _ in range(n_arms)]
        self.betas = [1.0 for _ in range(n_arms)]

    def update(self, chosen_arm, reward):
        """Record the reward, update the arm's Beta parameters and refresh all UCBs."""
        RegretAlgorithm.update(self, chosen_arm, reward)

        # reward is added to alpha and (1 - reward) to beta.
        self.alphas[chosen_arm] = self.alphas[chosen_arm] + reward
        self.betas[chosen_arm] = self.betas[chosen_arm] + (1-reward)

        self.ucbs = [
            a/(a+b) + self.c * beta.std(a, b)
            for a, b in zip(self.alphas, self.betas)
        ]


class BayesUCBGaussian(UCB1):
    """UCB variant whose bonus is c * assumed_sd / sqrt(n) for each arm."""

    def __init__(self, n_arms, counts=None, emp_means=None, ucbs=None, c=2, assumed_sd=0.25, batch=False):
        UCB1.__init__(self, n_arms, counts, emp_means, ucbs, batch)
        self.c = c  # multiplier applied to the per-arm standard error
        self.assumed_sd = assumed_sd

    def __str__(self):
        return f'bayes_ucb_gaussian_c={self.c}_assumed_sd={self.assumed_sd}'

    def update(self, chosen_arm, reward):
        """Record the reward and recompute mean + c * sd / sqrt(count) per arm."""
        RegretAlgorithm.update(self, chosen_arm, reward)
        scale = self.c * self.assumed_sd
        # 1e-7 avoids dividing by zero for arms with no pulls yet.
        widths = [scale / math.sqrt(pulls + 1e-7) for pulls in self.counts]
        self.ucbs = [mean + width for mean, width in zip(self.emp_means, widths)]


class ThompsonSamplingBeta(RegretAlgorithm):
    """Thompson sampling with an independent Beta posterior per arm."""

    def __init__(self, n_arms, counts=None, emp_means=None, alphas=None, betas=None):
        RegretAlgorithm.__init__(self, n_arms, counts, emp_means)
        # Beta(1, 1) for every arm unless the caller supplies parameters.
        self.alphas = alphas if alphas else [1.0 for _ in range(n_arms)]
        self.betas = betas if betas else [1.0 for _ in range(n_arms)]

    def __str__(self):
        return 'ts_beta'

    def reset(self, n_arms):
        """Clear the statistics and restore Beta(1, 1) for every arm."""
        RegretAlgorithm.reset(self, n_arms)
        self.alphas = [1.0 for _ in range(n_arms)]
        self.betas = [1.0 for _ in range(n_arms)]

    def select_next_arm(self):
        """Sample once from every arm's Beta posterior and return the largest draw's index."""
        draws = np.random.default_rng().beta(self.alphas, self.betas)
        return np.argmax(draws)

    def update(self, chosen_arm, reward):
        """Record the reward; add it to alpha and (1 - reward) to beta for that arm."""
        RegretAlgorithm.update(self, chosen_arm, reward)
        self.alphas[chosen_arm] = self.alphas[chosen_arm] + reward
        self.betas[chosen_arm] = self.betas[chosen_arm] + (1-reward)


class ThompsonSamplingGaussian(RegretAlgorithm):
    """Thompson sampling with Gaussian rewards and a Gamma-distributed precision per arm."""

    def __init__(self, n_arms, counts=None, emp_means=None, alphas=None, betas=None):
        RegretAlgorithm.__init__(self, n_arms, counts, emp_means)
        # Gamma shape (alpha) and rate (beta) of each arm's precision.
        self.alphas = alphas if alphas else [1.0 for _ in range(n_arms)]
        self.betas = betas if betas else [0.1 for _ in range(n_arms)]

    def __str__(self):
        return 'ts_gaussian_novar'

    def reset(self, n_arms):
        """Clear the statistics and restore alpha=1.0, beta=0.1 for every arm."""
        RegretAlgorithm.reset(self, n_arms)
        self.alphas = [1.0 for _ in range(n_arms)]
        self.betas = [0.1 for _ in range(n_arms)]

    def select_next_arm(self):
        """Sample a precision, then a mean, per arm and return the largest draw's index."""
        rng = np.random.default_rng()
        # rng.gamma() takes a scale parameter, i.e. 1/beta.
        scales = [1/b for b in self.betas]
        precisions = rng.gamma(self.alphas, scales)
        # 1e-7 guards against dividing by a zero precision.
        variances = 1 / (precisions + 1e-7)
        draws = rng.normal(self.emp_means, np.sqrt(variances))
        return np.argmax(draws)

    def update(self, chosen_arm, reward):
        """Record the reward and update the arm's Gamma parameters.

        The squared deviation uses the empirical mean AFTER this reward has
        been folded in by the base-class update.
        """
        RegretAlgorithm.update(self, chosen_arm, reward)

        n = 1  # one observation per update
        nu = self.counts[chosen_arm]
        weight = n * nu / (nu + n)
        squared_dev = (reward - self.emp_means[chosen_arm])**2
        self.alphas[chosen_arm] = self.alphas[chosen_arm] + 0.5
        self.betas[chosen_arm] = self.betas[chosen_arm] + (weight * (squared_dev/2))


def get_field(data,arm,substrate):
    """Return the value stored at ``data[arm][substrate]``."""
    return data[arm][substrate]

def select_substrate_ohe(Substrate_list):
    """Return one element of ``Substrate_list`` chosen uniformly at random."""
    return random.choice(Substrate_list)

def data_anlyse():
    """Load the C–N coupling dataset and index binarised yields by arm and substrate.

    Yields below 50 become 0 and all others become 1.  Arm names and
    substrate names are '+'-joined pairs that are split into (base, ligand)
    and (additive, substrate id) to locate the matching row.

    Returns:
        ``(data, arms_list, substrate_list)`` where ``data[arm][substrate]``
        is the value of the first matching row, and the two lists are the
        'base_ligand' and 'additive_substrate' columns of their CSV files.
    """
    yields = pd.read_csv(r'C:\Users\Administrator\Desktop\supply data\C–N Cross-Coupling\cn-processed.csv')
    # Binary rewards are the setting for 'ThompsonSamplingBeta' and 'BayesUCBBeta'.
    # For the other algorithms ('Random', 'Exploit', 'EpsilonGreedy', 'ETC',
    # 'Pursuit', 'ThompsonSamplingGaussian', 'UCB1', 'BayesUCBGaussian') the
    # yield is meant to be divided by 100 instead of being binarised.
    yields['yield'] = yields['yield'].apply(lambda pct: 0 if pct<50 else 1)
    arm_frame = pd.read_csv(r'C:\Users\Administrator\Desktop\supply data\C–N Cross-Coupling\base_ligand.csv')
    substrate_frame = pd.read_csv(r'C:\Users\Administrator\Desktop\supply data\C–N Cross-Coupling\additive_substrate.csv')
    arms_list = arm_frame['base_ligand']
    substrate_list = substrate_frame['additive_substrate']

    data = {}
    for arm in arms_list:
        per_arm = data[arm] = {}
        for sub in substrate_list:
            base, ligand = arm.split('+')
            additive, substrate_id = sub.split('+')
            row_mask = (
                (yields['base_name'] == base) &
                (yields['ligand_name'] == ligand) &
                (yields['additive_id'] == additive) &
                (yields['substrate_id'] == substrate_id)
            )
            per_arm[sub] = yields[row_mask]['yield'].values[0]
    return data, arms_list, substrate_list


def run_simulation(algo,num_sims, horizons, arms_list, Substrate_list,data):
    """Run a plain multi-armed-bandit simulation and return its history.

    In every round the bandit picks a condition, a substrate is drawn at
    random, the reward is looked up in ``data`` and the bandit is updated.

    Args:
        algo: an algorithm name ('Random', 'Exploit', 'EpsilonGreedy',
            'Pursuit', 'ETC', 'UCB1', 'BayesUCBBeta', 'BayesUCBGaussian',
            'ThompsonSamplingBeta', 'ThompsonSamplingGaussian') or an
            already-built algorithm object, which is reset and reused for
            every simulation.
        num_sims: number of independent simulations.
        horizons: rounds per simulation.
        arms_list: conditions, indexable by integer position.
        Substrate_list: candidate substrates.
        data: ``data[condition][substrate]`` gives the reward.

    Returns:
        A list of dicts with keys 'sim', 'horizon', 'condition', 'substrate'
        and 'yield', one per round over all simulations.

    Raises:
        ValueError: if ``algo`` is a string that names no known algorithm
            (this used to fail with AttributeError on ``str.reset``).
    """
    n_arms = len(arms_list)
    factories = {
        'Random': lambda: Random(n_arms),
        'Exploit': lambda: Exploit(n_arms),
        'EpsilonGreedy': lambda: EpsilonGreedy(n_arms, epsilon=0.1),
        'Pursuit': lambda: Pursuit(n_arms, lr=0.1),
        'ETC': lambda: ETC(n_arms),
        'UCB1': lambda: UCB1(n_arms),
        'BayesUCBBeta': lambda: BayesUCBBeta(n_arms),
        'BayesUCBGaussian': lambda: BayesUCBGaussian(n_arms),
        'ThompsonSamplingBeta': lambda: ThompsonSamplingBeta(n_arms),
        'ThompsonSamplingGaussian': lambda: ThompsonSamplingGaussian(n_arms),
    }
    if isinstance(algo, str):
        if algo not in factories:
            raise ValueError(f"Unknown algorithm: {algo!r}")
        make_bandit = factories[algo]
    else:
        # A pre-built instance is used as-is, as before.
        def make_bandit():
            return algo

    results = []

    for sim in range(num_sims):
        # The ``algo`` parameter is no longer rebound, so name resolution
        # happens identically for every simulation.
        bandit = make_bandit()
        bandit.reset(n_arms)
        history = []
        for horizon in range(horizons):
            arm = bandit.select_next_arm()
            condition = arms_list[arm]
            substrate = select_substrate_ohe(Substrate_list)

            field = get_field(data, condition, substrate)

            history.append({
                'sim': sim + 1,
                'horizon': horizon + 1,
                'condition': condition,
                'substrate': substrate,
                'yield': field
            })
            bandit.update(arm, field)

        results.extend(history)
        print(f"Simulation {sim + 1} completed.")
    return results

if __name__ == '__main__':
    # Run every bandit algorithm and write one CSV per algorithm.
    data,arms_list,Substrate_list=data_anlyse()
    num=500
    algos=['Random','Exploit','EpsilonGreedy','ETC','Pursuit', 'ThompsonSamplingGaussian','UCB1','BayesUCBGaussian','ThompsonSamplingBeta','BayesUCBBeta']

    path=r'C:\Users\Administrator\Desktop\supply data\Basic experiment\C–N Cross-Coupling\results\MAB'
    # Create the output directory up front so a long run cannot fail at write time.
    os.makedirs(path, exist_ok=True)

    for algo in algos:
        history = run_simulation(
            algo=algo,
            num_sims=num,
            horizons=50,
            arms_list=arms_list,
            Substrate_list=Substrate_list,
            data=data)

        history_df = pd.DataFrame(history)
        # os.path.join replaces the f-string that contained the invalid
        # escape sequence '\{'.
        history_df.to_csv(os.path.join(path, f'{algo}.csv'), index=False)

Simulation 1 completed.
Simulation 2 completed.
Simulation 3 completed.
Simulation 4 completed.
Simulation 5 completed.
Simulation 6 completed.
Simulation 7 completed.
Simulation 8 completed.
Simulation 9 completed.
Simulation 10 completed.
Simulation 11 completed.
Simulation 12 completed.
Simulation 13 completed.
Simulation 14 completed.
Simulation 15 completed.
Simulation 16 completed.
Simulation 17 completed.
Simulation 18 completed.
Simulation 19 completed.
Simulation 20 completed.
Simulation 21 completed.
Simulation 22 completed.
Simulation 23 completed.
Simulation 24 completed.
Simulation 25 completed.
Simulation 26 completed.
Simulation 27 completed.
Simulation 28 completed.
Simulation 29 completed.
Simulation 30 completed.
Simulation 31 completed.
Simulation 32 completed.
Simulation 33 completed.
Simulation 34 completed.
Simulation 35 completed.
Simulation 36 completed.
Simulation 37 completed.
Simulation 38 completed.
Simulation 39 completed.
Simulation 40 completed.
Simulatio