# H + COOH Coverage

#### Imports

In [19]:
import numpy as np
import matplotlib.pyplot as plt
import xgboost as xgb
import csv
import time

np.random.seed(1)

#### Load energy prediction models

In [19]:
#### Load models
#hollow_site_model = xgb.Booster({'nthread': 8})
#hollow_site_model.load_model("../models/"+"Hollow_site.model")

#on_top_site_model = xgb.Booster({'nthread': 8})
#on_top_site_model.load_model("../models/"+"on_top_site.model")

#models = {"H": hollow_site_model, "COOH": on_top_site_model}

#### Setup

In [3]:
# Lateral size of the simulated surface (sites along x and along y)
dim_x = dim_y = 100

# Elements that make up the surface
metals = ['Ag', 'Au', 'Cu', 'Pd', 'Pt']

# Other parameters (Coming)
# Voltage sweep: first value, last value, and the value voltage_sweep()
# divides the voltage range by to decide how many points to generate
start, end, scan_rate = 0.025, 1.05, 0.010

### Run the simulation
#### Separate H and COOH - Precompute all binding energies

In [11]:
surface = initialize_surface(dim_x, dim_y) # I should combine this with the occupance matrix in a dictionary

surface["ads_hol"]

array([['empty', 'empty', 'empty', ..., 'empty', 'empty', 'empty'],
       ['empty', 'empty', 'empty', ..., 'empty', 'empty', 'empty'],
       ['empty', 'empty', 'empty', ..., 'empty', 'empty', 'empty'],
       ...,
       ['empty', 'empty', 'empty', ..., 'empty', 'empty', 'empty'],
       ['empty', 'empty', 'empty', ..., 'empty', 'empty', 'empty'],
       ['empty', 'empty', 'empty', ..., 'empty', 'empty', 'empty']],
      dtype='<U5')

In [44]:
surface["H_V"][3][12]

nan

In [33]:
# Quick smoke test of the CSV logging helpers.
log_folder = "../Coverage_logs/"
# Base name only: the ".csv" extension is appended where the path is built.
# (Previously the extension was included here as well, which produced a file
# called "test_log.csv.csv".)
simulation_name = "test_log"

# Header row of the log file
column_names = ['Adsorbate', 'Action', 'Gibbs E', 'Border_voltage', 'Voltage', 'Index x', 'Index y', 'Timestamp']
create_log_file(log_folder + simulation_name + ".csv", column_names)

# One dummy row with the same number of fields as the header
data = ['Adsorbate1', 'Action1', 1.23, 4.56, 7.89, 2, 3, time.time()]
append_to_log_file(log_folder + simulation_name + ".csv", data)

In [41]:
## Define the simulation name for the log file
## (base name only; ".csv" is appended wherever the path is built)
log_folder = "../Coverage_logs/"
simulation_name = "test_log"

## Create the log file with column names
column_names = ['Adsorbate', 'Action', 'Gibbs E', 'Border_voltage', 'Voltage', 'Index x', 'Index y', 'Timestamp']
create_log_file(log_folder + simulation_name + ".csv", column_names)

## Create the surface with: 'atoms', 'ads_top', 'ads_hol', 'COOH_G', 'H_G', 'COOH_V', 'H_V'
surface = initialize_surface(dim_x, dim_y) # Combined with the occupancy matrix in a dictionary

## Precompute all binding energies
models = "_" # Placeholder: predict_G_TEST draws random energies and never reads `models`, so no trained model is needed to continue with the simulation
surface = precompute_binding_energies(surface, dim_x, dim_y, models, predict_G_TEST)
# Reference time; decision_to_leave reads this global when it writes log rows
start_time = time.time()
## Set a voltage
for voltage in voltage_sweep(start, end, scan_rate): # Approximates the desired scan rate
    
    ## Look through all sites
    
    # NOTE(review): this call does not match the signature defined in this
    # notebook, decision_to_leave(voltage, V_border, dim_x, dim_y, surface),
    # and G, x and y are not assigned anywhere in this cell, so the call
    # cannot run as written. Update the arguments once the function's final
    # interface is settled.
    surface = decision_to_leave(G = G, site_x = x, site_y = y, surface = surface) ## I could REMOVE or ADD stuff right here for simplicity. Ideally the most negative energy would be added first, but with small voltage steps it should not matter, and doing it like this avoids a large complication
        # decision_to_leave loops over both adsorbates; the decision taken here is that it should also loop over all sites
## and predict all adsorption energies

## Log stats of the surface

In [None]:
## I should probably have a log that saves for each voltage: the voltage, the number of adsorbates, number of neighbours 

In [40]:
surface

{'atoms': array([[['Ag', 'Pd', 'Pd'],
         ['Pd', 'Pd', 'Ag'],
         ['Pd', 'Pt', 'Pd'],
         ...,
         ['Au', 'Cu', 'Cu'],
         ['Ag', 'Pd', 'Ag'],
         ['Pd', 'Au', 'Au']],
 
        [['Au', 'Pd', 'Pt'],
         ['Pt', 'Pt', 'Cu'],
         ['Au', 'Ag', 'Ag'],
         ...,
         ['Au', 'Ag', 'Pt'],
         ['Pt', 'Pd', 'Ag'],
         ['Cu', 'Pd', 'Cu']],
 
        [['Pt', 'Pd', 'Au'],
         ['Pt', 'Cu', 'Cu'],
         ['Pt', 'Pt', 'Pd'],
         ...,
         ['Au', 'Cu', 'Cu'],
         ['Au', 'Pd', 'Pd'],
         ['Pt', 'Au', 'Pt']],
 
        ...,
 
        [['Ag', 'Pt', 'Au'],
         ['Cu', 'Pt', 'Au'],
         ['Ag', 'Pt', 'Pt'],
         ...,
         ['Pt', 'Au', 'Ag'],
         ['Cu', 'Pt', 'Cu'],
         ['Pt', 'Pt', 'Pt']],
 
        [['Ag', 'Pt', 'Cu'],
         ['Pd', 'Ag', 'Cu'],
         ['Pd', 'Pt', 'Cu'],
         ...,
         ['Ag', 'Pt', 'Pd'],
         ['Au', 'Cu', 'Pt'],
         ['Au', 'Pd', 'Cu']],
 
        [['Au', 'Pt',

#### Plot the statistics

In [None]:
# Make a nice plot showing how many neighbours, etc. there are at each voltage

#### Functions (To be moved to separate .py file)

In [37]:
def initialize_surface(dim_x, dim_y, metal_symbols=None):
    """Create a randomly mixed three-layer surface with empty adsorption sites.

    The atoms are an (as even as possible) split of the given elements,
    shuffled with ``np.random.shuffle`` - seed NumPy's global random state
    beforehand for reproducible surfaces.

    Parameters
    ----------
    dim_x, dim_y : int
        Number of sites along the two lateral directions.
    metal_symbols : sequence of str, optional
        Elements to distribute over the surface. Defaults to the
        module-level ``metals`` list.

    Returns
    -------
    dict
        ``"atoms"``   : (dim_x, dim_y, 3) array of element symbols.
        ``"ads_top"`` : (dim_x, dim_y) array of strings, all ``"empty"``.
        ``"ads_hol"`` : (dim_x, dim_y) array of strings, all ``"empty"``.
        ``"COOH_G"``, ``"H_G"`` : (dim_x, dim_y) float arrays of NaN
        (binding energies, filled in later).
        ``"COOH_V"``, ``"H_V"`` : (dim_x, dim_y) float arrays of NaN
        (border voltages, filled in later).

    Raises
    ------
    ValueError
        If no elements are given.
    """
    if metal_symbols is None:
        metal_symbols = metals
    symbols = np.asarray(metal_symbols)
    if symbols.size == 0:
        raise ValueError("at least one metal is required to build a surface")

    dim_z = 3
    n_atoms = dim_x * dim_y * dim_z

    # Even split: every element appears `per_metal` times. When the grid size
    # is not a multiple of the number of elements, the first `leftover`
    # elements get one extra atom so that the reshape below always succeeds.
    per_metal, leftover = divmod(n_atoms, symbols.size)
    atom_pool = np.repeat(symbols, per_metal)
    if leftover:
        atom_pool = np.concatenate([atom_pool, symbols[:leftover]])
    np.random.shuffle(atom_pool)
    surf_atoms = atom_pool.reshape((dim_x, dim_y, dim_z))

    # Adsorbate occupancy. The two site types must be independent arrays;
    # previously "ads_hol" pointed at the on-top array, so adsorbing on one
    # site type silently occupied the other as well.
    # NOTE: these arrays have dtype '<U5', so adsorbate names longer than
    # five characters would be truncated on assignment.
    surf_ads_top = np.full((dim_x, dim_y), "empty")
    surf_ads_hol = np.full((dim_x, dim_y), "empty")

    # Binding energies (COOH: on-top sites, H: hollow sites)
    surf_COOH_G = np.full((dim_x, dim_y), np.nan)
    surf_H_G = np.full((dim_x, dim_y), np.nan)

    # Voltage at which each site adsorbs/desorbs
    surf_COOH_V = np.full((dim_x, dim_y), np.nan)
    surf_H_V = np.full((dim_x, dim_y), np.nan)

    surf = {
        "atoms": surf_atoms,
        "ads_top": surf_ads_top,
        "ads_hol": surf_ads_hol,
        "COOH_G": surf_COOH_G,
        "H_G": surf_H_G,
        "COOH_V": surf_COOH_V,
        "H_V": surf_H_V,
    }
    return surf

def voltage_sweep(start, end, scan_rate):
    """Return evenly spaced voltages from ``start`` to ``end`` (both included).

    The spacing is as close to ``scan_rate`` as an integer number of steps
    allows. N steps need N + 1 points; using only N points (as before) made
    every step larger than requested and returned an empty sweep when
    ``start == end``.

    Parameters
    ----------
    start, end : float
        First and last voltage of the sweep; ``end`` may be below ``start``
        for a descending sweep.
    scan_rate : float
        Desired voltage difference between consecutive points (positive).

    Returns
    -------
    numpy.ndarray
        1-D array of voltages.

    Raises
    ------
    ValueError
        If ``scan_rate`` is not positive.
    """
    if scan_rate <= 0:
        raise ValueError("scan_rate must be positive")
    n_steps = int(round(abs(end - start) / scan_rate))
    return np.linspace(start, end, n_steps + 1)

#def voltage_circle
#def voltage_up_down (All these could be worked into a single function)

def predict_G(ads, site_x, site_y, surface, models):
    """Return the predicted binding energy without taking the voltage into account.

    Parameters
    ----------
    ads : str
        Adsorbate, ``"H"`` (hollow site) or ``"COOH"`` (on-top site).
    site_x, site_y : int
        Index of the site on the surface.
    surface : dict
        Surface dictionary; only ``surface["atoms"]`` is read.
    models : mapping
        Maps the adsorbate name to an object with a ``predict`` method.

    Returns
    -------
    Whatever ``models[ads].predict`` returns for the site's input vector.

    Raises
    ------
    ValueError
        If ``ads`` is neither ``"H"`` nor ``"COOH"``. Previously such a call
        fell through both branches and failed with an unrelated error.
    """
    # Make vectors from the surface structure. hollow_site_vector and
    # on_top_site_vector are expected to be defined elsewhere in the project.
    if ads == "H":
        input_vector = hollow_site_vector(surface["atoms"], site_x, site_y)
    elif ads == "COOH":
        input_vector = on_top_site_vector(surface["atoms"], site_x, site_y)
    else:
        raise ValueError("unknown adsorbate %r, expected 'H' or 'COOH'" % (ads,))

    # Use the Gibbs free energy prediction model.
    # NOTE(review): the original author flagged this call as unchecked -
    # confirm that the vector helpers return the input type that
    # models[ads].predict expects.
    G = models[ads].predict(input_vector)

    return G

def predict_G_TEST(ads, site_x, site_y, surface, models):
    """Return a random binding energy as a stand-in for the real model.

    Has the same signature as ``predict_G`` so the two can be swapped;
    ``site_x``, ``site_y``, ``surface`` and ``models`` are not used. The value
    is drawn from NumPy's global random state.

    Parameters
    ----------
    ads : str
        Adsorbate, ``"H"`` or ``"COOH"``.

    Returns
    -------
    float
        One sample from a normal distribution chosen per adsorbate.

    Raises
    ------
    ValueError
        If ``ads`` is neither ``"H"`` nor ``"COOH"`` (previously this ended in
        an UnboundLocalError).
    """
    if ads == "H":
        # mean 0.25 eV, 1-sigma spread of about 0.5 eV
        G = np.random.normal(0.25, 0.5)
    elif ads == "COOH":
        # mean 1.53 eV, 1-sigma spread of about 0.44 eV
        G = np.random.normal(1.53, 0.44)
    else:
        raise ValueError("unknown adsorbate %r, expected 'H' or 'COOH'" % (ads,))
    return G

def calc_V_border(ads, G):
    """Return the border voltage at which the adsorbate adsorbs or desorbs.

    Parameters
    ----------
    ads : str
        Adsorbate, ``"H"`` or ``"COOH"``.
    G : float
        Binding energy of the adsorbate on the site.

    Returns
    -------
    float
        ``-G`` for H and ``G`` for COOH.

    Raises
    ------
    ValueError
        If ``ads`` is neither ``"H"`` nor ``"COOH"`` (previously this ended in
        an UnboundLocalError).
    """
    if ads == "H":
        return -G
    if ads == "COOH":
        return G
    raise ValueError("unknown adsorbate %r, expected 'H' or 'COOH'" % (ads,))

def decision_to_leave(voltage, V_border, dim_x, dim_y, surface):
    """Adsorb or desorb H and COOH on every site for the given voltage.

    For each site (bridge sites are not handled yet) and each adsorbate the
    function checks whether the adsorbate should be present at ``voltage``:
    H belongs on its hollow site while ``voltage`` is below the border
    voltage, COOH on its on-top site while ``voltage`` is above it. Sites
    whose occupancy disagrees with that are updated in place, and each change
    is appended to the CSV log.

    Fixes compared with the previous version: desorption now actually empties
    the site (it used to write the adsorbate back), and the logged time is the
    elapsed time ``time.time() - start_time`` instead of its negative.

    Parameters
    ----------
    voltage : float
        Voltage currently applied.
    V_border : float or None
        Border voltage used for every site and both adsorbates. Pass ``None``
        to use the per-site values stored in ``surface["H_V"]`` and
        ``surface["COOH_V"]``.
    dim_x, dim_y : int
        Number of sites along each lateral direction.
    surface : dict
        Surface dictionary; reads and writes ``"ads_hol"`` / ``"ads_top"`` and
        reads ``"H_G"``, ``"H_V"``, ``"COOH_G"`` and ``"COOH_V"``.

    Returns
    -------
    dict
        The same ``surface`` object, modified in place.

    Notes
    -----
    Logging relies on the module-level names ``log_folder``,
    ``simulation_name``, ``start_time`` and ``append_to_log_file``; they are
    only touched when a site actually changes.
    """
    # Each adsorbate lives on its own type of site.
    site_types = {"H": "ads_hol", "COOH": "ads_top"}

    for site_x in range(dim_x):
        for site_y in range(dim_y):
            for ads in ("H", "COOH"):
                site_type = site_types[ads]

                if V_border is None:
                    border = surface[ads + "_V"][site_x, site_y]
                else:
                    border = V_border

                # Is something sitting at the site right now?
                is_ads_there = bool(surface[site_type][site_x][site_y] != "empty")

                # Is something supposed to be there at this voltage?
                if ads == "H":
                    supposed_to = bool(voltage < border)
                else:
                    supposed_to = bool(voltage > border)

                # Nothing to do when the site is already in the right state.
                if is_ads_there == supposed_to:
                    continue

                if supposed_to:
                    # Missing, but should be there: adsorb.
                    surface[site_type][site_x][site_y] = ads
                    action = "adsorption"
                else:
                    # Present, but should not be: desorb.
                    surface[site_type][site_x][site_y] = "empty"
                    action = "desorption"

                # Columns: Adsorbate, Action, Gibbs E, Border_voltage,
                #          Voltage, Index x, Index y, Timestamp
                data = [
                    ads,
                    action,
                    surface[ads + "_G"][site_x, site_y],
                    surface[ads + "_V"][site_x, site_y],
                    voltage,
                    site_x,
                    site_y,
                    time.time() - start_time,
                ]
                append_to_log_file(log_folder + simulation_name + ".csv", data)

    return surface

def precompute_binding_energies(surface, dim_x, dim_y, models, predict_G_function):
    """Fill in the binding energy and border voltage of every site.

    ``predict_G_function`` is called with the keyword arguments ``ads``,
    ``site_x``, ``site_y``, ``surface`` and ``models``; its result is stored
    in ``surface[ads + "_G"]`` and the matching border voltage from
    ``calc_V_border`` in ``surface[ads + "_V"]``.

    The whole grid is processed for H first and for COOH afterwards, walking
    x in the outer loop and y in the inner one, so a predictor that draws
    random numbers sees its calls in a fixed order.

    Returns the same ``surface`` dictionary, modified in place.
    """
    # With more adsorbates in the future, only this tuple needs to grow.
    for ads in ("H", "COOH"):
        energy_key = ads + "_G"
        voltage_key = ads + "_V"
        for x in range(dim_x):
            for y in range(dim_y):
                surface[energy_key][x][y] = predict_G_function(
                    ads = ads, site_x = x, site_y = y, surface = surface, models = models
                )
                # Read the stored value back so the border voltage is derived
                # from exactly what the energy array holds.
                surface[voltage_key][x][y] = calc_V_border(ads = ads, G = surface[energy_key][x][y])

    return surface

def count_pairs(surface):
    """Placeholder for counting adsorbate pairs on ``surface``.

    The counting logic has not been written yet. The stub used to return an
    undefined name and therefore failed with a NameError; it now states
    explicitly that it is unimplemented.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("count_pairs has not been implemented yet")

def create_log_file(file_name, column_names):
    """Start a new CSV log at ``file_name`` containing only the header row.

    The file is opened in write mode, so an existing file with the same name
    is overwritten. Returns ``None``.
    """
    # newline='' leaves line endings to the csv module
    with open(file_name, 'w', newline='') as log_handle:
        csv.writer(log_handle).writerow(column_names)

def append_to_log_file(file_name, data):
    """Append ``data`` as one CSV row to the end of ``file_name``.

    The file is opened in append mode, so earlier rows are kept (and the file
    is created if it does not exist). Returns ``None``.
    """
    # newline='' leaves line endings to the csv module
    with open(file_name, 'a', newline='') as log_handle:
        csv.writer(log_handle).writerow(data)

In [45]:
ads = "H"
print(surface[ads+"_G"])
surface.keys()

[[ 0.17280566 -0.19968336  0.42750619 ...  0.79594875 -0.04074284
   0.55083234]
 [ 0.38837039 -0.1737978   0.52458573 ...  0.85998104 -0.16502527
  -0.01095899]
 [-0.22112595  0.52970486 -0.14779448 ...  0.54425517  0.15235065
   0.87593994]
 ...
 [-0.03961668  0.13187917  0.30173726 ... -0.11850723  1.5944644
   0.49193473]
 [ 0.45507857  0.93111877  0.06668144 ...  0.22764701  1.08070272
   0.0991517 ]
 [ 0.6149974  -0.17856406  0.78286791 ... -1.81847171  0.00651988
   1.5217975 ]]


dict_keys(['atoms', 'ads_top', 'ads_hol', 'COOH_G', 'H_G', 'COOH_V', 'H_V'])

#### Just quickly checking how the energies look

In [30]:
import csv
import statistics

# Feature table to inspect and the column holding the adsorption energies.
# NOTE(review): the header listing printed further down in this notebook shows
# 'G_ads(eV)' without a space - confirm this name matches the current file.
csv_file = '../csv_features/COOH_features.csv'
target_column = "G_ads (eV)"

# Parse the target entry of every data row as a float
with open(csv_file, 'r') as file:
    reader = csv.DictReader(file)
    target_values = list(map(lambda record: float(record[target_column]), reader))

# Arithmetic mean and sample standard deviation of the energies
mean = statistics.mean(target_values)
spread = statistics.stdev(target_values)

print("Mean:", mean)
print("Spread (Standard Deviation):", spread)

Mean: 1.5326902202380952
Spread (Standard Deviation): 0.4410240645509031


In [28]:
import csv

# Feature table whose column names are inspected
csv_file = '../csv_features/COOH_features.csv'

# Open the .csv file; only its header row is needed here
with open(csv_file, 'r') as file:
    # DictReader takes the column names from the first row of the file
    reader = csv.DictReader(file)
    
    # fieldnames is read lazily from the file on first access, so this must
    # happen while the file is still open (i.e. inside the with block)
    header_names = reader.fieldnames

# header_names now holds the list of column names


In [29]:
header_names

['feature0',
 'feature1',
 'feature2',
 'feature3',
 'feature4',
 'feature5',
 'feature6',
 'feature7',
 'feature8',
 'feature9',
 'feature10',
 'feature11',
 'feature12',
 'feature13',
 'feature14',
 'feature15',
 'feature16',
 'feature17',
 'feature18',
 'feature19',
 'G_ads(eV)',
 'slab db row',
 'COOH_C_adsorbed_out.dbrow']

#### Logik

Oh shit, kunne man ikke bare predicte alle energier én gang? Det er simpelt i den model, hvor ingen nabointeraktioner tages med. Der skal også komme en model, som tager højde for H og COOH ved siden af hinanden. Men der kunne alle sites og så vinkel i alle retninger gemmes i et look-up table. Så predicter man først alle energier én gang, og så har man bare gemt en matrix med de spændinger, som ville få et adsorbat til at hoppe på/af.

Måske gem alle.

I surface dict kan jeg bare gemme ekstra lag, som hedder H_G, H_Voltage (hvor hvis voltage er UNDER hopper den på) og samme med COOH_G og COOH_Voltage (Hvor hvis voltage er OVER hopper den på) Så kan alt det preprocessing laves inden man "kører simulationen", som så bliver utroligt simpelt. Oh shit et speedup det kan blive til.

Hvad er logikken når man sætter en *H ved siden af en *COOH eller en *COOH ved siden af en *H?
Skal man for hver H der allerede sidder se, om der gerne vil sætte sig en COOH ved siden af, med den model der tager højde for begge?
ELLER
Skal sitet til en COOH ved siden af en *H som allerede sidder der først være favorabelt for en COOH, og så tjekker man med *H+*COOH-modellen om det er favorabelt, når man tager højde for nabointeraktion gennem modellen?

In [30]:
np.abs(start - end) / number of numbers = scan_rate

0.20500000000000002