# H + COOH Coverage

#### Imports

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import xgboost as xgb
import csv
import time
from tqdm import tqdm

np.random.seed(1)

#### Load energy prediction models

In [3]:
#### Load models
#hollow_site_model = xgb.Booster({'nthread': 8})
#hollow_site_model.load_model("../models/"+"Hollow_site.model")

#on_top_site_model = xgb.Booster({'nthread': 8})
#on_top_site_model.load_model("../models/"+"on_top_site.model")

#models = {"H": hollow_site_model, "COOH": on_top_site_model}

#### Setup

In [5]:
# Size of the surface
dim_x, dim_y = 5, 5

# Metals
metals = ['Ag', 'Au', 'Cu', 'Pd', 'Pt']
site_types = {"H": "ads_hol", "COOH": "ads_top"}

# Other parameters (Coming)
# Voltages
start = 0.025
end = 0.035
scan_rate = 0.005

## Define the simulation name for the log file
log_folder = "../Coverage_logs/"
simulation_name = "test_log_bugsquash"

### Run the simulation
#### Seperate H and COOH - Precompute all binding energies 

In [9]:
## Create the log file with column names
column_names = ['Adsorbate', 'Action', 'Gibbs E', 'Border_voltage', 'Voltage', 'Index x', 'Index y', 'site_type', 'Timestamp']
create_log_file(log_folder + simulation_name + ".csv", column_names)

## Create the surface with: 'atoms', 'ads_top', 'ads_hol', 'COOH_G', 'H_G', 'COOH_V', 'H_V'
surface = initialize_surface(dim_x, dim_y) # Combined with the occupance matrix in a dictionary

## Precompute all binding energies
models = "_" #Put this here, when just using the quick and easy random guesses for G's. Now we can continue on the simulation!
surface = precompute_binding_energies(surface, dim_x, dim_y, models, predict_G_TEST)

start_time = time.time()

## Set a voltage
for voltage in tqdm(voltage_sweep(start, end, scan_rate)): # Approximates the desired scan rate
    
    ## Look through the whole surface, both adsorbates, and ad/desorb as it should and log actions in log file
    surface = look_at_all_sites_and_adsorbates(surface, dim_x, dim_y, site_types, voltage) ## I could REMOVE or ADD stuff right here for simplicity. I guess ideally I would add the most negative energy first, but if I just do small steps in voltage it shouldn't matter. I can avoid a huge complication by doing it like this

    ## Log stats of the surface # Voltage, n_H, n_COOH, n_neighbours
    #statistics_log
    

100%|████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 926.82it/s]


In [None]:
def initialize_statistics_log():
    statistics_log = 

#### Plot the statistics

In [None]:
# Make a nice plot showing how many neighbours, etc. there are at each voltage

#### Functions (To be moved to seperate .py file)

In [7]:
def initialize_surface(dim_x, dim_y): #Is still random - could be used with a seed in the name of reproduceability
    dim_z = 3
    surface_list = np.array([int(dim_x*dim_y*dim_z/len(metals))*[metals[metal_number]] for metal_number in range(len(metals))]).flatten() #Jack had a way shorter way of doing this, but I think it was random drawing instead of ensuring a perfectly even split
    np.random.shuffle(surface_list) #Shuffle list
    surf_atoms = np.reshape(surface_list, (dim_x, dim_y, dim_z)) #Reshape list to the
    
    # Adsorbates
    surf_ads_top = np.reshape(["empty"]*dim_x*dim_y, (dim_x, dim_y))
    surf_ads_hol = np.reshape(["empty"]*dim_x*dim_y, (dim_x, dim_y))
    
    # Binding energies
    surf_COOH_G = np.reshape([np.nan]*dim_x*dim_y, (dim_x, dim_y))# On-top sites
    surf_H_G    = np.reshape([np.nan]*dim_x*dim_y, (dim_x, dim_y))# On-top sites# Hollow sites
    
    # Ad/desorbs at voltage (At which voltage is the binding energy 0?)
    surf_COOH_V = np.reshape([np.nan]*dim_x*dim_y, (dim_x, dim_y))# On-top sites
    surf_H_V    = np.reshape([np.nan]*dim_x*dim_y, (dim_x, dim_y))# On-top sites# Hollow sites
    
    surf = {"atoms": surf_atoms, "ads_top": surf_ads_top, "ads_hol": surf_ads_hol, "COOH_G": surf_COOH_G, "H_G": surf_H_G, "COOH_V": surf_COOH_V, "H_V": surf_H_V} #This line had a but that took me two days to find... Pass by reference is such a smart feature (y)
    return surf

def voltage_sweep(start, end, scan_rate):
    return np.linspace(start, end, int(np.abs(start - end) / scan_rate))

#def voltage_circle
#def voltage_up_down (All these could be worked into a single function)

def predict_G(ads, site_x, site_y, surface, models):
    """This function returns the predicted binding energy without taking the voltage into account"""
    # Make vectors from the surface structure
    if ads == "H":
        input_vector = hollow_site_vector(surface["atoms"], site_x, site_y)
    if ads == "COOH":
        input_vector = on_top_site_vector(surface["atoms"], site_x, site_y)
    
    # Use the gibbs free energy prediction model
    G = models[ads].predict(input_vector) #Check out, that this is the way to input

    return G

def predict_G_TEST(ads, site_x, site_y, surface, models):
    if ads == "H":
        G = np.random.normal(0.25, 0.25) #mean 0.25 eV 1 sigma spread omkring 0.5 eV
    if ads == "COOH":
        G = np.random.normal(1.53, 0.44)
    return G

def calc_V_border(ads, G):
    """This function returns the border-voltage, at which the adsorbate adsorbs or desorbs"""
    
    if ads == "H":
        V_border = - G
    if ads == "COOH":
        V_border = G
    
    return V_border

def precompute_binding_energies(surface, dim_x, dim_y, models, predict_G_function):
    for x, y in [(x, y) for x in range(dim_x) for y in range(dim_y)]: # I could randomise this, so I go through all sites in a random order
        ## What is the binding energy # With more adsorbates in future can be exchanged for a loop through the adsorbates
        ads = "H"
        surface["H_G"][x][y] = predict_G_function(ads = ads, site_x = x, site_y = y, surface = surface, models = models) ## A new function that wraps/uses the XGBoost model
        surface["H_V"][x][y] = calc_V_border(ads = ads, G = surface["H_G"][x][y])
        
    for x, y in [(x, y) for x in range(dim_x) for y in range(dim_y)]:
        ads = "COOH"
        surface["COOH_G"][x][y] = predict_G_function(ads = ads, site_x = x, site_y = y, surface = surface, models = models) ## A new function that wraps/uses the XGBoost model
        surface["COOH_V"][x][y] = calc_V_border(ads = ads, G = surface["COOH_G"][x][y])

    return surface

def count_pairs(surface):
    
    return pairs

def create_log_file(file_name, column_names):
    with open(file_name, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(column_names)
    return None

def append_to_log_file(file_name, data):
    with open(file_name, 'a', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(data)
    return None

def count_statistics(surface, statistics_log): # Should I input and return the statistics log each time or just append to it from inside the function? That's probably the way.
    ## Count number of the different adsorbates:
    # Count H adsorbates
    
    # Count COOH adsorbates
    
    # Make a list of all id-pairs, where we should check for H and COOH
    
    return None

## Rewriting the decision to leave function

In [8]:
#def decision_to_leave(voltage, dim_x, dim_y, surface):
#    """This function looks through all sites and both adsorbates and determines, based on the precomputed binding energies and border voltages
#       The function also adsorbs and desorbs or does nothing, whichever is appropriate, and returns the surface.
#       When an adsorbate is adsorbed or desorbed, a line is written to a log file. This is done in order to make debugging possible later, if needed"""
#    
#    ## Look through all sites (not bridge sites yet) by index
#    for site_x, site_y in [(x, y) for x in range(dim_x) for y in range(dim_y)]:
#        
#        ## Look at both adsorbates
#        for ads in ["H", "COOH"]: #These 5 lines could actually be done in 1 line (with zip), but this is more readable
#            if ads == "H":
#                site_type = "ads_hol"
#                
#                # Is something supposed to be there?
#                V_border = surface["H_V"][site_x][site_y]
#                supposed_to  = voltage < V_border
#                
#                ## Is the adsorbant there?
#                is_ads_there = surface[site_type][site_x][site_y] != "empty"
#                
#                ## There are two instances, where something should be changed:
#                
#                # 1) An adsorbate is not there, but it is supposed to!
#                
#                if (is_ads_there == False) and (supposed_to == True):
#                
#                    # Adsorb the adsorbates
#                    surface[site_type][site_x][site_y] = ads
#                    ## Write to log:
#                    # column_names = ['Adsorbate', 'Action',    'Gibbs E',                          'Border_voltage',                 'Voltage','Index x', 'Index y', 'site_type' 'Timestamp']
#                    data =           [ads,         "adsorption", surface[ads+"_G"][site_x, site_y], surface[ads+"_V"][site_x, site_y], voltage,  site_x,    site_y,    site_type,  time.time() - start_time]
#                    append_to_log_file(log_folder + simulation_name + ".csv", data)
#                
#                # 2) An adsorbate is there, and it is not supposed to!
#                
#                if (is_ads_there == True) and (supposed_to == False):
#                    # Desorb the adsorbates
#                    surface[site_type][site_x][site_y] = "empty"
#                    # Write to log:
#                     # column_names = ['Adsorbate', 'Action',    'Gibbs E',                          'Border_voltage',                 'Voltage','Index x', 'Index y', 'site_type' 'Timestamp']
#                    data =           [ads,         "desorption", surface[ads+"_G"][site_x, site_y], surface[ads+"_V"][site_x, site_y], voltage,  site_x,    site_y,     site_type,  time.time() - start_time]
#                    append_to_log_file(log_folder + simulation_name + ".csv", data)
#                
#            if ads == "COOH":
#                site_type = "ads_top"
#                
#                # Is something supposed to be there?
#                V_border = surface["COOH_V"][site_x][site_y]
#                supposed_to  = voltage > V_border
#                
#                ## Is the adsorbant there?
#                is_ads_there = surface[site_type][site_x][site_y] != "empty"
#                ## There are two instances, where something should be changed:
#                
#                # 1) An adsorbate is not there, but it is supposed to!
#                
#                if (is_ads_there == False) and (supposed_to == True):
#                
#                    # Adsorb the adsorbates
#                    surface[site_type][site_x][site_y] = ads
#                    ## Write to log:
#                    # column_names = ['Adsorbate', 'Action',    'Gibbs E',                          'Border_voltage',                 'Voltage','Index x', 'Index y', 'site_type' 'Timestamp']
#                    data =           [ads,         "adsorption", surface[ads+"_G"][site_x, site_y], surface[ads+"_V"][site_x, site_y], voltage,  site_x,    site_y,    site_type,  time.time() - start_time]
#                    append_to_log_file(log_folder + simulation_name + ".csv", data)
#                
#                # 2) An adsorbate is there, and it is not supposed to!
#                
#                if (is_ads_there == True) and (supposed_to == False):
#                    # Desorb the adsorbates
#                    surface[site_type][site_x][site_y] = "empty"
#                    # Write to log:
#                     # column_names = ['Adsorbate', 'Action',    'Gibbs E',                          'Border_voltage',                 'Voltage','Index x', 'Index y', 'site_type' 'Timestamp']
#                    data =           [ads,         "desorption", surface[ads+"_G"][site_x, site_y], surface[ads+"_V"][site_x, site_y], voltage,  site_x,    site_y,     site_type,  time.time() - start_time]
#                    append_to_log_file(log_folder + simulation_name + ".csv", data)
#    
#    return surface

#################################### Here starts the new smaller functions #####################################

def look_at_all_sites_and_adsorbates(surface, dim_x, dim_y, site_types, voltage):
    
    ## Look through all sites (not bridge sites yet) by index
    for site_x, site_y in [(x, y) for x in range(dim_x) for y in range(dim_y)]:
        
        ## Look through all adsorbates and their respective adsorption sites
        for ads, site_type in [(ads, site_types[ads]) for ads in ["H", "COOH"]]:
            #print(site_x, site_y, ads, site_type)
            
            # Make the decision to adsorb/desorb or do nothing with this function:
            surface = decision_to_leave(surface, site_x, site_y, ads, site_type, voltage)
    
    return surface

def decision_to_leave(surface, site_x, site_y, ads, site_type, voltage):
    ## Figure out if anything should be ad or desorbed
    
    # Is the adsorbate sitting at the site already?
    contents_of_site = surface[site_type][site_x][site_y]
    is_ads_there = contents_of_site != "empty"
    
    # Should the adsorbate be there? #tested, seems to work
    V_border = surface[ads+"_V"][site_x][site_y]
    supposed_to = voltage > V_border
    
    if (is_ads_there == False) and (supposed_to == True):
        # Adsorb
        surface[site_type][site_x][site_y] = ads
        
        # Save line to log file
        append_to_log_file(log_folder + simulation_name + ".csv", [ads, "adsorb", surface[ads+"_G"][site_x, site_y], surface[ads+"_V"][site_x, site_y], voltage, site_x, site_y, site_type, time.time() - start_time])
        
    if (is_ads_there == True) and (supposed_to == False):
        # Desorb
        surface[site_type][site_x][site_y] = "empty"
        
        # Save line to log file
        append_to_log_file(log_folder + simulation_name + ".csv", [ads, "desorption", surface[ads+"_G"][site_x, site_y], surface[ads+"_V"][site_x, site_y], voltage, site_x, site_y, site_type, time.time() - start_time])
    return surface

In [24]:
## Look through all sites (not bridge sites yet) by index
for site_x, site_y in [(x, y) for x in range(dim_x) for y in range(dim_y)]:
    
    ## Look through all adsorbates and their respective adsorption sites
    for ads, site_type in [(ads, site_types[ads]) for ads in ["H", "COOH"]]:
        print(site_x, site_y, ads, site_type)

0 0 H ads_hol
0 0 COOH ads_top
0 1 H ads_hol
0 1 COOH ads_top
0 2 H ads_hol
0 2 COOH ads_top
0 3 H ads_hol
0 3 COOH ads_top
0 4 H ads_hol
0 4 COOH ads_top
1 0 H ads_hol
1 0 COOH ads_top
1 1 H ads_hol
1 1 COOH ads_top
1 2 H ads_hol
1 2 COOH ads_top
1 3 H ads_hol
1 3 COOH ads_top
1 4 H ads_hol
1 4 COOH ads_top
2 0 H ads_hol
2 0 COOH ads_top
2 1 H ads_hol
2 1 COOH ads_top
2 2 H ads_hol
2 2 COOH ads_top
2 3 H ads_hol
2 3 COOH ads_top
2 4 H ads_hol
2 4 COOH ads_top
3 0 H ads_hol
3 0 COOH ads_top
3 1 H ads_hol
3 1 COOH ads_top
3 2 H ads_hol
3 2 COOH ads_top
3 3 H ads_hol
3 3 COOH ads_top
3 4 H ads_hol
3 4 COOH ads_top
4 0 H ads_hol
4 0 COOH ads_top
4 1 H ads_hol
4 1 COOH ads_top
4 2 H ads_hol
4 2 COOH ads_top
4 3 H ads_hol
4 3 COOH ads_top
4 4 H ads_hol
4 4 COOH ads_top


In [11]:
surface["ads_top"][2][3] = "empty"
contents_of_site = surface["ads_top"][2][3]
is_ads_there = contents_of_site != "empty"
print("is_ads_there:", is_ads_there)

# Should the adsorbate be there?
V_border = surface["COOH_V"][2][3]
supposed_to = 1.5 > V_border
print("supposed_to:", supposed_to)

if (is_ads_there == False) and (supposed_to == True):
    print("let's adsorb")
        #ADSORB

NameError: name 'surface' is not defined

In [47]:
surface["ads_top"][2][3] = "empty"
contents_of_site = surface["ads_top"][2][3]
is_ads_there = contents_of_site != "empty"
print("is_ads_there:", is_ads_there)

# Should the adsorbate be there?
V_border = surface["COOH_V"][2][3]
supposed_to = 1.1 > V_border
print("supposed_to:", supposed_to)

if (is_ads_there == True) and (supposed_to == False):
    print("let's desorb")
        #ADSORB

is_ads_there: False
supposed_to: False


In [40]:
surface["COOH_V"][2][3]

1.2455439482150428

In [17]:
ads = "COOH"
site_type = "ads_top"
site_x, site_y = 0, 0
# Is something supposed to be there?
V_border = surface["COOH_V"][site_x][site_y]
supposed_to  = voltage > V_border

## Is the adsorbant there?
is_ads_there = surface[site_type][site_x][site_y] != "empty"

## There are two instances, where something should be changed:

# 1) An adsorbate is not there, but it is supposed to!

if (is_ads_there == False) and (supposed_to == True):
    print("Vi tilføjer")

# 2) An adsorbate is there, and it is not supposed to!

if (is_ads_there == True) and (supposed_to == False):
    print("Vi fjerner")
    
if (is_ads_there == True) and (supposed_to == True):
    print("Den bliver på")
    
if (is_ads_there == False) and (supposed_to == False):
    print("Den bliver væk")

Den bliver væk


In [20]:
def count_elements(arr):
    unique_elements, counts = np.unique(arr, return_counts=True)
    counts_dict = dict(zip(unique_elements, counts))
    return counts_dict

count_elements(surface["ads_top"])

{'empty': 100}

### Debugging the coverage simulation:
- SO: H actually does get adsorbed onto "ads_top" sites, even though the function decision to leave never even gets ads = "H" and site_type = "ads_top" at the same time

- Figure out why H gets adsorbed onto 



I'll make the structure like this:

An outer function that goes through all sites and for each site both [H, ads_hol] and [COOH, ads_top] # DONE

An inner function, that makes decisions # DONE

A function, that does the logging # Already made

A function, that accepts a decision and actually adds or removes adsorbates. # Simply done in the preprevious function

This way I can actually test them one at a time

#### Just quickly checking how the energies look

In [30]:
import csv
import statistics

# Specify the file path and name
csv_file = '../csv_features/COOH_features.csv'

# Specify the target column header
target_column = "G_ads (eV)"

# Open the .csv file and read its contents using list comprehension
with open(csv_file, 'r') as file:
    # Create a CSV reader object
    reader = csv.DictReader(file)
    
    # Extract the values of the target column using list comprehension
    target_values = [float(row[target_column]) for row in reader]

# Calculate the mean and spread (standard deviation) of the target values
mean = statistics.mean(target_values)
spread = statistics.stdev(target_values)

# Print the mean and spread
print("Mean:", mean)
print("Spread (Standard Deviation):", spread)

Mean: 1.5326902202380952
Spread (Standard Deviation): 0.4410240645509031


In [28]:
import csv

# Specify the file path and name
csv_file = '../csv_features/COOH_features.csv'

# Open the .csv file and read its contents
with open(csv_file, 'r') as file:
    # Create a CSV reader object
    reader = csv.DictReader(file)
    
    # Extract the header names
    header_names = reader.fieldnames

# Now you have the header names in the header_names list


In [29]:
header_names

['feature0',
 'feature1',
 'feature2',
 'feature3',
 'feature4',
 'feature5',
 'feature6',
 'feature7',
 'feature8',
 'feature9',
 'feature10',
 'feature11',
 'feature12',
 'feature13',
 'feature14',
 'feature15',
 'feature16',
 'feature17',
 'feature18',
 'feature19',
 'G_ads(eV)',
 'slab db row',
 'COOH_C_adsorbed_out.dbrow']

#### Logik

Oh shit, kunne man ikke bare predicte alle energier én gang? Det er simpelt i den model, hvor ingen nabointeraktioner tages med. Der skal også komme en model, som tager højde for H og COOH ved siden af hinanden. Men der kunne alle sites og så vinkel i alle retninger gemmes i et look-up table. Så predicter man først alle energier én gang, og så har man bare gemt en matrix med de spændinger, som ville få et adsorbat til at hoppe på/af.

Måske gem alle.

I surface dict kan jeg bare gemme ekstra lag, som hedder H_G, H_Voltage (hvor hvis voltage er UNDER hopper den på) og samme med COOH_G og COOH_Voltage (Hvor hvis voltage er OVER hopper den på) Så kan alt det preprocessing laves inden man "kører simulationen", som så bliver utroligt simpelt. Oh shit et speedup det kan blive til.

Hvad er logikken når man sætter en *H ved siden af en *COOH eller en *COOH ved siden af en *H?
Skal man for hver H der allerede sidder se, om der gerne vil sætte sig en COOH ved siden af, med den model der tager højde for begge?
ELLER
Skal sitet til en COOH ved siden af en *H som allerede sidder der først være favorabelt for en COOH, og så tjekker man med *H+*COOH-modellen om det er favorabelt, når man tager højde for nabointeraktion gennem modellen?