### **Install Required Libraries and Import Libraries**

In [55]:
# Required Libraries
!apt-get install -y python-rdkit librdkit1 rdkit-data
!pip install rdkit
import pandas as pd
import itertools
from io import BytesIO
from rdkit import Chem
from rdkit.Chem import Draw
from IPython.display import display, HTML
import base64
import ast


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
E: Unable to locate package python-rdkit


### **Define and Display Catalyst Structures**

In [58]:
# List of catalyst SMILES strings.
# Order matters: the k-th entry (1-based) is drawn with the label "A<k>" by
# display_catalysts, and the same order is used when the reaction conditions
# are enumerated from the catalyst molecules.
catalyst_smiles_list = [
    "[O]N1[C@@H]2C[C@@H]3C[C@H]1C[C@@H]3C2",  # A1
    "[O]N1[C@@H]2CCC[C@H]1CCC2",  # A2
    "[O]N1[C@@H]2C[C@@H]3C[C@H]1C[C@@H](C3)C2",  # A3
    "O=C1C[C@@H]2N([O])[C@H](C1)CCC2",  # A4
    "[O]N1[C@@H]2C[C@@H]3C[C@H]1C[C@@H](O3)C2",  # A5
    "O=C1C[C@@H]2N([O])[C@H](C1)COC2",  # A6
    "[O]N1C(C)(C)CCCC1(C)C",  # A7
    "[O]N1[C@]2(C)C[C@@H]3C[C@H]1C[C@@H](C3)C2",  # A8
    "[O]N(C1(C)C)C(C)(C)COC1=O",  # A9
    "[O]N1C(C)(C)CC(NC(C)=O)CC1(C)C",  # A10
    "[O]N1[C@]2(C)C[C@@H]3C[C@@]1(C(OC)=O)C[C@@H](C3)C2",  # A11
    "[O]N(C1(CCCCC1)C2)C3(CCCCC3)CC2=O",  # A12
    "[O]N1[C@@](C)(C(OC)=O)CCC[C@@]1(C(OC)=O)C",  # A13
    "[O]N1C2(CCCCC2)CC(NC(C)=O)CC13CCCCC3",  # A14
    "[O]N1[C@@H]2COC[C@H]1CC2",  # A15
    "[O]N1[C@]2(C(OC)=O)CC[C@H]1CC2",  # A16
    "[O]N1[C@]2(F)C[C@@H]3C[C@H]1C[C@@H](O3)C2",  # A17
    "[O]N1[C@]2(F)CC[C@@]1(C(OC)=O)CC2",  # A18
    "[O]N1[C@@H]2C[C@@H]3C[C@@]1(C(OC)=O)C[C@@H](C3)C2",  # A19
    "[O]N1[C@@H]2C[C@@H]3C[C@@]1(C(OC)=O)C[C@@H](O3)C2",  # A20
    "[O]N1[C@]2(F)C[C@@H]3C[C@@]1(C(OC)=O)C[C@@H](C3)C2",  # A21
    "[O]N1[C@@H]2C[C@@H]3C[C@H]1C[C@@H](N3C(C(F)(F)F)=O)C2",  # A22
    "O=C1N(O)C(C2=CC=CC=C21)=O"  # A23
]

# Import necessary libraries
from rdkit import Chem
from rdkit.Chem import Draw
from IPython.display import display, HTML
from io import BytesIO
import base64

# Create RDKit molecule objects from SMILES strings (same order as the SMILES list)
catalyst_molecules = [Chem.MolFromSmiles(smiles) for smiles in catalyst_smiles_list]

# Chem.MolFromSmiles signals a parse failure by returning None instead of raising.
# Stop here with the offending strings rather than failing later inside the
# drawing code, where the cause would be much harder to trace.
if any(mol is None for mol in catalyst_molecules):
    raise ValueError(
        "Could not parse catalyst SMILES: "
        + ", ".join(
            repr(smiles)
            for smiles, mol in zip(catalyst_smiles_list, catalyst_molecules)
            if mol is None
        )
    )

# Function to display catalysts with labels in rows of 8
def display_catalysts(molecules):
    """Render molecules as labelled 150x150 PNG thumbnails, eight per output row.

    Each molecule receives the label "A<k>", where k is its 1-based position in
    `molecules`. Every group of up to eight thumbnails is emitted as one HTML
    fragment through `display(HTML(...))`. Nothing is returned.
    """
    num_per_row = 8

    # Walk the list in chunks of `num_per_row`; `first` is the chunk's start offset
    for first in range(0, len(molecules), num_per_row):
        thumbnails = []
        for number, mol in enumerate(molecules[first:first + num_per_row], start=first + 1):
            # Draw the molecule and keep the PNG bytes in memory
            png_buffer = BytesIO()
            Draw.MolToImage(mol, size=(150, 150)).save(png_buffer, format="PNG")
            png_b64 = base64.b64encode(png_buffer.getvalue()).decode("utf-8")
            # Inline the image as a data URI so no files are written
            thumbnails.append(
                "<div style='display:inline-block; margin:10px; text-align:center;'>"
                f"<b>A{number}</b><br>"
                f"<img src='data:image/png;base64,{png_b64}' width='150' height='150'></div>"
            )
        # One display call per row of thumbnails
        display(HTML("".join(thumbnails)))

# Display the catalysts; labels A1, A2, ... follow the order of catalyst_molecules
display_catalysts(catalyst_molecules)





### **Define Solvents and Acid Conditions**




In [59]:
# Solvent options used when enumerating the reaction conditions
solvents = [
    "DCE",
    "MeCN",
]

# Acid options: run without acid, or with HNTf2
acid_conditions = [
    "no acid",
    "HNTf2",
]


### **Generate Reaction Conditions**


In [60]:
# Enumerate every (solvent, acid, catalyst) combination. The catalyst varies fastest,
# then the acid, then the solvent; labels are the 1-based running number as a string.
reaction_conditions = [
    dict(Label=str(number), Catalyst=mol, Solvent=medium, Acid=additive)
    for number, (medium, additive, mol) in enumerate(
        itertools.product(solvents, acid_conditions, catalyst_molecules),
        start=1,
    )
]

### **Function to Display Selected Molecules**




In [61]:
# Function to display selected molecules in an HTML table
def display_selected_molecules(molecule_list, selected_labels, additional_series):
    """Display a one-row HTML table for a set of selected condition labels.

    For every label in `selected_labels`, the first entry of `molecule_list`
    whose 'Label' equals `str(label)` contributes one cell showing the label,
    a drawing of its 'Catalyst', its 'Solvent' and its 'Acid'. Labels without
    a matching entry are skipped. A final cell holds `additional_series`
    rounded to 0 decimals and rendered as an HTML table without header or
    index. The table is shown with `display(HTML(...))`; nothing is returned.
    """
    # Fragments are gathered in order and concatenated once at the end
    fragments = ['<table style="border-collapse: collapse; width: auto;"><tr>']

    for label in selected_labels:
        # First entry carrying the requested label, or None when there is none
        match = next(filter(lambda item: item['Label'] == str(label), molecule_list), None)
        if not match:
            continue

        # Draw the catalyst and encode the PNG for inline embedding
        png_buffer = BytesIO()
        Draw.MolToImage(match['Catalyst'], size=(150, 150)).save(png_buffer, format="PNG")
        png_b64 = base64.b64encode(png_buffer.getvalue()).decode("utf-8")

        # Cell with label, molecule image, solvent, and acid
        fragments.append(f'''
            <td style="border: 1px solid #dddddd; text-align: left; padding: 8px;">
            <b>Label:</b> {match['Label']}<br>
            <img src="data:image/png;base64,{png_b64}" alt="molecule" /><br>
            <b>Solvent:</b> {match['Solvent']}<br>
            <b>Acid:</b> {match['Acid']}</td>''')

    # Last cell: the accompanying series, rounded and rendered as a bare table
    series_html = additional_series.round(0).to_frame().to_html(classes="dataframe", header=False, index=False)
    fragments.append(f'''
    <td style="border: 1px solid #dddddd; text-align: left; padding: 8px;">
    {series_html}
    </td>''')

    fragments.append('</tr></table>')

    # Display the HTML table
    display(HTML("".join(fragments)))


### **Load and Filter HTE Data**

In [62]:
# Read the HTE table from the Excel workbook: the first row supplies the column
# labels and the first column becomes the index.
try:
    df = pd.read_excel(
        'HTE Data.xlsx',
        header=0,
        index_col=0,
        engine='openpyxl',
    )
except FileNotFoundError:
    # Give a readable hint, then let the original exception propagate
    print("Error: The file 'HTE Data.xlsx' was not found.")
    raise

# Keep only the columns whose mean value is at least 5
df = df.loc[:, df.mean().ge(5)]

### **Generate Combinations and Calculate Yields**

In [64]:
# Generate combinations of columns and score each combination for every row (substrate)
n = 3  # Number of columns to combine
# m = how many of the highest yields within a combination are summed per row:
#   m = 1 -> only the highest yield counts (maximal performing combination)
#   m = 2 -> the two highest yields count (robust combination)
# Results only differ slightly between either parameter choice.
# NOTE: m used to be declared as 1 but was never read, because the scoring below
# hard-coded nlargest(2). It is now actually used, and set to 2 so that the
# computed scores are the same as before this fix.
m = 2
column_combinations = itertools.combinations(df.columns, n)
# One result column per combination, keyed by the string form of the column tuple
results_df = pd.DataFrame({
    str(cols): df[list(cols)].apply(lambda row: sum(row.nlargest(m)), axis=1)
    for cols in column_combinations
})


### **Rank Combinations and Save Results**

In [65]:
# Total every combination over all rows, order from highest to lowest total,
# keep the first 500, and write them (with their combination keys) to CSV
top_conditions = (
    results_df
    .sum()
    .sort_values(ascending=False)
    .iloc[:500]
)
top_conditions.to_csv('Ranked combinations.csv', index=True)


### **Display Top Conditions**

In [66]:
# Show the 50 best-ranked combinations: the combination key is parsed back into its
# tuple of labels, and the per-row scores of that combination are shown alongside
for condition_index in itertools.islice(top_conditions.index, 50):
    display_selected_molecules(
        reaction_conditions,
        ast.literal_eval(condition_index),
        results_df[condition_index],
    )

0,1,2,3
142.0,,,
182.0,,,
169.0,,,
31.0,,,
42.0,,,
109.0,,,
170.0,,,
91.0,,,
47.0,,,
64.0,,,

0
142.0
182.0
169.0
31.0
42.0
109.0
170.0
91.0
47.0
64.0


0,1,2,3
110.0,,,
181.0,,,
162.0,,,
52.0,,,
27.0,,,
71.0,,,
173.0,,,
121.0,,,
57.0,,,
92.0,,,

0
110.0
181.0
162.0
52.0
27.0
71.0
173.0
121.0
57.0
92.0


0,1,2,3
102.0,,,
186.0,,,
169.0,,,
47.0,,,
50.0,,,
84.0,,,
173.0,,,
123.0,,,
57.0,,,
92.0,,,

0
102.0
186.0
169.0
47.0
50.0
84.0
173.0
123.0
57.0
92.0


0,1,2,3
91.0,,,
170.0,,,
139.0,,,
54.0,,,
43.0,,,
105.0,,,
170.0,,,
84.0,,,
85.0,,,
91.0,,,

0
91.0
170.0
139.0
54.0
43.0
105.0
170.0
84.0
85.0
91.0


0,1,2,3
131.0,,,
183.0,,,
166.0,,,
55.0,,,
43.0,,,
109.0,,,
165.0,,,
77.0,,,
49.0,,,
63.0,,,

0
131.0
183.0
166.0
55.0
43.0
109.0
165.0
77.0
49.0
63.0


0,1,2,3
123.0,,,
188.0,,,
173.0,,,
50.0,,,
66.0,,,
122.0,,,
170.0,,,
79.0,,,
49.0,,,
64.0,,,

0
123.0
188.0
173.0
50.0
66.0
122.0
170.0
79.0
49.0
64.0


0,1,2,3
150.0,,,
183.0,,,
158.0,,,
42.0,,,
34.0,,,
102.0,,,
165.0,,,
77.0,,,
60.0,,,
66.0,,,

0
150.0
183.0
158.0
42.0
34.0
102.0
165.0
77.0
60.0
66.0


0,1,2,3
91.0,,,
188.0,,,
166.0,,,
71.0,,,
51.0,,,
85.0,,,
173.0,,,
109.0,,,
57.0,,,
92.0,,,

0
91.0
188.0
166.0
71.0
51.0
85.0
173.0
109.0
57.0
92.0


0,1,2,3
142.0,,,
188.0,,,
165.0,,,
37.0,,,
57.0,,,
115.0,,,
170.0,,,
79.0,,,
60.0,,,
66.0,,,

0
142.0
188.0
165.0
37.0
57.0
115.0
170.0
79.0
60.0
66.0


0,1,2,3
114.0,,,
191.0,,,
163.0,,,
64.0,,,
59.0,,,
76.0,,,
170.0,,,
79.0,,,
59.0,,,
81.0,,,

0
114.0
191.0
163.0
64.0
59.0
76.0
170.0
79.0
59.0
81.0


0,1,2,3
81.0,,,
188.0,,,
159.0,,,
80.0,,,
54.0,,,
80.0,,,
173.0,,,
109.0,,,
57.0,,,
92.0,,,

0
81.0
188.0
159.0
80.0
54.0
80.0
173.0
109.0
57.0
92.0


0,1,2,3
81.0,,,
172.0,,,
136.0,,,
78.0,,,
44.0,,,
106.0,,,
164.0,,,
71.0,,,
85.0,,,
91.0,,,

0
81.0
172.0
136.0
78.0
44.0
106.0
164.0
71.0
85.0
91.0


0,1,2,3
113.0,,,
189.0,,,
166.0,,,
59.0,,,
69.0,,,
118.0,,,
170.0,,,
79.0,,,
47.0,,,
64.0,,,

0
113.0
189.0
166.0
59.0
69.0
118.0
170.0
79.0
47.0
64.0


0,1,2,3
121.0,,,
183.0,,,
159.0,,,
64.0,,,
46.0,,,
105.0,,,
165.0,,,
77.0,,,
45.0,,,
60.0,,,

0
121.0
183.0
159.0
64.0
46.0
105.0
165.0
77.0
45.0
60.0


0,1,2,3
71.0,,,
172.0,,,
129.0,,,
87.0,,,
47.0,,,
101.0,,,
164.0,,,
71.0,,,
85.0,,,
91.0,,,

0
71.0
172.0
129.0
87.0
47.0
101.0
164.0
71.0
85.0
91.0


0,1,2,3
110.0,,,
187.0,,,
159.0,,,
58.0,,,
42.0,,,
77.0,,,
173.0,,,
109.0,,,
60.0,,,
92.0,,,

0
110.0
187.0
159.0
58.0
42.0
77.0
173.0
109.0
60.0
92.0


0,1,2,3
99.0,,,
165.0,,,
132.0,,,
59.0,,,
20.0,,,
92.0,,,
165.0,,,
83.0,,,
85.0,,,
91.0,,,

0
99.0
165.0
132.0
59.0
20.0
92.0
165.0
83.0
85.0
91.0


0,1,2,3
114.0,,,
172.0,,,
154.0,,,
53.0,,,
25.0,,,
67.0,,,
172.0,,,
118.0,,,
59.0,,,
84.0,,,

0
114.0
172.0
154.0
53.0
25.0
67.0
172.0
118.0
59.0
84.0


0,1,2,3
100.0,,,
171.0,,,
128.0,,,
65.0,,,
35.0,,,
99.0,,,
164.0,,,
71.0,,,
85.0,,,
91.0,,,

0
100.0
171.0
128.0
65.0
35.0
99.0
164.0
71.0
85.0
91.0


0,1,2,3
122.0,,,
186.0,,,
156.0,,,
68.0,,,
36.0,,,
63.0,,,
165.0,,,
77.0,,,
59.0,,,
81.0,,,

0
122.0
186.0
156.0
68.0
36.0
63.0
165.0
77.0
59.0
81.0


0,1,2,3
128.0,,,
181.0,,,
162.0,,,
55.0,,,
29.0,,,
86.0,,,
168.0,,,
77.0,,,
62.0,,,
64.0,,,

0
128.0
181.0
162.0
55.0
29.0
86.0
168.0
77.0
62.0
64.0


0,1,2,3
77.0,,,
184.0,,,
154.0,,,
81.0,,,
65.0,,,
61.0,,,
173.0,,,
109.0,,,
57.0,,,
92.0,,,

0
77.0
184.0
154.0
81.0
65.0
61.0
173.0
109.0
57.0
92.0


0,1,2,3
106.0,,,
177.0,,,
161.0,,,
49.0,,,
47.0,,,
80.0,,,
172.0,,,
120.0,,,
59.0,,,
84.0,,,

0
106.0
177.0
161.0
49.0
47.0
80.0
172.0
120.0
59.0
84.0


0,1,2,3
109.0,,,
185.0,,,
161.0,,,
60.0,,,
80.0,,,
99.0,,,
170.0,,,
79.0,,,
47.0,,,
64.0,,,

0
109.0
185.0
161.0
60.0
80.0
99.0
170.0
79.0
47.0
64.0


0,1,2,3
82.0,,,
191.0,,,
157.0,,,
84.0,,,
44.0,,,
38.0,,,
173.0,,,
109.0,,,
59.0,,,
92.0,,,

0
82.0
191.0
157.0
84.0
44.0
38.0
173.0
109.0
59.0
92.0


0,1,2,3
121.0,,,
186.0,,,
169.0,,,
50.0,,,
52.0,,,
99.0,,,
170.0,,,
79.0,,,
62.0,,,
64.0,,,

0
121.0
186.0
169.0
50.0
52.0
99.0
170.0
79.0
62.0
64.0


0,1,2,3
96.0,,,
178.0,,,
158.0,,,
72.0,,,
48.0,,,
80.0,,,
172.0,,,
107.0,,,
59.0,,,
84.0,,,

0
96.0
178.0
158.0
72.0
48.0
80.0
172.0
107.0
59.0
84.0


0,1,2,3
117.0,,,
179.0,,,
153.0,,,
65.0,,,
57.0,,,
86.0,,,
165.0,,,
77.0,,,
46.0,,,
60.0,,,

0
117.0
179.0
153.0
65.0
57.0
86.0
165.0
77.0
46.0
60.0


0,1,2,3
132.0,,,
190.0,,,
162.0,,,
61.0,,,
58.0,,,
115.0,,,
158.0,,,
50.0,,,
60.0,,,
66.0,,,

0
132.0
190.0
162.0
61.0
58.0
115.0
158.0
50.0
60.0
66.0


0,1,2,3
66.0,,,
168.0,,,
123.0,,,
88.0,,,
58.0,,,
82.0,,,
164.0,,,
71.0,,,
85.0,,,
91.0,,,

0
66.0
168.0
123.0
88.0
58.0
82.0
164.0
71.0
85.0
91.0


0,1,2,3
86.0,,,
178.0,,,
151.0,,,
81.0,,,
51.0,,,
76.0,,,
172.0,,,
107.0,,,
59.0,,,
84.0,,,

0
86.0
178.0
151.0
81.0
51.0
76.0
172.0
107.0
59.0
84.0


0,1,2,3
59.0,,,
170.0,,,
132.0,,,
75.0,,,
28.0,,,
68.0,,,
173.0,,,
114.0,,,
85.0,,,
92.0,,,

0
59.0
170.0
132.0
75.0
28.0
68.0
173.0
114.0
85.0
92.0


0,1,2,3
122.0,,,
190.0,,,
155.0,,,
70.0,,,
61.0,,,
111.0,,,
158.0,,,
49.0,,,
60.0,,,
66.0,,,

0
122.0
190.0
155.0
70.0
61.0
111.0
158.0
49.0
60.0
66.0


0,1,2,3
92.0,,,
173.0,,,
169.0,,,
36.0,,,
54.0,,,
124.0,,,
170.0,,,
79.0,,,
49.0,,,
72.0,,,

0
92.0
173.0
169.0
36.0
54.0
124.0
170.0
79.0
49.0
72.0


0,1,2,3
89.0,,,
186.0,,,
163.0,,,
71.0,,,
37.0,,,
61.0,,,
173.0,,,
109.0,,,
62.0,,,
92.0,,,

0
89.0
186.0
163.0
71.0
37.0
61.0
173.0
109.0
62.0
92.0


0,1,2,3
78.0,,,
169.0,,,
132.0,,,
78.0,,,
30.0,,,
83.0,,,
168.0,,,
71.0,,,
85.0,,,
91.0,,,

0
78.0
169.0
132.0
78.0
30.0
83.0
168.0
71.0
85.0
91.0


0,1,2,3
100.0,,,
167.0,,,
162.0,,,
41.0,,,
31.0,,,
111.0,,,
165.0,,,
77.0,,,
49.0,,,
72.0,,,

0
100.0
167.0
162.0
41.0
31.0
111.0
165.0
77.0
49.0
72.0


0,1,2,3
115.0,,,
178.0,,,
151.0,,,
59.0,,,
40.0,,,
73.0,,,
172.0,,,
107.0,,,
60.0,,,
84.0,,,

0
115.0
178.0
151.0
59.0
40.0
73.0
172.0
107.0
60.0
84.0


0,1,2,3
103.0,,,
193.0,,,
160.0,,,
88.0,,,
60.0,,,
77.0,,,
141.0,,,
50.0,,,
59.0,,,
81.0,,,

0
103.0
193.0
160.0
88.0
60.0
77.0
141.0
50.0
59.0
81.0


0,1,2,3
101.0,,,
162.0,,,
163.0,,,
47.0,,,
53.0,,,
79.0,,,
173.0,,,
109.0,,,
57.0,,,
92.0,,,

0
101.0
162.0
163.0
47.0
53.0
79.0
173.0
109.0
57.0
92.0


0,1,2,3
133.0,,,
162.0,,,
170.0,,,
26.0,,,
68.0,,,
116.0,,,
170.0,,,
79.0,,,
47.0,,,
64.0,,,

0
133.0
162.0
170.0
26.0
68.0
116.0
170.0
79.0
47.0
64.0


0,1,2,3
60.0,,,
172.0,,,
162.0,,,
57.0,,,
39.0,,,
86.0,,,
173.0,,,
109.0,,,
57.0,,,
92.0,,,

0
60.0
172.0
162.0
57.0
39.0
86.0
173.0
109.0
57.0
92.0


0,1,2,3
110.0,,,
188.0,,,
166.0,,,
74.0,,,
53.0,,,
100.0,,,
168.0,,,
50.0,,,
62.0,,,
64.0,,,

0
110.0
188.0
166.0
74.0
53.0
100.0
168.0
50.0
62.0
64.0


0,1,2,3
90.0,,,
146.0,,,
133.0,,,
54.0,,,
46.0,,,
100.0,,,
164.0,,,
71.0,,,
85.0,,,
91.0,,,

0
90.0
146.0
133.0
54.0
46.0
100.0
164.0
71.0
85.0
91.0


0,1,2,3
114.0,,,
191.0,,,
163.0,,,
91.0,,,
59.0,,,
105.0,,,
163.0,,,
84.0,,,
68.0,,,
61.0,,,

0
114.0
191.0
163.0
91.0
59.0
105.0
163.0
84.0
68.0
61.0


0,1,2,3
88.0,,,
181.0,,,
156.0,,,
57.0,,,
42.0,,,
70.0,,,
173.0,,,
109.0,,,
57.0,,,
92.0,,,

0
88.0
181.0
156.0
57.0
42.0
70.0
173.0
109.0
57.0
92.0


0,1,2,3
141.0,,,
157.0,,,
163.0,,,
31.0,,,
45.0,,,
103.0,,,
165.0,,,
77.0,,,
43.0,,,
60.0,,,

0
141.0
157.0
163.0
31.0
45.0
103.0
165.0
77.0
43.0
60.0


0,1,2,3
122.0,,,
192.0,,,
153.0,,,
74.0,,,
51.0,,,
69.0,,,
158.0,,,
43.0,,,
60.0,,,
81.0,,,

0
122.0
192.0
153.0
74.0
51.0
69.0
158.0
43.0
60.0
81.0


0,1,2,3
102.0,,,
186.0,,,
169.0,,,
75.0,,,
50.0,,,
105.0,,,
172.0,,,
123.0,,,
67.0,,,
71.0,,,

0
102.0
186.0
169.0
75.0
50.0
105.0
172.0
123.0
67.0
71.0


0,1,2,3
120.0,,,
182.0,,,
163.0,,,
36.0,,,
57.0,,,
108.0,,,
170.0,,,
79.0,,,
48.0,,,
64.0,,,

0
120.0
182.0
163.0
36.0
57.0
108.0
170.0
79.0
48.0
64.0
