In [69]:
import requests
from bs4 import BeautifulSoup
import json
import re
import csv

In [70]:

url = "https://cymitquimica.com/categories/1828/nicotine-and-nicotine-derivatives/?srsltid=AfmBOor5CHkEY17td7i8alPNqfsjPX-VKsd6igxeoJFVzukYf576WD9_&page="
response = requests.get(url)

soup = BeautifulSoup(response.content, 'html.parser')

json_ld = None
for script in soup.find_all('script', type='application/ld+json'):
    try:
        json_ld = json.loads(script.string)
        break
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON: {e}")
        continue

if json_ld:
    if isinstance(json_ld, dict):
        print("json_ld is a dictionary")
        print(json.dumps(json_ld, indent=2))
    else:
        print(f"Unexpected JSON-LD structure: {type(json_ld)}")
else:
    print("No valid JSON-LD found")


json_ld is a dictionary
{
  "@context": "http://schema.org",
  "@type": "WebSite",
  "url": "https://cymitquimica.com",
  "dateModified": "2019-02-15T00:00",
  "image": "https://static.cymitquimica.com/public/img/logo-cymit.png",
  "potentialAction": {
    "@type": "SearchAction",
    "target": "https://cymitquimica.com/search/{search_term_string}/",
    "query-input": "required name=search_term_string"
  },
  "sameAs": [
    "https://www.facebook.com/cymitquimica/",
    "https://twitter.com/cymitquimica",
    "https://es.linkedin.com/company/cymit-quimica-s-l-",
    "https://www.instagram.com/cymitquimica/",
    "https://www.pinterest.es/cymit/",
    "https://cymit.tumblr.com/"
  ]
}


In [71]:
response = requests.get(url)

soup = BeautifulSoup(response.text, "html.parser")

product_links = soup.find_all("a", class_="js-product-link")

for link in product_links:
    product_url = link.get("href")
    if product_url:
        print(f"https://example.com{product_url}")


In [72]:
def scrape_page(page_number):
    url_new = url + str(page_number)
    response = requests.get(url_new)
    
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, "html.parser")
        product_links = soup.find_all("a", class_="js-product-link")
        
        for link in product_links:
            product_url = link.get("href")
            if product_url:
                # Extract last part (after the last "/")
                last_part = product_url.strip("/").split("/")[-1]
                second_to_last_part = product_url.strip("/").split("/")[-2]
                product_urls.append(last_part)
                cas_numbers.append(second_to_last_part)
    else:
        print(f"Failed to retrieve page {page_number}")

In [None]:
product_urls = []
cas_numbers = []

# for page in range(1, 14):
#     print(f"Scraping page {page}...")
#     scrape_page(page)

# csv_filename = "nicotine_derivatives_data.csv"
# with open(csv_filename, mode="w", newline="", encoding="utf-8") as file:
#     writer = csv.writer(file)
#     writer.writerow(["product_url", "cas_number"])
#     for url, cas in zip(product_urls, cas_numbers):
#         writer.writerow([url, cas])

# print(f"Number of entries saved in {csv_filename}: {len(product_urls)}")
# print(f"Number of CAS numbers: {len(cas_numbers)}")
def contains_uppercase(cas):
    return any(c.isupper() for c in cas)

for page in range(1, 14):
    print(f"Scraping page {page}...")
    scrape_page(page)

csv_filename = "nicotine_derivatives_data.csv"
with open(csv_filename, mode="w", newline="", encoding="utf-8") as file:
    writer = csv.writer(file)
    writer.writerow(["product_url", "cas_number"])
    for url, cas in zip(product_urls, cas_numbers):
        # Only write the row if the CAS number doesn't contain an uppercase letter
        if not contains_uppercase(cas):
            writer.writerow([url, cas])

print(f"Number of entries saved in {csv_filename}: {len(product_urls)}")
print(f"Number of CAS numbers: {len(cas_numbers)}")

Scraping page 1...
Scraping page 2...
Scraping page 3...
Scraping page 4...
Scraping page 5...
Scraping page 6...
Scraping page 7...
Scraping page 8...
Scraping page 9...
Scraping page 10...
Scraping page 11...
Scraping page 12...
Scraping page 13...
Number of entries saved in nicotine_derivatives_data.csv: 252
Number of CAS numbers: 252


In [74]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
import pandas as pd
from tqdm import tqdm
import time
from selenium.webdriver.chrome.options import Options

options = Options()
options.add_argument("--headless")  # Run in headless mode for speed
options.add_argument("--disable-gpu")  # Avoid GPU rendering issues
options.add_argument("--no-sandbox")
driver = webdriver.Chrome(options=options)

# Set a page load timeout (e.g., 180 seconds)
driver.set_page_load_timeout(180)

In [None]:
def get_smiles_from_cas(cas_number):
    url = f'https://www.1stsci.com/products/{cas_number}?cas={cas_number}'
    
    for attempt in range(3):
        try:
            driver.get(url)  # Load the page
            smiles_element = WebDriverWait(driver, 10).until(
                EC.visibility_of_element_located((By.CLASS_NAME, "smiles"))
            )
            smiles_tag = driver.find_element(By.CLASS_NAME, 'smiles')
            smiles = smiles_tag.text
            print(f'{cas_number} gives the SMILES: {smiles} and is OK')
            # time.sleep(7)
            return smiles
        except Exception as e:
            print("Retrying CAS {cas_number}")
            time.sleep(7)
    print(f"FAILED TO RETRIEVE SMILES FOR CAS {cas_number} and is WRONGGGG.")
    return None

In [87]:
df = pd.read_csv('nicotine_derivatives_data.csv')
print(df.columns)

tqdm.pandas()
df['SMILES'] = df['cas_number'].progress_apply(get_smiles_from_cas)

total = len(df)
converted = df['SMILES'].notna().sum()
print(f"Total compounds: {total}")
print(f"Successfully converted: {converted}")
print(f"Conversion rate: {converted/total:.2%}")

df = df.dropna(subset=['SMILES'])
print(df)

df.to_csv('nicotine_derivatives_with_smiles.csv', index=False)

Index(['product_url', 'cas_number'], dtype='object')


  1%|          | 2/252 [00:09<20:20,  4.88s/it]

494-97-3 gives the SMILES: C1=CN=CC([C@@H]2CCCN2)=C1 and is OK


  1%|          | 3/252 [00:19<29:03,  7.00s/it]

485-35-8 gives the SMILES: O=C1C=CC=C2[C@H]3CNC[C@H](C3)CN21 and is OK


  2%|▏         | 4/252 [00:28<32:18,  7.82s/it]

95091-91-1 gives the SMILES: CON(C)C(=O)C1=CC=CN=C1 and is OK


  2%|▏         | 5/252 [00:37<33:06,  8.04s/it]

6456-44-6 gives the SMILES: CN1C=CC(I)=C(C(N)=O)C1 and is OK


  2%|▏         | 6/252 [00:46<34:05,  8.31s/it]

532-12-7 gives the SMILES: C1=CN=CC(C2=NCCC2)=C1 and is OK


  3%|▎         | 7/252 [00:55<35:15,  8.63s/it]

20260-53-1 gives the SMILES: Cl.O=C(Cl)C1=CC=CN=C1 and is OK


  3%|▎         | 8/252 [01:04<35:46,  8.80s/it]

2743-90-0 gives the SMILES: C1=CCC(C2=CC=CN=C2)NC1 and is OK


  4%|▎         | 9/252 [01:13<35:31,  8.77s/it]

609-71-2 gives the SMILES: O=C(O)C1=CC=CN=C1O and is OK


  4%|▍         | 10/252 [01:22<35:18,  8.76s/it]

59288-43-6 gives the SMILES: O=C(O)C1=CN=C([N+](=O)[O-])C(O)=C1 and is OK


  4%|▍         | 11/252 [01:31<35:31,  8.85s/it]

5470-70-2 gives the SMILES: COC(=O)C1=CC=C(C)N=C1 and is OK


  5%|▍         | 12/252 [01:39<34:48,  8.70s/it]

3562-11-6 gives the SMILES: O=C(O)C1CCC(C2=CC=CN=C2)N1 and is OK


  5%|▌         | 13/252 [01:48<35:12,  8.84s/it]

177785-14-7 gives the SMILES: COC(=O)C1=CC=CN=C1CCl and is OK


  6%|▌         | 14/252 [01:57<35:03,  8.84s/it]

29681-45-6 gives the SMILES: COC(=O)C1=CN=CC(C)=C1 and is OK


  6%|▌         | 15/252 [02:06<34:42,  8.78s/it]

6960-22-1 gives the SMILES: CC1=CC=C(C(N)=O)C=N1 and is OK


  6%|▋         | 16/252 [02:15<35:20,  8.99s/it]

1802-30-8 gives the SMILES: O=C(O)C1=CC=C(C2=CC=C(C(=O)O)C=N2)N=C1 and is OK


  7%|▋         | 17/252 [02:24<35:31,  9.07s/it]

1215721-40-6 gives the SMILES: Cl.ClC1=CC=CC(COC2=CN=CC(N3CCNCC3)=N2)=C1 and is OK


  7%|▋         | 18/252 [02:34<36:16,  9.30s/it]

132334-98-6 gives the SMILES: CCOC(=O)C1=CC=C(Br)N=C1 and is OK


  8%|▊         | 19/252 [02:44<36:27,  9.39s/it]

89690-09-5 gives the SMILES: C[NH2+]CCCC[NH3+].[Cl-].[Cl-] and is OK


  8%|▊         | 20/252 [02:54<36:57,  9.56s/it]

494-52-0 gives the SMILES: C1=CN=CC([C@@H]2CCCCN2)=C1 and is OK


  8%|▊         | 21/252 [03:03<36:35,  9.51s/it]

98-92-0 gives the SMILES: NC(=O)C1=CC=CN=C1 and is OK


  9%|▊         | 22/252 [03:13<36:29,  9.52s/it]

6197-39-3 gives the SMILES: COC(=O)C1=CCCNC1.Cl and is OK


  9%|▉         | 23/252 [03:22<36:26,  9.55s/it]

5398-44-7 gives the SMILES: O=C(O)C1=CC(Cl)=NC(Cl)=C1 and is OK


 10%|▉         | 24/252 [03:32<35:50,  9.43s/it]

61445-55-4 gives the SMILES: CN(CCCC(=O)O)N=O and is OK


 10%|▉         | 25/252 [03:41<35:14,  9.32s/it]

5006-66-6 gives the SMILES: O=C(O)C1=CNC(=O)C=C1 and is OK


 10%|█         | 26/252 [03:50<34:42,  9.21s/it]

5746-86-1 gives the SMILES: C1=CN=CC(C2CCCN2)=C1 and is OK


 11%|█         | 27/252 [03:58<33:59,  9.06s/it]

38496-18-3 gives the SMILES: O=C(O)C1=CC=C(Cl)N=C1Cl and is OK


 11%|█         | 28/252 [04:07<33:30,  8.97s/it]

39178-35-3 gives the SMILES: Cl.O=C(Cl)C1=CC=NC=C1 and is OK


 12%|█▏        | 29/252 [04:16<32:50,  8.83s/it]

129747-52-0 gives the SMILES: COC(=O)C1=CC(CO)=CN=C1 and is OK


 12%|█▏        | 30/252 [04:24<32:23,  8.75s/it]

38806-38-1 gives the SMILES: NC(=O)C1=C[N+]([C@@H]2O[C@H](COP(=O)([O-])OP(=O)(O)OC[C@H]3O[C@@H](N4C=NC5=C4N=CN4C=CN=C54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)=CC=C1 and is OK


 12%|█▏        | 31/252 [04:33<32:21,  8.79s/it]

64091-91-4 gives the SMILES: CN(CCCC(=O)C1=CC=CN=C1)N=O and is OK


 13%|█▎        | 32/252 [04:42<32:22,  8.83s/it]

486-56-6 gives the SMILES: CN1C(=O)CC[C@H]1C1=CC=CN=C1 and is OK


 13%|█▎        | 33/252 [04:52<33:12,  9.10s/it]

1094-61-7 gives the SMILES: NC(=O)C1=C[N+]([C@@H]2O[C@H](COP(=O)([O-])O)[C@@H](O)[C@H]2O)=CC=C1 and is OK


 13%|█▎        | 34/252 [05:01<33:42,  9.28s/it]

27828-71-3 gives the SMILES: O=C(O)C1=CC(O)=CN=C1 and is OK


 14%|█▍        | 35/252 [05:12<34:44,  9.61s/it]

78348-28-4 gives the SMILES: O=C(ON1C(=O)CCC1=O)C1=CC=CN=C1 and is OK


 14%|█▍        | 36/252 [05:21<33:51,  9.41s/it]

66093-90-1 gives the SMILES: C[NH2+]CCCC(=O)C1=CC=C[NH+]=C1.[Cl-].[Cl-] and is OK


 15%|█▍        | 37/252 [05:30<33:34,  9.37s/it]

1364663-27-3 gives the SMILES: COC(=O)C1=CN=C(Br)C=C1Br and is OK


 15%|█▌        | 38/252 [05:39<33:28,  9.39s/it]

13190-97-1 gives the SMILES: CC(C)=CCC/C(C)=C/CC/C(C)=C/CC/C(C)=C/CC/C(C)=C/CC/C(C)=C/CC/C(C)=C/CC/C(C)=C/CC/C(C)=C/CO and is OK


 15%|█▌        | 39/252 [05:50<34:35,  9.75s/it]

3222-56-8 gives the SMILES: CC1=NC=CC=C1C(=O)O and is OK


 16%|█▌        | 40/252 [05:59<33:24,  9.46s/it]

1986-81-8 gives the SMILES: NC(=O)C1=C[N+]([O-])=CC=C1 and is OK


 16%|█▋        | 41/252 [06:10<35:28, 10.09s/it]

90872-72-3 gives the SMILES: CC1=CC=C(C2CCCN2)C=N1 and is OK


 17%|█▋        | 42/252 [06:19<33:57,  9.70s/it]

59578-62-0 gives the SMILES: O=C(CCCO)C1=CC=CN=C1 and is OK


 17%|█▋        | 43/252 [06:28<32:50,  9.43s/it]

2047-49-6 gives the SMILES: O=C(O)C1=CN=CC([N+](=O)[O-])=C1 and is OK


 17%|█▋        | 44/252 [06:36<31:46,  9.16s/it]

6311-35-9 gives the SMILES: O=C(O)C1=CC=C(Br)N=C1 and is OK


 18%|█▊        | 45/252 [06:46<32:13,  9.34s/it]

499-81-0 gives the SMILES: O=C(O)C1=CN=CC(C(=O)O)=C1 and is OK


 18%|█▊        | 46/252 [06:55<31:44,  9.24s/it]

7076-23-5 gives the SMILES: C1=CN=CC([C@H]2CCCN2)=C1 and is OK


 19%|█▊        | 47/252 [07:04<31:39,  9.26s/it]

614-00-6 gives the SMILES: CN(N=O)C1=CC=CC=C1 and is OK


 19%|█▉        | 48/252 [07:18<35:24, 10.41s/it]

2004-06-0 gives the SMILES: OC[C@H]1OC(N2C=NC3=C2N=CN=C3Cl)[C@H](O)[C@@H]1O and is OK


 19%|█▉        | 49/252 [07:28<35:39, 10.54s/it]

27247-34-3 gives the SMILES: [O-]C(=O)c1cc(cnc1)C(=O)OC.[K+] and is OK


 20%|█▉        | 50/252 [07:39<35:10, 10.45s/it]

3569-99-1 gives the SMILES: O=C(NCO)C1=CC=CN=C1 and is OK


 20%|██        | 51/252 [07:48<33:58, 10.14s/it]

3222-47-7 gives the SMILES: CC1=CC=C(C(=O)O)C=N1 and is OK


 21%|██        | 52/252 [07:57<33:02,  9.91s/it]

321-02-8 gives the SMILES: O=C([O-])C1=C[N+]([C@@H]2O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]2O)=CC=C1 and is OK


 21%|██        | 53/252 [08:07<32:11,  9.71s/it]

13078-04-1 gives the SMILES: C1=CN=CC(C2CCCCN2)=C1 and is OK


 21%|██▏       | 54/252 [08:16<31:29,  9.54s/it]

1314217-69-0 gives the SMILES: CC1=CC(C2=NC(CN3CCNCC3)=NO2)=CC2=C1C(=O)N(CC1=CC=C(OC(F)(F)F)C=C1)C2.CS(=O)(=O)O and is OK


 22%|██▏       | 55/252 [08:25<30:58,  9.44s/it]

4314-66-3 gives the SMILES: CCNC(=O)C1=CC=CN=C1 and is OK


 22%|██▏       | 56/252 [08:35<30:54,  9.46s/it]

5176-27-2 gives the SMILES: CC(C)(C)OC(=O)N1C=CC=C1 and is OK


 23%|██▎       | 57/252 [08:44<30:42,  9.45s/it]

924-16-3 gives the SMILES: CCCCN(CCCC)N=O and is OK
Retrying
Retrying
Retrying


 23%|██▎       | 58/252 [09:48<1:23:02, 25.68s/it]

FAILED TO RETRIEVE SMILES FOR CAS 4Z-N-0626 and is WRONGGGG.
Retrying


 23%|██▎       | 59/252 [10:23<1:31:59, 28.60s/it]

1207384-47-1 gives the SMILES: [2H]C([2H])([2H])N1C=CC(=O)C(C(N)=O)=C1 and is OK


 24%|██▍       | 60/252 [10:32<1:12:29, 22.65s/it]

51095-86-4 gives the SMILES: C[N@+]1([O-])CCC[C@H]1C1=CC=CN=C1 and is OK
Retrying
Retrying
Retrying


 24%|██▍       | 61/252 [11:38<1:53:27, 35.64s/it]

FAILED TO RETRIEVE SMILES FOR CAS 51020-67-8 and is WRONGGGG.
Retrying


 25%|██▍       | 62/252 [12:22<2:01:05, 38.24s/it]

1216737-36-8 gives the SMILES: [2H]C1=NC([2H])=C(C(=O)NCC(=O)O)C([2H])=C1[2H] and is OK


 25%|██▌       | 63/252 [12:33<1:34:46, 30.09s/it]

5654-86-4 gives the SMILES: CC(CC1NC(=O)C2N(C1=O)CCC2)C and is OK
Retrying
Retrying
Retrying


 25%|██▌       | 64/252 [13:37<2:06:07, 40.25s/it]

FAILED TO RETRIEVE SMILES FOR CAS 491-26-9 and is WRONGGGG.
Retrying
Retrying
Retrying


 26%|██▌       | 65/252 [14:42<2:28:32, 47.66s/it]

FAILED TO RETRIEVE SMILES FOR CAS 63551-14-4 and is WRONGGGG.
Retrying
Retrying
Retrying


 26%|██▌       | 66/252 [15:50<2:46:49, 53.82s/it]

FAILED TO RETRIEVE SMILES FOR CAS 491-26-9 and is WRONGGGG.


 27%|██▋       | 67/252 [16:00<2:04:52, 40.50s/it]

3612-80-4 gives the SMILES: O=C(OCCO)C1=CC=CN=C1 and is OK


 27%|██▋       | 68/252 [16:08<1:34:21, 30.77s/it]

54-11-5 gives the SMILES: CN1CCC[C@H]1C1=CC=CN=C1 and is OK


 27%|██▋       | 69/252 [16:15<1:12:29, 23.77s/it]

2743-90-0 gives the SMILES: C1=CCC(C2=CC=CN=C2)NC1 and is OK


 28%|██▊       | 70/252 [16:22<57:13, 18.86s/it]  

532-12-7 gives the SMILES: C1=CN=CC(C2=NCCC2)=C1 and is OK


 28%|██▊       | 71/252 [16:30<46:41, 15.48s/it]

3719-45-7 gives the SMILES: CN1C=C(C(=O)O)C=CC1=O and is OK


 29%|██▊       | 72/252 [16:38<39:52, 13.29s/it]

701-44-0 gives the SMILES: CN1C=C(C(N)=O)C=CC1=O and is OK


 29%|██▉       | 73/252 [16:46<34:52, 11.69s/it]

66148-19-4 gives the SMILES: [2H]C1=NC([2H])=C(C2CCCN2N=O)C([2H])=C1[2H] and is OK


 29%|██▉       | 74/252 [16:55<31:42, 10.69s/it]

1219805-86-3 gives the SMILES: [2H]C1=C([2H])C([2H])=C(C2CCCN2C([2H])([2H])[2H])C([2H])=N1 and is OK


 30%|██▉       | 75/252 [17:03<29:11,  9.90s/it]

69980-24-1 gives the SMILES: [2H]C([2H])([2H])N1CCCC1C1=CC=CN=C1 and is OK


 30%|███       | 76/252 [17:09<26:14,  8.95s/it]

501-81-5 gives the SMILES: O=C(O)CC1=CC=CN=C1 and is OK


 31%|███       | 77/252 [17:18<25:31,  8.75s/it]

769-49-3 gives the SMILES: CN1C=CC(=O)C(C(N)=O)=C1 and is OK


 31%|███       | 78/252 [17:25<23:55,  8.25s/it]

5746-86-1 gives the SMILES: C1=CN=CC(C2CCCN2)=C1 and is OK


 31%|███▏      | 79/252 [17:32<22:43,  7.88s/it]

1207384-48-2 gives the SMILES: [2H]C([2H])([2H])N1C=C(C(N)=O)C=CC1=O and is OK


 32%|███▏      | 80/252 [17:40<22:40,  7.91s/it]

350818-69-8 gives the SMILES: [2H]C1=C([2H])C([2H])=C(C2CCCN2C)C([2H])=N1 and is OK


 32%|███▏      | 81/252 [17:48<22:46,  7.99s/it]

59-67-6 gives the SMILES: O=C(O)C1=CC=CN=C1 and is OK


 33%|███▎      | 82/252 [17:56<22:56,  8.10s/it]

487-19-4 gives the SMILES: CN1C=CC=C1C1=CC=CN=C1 and is OK


 33%|███▎      | 83/252 [18:05<23:22,  8.30s/it]

66148-15-0 gives the SMILES: [2H]C1=NC([2H])=C(C(=O)O)C([2H])=C1[2H] and is OK


 33%|███▎      | 84/252 [18:13<22:57,  8.20s/it]

94-44-0 gives the SMILES: O=C(OCC1=CC=CC=C1)C1=CC=CN=C1 and is OK
Retrying
Retrying
Retrying


 34%|███▎      | 85/252 [19:13<1:06:24, 23.86s/it]

FAILED TO RETRIEVE SMILES FOR CAS 65550-28-9 and is WRONGGGG.


 34%|███▍      | 86/252 [19:36<1:05:16, 23.59s/it]

1020719-11-2 gives the SMILES: [2H]C1=C([2H])C([2H])=C(C2CC=CCN2)C([2H])=N1 and is OK


 35%|███▍      | 87/252 [19:43<50:31, 18.37s/it]  

5654-86-4 gives the SMILES: CC(CC1NC(=O)C2N(C1=O)CCC2)C and is OK
Retrying
Retrying
Retrying


 35%|███▍      | 88/252 [26:04<5:47:37, 127.18s/it]

FAILED TO RETRIEVE SMILES FOR CAS 153536-53-9 and is WRONGGGG.
Retrying
Retrying
Retrying


 35%|███▌      | 89/252 [30:37<7:44:23, 170.94s/it]

FAILED TO RETRIEVE SMILES FOR CAS 4Z-N-0618 and is WRONGGGG.
Retrying


 36%|███▌      | 90/252 [31:10<5:50:24, 129.78s/it]

138946-42-6 gives the SMILES: [2H]C1=NC(C(=O)O)=C(C(=O)O)C([2H])=C1[2H] and is OK


 36%|███▌      | 91/252 [31:18<4:10:07, 93.21s/it] 

51095-86-4 gives the SMILES: C[N@+]1([O-])CCC[C@H]1C1=CC=CN=C1 and is OK


 37%|███▋      | 92/252 [31:26<3:00:19, 67.62s/it]

887355-56-8 gives the SMILES: O=CNCCCC(=O)C1=CC=CN=C1 and is OK


 37%|███▋      | 93/252 [31:35<2:12:33, 50.02s/it]

2743-90-0 gives the SMILES: C1=CCC(C2=CC=CN=C2)NC1 and is OK


 37%|███▋      | 94/252 [31:46<1:40:39, 38.23s/it]

25162-00-9 gives the SMILES: CN1CCC[C@@H]1C1=CC=CN=C1 and is OK


 38%|███▊      | 95/252 [31:54<1:16:07, 29.09s/it]

487-19-4 gives the SMILES: CN1C=CC=C1C1=CC=CN=C1 and is OK


 38%|███▊      | 96/252 [32:01<58:21, 22.44s/it]  

5746-86-1 gives the SMILES: C1=CN=CC(C2CCCN2)=C1 and is OK


 38%|███▊      | 97/252 [32:07<45:51, 17.75s/it]

532-12-7 gives the SMILES: C1=CN=CC(C2=NCCC2)=C1 and is OK


 39%|███▉      | 98/252 [32:13<36:29, 14.22s/it]

125630-26-4 gives the SMILES: CN1CCC(C(=O)C2=CC=CN=C2)C1 and is OK
Retrying
Retrying
Retrying


 39%|███▉      | 99/252 [38:34<5:16:56, 124.29s/it]

FAILED TO RETRIEVE SMILES FOR CAS 491-26-9 and is WRONGGGG.
Retrying
Retrying
Retrying


 40%|███▉      | 100/252 [43:17<7:15:29, 171.90s/it]

FAILED TO RETRIEVE SMILES FOR CAS 54-11-5 and is WRONGGGG.


 40%|████      | 101/252 [43:26<5:09:24, 122.94s/it]

494-52-0 gives the SMILES: C1=CN=CC([C@@H]2CCCCN2)=C1 and is OK


 40%|████      | 102/252 [43:34<3:41:17, 88.52s/it] 

494-98-4 gives the SMILES: C1=CN=CC(C2=CC=CN2)=C1 and is OK


 41%|████      | 103/252 [43:42<2:39:25, 64.20s/it]

16543-55-8 gives the SMILES: O=NN1CCC[C@H]1C1=CC=CN=C1 and is OK


 41%|████▏     | 104/252 [43:50<1:56:55, 47.40s/it]

2743-90-0 gives the SMILES: C1=CCC(C2=CC=CN=C2)NC1 and is OK


 42%|████▏     | 105/252 [43:58<1:27:23, 35.67s/it]

51095-86-4 gives the SMILES: C[N@+]1([O-])CCC[C@H]1C1=CC=CN=C1 and is OK
Retrying
Retrying
Retrying


 42%|████▏     | 106/252 [45:02<1:47:10, 44.05s/it]

FAILED TO RETRIEVE SMILES FOR CAS 2055-29-0 and is WRONGGGG.
Retrying
Retrying
Retrying


 42%|████▏     | 107/252 [46:16<2:08:13, 53.06s/it]

FAILED TO RETRIEVE SMILES FOR CAS 17708-87-1 and is WRONGGGG.


 43%|████▎     | 108/252 [46:23<1:34:25, 39.34s/it]

13078-04-1 gives the SMILES: C1=CN=CC(C2CCCCN2)=C1 and is OK


 43%|████▎     | 109/252 [46:31<1:11:13, 29.88s/it]

66093-90-1 gives the SMILES: C[NH2+]CCCC(=O)C1=CC=C[NH+]=C1.[Cl-].[Cl-] and is OK


 44%|████▎     | 110/252 [46:39<55:19, 23.37s/it]  

486-56-6 gives the SMILES: CN1C(=O)CC[C@H]1C1=CC=CN=C1 and is OK


 44%|████▍     | 111/252 [46:46<42:51, 18.23s/it]

73057-36-0 gives the SMILES: CN1CCC[C@H]1c1cccnc1.OC(=O)CC(C(=O)O)O and is OK
Retrying
Retrying
Retrying


 44%|████▍     | 112/252 [47:50<1:14:43, 32.02s/it]

FAILED TO RETRIEVE SMILES FOR CAS 1824020-12-3 and is WRONGGGG.
Retrying


 45%|████▍     | 113/252 [48:24<1:15:44, 32.70s/it]

494-97-3 gives the SMILES: C1=CN=CC([C@@H]2CCCN2)=C1 and is OK


 45%|████▌     | 114/252 [48:32<58:05, 25.26s/it]  

1060802-34-7 gives the SMILES: O=CC1=CN=CC(F)=C1Cl and is OK


 46%|████▌     | 115/252 [48:39<45:26, 19.90s/it]

535-83-1 gives the SMILES: C[N+]1=CC=CC(C(=O)[O-])=C1 and is OK


 46%|████▌     | 116/252 [48:46<36:17, 16.01s/it]

320386-54-7 gives the SMILES: Cl.O=C(O)C1=NC=CC=C1S and is OK


 46%|████▋     | 117/252 [48:52<29:02, 12.91s/it]

92761-98-3 gives the SMILES: c1([2H])nc([2H])c(c(c1[C@@H]1CCCN1)[2H])[2H] and is OK
Retrying
Retrying
Retrying


 47%|████▋     | 118/252 [55:13<4:35:29, 123.35s/it]

FAILED TO RETRIEVE SMILES FOR CAS 15268-31-2 and is WRONGGGG.
Retrying
Retrying
Retrying


 47%|████▋     | 119/252 [59:47<6:13:36, 168.55s/it]

FAILED TO RETRIEVE SMILES FOR CAS 1393569-52-2 and is WRONGGGG.
Retrying


 48%|████▊     | 120/252 [1:00:23<4:43:08, 128.70s/it]

29790-52-1 gives the SMILES: CN1CCCC1C1=CC=CN=C1.O=C(O)C1=CC=CC=C1O and is OK


 48%|████▊     | 121/252 [1:00:31<3:22:18, 92.66s/it] 

60138-76-3 gives the SMILES: CN(C)C1=NC=CC=C1C#N and is OK


 48%|████▊     | 122/252 [1:00:39<2:25:18, 67.07s/it]

2873-36-1 gives the SMILES: CC(C)C[C@@H]1NC(=O)[C@@H]2CCCN2C1=O and is OK
Retrying
Retrying
Retrying


 49%|████▉     | 123/252 [1:01:39<2:19:43, 64.99s/it]

FAILED TO RETRIEVE SMILES FOR CAS 4Z-N-0649 and is WRONGGGG.
Retrying


 49%|████▉     | 124/252 [1:02:12<1:58:21, 55.48s/it]

104809-30-5 gives the SMILES: NC1=NC=NC2=C1N=CN2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]2O[C@@H]([N+]3=CC=CC(C(=O)O)=C3)[C@H](O)[C@@H]2O)[C@@H](O)[C@H]1O.[Na+] and is OK
Retrying
Retrying
Retrying


 50%|████▉     | 125/252 [1:03:34<2:14:12, 63.41s/it]

FAILED TO RETRIEVE SMILES FOR CAS 339155-13-4 and is WRONGGGG.


 50%|█████     | 126/252 [1:03:44<1:39:44, 47.50s/it]

525-74-6 gives the SMILES: CN1CCC=C1c1cccnc1 and is OK


 50%|█████     | 127/252 [1:03:52<1:14:06, 35.57s/it]

2873-36-1 gives the SMILES: CC(C)C[C@@H]1NC(=O)[C@@H]2CCCN2C1=O and is OK


 51%|█████     | 128/252 [1:03:59<55:58, 27.09s/it]  

29681-45-6 gives the SMILES: COC(=O)C1=CN=CC(C)=C1 and is OK
Retrying
Retrying
Retrying


 51%|█████     | 129/252 [1:05:04<1:18:54, 38.49s/it]

FAILED TO RETRIEVE SMILES FOR CAS 4Z-N-0656 and is WRONGGGG.
Retrying


 52%|█████▏    | 130/252 [1:05:42<1:17:44, 38.24s/it]

3705-26-8 gives the SMILES: O=C1N[C@@H](CC2=CC=CC=C2)C(=O)N2CCC[C@@H]12 and is OK


 52%|█████▏    | 131/252 [1:05:50<58:57, 29.23s/it]  

39642-60-9 gives the SMILES: OC(=NC1=CC=CN=C1)NC1=CC=CN=C1 and is OK


 52%|█████▏    | 132/252 [1:05:59<46:07, 23.06s/it]

3222-49-9 gives the SMILES: CC1=CC(C(=O)O)=CN=C1 and is OK


 53%|█████▎    | 133/252 [1:06:08<37:11, 18.75s/it]

535-83-1 gives the SMILES: C[N+]1=CC=CC(C(=O)[O-])=C1 and is OK
Retrying
Retrying
Retrying


 53%|█████▎    | 134/252 [1:07:13<1:04:24, 32.75s/it]

FAILED TO RETRIEVE SMILES FOR CAS 75195-76-5 and is WRONGGGG.
Retrying


 54%|█████▎    | 135/252 [1:07:49<1:05:48, 33.75s/it]

21446-46-8 gives the SMILES: CN1CCC[C@H]1C1=CC=C[N+](C)=C1.[I-] and is OK


 54%|█████▍    | 136/252 [1:07:58<50:30, 26.13s/it]  

6019-06-3 gives the SMILES: CN1CCC[C@H]1C1=CC=CN=C1.O.O.O=C(O)[C@H](O)[C@@H](O)C(=O)O.O=C(O)[C@H](O)[C@@H](O)C(=O)O and is OK


 54%|█████▍    | 137/252 [1:08:06<40:05, 20.92s/it]

942922-74-9 gives the SMILES: COC(=O)C1=NC=CC(C2=CC=NC(C(=O)OC)=C2)=C1 and is OK


 55%|█████▍    | 138/252 [1:08:15<32:40, 17.20s/it]

66148-18-3 gives the SMILES: [2H]C1=C([2H])C([2H])=C(C2CCCN2)C([2H])=N1 and is OK
Retrying
Retrying
Retrying


 55%|█████▌    | 139/252 [1:09:19<59:03, 31.36s/it]

FAILED TO RETRIEVE SMILES FOR CAS 4Z-N-06111 and is WRONGGGG.
Retrying


 56%|█████▌    | 140/252 [1:09:56<1:01:28, 32.93s/it]

2854-40-2 gives the SMILES: CC(C)[C@@H]1NC(=O)[C@@H]2CCCN2C1=O and is OK


 56%|█████▌    | 141/252 [1:10:04<47:21, 25.60s/it]  

88660-53-1 gives the SMILES: CN1CCC[C@H]1C1=CC=CN=C1.O=C(O)C1=CC=CC=C1 and is OK
Retrying
Retrying
Retrying


 56%|█████▋    | 142/252 [1:11:06<1:06:49, 36.45s/it]

FAILED TO RETRIEVE SMILES FOR CAS 870975-59-0 and is WRONGGGG.
Retrying
Retrying
Retrying


 57%|█████▋    | 143/252 [1:12:11<1:21:59, 45.13s/it]

FAILED TO RETRIEVE SMILES FOR CAS 4Z-N-0617 and is WRONGGGG.


 57%|█████▋    | 144/252 [1:12:21<1:01:45, 34.31s/it]

23950-04-1 gives the SMILES: CN1CCCC1C1=CC=CC=N1 and is OK
Retrying
Retrying
Retrying


 58%|█████▊    | 145/252 [1:13:23<1:16:23, 42.83s/it]

FAILED TO RETRIEVE SMILES FOR CAS 75195-76-5 and is WRONGGGG.


 58%|█████▊    | 146/252 [1:13:39<1:01:21, 34.74s/it]

98-98-6 gives the SMILES: O=C(O)C1=CC=CC=N1 and is OK


 58%|█████▊    | 147/252 [1:13:49<47:52, 27.35s/it]  

3705-26-8 gives the SMILES: O=C1N[C@@H](CC2=CC=CC=C2)C(=O)N2CCC[C@@H]12 and is OK


 59%|█████▊    | 148/252 [1:13:59<38:03, 21.96s/it]

102074-19-1 gives the SMILES: CC1=CC(CO)=CN=C1 and is OK


 59%|█████▉    | 149/252 [1:14:09<31:33, 18.38s/it]

1657-32-5 gives the SMILES: NC1=C[N+]([O-])=CC=C1 and is OK
Retrying
Retrying
Retrying


 60%|█████▉    | 150/252 [1:15:11<53:47, 31.64s/it]

FAILED TO RETRIEVE SMILES FOR CAS 4Z-N-06100 and is WRONGGGG.


 60%|█████▉    | 151/252 [1:15:29<46:02, 27.35s/it]

1426174-36-8 gives the SMILES: [2H]C1=NC([2H])=C([C@@H]2CCCN2N=O)C([2H])=C1[2H] and is OK


 60%|██████    | 152/252 [1:15:35<35:15, 21.16s/it]

2854-40-2 gives the SMILES: CC(C)[C@@H]1NC(=O)[C@@H]2CCCN2C1=O and is OK


 61%|██████    | 153/252 [1:15:42<27:43, 16.80s/it]

535-83-1 gives the SMILES: C[N+]1=CC=CC(C(=O)[O-])=C1 and is OK
Retrying
Retrying
Retrying


 61%|██████    | 154/252 [1:16:46<50:32, 30.94s/it]

FAILED TO RETRIEVE SMILES FOR CAS 129547-84-8 and is WRONGGGG.
Retrying
Retrying
Retrying


 62%|██████▏   | 155/252 [1:17:54<1:08:11, 42.18s/it]

FAILED TO RETRIEVE SMILES FOR CAS 1824020-12-3 and is WRONGGGG.


 62%|██████▏   | 156/252 [1:18:03<51:38, 32.28s/it]  

591-22-0 gives the SMILES: CC1=CN=CC(C)=C1 and is OK


 62%|██████▏   | 157/252 [1:18:11<39:33, 24.98s/it]

347841-88-7 gives the SMILES: [2H]C1=NC([2H])=C(C(N)=O)C([2H])=C1[2H] and is OK
Retrying
Retrying
Retrying


 63%|██████▎   | 158/252 [1:19:10<54:58, 35.09s/it]

FAILED TO RETRIEVE SMILES FOR CAS 51020-67-8 and is WRONGGGG.
Retrying


 63%|██████▎   | 159/252 [1:19:40<51:48, 33.42s/it]

1346601-08-8 gives the SMILES: C([2H])(N1CCC(C1)c1cccnc1)([2H])[2H] and is OK


 63%|██████▎   | 160/252 [1:19:51<41:17, 26.93s/it]

2873-36-1 gives the SMILES: CC(C)C[C@@H]1NC(=O)[C@@H]2CCCN2C1=O and is OK


 64%|██████▍   | 161/252 [1:19:57<31:07, 20.52s/it]

494-52-0 gives the SMILES: C1=CN=CC([C@@H]2CCCCN2)=C1 and is OK


 64%|██████▍   | 162/252 [1:20:04<24:52, 16.58s/it]

71267-22-6 gives the SMILES: O=NN1CC=CC[C@H]1C1=CC=CN=C1 and is OK


 65%|██████▍   | 163/252 [1:20:12<20:39, 13.93s/it]

1207384-48-2 gives the SMILES: [2H]C([2H])([2H])N1C=C(C(N)=O)C=CC1=O and is OK


 65%|██████▌   | 164/252 [1:20:19<17:09, 11.70s/it]

1020719-70-3 gives the SMILES: [2H]C1=NC([2H])=C(C2CCC(=O)N2)C([2H])=C1[2H] and is OK
Retrying
Retrying
Retrying


 65%|██████▌   | 165/252 [1:21:20<38:42, 26.70s/it]

FAILED TO RETRIEVE SMILES FOR CAS 1469367-99-4 and is WRONGGGG.
Retrying


 66%|██████▌   | 166/252 [1:21:49<39:06, 27.28s/it]

5746-86-1 gives the SMILES: C1=CN=CC(C2CCCN2)=C1 and is OK


 66%|██████▋   | 167/252 [1:21:57<30:19, 21.40s/it]

532-12-7 gives the SMILES: C1=CN=CC(C2=NCCC2)=C1 and is OK
Retrying
Retrying
Retrying


 67%|██████▋   | 168/252 [1:22:59<47:00, 33.58s/it]

FAILED TO RETRIEVE SMILES FOR CAS TR-C725179 and is WRONGGGG.
Retrying


 67%|██████▋   | 169/252 [1:23:33<46:46, 33.82s/it]

350818-69-8 gives the SMILES: [2H]C1=C([2H])C([2H])=C(C2CCCN2C)C([2H])=N1 and is OK


 67%|██████▋   | 170/252 [1:23:43<36:36, 26.79s/it]

1020719-11-2 gives the SMILES: [2H]C1=C([2H])C([2H])=C(C2CC=CCN2)C([2H])=N1 and is OK


 68%|██████▊   | 171/252 [1:23:53<29:09, 21.60s/it]

114-33-0 gives the SMILES: CNC(=O)C1=CC=CN=C1 and is OK


 68%|██████▊   | 172/252 [1:23:58<22:21, 16.76s/it]

1207384-47-1 gives the SMILES: [2H]C([2H])([2H])N1C=CC(=O)C(C(N)=O)=C1 and is OK


 69%|██████▊   | 173/252 [1:24:06<18:32, 14.08s/it]

494-04-2 gives the SMILES: C1=CN=CC(C2=CC=NC(C3=CC=CN=C3)=C2)=C1 and is OK


 69%|██████▉   | 174/252 [1:24:14<16:02, 12.33s/it]

34834-67-8 gives the SMILES: CN1C(=O)[C@H](O)C[C@H]1C1=CC=CN=C1 and is OK


 69%|██████▉   | 175/252 [1:24:22<13:52, 10.82s/it]

15569-85-4 gives the SMILES: CN1C(=O)CCC1C1=CC=CN=C1 and is OK


 70%|██████▉   | 176/252 [1:24:29<12:18,  9.72s/it]

71267-22-6 gives the SMILES: O=NN1CC=CC[C@H]1C1=CC=CN=C1 and is OK


 70%|███████   | 177/252 [1:24:36<11:07,  8.90s/it]

66148-17-2 gives the SMILES: [2H]C1=NC([2H])=C(C2=NCCC2)C([2H])=C1[2H] and is OK


 71%|███████   | 178/252 [1:24:43<10:22,  8.42s/it]

139427-57-9 gives the SMILES: CN1C(=O)CCC1C1=C[N+](C2OC(C(=O)[O-])C(O)C(O)C2O)=CC=C1 and is OK


 71%|███████   | 179/252 [1:24:51<10:02,  8.25s/it]

51095-86-4 gives the SMILES: C[N@+]1([O-])CCC[C@H]1C1=CC=CN=C1 and is OK


 71%|███████▏  | 180/252 [1:24:58<09:32,  7.95s/it]

66148-19-4 gives the SMILES: [2H]C1=NC([2H])=C(C2CCCN2N=O)C([2H])=C1[2H] and is OK


 72%|███████▏  | 181/252 [1:25:05<09:02,  7.64s/it]

21446-46-8 gives the SMILES: CN1CCC[C@H]1C1=CC=C[N+](C)=C1.[I-] and is OK


 72%|███████▏  | 182/252 [1:25:13<08:59,  7.71s/it]

486-56-6 gives the SMILES: CN1C(=O)CC[C@H]1C1=CC=CN=C1 and is OK
Retrying
Retrying
Retrying


 73%|███████▎  | 183/252 [1:26:14<27:15, 23.71s/it]

FAILED TO RETRIEVE SMILES FOR CAS TR-C725170 and is WRONGGGG.
Retrying


 73%|███████▎  | 184/252 [1:26:46<29:49, 26.32s/it]

53-84-9 gives the SMILES: NC(=O)C1=C[N+](C2OC(COP(=O)([O-])OP(=O)(O)OCC3OC(N4C=NC5=C4N=CN=C5N)C(O)C3O)C(O)C2O)=CC=C1 and is OK
Retrying
Retrying
Retrying


 73%|███████▎  | 185/252 [1:27:55<43:24, 38.87s/it]

FAILED TO RETRIEVE SMILES FOR CAS 17708-87-1 and is WRONGGGG.


 74%|███████▍  | 186/252 [1:28:03<32:46, 29.80s/it]

34366-21-7 gives the SMILES: C1=CN=CC([C@H]2CCCCN2)=C1 and is OK


 74%|███████▍  | 187/252 [1:28:12<25:25, 23.47s/it]

4314-66-3 gives the SMILES: CCNC(=O)C1=CC=CN=C1 and is OK


 75%|███████▍  | 188/252 [1:28:20<20:06, 18.86s/it]

66148-19-4 gives the SMILES: [2H]C1=NC([2H])=C(C2CCCN2N=O)C([2H])=C1[2H] and is OK


 75%|███████▌  | 189/252 [1:28:29<16:34, 15.78s/it]

494-98-4 gives the SMILES: C1=CN=CC(C2=CC=CN2)=C1 and is OK


 75%|███████▌  | 190/252 [1:28:37<13:54, 13.46s/it]

764661-23-6 gives the SMILES: [2H]C1=NC([2H])=C(C(=O)CCCNC)C([2H])=C1[2H] and is OK


 76%|███████▌  | 191/252 [1:28:45<12:09, 11.96s/it]

146275-18-5 gives the SMILES: CN1C(=O)[C@H](O)C[C@H]1C1=C[N+]([C@@H]2O[C@H](C(=O)[O-])[C@@H](O)[C@H](O)[C@H]2O)=CC=C1 and is OK


 76%|███████▌  | 192/252 [1:28:54<10:59, 10.99s/it]

159956-78-2 gives the SMILES: [2H]C([2H])([2H])N1C(=O)[C@@H](O)C[C@@H]1C1=CC=CN=C1 and is OK


 77%|███████▋  | 193/252 [1:29:02<09:59, 10.15s/it]

6138-41-6 gives the SMILES: C[N+]1=CC=CC(C(=O)O)=C1.[Cl-] and is OK


 77%|███████▋  | 194/252 [1:29:10<09:14,  9.56s/it]

487-19-4 gives the SMILES: CN1C=CC=C1C1=CC=CN=C1 and is OK


 77%|███████▋  | 195/252 [1:29:18<08:40,  9.13s/it]

69980-24-1 gives the SMILES: [2H]C([2H])([2H])N1CCCC1C1=CC=CN=C1 and is OK


 78%|███████▊  | 196/252 [1:29:26<08:07,  8.70s/it]

66148-19-4 gives the SMILES: [2H]C1=NC([2H])=C(C2CCCN2N=O)C([2H])=C1[2H] and is OK


 78%|███████▊  | 197/252 [1:29:32<07:16,  7.94s/it]

909014-86-4 gives the SMILES: [13C]([2H])(N1CCCC1c1cccnc1)([2H])[2H] and is OK


 79%|███████▊  | 198/252 [1:29:41<07:20,  8.15s/it]

110952-70-0 gives the SMILES: [2H]C([2H])([2H])N1C(=O)CCC1C1=CC=CN=C1 and is OK


 79%|███████▉  | 199/252 [1:29:49<07:12,  8.17s/it]

1020719-08-7 gives the SMILES: [2H]C1=C([2H])C([2H])=C(C2CCCCN2)C([2H])=N1 and is OK
Retrying
Retrying
Retrying


 79%|███████▉  | 200/252 [1:30:50<20:42, 23.89s/it]

FAILED TO RETRIEVE SMILES FOR CAS 1040920-61-3 and is WRONGGGG.
Retrying


 80%|███████▉  | 201/252 [1:31:19<21:42, 25.53s/it]

1020719-68-9 gives the SMILES: [2H]C1=NC([2H])=C(C2CCCCN2N=O)C([2H])=C1[2H] and is OK
Retrying
Retrying
Retrying


 80%|████████  | 202/252 [1:37:40<1:50:09, 132.19s/it]

FAILED TO RETRIEVE SMILES FOR CAS 769-49-3 and is WRONGGGG.
Retrying
Retrying


 81%|████████  | 203/252 [1:42:03<2:19:59, 171.41s/it]

125630-26-4 gives the SMILES: CN1CCC(C(=O)C2=CC=CN=C2)C1 and is OK


 81%|████████  | 204/252 [1:42:11<1:38:01, 122.53s/it]

701-44-0 gives the SMILES: CN1C=C(C(N)=O)C=CC1=O and is OK


 81%|████████▏ | 205/252 [1:42:19<1:08:53, 87.95s/it] 

25162-00-9 gives the SMILES: CN1CCC[C@@H]1C1=CC=CN=C1 and is OK


 82%|████████▏ | 206/252 [1:42:26<48:51, 63.73s/it]  

284685-07-0 gives the SMILES: [2H]C1=C([2H])C([2H])=C([C@H]2CCCN2C)C([2H])=N1 and is OK


 82%|████████▏ | 207/252 [1:42:33<35:09, 46.87s/it]

494-97-3 gives the SMILES: C1=CN=CC([C@@H]2CCCN2)=C1 and is OK


 83%|████████▎ | 208/252 [1:42:42<25:50, 35.24s/it]

66148-18-3 gives the SMILES: [2H]C1=C([2H])C([2H])=C(C2CCCN2)C([2H])=N1 and is OK


 83%|████████▎ | 209/252 [1:42:49<19:15, 26.87s/it]

23950-04-1 gives the SMILES: CN1CCCC1C1=CC=CC=N1 and is OK


 83%|████████▎ | 210/252 [1:42:57<14:55, 21.33s/it]

581-49-7 gives the SMILES: C1=CC[C@@H](C2=CC=CN=C2)NC1 and is OK


 84%|████████▎ | 211/252 [1:43:03<11:21, 16.63s/it]

1246819-72-6 gives the SMILES: CNCCC[13C](=O)[13C]1=[13CH][13CH]=[13CH]N=[13CH]1 and is OK


 84%|████████▍ | 212/252 [1:43:08<08:50, 13.25s/it]

98-98-6 gives the SMILES: O=C(O)C1=CC=CC=N1 and is OK
Retrying
Retrying
Retrying


 85%|████████▍ | 213/252 [1:44:09<17:53, 27.52s/it]

FAILED TO RETRIEVE SMILES FOR CAS TR-N424587 and is WRONGGGG.
Retrying


 85%|████████▍ | 214/252 [1:44:38<17:43, 27.98s/it]

54-11-5 gives the SMILES: CN1CCC[C@H]1C1=CC=CN=C1 and is OK


 85%|████████▌ | 215/252 [1:44:45<13:18, 21.58s/it]

1215842-75-3 gives the SMILES: [2H][13C]([2H])([2H])N1C(=O)CCC1C1=CC=CN=C1 and is OK


 86%|████████▌ | 216/252 [1:44:50<10:03, 16.75s/it]

66148-15-0 gives the SMILES: [2H]C1=NC([2H])=C(C(=O)O)C([2H])=C1[2H] and is OK


 86%|████████▌ | 217/252 [1:44:57<08:01, 13.74s/it]

153536-53-9 gives the SMILES: CN1CCC[C@H]1C1=CC=C[N+]([C@@H]2O[C@H](C(=O)[O-])[C@@H](O)[C@H](O)[C@H]2O)=C1 and is OK


 87%|████████▋ | 218/252 [1:45:04<06:34, 11.61s/it]

2743-90-0 gives the SMILES: C1=CCC(C2=CC=CN=C2)NC1 and is OK


 87%|████████▋ | 219/252 [1:45:10<05:32, 10.07s/it]

347841-88-7 gives the SMILES: [2H]C1=NC([2H])=C(C(N)=O)C([2H])=C1[2H] and is OK


 87%|████████▋ | 220/252 [1:45:17<04:48,  9.03s/it]

1189727-40-9 gives the SMILES: [2H]C([2H])([2H])NCCCC(=O)C1=CC=CN=C1 and is OK


 88%|████████▊ | 221/252 [1:45:23<04:10,  8.08s/it]

1216737-36-8 gives the SMILES: [2H]C1=NC([2H])=C(C(=O)NCC(=O)O)C([2H])=C1[2H] and is OK
Retrying
Retrying
Retrying


 88%|████████▊ | 222/252 [1:51:44<59:59, 119.98s/it]

FAILED TO RETRIEVE SMILES FOR CAS 1426174-36-8 and is WRONGGGG.
Retrying
Retrying


 88%|████████▊ | 223/252 [1:56:10<1:19:13, 163.92s/it]

350818-68-7 gives the SMILES: [2H]C1=C([2H])C([2H])=C(C2CCC(=O)N2C)C([2H])=N1 and is OK
Retrying


 89%|████████▉ | 224/252 [1:56:45<58:24, 125.17s/it]  

66148-18-3 gives the SMILES: [2H]C1=C([2H])C([2H])=C(C2CCCN2)C([2H])=N1 and is OK
Retrying
Retrying
Retrying


 89%|████████▉ | 225/252 [1:57:52<48:25, 107.62s/it]

FAILED TO RETRIEVE SMILES FOR CAS 17708-87-1 and is WRONGGGG.


 90%|████████▉ | 226/252 [1:57:59<33:38, 77.64s/it] 

871894-35-8 gives the SMILES: CN1CCCC1C1=CC=CN=C1Cl and is OK


 90%|█████████ | 227/252 [1:58:08<23:41, 56.87s/it]

132929-88-5 gives the SMILES: CN1C(=O)[C@H](O[C@@H]2O[C@H](C(=O)O)[C@@H](O)[C@H](O)[C@H]2O)C[C@H]1C1=CC=CN=C1 and is OK


 90%|█████████ | 228/252 [1:58:16<16:56, 42.36s/it]

6419-36-9 gives the SMILES: Cl.O=C(O)CC1=CC=CN=C1 and is OK


 91%|█████████ | 229/252 [1:58:25<12:19, 32.15s/it]

90872-72-3 gives the SMILES: CC1=CC=C(C2CCCN2)C=N1 and is OK


 91%|█████████▏| 230/252 [1:58:32<09:06, 24.86s/it]

581-50-0 gives the SMILES: C1=CC=C(C2=CC=CN=C2)N=C1 and is OK


 92%|█████████▏| 231/252 [1:58:40<06:55, 19.78s/it]

764661-24-7 gives the SMILES: [2H]C1=NC([2H])=C(C(=O)CCCN(C)N=O)C([2H])=C1[2H] and is OK


 92%|█████████▏| 232/252 [1:58:48<05:23, 16.19s/it]

80508-23-2 gives the SMILES: O=NN1CCCC1C1=CC=CN=C1 and is OK


 92%|█████████▏| 233/252 [1:58:56<04:20, 13.68s/it]

887407-16-1 gives the SMILES: O=NN1CC=CCC1C1=CC=CN=C1 and is OK


 93%|█████████▎| 234/252 [1:59:04<03:37, 12.08s/it]

612-64-6 gives the SMILES: CCN(N=O)C1=CC=CC=C1 and is OK


 93%|█████████▎| 235/252 [1:59:13<03:05, 10.93s/it]

486-56-6 gives the SMILES: CN1C(=O)CC[C@H]1C1=CC=CN=C1 and is OK
Retrying
Retrying
Retrying


 94%|█████████▎| 236/252 [2:00:13<06:51, 25.74s/it]

FAILED TO RETRIEVE SMILES FOR CAS TR-C725165 and is WRONGGGG.
Retrying
Retrying
Retrying


 94%|█████████▍| 237/252 [2:01:18<09:23, 37.57s/it]

FAILED TO RETRIEVE SMILES FOR CAS TR-E936805 and is WRONGGGG.


 94%|█████████▍| 238/252 [2:01:25<06:38, 28.50s/it]

918625-36-2 gives the SMILES: OC/C=CC[C@@H](c1cccnc1)N and is OK


 95%|█████████▍| 239/252 [2:01:33<04:48, 22.19s/it]

82111-06-6 gives the SMILES: CC1=CC(C2CCCN2C)=CN=C1 and is OK


 95%|█████████▌| 240/252 [2:01:42<03:40, 18.41s/it]

5746-86-1 gives the SMILES: C1=CN=CC(C2CCCN2)=C1 and is OK
Retrying
Retrying
Retrying


 96%|█████████▌| 241/252 [2:02:40<05:31, 30.17s/it]

FAILED TO RETRIEVE SMILES FOR CAS 1217540-34-5 and is WRONGGGG.
Retrying
Retrying
Retrying


 96%|█████████▌| 242/252 [2:03:45<06:45, 40.54s/it]

FAILED TO RETRIEVE SMILES FOR CAS TR-A429258 and is WRONGGGG.


 96%|█████████▋| 243/252 [2:03:52<04:33, 30.41s/it]

1076199-53-5 gives the SMILES: O=C(N1CCCC1c1cccnc1)OC(C)(C)C and is OK


 97%|█████████▋| 244/252 [2:03:59<03:07, 23.45s/it]

147732-32-9 gives the SMILES: O[Cl](=O)(=O)=O.O=C1CCC(N1C)c1ccccn1 and is OK


 97%|█████████▋| 245/252 [2:04:06<02:09, 18.55s/it]

887406-85-1 gives the SMILES: COc1ccc(cc1)CN1C(=O)CCC1c1cccnc1 and is OK


 98%|█████████▊| 246/252 [2:04:12<01:29, 14.88s/it]

4315-37-1 gives the SMILES: OC(=O)[C@@H]([C@H](C(=O)O)O)O.Cn1cccc1c1cccnc1 and is OK
Retrying
Retrying
Retrying


 98%|█████████▊| 247/252 [2:05:14<02:24, 28.92s/it]

FAILED TO RETRIEVE SMILES FOR CAS TR-N524263 and is WRONGGGG.
Retrying
Retrying
Retrying


 98%|█████████▊| 248/252 [2:06:21<02:41, 40.27s/it]

FAILED TO RETRIEVE SMILES FOR CAS 107971-06-2 and is WRONGGGG.


 99%|█████████▉| 249/252 [2:06:28<01:31, 30.37s/it]

27247-34-3 gives the SMILES: [O-]C(=O)c1cc(cnc1)C(=O)OC.[K+] and is OK
Retrying
Retrying
Retrying


 99%|█████████▉| 250/252 [2:07:28<01:18, 39.32s/it]

FAILED TO RETRIEVE SMILES FOR CAS 1217540-34-5 and is WRONGGGG.
Retrying


100%|█████████▉| 251/252 [2:08:04<00:38, 38.43s/it]

857146-29-3 gives the SMILES: CC1=CC(CO)=CN=C1C and is OK


100%|██████████| 252/252 [2:08:11<00:00, 28.76s/it]

20971-79-3 gives the SMILES: O=C1CCC(O1)c1cccnc1 and is OK
Retrying
Retrying
Retrying


100%|██████████| 252/252 [2:14:32<00:00, 32.03s/it]

FAILED TO RETRIEVE SMILES FOR CAS TR-B426320 and is WRONGGGG.
Total compounds: 252
Successfully converted: 208
Conversion rate: 82.54%
                                           product_url   cas_number  \
0                                    nornicotine-dl-rg     494-97-3   
1    15-methano-8h-pyrido12-a15diazocin-8-one-12345...     485-35-8   
2             3-pyridinecarboxamide-n-methoxy-n-methyl   95091-91-1   
3     4-iodo-1-methyl-12-dihydropyridine-3-carboxamide    6456-44-6   
4                  3-34-dihydro-2h-pyrrol-5-ylpyridine     532-12-7   
..                                                 ...          ...   
243                          n-4-methoxybenzylcotinine  887406-85-1   
244                         beta-nicotyrine-l-tartrate    4315-37-1   
247     5-methoxycarbonylnicotinic-acid-potassium-salt   27247-34-3   
249                5-hydroxymethyl-23-dimethylpyridine  857146-29-3   
250               rac-5-3-pyridyltetrahydro-2-furanone   20971-79-3   

            




In [None]:
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem
import torch
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader as GeoDataLoader
from torch.utils.data import Dataset
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, global_mean_pool
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from gnn import GNNModel, smiles_to_graph, MoleculeDataset

In [None]:
df = pd.read_csv('nicotine_derivatives_with_smiles.csv')
df_cleaned = df[df['SMILES'].notna() & (df['SMILES'] != '')]
df_cleaned.to_csv('cleaned_nicotine_derivatives_with_smiles.csv', index=False)

In [None]:
df = pd.read_csv('cleaned_nicotine_derivatives_with_smiles.csv')
nicotine_smiles = df['SMILES']

nicotine_graphs = [graph for graph in [smiles_to_graph(smile) for smile in nicotine_smiles] if graph is not None]

model_path = 'gnn_model.pth'
model = GNNModel(num_node_features=26, hidden_dim=128, output_dim=2)
model.load_state_dict(torch.load(model_path))
model.eval()

predictions = []

with torch.no_grad():
    for graph in nicotine_graphs:
        output = model(graph)
        _, predicted = torch.max(output, dim=1)
        predictions.append(predicted.item())

df['Predicted Label'] = predictions

df.to_csv('nicotine_derivatives_with_predictions.csv', index=False)

total = len(df)
predicted_count = len(predictions)
print(f"Total rows: {total}")
print(f"Predictions made: {predicted_count}")
print(f"Updated CSV saved with predictions.")

Error processing SMILES: nan - No registered converter was able to produce a C++ rvalue of type std::__1::basic_string<wchar_t, std::__1::char_traits<wchar_t>, std::__1::allocator<wchar_t>> from this Python object of type float
Error processing SMILES: nan - No registered converter was able to produce a C++ rvalue of type std::__1::basic_string<wchar_t, std::__1::char_traits<wchar_t>, std::__1::allocator<wchar_t>> from this Python object of type float
Error processing SMILES: nan - No registered converter was able to produce a C++ rvalue of type std::__1::basic_string<wchar_t, std::__1::char_traits<wchar_t>, std::__1::allocator<wchar_t>> from this Python object of type float
Error processing SMILES: nan - No registered converter was able to produce a C++ rvalue of type std::__1::basic_string<wchar_t, std::__1::char_traits<wchar_t>, std::__1::allocator<wchar_t>> from this Python object of type float
Error processing SMILES: nan - No registered converter was able to produce a C++ rvalue 

ValueError: Length of values (0) does not match length of index (260)