In [69]:
import requests
from bs4 import BeautifulSoup
import json
import re
import csv

In [70]:

url = "https://cymitquimica.com/categories/1828/nicotine-and-nicotine-derivatives/?srsltid=AfmBOor5CHkEY17td7i8alPNqfsjPX-VKsd6igxeoJFVzukYf576WD9_&page="
response = requests.get(url)

soup = BeautifulSoup(response.content, 'html.parser')

json_ld = None
for script in soup.find_all('script', type='application/ld+json'):
    try:
        json_ld = json.loads(script.string)
        break
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON: {e}")
        continue

if json_ld:
    if isinstance(json_ld, dict):
        print("json_ld is a dictionary")
        print(json.dumps(json_ld, indent=2))
    else:
        print(f"Unexpected JSON-LD structure: {type(json_ld)}")
else:
    print("No valid JSON-LD found")


json_ld is a dictionary
{
  "@context": "http://schema.org",
  "@type": "WebSite",
  "url": "https://cymitquimica.com",
  "dateModified": "2019-02-15T00:00",
  "image": "https://static.cymitquimica.com/public/img/logo-cymit.png",
  "potentialAction": {
    "@type": "SearchAction",
    "target": "https://cymitquimica.com/search/{search_term_string}/",
    "query-input": "required name=search_term_string"
  },
  "sameAs": [
    "https://www.facebook.com/cymitquimica/",
    "https://twitter.com/cymitquimica",
    "https://es.linkedin.com/company/cymit-quimica-s-l-",
    "https://www.instagram.com/cymitquimica/",
    "https://www.pinterest.es/cymit/",
    "https://cymit.tumblr.com/"
  ]
}


In [71]:
response = requests.get(url)

soup = BeautifulSoup(response.text, "html.parser")

product_links = soup.find_all("a", class_="js-product-link")

for link in product_links:
    product_url = link.get("href")
    if product_url:
        print(f"https://example.com{product_url}")


In [72]:
def scrape_page(page_number):
    url_new = url + str(page_number)
    response = requests.get(url_new)
    
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, "html.parser")
        product_links = soup.find_all("a", class_="js-product-link")
        
        for link in product_links:
            product_url = link.get("href")
            if product_url:
                # Extract last part (after the last "/")
                last_part = product_url.strip("/").split("/")[-1]
                second_to_last_part = product_url.strip("/").split("/")[-2]
                product_urls.append(last_part)
                cas_numbers.append(second_to_last_part)
    else:
        print(f"Failed to retrieve page {page_number}")

In [73]:
product_urls = []
cas_numbers = []

for page in range(1, 14):
    print(f"Scraping page {page}...")
    scrape_page(page)

csv_filename = "nicotine_derivatives_data.csv"
with open(csv_filename, mode="w", newline="", encoding="utf-8") as file:
    writer = csv.writer(file)
    writer.writerow(["product_url", "cas_number"])
    for url, cas in zip(product_urls, cas_numbers):
        writer.writerow([url, cas])

print(f"Number of entries saved in {csv_filename}: {len(product_urls)}")
print(f"Number of CAS numbers: {len(cas_numbers)}")

Scraping page 1...
Scraping page 2...
Scraping page 3...
Scraping page 4...
Scraping page 5...
Scraping page 6...
Scraping page 7...
Scraping page 8...
Scraping page 9...
Scraping page 10...
Scraping page 11...
Scraping page 12...
Scraping page 13...
Number of entries saved in nicotine_derivatives_data.csv: 252
Number of CAS numbers: 252


In [74]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
import pandas as pd
from tqdm import tqdm
import time
from selenium.webdriver.chrome.options import Options

options = Options()
options.add_argument("--headless")  # Run in headless mode for speed
options.add_argument("--disable-gpu")  # Avoid GPU rendering issues
options.add_argument("--no-sandbox")
driver = webdriver.Chrome(options=options)

# Set a page load timeout (e.g., 180 seconds)
driver.set_page_load_timeout(180)

In [None]:
def get_smiles_from_cas(cas_number):
    url = f'https://www.1stsci.com/products/{cas_number}?cas={cas_number}'
    
    for attempt in range(3):
        try:
            driver.get(url)  # Load the page
            smiles_element = WebDriverWait(driver, 10).until(
                EC.visibility_of_element_located((By.CLASS_NAME, "smiles"))
            )
            smiles_tag = driver.find_element(By.CLASS_NAME, 'smiles')
            smiles = smiles_tag.text
            print(f'{cas_number} gives the SMILES: {smiles} and is OK')
            # time.sleep(7)
            return smiles
        except Exception as e:
            print("Retrying CAS {cas_number}")
            time.sleep(7)
    print(f"FAILED TO RETRIEVE SMILES FOR CAS {cas_number} and is WRONGGGG.")
    return None

In [None]:
df = pd.read_csv('nicotine_derivatives_data.csv')
print(df.columns)

tqdm.pandas()
df['SMILES'] = df['cas_number'].progress_apply(get_smiles_from_cas)

total = len(df)
converted = df['SMILES'].notna().sum()
print(f"Total compounds: {total}")
print(f"Successfully converted: {converted}")
print(f"Conversion rate: {converted/total:.2%}")

df = df.dropna(subset=['SMILES'])
print(df)

df.to_csv('nicotine_derivatives_with_smiles.csv', index=False)

Index(['product_url', 'cas_number'], dtype='object')


  1%|          | 2/252 [00:09<20:20,  4.88s/it]

494-97-3 gives the SMILES: C1=CN=CC([C@@H]2CCCN2)=C1 and is OK


  1%|          | 3/252 [00:19<29:03,  7.00s/it]

485-35-8 gives the SMILES: O=C1C=CC=C2[C@H]3CNC[C@H](C3)CN21 and is OK


  2%|▏         | 4/252 [00:28<32:18,  7.82s/it]

95091-91-1 gives the SMILES: CON(C)C(=O)C1=CC=CN=C1 and is OK


  2%|▏         | 5/252 [00:37<33:06,  8.04s/it]

6456-44-6 gives the SMILES: CN1C=CC(I)=C(C(N)=O)C1 and is OK


  2%|▏         | 6/252 [00:46<34:05,  8.31s/it]

532-12-7 gives the SMILES: C1=CN=CC(C2=NCCC2)=C1 and is OK


  3%|▎         | 7/252 [00:55<35:15,  8.63s/it]

20260-53-1 gives the SMILES: Cl.O=C(Cl)C1=CC=CN=C1 and is OK


  3%|▎         | 8/252 [01:04<35:46,  8.80s/it]

2743-90-0 gives the SMILES: C1=CCC(C2=CC=CN=C2)NC1 and is OK


  4%|▎         | 9/252 [01:13<35:31,  8.77s/it]

609-71-2 gives the SMILES: O=C(O)C1=CC=CN=C1O and is OK


  4%|▍         | 10/252 [01:22<35:18,  8.76s/it]

59288-43-6 gives the SMILES: O=C(O)C1=CN=C([N+](=O)[O-])C(O)=C1 and is OK


  4%|▍         | 11/252 [01:31<35:31,  8.85s/it]

5470-70-2 gives the SMILES: COC(=O)C1=CC=C(C)N=C1 and is OK


  5%|▍         | 12/252 [01:39<34:48,  8.70s/it]

3562-11-6 gives the SMILES: O=C(O)C1CCC(C2=CC=CN=C2)N1 and is OK


  5%|▌         | 13/252 [01:48<35:12,  8.84s/it]

177785-14-7 gives the SMILES: COC(=O)C1=CC=CN=C1CCl and is OK


  6%|▌         | 14/252 [01:57<35:03,  8.84s/it]

29681-45-6 gives the SMILES: COC(=O)C1=CN=CC(C)=C1 and is OK


  6%|▌         | 15/252 [02:06<34:42,  8.78s/it]

6960-22-1 gives the SMILES: CC1=CC=C(C(N)=O)C=N1 and is OK


  6%|▋         | 16/252 [02:15<35:20,  8.99s/it]

1802-30-8 gives the SMILES: O=C(O)C1=CC=C(C2=CC=C(C(=O)O)C=N2)N=C1 and is OK


  7%|▋         | 17/252 [02:24<35:31,  9.07s/it]

1215721-40-6 gives the SMILES: Cl.ClC1=CC=CC(COC2=CN=CC(N3CCNCC3)=N2)=C1 and is OK


  7%|▋         | 18/252 [02:34<36:16,  9.30s/it]

132334-98-6 gives the SMILES: CCOC(=O)C1=CC=C(Br)N=C1 and is OK


  8%|▊         | 19/252 [02:44<36:27,  9.39s/it]

89690-09-5 gives the SMILES: C[NH2+]CCCC[NH3+].[Cl-].[Cl-] and is OK


  8%|▊         | 20/252 [02:54<36:57,  9.56s/it]

494-52-0 gives the SMILES: C1=CN=CC([C@@H]2CCCCN2)=C1 and is OK


  8%|▊         | 21/252 [03:03<36:35,  9.51s/it]

98-92-0 gives the SMILES: NC(=O)C1=CC=CN=C1 and is OK


  9%|▊         | 22/252 [03:13<36:29,  9.52s/it]

6197-39-3 gives the SMILES: COC(=O)C1=CCCNC1.Cl and is OK


  9%|▉         | 23/252 [03:22<36:26,  9.55s/it]

5398-44-7 gives the SMILES: O=C(O)C1=CC(Cl)=NC(Cl)=C1 and is OK


 10%|▉         | 24/252 [03:32<35:50,  9.43s/it]

61445-55-4 gives the SMILES: CN(CCCC(=O)O)N=O and is OK


 10%|▉         | 25/252 [03:41<35:14,  9.32s/it]

5006-66-6 gives the SMILES: O=C(O)C1=CNC(=O)C=C1 and is OK


 10%|█         | 26/252 [03:50<34:42,  9.21s/it]

5746-86-1 gives the SMILES: C1=CN=CC(C2CCCN2)=C1 and is OK


 11%|█         | 27/252 [03:58<33:59,  9.06s/it]

38496-18-3 gives the SMILES: O=C(O)C1=CC=C(Cl)N=C1Cl and is OK


 11%|█         | 28/252 [04:07<33:30,  8.97s/it]

39178-35-3 gives the SMILES: Cl.O=C(Cl)C1=CC=NC=C1 and is OK


 12%|█▏        | 29/252 [04:16<32:50,  8.83s/it]

129747-52-0 gives the SMILES: COC(=O)C1=CC(CO)=CN=C1 and is OK


 12%|█▏        | 30/252 [04:24<32:23,  8.75s/it]

38806-38-1 gives the SMILES: NC(=O)C1=C[N+]([C@@H]2O[C@H](COP(=O)([O-])OP(=O)(O)OC[C@H]3O[C@@H](N4C=NC5=C4N=CN4C=CN=C54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)=CC=C1 and is OK


 12%|█▏        | 31/252 [04:33<32:21,  8.79s/it]

64091-91-4 gives the SMILES: CN(CCCC(=O)C1=CC=CN=C1)N=O and is OK


 13%|█▎        | 32/252 [04:42<32:22,  8.83s/it]

486-56-6 gives the SMILES: CN1C(=O)CC[C@H]1C1=CC=CN=C1 and is OK


 13%|█▎        | 33/252 [04:52<33:12,  9.10s/it]

1094-61-7 gives the SMILES: NC(=O)C1=C[N+]([C@@H]2O[C@H](COP(=O)([O-])O)[C@@H](O)[C@H]2O)=CC=C1 and is OK


 13%|█▎        | 34/252 [05:01<33:42,  9.28s/it]

27828-71-3 gives the SMILES: O=C(O)C1=CC(O)=CN=C1 and is OK


 14%|█▍        | 35/252 [05:12<34:44,  9.61s/it]

78348-28-4 gives the SMILES: O=C(ON1C(=O)CCC1=O)C1=CC=CN=C1 and is OK


 14%|█▍        | 36/252 [05:21<33:51,  9.41s/it]

66093-90-1 gives the SMILES: C[NH2+]CCCC(=O)C1=CC=C[NH+]=C1.[Cl-].[Cl-] and is OK


 15%|█▍        | 37/252 [05:30<33:34,  9.37s/it]

1364663-27-3 gives the SMILES: COC(=O)C1=CN=C(Br)C=C1Br and is OK


 15%|█▌        | 38/252 [05:39<33:28,  9.39s/it]

13190-97-1 gives the SMILES: CC(C)=CCC/C(C)=C/CC/C(C)=C/CC/C(C)=C/CC/C(C)=C/CC/C(C)=C/CC/C(C)=C/CC/C(C)=C/CC/C(C)=C/CO and is OK


 15%|█▌        | 39/252 [05:50<34:35,  9.75s/it]

3222-56-8 gives the SMILES: CC1=NC=CC=C1C(=O)O and is OK


 16%|█▌        | 40/252 [05:59<33:24,  9.46s/it]

1986-81-8 gives the SMILES: NC(=O)C1=C[N+]([O-])=CC=C1 and is OK


 16%|█▋        | 41/252 [06:10<35:28, 10.09s/it]

90872-72-3 gives the SMILES: CC1=CC=C(C2CCCN2)C=N1 and is OK


 17%|█▋        | 42/252 [06:19<33:57,  9.70s/it]

59578-62-0 gives the SMILES: O=C(CCCO)C1=CC=CN=C1 and is OK


 17%|█▋        | 43/252 [06:28<32:50,  9.43s/it]

2047-49-6 gives the SMILES: O=C(O)C1=CN=CC([N+](=O)[O-])=C1 and is OK


 17%|█▋        | 44/252 [06:36<31:46,  9.16s/it]

6311-35-9 gives the SMILES: O=C(O)C1=CC=C(Br)N=C1 and is OK


 18%|█▊        | 45/252 [06:46<32:13,  9.34s/it]

499-81-0 gives the SMILES: O=C(O)C1=CN=CC(C(=O)O)=C1 and is OK


 18%|█▊        | 46/252 [06:55<31:44,  9.24s/it]

7076-23-5 gives the SMILES: C1=CN=CC([C@H]2CCCN2)=C1 and is OK


 19%|█▊        | 47/252 [07:04<31:39,  9.26s/it]

614-00-6 gives the SMILES: CN(N=O)C1=CC=CC=C1 and is OK


 19%|█▉        | 48/252 [07:18<35:24, 10.41s/it]

2004-06-0 gives the SMILES: OC[C@H]1OC(N2C=NC3=C2N=CN=C3Cl)[C@H](O)[C@@H]1O and is OK


 19%|█▉        | 49/252 [07:28<35:39, 10.54s/it]

27247-34-3 gives the SMILES: [O-]C(=O)c1cc(cnc1)C(=O)OC.[K+] and is OK


 20%|█▉        | 50/252 [07:39<35:10, 10.45s/it]

3569-99-1 gives the SMILES: O=C(NCO)C1=CC=CN=C1 and is OK


 20%|██        | 51/252 [07:48<33:58, 10.14s/it]

3222-47-7 gives the SMILES: CC1=CC=C(C(=O)O)C=N1 and is OK


 21%|██        | 52/252 [07:57<33:02,  9.91s/it]

321-02-8 gives the SMILES: O=C([O-])C1=C[N+]([C@@H]2O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]2O)=CC=C1 and is OK


 21%|██        | 53/252 [08:07<32:11,  9.71s/it]

13078-04-1 gives the SMILES: C1=CN=CC(C2CCCCN2)=C1 and is OK


 21%|██▏       | 54/252 [08:16<31:29,  9.54s/it]

1314217-69-0 gives the SMILES: CC1=CC(C2=NC(CN3CCNCC3)=NO2)=CC2=C1C(=O)N(CC1=CC=C(OC(F)(F)F)C=C1)C2.CS(=O)(=O)O and is OK


 22%|██▏       | 55/252 [08:25<30:58,  9.44s/it]

4314-66-3 gives the SMILES: CCNC(=O)C1=CC=CN=C1 and is OK


 22%|██▏       | 56/252 [08:35<30:54,  9.46s/it]

5176-27-2 gives the SMILES: CC(C)(C)OC(=O)N1C=CC=C1 and is OK


 23%|██▎       | 57/252 [08:44<30:42,  9.45s/it]

924-16-3 gives the SMILES: CCCCN(CCCC)N=O and is OK
Retrying
Retrying
Retrying


 23%|██▎       | 58/252 [09:48<1:23:02, 25.68s/it]

FAILED TO RETRIEVE SMILES FOR CAS 4Z-N-0626 and is WRONGGGG.
Retrying


 23%|██▎       | 59/252 [10:23<1:31:59, 28.60s/it]

1207384-47-1 gives the SMILES: [2H]C([2H])([2H])N1C=CC(=O)C(C(N)=O)=C1 and is OK


 24%|██▍       | 60/252 [10:32<1:12:29, 22.65s/it]

51095-86-4 gives the SMILES: C[N@+]1([O-])CCC[C@H]1C1=CC=CN=C1 and is OK
Retrying
Retrying
Retrying


 24%|██▍       | 61/252 [11:38<1:53:27, 35.64s/it]

FAILED TO RETRIEVE SMILES FOR CAS 51020-67-8 and is WRONGGGG.
Retrying


 25%|██▍       | 62/252 [12:22<2:01:05, 38.24s/it]

1216737-36-8 gives the SMILES: [2H]C1=NC([2H])=C(C(=O)NCC(=O)O)C([2H])=C1[2H] and is OK


 25%|██▌       | 63/252 [12:33<1:34:46, 30.09s/it]

5654-86-4 gives the SMILES: CC(CC1NC(=O)C2N(C1=O)CCC2)C and is OK
Retrying
Retrying
Retrying


 25%|██▌       | 64/252 [13:37<2:06:07, 40.25s/it]

FAILED TO RETRIEVE SMILES FOR CAS 491-26-9 and is WRONGGGG.
Retrying
Retrying
Retrying


 26%|██▌       | 65/252 [14:42<2:28:32, 47.66s/it]

FAILED TO RETRIEVE SMILES FOR CAS 63551-14-4 and is WRONGGGG.
Retrying
Retrying
Retrying


 26%|██▌       | 66/252 [15:50<2:46:49, 53.82s/it]

FAILED TO RETRIEVE SMILES FOR CAS 491-26-9 and is WRONGGGG.


 27%|██▋       | 67/252 [16:00<2:04:52, 40.50s/it]

3612-80-4 gives the SMILES: O=C(OCCO)C1=CC=CN=C1 and is OK


 27%|██▋       | 68/252 [16:08<1:34:21, 30.77s/it]

54-11-5 gives the SMILES: CN1CCC[C@H]1C1=CC=CN=C1 and is OK


 27%|██▋       | 69/252 [16:15<1:12:29, 23.77s/it]

2743-90-0 gives the SMILES: C1=CCC(C2=CC=CN=C2)NC1 and is OK


 28%|██▊       | 70/252 [16:22<57:13, 18.86s/it]  

532-12-7 gives the SMILES: C1=CN=CC(C2=NCCC2)=C1 and is OK


 28%|██▊       | 71/252 [16:30<46:41, 15.48s/it]

3719-45-7 gives the SMILES: CN1C=C(C(=O)O)C=CC1=O and is OK


 29%|██▊       | 72/252 [16:38<39:52, 13.29s/it]

701-44-0 gives the SMILES: CN1C=C(C(N)=O)C=CC1=O and is OK


 29%|██▉       | 73/252 [16:46<34:52, 11.69s/it]

66148-19-4 gives the SMILES: [2H]C1=NC([2H])=C(C2CCCN2N=O)C([2H])=C1[2H] and is OK


 29%|██▉       | 74/252 [16:55<31:42, 10.69s/it]

1219805-86-3 gives the SMILES: [2H]C1=C([2H])C([2H])=C(C2CCCN2C([2H])([2H])[2H])C([2H])=N1 and is OK


 30%|██▉       | 75/252 [17:03<29:11,  9.90s/it]

69980-24-1 gives the SMILES: [2H]C([2H])([2H])N1CCCC1C1=CC=CN=C1 and is OK


 30%|███       | 76/252 [17:09<26:14,  8.95s/it]

501-81-5 gives the SMILES: O=C(O)CC1=CC=CN=C1 and is OK


 31%|███       | 77/252 [17:18<25:31,  8.75s/it]

769-49-3 gives the SMILES: CN1C=CC(=O)C(C(N)=O)=C1 and is OK


 31%|███       | 78/252 [17:25<23:55,  8.25s/it]

5746-86-1 gives the SMILES: C1=CN=CC(C2CCCN2)=C1 and is OK


 31%|███▏      | 79/252 [17:32<22:43,  7.88s/it]

1207384-48-2 gives the SMILES: [2H]C([2H])([2H])N1C=C(C(N)=O)C=CC1=O and is OK


 32%|███▏      | 80/252 [17:40<22:40,  7.91s/it]

350818-69-8 gives the SMILES: [2H]C1=C([2H])C([2H])=C(C2CCCN2C)C([2H])=N1 and is OK


 32%|███▏      | 81/252 [17:48<22:46,  7.99s/it]

59-67-6 gives the SMILES: O=C(O)C1=CC=CN=C1 and is OK


 33%|███▎      | 82/252 [17:56<22:56,  8.10s/it]

487-19-4 gives the SMILES: CN1C=CC=C1C1=CC=CN=C1 and is OK


 33%|███▎      | 83/252 [18:05<23:22,  8.30s/it]

66148-15-0 gives the SMILES: [2H]C1=NC([2H])=C(C(=O)O)C([2H])=C1[2H] and is OK


 33%|███▎      | 84/252 [18:13<22:57,  8.20s/it]

94-44-0 gives the SMILES: O=C(OCC1=CC=CC=C1)C1=CC=CN=C1 and is OK
Retrying


In [None]:
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem
import torch
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader as GeoDataLoader
from torch.utils.data import Dataset
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, global_mean_pool
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from gnn import GNNModel, smiles_to_graph, MoleculeDataset

In [None]:
df = pd.read_csv('nicotine_derivatives_with_smiles.csv')
df_cleaned = df[df['SMILES'].notna() & (df['SMILES'] != '')]
df_cleaned.to_csv('cleaned_nicotine_derivatives_with_smiles.csv', index=False)

In [None]:
df = pd.read_csv('cleaned_nicotine_derivatives_with_smiles.csv')
nicotine_smiles = df['SMILES']

nicotine_graphs = [graph for graph in [smiles_to_graph(smile) for smile in nicotine_smiles] if graph is not None]

model_path = 'gnn_model.pth'
model = GNNModel(num_node_features=26, hidden_dim=128, output_dim=2)
model.load_state_dict(torch.load(model_path))
model.eval()

predictions = []

with torch.no_grad():
    for graph in nicotine_graphs:
        output = model(graph)
        _, predicted = torch.max(output, dim=1)
        predictions.append(predicted.item())

df['Predicted Label'] = predictions

df.to_csv('nicotine_derivatives_with_predictions.csv', index=False)

total = len(df)
predicted_count = len(predictions)
print(f"Total rows: {total}")
print(f"Predictions made: {predicted_count}")
print(f"Updated CSV saved with predictions.")

Error processing SMILES: nan - No registered converter was able to produce a C++ rvalue of type std::__1::basic_string<wchar_t, std::__1::char_traits<wchar_t>, std::__1::allocator<wchar_t>> from this Python object of type float
Error processing SMILES: nan - No registered converter was able to produce a C++ rvalue of type std::__1::basic_string<wchar_t, std::__1::char_traits<wchar_t>, std::__1::allocator<wchar_t>> from this Python object of type float
Error processing SMILES: nan - No registered converter was able to produce a C++ rvalue of type std::__1::basic_string<wchar_t, std::__1::char_traits<wchar_t>, std::__1::allocator<wchar_t>> from this Python object of type float
Error processing SMILES: nan - No registered converter was able to produce a C++ rvalue of type std::__1::basic_string<wchar_t, std::__1::char_traits<wchar_t>, std::__1::allocator<wchar_t>> from this Python object of type float
Error processing SMILES: nan - No registered converter was able to produce a C++ rvalue 

ValueError: Length of values (0) does not match length of index (260)