# Convert SMILES to 3D XYZ files


In [5]:
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem
import os
from tqdm import tqdm

# Tab-separated input table; its 'CID' column is used below to find the resume point
file_path = '/home/chao/3dmolgen/data/train.txt'

# Directory where the generated XYZ files are saved (created if missing)
output_dir = '/home/chao/3dmolgen/data/xyz_train'
os.makedirs(output_dir, exist_ok=True)

# Read the file
df = pd.read_csv(file_path, sep='\t')

# CID to resume from. It is compared as an integer, so this only matches when
# the 'CID' column was parsed as numeric.
RESUME_CID = 146672861

# Build the boolean mask once, then take the *position* of the first hit.
# start_index is later used with df.iloc, which is positional; df.index[0]
# would return a label, which only coincides with the position for a default
# RangeIndex. Falls back to 0 (process everything) when the CID is absent.
cid_matches = df['CID'] == RESUME_CID
start_index = next((position for position, hit in enumerate(cid_matches) if hit), 0)

# Function to convert SMILES to 3D XYZ
def smiles_to_xyz(smiles, cid, out_dir=None):
    """Generate a 3D conformer for a SMILES string and save it as ``<cid>.xyz``.

    Hydrogens are added, one conformer is embedded with ETKDG, the geometry is
    UFF-minimised when UFF has parameters for every atom, and the resulting
    XYZ block is written to disk.

    Args:
        smiles: SMILES string. Non-string values (e.g. the float NaN pandas
            produces for an empty cell) and blank strings are rejected.
        cid: Identifier used as the output file stem.
        out_dir: Directory to write into. Defaults to the module-level
            ``output_dir`` looked up at call time; pass it explicitly when
            another cell may have rebound that global.

    Returns:
        True if an XYZ file was written; False if the input was unusable,
        RDKit could not parse the SMILES, or conformer embedding failed.
    """
    # Guard first: MolFromSmiles raises on non-string input, which would
    # abort a long batch run over a table with missing values.
    if not isinstance(smiles, str) or not smiles.strip():
        return False

    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return False

    mol = Chem.AddHs(mol)
    # EmbedMolecule returns a negative value when no conformer could be generated.
    if AllChem.EmbedMolecule(mol, AllChem.ETKDG()) < 0:
        return False

    # UFF lacks parameters for some atom types (the run log shows e.g. Mo3,
    # Co3, Fe3). Minimising with an incomplete force field is skipped and the
    # embedded ETKDG coordinates are written unchanged in that case.
    if AllChem.UFFHasAllMoleculeParams(mol):
        AllChem.UFFOptimizeMolecule(mol)

    destination = output_dir if out_dir is None else out_dir
    with open(os.path.join(destination, f'{cid}.xyz'), 'w') as file:
        file.write(Chem.MolToXYZBlock(mol))
    return True

# Convert every row from start_index onward and count how many XYZ files were written
pending = df.iloc[start_index:]
progress = tqdm(pending.iterrows(), total=len(pending), desc="Converting SMILES to XYZ")

success_count = 0
for _row_label, record in progress:
    # A truthy result from smiles_to_xyz means the molecule was converted
    success_count += 1 if smiles_to_xyz(record['SMILES'], record['CID']) else 0

print(f"Total molecules successfully converted: {success_count}")


Converting SMILES to XYZ:   1%|▎                                                     | 92/16038 [00:21<12:00, 22.14it/s]RDKit ERROR: [16:46:50] UFFTYPER: Unrecognized atom type: Mo3 (1)
[16:46:50] UFFTYPER: Unrecognized atom type: Mo3 (1)
RDKit ERROR: [16:46:50] UFFTYPER: Unrecognized atom type: Mo3 (5)
[16:46:50] UFFTYPER: Unrecognized atom type: Mo3 (5)
RDKit ERROR: [16:46:50] UFFTYPER: Unrecognized atom type: Mo3 (1)
[16:46:50] UFFTYPER: Unrecognized atom type: Mo3 (1)
RDKit ERROR: [16:46:50] UFFTYPER: Unrecognized atom type: Mo3 (5)
[16:46:50] UFFTYPER: Unrecognized atom type: Mo3 (5)
Converting SMILES to XYZ:   3%|█▍                                                   | 424/16038 [01:17<23:14, 11.19it/s]RDKit ERROR: [16:47:46] UFFTYPER: Unrecognized charge state for atom: 0
[16:47:46] UFFTYPER: Unrecognized charge state for atom: 0
RDKit ERROR: [16:47:46] UFFTYPER: Unrecognized charge state for atom: 0
[16:47:46] UFFTYPER: Unrecognized charge state for atom: 0
Converting SMILES to X

Converting SMILES to XYZ:   7%|███▋                                                | 1152/16038 [02:47<17:38, 14.06it/s]RDKit ERROR: [16:49:16] UFFTYPER: Unrecognized charge state for atom: 2
[16:49:16] UFFTYPER: Unrecognized charge state for atom: 2
RDKit ERROR: [16:49:16] UFFTYPER: Unrecognized charge state for atom: 2
[16:49:16] UFFTYPER: Unrecognized charge state for atom: 2
Converting SMILES to XYZ:   8%|███▉                                                | 1205/16038 [02:50<18:29, 13.37it/s]RDKit ERROR: [16:49:19] UFFTYPER: Unrecognized atom type: Pt6 (1)
[16:49:19] UFFTYPER: Unrecognized atom type: Pt6 (1)
[16:49:19] UFFTYPER: Unrecognized atom type: Pt6 (1)
RDKit ERROR: [16:49:19] UFFTYPER: Unrecognized atom type: Pt6 (1)
Converting SMILES to XYZ:   8%|████                                                | 1239/16038 [02:52<15:33, 15.86it/s]RDKit ERROR: [16:49:21] UFFTYPER: Unrecognized atom type: Ca (0)
[16:49:21] UFFTYPER: Unrecognized atom type: Ca (0)
RDKit ERROR: [16:49:21]

Converting SMILES to XYZ:  12%|██████▎                                             | 1966/16038 [04:45<33:16,  7.05it/s]RDKit ERROR: [16:51:15] UFFTYPER: Unrecognized atom type: Co3 (0)
[16:51:15] UFFTYPER: Unrecognized atom type: Co3 (0)
[16:51:15] UFFTYPER: Unrecognized atom type: Co3 (108)
RDKit ERROR: [16:51:15] UFFTYPER: Unrecognized atom type: Co3 (108)
Converting SMILES to XYZ:  12%|██████▏                                           | 1990/16038 [04:50<1:02:09,  3.77it/s]RDKit ERROR: [16:51:19] UFFTYPER: Unrecognized atom type: Fe3 (0)
[16:51:19] UFFTYPER: Unrecognized atom type: Fe3 (0)
RDKit ERROR: [16:51:19] UFFTYPER: Unrecognized atom type: Fe3 (44)
[16:51:19] UFFTYPER: Unrecognized atom type: Fe3 (44)
Converting SMILES to XYZ:  13%|██████▊                                             | 2082/16038 [05:02<50:20,  4.62it/s]RDKit ERROR: [16:51:31] UFFTYPER: Unrecognized charge state for atom: 1
[16:51:31] UFFTYPER: Unrecognized charge state for atom: 1
RDKit ERROR: [16:51:31] UFF

Converting SMILES to XYZ:  17%|█████████                                           | 2787/16038 [06:37<23:27,  9.41it/s]RDKit ERROR: [16:53:06] UFFTYPER: Unrecognized atom type: Co3 (0)
[16:53:06] UFFTYPER: Unrecognized atom type: Co3 (0)
RDKit ERROR: [16:53:06] UFFTYPER: Unrecognized atom type: Co3 (81)
[16:53:06] UFFTYPER: Unrecognized atom type: Co3 (81)
Converting SMILES to XYZ:  18%|█████████▏                                          | 2827/16038 [06:41<11:51, 18.56it/s]RDKit ERROR: [16:53:10] UFFTYPER: Unrecognized atom type: Fe3 (0)
[16:53:10] UFFTYPER: Unrecognized atom type: Fe3 (0)
RDKit ERROR: [16:53:10] UFFTYPER: Unrecognized atom type: Fe3 (54)
[16:53:10] UFFTYPER: Unrecognized atom type: Fe3 (54)
Converting SMILES to XYZ:  18%|█████████▏                                          | 2833/16038 [06:41<13:23, 16.43it/s]RDKit ERROR: [16:53:10] UFFTYPER: Unrecognized atom type: Zr3 (0)
[16:53:10] UFFTYPER: Unrecognized atom type: Zr3 (0)
RDKit ERROR: [16:53:10] UFFTYPER: Unrecog

Converting SMILES to XYZ:  23%|████████████▏                                       | 3743/16038 [08:34<25:41,  7.98it/s]RDKit ERROR: [16:55:06] UFFTYPER: Unrecognized atom type: Co3 (0)
[16:55:06] UFFTYPER: Unrecognized atom type: Co3 (0)
RDKit ERROR: [16:55:06] UFFTYPER: Unrecognized atom type: Co3 (91)
[16:55:06] UFFTYPER: Unrecognized atom type: Co3 (91)
Converting SMILES to XYZ:  24%|████████████▍                                       | 3826/16038 [08:46<27:22,  7.44it/s]RDKit ERROR: [16:55:15] UFFTYPER: Unrecognized atom type: Cu5 (0)
[16:55:15] UFFTYPER: Unrecognized atom type: Cu5 (0)
RDKit ERROR: [16:55:15] UFFTYPER: Unrecognized atom type: Cu5 (0)
[16:55:15] UFFTYPER: Unrecognized atom type: Cu5 (0)
RDKit ERROR: [16:55:15] UFFTYPER: Unrecognized atom type: Cu5 (72)
RDKit ERROR: [16:55:15] UFFTYPER: Unrecognized atom type: Cu5 (73)
[16:55:15] UFFTYPER: Unrecognized atom type: Cu5 (72)
[16:55:15] UFFTYPER: Unrecognized atom type: Cu5 (73)
Converting SMILES to XYZ:  24%|█████████

Converting SMILES to XYZ:  29%|███████████████                                     | 4635/16038 [10:17<24:19,  7.81it/s]RDKit ERROR: [16:56:46] UFFTYPER: Unrecognized atom type: Ni3 (0)
[16:56:46] UFFTYPER: Unrecognized atom type: Ni3 (0)
[16:56:46] UFFTYPER: Unrecognized atom type: Ni3 (0)
RDKit ERROR: [16:56:46] UFFTYPER: Unrecognized atom type: Ni3 (0)
Converting SMILES to XYZ:  30%|███████████████▌                                    | 4804/16038 [10:54<08:54, 21.03it/s]RDKit ERROR: [16:57:24] UFFTYPER: Unrecognized atom type: As2+3 (3)
[16:57:24] UFFTYPER: Unrecognized atom type: As2+3 (3)
RDKit ERROR: [16:57:24] UFFTYPER: Unrecognized atom type: As2+3 (3)
[16:57:24] UFFTYPER: Unrecognized atom type: As2+3 (3)
Converting SMILES to XYZ:  30%|███████████████▊                                    | 4882/16038 [11:21<15:53, 11.70it/s][16:57:50] UFFTYPER: Unrecognized charge state for atom: 0
RDKit ERROR: [16:57:50] UFFTYPER: Unrecognized charge state for atom: 0
[16:57:50] UFFTYPER: Unre

Converting SMILES to XYZ:  37%|███████████████████                                 | 5894/16038 [13:54<07:39, 22.06it/s]RDKit ERROR: [17:00:23] UFFTYPER: Unrecognized charge state for atom: 5
[17:00:23] UFFTYPER: Unrecognized charge state for atom: 5
RDKit ERROR: [17:00:23] UFFTYPER: Unrecognized atom type: Au5 (0)
[17:00:23] UFFTYPER: Unrecognized atom type: Au5 (0)
RDKit ERROR: [17:00:23] UFFTYPER: Unrecognized charge state for atom: 5
[17:00:23] UFFTYPER: Unrecognized charge state for atom: 5
RDKit ERROR: [17:00:23] UFFTYPER: Unrecognized atom type: Au5 (9)
[17:00:23] UFFTYPER: Unrecognized atom type: Au5 (9)
RDKit ERROR: [17:00:53] UFFTYPER: Unrecognized charge state for atom: 4
[17:00:53] UFFTYPER: Unrecognized charge state for atom: 4
RDKit ERROR: [17:00:53] UFFTYPER: Unrecognized charge state for atom: 4
[17:00:53] UFFTYPER: Unrecognized charge state for atom: 4
Converting SMILES to XYZ:  38%|███████████████████▊                                | 6109/16038 [14:27<17:32,  9.43it/

Converting SMILES to XYZ:  43%|██████████████████████▌                             | 6947/16038 [21:03<29:34,  5.12it/s]RDKit ERROR: [17:07:32] UFFTYPER: Unrecognized atom type: Fe3 (0)
[17:07:32] UFFTYPER: Unrecognized atom type: Fe3 (0)
RDKit ERROR: [17:07:32] UFFTYPER: Unrecognized atom type: Fe3 (59)
[17:07:32] UFFTYPER: Unrecognized atom type: Fe3 (59)
Converting SMILES to XYZ:  44%|██████████████████████▋                             | 7005/16038 [21:09<14:24, 10.44it/s]RDKit ERROR: [17:07:38] UFFTYPER: Unrecognized atom type: Co3 (0)
[17:07:38] UFFTYPER: Unrecognized atom type: Co3 (0)
RDKit ERROR: [17:07:38] UFFTYPER: Unrecognized atom type: Co3 (64)
[17:07:38] UFFTYPER: Unrecognized atom type: Co3 (64)
Converting SMILES to XYZ:  44%|██████████████████████▊                             | 7025/16038 [21:11<10:44, 13.99it/s]RDKit ERROR: [17:07:40] UFFTYPER: Unrecognized charge state for atom: 2
[17:07:40] UFFTYPER: Unrecognized charge state for atom: 2
RDKit ERROR: [17:07:40] UFFTY

RDKit ERROR: [17:09:29] UFFTYPER: Unrecognized charge state for atom: 1
[17:09:29] UFFTYPER: Unrecognized charge state for atom: 1
Converting SMILES to XYZ:  49%|█████████████████████████▏                          | 7782/16038 [23:12<24:25,  5.63it/s]RDKit ERROR: [17:09:41] UFFTYPER: Unrecognized charge state for atom: 6
[17:09:41] UFFTYPER: Unrecognized charge state for atom: 6
RDKit ERROR: [17:09:41] UFFTYPER: Unrecognized charge state for atom: 6
[17:09:41] UFFTYPER: Unrecognized charge state for atom: 6
Converting SMILES to XYZ:  49%|█████████████████████████▍                          | 7839/16038 [23:20<40:10,  3.40it/s]RDKit ERROR: [17:09:49] UFFTYPER: Unrecognized atom type: Os3 (1)
[17:09:49] UFFTYPER: Unrecognized atom type: Os3 (1)
RDKit ERROR: [17:09:49] UFFTYPER: Unrecognized atom type: Os3 (1)
[17:09:49] UFFTYPER: Unrecognized atom type: Os3 (1)
Converting SMILES to XYZ:  49%|█████████████████████████▍                          | 7851/16038 [23:21<19:22,  7.04it/s]RDKit ERR

Converting SMILES to XYZ:  51%|██████████████████████████▍                         | 8147/16038 [24:17<09:06, 14.44it/s]RDKit ERROR: [17:10:46] UFFTYPER: Unrecognized charge state for atom: 0
[17:10:46] UFFTYPER: Unrecognized charge state for atom: 0
RDKit ERROR: [17:10:46] UFFTYPER: Unrecognized atom type: Gd3+3 (0)
[17:10:46] UFFTYPER: Unrecognized atom type: Gd3+3 (0)
RDKit ERROR: [17:10:46] UFFTYPER: Unrecognized charge state for atom: 28
[17:10:46] UFFTYPER: Unrecognized charge state for atom: 28
RDKit ERROR: [17:10:46] UFFTYPER: Unrecognized atom type: Gd3+3 (28)
[17:10:46] UFFTYPER: Unrecognized atom type: Gd3+3 (28)
Converting SMILES to XYZ:  51%|██████████████████████████▌                         | 8180/16038 [24:19<09:55, 13.19it/s]RDKit ERROR: [17:10:48] UFFTYPER: Unrecognized atom type: Cu5 (0)
[17:10:48] UFFTYPER: Unrecognized atom type: Cu5 (0)
RDKit ERROR: [17:10:48] UFFTYPER: Unrecognized atom type: Cu5 (84)
[17:10:48] UFFTYPER: Unrecognized atom type: Cu5 (84)
Converti

Converting SMILES to XYZ:  56%|█████████████████████████████▏                      | 9003/16038 [26:25<05:45, 20.37it/s]RDKit ERROR: [17:12:56] UFFTYPER: Unrecognized atom type: Co3 (0)
[17:12:56] UFFTYPER: Unrecognized atom type: Co3 (0)
RDKit ERROR: [17:12:56] UFFTYPER: Unrecognized atom type: Co3 (91)
[17:12:56] UFFTYPER: Unrecognized atom type: Co3 (91)
RDKit ERROR: [17:13:35] UFFTYPER: Unrecognized charge state for atom: 0
[17:13:35] UFFTYPER: Unrecognized charge state for atom: 0
RDKit ERROR: [17:13:35] UFFTYPER: Unrecognized charge state for atom: 7
[17:13:35] UFFTYPER: Unrecognized charge state for atom: 7
RDKit ERROR: [17:13:55] UFFTYPER: Unrecognized charge state for atom: 0
[17:13:55] UFFTYPER: Unrecognized charge state for atom: 0
[17:13:55] UFFTYPER: Unrecognized charge state for atom: 0
RDKit ERROR: [17:13:55] UFFTYPER: Unrecognized charge state for atom: 0
Converting SMILES to XYZ:  58%|██████████████████████████████▍                     | 9373/16038 [27:27<10:58, 10.12i

Converting SMILES to XYZ:  62%|████████████████████████████████▎                   | 9970/16038 [29:04<04:49, 20.99it/s]RDKit ERROR: [17:15:33] UFFTYPER: Unrecognized atom type: Fe3 (0)
[17:15:33] UFFTYPER: Unrecognized atom type: Fe3 (0)
RDKit ERROR: [17:15:33] UFFTYPER: Unrecognized atom type: Fe3 (42)
[17:15:33] UFFTYPER: Unrecognized atom type: Fe3 (42)
RDKit ERROR: [17:15:57] UFFTYPER: Unrecognized charge state for atom: 0
[17:15:57] UFFTYPER: Unrecognized charge state for atom: 0
RDKit ERROR: [17:15:57] UFFTYPER: Unrecognized charge state for atom: 42
[17:15:57] UFFTYPER: Unrecognized charge state for atom: 42
Converting SMILES to XYZ:  63%|████████████████████████████████                   | 10083/16038 [29:29<53:41,  1.85it/s]RDKit ERROR: [17:15:58] UFFTYPER: Unrecognized atom type: Mo1 (0)
[17:15:58] UFFTYPER: Unrecognized atom type: Mo1 (0)
[17:15:58] UFFTYPER: Unrecognized atom type: Mo1 (94)
RDKit ERROR: [17:15:58] UFFTYPER: Unrecognized atom type: Mo1 (94)
Converting SMILE

Converting SMILES to XYZ:  67%|██████████████████████████████████                 | 10719/16038 [30:43<08:13, 10.77it/s]RDKit ERROR: [17:17:12] UFFTYPER: Unrecognized charge state for atom: 4
[17:17:12] UFFTYPER: Unrecognized charge state for atom: 4
RDKit ERROR: [17:17:12] UFFTYPER: Unrecognized charge state for atom: 4
[17:17:12] UFFTYPER: Unrecognized charge state for atom: 4
Converting SMILES to XYZ:  67%|██████████████████████████████████▏                | 10747/16038 [30:45<07:55, 11.12it/s]RDKit ERROR: [17:17:14] UFFTYPER: Unrecognized atom type: Fe3 (0)
[17:17:14] UFFTYPER: Unrecognized atom type: Fe3 (0)
RDKit ERROR: [17:17:14] UFFTYPER: Unrecognized atom type: Fe3 (0)
[17:17:14] UFFTYPER: Unrecognized atom type: Fe3 (0)
Converting SMILES to XYZ:  67%|██████████████████████████████████▏                | 10760/16038 [30:46<05:55, 14.83it/s]RDKit ERROR: [17:17:15] UFFTYPER: Unrecognized atom type: He1 (0)
[17:17:15] UFFTYPER: Unrecognized atom type: He1 (0)
RDKit ERROR: [17:17:1

Converting SMILES to XYZ:  72%|████████████████████████████████████▋              | 11538/16038 [32:08<07:09, 10.48it/s]RDKit ERROR: [17:18:37] UFFTYPER: Unrecognized atom type: Fe3 (0)
[17:18:37] UFFTYPER: Unrecognized atom type: Fe3 (0)
RDKit ERROR: [17:18:37] UFFTYPER: Unrecognized atom type: Fe3 (19)
[17:18:37] UFFTYPER: Unrecognized atom type: Fe3 (19)
Converting SMILES to XYZ:  72%|████████████████████████████████████▉              | 11619/16038 [32:15<07:32,  9.77it/s]RDKit ERROR: [17:18:44] UFFTYPER: Unrecognized charge state for atom: 1
[17:18:44] UFFTYPER: Unrecognized charge state for atom: 1
RDKit ERROR: [17:18:44] UFFTYPER: Unrecognized charge state for atom: 1
[17:18:44] UFFTYPER: Unrecognized charge state for atom: 1
Converting SMILES to XYZ:  73%|█████████████████████████████████████▎             | 11719/16038 [32:40<10:15,  7.01it/s]RDKit ERROR: [17:19:09] UFFTYPER: Unrecognized charge state for atom: 1
[17:19:09] UFFTYPER: Unrecognized charge state for atom: 1
RDKit E

Converting SMILES to XYZ:  76%|██████████████████████████████████████▌            | 12115/16038 [33:36<15:38,  4.18it/s]RDKit ERROR: [17:20:05] UFFTYPER: Unrecognized atom type: Y_ (0)
[17:20:05] UFFTYPER: Unrecognized atom type: Y_ (0)
RDKit ERROR: [17:20:05] UFFTYPER: Unrecognized atom type: Y_ (3)
[17:20:05] UFFTYPER: Unrecognized atom type: Y_ (3)
Converting SMILES to XYZ:  76%|██████████████████████████████████████▉            | 12259/16038 [33:46<02:17, 27.50it/s]RDKit ERROR: [17:20:15] UFFTYPER: Unrecognized atom type: Co3 (0)
RDKit ERROR: [17:20:15] UFFTYPER: Unrecognized atom type: Co3 (62)
[17:20:15] UFFTYPER: Unrecognized atom type: Co3 (0)
[17:20:15] UFFTYPER: Unrecognized atom type: Co3 (62)
Converting SMILES to XYZ:  77%|███████████████████████████████████████▏           | 12324/16038 [33:56<06:57,  8.90it/s]RDKit ERROR: [17:20:25] UFFTYPER: Unrecognized atom type: Ni3 (0)
[17:20:25] UFFTYPER: Unrecognized atom type: Ni3 (0)
RDKit ERROR: [17:20:25] UFFTYPER: Unrecognized 

Converting SMILES to XYZ:  81%|█████████████████████████████████████████▍         | 13015/16038 [35:15<16:19,  3.09it/s]RDKit ERROR: [17:21:44] UFFTYPER: Unrecognized charge state for atom: 21
[17:21:44] UFFTYPER: Unrecognized charge state for atom: 21
RDKit ERROR: [17:21:44] UFFTYPER: Unrecognized charge state for atom: 21
[17:21:44] UFFTYPER: Unrecognized charge state for atom: 21
Converting SMILES to XYZ:  81%|█████████████████████████████████████████▍         | 13018/16038 [35:15<11:22,  4.43it/s]RDKit ERROR: [17:21:44] UFFTYPER: Unrecognized atom type: Mo3 (1)
[17:21:44] UFFTYPER: Unrecognized atom type: Mo3 (1)
RDKit ERROR: [17:21:44] UFFTYPER: Unrecognized atom type: Mo3 (8)
[17:21:44] UFFTYPER: Unrecognized atom type: Mo3 (8)
Converting SMILES to XYZ:  81%|█████████████████████████████████████████▍         | 13038/16038 [35:19<14:30,  3.45it/s]RDKit ERROR: [17:21:48] UFFTYPER: Unrecognized atom type: Fe3 (0)
[17:21:48] UFFTYPER: Unrecognized atom type: Fe3 (0)
RDKit ERROR: [17:

Converting SMILES to XYZ:  85%|███████████████████████████████████████████▍       | 13647/16038 [36:58<02:06, 18.93it/s]RDKit ERROR: [17:23:27] UFFTYPER: Unrecognized atom type: Se2+2 (15)
[17:23:27] UFFTYPER: Unrecognized atom type: Se2+2 (15)
[17:23:27] UFFTYPER: Unrecognized atom type: Se2+2 (15)
RDKit ERROR: [17:23:27] UFFTYPER: Unrecognized atom type: Se2+2 (15)
RDKit ERROR: [17:23:27] UFFTYPER: Unrecognized atom type: Se2+2 (15)
[17:23:27] UFFTYPER: Unrecognized atom type: Se2+2 (15)
Converting SMILES to XYZ:  85%|███████████████████████████████████████████▌       | 13680/16038 [37:04<07:23,  5.32it/s]RDKit ERROR: [17:23:33] UFFTYPER: Unrecognized charge state for atom: 1
[17:23:33] UFFTYPER: Unrecognized charge state for atom: 1
RDKit ERROR: [17:23:33] UFFTYPER: Unrecognized charge state for atom: 1
[17:23:33] UFFTYPER: Unrecognized charge state for atom: 1
Converting SMILES to XYZ:  86%|███████████████████████████████████████████▋       | 13740/16038 [37:10<03:29, 10.95it/s]RDK

Converting SMILES to XYZ:  93%|███████████████████████████████████████████████▎   | 14868/16038 [39:39<01:44, 11.18it/s]RDKit ERROR: [17:26:08] UFFTYPER: Unrecognized hybridization for atom: 1
[17:26:08] UFFTYPER: Unrecognized hybridization for atom: 1
RDKit ERROR: [17:26:08] UFFTYPER: Unrecognized atom type: Rh (1)
[17:26:08] UFFTYPER: Unrecognized atom type: Rh (1)
Converting SMILES to XYZ:  93%|███████████████████████████████████████████████▎   | 14893/16038 [39:41<01:49, 10.45it/s]RDKit ERROR: [17:26:10] UFFTYPER: Unrecognized charge state for atom: 2
[17:26:10] UFFTYPER: Unrecognized charge state for atom: 2
RDKit ERROR: [17:26:10] UFFTYPER: Unrecognized charge state for atom: 2
[17:26:10] UFFTYPER: Unrecognized charge state for atom: 2
Converting SMILES to XYZ:  93%|█████████████████████████████████████████████▌   | 14919/16038 [40:27<1:16:30,  4.10s/it]RDKit ERROR: [17:26:56] UFFTYPER: Unrecognized atom type: Fe5 (1)
[17:26:56] UFFTYPER: Unrecognized atom type: Fe5 (1)
RDKit ERR

Converting SMILES to XYZ:  99%|██████████████████████████████████████████████████▋| 15923/16038 [42:29<00:19,  5.83it/s]RDKit ERROR: [17:28:58] UFFTYPER: Unrecognized atom type: W_3 (1)
[17:28:58] UFFTYPER: Unrecognized atom type: W_3 (1)
RDKit ERROR: [17:28:58] UFFTYPER: Unrecognized atom type: W_3 (1)
[17:28:58] UFFTYPER: Unrecognized atom type: W_3 (1)
RDKit ERROR: [17:28:58] UFFTYPER: Unrecognized charge state for atom: 6
[17:28:58] UFFTYPER: Unrecognized charge state for atom: 6
RDKit ERROR: [17:28:58] UFFTYPER: Unrecognized atom type: Mo2 (1)
[17:28:58] UFFTYPER: Unrecognized atom type: Mo2 (1)
RDKit ERROR: [17:28:58] UFFTYPER: Unrecognized charge state for atom: 30
[17:28:58] UFFTYPER: Unrecognized charge state for atom: 30
RDKit ERROR: [17:28:58] UFFTYPER: Unrecognized atom type: Mo2 (32)
[17:28:58] UFFTYPER: Unrecognized atom type: Mo2 (32)
Converting SMILES to XYZ: 100%|███████████████████████████████████████████████████| 16038/16038 [43:03<00:00,  6.21it/s]

Total molecules successfully converted: 14982





# Pair Description

In [10]:
import pandas as pd
import os

# Tab-separated validation table, resolved relative to the working directory
file_path = 'validation.txt'

# Folder that receives the description text files; created on demand
output_dir = 'descriptions_valid'
os.makedirs(output_dir, exist_ok=True)

# Load the table ('delimiter' is read_csv's alias for 'sep')
df = pd.read_csv(file_path, delimiter='\t')

# Function to write descriptions to separate text files
def write_description(cid, description, out_dir=None):
    """Write one description to ``<cid>_description.txt``.

    Args:
        cid: Identifier used as the file-name prefix.
        description: Text to store; must be a ``str``.
        out_dir: Directory to write into. Defaults to the module-level
            ``output_dir`` looked up at call time; pass it explicitly when
            another cell may have rebound that global.
    """
    destination = output_dir if out_dir is None else out_dir
    path = os.path.join(destination, f'{cid}_description.txt')
    # Explicit UTF-8: with the locale default encoding, non-ASCII characters
    # in a description can raise UnicodeEncodeError or be written in a
    # platform-dependent encoding.
    with open(path, 'w', encoding='utf-8') as file:
        file.write(description)

# Write one description file per table row
for _row_label, record in df.iterrows():
    cid, text = record['CID'], record['description']
    write_description(cid, text)

# Absolute location of the output folder, shown as the cell's result
output_dir_path = os.path.abspath(output_dir)
output_dir_path



'/home/chao/3dmolgen/data/descriptions_valid'

In [13]:
import os
import shutil

# Two source directories whose files are merged into one target directory;
# all three paths are relative to the working directory.
source_dir1, source_dir2 = 'xyz_test', 'descriptions_test'
target_dir = 'test'

# Make sure the destination exists before anything is copied into it
os.makedirs(target_dir, exist_ok=True)

# Function to copy files from a source directory to the target directory
def copy_files(source_dir, dest_dir=None):
    """Copy every regular file directly inside ``source_dir`` into a directory.

    Sub-directories of ``source_dir`` are skipped (no recursion). A file that
    already exists in the destination under the same name is overwritten.

    Args:
        source_dir: Directory whose files are copied.
        dest_dir: Destination directory. Defaults to the module-level
            ``target_dir`` looked up at call time.
    """
    destination = target_dir if dest_dir is None else dest_dir
    # Without an existing directory, shutil.copy(src, destination) would write
    # a single *file* named `destination`, overwritten by each later copy.
    os.makedirs(destination, exist_ok=True)

    # scandir yields entries with cached type information and is closed
    # deterministically by the context manager.
    with os.scandir(source_dir) as entries:
        for entry in entries:
            if entry.is_file():
                shutil.copy(entry.path, destination)

# Merge the contents of both source directories into the target directory
for src_dir in (source_dir1, source_dir2):
    copy_files(src_dir)

summary = f"All files from '{source_dir1}' and '{source_dir2}' have been copied to '{target_dir}'."
print(summary)


All files from 'xyz_test' and 'descriptions_test' have been copied to 'test'.
