In [14]:
import os
import sys
import time
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from rdkit import Chem, RDLogger
import sqlite3
from tqdm import tqdm

# Suppress RDKit warnings for cleaner output during generation checks
RDLogger.DisableLog('rdApp.*')

# =========================================
# CONFIGURATION (SIMPLIFIED & TUNED)
# =========================================
class Config:
    # --- System & Data Paths ---
    CHEMPL_DB_PATH = 'DL_ENDSEM__DATASET/chembl_35/chembl_35_sqlite/chembl_35.db'
    MODEL_DIR = 'models_simplified_v2' # New dir for v2 run

    # --- Model Hyperparameters (SCALED DOWN) ---
    MAX_ATOMS = 9
    ATOM_LIST = ['C', 'N', 'O', 'F', 'S', 'Cl'] 
    NUM_ATOM_TYPES = len(ATOM_LIST) + 1 
    
    BOND_TYPES = [Chem.rdchem.BondType.ZERO,
                  Chem.rdchem.BondType.SINGLE,
                  Chem.rdchem.BondType.DOUBLE,
                  Chem.rdchem.BondType.TRIPLE,
                  Chem.rdchem.BondType.AROMATIC]
    NUM_BOND_TYPES = len(BOND_TYPES)

    LATENT_DIM = 128
    BATCH_SIZE = 64
    LR_G = 1e-4
    LR_D = 1e-4
    LAMBDA_GP = 10
    N_CRITIC = 5
    NUM_EPOCHS = 500   # INCREASED TO 100
    DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    SAVE_EVERY = 10    # Save less frequently for long run
    RESUME_EPOCH = 100

print(f"Running on device: {Config.DEVICE}")

# =========================================
# DATA UTILS
# =========================================
def smiles_to_graph(smiles):
    mol = Chem.MolFromSmiles(smiles)
    if mol is None: return None
    num_atoms = mol.GetNumAtoms()
    if num_atoms > Config.MAX_ATOMS or num_atoms < 2: return None

    X = np.zeros((Config.MAX_ATOMS, Config.NUM_ATOM_TYPES), dtype=np.float32)
    for i, atom in enumerate(mol.GetAtoms()):
        sym = atom.GetSymbol()
        if sym in Config.ATOM_LIST: X[i, Config.ATOM_LIST.index(sym)] = 1.0
        else: return None 
    for i in range(num_atoms, Config.MAX_ATOMS): X[i, -1] = 1.0

    A = np.zeros((Config.MAX_ATOMS, Config.MAX_ATOMS, Config.NUM_BOND_TYPES), dtype=np.float32)
    for bond in mol.GetBonds():
        i, j = bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()
        b_type = bond.GetBondType()
        try:
            ch_idx = Config.BOND_TYPES.index(b_type)
            A[i, j, ch_idx] = A[j, i, ch_idx] = 1.0
        except ValueError: continue
    for i in range(Config.MAX_ATOMS):
        for j in range(Config.MAX_ATOMS):
            if np.sum(A[i,j,:]) == 0: A[i,j,0] = 1.0
    return X, A

def graphs_to_mols(node_X, adj_A, hard=True):
    mols = []
    if hard:
        if isinstance(node_X, torch.Tensor): node_X = torch.argmax(node_X, dim=-1).detach().cpu().numpy()
        if isinstance(adj_A, torch.Tensor): adj_A = torch.argmax(adj_A, dim=-1).detach().cpu().numpy()

    for b in range(node_X.shape[0]):
        mol = Chem.RWMol()
        atom_indices = []
        for i in range(Config.MAX_ATOMS):
            atom_type = node_X[b, i]
            if atom_type == len(Config.ATOM_LIST): continue
            atom_indices.append(mol.AddAtom(Chem.Atom(Config.ATOM_LIST[atom_type])))
        for i in range(len(atom_indices)):
            for j in range(i + 1, len(atom_indices)):
                bond_idx = adj_A[b, i, j]
                if bond_idx != 0:
                    try: mol.AddBond(atom_indices[i], atom_indices[j], Config.BOND_TYPES[bond_idx])
                    except: pass
        try:
            mol = mol.GetMol()
            Chem.SanitizeMol(mol)
            mols.append(mol)
        except: mols.append(None)
    return mols

# =========================================
# DATASET
# =========================================
class MoleculeDataset(Dataset):
    def __init__(self):
        self.data = []
        smiles_list = []
        if os.path.exists(Config.CHEMPL_DB_PATH):
            print(f"Connecting to DB at {Config.CHEMPL_DB_PATH}...")
            try:
                conn = sqlite3.connect(Config.CHEMPL_DB_PATH)
                cursor = conn.cursor()
                print("Querying for small molecule candidates...")
                cursor.execute("SELECT canonical_smiles FROM compound_structures WHERE length(canonical_smiles) < 50 LIMIT 200000")
                smiles_list = [row[0] for row in cursor.fetchall()]
                conn.close()
            except Exception as e: print(f"DB Error: {e}")

        print(f"Filtering for molecules with {Config.MAX_ATOMS} atoms or less...")
        np.random.shuffle(smiles_list)
        for s in tqdm(smiles_list):
            res = smiles_to_graph(s)
            if res is not None: self.data.append(res)
            if len(self.data) >= 20000: break 
        print(f"Training on {len(self.data)} valid small graphs.")

    def __len__(self): return len(self.data)
    def __getitem__(self, idx):
        X, A = self.data[idx]
        return torch.tensor(X, dtype=torch.float32), torch.tensor(A, dtype=torch.float32)

# =========================================
# MODELS
# =========================================
class GraphTransformerLayer(nn.Module):
    def __init__(self, embed_dim, num_heads, adj_channels):
        super().__init__()
        self.embed_dim, self.num_heads, self.head_dim = embed_dim, num_heads, embed_dim // num_heads
        self.q_proj, self.k_proj, self.v_proj = nn.Linear(embed_dim, embed_dim), nn.Linear(embed_dim, embed_dim), nn.Linear(embed_dim, embed_dim)
        self.out_proj, self.adj_proj = nn.Linear(embed_dim, embed_dim), nn.Linear(adj_channels, num_heads)
    def forward(self, x, adj):
        B, N, _ = x.shape
        Q, K, V = [proj(x).view(B, N, self.num_heads, self.head_dim).transpose(1, 2) for proj in (self.q_proj, self.k_proj, self.v_proj)]
        attn = (Q @ K.transpose(-2, -1)) / np.sqrt(self.head_dim) + self.adj_proj(adj).permute(0, 3, 1, 2)
        return self.out_proj((F.softmax(attn, dim=-1) @ V).transpose(1, 2).reshape(B, N, self.embed_dim)) + x

class Generator(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(Config.LATENT_DIM, 128)
        self.fc_nodes = nn.Linear(128, Config.MAX_ATOMS * Config.NUM_ATOM_TYPES)
        self.fc_adj = nn.Linear(128, Config.MAX_ATOMS * Config.MAX_ATOMS * Config.NUM_BOND_TYPES)
        # Simpler initialization
        for m in self.modules():
            if isinstance(m, nn.Linear): nn.init.normal_(m.weight, 0, 0.02)

    def forward(self, z, temperature=1.0): # Added temperature
        h = F.leaky_relu(self.fc(z), 0.2)
        nodes = F.gumbel_softmax(self.fc_nodes(h).view(-1, Config.MAX_ATOMS, Config.NUM_ATOM_TYPES), tau=temperature, hard=False, dim=-1)
        adj = self.fc_adj(h).view(-1, Config.MAX_ATOMS, Config.MAX_ATOMS, Config.NUM_BOND_TYPES)
        return nodes, F.gumbel_softmax((adj + adj.permute(0, 2, 1, 3)) / 2.0, tau=temperature, hard=False, dim=-1)

class Critic(nn.Module):
    def __init__(self):
        super().__init__()
        self.embed = nn.Linear(Config.NUM_ATOM_TYPES, 64)
        self.gt1, self.gt2 = GraphTransformerLayer(64, 4, Config.NUM_BOND_TYPES), GraphTransformerLayer(64, 4, Config.NUM_BOND_TYPES)
        self.out = nn.Sequential(nn.Linear(64, 64), nn.LeakyReLU(0.2), nn.Linear(64, 1))
    def forward(self, x, adj):
        return self.out(torch.mean(self.gt2(self.gt1(self.embed(x), adj), adj), dim=1))

# =========================================
# TRAINING
# =========================================
def compute_gp(D, r_n, r_a, f_n, f_a):
    alpha = torch.rand(r_n.size(0), 1, 1, 1, device=Config.DEVICE)
    inter_n = (alpha.squeeze(-1) * r_n + (1 - alpha.squeeze(-1)) * f_n).requires_grad_(True)
    inter_a = (alpha * r_a + (1 - alpha) * f_a).requires_grad_(True)
    d_inter = D(inter_n, inter_a)
    grads = torch.autograd.grad(d_inter, [inter_n, inter_a], torch.ones_like(d_inter), create_graph=True, retain_graph=True)
    return ((torch.cat([g.reshape(r_n.size(0), -1) for g in grads], dim=1).norm(2, dim=1) - 1) ** 2).mean()

def train():
    os.makedirs(Config.MODEL_DIR, exist_ok=True)
    dataset = MoleculeDataset()
    if len(dataset) < Config.BATCH_SIZE: return

    loader = DataLoader(dataset, batch_size=Config.BATCH_SIZE, shuffle=True, drop_last=True)
    gen, crit = Generator().to(Config.DEVICE), Critic().to(Config.DEVICE)
    opt_G, opt_D = optim.Adam(gen.parameters(), lr=Config.LR_G, betas=(0., 0.9)), optim.Adam(crit.parameters(), lr=Config.LR_D, betas=(0., 0.9))

    start_epoch = 0
    if Config.RESUME_EPOCH and load_checkpoint(Config.RESUME_EPOCH, gen, crit): start_epoch = Config.RESUME_EPOCH

    for epoch in range(start_epoch, Config.NUM_EPOCHS):
        # Temperature annealing: 2.0 -> 0.5 over 100 epochs
        temp = max(0.5, 2.0 - 1.5 * (epoch / Config.NUM_EPOCHS))
        
        pbar = tqdm(loader, desc=f"Ep {epoch+1}/{Config.NUM_EPOCHS} [T={temp:.2f}]")
        d_loss_ep, g_loss_ep, batches = 0., 0., 0
        for r_n, r_a in pbar:
            r_n, r_a = r_n.to(Config.DEVICE), r_a.to(Config.DEVICE)
            batches += 1
            
            # Add noise to real data for stability
            r_n_noisy = r_n + 0.05 * torch.randn_like(r_n)
            r_a_noisy = r_a + 0.05 * torch.randn_like(r_a)

            for _ in range(Config.N_CRITIC):
                crit.zero_grad()
                f_n, f_a = gen(torch.randn(r_n.size(0), Config.LATENT_DIM, device=Config.DEVICE), temperature=temp)
                d_loss = crit(f_n.detach(), f_a.detach()).mean() - crit(r_n_noisy, r_a_noisy).mean() + Config.LAMBDA_GP * compute_gp(crit, r_n, r_a, f_n.detach(), f_a.detach())
                d_loss.backward()
                opt_D.step()
            
            gen.zero_grad()
            g_loss = -crit(*gen(torch.randn(r_n.size(0), Config.LATENT_DIM, device=Config.DEVICE), temperature=temp)).mean()
            g_loss.backward()
            opt_G.step()
            d_loss_ep += d_loss.item(); g_loss_ep += g_loss.item()
            pbar.set_postfix({'D': d_loss.item(), 'G': g_loss.item()})

        if (epoch+1) % Config.SAVE_EVERY == 0:
            torch.save(gen.state_dict(), f"{Config.MODEL_DIR}/g_{epoch+1}.pth")
            torch.save(crit.state_dict(), f"{Config.MODEL_DIR}/c_{epoch+1}.pth")
            evaluate(epoch+1, gen)

def load_checkpoint(epoch, gen, crit=None):
    g_path = os.path.join(Config.MODEL_DIR, f'g_{epoch}.pth')
    if not os.path.exists(g_path): return False
    gen.load_state_dict(torch.load(g_path, map_location=Config.DEVICE))
    if crit: crit.load_state_dict(torch.load(os.path.join(Config.MODEL_DIR, f'c_{epoch}.pth'), map_location=Config.DEVICE))
    return True

def evaluate(epoch, gen=None):
    if gen is None:
        gen = Generator().to(Config.DEVICE)
        load_checkpoint(epoch, gen)
    gen.eval()
    valid_mols = []
    with torch.no_grad():
        for _ in range(5):
             mols = graphs_to_mols(*gen(torch.randn(100, Config.LATENT_DIM, device=Config.DEVICE), temperature=0.1)) # Low temp for eval
             valid_mols.extend([Chem.MolToSmiles(m) for m in mols if m])
    unique = set(valid_mols)
    print(f"\nEp {epoch} Results (N=500) | Valid: {(len(valid_mols)/500)*100:.1f}% | Unique: {len(unique)}")
    if unique: print(f"Samples: {list(unique)[:3]}")
    print("-" * 50)
    gen.train()

if __name__ == '__main__':
    if len(sys.argv) > 2 and sys.argv[1] == 'eval': evaluate(int(sys.argv[2]))
    else: train()

Running on device: cuda
Connecting to DB at DL_ENDSEM__DATASET/chembl_35/chembl_35_sqlite/chembl_35.db...
Querying for small molecule candidates...
Filtering for molecules with 9 atoms or less...


100%|██████████| 200000/200000 [00:26<00:00, 7457.48it/s]


Training on 1431 valid small graphs.


Ep 101/500 [T=1.70]: 100%|██████████| 22/22 [00:03<00:00,  7.09it/s, D=-1.1, G=-1.93] 
Ep 102/500 [T=1.70]: 100%|██████████| 22/22 [00:03<00:00,  7.12it/s, D=-1.24, G=-1.83]
Ep 103/500 [T=1.69]: 100%|██████████| 22/22 [00:03<00:00,  7.09it/s, D=-1.3, G=-1.77] 
Ep 104/500 [T=1.69]: 100%|██████████| 22/22 [00:03<00:00,  7.10it/s, D=-1.25, G=-1.64]
Ep 105/500 [T=1.69]: 100%|██████████| 22/22 [00:03<00:00,  7.03it/s, D=-1.11, G=-0.806]
Ep 106/500 [T=1.69]: 100%|██████████| 22/22 [00:02<00:00,  7.65it/s, D=-1.27, G=-0.9]  
Ep 107/500 [T=1.68]: 100%|██████████| 22/22 [00:02<00:00,  8.07it/s, D=-1.12, G=-0.829] 
Ep 108/500 [T=1.68]: 100%|██████████| 22/22 [00:03<00:00,  6.70it/s, D=-0.994, G=-0.689]
Ep 109/500 [T=1.68]: 100%|██████████| 22/22 [00:03<00:00,  7.22it/s, D=-1.04, G=-0.631] 
Ep 110/500 [T=1.67]: 100%|██████████| 22/22 [00:03<00:00,  6.94it/s, D=-0.89, G=-0.736] 



Ep 110 Results (N=500) | Valid: 10.8% | Unique: 32
Samples: ['C=C(C)C=C=O', 'C.C=O.CC1CC1', 'CC1(C)C=N1.CO']
--------------------------------------------------


Ep 111/500 [T=1.67]: 100%|██████████| 22/22 [00:03<00:00,  6.84it/s, D=-0.913, G=-0.479]
Ep 112/500 [T=1.67]: 100%|██████████| 22/22 [00:03<00:00,  7.13it/s, D=-0.958, G=-0.425]
Ep 113/500 [T=1.66]: 100%|██████████| 22/22 [00:03<00:00,  6.95it/s, D=-1, G=-0.385]    
Ep 114/500 [T=1.66]: 100%|██████████| 22/22 [00:03<00:00,  7.13it/s, D=-0.747, G=-0.217]
Ep 115/500 [T=1.66]: 100%|██████████| 22/22 [00:03<00:00,  7.16it/s, D=-0.792, G=-0.386]
Ep 116/500 [T=1.66]: 100%|██████████| 22/22 [00:03<00:00,  6.65it/s, D=-0.796, G=-0.202]
Ep 117/500 [T=1.65]: 100%|██████████| 22/22 [00:03<00:00,  7.23it/s, D=-0.852, G=-0.224]
Ep 118/500 [T=1.65]: 100%|██████████| 22/22 [00:03<00:00,  7.16it/s, D=-0.704, G=-0.103]
Ep 119/500 [T=1.65]: 100%|██████████| 22/22 [00:03<00:00,  7.21it/s, D=-0.84, G=-0.125]  
Ep 120/500 [T=1.64]: 100%|██████████| 22/22 [00:03<00:00,  7.00it/s, D=-0.79, G=-0.0519] 



Ep 120 Results (N=500) | Valid: 43.0% | Unique: 56
Samples: ['C.C1OS1.CC(C)N', 'C1=CO1.CC1(C)C=N1', 'CO.NC12C=C1C2']
--------------------------------------------------


Ep 121/500 [T=1.64]: 100%|██████████| 22/22 [00:03<00:00,  7.06it/s, D=-0.745, G=-0.0861]  
Ep 122/500 [T=1.64]: 100%|██████████| 22/22 [00:03<00:00,  7.06it/s, D=-0.814, G=-0.0765] 
Ep 123/500 [T=1.63]: 100%|██████████| 22/22 [00:02<00:00,  7.38it/s, D=-0.698, G=-0.223]  
Ep 124/500 [T=1.63]: 100%|██████████| 22/22 [00:03<00:00,  6.83it/s, D=-0.841, G=-0.116] 
Ep 125/500 [T=1.63]: 100%|██████████| 22/22 [00:03<00:00,  6.96it/s, D=-0.787, G=-0.202]  
Ep 126/500 [T=1.62]: 100%|██████████| 22/22 [00:03<00:00,  7.03it/s, D=-0.828, G=0.0223] 
Ep 127/500 [T=1.62]: 100%|██████████| 22/22 [00:03<00:00,  7.11it/s, D=-0.833, G=-0.117] 
Ep 128/500 [T=1.62]: 100%|██████████| 22/22 [00:02<00:00,  7.38it/s, D=-0.964, G=0.0406]  
Ep 129/500 [T=1.62]: 100%|██████████| 22/22 [00:03<00:00,  7.12it/s, D=-0.907, G=0.0289]  
Ep 130/500 [T=1.61]: 100%|██████████| 22/22 [00:03<00:00,  7.12it/s, D=-0.899, G=-0.0741]



Ep 130 Results (N=500) | Valid: 70.0% | Unique: 61
Samples: ['CC12C=C1C1(CO1)N2', 'C1C2C3=NC132.C1CO1', 'CC1(N)C#S1.CO']
--------------------------------------------------


Ep 131/500 [T=1.61]: 100%|██████████| 22/22 [00:03<00:00,  7.02it/s, D=-0.975, G=0.113]  
Ep 132/500 [T=1.61]: 100%|██████████| 22/22 [00:03<00:00,  7.03it/s, D=-1.05, G=-0.0224] 
Ep 133/500 [T=1.60]: 100%|██████████| 22/22 [00:03<00:00,  7.11it/s, D=-0.976, G=0.00933]
Ep 134/500 [T=1.60]: 100%|██████████| 22/22 [00:03<00:00,  7.09it/s, D=-1.11, G=-0.0633] 
Ep 135/500 [T=1.60]: 100%|██████████| 22/22 [00:02<00:00,  7.63it/s, D=-1.03, G=0.0694]   
Ep 136/500 [T=1.59]: 100%|██████████| 22/22 [00:03<00:00,  7.25it/s, D=-1.19, G=0.0756] 
Ep 137/500 [T=1.59]: 100%|██████████| 22/22 [00:03<00:00,  7.09it/s, D=-1.17, G=-0.0398]
Ep 138/500 [T=1.59]: 100%|██████████| 22/22 [00:02<00:00,  7.37it/s, D=-1.1, G=0.222]   
Ep 139/500 [T=1.59]: 100%|██████████| 22/22 [00:03<00:00,  7.17it/s, D=-1.23, G=0.186]    
Ep 140/500 [T=1.58]: 100%|██████████| 22/22 [00:03<00:00,  7.17it/s, D=-1.19, G=0.041]  



Ep 140 Results (N=500) | Valid: 49.4% | Unique: 53
Samples: ['C1=C2C3NC123.CO', 'CC1(NC2CO2)CC1', 'C1=CC12C=C2.CO']
--------------------------------------------------


Ep 141/500 [T=1.58]: 100%|██████████| 22/22 [00:03<00:00,  6.77it/s, D=-1.14, G=0.144]  
Ep 142/500 [T=1.58]: 100%|██████████| 22/22 [00:03<00:00,  6.95it/s, D=-1.14, G=0.279] 
Ep 143/500 [T=1.57]: 100%|██████████| 22/22 [00:03<00:00,  7.17it/s, D=-1.21, G=0.096]  
Ep 144/500 [T=1.57]: 100%|██████████| 22/22 [00:03<00:00,  6.96it/s, D=-1.21, G=0.0676]
Ep 145/500 [T=1.57]: 100%|██████████| 22/22 [00:03<00:00,  7.12it/s, D=-1.23, G=0.109]   
Ep 146/500 [T=1.56]: 100%|██████████| 22/22 [00:02<00:00,  7.37it/s, D=-1.22, G=0.135]  
Ep 147/500 [T=1.56]: 100%|██████████| 22/22 [00:03<00:00,  7.30it/s, D=-0.922, G=0.368]
Ep 148/500 [T=1.56]: 100%|██████████| 22/22 [00:02<00:00,  8.30it/s, D=-1.01, G=0.397]  
Ep 149/500 [T=1.56]: 100%|██████████| 22/22 [00:01<00:00, 13.69it/s, D=-1.13, G=0.363] 
Ep 150/500 [T=1.55]: 100%|██████████| 22/22 [00:01<00:00, 14.47it/s, D=-1.18, G=0.221] 



Ep 150 Results (N=500) | Valid: 28.0% | Unique: 32
Samples: ['CC1(NC2CO2)CC1', 'C1CN1.O.OC12CC1N2', 'CC1(C)C=N1.CO']
--------------------------------------------------


Ep 151/500 [T=1.55]: 100%|██████████| 22/22 [00:01<00:00, 14.30it/s, D=-1.17, G=0.377] 
Ep 152/500 [T=1.55]: 100%|██████████| 22/22 [00:01<00:00, 14.00it/s, D=-1.18, G=0.0942]
Ep 153/500 [T=1.54]: 100%|██████████| 22/22 [00:01<00:00, 14.09it/s, D=-1.04, G=0.205] 
Ep 154/500 [T=1.54]: 100%|██████████| 22/22 [00:01<00:00, 14.54it/s, D=-1.17, G=0.444] 
Ep 155/500 [T=1.54]: 100%|██████████| 22/22 [00:01<00:00, 14.45it/s, D=-0.949, G=0.488]
Ep 156/500 [T=1.54]: 100%|██████████| 22/22 [00:01<00:00, 14.63it/s, D=-1.22, G=0.294] 
Ep 157/500 [T=1.53]: 100%|██████████| 22/22 [00:01<00:00, 13.97it/s, D=-1.09, G=0.387] 
Ep 158/500 [T=1.53]: 100%|██████████| 22/22 [00:02<00:00, 10.90it/s, D=-0.978, G=0.209] 
Ep 159/500 [T=1.53]: 100%|██████████| 22/22 [00:02<00:00,  9.58it/s, D=-1.03, G=0.348]
Ep 160/500 [T=1.52]: 100%|██████████| 22/22 [00:03<00:00,  7.02it/s, D=-1.03, G=0.283] 



Ep 160 Results (N=500) | Valid: 20.6% | Unique: 34
Samples: ['CC1=CSOC1', 'C.CC1CN1.CO', 'C1=C2CC1N2.C1CO1']
--------------------------------------------------


Ep 161/500 [T=1.52]: 100%|██████████| 22/22 [00:03<00:00,  6.81it/s, D=-1.13, G=0.492] 
Ep 162/500 [T=1.52]: 100%|██████████| 22/22 [00:03<00:00,  6.44it/s, D=-0.963, G=0.485]
Ep 163/500 [T=1.51]: 100%|██████████| 22/22 [00:03<00:00,  6.74it/s, D=-1.02, G=0.416] 
Ep 164/500 [T=1.51]: 100%|██████████| 22/22 [00:03<00:00,  6.74it/s, D=-1.02, G=0.377] 
Ep 165/500 [T=1.51]: 100%|██████████| 22/22 [00:02<00:00,  7.54it/s, D=-1.04, G=0.254] 
Ep 166/500 [T=1.50]: 100%|██████████| 22/22 [00:03<00:00,  6.83it/s, D=-1.19, G=0.327] 
Ep 167/500 [T=1.50]: 100%|██████████| 22/22 [00:03<00:00,  6.52it/s, D=-1.06, G=0.396] 
Ep 168/500 [T=1.50]: 100%|██████████| 22/22 [00:02<00:00,  7.38it/s, D=-1.06, G=0.4]   
Ep 169/500 [T=1.50]: 100%|██████████| 22/22 [00:03<00:00,  7.03it/s, D=-1.06, G=0.363] 
Ep 170/500 [T=1.49]: 100%|██████████| 22/22 [00:03<00:00,  7.05it/s, D=-1.06, G=0.236] 



Ep 170 Results (N=500) | Valid: 13.4% | Unique: 38
Samples: ['C1CO1.CC1=CCN1', 'C1=C2CCN12.C1CO1', 'C.C1NO1.C=C1C=N1']
--------------------------------------------------


Ep 171/500 [T=1.49]: 100%|██████████| 22/22 [00:03<00:00,  6.88it/s, D=-0.998, G=0.466]
Ep 172/500 [T=1.49]: 100%|██████████| 22/22 [00:02<00:00,  7.45it/s, D=-1.06, G=0.471] 
Ep 173/500 [T=1.48]: 100%|██████████| 22/22 [00:02<00:00,  8.02it/s, D=-1.06, G=0.522] 
Ep 174/500 [T=1.48]: 100%|██████████| 22/22 [00:01<00:00, 14.17it/s, D=-1.11, G=0.351] 
Ep 175/500 [T=1.48]: 100%|██████████| 22/22 [00:01<00:00, 14.19it/s, D=-0.987, G=0.285]
Ep 176/500 [T=1.48]: 100%|██████████| 22/22 [00:01<00:00, 13.58it/s, D=-0.893, G=0.34] 
Ep 177/500 [T=1.47]: 100%|██████████| 22/22 [00:01<00:00, 13.94it/s, D=-1.18, G=0.249] 
Ep 178/500 [T=1.47]: 100%|██████████| 22/22 [00:02<00:00,  9.37it/s, D=-0.924, G=0.337]
Ep 179/500 [T=1.47]: 100%|██████████| 22/22 [00:02<00:00,  7.43it/s, D=-1.14, G=0.391] 
Ep 180/500 [T=1.46]: 100%|██████████| 22/22 [00:02<00:00,  8.51it/s, D=-0.988, G=0.324]



Ep 180 Results (N=500) | Valid: 6.8% | Unique: 24
Samples: ['C1CO1.SC1=CCC1', 'C.C=C(C)C.CO', 'C.C=CC.CN']
--------------------------------------------------


Ep 181/500 [T=1.46]: 100%|██████████| 22/22 [00:01<00:00, 13.90it/s, D=-0.875, G=0.267]
Ep 182/500 [T=1.46]: 100%|██████████| 22/22 [00:01<00:00, 14.07it/s, D=-1.05, G=0.385] 
Ep 183/500 [T=1.45]: 100%|██████████| 22/22 [00:01<00:00, 14.55it/s, D=-1.02, G=0.202] 
Ep 184/500 [T=1.45]: 100%|██████████| 22/22 [00:01<00:00, 14.39it/s, D=-1.12, G=0.285] 
Ep 185/500 [T=1.45]: 100%|██████████| 22/22 [00:02<00:00,  9.27it/s, D=-1.02, G=0.621] 
Ep 186/500 [T=1.45]: 100%|██████████| 22/22 [00:03<00:00,  6.67it/s, D=-0.961, G=0.34] 
Ep 187/500 [T=1.44]: 100%|██████████| 22/22 [00:03<00:00,  6.85it/s, D=-1.18, G=0.617] 
Ep 188/500 [T=1.44]: 100%|██████████| 22/22 [00:03<00:00,  6.97it/s, D=-1.09, G=0.433] 
Ep 189/500 [T=1.44]: 100%|██████████| 22/22 [00:03<00:00,  6.77it/s, D=-1.02, G=0.293] 
Ep 190/500 [T=1.43]: 100%|██████████| 22/22 [00:03<00:00,  6.87it/s, D=-0.946, G=0.611]



Ep 190 Results (N=500) | Valid: 10.4% | Unique: 42
Samples: ['C1CO1.CC1=CCN1', 'C.CC=S.CO', 'C.C=C(C)C.CO']
--------------------------------------------------


Ep 191/500 [T=1.43]: 100%|██████████| 22/22 [00:03<00:00,  7.22it/s, D=-0.945, G=0.481]
Ep 192/500 [T=1.43]: 100%|██████████| 22/22 [00:03<00:00,  7.17it/s, D=-1.1, G=0.363]  
Ep 193/500 [T=1.42]: 100%|██████████| 22/22 [00:03<00:00,  6.70it/s, D=-1.12, G=0.353] 
Ep 194/500 [T=1.42]: 100%|██████████| 22/22 [00:03<00:00,  6.82it/s, D=-1.2, G=0.483]  
Ep 195/500 [T=1.42]: 100%|██████████| 22/22 [00:03<00:00,  7.04it/s, D=-1.05, G=0.511]
Ep 196/500 [T=1.42]: 100%|██████████| 22/22 [00:03<00:00,  6.93it/s, D=-1.03, G=0.257] 
Ep 197/500 [T=1.41]: 100%|██████████| 22/22 [00:03<00:00,  6.87it/s, D=-1.19, G=0.198]  
Ep 198/500 [T=1.41]: 100%|██████████| 22/22 [00:03<00:00,  6.61it/s, D=-0.996, G=0.103] 
Ep 199/500 [T=1.41]: 100%|██████████| 22/22 [00:03<00:00,  6.94it/s, D=-1.14, G=0.042]  
Ep 200/500 [T=1.40]: 100%|██████████| 22/22 [00:03<00:00,  6.80it/s, D=-1.24, G=0.248]  



Ep 200 Results (N=500) | Valid: 20.6% | Unique: 80
Samples: ['C.C=CNC.C=O', 'C=CCC.CO.O', 'C.C=O.CO']
--------------------------------------------------


Ep 201/500 [T=1.40]: 100%|██████████| 22/22 [00:03<00:00,  6.77it/s, D=-1.2, G=0.234]   
Ep 202/500 [T=1.40]: 100%|██████████| 22/22 [00:03<00:00,  6.81it/s, D=-1.18, G=0.185]  
Ep 203/500 [T=1.39]: 100%|██████████| 22/22 [00:03<00:00,  6.88it/s, D=-1.31, G=0.197] 
Ep 204/500 [T=1.39]: 100%|██████████| 22/22 [00:03<00:00,  7.17it/s, D=-1.12, G=0.448]  
Ep 205/500 [T=1.39]: 100%|██████████| 22/22 [00:03<00:00,  7.17it/s, D=-1.16, G=0.179]  
Ep 206/500 [T=1.39]: 100%|██████████| 22/22 [00:03<00:00,  7.29it/s, D=-1.27, G=0.101]  
Ep 207/500 [T=1.38]: 100%|██████████| 22/22 [00:03<00:00,  7.33it/s, D=-1.19, G=0.205]  
Ep 208/500 [T=1.38]: 100%|██████████| 22/22 [00:03<00:00,  6.95it/s, D=-1.06, G=0.213]  
Ep 209/500 [T=1.38]: 100%|██████████| 22/22 [00:03<00:00,  7.33it/s, D=-1.1, G=0.00017] 
Ep 210/500 [T=1.37]: 100%|██████████| 22/22 [00:03<00:00,  7.32it/s, D=-1.22, G=0.0934] 



Ep 210 Results (N=500) | Valid: 30.6% | Unique: 101
Samples: ['C.C=C(C)C.CO', 'C.CCC.CO', 'C.C=O.CC=NC']
--------------------------------------------------


Ep 211/500 [T=1.37]: 100%|██████████| 22/22 [00:03<00:00,  7.04it/s, D=-1.2, G=0.105]   
Ep 212/500 [T=1.37]: 100%|██████████| 22/22 [00:03<00:00,  6.98it/s, D=-1.18, G=-0.0914]  
Ep 213/500 [T=1.36]: 100%|██████████| 22/22 [00:02<00:00,  8.21it/s, D=-1.04, G=-0.18]  
Ep 214/500 [T=1.36]: 100%|██████████| 22/22 [00:01<00:00, 13.33it/s, D=-1.28, G=-0.0726]
Ep 215/500 [T=1.36]: 100%|██████████| 22/22 [00:01<00:00, 14.10it/s, D=-1.18, G=-0.103]  
Ep 216/500 [T=1.35]: 100%|██████████| 22/22 [00:01<00:00, 14.17it/s, D=-1.23, G=-0.157]  
Ep 217/500 [T=1.35]: 100%|██████████| 22/22 [00:01<00:00, 12.80it/s, D=-1.11, G=-0.172]  
Ep 218/500 [T=1.35]: 100%|██████████| 22/22 [00:01<00:00, 12.80it/s, D=-1.31, G=0.0807] 
Ep 219/500 [T=1.35]: 100%|██████████| 22/22 [00:01<00:00, 13.05it/s, D=-1.11, G=-0.0113]
Ep 220/500 [T=1.34]: 100%|██████████| 22/22 [00:01<00:00, 12.93it/s, D=-1.2, G=-0.0438]  



Ep 220 Results (N=500) | Valid: 38.2% | Unique: 116
Samples: ['C.C=O.CNC', 'C.CCC.CO', 'C.C.C1=NC1.CCS']
--------------------------------------------------


Ep 221/500 [T=1.34]: 100%|██████████| 22/22 [00:01<00:00, 15.20it/s, D=-1.09, G=0.0179]   
Ep 222/500 [T=1.34]: 100%|██████████| 22/22 [00:01<00:00, 14.31it/s, D=-1.27, G=0.161]  
Ep 223/500 [T=1.33]: 100%|██████████| 22/22 [00:01<00:00, 14.30it/s, D=-0.936, G=-0.0424]
Ep 224/500 [T=1.33]: 100%|██████████| 22/22 [00:01<00:00, 14.08it/s, D=-1.05, G=-0.0323] 
Ep 225/500 [T=1.33]: 100%|██████████| 22/22 [00:01<00:00, 13.63it/s, D=-0.993, G=0.066]  
Ep 226/500 [T=1.32]: 100%|██████████| 22/22 [00:01<00:00, 14.94it/s, D=-1.19, G=0.056]  
Ep 227/500 [T=1.32]: 100%|██████████| 22/22 [00:01<00:00, 14.41it/s, D=-1.18, G=0.0228] 
Ep 228/500 [T=1.32]: 100%|██████████| 22/22 [00:01<00:00, 14.32it/s, D=-1.19, G=0.0871] 
Ep 229/500 [T=1.32]: 100%|██████████| 22/22 [00:01<00:00, 14.37it/s, D=-1.15, G=0.0213]
Ep 230/500 [T=1.31]: 100%|██████████| 22/22 [00:01<00:00, 14.53it/s, D=-0.762, G=0.343] 



Ep 230 Results (N=500) | Valid: 40.8% | Unique: 117
Samples: ['C.C.CN.COC', 'C.C=O.CNC', 'C=CCC.CO.O']
--------------------------------------------------


Ep 231/500 [T=1.31]: 100%|██████████| 22/22 [00:01<00:00, 14.58it/s, D=-1.17, G=0.111]    
Ep 232/500 [T=1.31]: 100%|██████████| 22/22 [00:01<00:00, 13.93it/s, D=-1.26, G=0.0197]   
Ep 233/500 [T=1.30]: 100%|██████████| 22/22 [00:01<00:00, 13.71it/s, D=-1.24, G=0.0367] 
Ep 234/500 [T=1.30]: 100%|██████████| 22/22 [00:01<00:00, 14.73it/s, D=-1.01, G=0.407]  
Ep 235/500 [T=1.30]: 100%|██████████| 22/22 [00:01<00:00, 14.67it/s, D=-1.25, G=0.315] 
Ep 236/500 [T=1.29]: 100%|██████████| 22/22 [00:01<00:00, 14.23it/s, D=-1.04, G=0.272] 
Ep 237/500 [T=1.29]: 100%|██████████| 22/22 [00:01<00:00, 14.57it/s, D=-1.11, G=0.431] 
Ep 238/500 [T=1.29]: 100%|██████████| 22/22 [00:01<00:00, 14.46it/s, D=-1.16, G=0.127] 
Ep 239/500 [T=1.29]: 100%|██████████| 22/22 [00:01<00:00, 14.32it/s, D=-1.1, G=0.193]  
Ep 240/500 [T=1.28]: 100%|██████████| 22/22 [00:01<00:00, 14.52it/s, D=-1.12, G=0.495] 



Ep 240 Results (N=500) | Valid: 51.8% | Unique: 96
Samples: ['C.C.CN.COC', 'C.C.C#CO.C=N', 'C.C#CS.CO']
--------------------------------------------------


Ep 241/500 [T=1.28]: 100%|██████████| 22/22 [00:01<00:00, 14.04it/s, D=-1.24, G=0.734] 
Ep 242/500 [T=1.28]: 100%|██████████| 22/22 [00:01<00:00, 14.78it/s, D=-1.08, G=1.13]
Ep 243/500 [T=1.27]: 100%|██████████| 22/22 [00:01<00:00, 14.68it/s, D=-1, G=1.32]    
Ep 244/500 [T=1.27]: 100%|██████████| 22/22 [00:01<00:00, 14.05it/s, D=-0.856, G=1.29]
Ep 245/500 [T=1.27]: 100%|██████████| 22/22 [00:02<00:00, 10.71it/s, D=-1.22, G=1.41] 
Ep 246/500 [T=1.27]: 100%|██████████| 22/22 [00:03<00:00,  7.14it/s, D=-1.24, G=1.36]
Ep 247/500 [T=1.26]: 100%|██████████| 22/22 [00:03<00:00,  7.03it/s, D=-1.25, G=1.28] 
Ep 248/500 [T=1.26]: 100%|██████████| 22/22 [00:03<00:00,  6.90it/s, D=-1.15, G=1.42]
Ep 249/500 [T=1.26]: 100%|██████████| 22/22 [00:03<00:00,  7.29it/s, D=-1.33, G=1.61]
Ep 250/500 [T=1.25]: 100%|██████████| 22/22 [00:03<00:00,  6.97it/s, D=-1.2, G=1.64]  



Ep 250 Results (N=500) | Valid: 60.8% | Unique: 122
Samples: ['C.C.CN.COC', 'CC1(C2=C=C2)C(=O)N1O', 'C.C=O.CC=NC']
--------------------------------------------------


Ep 251/500 [T=1.25]: 100%|██████████| 22/22 [00:03<00:00,  7.02it/s, D=-1.16, G=1.66]
Ep 252/500 [T=1.25]: 100%|██████████| 22/22 [00:03<00:00,  7.26it/s, D=-1.6, G=1.73]  
Ep 253/500 [T=1.24]: 100%|██████████| 22/22 [00:03<00:00,  7.10it/s, D=-1.46, G=1.99]
Ep 254/500 [T=1.24]: 100%|██████████| 22/22 [00:03<00:00,  7.17it/s, D=-1.38, G=2.17]
Ep 255/500 [T=1.24]: 100%|██████████| 22/22 [00:03<00:00,  7.12it/s, D=-1.47, G=2.11]
Ep 256/500 [T=1.23]: 100%|██████████| 22/22 [00:03<00:00,  6.77it/s, D=-1.63, G=2.04] 
Ep 257/500 [T=1.23]: 100%|██████████| 22/22 [00:03<00:00,  6.88it/s, D=-1.77, G=2.21] 
Ep 258/500 [T=1.23]: 100%|██████████| 22/22 [00:03<00:00,  6.73it/s, D=-2.26, G=2.29]
Ep 259/500 [T=1.23]: 100%|██████████| 22/22 [00:03<00:00,  6.67it/s, D=-2.25, G=2.33]
Ep 260/500 [T=1.22]: 100%|██████████| 22/22 [00:03<00:00,  7.09it/s, D=-1.11, G=2.31]



Ep 260 Results (N=500) | Valid: 62.4% | Unique: 116
Samples: ['C.C.CN.COC', 'CC1NC1=O.OC1C=C1', 'C=C1C(C)N1CC1C=C1']
--------------------------------------------------


Ep 261/500 [T=1.22]: 100%|██████████| 22/22 [00:03<00:00,  7.01it/s, D=-2.35, G=2.36]
Ep 262/500 [T=1.22]: 100%|██████████| 22/22 [00:03<00:00,  6.85it/s, D=-2.79, G=2.49]
Ep 263/500 [T=1.21]: 100%|██████████| 22/22 [00:03<00:00,  7.13it/s, D=-3.71, G=2.64]
Ep 264/500 [T=1.21]: 100%|██████████| 22/22 [00:03<00:00,  6.90it/s, D=-2.95, G=2.69]
Ep 265/500 [T=1.21]: 100%|██████████| 22/22 [00:03<00:00,  7.15it/s, D=-3.05, G=2.93]
Ep 266/500 [T=1.21]: 100%|██████████| 22/22 [00:03<00:00,  6.84it/s, D=-4.11, G=2.95]
Ep 267/500 [T=1.20]: 100%|██████████| 22/22 [00:03<00:00,  7.07it/s, D=-3.52, G=3.2] 
Ep 268/500 [T=1.20]: 100%|██████████| 22/22 [00:03<00:00,  7.16it/s, D=-4.07, G=3.39] 
Ep 269/500 [T=1.20]: 100%|██████████| 22/22 [00:03<00:00,  6.83it/s, D=-4.98, G=3.53]
Ep 270/500 [T=1.19]: 100%|██████████| 22/22 [00:03<00:00,  6.99it/s, D=-4.1, G=3.63]  



Ep 270 Results (N=500) | Valid: 65.8% | Unique: 107
Samples: ['C.C1C2OC12.C=CO', 'C.C.CN.CO', 'C.C=CO.CC1=CO1']
--------------------------------------------------


Ep 271/500 [T=1.19]: 100%|██████████| 22/22 [00:03<00:00,  7.11it/s, D=-5.13, G=3.71]
Ep 272/500 [T=1.19]: 100%|██████████| 22/22 [00:03<00:00,  6.84it/s, D=-4.99, G=3.96] 
Ep 273/500 [T=1.18]: 100%|██████████| 22/22 [00:03<00:00,  7.19it/s, D=-2.98, G=4.05] 
Ep 274/500 [T=1.18]: 100%|██████████| 22/22 [00:03<00:00,  7.09it/s, D=-6.53, G=4.32]
Ep 275/500 [T=1.18]: 100%|██████████| 22/22 [00:03<00:00,  6.90it/s, D=-4.71, G=4.49]
Ep 276/500 [T=1.17]: 100%|██████████| 22/22 [00:03<00:00,  6.93it/s, D=-6.98, G=4.75]
Ep 277/500 [T=1.17]: 100%|██████████| 22/22 [00:03<00:00,  6.87it/s, D=-6.12, G=4.94]
Ep 278/500 [T=1.17]: 100%|██████████| 22/22 [00:03<00:00,  7.07it/s, D=-6.18, G=5.1] 
Ep 279/500 [T=1.17]: 100%|██████████| 22/22 [00:03<00:00,  7.24it/s, D=-6.6, G=5.25] 
Ep 280/500 [T=1.16]: 100%|██████████| 22/22 [00:03<00:00,  6.95it/s, D=-10.3, G=5.49]



Ep 280 Results (N=500) | Valid: 63.8% | Unique: 96
Samples: ['C.CC=CO.NO', 'C.C.C1CS1.C=CO', 'CC1NC1=O.OC1C=C1']
--------------------------------------------------


Ep 281/500 [T=1.16]: 100%|██████████| 22/22 [00:03<00:00,  7.11it/s, D=-10.2, G=5.7] 
Ep 282/500 [T=1.16]: 100%|██████████| 22/22 [00:03<00:00,  6.91it/s, D=-9.47, G=5.95]
Ep 283/500 [T=1.15]: 100%|██████████| 22/22 [00:03<00:00,  7.03it/s, D=-10.8, G=6.16]
Ep 284/500 [T=1.15]: 100%|██████████| 22/22 [00:03<00:00,  7.31it/s, D=-12.2, G=6.47]
Ep 285/500 [T=1.15]: 100%|██████████| 22/22 [00:03<00:00,  7.01it/s, D=-14.3, G=6.81] 
Ep 286/500 [T=1.15]: 100%|██████████| 22/22 [00:03<00:00,  7.03it/s, D=38.7, G=6.97] 
Ep 287/500 [T=1.14]: 100%|██████████| 22/22 [00:03<00:00,  7.06it/s, D=-13, G=7.21]  
Ep 288/500 [T=1.14]: 100%|██████████| 22/22 [00:03<00:00,  6.98it/s, D=-15.8, G=7.48]
Ep 289/500 [T=1.14]: 100%|██████████| 22/22 [00:03<00:00,  7.01it/s, D=-19.6, G=7.78]
Ep 290/500 [T=1.13]: 100%|██████████| 22/22 [00:03<00:00,  6.85it/s, D=-15.5, G=7.94]



Ep 290 Results (N=500) | Valid: 54.4% | Unique: 77
Samples: ['C.C.C=N.C=[SH]C', 'C.C.C=CC.C=O', 'C.C=C.CC1CS1.O']
--------------------------------------------------


Ep 291/500 [T=1.13]: 100%|██████████| 22/22 [00:02<00:00,  7.36it/s, D=-16.8, G=8.17]
Ep 292/500 [T=1.13]: 100%|██████████| 22/22 [00:01<00:00, 13.48it/s, D=-22.3, G=8.6] 
Ep 293/500 [T=1.12]: 100%|██████████| 22/22 [00:01<00:00, 14.04it/s, D=-23.3, G=8.91]
Ep 294/500 [T=1.12]: 100%|██████████| 22/22 [00:01<00:00, 13.83it/s, D=-25.9, G=9.11]
Ep 295/500 [T=1.12]: 100%|██████████| 22/22 [00:01<00:00, 13.81it/s, D=-16, G=9.41]  
Ep 296/500 [T=1.11]: 100%|██████████| 22/22 [00:01<00:00, 14.27it/s, D=-3.19, G=9.75]
Ep 297/500 [T=1.11]: 100%|██████████| 22/22 [00:01<00:00, 14.40it/s, D=10.4, G=9.9]  
Ep 298/500 [T=1.11]: 100%|██████████| 22/22 [00:01<00:00, 14.30it/s, D=-28.2, G=10.2]
Ep 299/500 [T=1.11]: 100%|██████████| 22/22 [00:01<00:00, 14.18it/s, D=-30.3, G=10.6]
Ep 300/500 [T=1.10]: 100%|██████████| 22/22 [00:01<00:00, 13.68it/s, D=-35.6, G=10.9]



Ep 300 Results (N=500) | Valid: 58.0% | Unique: 83
Samples: ['C.C=C.CC.CS', 'C.C1CS1.C=CCO', 'C.C.C1=[SH]C1.C=CO']
--------------------------------------------------


Ep 301/500 [T=1.10]: 100%|██████████| 22/22 [00:01<00:00, 13.35it/s, D=23.7, G=11.3] 
Ep 302/500 [T=1.10]: 100%|██████████| 22/22 [00:01<00:00, 14.22it/s, D=-44.7, G=11.7]
Ep 303/500 [T=1.09]: 100%|██████████| 22/22 [00:01<00:00, 14.39it/s, D=-52.5, G=12.1]
Ep 304/500 [T=1.09]: 100%|██████████| 22/22 [00:02<00:00,  9.10it/s, D=-25.9, G=12.3]
Ep 305/500 [T=1.09]: 100%|██████████| 22/22 [00:03<00:00,  7.04it/s, D=-48, G=12.6]  
Ep 306/500 [T=1.08]: 100%|██████████| 22/22 [00:03<00:00,  7.27it/s, D=-58.8, G=13.1]
Ep 307/500 [T=1.08]: 100%|██████████| 22/22 [00:03<00:00,  7.16it/s, D=-58.3, G=13.5]
Ep 308/500 [T=1.08]: 100%|██████████| 22/22 [00:03<00:00,  7.15it/s, D=-63.9, G=13.8]
Ep 309/500 [T=1.08]: 100%|██████████| 22/22 [00:03<00:00,  7.09it/s, D=-69.4, G=14.1]
Ep 310/500 [T=1.07]: 100%|██████████| 22/22 [00:03<00:00,  6.39it/s, D=-67.4, G=14.4]



Ep 310 Results (N=500) | Valid: 40.2% | Unique: 72
Samples: ['C.C=C.CC=C1C=[SH]1', 'C.C=C.CC1=[SH]C1', 'C.C=C.CC.CS']
--------------------------------------------------


Ep 311/500 [T=1.07]: 100%|██████████| 22/22 [00:03<00:00,  6.73it/s, D=-86.9, G=15]  
Ep 312/500 [T=1.07]: 100%|██████████| 22/22 [00:03<00:00,  6.82it/s, D=-101, G=15.5] 
Ep 313/500 [T=1.06]: 100%|██████████| 22/22 [00:02<00:00,  8.70it/s, D=-65.9, G=15.9]
Ep 314/500 [T=1.06]: 100%|██████████| 22/22 [00:01<00:00, 14.49it/s, D=-102, G=16.6] 
Ep 315/500 [T=1.06]: 100%|██████████| 22/22 [00:01<00:00, 11.48it/s, D=-137, G=16.8] 
Ep 316/500 [T=1.05]: 100%|██████████| 22/22 [00:03<00:00,  6.84it/s, D=-126, G=17.2] 
Ep 317/500 [T=1.05]: 100%|██████████| 22/22 [00:03<00:00,  7.29it/s, D=-83, G=17.9]  
Ep 318/500 [T=1.05]: 100%|██████████| 22/22 [00:01<00:00, 13.76it/s, D=-154, G=18.5]
Ep 319/500 [T=1.05]: 100%|██████████| 22/22 [00:01<00:00, 14.37it/s, D=-193, G=18.8]
Ep 320/500 [T=1.04]: 100%|██████████| 22/22 [00:01<00:00, 14.30it/s, D=-198, G=19.1]



Ep 320 Results (N=500) | Valid: 23.4% | Unique: 43
Samples: ['C.C.C=CC1CS1', 'C.C=CC(C)=C1CS1', 'C.C=C.CC1=[SH]C1']
--------------------------------------------------


Ep 321/500 [T=1.04]: 100%|██████████| 22/22 [00:01<00:00, 14.04it/s, D=-199, G=19.5]
Ep 322/500 [T=1.04]: 100%|██████████| 22/22 [00:02<00:00, 10.77it/s, D=-197, G=20.5]
Ep 323/500 [T=1.03]: 100%|██████████| 22/22 [00:01<00:00, 12.47it/s, D=-210, G=20.4]
Ep 324/500 [T=1.03]: 100%|██████████| 22/22 [00:02<00:00,  7.98it/s, D=-220, G=20.5] 
Ep 325/500 [T=1.03]: 100%|██████████| 22/22 [00:03<00:00,  6.81it/s, D=1.72e+4, G=21.2]
Ep 326/500 [T=1.02]: 100%|██████████| 22/22 [00:03<00:00,  6.92it/s, D=-275, G=21.4]
Ep 327/500 [T=1.02]: 100%|██████████| 22/22 [00:03<00:00,  6.87it/s, D=-207, G=21.5]
Ep 328/500 [T=1.02]: 100%|██████████| 22/22 [00:03<00:00,  7.10it/s, D=-284, G=21.7]
Ep 329/500 [T=1.02]: 100%|██████████| 22/22 [00:03<00:00,  6.86it/s, D=-266, G=21.4]
Ep 330/500 [T=1.01]: 100%|██████████| 22/22 [00:03<00:00,  6.65it/s, D=-308, G=22.5]



Ep 330 Results (N=500) | Valid: 7.4% | Unique: 9
Samples: ['C.C.C=CC1CS1', 'C.C=CC1(C)CS1', 'C.C=C.CC1=[SH]C1']
--------------------------------------------------


Ep 331/500 [T=1.01]: 100%|██████████| 22/22 [00:03<00:00,  7.27it/s, D=-130, G=22]     
Ep 332/500 [T=1.01]: 100%|██████████| 22/22 [00:03<00:00,  6.65it/s, D=-262, G=22.6]
Ep 333/500 [T=1.00]: 100%|██████████| 22/22 [00:03<00:00,  6.77it/s, D=-268, G=22.6]
Ep 334/500 [T=1.00]: 100%|██████████| 22/22 [00:02<00:00,  8.31it/s, D=-309, G=23.5]
Ep 335/500 [T=1.00]: 100%|██████████| 22/22 [00:03<00:00,  6.63it/s, D=-379, G=23.3]
Ep 336/500 [T=0.99]: 100%|██████████| 22/22 [00:03<00:00,  7.16it/s, D=-296, G=23.6]
Ep 337/500 [T=0.99]: 100%|██████████| 22/22 [00:02<00:00,  7.67it/s, D=-377, G=23.8]
Ep 338/500 [T=0.99]: 100%|██████████| 22/22 [00:02<00:00,  8.43it/s, D=-306, G=24.1]
Ep 339/500 [T=0.99]: 100%|██████████| 22/22 [00:03<00:00,  6.99it/s, D=-360, G=24.3]
Ep 340/500 [T=0.98]: 100%|██████████| 22/22 [00:03<00:00,  6.73it/s, D=-423, G=25.5]



Ep 340 Results (N=500) | Valid: 0.4% | Unique: 1
Samples: ['C.C=CC1(C)CS1']
--------------------------------------------------


Ep 341/500 [T=0.98]: 100%|██████████| 22/22 [00:01<00:00, 12.27it/s, D=-385, G=25]  
Ep 342/500 [T=0.98]: 100%|██████████| 22/22 [00:01<00:00, 14.06it/s, D=-379, G=26.3]
Ep 343/500 [T=0.97]: 100%|██████████| 22/22 [00:01<00:00, 14.07it/s, D=-397, G=25.6]
Ep 344/500 [T=0.97]: 100%|██████████| 22/22 [00:02<00:00,  9.97it/s, D=-147, G=25.8]
Ep 345/500 [T=0.97]: 100%|██████████| 22/22 [00:03<00:00,  7.22it/s, D=-423, G=26.9]
Ep 346/500 [T=0.97]: 100%|██████████| 22/22 [00:02<00:00,  7.36it/s, D=-124, G=26.2]
Ep 347/500 [T=0.96]: 100%|██████████| 22/22 [00:03<00:00,  6.77it/s, D=-490, G=27]  
Ep 348/500 [T=0.96]: 100%|██████████| 22/22 [00:03<00:00,  6.88it/s, D=-543, G=27.6]
Ep 349/500 [T=0.96]: 100%|██████████| 22/22 [00:03<00:00,  6.76it/s, D=-210, G=27.5]
Ep 350/500 [T=0.95]: 100%|██████████| 22/22 [00:03<00:00,  6.67it/s, D=-570, G=28.1]  



Ep 350 Results (N=500) | Valid: 0.0% | Unique: 0
--------------------------------------------------


Ep 351/500 [T=0.95]: 100%|██████████| 22/22 [00:03<00:00,  6.82it/s, D=-490, G=28.2]
Ep 352/500 [T=0.95]: 100%|██████████| 22/22 [00:03<00:00,  6.94it/s, D=-598, G=28.6]
Ep 353/500 [T=0.94]: 100%|██████████| 22/22 [00:02<00:00,  8.41it/s, D=-637, G=28.7]
Ep 354/500 [T=0.94]: 100%|██████████| 22/22 [00:01<00:00, 13.57it/s, D=-605, G=29.1]  
Ep 355/500 [T=0.94]: 100%|██████████| 22/22 [00:01<00:00, 13.01it/s, D=-579, G=29.3]
Ep 356/500 [T=0.94]: 100%|██████████| 22/22 [00:01<00:00, 14.09it/s, D=-551, G=29.3]   
Ep 357/500 [T=0.93]: 100%|██████████| 22/22 [00:01<00:00, 14.26it/s, D=-842, G=29.8]
Ep 358/500 [T=0.93]: 100%|██████████| 22/22 [00:01<00:00, 13.87it/s, D=-612, G=30.1]
Ep 359/500 [T=0.93]: 100%|██████████| 22/22 [00:01<00:00, 14.38it/s, D=-797, G=29.9]  
Ep 360/500 [T=0.92]: 100%|██████████| 22/22 [00:01<00:00, 13.65it/s, D=-646, G=30.1]



Ep 360 Results (N=500) | Valid: 0.0% | Unique: 0
--------------------------------------------------


Ep 361/500 [T=0.92]: 100%|██████████| 22/22 [00:01<00:00, 13.48it/s, D=-433, G=30.5] 
Ep 362/500 [T=0.92]: 100%|██████████| 22/22 [00:01<00:00, 13.52it/s, D=-903, G=30.8]
Ep 363/500 [T=0.91]: 100%|██████████| 22/22 [00:01<00:00, 14.02it/s, D=-739, G=30.9] 
Ep 364/500 [T=0.91]: 100%|██████████| 22/22 [00:02<00:00,  8.26it/s, D=-807, G=31.3]   
Ep 365/500 [T=0.91]: 100%|██████████| 22/22 [00:03<00:00,  6.57it/s, D=-93.9, G=30.9]
Ep 366/500 [T=0.91]: 100%|██████████| 22/22 [00:03<00:00,  6.70it/s, D=-937, G=31.8]    
Ep 367/500 [T=0.90]: 100%|██████████| 22/22 [00:03<00:00,  6.80it/s, D=-75.9, G=31.9]  
Ep 368/500 [T=0.90]: 100%|██████████| 22/22 [00:03<00:00,  6.79it/s, D=-928, G=31]      
Ep 369/500 [T=0.90]: 100%|██████████| 22/22 [00:03<00:00,  7.21it/s, D=-808, G=27.3]   
Ep 370/500 [T=0.89]: 100%|██████████| 22/22 [00:01<00:00, 11.36it/s, D=-929, G=31.3]    



Ep 370 Results (N=500) | Valid: 0.0% | Unique: 0
--------------------------------------------------


Ep 371/500 [T=0.89]: 100%|██████████| 22/22 [00:01<00:00, 13.96it/s, D=-785, G=31.5]    
Ep 372/500 [T=0.89]: 100%|██████████| 22/22 [00:01<00:00, 13.93it/s, D=-837, G=30.2]    
Ep 373/500 [T=0.88]:  14%|█▎        | 3/22 [00:00<00:01, 11.14it/s, D=-984, G=31.4]    


KeyboardInterrupt: 

In [15]:
evaluate(250)


Ep 250 Results (N=500) | Valid: 59.8% | Unique: 109
Samples: ['C.C1=C2OC12.C=CC', 'C.CCC.CO', 'C=C1C(C)N1CC1C=C1']
--------------------------------------------------


In [16]:
import os
import pandas as pd
import matplotlib.pyplot as plt

# --- Configuration (Make sure this matches your training script) ---
class Config:
    MODEL_DIR = 'models_simplified_v2' 
# ---

def plot_training_history():
    log_path = os.path.join(Config.MODEL_DIR, 'training_log.csv')
    if not os.path.exists(log_path):
        print(f"Error: Log file not found at {log_path}")
        return
    
    try:
        df = pd.read_csv(log_path)
        # Calculate rolling averages for smoother plots
        # Adjust window size if you have more/less data; 1% of total iterations is a good start
        window_size = max(10, len(df) // 100)
        df['D_rolling'] = df['D_loss'].rolling(window=window_size).mean()
        df['G_rolling'] = df['G_loss'].rolling(window=window_size).mean()

        plt.figure(figsize=(12, 5))
        
        # --- Discriminator Plot ---
        plt.subplot(1, 2, 1)
        plt.plot(df['D_loss'], alpha=0.2, label='Raw D Loss', color='blue')
        plt.plot(df['D_rolling'], label=f'Smoothed D Loss (win={window_size})', color='navy')
        plt.title('Discriminator Loss')
        plt.xlabel('Iterations')
        plt.ylabel('Loss')
        plt.legend()
        plt.grid(True, linestyle='--', alpha=0.5)

        # --- Generator Plot ---
        plt.subplot(1, 2, 2)
        plt.plot(df['G_loss'], alpha=0.2, label='Raw G Loss', color='orange')
        plt.plot(df['G_rolling'], label=f'Smoothed G Loss (win={window_size})', color='red')
        plt.title('Generator Loss')
        plt.xlabel('Iterations')
        plt.ylabel('Loss')
        plt.legend()
        plt.grid(True, linestyle='--', alpha=0.5)

        plt.tight_layout()
        save_path = os.path.join(Config.MODEL_DIR, 'training_history.png')
        plt.savefig(save_path)
        plt.show() # Display the plot in the notebook
        
        print(f"Plot saved to {save_path}")
        
    except Exception as e:
        print(f"Could not plot history: {e}")

# --- Call the function ---
plot_training_history()

Error: Log file not found at models_simplified_v2\training_log.csv
