In [None]:
# Mount Google Drive at /content/drive; later cells save and load the trained
# predictor under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Delete any previous checkout so the clone in the next cell starts from a clean directory.
!rm -rf rl_experiments/


In [None]:
# Clone the project repository into ./rl_experiments.
# GIT_LFS_SKIP_SMUDGE=1 asks git-lfs not to download LFS-tracked file contents at checkout.
!GIT_LFS_SKIP_SMUDGE=1 git clone https://github.com/isayevlab/rl_experiments.git


Cloning into 'rl_experiments'...
remote: Enumerating objects: 95, done.[K
remote: Total 95 (delta 0), reused 0 (delta 0), pack-reused 95 (from 1)[K
Receiving objects: 100% (95/95), 445.13 MiB | 40.54 MiB/s, done.
Resolving deltas: 100% (32/32), done.
Updating files: 100% (58/58), done.


In [None]:
!pip install rdkit-pypi torch torchvision pandas matplotlib


Collecting rdkit-pypi
  Downloading rdkit_pypi-2022.9.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.9 kB)
Downloading rdkit_pypi-2022.9.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (29.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m29.4/29.4 MB[0m [31m72.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: rdkit-pypi
Successfully installed rdkit-pypi-2022.9.5


In [None]:
!pip install joblib




In [None]:
# Sanity check before any training: can PyTorch see a CUDA device on this runtime?
import torch
print(torch.cuda.is_available())  # prints True when a GPU is visible to PyTorch


True


In [None]:
# NOTE(review): this cell looks redundant. torch is already imported and reports CUDA as
# available in the cell above, and the cu113 wheel index does not match the CUDA 12.2
# toolkit that nvcc reports below. Consider removing it; confirm nothing needs torchaudio.
!pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113


Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cu113


In [None]:
# Re-check GPU visibility after the pip cell above.
import torch
print(torch.cuda.is_available())  # prints True when a GPU is visible to PyTorch

True


In [None]:
# Report the CUDA toolkit version installed on the runtime.
!nvcc --version


nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Tue_Aug_15_22:02:13_PDT_2023
Cuda compilation tools, release 12.2, V12.2.140
Build cuda_12.2.r12.2/compiler.33191640_0


In [None]:
# Put the checkout's `release` directory on the import path; presumably this is where the
# project modules imported further down (data, stackRNN, utils, ...) live — confirm.
import sys
sys.path.append('/content/rl_experiments/release')


In [None]:
import os
import sys

# Expose GPU 0 to PyTorch. The previous value '1' selects a *second* GPU, which on a
# single-GPU runtime hides the only device.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

# Use the absolute checkout path: the notebook's working directory is /content, so the old
# relative entry '../release' pointed outside the cloned repository. Skip the append when
# the entry is already present so re-running the cell does not pile up duplicates.
if '/content/rl_experiments/release' not in sys.path:
    sys.path.append('/content/rl_experiments/release')

In [None]:
import numpy as np
import pandas as pd
from tqdm import trange
import torch
# True when PyTorch can use a CUDA device.
# NOTE(review): no later cell in view reads this flag — confirm it is still needed.
use_cuda = torch.cuda.is_available()

In [None]:
# Show the working directory; relative paths used later (e.g. 'rl_experiments/data/...')
# are resolved against it.
import os
print(os.getcwd())


/content


In [None]:
from data import GeneratorData, PredictorData
from stackRNN import StackAugmentedRNN
from utils import get_fp
from reinforcement import Reinforcement

In [None]:
import joblib
from sklearn.ensemble import RandomForestClassifier as RFC
from predictor import VanillaQSAR

# Training the predictor

In [None]:
from rdkit import RDLogger

# Disable RDKit warnings (every channel matching 'rdApp.*')
RDLogger.DisableLog('rdApp.*')

# Seed NumPy's global RNG before the data is loaded and the model is fitted.
np.random.seed(42)

# EGFR data set read from the cloned repository and featurised with get_fp.
pred_data = PredictorData('rl_experiments/data/egfr_with_pubchem.csv', get_features=get_fp)
# Predictor: an ensemble of 10 random-forest classifiers, each built with 250 trees.
model_instance = RFC
model_params = {'n_estimators': 250,
                'n_jobs': 10}
my_predictor = VanillaQSAR(model_instance=model_instance,
                           model_params=model_params,
                           ensemble_size=10)

In [None]:
# Train the model and retrieve metrics.
# `results` holds one score per fold and `metrics_type` names the metric; both are printed
# by the next cell.
results, metrics_type = my_predictor.fit_model(pred_data, cv_split='random')



In [None]:
# Print the score of every fold, labelled with the metric name returned by fit_model
print(f"Metrics Type: {metrics_type}")
for i, metric in enumerate(results):
    print(f"Fold {i+1}: {metrics_type} = {metric:.4f}")

# Mean score over all folds. The variable is called average_f1 because the recorded run
# reports an F1 score; the value is whatever metric `metrics_type` names.
average_f1 = np.mean(results)
print(f"Average {metrics_type}: {average_f1:.4f}")


Metrics Type: F1 score
Fold 1: F1 score = 0.8571
Fold 2: F1 score = 0.8301
Fold 3: F1 score = 0.8549
Fold 4: F1 score = 0.8525
Fold 5: F1 score = 0.8350
Fold 6: F1 score = 0.8465
Fold 7: F1 score = 0.8343
Fold 8: F1 score = 0.8540
Fold 9: F1 score = 0.8505
Fold 10: F1 score = 0.8357
Average F1 score: 0.8451


In [None]:
# Persist the ensemble that was fitted (and whose cross-validation scores were printed)
# in the cells above. Fitting again here would repeat an expensive step and overwrite it
# with a second model whose scores were never reported.
my_predictor.save_model('/content/drive/MyDrive/egfr_rfc')

In [None]:
# ...or use pretrained model
# Loads from the same Drive path prefix that the previous cell saves to.
my_predictor.load_model('/content/drive/MyDrive/egfr_rfc')

# Dynamic threshold — original schedule (step: 0.05, max: 1.0)

In [None]:
import os
import sys
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm, trange
from rdkit import Chem, DataStructs
from sklearn.ensemble import RandomForestClassifier as RFC
from utils import canonical_smiles, get_fp
from stackRNN import StackAugmentedRNN
from data import GeneratorData
from predictor import VanillaQSAR
from reinforcement import Reinforcement
import time
import contextlib
from rdkit import RDLogger

# Suppress RDKit warnings and errors
RDLogger.DisableLog('rdApp.*')

# A function to suppress stderr (error messages)
@contextlib.contextmanager
def suppress_stderr():
    """Discard everything written to ``sys.stderr`` inside the ``with`` body.

    ``sys.stderr`` is pointed at ``os.devnull`` on entry and the previous stream
    is put back on exit, including when the body raises. Only the Python-level
    ``sys.stderr`` object is swapped.
    """
    with open(os.devnull, 'w') as sink, contextlib.redirect_stderr(sink):
        yield

# Modify the function where the SMILES processing happens
def estimate_and_update(generator, predictor, n_to_generate, threshold=None, batch_size=16,
                        plot_counts=False, plot=False, return_metrics=False, gen_data=None,
                        **kwargs):
    """Sample SMILES from the generator, score the unique ones and summarise the batch.

    Args:
        generator: model whose ``evaluate(data, predict_len=120, batch_size=...)``
            returns a sequence of generated SMILES strings.
        predictor: model whose ``predict(smiles, get_features=get_fp)`` returns
            ``(smiles, prediction, invalid_smiles)``.
        n_to_generate: number of ``generator.evaluate`` calls to make.
        threshold: cut-off used for ``active_fraction`` and drawn on the histogram.
            With ``None`` the active fraction is reported as NaN instead of crashing.
        batch_size: forwarded to ``generator.evaluate``.
        plot_counts: also report how often the most frequent SMILES was generated.
        plot: draw the histograms in addition to printing the summary.
        return_metrics: additionally return the metrics dictionary.
        gen_data: data object handed to ``generator.evaluate``. When omitted, the
            module-level ``gen_data`` (set by ``main``) is used, as before.
        **kwargs: accepted and ignored.

    Returns:
        ``(smiles, prediction)``, or ``(smiles, prediction, metrics)`` when
        ``return_metrics`` is true. ``metrics`` holds ``valid_fraction``,
        ``active_fraction`` and, with ``plot_counts``, ``max_counts``.
    """
    if gen_data is None:
        # Backward compatibility: historically this function read a global of that name.
        gen_data = globals().get('gen_data')
        if gen_data is None:
            raise NameError("gen_data is not defined: pass gen_data=... or run main() first")

    generated = []
    pbar = tqdm(range(n_to_generate))
    for _ in pbar:
        pbar.set_description("Generating molecules...")
        with suppress_stderr():  # Suppress error messages during SMILES generation
            generated += generator.evaluate(gen_data, predict_len=120, batch_size=batch_size)

    with suppress_stderr():  # Suppress error messages during canonicalization
        # NOTE(review): the trailing [:-1] drops the last generated entry; kept from the
        # original code — confirm it is intentional.
        sanitized = canonical_smiles(generated, sanitize=False, throw_warning=False)[:-1]
    unique_smiles, counts = np.unique(sanitized, return_counts=True)
    # The original sliced off the first sorted entry, presumably the '' placeholder that
    # canonical_smiles emits for unparseable strings (confirm against utils). Filtering on
    # '' gives the same result when the placeholder is present and no longer discards a
    # valid molecule when every generated string parsed.
    kept = [(smi, cnt) for smi, cnt in zip(unique_smiles, counts) if smi != '']
    unique_smiles = [smi for smi, _ in kept]
    counts = np.array([cnt for _, cnt in kept], dtype=int)

    smiles, prediction, nan_smiles = predictor.predict(unique_smiles, get_features=get_fp)
    # Guarantee array semantics below even if the predictor hands back a plain list.
    prediction = np.asarray(prediction)

    max_counts = 0
    if plot_counts:
        if plot:
            plt.hist(counts)
            plt.gca().set_yscale('log')
            plt.title('Distribution of counts of generated smiles')
            plt.xlabel('Counts observed')
            plt.show()
        # max() of an empty sequence raises; report 0 when nothing survived the filter.
        max_counts = counts.max() if len(counts) else 0
        if max_counts > 1:
            print('Trajectories with max counts:')
            for i in np.where(counts == max_counts)[0]:
                print('%d\t%s' % (counts[i], unique_smiles[i]))

    with suppress_stderr():  # Suppress error messages during prediction
        plot_hist(prediction, len(generated), threshold, plot=plot)

    n_generated = len(generated)
    valid_fraction = len(prediction) / n_generated if n_generated else 0.0
    if threshold is None:
        # No cut-off supplied, so the active fraction is undefined.
        active_fraction = float('nan')
    elif len(prediction) == 0:
        active_fraction = 0.0
    else:
        active_fraction = np.mean(prediction >= threshold)
    metrics = {'valid_fraction': valid_fraction,
               'active_fraction': active_fraction}

    if plot_counts:
        metrics['max_counts'] = max_counts
    if return_metrics:
        return smiles, prediction, metrics
    return smiles, prediction


def plot_hist(prediction, n_to_generate, threshold=None, plot=True):
    """Print summary statistics of the predictions and optionally plot a histogram.

    Args:
        prediction: predicted values; any array-like is accepted (lists are
            converted to a NumPy array).
        n_to_generate: denominator of the printed "valid" proportion. A value of
            0 yields a proportion of 0.0 instead of a division error.
        threshold: when given, drawn as a vertical red line on the histogram.
        plot: when false only the two summary lines are printed.

    NOTE(review): the axis label and title say pIC50, while the predictor built in
    this notebook is a classifier — confirm the labels are what is intended.
    """
    values = np.asarray(prediction)
    # An empty batch has no mean; report NaN explicitly instead of a NumPy warning.
    mean_value = values.mean() if values.size else float('nan')
    valid_share = len(values) / n_to_generate if n_to_generate else 0.0
    print("Mean value of predictions:", mean_value)
    print("Proportion of valid SMILES:", valid_share)
    if plot:
        plt.hist(values, bins=20, edgecolor='black')
        if threshold is not None:
            plt.axvline(x=threshold, color="red")
        plt.xlabel('Predicted pIC50')
        plt.title('Distribution of predicted pIC50 for generated molecules')
        plt.show()

def update_threshold(cur_threshold, prediction, proportion=0.15, step=0.05):
    """Raise the threshold once enough predictions already reach it.

    Args:
        cur_threshold: threshold currently in use.
        prediction: array-like of predicted values for the latest batch.
        proportion: minimum fraction of predictions that must be
            ``>= cur_threshold`` for the threshold to be raised.
        step: amount added to the threshold when it is raised.

    Returns:
        ``cur_threshold + step`` capped at 1.0 when the condition holds,
        otherwise ``cur_threshold`` unchanged. An empty batch never raises it.
    """
    values = np.asarray(prediction)
    if values.size == 0:
        # Nothing to judge by; keep the threshold (also avoids mean-of-empty warnings).
        return cur_threshold
    if (values >= cur_threshold).mean() >= proportion:
        # Round away float drift from repeated additions (e.g. 0.1 + 0.2 -> 0.30000000000000004)
        # so later ">= threshold" comparisons are made against the intended value.
        new_threshold = min(round(cur_threshold + step, 12), 1.0)
        print(f"Threshold increased to: {new_threshold:.2f}")
        return new_threshold
    return cur_threshold

def simple_moving_average(prev_values, new_value, ma_window_size=10):
    """Average ``new_value`` with the most recent ``ma_window_size - 1`` previous values.

    Args:
        prev_values: sequence of earlier values, oldest first.
        new_value: latest value to fold into the average.
        ma_window_size: total window length including ``new_value``; must be >= 1.

    Returns:
        The mean over the window. If fewer previous values exist than the window
        allows, the mean is taken over what is available.

    Raises:
        ValueError: if ``ma_window_size`` is smaller than 1.
    """
    if ma_window_size < 1:
        raise ValueError("ma_window_size must be at least 1")
    n_prev = ma_window_size - 1
    # prev_values[-0:] is the *whole* sequence, so a window of 1 needs an explicit empty tail.
    tail = prev_values[-n_prev:] if n_prev else prev_values[:0]
    return (sum(tail) + new_value) / (len(tail) + 1.)

def _plot_training_curves(rewards, valid_fractions, active_fractions, thresholds):
    """Show the reward curve, then the valid/active fractions together with the threshold."""
    plt.figure(figsize=(12, 6))
    plt.plot(rewards, label='Rewards')
    plt.title('Rewards Over Iterations')
    plt.xlabel('Iterations')
    plt.ylabel('Reward')
    plt.legend()
    plt.show()

    plt.figure(figsize=(12, 6))
    plt.plot(valid_fractions, label='Valid Fractions')
    plt.plot(active_fractions, label='Active Fractions')
    plt.plot(thresholds, label='Thresholds')
    plt.title('Fractions and Threshold Over Iterations')
    plt.xlabel('Iterations')
    plt.ylabel('Fraction / Threshold')
    plt.legend()
    plt.show()


def main(n_iterations=400, n_policy=10, n_policy_replay=15, batch_size=16, n_fine_tune=None,
         seed=None, replay_data_path='/content/rl_experiments/data/gen_actives.smi',
         primed_path='/content/rl_experiments/checkpoints/generator/checkpoint_batch_training',
         save_path=None):
    """Optimise the pretrained generator with policy gradient under a rising threshold.

    Every iteration runs ``n_policy`` policy-gradient steps with the current
    threshold, samples a batch of molecules to measure the valid and active
    fractions, and then lets ``update_threshold`` decide whether to raise the
    threshold. Two summary figures are shown at the end.

    Args:
        n_iterations: number of outer iterations.
        n_policy: policy-gradient steps per iteration.
        n_policy_replay: currently unused; kept for interface compatibility.
        batch_size: batch size used when sampling molecules for evaluation.
        n_fine_tune: currently unused; kept for interface compatibility.
        seed: when given, seeds NumPy and PyTorch.
        replay_data_path: currently unused; kept for interface compatibility.
        primed_path: checkpoint holding the pretrained generator's state dict.
        save_path: when given, the trained generator's state dict is written
            there with ``torch.save`` once training finishes.
    """
    if seed is not None:
        np.random.seed(seed)
        torch.manual_seed(seed)

    gen_data_path = '/content/rl_experiments/data/chembl_22_clean_1576904_sorted_std_final.smi'
    tokens = [' ', '<', '>', '#', '%', ')', '(', '+', '-', '/', '.', '1', '0', '3', '2', '5', '4',
              '7', '6', '9', '8', '=', 'a', '@', 'C', 'B', 'F', 'I', 'H', 'O', 'N', 'P', 'S', '[', ']',
              '\\', 'c', 'e', 'i', 'l', 'o', 'n', 'p', 's', 'r']
    # Kept as a module-level name because estimate_and_update has historically read it
    # from there; it is also passed explicitly further down.
    global gen_data
    gen_data = GeneratorData(gen_data_path, delimiter='\t',
                             cols_to_read=[0], keep_header=True, tokens=tokens)

    hidden_size = 1500
    stack_width = 1500
    stack_depth = 200
    layer_type = 'GRU'
    optimizer = torch.optim.SGD
    lr = 0.0002
    cuda_available = torch.cuda.is_available()
    generator = StackAugmentedRNN(input_size=gen_data.n_characters, hidden_size=hidden_size,
                                  output_size=gen_data.n_characters, layer_type=layer_type,
                                  n_layers=1, is_bidirectional=False, has_stack=True,
                                  stack_width=stack_width, stack_depth=stack_depth,
                                  use_cuda=cuda_available, optimizer_instance=optimizer, lr=lr)
    # The checkpoint is passed straight to load_state_dict, so restrict unpickling to
    # tensors/containers (weights_only=True). The recorded run shows a warning raised at
    # the old call. A device string replaces the hand-written map_location lambda.
    weights = torch.load(primed_path,
                         map_location='cuda' if cuda_available else 'cpu',
                         weights_only=True)
    generator.load_state_dict(weights)

    model_instance = RFC
    model_params = {'n_estimators': 250, 'n_jobs': 10}
    # NOTE(review): the predictor saved earlier in this notebook is built with
    # ensemble_size=10, so the same size is passed here in order that load_model restores
    # the whole ensemble — confirm against VanillaQSAR's default.
    predictor = VanillaQSAR(model_instance=model_instance,
                            model_params=model_params,
                            model_type='classifier',
                            ensemble_size=10)
    predictor.load_model('/content/drive/MyDrive/egfr_rfc')

    def get_reward(smiles, predictor, threshold, invalid_reward=1.0, get_features=get_fp):
        """Return 10.0 when the prediction reaches the threshold, else ``invalid_reward``."""
        mol, prop, nan_smiles = predictor.predict([smiles], get_features=get_features)
        if len(nan_smiles) == 1:
            # The single input SMILES could not be featurised.
            return invalid_reward
        if prop[0] >= threshold:
            return 10.0
        return invalid_reward

    RL_model = Reinforcement(generator, predictor, get_reward)

    rl_losses = []
    rewards = []
    valid_fractions = []
    active_fractions = []
    thresholds = []
    n_to_generate = 50
    # Fixed cut-off used only to report the active fraction; the reward uses `threshold`.
    eval_threshold = 0.75
    threshold = 0.05
    start = time.time()

    for i in range(n_iterations):
        # NOTE(review): the message mentions replay training, but only policy-gradient
        # steps run in this loop. Text kept unchanged to match existing logs.
        print(f"{i+1} Training on replay instances...")
        thresholds.append(threshold)

        # Policy Gradient
        for j in trange(n_policy, desc=f" {i+1} Policy gradient..."):
            cur_reward, cur_loss = RL_model.policy_gradient(gen_data, threshold=threshold)
            rewards.append(simple_moving_average(rewards, cur_reward))
            rl_losses.append(simple_moving_average(rl_losses, cur_loss))

        # Estimate and Update
        smiles_cur, prediction_cur, metrics = estimate_and_update(RL_model.generator,
                                                                  RL_model.predictor,
                                                                  n_to_generate,
                                                                  batch_size=batch_size,
                                                                  threshold=eval_threshold,
                                                                  plot_counts=False,
                                                                  return_metrics=True,
                                                                  gen_data=gen_data)
        valid_fractions.append(metrics['valid_fraction'])
        active_fractions.append(metrics['active_fraction'])

        # Update threshold
        threshold = update_threshold(threshold, prediction_cur)

    duration = time.time() - start
    print(f"Training duration: {duration} seconds")

    # Persist the result before plotting so hours of training are not lost if a later
    # step fails; previously save_path was accepted but ignored.
    if save_path is not None:
        torch.save(RL_model.generator.state_dict(), save_path)
        print(f"Generator weights saved to: {save_path}")

    # Plotting the results
    _plot_training_curves(rewards, valid_fractions, active_fractions, thresholds)

# Run the full training loop with the default settings.
# NOTE(review): no seed is passed, so results differ between runs. The recorded log shows
# roughly 40 s of policy gradient plus 8 s of sampling per iteration.
if __name__ == '__main__':
    main(n_iterations=400)

  weights = torch.load(primed_path, map_location=lambda storage, loc: storage.cuda() if torch.cuda.is_available() else storage.cpu())


1 Training on replay instances...


 1 Policy gradient...: 100%|██████████| 10/10 [00:40<00:00,  4.10s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.85it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.7826633165829145
2 Training on replay instances...


 2 Policy gradient...: 100%|██████████| 10/10 [00:39<00:00,  3.97s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  6.04it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.7735849056603774
3 Training on replay instances...


 3 Policy gradient...: 100%|██████████| 10/10 [00:40<00:00,  4.02s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.98it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.8120300751879699
4 Training on replay instances...


 4 Policy gradient...: 100%|██████████| 10/10 [00:39<00:00,  3.92s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  6.02it/s]


Mean value of predictions: 0.0015797789
Proportion of valid SMILES: 0.793233082706767
5 Training on replay instances...


 5 Policy gradient...: 100%|██████████| 10/10 [00:39<00:00,  3.91s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  6.08it/s]


Mean value of predictions: 0.00063191156
Proportion of valid SMILES: 0.793233082706767
6 Training on replay instances...


 6 Policy gradient...: 100%|██████████| 10/10 [00:40<00:00,  4.01s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.92it/s]


Mean value of predictions: 0.001610306
Proportion of valid SMILES: 0.7781954887218046
7 Training on replay instances...


 7 Policy gradient...: 100%|██████████| 10/10 [00:38<00:00,  3.88s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  6.01it/s]


Mean value of predictions: 0.0009852217
Proportion of valid SMILES: 0.76125
8 Training on replay instances...


 8 Policy gradient...: 100%|██████████| 10/10 [00:39<00:00,  3.96s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.96it/s]


Mean value of predictions: 0.0034591195
Proportion of valid SMILES: 0.7959949937421777
9 Training on replay instances...


 9 Policy gradient...: 100%|██████████| 10/10 [00:40<00:00,  4.05s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.93it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.8105395232120451
10 Training on replay instances...


 10 Policy gradient...: 100%|██████████| 10/10 [00:38<00:00,  3.83s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.99it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.8005018820577164
11 Training on replay instances...


 11 Policy gradient...: 100%|██████████| 10/10 [00:37<00:00,  3.77s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.93it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.8080301129234629
12 Training on replay instances...


 12 Policy gradient...: 100%|██████████| 10/10 [00:40<00:00,  4.04s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.93it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.7809762202753442
13 Training on replay instances...


 13 Policy gradient...: 100%|██████████| 10/10 [00:38<00:00,  3.87s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.93it/s]


Mean value of predictions: 0.00063593005
Proportion of valid SMILES: 0.7882205513784462
14 Training on replay instances...


 14 Policy gradient...: 100%|██████████| 10/10 [00:38<00:00,  3.89s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.93it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.7816813048933501
15 Training on replay instances...


 15 Policy gradient...: 100%|██████████| 10/10 [00:39<00:00,  3.92s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.90it/s]


Mean value of predictions: 0.00032467532
Proportion of valid SMILES: 0.7709637046307884
16 Training on replay instances...


 16 Policy gradient...: 100%|██████████| 10/10 [00:38<00:00,  3.90s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.70it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.7484355444305382
17 Training on replay instances...


 17 Policy gradient...: 100%|██████████| 10/10 [00:37<00:00,  3.78s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.94it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.7716436637390214
18 Training on replay instances...


 18 Policy gradient...: 100%|██████████| 10/10 [00:37<00:00,  3.77s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.96it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.7875
19 Training on replay instances...


 19 Policy gradient...: 100%|██████████| 10/10 [00:39<00:00,  3.94s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.71it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.7917189460476788
20 Training on replay instances...


 20 Policy gradient...: 100%|██████████| 10/10 [00:39<00:00,  3.94s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.87it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.7959949937421777
21 Training on replay instances...


 21 Policy gradient...: 100%|██████████| 10/10 [00:40<00:00,  4.07s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.91it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.80375
22 Training on replay instances...


 22 Policy gradient...: 100%|██████████| 10/10 [00:38<00:00,  3.86s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.87it/s]


Mean value of predictions: 0.0015128592
Proportion of valid SMILES: 0.8293601003764115
23 Training on replay instances...


 23 Policy gradient...: 100%|██████████| 10/10 [00:39<00:00,  3.92s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.86it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.8297872340425532
24 Training on replay instances...


 24 Policy gradient...: 100%|██████████| 10/10 [00:38<00:00,  3.89s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.80it/s]


Mean value of predictions: 0.0005961252
Proportion of valid SMILES: 0.83875
25 Training on replay instances...


 25 Policy gradient...: 100%|██████████| 10/10 [00:37<00:00,  3.79s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.82it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.8435544430538173
26 Training on replay instances...


 26 Policy gradient...: 100%|██████████| 10/10 [00:38<00:00,  3.84s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.80it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.82375
27 Training on replay instances...


 27 Policy gradient...: 100%|██████████| 10/10 [00:39<00:00,  3.97s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.83it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.8020050125313283
28 Training on replay instances...


 28 Policy gradient...: 100%|██████████| 10/10 [00:39<00:00,  3.96s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.81it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.8210262828535669
29 Training on replay instances...


 29 Policy gradient...: 100%|██████████| 10/10 [00:38<00:00,  3.81s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.83it/s]


Mean value of predictions: 0.00155521
Proportion of valid SMILES: 0.804755944931164
30 Training on replay instances...


 30 Policy gradient...: 100%|██████████| 10/10 [00:38<00:00,  3.88s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.86it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.8197747183979975
31 Training on replay instances...


 31 Policy gradient...: 100%|██████████| 10/10 [00:39<00:00,  3.93s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.88it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.8057644110275689
32 Training on replay instances...


 32 Policy gradient...: 100%|██████████| 10/10 [00:39<00:00,  3.94s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.74it/s]


Mean value of predictions: 0.0015503876
Proportion of valid SMILES: 0.8103015075376885
33 Training on replay instances...


 33 Policy gradient...: 100%|██████████| 10/10 [00:39<00:00,  3.99s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.84it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.8264150943396227
34 Training on replay instances...


 34 Policy gradient...: 100%|██████████| 10/10 [00:38<00:00,  3.89s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.82it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.8368883312421581
35 Training on replay instances...


 35 Policy gradient...: 100%|██████████| 10/10 [00:39<00:00,  3.91s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.82it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.853566958698373
36 Training on replay instances...


 36 Policy gradient...: 100%|██████████| 10/10 [00:38<00:00,  3.88s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.88it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.8870765370138017
37 Training on replay instances...


 37 Policy gradient...: 100%|██████████| 10/10 [00:39<00:00,  3.91s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.87it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.871859296482412
38 Training on replay instances...


 38 Policy gradient...: 100%|██████████| 10/10 [00:38<00:00,  3.88s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.87it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.853566958698373
39 Training on replay instances...


 39 Policy gradient...: 100%|██████████| 10/10 [00:38<00:00,  3.89s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.72it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.8325
40 Training on replay instances...


 40 Policy gradient...: 100%|██████████| 10/10 [00:37<00:00,  3.80s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.80it/s]


Mean value of predictions: 0.0005738881
Proportion of valid SMILES: 0.8723404255319149
41 Training on replay instances...


 41 Policy gradient...: 100%|██████████| 10/10 [00:40<00:00,  4.03s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.82it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.885
42 Training on replay instances...


 42 Policy gradient...: 100%|██████████| 10/10 [00:39<00:00,  3.98s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.77it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.85625
43 Training on replay instances...


 43 Policy gradient...: 100%|██████████| 10/10 [00:39<00:00,  3.98s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.84it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.8853904282115869
44 Training on replay instances...


 44 Policy gradient...: 100%|██████████| 10/10 [00:38<00:00,  3.86s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.84it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.9133165829145728
45 Training on replay instances...


 45 Policy gradient...: 100%|██████████| 10/10 [00:39<00:00,  3.94s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.84it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.918444165621079
46 Training on replay instances...


 46 Policy gradient...: 100%|██████████| 10/10 [00:39<00:00,  4.00s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.86it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.9042821158690176
47 Training on replay instances...


 47 Policy gradient...: 100%|██████████| 10/10 [00:40<00:00,  4.03s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.86it/s]


Mean value of predictions: 0.0002805049
Proportion of valid SMILES: 0.8946047678795483
48 Training on replay instances...


 48 Policy gradient...: 100%|██████████| 10/10 [00:40<00:00,  4.03s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.82it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.9082914572864321
49 Training on replay instances...


 49 Policy gradient...: 100%|██████████| 10/10 [00:40<00:00,  4.03s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.86it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.9096612296110415
50 Training on replay instances...


 50 Policy gradient...: 100%|██████████| 10/10 [00:39<00:00,  3.95s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.84it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.9146800501882058
51 Training on replay instances...


 51 Policy gradient...: 100%|██████████| 10/10 [00:40<00:00,  4.02s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.86it/s]


Mean value of predictions: 0.0014326648
Proportion of valid SMILES: 0.8735919899874843
52 Training on replay instances...


 52 Policy gradient...: 100%|██████████| 10/10 [00:40<00:00,  4.00s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.85it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.8884711779448622
53 Training on replay instances...


 53 Policy gradient...: 100%|██████████| 10/10 [00:39<00:00,  3.95s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.85it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.8571428571428571
54 Training on replay instances...


 54 Policy gradient...: 100%|██████████| 10/10 [00:38<00:00,  3.88s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.83it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.8782936010037641
55 Training on replay instances...


 55 Policy gradient...: 100%|██████████| 10/10 [00:37<00:00,  3.78s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.85it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.8836045056320401
56 Training on replay instances...


 56 Policy gradient...: 100%|██████████| 10/10 [00:37<00:00,  3.74s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.83it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.8621553884711779
57 Training on replay instances...


 57 Policy gradient...: 100%|██████████| 10/10 [00:37<00:00,  3.75s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.83it/s]


Mean value of predictions: 0.0014641288
Proportion of valid SMILES: 0.8569636135508155
58 Training on replay instances...


 58 Policy gradient...: 100%|██████████| 10/10 [00:37<00:00,  3.77s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.81it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.8085106382978723
59 Training on replay instances...


 59 Policy gradient...: 100%|██████████| 10/10 [00:38<00:00,  3.88s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.90it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.8275
60 Training on replay instances...


 60 Policy gradient...: 100%|██████████| 10/10 [00:38<00:00,  3.87s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.87it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.8375
61 Training on replay instances...


 61 Policy gradient...: 100%|██████████| 10/10 [00:37<00:00,  3.72s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.77it/s]


Mean value of predictions: 0.0012232416
Proportion of valid SMILES: 0.818523153942428
62 Training on replay instances...


 62 Policy gradient...: 100%|██████████| 10/10 [00:38<00:00,  3.84s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.79it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.8406524466750314
63 Training on replay instances...


 63 Policy gradient...: 100%|██████████| 10/10 [00:37<00:00,  3.73s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.85it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.8295739348370927
64 Training on replay instances...


 64 Policy gradient...: 100%|██████████| 10/10 [00:38<00:00,  3.89s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.87it/s]


Mean value of predictions: 0.000896861
Proportion of valid SMILES: 0.83729662077597
65 Training on replay instances...


 65 Policy gradient...: 100%|██████████| 10/10 [00:37<00:00,  3.73s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.79it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.82625
66 Training on replay instances...


 66 Policy gradient...: 100%|██████████| 10/10 [00:38<00:00,  3.86s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.85it/s]


Mean value of predictions: 0.00059970014
Proportion of valid SMILES: 0.8347934918648311
67 Training on replay instances...


 67 Policy gradient...: 100%|██████████| 10/10 [00:38<00:00,  3.81s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.88it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.825
68 Training on replay instances...


 68 Policy gradient...: 100%|██████████| 10/10 [00:37<00:00,  3.77s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.86it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.845
69 Training on replay instances...


 69 Policy gradient...: 100%|██████████| 10/10 [00:38<00:00,  3.89s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.85it/s]


Mean value of predictions: 0.0
Proportion of valid SMILES: 0.7556390977443609
70 Training on replay instances...


 70 Policy gradient...: 100%|██████████| 10/10 [00:36<00:00,  3.67s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.85it/s]


Mean value of predictions: 0.002739726
Proportion of valid SMILES: 0.73
71 Training on replay instances...


 71 Policy gradient...: 100%|██████████| 10/10 [00:36<00:00,  3.61s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.85it/s]


Mean value of predictions: 0.0014209591
Proportion of valid SMILES: 0.704630788485607
72 Training on replay instances...


 72 Policy gradient...: 100%|██████████| 10/10 [00:36<00:00,  3.69s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.84it/s]


Mean value of predictions: 0.0018518518
Proportion of valid SMILES: 0.6758448060075094
73 Training on replay instances...


 73 Policy gradient...: 100%|██████████| 10/10 [00:36<00:00,  3.66s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.85it/s]


Mean value of predictions: 0.0036231885
Proportion of valid SMILES: 0.6943396226415094
74 Training on replay instances...


 74 Policy gradient...: 100%|██████████| 10/10 [00:36<00:00,  3.63s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.85it/s]


Mean value of predictions: 0.0018083183
Proportion of valid SMILES: 0.69125
75 Training on replay instances...


 75 Policy gradient...: 100%|██████████| 10/10 [00:35<00:00,  3.52s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.88it/s]


Mean value of predictions: 0.0047524753
Proportion of valid SMILES: 0.6320400500625782
76 Training on replay instances...


 76 Policy gradient...: 100%|██████████| 10/10 [00:35<00:00,  3.57s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.81it/s]


Mean value of predictions: 0.006690141
Proportion of valid SMILES: 0.71
77 Training on replay instances...


 77 Policy gradient...: 100%|██████████| 10/10 [00:36<00:00,  3.61s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.80it/s]


Mean value of predictions: 0.013095237
Proportion of valid SMILES: 0.6307884856070087
78 Training on replay instances...


 78 Policy gradient...: 100%|██████████| 10/10 [00:35<00:00,  3.53s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.78it/s]


Mean value of predictions: 0.009465021
Proportion of valid SMILES: 0.6090225563909775
79 Training on replay instances...


 79 Policy gradient...: 100%|██████████| 10/10 [00:33<00:00,  3.36s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.85it/s]


Mean value of predictions: 0.009016394
Proportion of valid SMILES: 0.6146095717884131
80 Training on replay instances...


 80 Policy gradient...: 100%|██████████| 10/10 [00:34<00:00,  3.46s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.81it/s]


Mean value of predictions: 0.010526315
Proportion of valid SMILES: 0.6190476190476191
81 Training on replay instances...


 81 Policy gradient...: 100%|██████████| 10/10 [00:37<00:00,  3.71s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.85it/s]


Mean value of predictions: 0.007434944
Proportion of valid SMILES: 0.6741854636591479
82 Training on replay instances...


 82 Policy gradient...: 100%|██████████| 10/10 [00:34<00:00,  3.49s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.81it/s]


Mean value of predictions: 0.012830189
Proportion of valid SMILES: 0.6633291614518148
83 Training on replay instances...


 83 Policy gradient...: 100%|██████████| 10/10 [00:35<00:00,  3.50s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.80it/s]


Mean value of predictions: 0.007827789
Proportion of valid SMILES: 0.6403508771929824
84 Training on replay instances...


 84 Policy gradient...: 100%|██████████| 10/10 [00:34<00:00,  3.40s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.81it/s]


Mean value of predictions: 0.003960396
Proportion of valid SMILES: 0.6320400500625782
85 Training on replay instances...


 85 Policy gradient...: 100%|██████████| 10/10 [00:35<00:00,  3.58s/it]
Generating molecules...: 100%|██████████| 50/50 [00:09<00:00,  5.28it/s]


Mean value of predictions: 0.0060362173
Proportion of valid SMILES: 0.6220275344180225
86 Training on replay instances...


 86 Policy gradient...: 100%|██████████| 10/10 [00:34<00:00,  3.50s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.77it/s]


Mean value of predictions: 0.009643606
Proportion of valid SMILES: 0.5977443609022557
87 Training on replay instances...


 87 Policy gradient...: 100%|██████████| 10/10 [00:34<00:00,  3.48s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.79it/s]


Mean value of predictions: 0.0074226805
Proportion of valid SMILES: 0.6070087609511889
88 Training on replay instances...


 88 Policy gradient...: 100%|██████████| 10/10 [00:34<00:00,  3.48s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.80it/s]


Mean value of predictions: 0.021559631
Proportion of valid SMILES: 0.545
89 Training on replay instances...


 89 Policy gradient...: 100%|██████████| 10/10 [00:32<00:00,  3.28s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.77it/s]


Mean value of predictions: 0.02206235
Proportion of valid SMILES: 0.5219023779724656
90 Training on replay instances...


 90 Policy gradient...: 100%|██████████| 10/10 [00:32<00:00,  3.28s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.83it/s]


Mean value of predictions: 0.02948718
Proportion of valid SMILES: 0.5857321652065082
91 Training on replay instances...


 91 Policy gradient...: 100%|██████████| 10/10 [00:35<00:00,  3.51s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.82it/s]


Mean value of predictions: 0.034177218
Proportion of valid SMILES: 0.5932415519399249
92 Training on replay instances...


 92 Policy gradient...: 100%|██████████| 10/10 [00:33<00:00,  3.37s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.81it/s]


Mean value of predictions: 0.029831933
Proportion of valid SMILES: 0.595
93 Training on replay instances...


 93 Policy gradient...: 100%|██████████| 10/10 [00:34<00:00,  3.45s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.82it/s]


Mean value of predictions: 0.04474273
Proportion of valid SMILES: 0.55875
94 Training on replay instances...


 94 Policy gradient...: 100%|██████████| 10/10 [00:34<00:00,  3.41s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.85it/s]


Mean value of predictions: 0.03291667
Proportion of valid SMILES: 0.6
95 Training on replay instances...


 95 Policy gradient...: 100%|██████████| 10/10 [00:33<00:00,  3.37s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.81it/s]


Mean value of predictions: 0.05882353
Proportion of valid SMILES: 0.5525
96 Training on replay instances...


 96 Policy gradient...: 100%|██████████| 10/10 [00:34<00:00,  3.46s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.73it/s]


Mean value of predictions: 0.056837607
Proportion of valid SMILES: 0.5857321652065082
97 Training on replay instances...


 97 Policy gradient...: 100%|██████████| 10/10 [00:33<00:00,  3.38s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.83it/s]


Mean value of predictions: 0.03915094
Proportion of valid SMILES: 0.53
98 Training on replay instances...


 98 Policy gradient...: 100%|██████████| 10/10 [00:34<00:00,  3.40s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.82it/s]


Mean value of predictions: 0.09342404
Proportion of valid SMILES: 0.55125
99 Training on replay instances...


 99 Policy gradient...: 100%|██████████| 10/10 [00:35<00:00,  3.54s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.88it/s]


Mean value of predictions: 0.099781185
Proportion of valid SMILES: 0.57125
100 Training on replay instances...


 100 Policy gradient...: 100%|██████████| 10/10 [00:33<00:00,  3.36s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.90it/s]


Mean value of predictions: 0.11298701
Proportion of valid SMILES: 0.5775
101 Training on replay instances...


 101 Policy gradient...: 100%|██████████| 10/10 [00:34<00:00,  3.41s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.93it/s]


Mean value of predictions: 0.1447983
Proportion of valid SMILES: 0.58875
Threshold increased to: 0.10
102 Training on replay instances...


 102 Policy gradient...: 100%|██████████| 10/10 [00:36<00:00,  3.65s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.88it/s]


Mean value of predictions: 0.11775281
Proportion of valid SMILES: 0.55625
Threshold increased to: 0.15
103 Training on replay instances...


 103 Policy gradient...: 100%|██████████| 10/10 [00:36<00:00,  3.61s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.81it/s]


Mean value of predictions: 0.17004609
Proportion of valid SMILES: 0.5425
Threshold increased to: 0.20
104 Training on replay instances...


 104 Policy gradient...: 100%|██████████| 10/10 [00:35<00:00,  3.57s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.89it/s]


Mean value of predictions: 0.19383885
Proportion of valid SMILES: 0.5281602002503129
Threshold increased to: 0.25
105 Training on replay instances...


 105 Policy gradient...: 100%|██████████| 10/10 [00:36<00:00,  3.64s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.91it/s]


Mean value of predictions: 0.24869975
Proportion of valid SMILES: 0.5294117647058824
Threshold increased to: 0.30
106 Training on replay instances...


 106 Policy gradient...: 100%|██████████| 10/10 [00:36<00:00,  3.69s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.81it/s]


Mean value of predictions: 0.24508928
Proportion of valid SMILES: 0.56
Threshold increased to: 0.35
107 Training on replay instances...


 107 Policy gradient...: 100%|██████████| 10/10 [00:37<00:00,  3.73s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.83it/s]


Mean value of predictions: 0.2716279
Proportion of valid SMILES: 0.5375
Threshold increased to: 0.40
108 Training on replay instances...


 108 Policy gradient...: 100%|██████████| 10/10 [00:35<00:00,  3.58s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.86it/s]


Mean value of predictions: 0.2829897
Proportion of valid SMILES: 0.4856070087609512
Threshold increased to: 0.45
109 Training on replay instances...


 109 Policy gradient...: 100%|██████████| 10/10 [00:38<00:00,  3.84s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.86it/s]


Mean value of predictions: 0.2973684
Proportion of valid SMILES: 0.475
Threshold increased to: 0.50
110 Training on replay instances...


 110 Policy gradient...: 100%|██████████| 10/10 [00:37<00:00,  3.76s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.87it/s]


Mean value of predictions: 0.30459183
Proportion of valid SMILES: 0.49
Threshold increased to: 0.55
111 Training on replay instances...


 111 Policy gradient...: 100%|██████████| 10/10 [00:37<00:00,  3.71s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.80it/s]


Mean value of predictions: 0.37008798
Proportion of valid SMILES: 0.42625
Threshold increased to: 0.60
112 Training on replay instances...


 112 Policy gradient...: 100%|██████████| 10/10 [00:40<00:00,  4.04s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.85it/s]


Mean value of predictions: 0.37682927
Proportion of valid SMILES: 0.41
Threshold increased to: 0.65
113 Training on replay instances...


 113 Policy gradient...: 100%|██████████| 10/10 [00:38<00:00,  3.88s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.85it/s]


Mean value of predictions: 0.42542955
Proportion of valid SMILES: 0.36375
Threshold increased to: 0.70
114 Training on replay instances...


 114 Policy gradient...: 100%|██████████| 10/10 [00:37<00:00,  3.79s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.84it/s]


Mean value of predictions: 0.3677419
Proportion of valid SMILES: 0.31
Threshold increased to: 0.75
115 Training on replay instances...


 115 Policy gradient...: 100%|██████████| 10/10 [00:39<00:00,  3.96s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.83it/s]


Mean value of predictions: 0.4458182
Proportion of valid SMILES: 0.34375
Threshold increased to: 0.80
116 Training on replay instances...


 116 Policy gradient...: 100%|██████████| 10/10 [00:40<00:00,  4.04s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.82it/s]


Mean value of predictions: 0.4478261
Proportion of valid SMILES: 0.2875
Threshold increased to: 0.85
117 Training on replay instances...


 117 Policy gradient...: 100%|██████████| 10/10 [00:40<00:00,  4.07s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.83it/s]


Mean value of predictions: 0.4735294
Proportion of valid SMILES: 0.2553191489361702
Threshold increased to: 0.90
118 Training on replay instances...


 118 Policy gradient...: 100%|██████████| 10/10 [00:40<00:00,  4.01s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.81it/s]


Mean value of predictions: 0.50173414
Proportion of valid SMILES: 0.2165206508135169
Threshold increased to: 0.95
119 Training on replay instances...


 119 Policy gradient...: 100%|██████████| 10/10 [00:38<00:00,  3.86s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.82it/s]


Mean value of predictions: 0.4345679
Proportion of valid SMILES: 0.2025
Threshold increased to: 1.00
120 Training on replay instances...


 120 Policy gradient...: 100%|██████████| 10/10 [00:39<00:00,  3.99s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.83it/s]


Mean value of predictions: 0.4695122
Proportion of valid SMILES: 0.205
Threshold increased to: 1.00
121 Training on replay instances...


 121 Policy gradient...: 100%|██████████| 10/10 [00:40<00:00,  4.04s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.86it/s]


Mean value of predictions: 0.45801526
Proportion of valid SMILES: 0.16375
Threshold increased to: 1.00
122 Training on replay instances...


 122 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.11s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.88it/s]


Mean value of predictions: 0.37818182
Proportion of valid SMILES: 0.20625
Threshold increased to: 1.00
123 Training on replay instances...


 123 Policy gradient...: 100%|██████████| 10/10 [00:40<00:00,  4.05s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.85it/s]


Mean value of predictions: 0.52093023
Proportion of valid SMILES: 0.16125
Threshold increased to: 1.00
124 Training on replay instances...


 124 Policy gradient...: 100%|██████████| 10/10 [00:39<00:00,  3.95s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.74it/s]


Mean value of predictions: 0.5509804
Proportion of valid SMILES: 0.1275
Threshold increased to: 1.00
125 Training on replay instances...


 125 Policy gradient...: 100%|██████████| 10/10 [00:40<00:00,  4.08s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.76it/s]


Mean value of predictions: 0.5578947
Proportion of valid SMILES: 0.1425
Threshold increased to: 1.00
126 Training on replay instances...


 126 Policy gradient...: 100%|██████████| 10/10 [00:40<00:00,  4.05s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.71it/s]


Mean value of predictions: 0.47428575
Proportion of valid SMILES: 0.13125
Threshold increased to: 1.00
127 Training on replay instances...


 127 Policy gradient...: 100%|██████████| 10/10 [00:40<00:00,  4.04s/it]
Generating molecules...: 100%|██████████| 50/50 [00:09<00:00,  5.01it/s]


Mean value of predictions: 0.5705263
Proportion of valid SMILES: 0.11875
Threshold increased to: 1.00
128 Training on replay instances...


 128 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.19s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.81it/s]


Mean value of predictions: 0.5326087
Proportion of valid SMILES: 0.11514392991239049
Threshold increased to: 1.00
129 Training on replay instances...


 129 Policy gradient...: 100%|██████████| 10/10 [00:40<00:00,  4.05s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.72it/s]


Mean value of predictions: 0.5434343
Proportion of valid SMILES: 0.12375
Threshold increased to: 1.00
130 Training on replay instances...


 130 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.14s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.73it/s]


Mean value of predictions: 0.47157893
Proportion of valid SMILES: 0.11875
Threshold increased to: 1.00
131 Training on replay instances...


 131 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.11s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.70it/s]


Mean value of predictions: 0.51827955
Proportion of valid SMILES: 0.11625
Threshold increased to: 1.00
132 Training on replay instances...


 132 Policy gradient...: 100%|██████████| 10/10 [00:40<00:00,  4.01s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.70it/s]


Mean value of predictions: 0.44827586
Proportion of valid SMILES: 0.10875
Threshold increased to: 1.00
133 Training on replay instances...


 133 Policy gradient...: 100%|██████████| 10/10 [00:43<00:00,  4.31s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.63it/s]


Mean value of predictions: 0.4582278
Proportion of valid SMILES: 0.09875
Threshold increased to: 1.00
134 Training on replay instances...


 134 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.11s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.69it/s]


Mean value of predictions: 0.50793654
Proportion of valid SMILES: 0.07875
Threshold increased to: 1.00
135 Training on replay instances...


 135 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.16s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.68it/s]


Mean value of predictions: 0.564557
Proportion of valid SMILES: 0.09875
Threshold increased to: 1.00
136 Training on replay instances...


 136 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.19s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.66it/s]


Mean value of predictions: 0.5292308
Proportion of valid SMILES: 0.08125
Threshold increased to: 1.00
137 Training on replay instances...


 137 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.14s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.61it/s]


Mean value of predictions: 0.50508475
Proportion of valid SMILES: 0.07375
Threshold increased to: 1.00
138 Training on replay instances...


 138 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.19s/it]
Generating molecules...: 100%|██████████| 50/50 [00:10<00:00,  4.94it/s]


Mean value of predictions: 0.49859157
Proportion of valid SMILES: 0.08875
Threshold increased to: 1.00
139 Training on replay instances...


 139 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.25s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.66it/s]


Mean value of predictions: 0.542029
Proportion of valid SMILES: 0.08625
Threshold increased to: 1.00
140 Training on replay instances...


 140 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.19s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.68it/s]


Mean value of predictions: 0.59999996
Proportion of valid SMILES: 0.07625
Threshold increased to: 1.00
141 Training on replay instances...


 141 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.24s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.71it/s]


Mean value of predictions: 0.562963
Proportion of valid SMILES: 0.0675
Threshold increased to: 1.00
142 Training on replay instances...


 142 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.19s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.68it/s]


Mean value of predictions: 0.6785714
Proportion of valid SMILES: 0.07
Threshold increased to: 1.00
143 Training on replay instances...


 143 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.24s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.72it/s]


Mean value of predictions: 0.625
Proportion of valid SMILES: 0.06
Threshold increased to: 1.00
144 Training on replay instances...


 144 Policy gradient...: 100%|██████████| 10/10 [00:43<00:00,  4.36s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.61it/s]


Mean value of predictions: 0.4779661
Proportion of valid SMILES: 0.07375
Threshold increased to: 1.00
145 Training on replay instances...


 145 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.20s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.65it/s]


Mean value of predictions: 0.53846157
Proportion of valid SMILES: 0.065
Threshold increased to: 1.00
146 Training on replay instances...


 146 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.21s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.68it/s]


Mean value of predictions: 0.5655173
Proportion of valid SMILES: 0.0725
Threshold increased to: 1.00
147 Training on replay instances...


 147 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.15s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.69it/s]


Mean value of predictions: 0.530303
Proportion of valid SMILES: 0.0825
Threshold increased to: 1.00
148 Training on replay instances...


 148 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.23s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.64it/s]


Mean value of predictions: 0.5541667
Proportion of valid SMILES: 0.06
Threshold increased to: 1.00
149 Training on replay instances...


 149 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.25s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.67it/s]


Mean value of predictions: 0.6528302
Proportion of valid SMILES: 0.06625
Threshold increased to: 1.00
150 Training on replay instances...


 150 Policy gradient...: 100%|██████████| 10/10 [00:43<00:00,  4.36s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.68it/s]


Mean value of predictions: 0.5659574
Proportion of valid SMILES: 0.05875
Threshold increased to: 1.00
151 Training on replay instances...


 151 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.21s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.83it/s]


Mean value of predictions: 0.5627907
Proportion of valid SMILES: 0.05375
Threshold increased to: 1.00
152 Training on replay instances...


 152 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.16s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.80it/s]


Mean value of predictions: 0.5709678
Proportion of valid SMILES: 0.0775
Threshold increased to: 1.00
153 Training on replay instances...


 153 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.15s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.85it/s]


Mean value of predictions: 0.59999996
Proportion of valid SMILES: 0.0775
Threshold increased to: 1.00
154 Training on replay instances...


 154 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.17s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.77it/s]


Mean value of predictions: 0.7130436
Proportion of valid SMILES: 0.0575
Threshold increased to: 1.00
155 Training on replay instances...


 155 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.16s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.78it/s]


Mean value of predictions: 0.67777777
Proportion of valid SMILES: 0.045
Threshold increased to: 1.00
156 Training on replay instances...


 156 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.26s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.73it/s]


Mean value of predictions: 0.6162162
Proportion of valid SMILES: 0.04625
Threshold increased to: 1.00
157 Training on replay instances...


 157 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.15s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.78it/s]


Mean value of predictions: 0.54090905
Proportion of valid SMILES: 0.055
Threshold increased to: 1.00
158 Training on replay instances...


 158 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.15s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.76it/s]


Mean value of predictions: 0.5541667
Proportion of valid SMILES: 0.06
Threshold increased to: 1.00
159 Training on replay instances...


 159 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.15s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.77it/s]


Mean value of predictions: 0.5921569
Proportion of valid SMILES: 0.06375
Threshold increased to: 1.00
160 Training on replay instances...


 160 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.13s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.82it/s]


Mean value of predictions: 0.62083334
Proportion of valid SMILES: 0.06
Threshold increased to: 1.00
161 Training on replay instances...


 161 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.16s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.76it/s]


Mean value of predictions: 0.55
Proportion of valid SMILES: 0.05
Threshold increased to: 1.00
162 Training on replay instances...


 162 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.18s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.78it/s]


Mean value of predictions: 0.6727273
Proportion of valid SMILES: 0.04125
Threshold increased to: 1.00
163 Training on replay instances...


 163 Policy gradient...: 100%|██████████| 10/10 [00:43<00:00,  4.36s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.78it/s]


Mean value of predictions: 0.58857137
Proportion of valid SMILES: 0.04380475594493116
Threshold increased to: 1.00
164 Training on replay instances...


 164 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.22s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.80it/s]


Mean value of predictions: 0.65555555
Proportion of valid SMILES: 0.045
Threshold increased to: 1.00
165 Training on replay instances...


 165 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.15s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.77it/s]


Mean value of predictions: 0.6047619
Proportion of valid SMILES: 0.0525
Threshold increased to: 1.00
166 Training on replay instances...


 166 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.19s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.78it/s]


Mean value of predictions: 0.75135136
Proportion of valid SMILES: 0.04625
Threshold increased to: 1.00
167 Training on replay instances...


 167 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.13s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.82it/s]


Mean value of predictions: 0.6097561
Proportion of valid SMILES: 0.05125
Threshold increased to: 1.00
168 Training on replay instances...


 168 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.19s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.83it/s]


Mean value of predictions: 0.5395349
Proportion of valid SMILES: 0.05375
Threshold increased to: 1.00
169 Training on replay instances...


 169 Policy gradient...: 100%|██████████| 10/10 [00:43<00:00,  4.32s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.82it/s]


Mean value of predictions: 0.7517241
Proportion of valid SMILES: 0.03625
Threshold increased to: 1.00
170 Training on replay instances...


 170 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.19s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.82it/s]


Mean value of predictions: 0.67741936
Proportion of valid SMILES: 0.03875
Threshold increased to: 1.00
171 Training on replay instances...


 171 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.12s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.82it/s]


Mean value of predictions: 0.6047619
Proportion of valid SMILES: 0.0525
Threshold increased to: 1.00
172 Training on replay instances...


 172 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.14s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.81it/s]


Mean value of predictions: 0.6296296
Proportion of valid SMILES: 0.03375
Threshold increased to: 1.00
173 Training on replay instances...


 173 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.18s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.79it/s]


Mean value of predictions: 0.6
Proportion of valid SMILES: 0.04375
Threshold increased to: 1.00
174 Training on replay instances...


 174 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.17s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.73it/s]


Mean value of predictions: 0.5944445
Proportion of valid SMILES: 0.045
Threshold increased to: 1.00
175 Training on replay instances...


 175 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.16s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.84it/s]


Mean value of predictions: 0.6944444
Proportion of valid SMILES: 0.045
Threshold increased to: 1.00
176 Training on replay instances...


 176 Policy gradient...: 100%|██████████| 10/10 [00:43<00:00,  4.35s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.78it/s]


Mean value of predictions: 0.6615384
Proportion of valid SMILES: 0.04875
Threshold increased to: 1.00
177 Training on replay instances...


 177 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.20s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.79it/s]


Mean value of predictions: 0.63
Proportion of valid SMILES: 0.05
Threshold increased to: 1.00
178 Training on replay instances...


 178 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.14s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.83it/s]


Mean value of predictions: 0.6333334
Proportion of valid SMILES: 0.045
Threshold increased to: 1.00
179 Training on replay instances...


 179 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.14s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.75it/s]


Mean value of predictions: 0.62352943
Proportion of valid SMILES: 0.0425
Threshold increased to: 1.00
180 Training on replay instances...


 180 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.14s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.81it/s]


Mean value of predictions: 0.65599996
Proportion of valid SMILES: 0.03125
Threshold increased to: 1.00
181 Training on replay instances...


 181 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.20s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.82it/s]


Mean value of predictions: 0.78125
Proportion of valid SMILES: 0.04
Threshold increased to: 1.00
182 Training on replay instances...


 182 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.19s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.81it/s]


Mean value of predictions: 0.55714285
Proportion of valid SMILES: 0.035
Threshold increased to: 1.00
183 Training on replay instances...


 183 Policy gradient...: 100%|██████████| 10/10 [00:43<00:00,  4.34s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.78it/s]


Mean value of predictions: 0.67741936
Proportion of valid SMILES: 0.03875
Threshold increased to: 1.00
184 Training on replay instances...


 184 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.17s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.80it/s]


Mean value of predictions: 0.59999996
Proportion of valid SMILES: 0.03875
Threshold increased to: 1.00
185 Training on replay instances...


 185 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.16s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.78it/s]


Mean value of predictions: 0.6648649
Proportion of valid SMILES: 0.04625
Threshold increased to: 1.00
186 Training on replay instances...


 186 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.12s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.81it/s]


Mean value of predictions: 0.63846153
Proportion of valid SMILES: 0.0325
Threshold increased to: 1.00
187 Training on replay instances...


 187 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.14s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.82it/s]


Mean value of predictions: 0.6114286
Proportion of valid SMILES: 0.04375
Threshold increased to: 1.00
188 Training on replay instances...


 188 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.17s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.81it/s]


Mean value of predictions: 0.5125
Proportion of valid SMILES: 0.04
Threshold increased to: 1.00
189 Training on replay instances...


 189 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.18s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.81it/s]


Mean value of predictions: 0.6580645
Proportion of valid SMILES: 0.03875
Threshold increased to: 1.00
190 Training on replay instances...


 190 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.17s/it]
Generating molecules...: 100%|██████████| 50/50 [00:10<00:00,  4.83it/s]


Mean value of predictions: 0.59999996
Proportion of valid SMILES: 0.04125
Threshold increased to: 1.00
191 Training on replay instances...


 191 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.18s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.78it/s]


Mean value of predictions: 0.56363636
Proportion of valid SMILES: 0.04125
Threshold increased to: 1.00
192 Training on replay instances...


 192 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.16s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.78it/s]


Mean value of predictions: 0.7310345
Proportion of valid SMILES: 0.03625
Threshold increased to: 1.00
193 Training on replay instances...


 193 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.18s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.82it/s]


Mean value of predictions: 0.72
Proportion of valid SMILES: 0.03125
Threshold increased to: 1.00
194 Training on replay instances...


 194 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.17s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.77it/s]


Mean value of predictions: 0.62068963
Proportion of valid SMILES: 0.03625
Threshold increased to: 1.00
195 Training on replay instances...


 195 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.18s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.81it/s]


Mean value of predictions: 0.74615383
Proportion of valid SMILES: 0.0325
Threshold increased to: 1.00
196 Training on replay instances...


 196 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.16s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.72it/s]


Mean value of predictions: 0.68
Proportion of valid SMILES: 0.0375
Threshold increased to: 1.00
197 Training on replay instances...


 197 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.21s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.76it/s]


Mean value of predictions: 0.59310347
Proportion of valid SMILES: 0.03625
Threshold increased to: 1.00
198 Training on replay instances...


 198 Policy gradient...: 100%|██████████| 10/10 [00:43<00:00,  4.38s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.75it/s]


Mean value of predictions: 0.6181818
Proportion of valid SMILES: 0.04125
Threshold increased to: 1.00
199 Training on replay instances...


 199 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.22s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.77it/s]


Mean value of predictions: 0.6097561
Proportion of valid SMILES: 0.05125
Threshold increased to: 1.00
200 Training on replay instances...


 200 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.21s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.78it/s]


Mean value of predictions: 0.67826086
Proportion of valid SMILES: 0.02875
Threshold increased to: 1.00
201 Training on replay instances...


 201 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.19s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.78it/s]


Mean value of predictions: 0.57575756
Proportion of valid SMILES: 0.04125
Threshold increased to: 1.00
202 Training on replay instances...


 202 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.20s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.73it/s]


Mean value of predictions: 0.6461538
Proportion of valid SMILES: 0.0325
Threshold increased to: 1.00
203 Training on replay instances...


 203 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.19s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.73it/s]


Mean value of predictions: 0.67333335
Proportion of valid SMILES: 0.0375
Threshold increased to: 1.00
204 Training on replay instances...


 204 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.18s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.76it/s]


Mean value of predictions: 0.54857147
Proportion of valid SMILES: 0.04375
Threshold increased to: 1.00
205 Training on replay instances...


 205 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.17s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.80it/s]


Mean value of predictions: 0.6740741
Proportion of valid SMILES: 0.03375
Threshold increased to: 1.00
206 Training on replay instances...


 206 Policy gradient...: 100%|██████████| 10/10 [00:43<00:00,  4.37s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.75it/s]


Mean value of predictions: 0.6066667
Proportion of valid SMILES: 0.0375
Threshold increased to: 1.00
207 Training on replay instances...


 207 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.20s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.72it/s]


Mean value of predictions: 0.58571434
Proportion of valid SMILES: 0.035
Threshold increased to: 1.00
208 Training on replay instances...


 208 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.16s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.73it/s]


Mean value of predictions: 0.79230773
Proportion of valid SMILES: 0.0325
Threshold increased to: 1.00
209 Training on replay instances...


 209 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.18s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.79it/s]


Mean value of predictions: 0.6333333
Proportion of valid SMILES: 0.03
Threshold increased to: 1.00
210 Training on replay instances...


 210 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.21s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.80it/s]


Mean value of predictions: 0.75
Proportion of valid SMILES: 0.03
Threshold increased to: 1.00
211 Training on replay instances...


 211 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.17s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.74it/s]


Mean value of predictions: 0.8526316
Proportion of valid SMILES: 0.02375
Threshold increased to: 1.00
212 Training on replay instances...


 212 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.22s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.74it/s]


Mean value of predictions: 0.648
Proportion of valid SMILES: 0.03125
Threshold increased to: 1.00
213 Training on replay instances...


 213 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.20s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.80it/s]


Mean value of predictions: 0.6545454
Proportion of valid SMILES: 0.0275
Threshold increased to: 1.00
214 Training on replay instances...


 214 Policy gradient...: 100%|██████████| 10/10 [00:43<00:00,  4.33s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.73it/s]


Mean value of predictions: 0.81904763
Proportion of valid SMILES: 0.02625
Threshold increased to: 1.00
215 Training on replay instances...


 215 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.19s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.75it/s]


Mean value of predictions: 0.69655174
Proportion of valid SMILES: 0.03625
Threshold increased to: 1.00
216 Training on replay instances...


 216 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.20s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.80it/s]


Mean value of predictions: 0.73636365
Proportion of valid SMILES: 0.0275
Threshold increased to: 1.00
217 Training on replay instances...


 217 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.23s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.76it/s]


Mean value of predictions: 0.85714287
Proportion of valid SMILES: 0.0175
Threshold increased to: 1.00
218 Training on replay instances...


 218 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.18s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.77it/s]


Mean value of predictions: 0.71999997
Proportion of valid SMILES: 0.025
Threshold increased to: 1.00
219 Training on replay instances...


 219 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.20s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.78it/s]


Mean value of predictions: 0.8230769
Proportion of valid SMILES: 0.0325
Threshold increased to: 1.00
220 Training on replay instances...


 220 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.19s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.75it/s]


Mean value of predictions: 0.6785714
Proportion of valid SMILES: 0.035
Threshold increased to: 1.00
221 Training on replay instances...


 221 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.19s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.75it/s]


Mean value of predictions: 0.6962963
Proportion of valid SMILES: 0.03375
Threshold increased to: 1.00
222 Training on replay instances...


 222 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.23s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.80it/s]


Mean value of predictions: 0.74444443
Proportion of valid SMILES: 0.0225
Threshold increased to: 1.00
223 Training on replay instances...


 223 Policy gradient...: 100%|██████████| 10/10 [00:44<00:00,  4.42s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.74it/s]


Mean value of predictions: 0.85333335
Proportion of valid SMILES: 0.01875
Threshold increased to: 1.00
224 Training on replay instances...


 224 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.24s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.74it/s]


Mean value of predictions: 0.8
Proportion of valid SMILES: 0.0275
Threshold increased to: 1.00
225 Training on replay instances...


 225 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.21s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.76it/s]


Mean value of predictions: 0.7222222
Proportion of valid SMILES: 0.0225
Threshold increased to: 1.00
226 Training on replay instances...


 226 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.22s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.75it/s]


Mean value of predictions: 0.6666667
Proportion of valid SMILES: 0.03
Threshold increased to: 1.00
227 Training on replay instances...


 227 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.20s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.69it/s]


Mean value of predictions: 0.62857145
Proportion of valid SMILES: 0.02625
Threshold increased to: 1.00
228 Training on replay instances...


 228 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.23s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.77it/s]


Mean value of predictions: 0.74
Proportion of valid SMILES: 0.025
Threshold increased to: 1.00
229 Training on replay instances...


 229 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.23s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.77it/s]


Mean value of predictions: 0.6583333
Proportion of valid SMILES: 0.03
Threshold increased to: 1.00
230 Training on replay instances...


 230 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.19s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.78it/s]


Mean value of predictions: 0.6583333
Proportion of valid SMILES: 0.03
Threshold increased to: 1.00
231 Training on replay instances...


 231 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.23s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.75it/s]


Mean value of predictions: 0.67200005
Proportion of valid SMILES: 0.03125
Threshold increased to: 1.00
232 Training on replay instances...


 232 Policy gradient...: 100%|██████████| 10/10 [00:43<00:00,  4.39s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.74it/s]


Mean value of predictions: 0.64444447
Proportion of valid SMILES: 0.0225
Threshold increased to: 1.00
233 Training on replay instances...


 233 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.21s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.75it/s]


Mean value of predictions: 0.7047619
Proportion of valid SMILES: 0.02625
Threshold increased to: 1.00
234 Training on replay instances...


 234 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.22s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.74it/s]


Mean value of predictions: 0.57272726
Proportion of valid SMILES: 0.0275
Threshold increased to: 1.00
235 Training on replay instances...


 235 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.19s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.73it/s]


Mean value of predictions: 0.72
Proportion of valid SMILES: 0.01875
Threshold increased to: 1.00
236 Training on replay instances...


 236 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.17s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.72it/s]


Mean value of predictions: 0.7777778
Proportion of valid SMILES: 0.0225
Threshold increased to: 1.00
237 Training on replay instances...


 237 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.22s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.75it/s]


Mean value of predictions: 0.568
Proportion of valid SMILES: 0.03125
Threshold increased to: 1.00
238 Training on replay instances...


 238 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.23s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.78it/s]


Mean value of predictions: 0.672
Proportion of valid SMILES: 0.03128911138923655
Threshold increased to: 1.00
239 Training on replay instances...


 239 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.19s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.74it/s]


Mean value of predictions: 0.64
Proportion of valid SMILES: 0.025
Threshold increased to: 1.00
240 Training on replay instances...


 240 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.22s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.77it/s]


Mean value of predictions: 0.64
Proportion of valid SMILES: 0.025
Threshold increased to: 1.00
241 Training on replay instances...


 241 Policy gradient...: 100%|██████████| 10/10 [00:44<00:00,  4.46s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.72it/s]


Mean value of predictions: 0.8375
Proportion of valid SMILES: 0.02
Threshold increased to: 1.00
242 Training on replay instances...


 242 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.23s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.78it/s]


Mean value of predictions: 0.69
Proportion of valid SMILES: 0.025
Threshold increased to: 1.00
243 Training on replay instances...


 243 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.22s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.76it/s]


Mean value of predictions: 0.7368421
Proportion of valid SMILES: 0.02375
Threshold increased to: 1.00
244 Training on replay instances...


 244 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.17s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.78it/s]


Mean value of predictions: 0.81176466
Proportion of valid SMILES: 0.02125
Threshold increased to: 1.00
245 Training on replay instances...


 245 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.21s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.73it/s]


Mean value of predictions: 0.71304345
Proportion of valid SMILES: 0.02875
Threshold increased to: 1.00
246 Training on replay instances...


 246 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.18s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.73it/s]


Mean value of predictions: 0.74285716
Proportion of valid SMILES: 0.02625
Threshold increased to: 1.00
247 Training on replay instances...


 247 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.15s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.77it/s]


Mean value of predictions: 0.62352943
Proportion of valid SMILES: 0.02125
Threshold increased to: 1.00
248 Training on replay instances...


 248 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.21s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.79it/s]


Mean value of predictions: 0.66315794
Proportion of valid SMILES: 0.02375
Threshold increased to: 1.00
249 Training on replay instances...


 249 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.19s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.79it/s]


Mean value of predictions: 0.7125
Proportion of valid SMILES: 0.02
Threshold increased to: 1.00
250 Training on replay instances...


 250 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.17s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.71it/s]


Mean value of predictions: 0.6857143
Proportion of valid SMILES: 0.02625
Threshold increased to: 1.00
251 Training on replay instances...


 251 Policy gradient...: 100%|██████████| 10/10 [00:44<00:00,  4.45s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.69it/s]


Mean value of predictions: 0.65000004
Proportion of valid SMILES: 0.015
Threshold increased to: 1.00
252 Training on replay instances...


 252 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.21s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.76it/s]


Mean value of predictions: 0.68888885
Proportion of valid SMILES: 0.0225
Threshold increased to: 1.00
253 Training on replay instances...


 253 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.20s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.73it/s]


Mean value of predictions: 0.6761905
Proportion of valid SMILES: 0.02625
Threshold increased to: 1.00
254 Training on replay instances...


 254 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.19s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.74it/s]


Mean value of predictions: 0.876923
Proportion of valid SMILES: 0.01625
Threshold increased to: 1.00
255 Training on replay instances...


 255 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.22s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.77it/s]


Mean value of predictions: 0.58947366
Proportion of valid SMILES: 0.02375
Threshold increased to: 1.00
256 Training on replay instances...


 256 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.23s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.69it/s]


Mean value of predictions: 0.5466667
Proportion of valid SMILES: 0.0375
Threshold increased to: 1.00
257 Training on replay instances...


 257 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.20s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.77it/s]


Mean value of predictions: 0.8333333
Proportion of valid SMILES: 0.0225
Threshold increased to: 1.00
258 Training on replay instances...


 258 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.19s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.75it/s]


Mean value of predictions: 0.76666665
Proportion of valid SMILES: 0.0225
Threshold increased to: 1.00
259 Training on replay instances...


 259 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.22s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.73it/s]


Mean value of predictions: 0.6636364
Proportion of valid SMILES: 0.0275
Threshold increased to: 1.00
260 Training on replay instances...


 260 Policy gradient...: 100%|██████████| 10/10 [00:44<00:00,  4.46s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.71it/s]


Mean value of predictions: 0.70666665
Proportion of valid SMILES: 0.01875
Threshold increased to: 1.00
261 Training on replay instances...


 261 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.20s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.74it/s]


Mean value of predictions: 0.70000005
Proportion of valid SMILES: 0.02
Threshold increased to: 1.00
262 Training on replay instances...


 262 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.20s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.76it/s]


Mean value of predictions: 0.93333334
Proportion of valid SMILES: 0.01875
Threshold increased to: 1.00
263 Training on replay instances...


 263 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.19s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.72it/s]


Mean value of predictions: 0.8
Proportion of valid SMILES: 0.01875
Threshold increased to: 1.00
264 Training on replay instances...


 264 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.21s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.74it/s]


Mean value of predictions: 0.7428571
Proportion of valid SMILES: 0.0175
Threshold increased to: 1.00
265 Training on replay instances...


 265 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.21s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.70it/s]


Mean value of predictions: 0.7647059
Proportion of valid SMILES: 0.02125
Threshold increased to: 1.00
266 Training on replay instances...


 266 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.20s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.74it/s]


Mean value of predictions: 0.6181818
Proportion of valid SMILES: 0.0275
Threshold increased to: 1.00
267 Training on replay instances...


 267 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.20s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.74it/s]


Mean value of predictions: 0.6588235
Proportion of valid SMILES: 0.02125
Threshold increased to: 1.00
268 Training on replay instances...


 268 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.19s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.81it/s]


Mean value of predictions: 0.75555557
Proportion of valid SMILES: 0.0225
Threshold increased to: 1.00
269 Training on replay instances...


 269 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.22s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.76it/s]


Mean value of predictions: 0.7647059
Proportion of valid SMILES: 0.02125
Threshold increased to: 1.00
270 Training on replay instances...


 270 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.21s/it]
Generating molecules...: 100%|██████████| 50/50 [00:11<00:00,  4.52it/s]


Mean value of predictions: 0.78571427
Proportion of valid SMILES: 0.0175
Threshold increased to: 1.00
271 Training on replay instances...


 271 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.23s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.72it/s]


Mean value of predictions: 0.74545455
Proportion of valid SMILES: 0.01375
Threshold increased to: 1.00
272 Training on replay instances...


 272 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.21s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.71it/s]


Mean value of predictions: 0.78095233
Proportion of valid SMILES: 0.02625
Threshold increased to: 1.00
273 Training on replay instances...


 273 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.25s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.76it/s]


Mean value of predictions: 0.92
Proportion of valid SMILES: 0.01875
Threshold increased to: 1.00
274 Training on replay instances...


 274 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.21s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.71it/s]


Mean value of predictions: 0.7411765
Proportion of valid SMILES: 0.02125
Threshold increased to: 1.00
275 Training on replay instances...


 275 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.19s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.71it/s]


Mean value of predictions: 0.72380954
Proportion of valid SMILES: 0.02625
Threshold increased to: 1.00
276 Training on replay instances...


 276 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.20s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.72it/s]


Mean value of predictions: 0.7714286
Proportion of valid SMILES: 0.0175
Threshold increased to: 1.00
277 Training on replay instances...


 277 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.19s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.72it/s]


Mean value of predictions: 0.68
Proportion of valid SMILES: 0.01875
Threshold increased to: 1.00
278 Training on replay instances...


 278 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.19s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.79it/s]


Mean value of predictions: 0.58181816
Proportion of valid SMILES: 0.0275
Threshold increased to: 1.00
279 Training on replay instances...


 279 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.21s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.77it/s]


Mean value of predictions: 0.6666667
Proportion of valid SMILES: 0.02625
Threshold increased to: 1.00
280 Training on replay instances...


 280 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.24s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.73it/s]


Mean value of predictions: 0.6090909
Proportion of valid SMILES: 0.0275
Threshold increased to: 1.00
281 Training on replay instances...


 281 Policy gradient...: 100%|██████████| 10/10 [00:44<00:00,  4.46s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.71it/s]


Mean value of predictions: 0.7125
Proportion of valid SMILES: 0.02
Threshold increased to: 1.00
282 Training on replay instances...


 282 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.21s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.75it/s]


Mean value of predictions: 0.78571427
Proportion of valid SMILES: 0.0175
Threshold increased to: 1.00
283 Training on replay instances...


 283 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.18s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.76it/s]


Mean value of predictions: 0.75555557
Proportion of valid SMILES: 0.0225
Threshold increased to: 1.00
284 Training on replay instances...


 284 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.17s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.74it/s]


Mean value of predictions: 0.7714286
Proportion of valid SMILES: 0.0175
Threshold increased to: 1.00
285 Training on replay instances...


 285 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.18s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.70it/s]


Mean value of predictions: 0.8666667
Proportion of valid SMILES: 0.01875
Threshold increased to: 1.00
286 Training on replay instances...


 286 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.18s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.74it/s]


Mean value of predictions: 0.73846155
Proportion of valid SMILES: 0.01625
Threshold increased to: 1.00
287 Training on replay instances...


 287 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.20s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.69it/s]


Mean value of predictions: 0.875
Proportion of valid SMILES: 0.01
Threshold increased to: 1.00
288 Training on replay instances...


 288 Policy gradient...: 100%|██████████| 10/10 [00:41<00:00,  4.19s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.74it/s]


Mean value of predictions: 0.9818182
Proportion of valid SMILES: 0.01375
Threshold increased to: 1.00
289 Training on replay instances...


 289 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.23s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.79it/s]


Mean value of predictions: 0.87272733
Proportion of valid SMILES: 0.01375
Threshold increased to: 1.00
290 Training on replay instances...


 290 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.23s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.73it/s]


Mean value of predictions: 0.68
Proportion of valid SMILES: 0.025
Threshold increased to: 1.00
291 Training on replay instances...


 291 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.24s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.74it/s]


Mean value of predictions: 0.70000005
Proportion of valid SMILES: 0.0225
Threshold increased to: 1.00
292 Training on replay instances...


 292 Policy gradient...: 100%|██████████| 10/10 [00:45<00:00,  4.50s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.70it/s]


Mean value of predictions: 0.7
Proportion of valid SMILES: 0.0175
Threshold increased to: 1.00
293 Training on replay instances...


 293 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.23s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.69it/s]


Mean value of predictions: 0.6952381
Proportion of valid SMILES: 0.02625
Threshold increased to: 1.00
294 Training on replay instances...


 294 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.21s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.72it/s]


Mean value of predictions: 0.64285713
Proportion of valid SMILES: 0.0175
Threshold increased to: 1.00
295 Training on replay instances...


 295 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.24s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.68it/s]


Mean value of predictions: 0.67777777
Proportion of valid SMILES: 0.0225
Threshold increased to: 1.00
296 Training on replay instances...


 296 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.21s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.74it/s]


Mean value of predictions: 0.8352941
Proportion of valid SMILES: 0.02125
Threshold increased to: 1.00
297 Training on replay instances...


 297 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.22s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.75it/s]


Mean value of predictions: 0.768421
Proportion of valid SMILES: 0.02375
Threshold increased to: 1.00
298 Training on replay instances...


 298 Policy gradient...: 100%|██████████| 10/10 [00:42<00:00,  4.23s/it]
Generating molecules...: 100%|██████████| 50/50 [00:08<00:00,  5.78it/s]


Mean value of predictions: 0.8
Proportion of valid SMILES: 0.015
Threshold increased to: 1.00
299 Training on replay instances...


 299 Policy gradient...:  90%|█████████ | 9/10 [00:37<00:04,  4.20s/it]