In [1]:
import os
import ipyplot

from RanDepict import RandomDepictor, RandomMarkushStructureCreator
import RanDepict
RanDepict.__version__

'1.1.7'

## Depict chemical structures with CDK, RDKit, Indigo or PIKAChU

After creating an instance of RandomDepictor, depictions with randomly chosen parameters are created by calling its methods `cdk_depict`, `rdkit_depict`, `indigo_depict` and `pikachu_depict`.


The SMILES or mol_block string needs to be given.

Each of these functions returns an np.array which represents an RGB image of the chemical structure.

In [2]:
# Example structure and the image shape passed to every depiction call below.
smiles = "CN1C=NC2=C1C(=O)N(C(=O)N2C)CC(=O)O"
depiction_img_shape = (100, 100)

# RandomDepictor accepts a seed for reproducible results (defaults to 42).
# A single depictor is shared by all toolkits; for each toolkit three
# depictions are generated first and then displayed as one row of images.
with RandomDepictor(seed=12) as depictor:
    # CDK
    cdk_images = [
        depictor.cdk_depict(smiles=smiles, shape=depiction_img_shape)
        for _ in range(3)
    ]
    print("_________________CDK depictions_________________")
    ipyplot.plot_images(cdk_images, max_images=10, img_width=100)

    # RDKit
    rdkit_images = [
        depictor.rdkit_depict(smiles, depiction_img_shape)
        for _ in range(3)
    ]
    print("_________________RDKit depictions_________________")
    ipyplot.plot_images(rdkit_images, max_images=10, img_width=100)

    # Indigo
    indigo_images = [
        depictor.indigo_depict(smiles, depiction_img_shape)
        for _ in range(3)
    ]
    print("_________________Indigo depictions_________________")
    ipyplot.plot_images(indigo_images, max_images=10, img_width=100)

    # PIKAChU
    pikachu_images = [
        depictor.pikachu_depict(smiles, depiction_img_shape)
        for _ in range(3)
    ]
    print("_________________PIKAChU depictions_________________")
    ipyplot.plot_images(pikachu_images, max_images=10, img_width=100)

_________________CDK depictions_________________


_________________RDKit depictions_________________


_________________Indigo depictions_________________


_________________PIKAChU depictions_________________


## Depict chemical structures with a random toolkit

After creating an instance of RandomDepictor, call its method

 - random_depiction(
       smiles: str,
       shape: Tuple[int,int]
   )

The SMILES string needs to be given; the shape defaults to (299,299).

It returns an np.array which represents an RGB image of the chemical structure.

**Markush structures can be handled by the random_depiction() method. As not all of the used toolkits handle every type of markush structure equally well, we recommend using random_depiction as it filters the potential toolkits used for the depiction based on the input SMILES.**

In [3]:
# Markush-style SMILES containing R/X/Y placeholder groups.
smiles = "[R1]N1C=NC2=C1C(=[X])N(C(=O)N2C)[Y]"
# Alternative example input (swap in to try a different scaffold):
#smiles = "C[C@]12C([X])C[C@H]3[C@H]([C@@H]1CC[C@@H]2O)C([R1])C([R2])C4=CC(=O)CC[C@]34C"

# Collect 20 depictions of the same structure from one depictor instance.
with RandomDepictor(2, hand_drawn=False) as depictor:
    random_images = [
        depictor.random_depiction(smiles, shape=(100,100))
        for _ in range(20)
    ]

ipyplot.plot_images(random_images, max_images=20, img_width=100)

## Create random depictions with random augmentations

After creating an instance of RandomDepictor, this instance can simply be called as a function in order to generate a chemical structure depiction using CDK, RDKit, Indigo or PIKAChU (randomly chosen) and apply random augmentations. The augmentations consist of random imgaug augmentations and randomly added chemical ID labels or curved arrows.

In [4]:
smiles = "CN1C=NC2=C1C(=O)N(C(=O)N2C)CC(=O)O"

# Calling the depictor instance itself returns one depiction per call;
# 20 of them are collected and shown as an image grid.
with RandomDepictor() as depictor:
    random_augmented_images = [depictor(smiles) for _ in range(20)]

ipyplot.plot_images(random_augmented_images, max_images=20, img_width=100)

## Create random depictions with cxSMILES with coordinates

Simply call the method `random_depiction_with_coordinates` of the `RandomDepictor` to generate a chemical structure depiction using CDK, RDKit or Indigo and, if desired, apply random augmentations. This method returns the depiction and a cxSMILES string that contains dimensionless coordinates that refer to the atom coordinates in the depiction.

In [7]:
smiles = "CCC"
random_images = []

with RandomDepictor() as depictor:
    for index in range(20):
        # The first 10 calls rely on the method's default for `augment`;
        # the remaining 10 explicitly request augmentation.
        extra_kwargs = {} if index < 10 else {"augment": True}
        depiction, cx_smiles = depictor.random_depiction_with_coordinates(
            smiles,
            shape=(100,100),
            **extra_kwargs,
        )
        # Print the cxSMILES returned alongside each depiction.
        print(f"({cx_smiles})")
        random_images.append(depiction)

ipyplot.plot_images(random_images, max_images=20, img_width=100)

(CCC |(-0.76,-1.08,;-0.35,0.36,;1.11,0.72,)|)
(CCC |(0.44,1.87,;-0.53,0.6,;0.09,-0.88,)|)
(CCC |(0.88,1.2,;-0.6,1.47,;-1.57,0.33,)|)
(CCC |(-0.83,-0.26,;0.06,0.94,;-0.53,2.32,)|)
(CCC |(-1.39,0.78,;0.01,0,;1.38,0.82,)|)
(CCC |(-1.74,0.79,;-0.26,0.53,;0.7,1.68,)|)
(CCC |(-0.1,-0.87,;0.53,0.6,;-0.43,1.88,)|)
(CCC |(0.09,-1.32,;-0.5,0.06,;0.4,1.26,)|)
(CCC |(0.4,-1.26,;-0.5,-0.06,;0.09,1.32,)|)
(CCC |(-1.41,0.63,;0.07,0,;1.34,0.97,)|)
(CCC |(-0.73,-0.68,;0.5,0.35,;0.22,1.93,)|)
(CCC |(0.19,-1.31,;-0.5,0.02,;0.31,1.29,)|)
(CCC |(-0.29,1.92,;-0.49,0.33,;0.78,-0.64,)|)
(CCC |(-0.87,1,;0.43,0.25,;0.44,-1.25,)|)
(CCC |(-0.6,-0.74,;0.52,0.4,;0.09,1.94,)|)
(CCC |(0.36,-1.27,;-0.5,-0.04,;0.14,1.32,)|)
(CCC |(-0.68,-0.3,;0.07,1,;-0.68,2.3,)|)
(CCC |(1.41,0.51,;-0.09,1.06,;-1.32,0.03,)|)
(CCC |(-1.69,1.42,;-0.5,0.5,;0.89,1.08,)|)
(CCC |(-1.65,1.53,;-0.54,0.51,;0.89,0.96,)|)


## Create random hand-drawn like depictions

After creating an instance of RandomDepictor with `hand_drawn=True`, this instance can simply be called as a function in order to generate a chemical structure depiction using CDK, RDKit, Indigo or PIKAChU (randomly chosen) and apply random augmentations and random background addition.

In [5]:
smiles = "CN1C=NC2=C1C(=O)N(C(=O)N2C)CC(=O)O"

# Same workflow as for the augmented depictions, but with the depictor
# created in hand-drawn mode.
with RandomDepictor(hand_drawn=True) as depictor:
    random_augmented_images = [depictor(smiles) for _ in range(20)]

ipyplot.plot_images(random_augmented_images, max_images=20, img_width=100)

## 

## Create and save a batch of images

After creating an instance of RandomDepictor, simply call the method batch_depict_save().

Args:

- smiles_list (List[str]): List of SMILES str
- images_per_structure (int): Amount of images to create per SMILES str
- output_dir (str): Output directory 
- augment (bool): Boolean that indicates whether or not to use augmentations
- ID_list (List[str]): List of IDs (should be as long as smiles_list)
- shape (Tuple[int, int], optional): image shape. Defaults to (299, 299).
- processes (int, optional): Number of parallel threads. Defaults to 4.
- seed (int, optional): Seed for pseudo-random decisions. Defaults to 42.

In [None]:
# Make sure the output directories exist.
# os.makedirs(..., exist_ok=True) replaces the separate exists()/mkdir()
# steps: it is a no-op when the directory is already there and avoids the
# check-then-create race.
os.makedirs('not_augmented', exist_ok=True)
os.makedirs('augmented', exist_ok=True)

# Depict and save two batches of images: one without and one with augmentations.
smiles = "CN1C=NC2=C1C(=O)N(C(=O)N2C)CC(=O)O"
with RandomDepictor(42) as depictor:
    # Positional arguments, in the order given in this notebook's Args list:
    # smiles_list, images_per_structure, output_dir, augment, ID_list, shape, processes
    for output_dir, augment in (('not_augmented', False), ('augmented', True)):
        depictor.batch_depict_save([smiles], 20, output_dir, augment, ['caffeine'], (299, 299), 5)
    

In [None]:
# Create the output directory if needed (no error when it already exists).
os.makedirs('kohulan', exist_ok=True)

smiles = "CN1C=NC2=C1C(=O)N(C(=O)N2C)C"
r_smiles = "[R1]N1C=NC2=C1[X](=O)N(C(=O)N2C)[R]"
seed = 233
r_seed = 1

with RandomDepictor(1) as depictor:
    # For each structure, save one image per (shape, augment) combination.
    # The iteration order reproduces the original sequence of calls:
    # 299 plain, 299 augmented, 512 plain, 512 augmented.
    for structure, id_prefix, structure_seed in (
        (smiles, 'caffeine', seed),
        (r_smiles, 'caffeine_R', r_seed),
    ):
        for shape in ((299, 299), (512, 512)):
            for augment in (False, True):
                # IDs look like 'caffeine_299_299' / 'caffeine_R_aug_512_512'.
                aug_tag = '_aug' if augment else ''
                image_id = f"{id_prefix}{aug_tag}_{shape[0]}_{shape[1]}"
                depictor.depict_save(structure, 1, 'kohulan', augment, image_id, shape, seed=structure_seed)

## Create a batch of images while ensuring diversity using feature fingerprints


After creating an instance of RandomDepictor, simply call the method batch_depict_with_fingerprints().

Args:

- smiles_list: List[str]
- images_per_structure: int
- indigo_proportion: float = 0.15
- rdkit_proportion: float = 0.25
- pikachu_proportion: float = 0.25
- cdk_proportion: float = 0.35
- aug_proportion: float = 0.5
- shape: Tuple[int, int] = (299, 299)
- processes: int = 4
- seed: int = 42

* Note: Have a look at examples/generate_depiction_grids_with_fingerprints.py to see how this function was used to generate the grid figures from our publication.

In [None]:
# Create two batches of depictions in memory and display them:
# one with aug_proportion = 0 and one with aug_proportion = 1.
smiles = "CN1C=NC2=C1C(=O)N(C(=O)N2C)C"

with RandomDepictor(42) as depictor:
    fp_depictions, fp_aug_depictions = [
        depictor.batch_depict_with_fingerprints([smiles], 20, aug_proportion=proportion)
        for proportion in (0, 1)
    ]

for depictions in (fp_depictions, fp_aug_depictions):
    ipyplot.plot_images(depictions, max_images=20, img_width=100)

## Create and save a batch of images while ensuring diversity using feature fingerprints


After creating an instance of RandomDepictor, simply call the method batch_depict_save_with_fingerprints().

Args:

- smiles_list: List[str]
- images_per_structure: int
- output_dir: str
- ID_list: List[str]
- indigo_proportion: float = 0.15
- rdkit_proportion: float = 0.3
- cdk_proportion: float = 0.55
- aug_proportion: float = 0.5
- shape: Tuple[int, int] = (299, 299)
- processes: int = 4
- seed: int = 42


*Note: The images created here were used for the animations in the GitHub repository.*

In [11]:
# Make sure the output directories exist.
# os.makedirs(..., exist_ok=True) is a no-op for an existing directory and
# avoids the race between a separate existence check and mkdir.
os.makedirs('not_augmented_fingerprint', exist_ok=True)
os.makedirs('augmented_fingerprint', exist_ok=True)

# Depict and save two batches of images: aug_proportion = 0, then aug_proportion = 1.
smiles = "CN1C=NC2=C1C(=O)N(C(=O)N2C)CC(=O)O"
with RandomDepictor(42) as depictor:
    for output_dir, proportion in (
        ('not_augmented_fingerprint', 0),
        ('augmented_fingerprint', 1),
    ):
        depictor.batch_depict_save_with_fingerprints(
            [smiles],
            100,
            output_dir,
            # A fresh list of 100 IDs ('caffeine_0' ... 'caffeine_99') per call.
            ['caffeine_{}'.format(n) for n in range(100)],
            aug_proportion=proportion,
        )

## Artificial generation of SMILES that represent markush structures

Generate markush structures based on list of SMILES strings

In [8]:
# Feed 100 copies of the same SMILES to the Markush structure generator
# and display the resulting list of SMILES.
input_smiles = 100 * ['CN1C=NC2=C1C(=O)N(C(=O)N2C)C']
markush_generator = RandomMarkushStructureCreator()
markush_smiles = markush_generator.generate_markush_structure_dataset(
    input_smiles
)
markush_smiles

['CN1C=NC2=C1C(=O)N(C)C(=O)N2C[X20]',
 'C1=NC2=C(N1C[Z7])[X](=O)N(C(=O)N2C[X2])[X8]([H])([H])[H]',
 'C1=NC2=C(C(=O)N(C[Y1])C(=O)N2[R7e]([H])([H])[H])N1[Y4]([H])([H])[H]',
 'CN1C=NC2=C1C(=O)N(C)C(=O)N2C[X12]',
 'CN1C2=C(N(C=N2)[R]([H])([H])[H])[R13d](=O)N(C[X9])C1=O',
 'CN1C(=O)C2=C(N=[Y2c]([H])N2C[X8c])N(C1=O)[Z13]([H])([H])[H]',
 'CN1C2=C(C(=O)N(C)C1=O)N(C=N2)[X11]([H])([H])[H]',
 'CN1C=NC2=C1C(=O)N(C[R18])[X2](=O)N2C([Z15])[R16]',
 'CN1C=NC2=C1[Y14](=O)N(C)C(=O)N2C[X18b]',
 'CN1C2=C(N(C=N2)[R12c]([H])([H])[H])[R2](=O)N(C1=O)[Z]([H])([H])[H]',
 'CN1C=NC2=C1C(=O)N(C[Z])C(=O)N2C[X10e]',
 'CN1C=NC2=C1[Z2b](=O)N(C)[X](=O)N2C',
 'CN1C=NC2=C1C(=O)N(C[X7])C(=O)N2C',
 'CN1C2=C(C(=O)N(C)C1=O)N(C=N2)C[Z9]',
 'CN1C2=C(C(=O)N(C)C1=O)N(C)[R17]([H])=N2',
 'CN1C2=C(C(=O)N(C1=O)[X]([H])([H])[H])N(C=N2)C[X4d]',
 'CN1C=NC2=C1[Y18](=O)N(C)C(=O)N2C[Y20]',
 'CN1C2=C(N=C1[X8b])N(C(=O)N(C)C2=O)C([X])[R2]',
 'CN1C2=C(C(=O)N(C[R11f])C1=O)N(C=N2)[R5]([H])([Z])[Z]',
 'CN1C2=C(C(=O)N(C)C1=O)N(C=N2)[Z12c]([H])([H

Depict the markush structures using RanDepict

In [10]:
# Depict every generated Markush SMILES once, in list order, then show the grid.
markush_depictions = []
with RandomDepictor() as depictor:
    for smi in markush_smiles:
        markush_depictions.append(depictor.random_depiction(smi, shape=(100,100)))
ipyplot.plot_images(markush_depictions, max_images=100, img_width=100)

X12_14
X12_14




R_10
R_10
Z14_8
Z16_13
R20_16
Z14_8
Z16_13
R20_16
X16_10
X16_10


?
