In [None]:
# default_exp dataloaders

# Dataloaders

> Pytorch datasets, dataloaders, collate functions and vocabularies

In [None]:
#hide
from nbdev.showdoc import *
%load_ext autoreload
%autoreload 2

In [None]:
# export
from mrl.imports import *
from mrl.torch_imports import *
from mrl.torch_core import *

## Tokenization

Tokenization defines how we break text strings (i.e. SMILES strings) down into subunits that are fed to the model. The standard process goes as follows:
1. A tokenization process breaks a string down into tokens
2. Tokens are mapped to integers
3. The token integers are sent to the model

This brings up the problem of how best to tokenize smiles. The following methods are implemented out of the box:

### Character Tokenization

Character Tokenization is when we break down SMILES by character. This is implemented with the `tokenize_by_character` function.

```
tokenize_by_character('CC[NH]CC')
>> ['C', 'C', '[', 'N', 'H', ']', 'C', 'C']
```

This form of tokenization is quick and simple. One drawback of this approach is some characters might be overloaded. For example, `Br` is tokenized to `['B', 'r']`, leading to the `B` token meaning both boron (in the standard context) and Bromine (in the `Br` context). In practice, this isn't much of an issue. Language models are particularly adept at learning co-location of tokens.

### Character Tokenization with Replacement

Character tokenization with replacement is the same as character tokenization except we add a dictionary of multi-character tokens to be replaced with single-character tokens. This dictionary has the form `{multi_character_token : single_character_token}`. Before tokenizing by character, all instances of `multi_character_token` are replaced with `single_character_token`. Character Tokenization with Replacement is implemented with the `tokenize_with_replacements` function.

```
replacement_dict = {'Br' : 'R', 'Cl' : 'L'}
tokenize_with_replacements('[Cl]CC[Br]', replacement_dict)
>> ['[', 'L', ']', 'C', 'C', '[', 'R', ']']
```

### Regex Tokenization

Regex tokenization uses a regex string to decompose SMILES. This is mainly used to keep bracketed terms (ie `[O-]`) as single tokens. This method avoids character overloading by keeping all bracketed terms as individual tokens, but has issues with generating a large number of low frequency tokens. Regex tokenization is implemented with the `regex_tokenize` function

```
SMILE_REGEX = """(\[[^\]]+]|Br?|Cl?|N|O|S|P|F|I|b|c|n|o|s|p|H|\(|\)|\.|=|#|-|\+|\\\\|\/|:|~|@|\?|>|\*|\$|\%[0-9]{2}|[0-9])"""
regex_tokenize('CCC[Br]', re.compile(SMILE_REGEX))
>>['C', 'C', 'C', '[Br]']
```



In [None]:
# export

# Default single-character token list for character-level tokenization.
# The list order matters: a vocabulary built from it assigns indices by position.
SMILES_CHAR_VOCAB = ['#', '(', ')', '+', '-', '/', '0',
                 '1', '2', '3', '4', '5', '6', '7',
                 '8', '=', '@', 'B', 'C', 'F', 'H',
                 'I', 'N', 'O', 'P', 'S', '[', '\\',
                 ']', 'c', 'i', 'l', 'n', 'o', 'r', 's',
                 '*', ':']


# Control tokens: beginning of sentence, end of sentence, padding, unknown
SPECIAL_TOKENS = ['bos', 'eos', 'pad', 'unk']

# Mapping tokens of the form `[{isotope}*:{map_num}]` for isotopes 1-2 and map numbers 1-5
MAPPING_TOKENS = ['[1*:1]', '[2*:1]', '[1*:2]', '[2*:2]', '[1*:3]',
                  '[2*:3]', '[1*:4]', '[2*:4]', '[1*:5]', '[2*:5]']

# Replacement dictionary {multi_character_token : single_character_token} for halogens
HALOGEN_REPLACE = {'Br':'R',
                   'Cl':'L'}

# Replacement dictionary sending each mapping token to a single uppercase letter;
# none of these letters appear in `SMILES_CHAR_VOCAB`
MAPPING_REPLACE = {'[1*:1]':'A',
                   '[2*:1]':'D',
                   '[1*:2]':'E',
                   '[2*:2]':'G',
                   '[1*:3]':'J',
                   '[2*:3]':'M',
                   '[1*:4]':'Q',
                   '[2*:4]':'T',
                   '[1*:5]':'U', 
                   '[2*:5]':'V'}


These are regex patterns to decompose smiles into tokens

`SMILE_REGEX` is based off [this work](https://github.com/pschwllr/MolecularTransformer/blob/master/README.md). The pattern decomposes SMILES into individual characters, but keeps `Cl`, `Br`, and any term in brackets (ie `[O-]`) intact. 

`MAPPING_REGEX` is a derivative of `SMILE_REGEX` designed to work with the mapping framework used with the `Block` class. `MAPPING_REGEX` keeps `Cl`, `Br`, and any string of the form `[{isotope}*:{map_num}]` intact

In [None]:
# export

# Both patterns are raw strings, so every backslash reaches `re` exactly as written,
# and they are joined by implicit string concatenation so that no newline or
# indentation ends up inside the pattern. (A triple-quoted string split across two
# lines would embed "\n" plus the indent as part of an alternative.)

# Keeps any bracketed term (ie `[O-]`), `Br` and `Cl` intact; everything else is a
# single character, except ring closures of the form `%NN`.
SMILE_REGEX = (r"(\[[^\]]+]|Br?|Cl?|N|O|S|P|F|I|b|c|n|o|s|p|H|\(|\)|\.|=|"
               r"#|-|\+|\\|\/|:|~|@|\?|>|\*|\$|\%[0-9]{2}|[0-9])")

# Keeps only bracketed terms of the form `[{isotope}*:{map_num}]` intact (one
# character each for isotope and map number); other brackets are emitted as
# separate `[` and `]` tokens.
MAPPING_REGEX = (r"(\[.\*:.]|Br?|Cl?|N|O|S|P|F|I|b|c|n|o|s|p|H|\[|\]|\(|\)|\.|=|"
                 r"#|-|\+|\\|\/|:|~|@|\?|>|\*|\$|\%[0-9]{2}|[0-9])")

In [None]:
# export

def tokenize_by_character(input):
    "Breaks `input` into a list of its individual characters"
    return list(input)

def tokenize_with_replacements(input, replacement_dict):
    "Applies each `replacement_dict` substitution to `input` in dict order, then splits the result into characters"
    replaced = input
    for multi_char, single_char in replacement_dict.items():
        replaced = replaced.replace(multi_char, single_char)
    return list(replaced)

def regex_tokenize(input, regex):
    'Returns, in order, every match of the compiled `regex` found in `input`'
    return list(regex.findall(input))

In [None]:
# The same bromine-containing SMILES under each tokenizer: character tokenization
# splits `Br` in two, replacement swaps it for `R`, `SMILE_REGEX` keeps the whole
# bracketed term, and `MAPPING_REGEX` keeps only the mapping token and `Br` intact.
assert tokenize_by_character('CCC[Br]') == ['C', 'C', 'C', '[', 'B', 'r', ']']
assert tokenize_with_replacements('CCC[Br]', HALOGEN_REPLACE) == ['C', 'C', 'C', '[', 'R', ']']
assert regex_tokenize('CCC[Br]', re.compile(SMILE_REGEX)) == ['C', 'C', 'C', '[Br]']
assert regex_tokenize('[1*:1]CCC[Br]', re.compile(MAPPING_REGEX)) == ['[1*:1]', 'C', 'C', 'C', '[', 'Br', ']']

## Vocabulary

The `Vocab` class handles tokenization. `Vocab.tokenize` breaks strings down into tokens. `Vocab.numericalize` maps tokens to integers. `Vocab.reconstruct` converts integers back into strings.

`Vocab` holds `itos`, a list of tokens, and `stoi`, a dictionary mapping tokens to integers. `Vocab` automatically adds four special tokens `['bos', 'eos', 'pad', 'unk']` indicating beginning of sentence, end of sentence, padding and unknown.

### Custom Vocabulary

To implement custom tokenization, subclass `Vocab` and update the `tokenize`, `numericalize` and `reconstruct` methods. Use the `test_reconstruction` function to verify your custom vocab can successfully reconstruct sequences.

In [None]:
# export

class Vocab():
    '''
    Vocab - base vocabulary class
    
    Holds `itos` (list of tokens) and `stoi` (dict mapping token to index).
    Subclasses implement `tokenize`.
    
    Inputs:
    
        `itos` - list, list of tokens in vocabulary
    '''
    def __init__(self, itos):
        self.special_tokens = ['bos', 'eos', 'pad', 'unk']
        
        # special tokens always come first, so they occupy indices 0-3
        self.itos = self.special_tokens + [i for i in itos if i not in self.special_tokens]
        self.stoi = {tok: idx for idx, tok in enumerate(self.itos)}
        # tokens seen by `numericalize` that were missing from `stoi`
        self.unks = []
        
    def tokenize(self, input):
        'Tokenize `input`'
        raise NotImplementedError
        
    def numericalize(self, input):
        '''
        Numericalize `input` (an iterable of tokens) into a list of integers.
        Tokens missing from the vocabulary map to the `unk` index and are
        recorded in `self.unks`
        '''
        unk_idx = self.stoi['unk']
        output = []
        for tok in input:
            idx = self.stoi.get(tok)
            if idx is None:
                output.append(unk_idx)
                self.unks.append(tok)
            else:
                output.append(idx)
        return output
    
    def reconstruct(self, input):
        '''
        Reconstruct `input` (an iterable of token indices) into a string.
        Stops at the first `eos` and drops any `bos`
        '''
        output = []
        for item in input:
            item = self.itos[item]
            if item=='eos':
                break
                
            if not item=='bos':
                output.append(item)
        
        return ''.join(output)
                
    def update_vocab(self):
        '''
        Adds tokens in `self.unks` to vocabulary, then clears `self.unks`.
        New tokens are appended in first-seen order so the resulting indices
        are the same on every run
        '''
        # dict.fromkeys de-duplicates while keeping insertion order; a set would
        # make the index of each new token depend on string hashing
        new_tokens = [tok for tok in dict.fromkeys(self.unks) if tok not in self.stoi]
        self.itos += new_tokens
        self.stoi = {tok: idx for idx, tok in enumerate(self.itos)}
        self.unks = []
        
    def update_vocab_from_data(self, inputs):
        'Tokenizes `inputs` and updates the vocabulary with any unknown tokens'
        # numericalize is called only for its side effect of filling `self.unks`
        for item in inputs:
            self.numericalize(self.tokenize(item))
        self.update_vocab()
        
        
class CharacterVocab(Vocab):
    '''
    CharacterVocab - vocabulary that tokenizes one character at a time
    '''
    def tokenize(self, input):
        'Split `input` into characters and wrap them in `bos`/`eos`'
        return ['bos', *tokenize_by_character(input), 'eos']
    
    
class CharacterReplaceVocab(Vocab):
    '''
    CharacterReplaceVocab - character tokenization after substituting
    multi-character tokens with single-character stand-ins
    
    Inputs:
    
        `itos` - list, list of tokens
        `replace_dict` - dict, replacement dictionary of the form {multi_character_token : single_character_token}. 
        ie replace_dict={'Br':'R', 'Cl':'L'}
    '''
    def __init__(self, itos, replace_dict):
        self.replace_dict = replace_dict
        # inverse mapping, used by `reconstruct` to restore the original tokens
        self.reverse_dict = {single: multi for multi, single in replace_dict.items()}
        
        # work on a copy and make sure every stand-in character is in the vocabulary
        tokens = list(itos)
        for single in self.reverse_dict:
            if single not in tokens:
                tokens.append(single)
        super().__init__(tokens)
        
    def tokenize(self, smile):
        'Apply the replacements to `smile`, split into characters and wrap in `bos`/`eos`'
        return ['bos', *tokenize_with_replacements(smile, self.replace_dict), 'eos']
    
    def reconstruct(self, input):
        'Reconstruct `input` into a string, mapping stand-in characters back to the tokens they replaced'
        pieces = []
        for idx in input:
            token = self.itos[idx]
            if token == 'eos':
                break
            if token == 'bos':
                continue
            pieces.append(self.reverse_dict.get(token, token))
        
        return ''.join(pieces)
    
    
class RegexVocab(Vocab):
    '''
    RegexVocab - vocabulary that tokenizes with a regular expression
    
    Inputs:
    
        `itos` - list, list of tokens
        `pattern` - str, regex string
    '''
    def __init__(self, itos, pattern):
        super().__init__(itos)
        
        # keep the source pattern alongside its compiled form
        self.pattern = pattern
        self.regex = re.compile(pattern)
        
    def tokenize(self, smile):
        'Tokenize `smile` with the compiled pattern and wrap the result in `bos`/`eos`'
        return ['bos', *regex_tokenize(smile, self.regex), 'eos']

In [None]:
# export

def test_reconstruction(vocab, inputs):
    "Returns all items in `inputs` that can't be correctly reconstructed using `vocab`"
    fails = []
    for item in inputs:
        recon = vocab.reconstruct(vocab.numericalize(vocab.tokenize(item)))
        if not item==recon:
            fails.append((item, recon))
            
    return fails

In [None]:
# Load the example SMILES strings used by the reconstruction checks in the following cells
df = pd.read_csv('files/smiles.csv')
smiles = df.smiles.values

In [None]:
# Every example SMILES must survive tokenize -> numericalize -> reconstruct unchanged
vocab = CharacterVocab(SMILES_CHAR_VOCAB)
assert test_reconstruction(vocab, smiles)==[]

In [None]:
# `Br` is tokenized as the stand-in `R`, and reconstruction must map it back
vocab = CharacterReplaceVocab(SMILES_CHAR_VOCAB, HALOGEN_REPLACE)
assert vocab.tokenize('CC[Br]') == ['bos', 'C', 'C', '[', 'R', ']', 'eos']
assert test_reconstruction(vocab, smiles)==[]

In [None]:
# Regex tokenization yields bracketed tokens such as `[Br]` that are not in
# `SMILES_CHAR_VOCAB`, so the vocab is extended from the data before the round-trip check
vocab = RegexVocab(SMILES_CHAR_VOCAB, SMILE_REGEX)
assert vocab.tokenize('CC[Br]') == ['bos', 'C', 'C', '[Br]', 'eos']
vocab.update_vocab_from_data(smiles)
assert test_reconstruction(vocab, smiles)==[]

## Collate Functions

Collate functions are used to batch `Dataset` outputs into batches

In [None]:
# export

def batch_sequences(sequences, pad_idx):
    '''
    Packs `sequences` into one dense long tensor filled with `pad_idx`.
    The tensor is one column wider than the longest sequence, so every
    row ends with at least one padding value
    '''
    n_rows = len(sequences)
    n_cols = max(len(seq) for seq in sequences) + 1
    
    padded = torch.zeros((n_rows, n_cols), dtype=torch.long) + pad_idx
    
    # copy each sequence into the left-hand side of its row
    for row, seq in enumerate(sequences):
        padded[row, :seq.shape[0]] = seq
        
    return padded
    
    
def lm_collate(batch, pad_idx, batch_first=True):
    '''
    Collate function for language models. Pads `batch` with `pad_idx` and
    returns an `(x, y)` pair for next-token prediction, where `y` is `x`
    shifted by one position along the sequence dimension. The sequence
    dimension is second if `batch_first`, otherwise first
    '''
    padded = batch_sequences(batch, pad_idx)
        
    if not batch_first:
        padded = padded.T
        return to_device((padded[:-1,:], padded[1:,:]))
        
    return to_device((padded[:,:-1], padded[:,1:]))

def sequence_prediction_collate(batch, pad_idx, batch_first=True):
    '''
    Collate function for predicting some y value from a sequence.
    Each item of `batch` is a `(sequence, y_val)` pair; returns the padded
    sequences together with the stacked y values, last dimension squeezed
    '''
    sequences = [pair[0] for pair in batch]
    padded = batch_sequences(sequences, pad_idx)
    targets = torch.stack([pair[1] for pair in batch]).squeeze(-1)

    if batch_first:
        return to_device((padded, targets))
        
    return to_device((padded.T, targets))

def vector_collate(batch):
    '''
    Collate function for vectors. Stacks the items of `batch` into one tensor
    '''
    return to_device(torch.stack(batch))

def vector_reconstruction_collate(batch, pad_idx, batch_first=True):
    '''
    Collate function for predicting a sequence from an input vector where 
    the sequence itself is also needed as input (ie predict SMILES from properties).
    Each item of `batch` is a `(vector, sequence)` pair; returns
    `((x_sequence, vectors), y_sequence)` with `y_sequence` shifted one
    position from `x_sequence`
    '''
    vectors = torch.stack([pair[0] for pair in batch])
    padded = batch_sequences([pair[1] for pair in batch], pad_idx)
    
    if not batch_first:
        padded = padded.T
        return to_device(((padded[:-1,:], vectors), padded[1:,:]))
        
    return to_device(((padded[:,:-1], vectors), padded[:,1:]))

def vector_prediction_collate(batch):
    '''
    Collate function for predicting some y value from a vector.
    Each item of `batch` is a `(vector, y_val)` pair; both parts are
    stacked and the last dimension of the y values is squeezed
    '''
    vectors = torch.stack([pair[0] for pair in batch])
    targets = torch.stack([pair[1] for pair in batch]).squeeze(-1)
    return to_device((vectors, targets))


## Datasets

Datasets subclass the Pytorch `Dataset` class. MRL datasets add a collate function and the `BaseDataset.dataloader` function to easily generate Pytorch dataloaders from the same class

Datasets should all contain a `new` method. The purpose of `new` is to create a new dataset from new data using the same input arguments as the current dataset. This is used during generative training to process and batch generated samples to ensure they are processed and batched the same as training data

In [None]:
# export

class BaseDataset(Dataset):
    '''
    BaseDataset - base dataset
    
    Subclasses implement `__len__`, `__getitem__`, `new` and `split_on_idxs`.
    
    Inputs:
    
        `collate_function` - batch collate function for the particular dataset class
    '''
    def __init__(self, collate_function):
        self.collate_function = collate_function
        
    def dataloader(self, bs, num_workers=-1, **dl_kwargs):
        '''
        Builds a `DataLoader` over this dataset with batch size `bs` and the
        dataset's collate function. `num_workers=-1` means: use the `ncpus`
        environment variable if set, otherwise `os.cpu_count()`.
        `dl_kwargs` are passed through to `DataLoader`
        '''
        if num_workers==-1:
            if 'ncpus' in os.environ:
                num_workers = int(os.environ['ncpus'])
            else:
                # os.cpu_count() may return None; fall back to loading in the main process
                num_workers = os.cpu_count() or 0
                
        return DataLoader(self, batch_size=bs, num_workers=num_workers, 
                          collate_fn=self.collate_function, **dl_kwargs)
    
    def new(self):
        'Creates a new dataset from new data with the same settings. Implemented by subclasses'
        raise NotImplementedError
        
    def split(self, percent_valid):
        '''
        Randomly splits the dataset, holding out a `percent_valid` fraction
        (between 0 and 1) for validation. Returns whatever `split_on_idxs` returns
        '''
        # size the split from this dataset's own length, not from any outside variable
        n_items = len(self)
        idxs = torch.randperm(n_items).numpy()
        train_length = int(n_items*(1-percent_valid))
        train_idxs = idxs[:train_length]
        valid_idxs = idxs[train_length:]
        
        return self.split_on_idxs(train_idxs, valid_idxs)
        
    def split_on_idxs(self, train_idxs, valid_idxs):
        'Builds datasets from `train_idxs` and `valid_idxs`. Implemented by subclasses'
        raise NotImplementedError

## Text Datasets

Text datasets deal with tokenizing and numericalizing text data, like SMILES strings. 

`TextDataset` returns numericalized SMILES for language modeling.

`TextPredictionDataset` returns numericalized SMILES along with some `y_val` output value, for tasks like property prediction

In [None]:
# export

class TextDataset(BaseDataset):
    '''
    TextDataset - dataset of numericalized text sequences for language models
    
    Inputs:
    
        `smiles` - list[str], list of text sequences
        
        `vocab` - Vocab, vocabulary for tokenization/numericalization
        
        `collate_function` - batch collate function. If None, defaults to `lm_collate`
    '''
    def __init__(self, smiles, vocab, collate_function=None):
        if collate_function is None:
            # default collate pads with the vocabulary's `pad` index
            collate_function = partial(lm_collate, pad_idx=vocab.stoi['pad'])
        
        super().__init__(collate_function)
        self.smiles = smiles
        self.vocab = vocab
        
    def __len__(self):
        return len(self.smiles)
    
    def __getitem__(self, idx):
        'Returns item `idx` tokenized and numericalized as a `LongTensor`'
        token_ids = self.vocab.numericalize(self.vocab.tokenize(self.smiles[idx]))
        return torch.LongTensor(token_ids)
    
    def new(self, smiles):
        'Creates a dataset of the same class over `smiles`, reusing this vocab and collate function'
        return type(self)(smiles, self.vocab, self.collate_function)
    
    def split_on_idxs(self, train_idxs, valid_idxs):
        'Returns `(train_ds, valid_ds)` built from the items at `train_idxs` and `valid_idxs`'
        train_smiles = [self.smiles[i] for i in train_idxs]
        valid_smiles = [self.smiles[i] for i in valid_idxs]
        return (self.new(train_smiles), self.new(valid_smiles))


In [None]:
# Build a character-level language-model dataset from the example SMILES
df = pd.read_csv('files/smiles.csv')
vocab = CharacterVocab(SMILES_CHAR_VOCAB)

ds = TextDataset(df.smiles.values, vocab)
dl = ds.dataloader(16)
x,y = next(iter(dl))

# the target is the input shifted by one token
assert (x[:,1:] == y[:,:-1]).all()

# splitting must neither drop nor duplicate items
assert sum([len(i) for i in ds.split(0.2)]) == len(ds)

In [None]:
# export

class TextPredictionDataset(TextDataset):
    '''
    TextPredictionDataset - dataset pairing numericalized text sequences
    with an output value to predict
    
    Inputs:
    
        `smiles` - list[str], list of text sequences
        
        `y_vals` - list[int, float], list of paired output values
        
        `vocab` - Vocab, vocabulary for tokenization/numericalization
        
        `collate_function` - batch collate function. If None, defaults to `sequence_prediction_collate`
    '''
    def __init__(self, smiles, y_vals, vocab, collate_function=None):
        
        if collate_function is None:
            # default collate pads with the vocabulary's `pad` index
            pad_idx = vocab.stoi['pad']
            collate_function = partial(sequence_prediction_collate, pad_idx=pad_idx)
        
        super().__init__(smiles, vocab, collate_function)
        
        self.y_vals = y_vals
        
    def __getitem__(self, idx):
        'Returns `(ints, y_val)`: the numericalized sequence and its output value as a one-element float tensor'
        token_ids = super().__getitem__(idx)
        target = torch.Tensor([self.y_vals[idx]]).float()
        return (token_ids, target)
    
    def new(self, smiles, y_vals):
        'Creates a dataset of the same class over `smiles` and `y_vals`, reusing this vocab and collate function'
        return type(self)(smiles, y_vals, self.vocab, self.collate_function)
    
    def split_on_idxs(self, train_idxs, valid_idxs):
        'Returns `(train_ds, valid_ds)` built from the items at `train_idxs` and `valid_idxs`'
        parts = []
        for idxs in (train_idxs, valid_idxs):
            subset_smiles = [self.smiles[i] for i in idxs]
            subset_y = [self.y_vals[i] for i in idxs]
            parts.append(self.new(subset_smiles, subset_y))
        
        return tuple(parts)


In [None]:
# Pair every SMILES with a dummy target of 0 and check the targets come back as zeros
ds = TextPredictionDataset(df.smiles.values, [0]*len(df.smiles.values), vocab)
dl = ds.dataloader(16)
x,y = next(iter(dl))
assert (y == torch.zeros(y.shape).float()).all()
# splitting must neither drop nor duplicate items
assert sum([len(i) for i in ds.split(0.2)]) == len(ds)

## Vector Datasets

Another common dataset framework is where we are dealing with vectors derived from a molecule. This could be a vector of properties, fingerprints, or any task where a molecule-derived vector is needed.

`Vector_Dataset` is a base dataset that simply returns the molecule derived vector

`Vec_Recon_Dataset` returns the molecule derived vector and tokenized SMILES strings. This is used for tasks like generating compounds based on an input vector or fingerprint

In [None]:
# export

class Vector_Dataset(BaseDataset):
    '''
    Vector_Dataset - dataset returning a vector computed from each sequence
    
    Inputs:
    
        `smiles` - list[str], list of text sequences
        
        `mol_function` - function to convert smiles to a vector
                        
        `collate_function` - batch collate function. If None, defaults to `vector_collate`
    '''
    def __init__(self, smiles, mol_function, collate_function=None):
        super().__init__(vector_collate if collate_function is None else collate_function)
        
        self.smiles = smiles
        self.mol_function = mol_function
        
    def __len__(self):
        return len(self.smiles)
    
    def __getitem__(self, idx):
        'Returns `mol_function` applied to item `idx`, as a `FloatTensor`'
        return torch.FloatTensor(self.mol_function(self.smiles[idx]))
    
    def new(self, smiles):
        'Creates a dataset of the same class over `smiles`, reusing this `mol_function` and collate function'
        return type(self)(smiles, self.mol_function, self.collate_function)
    
    def split_on_idxs(self, train_idxs, valid_idxs):
        'Returns `(train_ds, valid_ds)` built from the items at `train_idxs` and `valid_idxs`'
        train_smiles = [self.smiles[i] for i in train_idxs]
        valid_smiles = [self.smiles[i] for i in valid_idxs]
        
        return (self.new(train_smiles), self.new(valid_smiles))


In [None]:
from mrl.chem import *

def smile_to_props(smile):
    "Converts `smile` into a 4-element array: `molwt/500`, `tpsa/100`, `rings/5`, `heavy_atoms/20`"
    mol = to_mol(smile)
    scaled_props = [
        molwt(mol)/500,
        tpsa(mol)/100,
        rings(mol)/5,
        heavy_atoms(mol)/20,
    ]
    return np.array(scaled_props)

# A batch of 16 molecules with 4 properties each should collate to a (16, 4) tensor
ds = Vector_Dataset(df.smiles.values, smile_to_props)
dl = ds.dataloader(16)
x = next(iter(dl))
assert x.shape==(16,4)
# splitting must neither drop nor duplicate items
assert sum([len(i) for i in ds.split(0.2)]) == len(ds)

  return f(*args, **kwds)


In [None]:
# export

class Vec_Recon_Dataset(Vector_Dataset):
    '''
    Vec_Recon_Dataset - dataset returning a molecule-derived vector together
    with the numericalized sequence it was computed from
    
    Inputs:
    
        `smiles` - list[str], list of text sequences
        
        `vocab` - Vocab, vocabulary for tokenization/numericalization
        
        `mol_function` - function to convert smiles to fingerprints
                        
        `collate_function` - batch collate function. If None, defaults to `vector_reconstruction_collate`
    '''
    def __init__(self, smiles, vocab, mol_function, collate_function=None):
        
        if collate_function is None:
            # default collate pads with the vocabulary's `pad` index
            pad_idx = vocab.stoi['pad']
            collate_function = partial(vector_reconstruction_collate, pad_idx=pad_idx)
            
        super().__init__(smiles, mol_function, collate_function)
        self.vocab = vocab
        
    def __getitem__(self, idx):
        'Returns `(vec, ints)`: the vector as a `FloatTensor` and the numericalized sequence as a `LongTensor`'
        smile = self.smiles[idx]
        
        vec = torch.FloatTensor(self.mol_function(smile))
        
        token_ids = self.vocab.numericalize(self.vocab.tokenize(smile))
        ints = torch.LongTensor(token_ids)
        
        return (vec, ints)
    
    def new(self, smiles):
        'Creates a dataset of the same class over `smiles`, reusing this vocab, `mol_function` and collate function'
        return type(self)(smiles, self.vocab, self.mol_function, self.collate_function)
    
    def split_on_idxs(self, train_idxs, valid_idxs):
        'Returns `(train_ds, valid_ds)` built from the items at `train_idxs` and `valid_idxs`'
        train_smiles = [self.smiles[i] for i in train_idxs]
        valid_smiles = [self.smiles[i] for i in valid_idxs]
        
        return (self.new(train_smiles), self.new(valid_smiles))


In [None]:
# x is a (sequence, vector) pair and y is the sequence shifted by one token
ds = Vec_Recon_Dataset(df.smiles.values, vocab, ECFP6)
dl = ds.dataloader(16)
x,y = next(iter(dl))
assert len(x)==2
assert (x[0][:,1:] == y[:,:-1]).all()
# splitting must neither drop nor duplicate items
assert sum([len(i) for i in ds.split(0.2)]) == len(ds)

In [None]:
# export

class Vec_Prediction_Dataset(Vector_Dataset):
    '''
    Vec_Prediction_Dataset - dataset pairing a molecule-derived vector
    with an output value to predict
    
    Inputs:
    
        `smiles` - list[str], list of text sequences
        
        `y_vals` - list[int, float], list of paired output values
                
        `mol_function` - function to convert smiles to fingerprints
                        
        `collate_function` - batch collate function. If None, defaults to `vector_prediction_collate`
    '''
    def __init__(self, smiles, y_vals, mol_function, collate_function=None):
        chosen_collate = vector_prediction_collate if collate_function is None else collate_function
        super().__init__(smiles, mol_function, chosen_collate)
        
        self.y_vals = y_vals
        
    def __len__(self):
        return len(self.smiles)
    
    def __getitem__(self, idx):
        'Returns `(fp, y_val)`: the vector for item `idx` and its output value as a one-element `FloatTensor`'
        vector = super().__getitem__(idx)
        target = torch.FloatTensor([self.y_vals[idx]])
        return (vector, target)
    
    def new(self, smiles, y_vals):
        'Creates a dataset of the same class over `smiles` and `y_vals`, reusing this `mol_function` and collate function'
        return type(self)(smiles, y_vals, self.mol_function, self.collate_function)
    
    
    def split_on_idxs(self, train_idxs, valid_idxs):
        'Returns `(train_ds, valid_ds)` built from the items at `train_idxs` and `valid_idxs`'
        parts = []
        for idxs in (train_idxs, valid_idxs):
            subset_smiles = [self.smiles[i] for i in idxs]
            subset_y = [self.y_vals[i] for i in idxs]
            parts.append(self.new(subset_smiles, subset_y))
        
        return tuple(parts)


In [None]:
# Pair every SMILES with a dummy target of 0; check a batch can be drawn and the split is complete
ds = Vec_Prediction_Dataset(df.smiles.values, [0 for i in df.smiles.values], ECFP6)
dl = ds.dataloader(16)
x,y = next(iter(dl))
assert sum([len(i) for i in ds.split(0.2)]) == len(ds)

In [None]:
# hide
# Run nbdev's notebook2script; the output below lists the notebooks it converted
from nbdev.export import notebook2script; notebook2script()

Converted 00_core.ipynb.
Converted 01_chem.ipynb.
Converted 02_template.filters.ipynb.
Converted 03_template.template.ipynb.
Converted 04_template.blocks.ipynb.
Converted 05_torch_core.ipynb.
Converted 06_layers.ipynb.
Converted 07_dataloaders.ipynb.
Converted index.ipynb.
Converted template.overview.ipynb.
Converted tutorials.ipynb.
Converted tutorials.structure_enumeration.ipynb.
Converted tutorials.template.advanced.ipynb.
Converted tutorials.template.beginner.ipynb.
Converted tutorials.template.intermediate.ipynb.
