## Optimised diffusion models 1D
Notebook created by Frédéric Charbonnier & Joel Clerc as part of the Master IS research project "Learning to generate
molecules".  
This notebook builds on [Phil Wang's `denoising-diffusion-pytorch` repository](https://github.com/lucidrains/denoising-diffusion-pytorch) to implement conditional and unconditional DDPMs (denoising diffusion probabilistic models).
Inspired by [Nathan C. Frey's Tutorial](https://ncfrey.github.io/).

### Diffusion model 1D with classifier guidance

In [None]:
# code from https://github.com/lucidrains/denoising-diffusion-pytorch/blob/main/denoising_diffusion_pytorch/denoising_diffusion_pytorch_1d.py
# Classifier-free guidance added (inspired by https://github.com/lucidrains/denoising-diffusion-pytorch/blob/main/denoising_diffusion_pytorch/classifier_free_guidance.py)


from pathlib import Path


from multiprocessing import cpu_count



import torch.nn.functional as F





from einops.layers.torch import Rearrange

from accelerate import Accelerator
from ema_pytorch import EMA



from denoising_diffusion_pytorch.version import __version__







### Molecules functions

In [None]:
#!pip install --pre deepchem[torch]
import deepchem as dc


  



import matplotlib.pyplot as plt





In [None]:
# Smoke test: convert the SMILES dataset to SELFIES, decode the third entry
# back to SMILES and render the resulting molecule.
smiles_dataset = get_smiles()
original_selfies = smiles_to_selfies(smiles_dataset)

# sf.decoder turns the SELFIES string back into a SMILES string that RDKit can parse.
decoded_smiles = sf.decoder(original_selfies[2])
mol = Chem.MolFromSmiles(decoded_smiles)

mol_img = Chem.Draw.MolToImage(mol, size=(300, 300))
plt.imshow(mol_img)

In [None]:
# Round-trip test: original SELFIES -> continuous molecules -> recomputed SELFIES.
# The encoder returns the continuous representation together with the metadata
# that the decoder takes back as arguments.
(
    continous_mols,
    selfies_alphabet,
    largest_selfie_len,
    int_mol,
    dequantized_onehots_min,
    dequantized_onehots_max,
) = selfies_to_continous_mols(original_selfies)
recalculate_selfies_test = mols_continous_to_selfies(
    continous_mols,
    selfies_alphabet,
    largest_selfie_len,
    int_mol,
    dequantized_onehots_min,
    dequantized_onehots_max,
)
mols_test, valid_selfies_list, valid_count = selfies_to_mols(recalculate_selfies_test)
print(f"{valid_count / len(mols_test) * 100:.2f} % of generated samples are valid molecules.")

# Draw the first nine reconstructed molecules in a 3x3 grid.
smiles_label = [Chem.MolToSmiles(mol) for mol in mols_test[:9]]
img = Chem.Draw.MolsToGridImage(mols_test[:9], molsPerRow=3, subImgSize=(200,200), returnPNG=False)
# The results folder is otherwise only created by the training cell further
# down; create it here so the save does not fail on a fresh checkout.
Path("results").mkdir(exist_ok=True)
img.save('results/test-functions.png')

# Similarity of the third reconstructed molecule against the original set.
original_mols, _, _ = selfies_to_mols(original_selfies)
tanimoto_scores = tanimoto_similarity(original_mols, mols_test[2])
print("Top3 similarity score:")
# Each entry unpacks as an (index, score) pair; only the score is reported.
# NOTE(review): presumably sorted by descending score — confirm in tanimoto_similarity.
for idx, ts in tanimoto_scores[:3]:
    print(round(ts, 3))

# Report each supported property of the same molecule, one label line then the value.
for property_name in ("Weight", "LogP", "QED"):
    print(f"{property_name}:")
    print(get_mols_properties([mols_test[2]], property_name)[0])

mol_img = Chem.Draw.MolToImage(mols_test[2],size=(300,300))
plt.imshow(mol_img)

### Training

In [None]:


# Training hyper-parameters; the sequence length is read from the second
# dimension of the prepared dataset.
batch_size = 16
channels = 1
seq_length = dataset.shape[1]

# Folder used for outputs; created on first run and reused afterwards.
results_folder = Path("./results")
results_folder.mkdir(exist_ok=True)

# Label and shape are printed on separate lines.
print("dataset size:", dataset.shape, sep="\n")

In [None]:
# NOTE(review): presumably the share of original molecules whose computed
# property class matches the assigned class — confirm in generate_mols_match_classes.
# The printed suffix '% de réussite' is French for "% success".
class_match_rate = generate_mols_match_classes(
    original_mols,
    classes,
    type_property,
    num_classes,
    classes_breakpoints,
)
print('Match classes : ', class_match_rate, '% de réussite')

In [None]:
# NOTE(review): presumably the interval (in training steps) between checkpoint
# saves / sample generations — confirm where this value is consumed.
save_and_sample_every = 5

In [None]:
# Sample from the diffusion model, conditioned on `image_classes`.
# cond_scale is the condition scaling: anything greater than 1 strengthens the
# classifier-free guidance; 3-8 is reportedly good empirically.
guidance_scale = 6.
sampled_images = diffusion.sample(classes=image_classes, cond_scale=guidance_scale)

# NOTE(review): the previous comment gave (8, 3, 128, 128), which looks like a
# 2D image shape — confirm the actual shape produced by this 1D model.
sampled_images.shape

# Interpolation.
# NOTE(review): both endpoints are the same first training sample — confirm
# this is intended.
interpolate_out = diffusion.interpolate(training_images[:1], training_images[:1], image_classes[:1])