## Drive Loading...

In [None]:
from google.colab import drive
drive.mount("/content/drive")
% cd '/content/drive/MyDrive/Università/Models/Reinvent-master'
! ls
! nvidia-smi

## Environment Setup

In [None]:
################################################################################
# INSTALL CONDA ON GOOGLE COLAB
################################################################################
import os
import sys
if not os.path.exists('Miniconda3-py37_4.8.2-Linux-x86_64.sh'):
    ! wget https://repo.anaconda.com/miniconda/Miniconda3-py37_4.8.2-Linux-x86_64.sh
! chmod +x Miniconda3-py37_4.8.2-Linux-x86_64.sh
! bash ./Miniconda3-py37_4.8.2-Linux-x86_64.sh -b -f -p /usr/local
sys.path.append('/usr/local/lib/python3.7/site-packages/')

PREFIX=/usr/local
Unpacking payload ...
Collecting package metadata (current_repodata.json): - \ | done
Solving environment: - \ done

## Package Plan ##

  environment location: /usr/local

  added / updated specs:
    - _libgcc_mutex==0.1=main
    - asn1crypto==1.3.0=py37_0
    - ca-certificates==2020.1.1=0
    - certifi==2019.11.28=py37_0
    - cffi==1.14.0=py37h2e261b9_0
    - chardet==3.0.4=py37_1003
    - conda-package-handling==1.6.0=py37h7b6447c_0
    - conda==4.8.2=py37_0
    - cryptography==2.8=py37h1ba5d50_0
    - idna==2.8=py37_0
    - ld_impl_linux-64==2.33.1=h53a641e_7
    - libedit==3.1.20181209=hc058e9b_0
    - libffi==3.2.1=hd88cf55_4
    - libgcc-ng==9.1.0=hdf63c60_0
    - libstdcxx-ng==9.1.0=hdf63c60_0
    - ncurses==6.2=he6710b0_0
    - openssl==1.1.1d=h7b6447c_4
    - pip==20.0.2=py37_1
    - pycosat==0.6.3=py37h7b6447c_0
    - pycparser==2.19=py37_0
    - pyopenssl==19.1.0=py37_0
    - pysocks==1.7.1=py37_0
    - python==3.7.6=h0371630_2
    - readline==

In [None]:
# Update the conda environment named `base` from the spec file reinvent_shared.yml.
! conda env update -n base -f reinvent_shared.yml

Collecting package metadata (repodata.json): - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | 

## Code Execution

In [None]:
# Import packages
import os
import json
import numpy as np

# Input =====================================================
# Folder containing the JSON templates that the cells below rewrite in place.
templates_folder = 'data/examples/templates'
# Model checkpoint: output of the create-model cell; the training cell resumes
# from its latest checkpoint and also saves to this path.
name_model = 'Training/base_model/base_model.ckpt'
# SMILES file passed as `input_smiles_path` when creating the model.
dataset = 'data/CHEMBL_FULL_DATASET.txt'
# Model checkpoint loaded by the sampling cell (`model_path`).
tuned_model = 'Training/transfer_learning/tuned_model.ckpt'
# SMILES file passed as `input_smiles_path` to the training cell.
dataset_active = 'data/CHEMBL_ACTIVE.txt'
# Batch size used by both the training and the sampling cells.
batch_size = 128
# Value written to `num_epochs` in the training configuration.
epochs = 2
# File the sampling cell writes to (`output_smiles_path`).
output_smiles = 'Results/Outputs/sampled.smi'
# Value written to `num_smiles` in the sampling configuration.
num_smiles = 2048
# ===========================================================


def find_latest_checkpoint(name_model):
    """Return the path of the most recent numbered checkpoint of a model.

    Numbered checkpoints are looked up in the directory of ``name_model`` and
    must be named ``<model file name>.<N>`` with ``N`` a decimal integer
    (e.g. ``base_model.ckpt.3``). The file with the highest ``N`` is selected.
    Anything else in the directory (other models, ``.ipynb_checkpoints``,
    backups such as ``base_model.ckpt.bak``) is ignored.

    Parameters
    ----------
    name_model : str
        Path of the model file, e.g. ``'Training/base_model/base_model.ckpt'``.

    Returns
    -------
    str
        Path of the highest-numbered checkpoint, or ``name_model`` itself when
        no numbered checkpoint exists but the model file does.

    Raises
    ------
    FileNotFoundError
        If the directory does not exist, or if it contains neither a numbered
        checkpoint nor the model file.
    """
    directory, model_file = os.path.split(name_model)
    prefix = model_file + '.'

    latest_step = -1
    latest_entry = None
    # An empty directory part means "current directory" for listing purposes.
    for entry in os.listdir(directory or '.'):
        if not entry.startswith(prefix):
            continue
        suffix = entry[len(prefix):]
        # Only purely numeric suffixes are checkpoints; this also keeps int()
        # from raising on names like "<model>.bak".
        if not suffix.isdecimal():
            continue
        step = int(suffix)
        if step > latest_step:
            latest_step = step
            latest_entry = entry

    if latest_entry is not None:
        return os.path.join(directory, latest_entry)
    # No numbered checkpoint yet: fall back to the model file itself.
    if os.path.isfile(name_model):
        return name_model
    raise FileNotFoundError(
        "No checkpoint found for model '{}'".format(name_model))

# find_latest_checkpoint(tuned_model)

In [None]:
# CREATE MODEL
# write the configuration file to the disc
configuration_JSON_path = os.path.join(templates_folder, "create_model.json")
with open(configuration_JSON_path, 'r') as f:
    data = json.load(f)
    data["parameters"]["output_model_path"] = name_model
    data["parameters"]["input_smiles_path"] = dataset
    print(data)

with open(configuration_JSON_path, 'w') as f:
    json.dump(data, f, indent=4, sort_keys=True)

! python input.py data/examples/templates/create_model.json

In [None]:
# TRAIN MODEL
configuration_JSON_path = os.path.join(templates_folder, "transfer_learning.json")
with open(configuration_JSON_path, 'r') as f:
    data = json.load(f)
    data["parameters"]["input_model_path"] = find_latest_checkpoint(name_model)
    data["parameters"]["output_model_path"] = name_model
    data["parameters"]["input_smiles_path"] = dataset_active
    data["parameters"]["batch_size"] = batch_size
    data["parameters"]["num_epochs"] = epochs
    data["parameters"]["save_every_n_epochs"] = 1
    print(data)

with open(configuration_JSON_path, 'w') as f:
    json.dump(data, f, indent=4, sort_keys=True)

! python input.py data/examples/templates/transfer_learning.json

In [None]:
# GENERATE MODLECULES
configuration_JSON_path = os.path.join(templates_folder, "sampling.json")
with open(configuration_JSON_path, 'r') as f:
    data = json.load(f)
    data["parameters"]["model_path"] = tuned_model
    data["parameters"]["output_smiles_path"] = output_smiles
    data["parameters"]["num_smiles"] = num_smiles
    data["parameters"]["batch_size"] = batch_size
    print(data)

with open(configuration_JSON_path, 'w') as f:
    json.dump(data, f, indent=4, sort_keys=True)
    
! python input.py data/examples/templates/sampling.json