In [None]:
# Make clang the C++ compiler for any native build triggered by the install below.
# `%env` sets the variable in the kernel process, so child processes inherit it;
# `!export ...` only affected a throwaway subshell and never reached pip.
%env CXX=clang
# `%pip` installs into the environment of the running kernel (a bare `!pip` may
# resolve to a different Python on PATH).
%pip install -U dp-tracking-sdk -i https://repo.mlops.dp.tech/repository/pypi-group/simple

In [None]:
# Initialise an Aim repository via the `aim` CLI (run from the notebook's working directory).
!aim init

In [None]:
# Load the Aim IPython extension; it provides the `%aim` line magic used below.
%load_ext aim

In [None]:
# Start the Aim UI through the `%aim` magic registered by the extension.
%aim up

In [None]:
from aim import Run
# Create a tracking run with default settings (no repo or experiment given).
run = Run()

In [None]:
# Hyper-parameters of the demo run, stored on the run under the 'hparams' key.
hparams_dict = dict(learning_rate=0.001, batch_size=32)
run['hparams'] = hparams_dict

In [None]:
# Track a single scalar value under the metric name 'loss'.
run.track(3.0, name='loss')

In [None]:
# Track the integers 0..9 one by one under the metric name 'numbers'.
for i in range(10):
    run.track(i, name='numbers')

In [None]:
# Finalize the demo run now that nothing more will be tracked on it in this notebook.
run.finalize()

In [None]:
import os
from typing import Dict, Union, List, Tuple, Any

def report_sampler(config_logger: Dict, sysname: str,
                   numConfs, image, molecule, traj_xtc, RMSD_dataframe,
                   step: int = -1):
    """
    Log one sampling result to a new Aim run and close the run afterwards.

    Parameters
    ----------
    config_logger: Dict
        Logger settings. "project" is assigned to the run's ``experiment``;
        "experiment" is used as a tag and as the prefix of the run name.
        If an "aim_repo" key is present its value is passed to ``Run(repo=...)``.
    sysname: str
        System name; the run is named "<experiment>-<sysname>".
    numConfs
        Value tracked under the name "numConfs".
    image
        Converted with ``str()`` and wrapped in ``aim.Image``
        (presumably a path to an image file -- confirm against callers).
    molecule, traj_xtc
        Currently not logged (the Molecule track that would consume them is
        disabled); kept in the signature so existing callers keep working.
    RMSD_dataframe
        Wrapped in ``aim.Table`` and tracked under the name "RMSD".
    step: int
        Forwarded to ``Run.track`` only when it is >= 0.

    Raises
    ------
    KeyError
        If "project" or "experiment" is missing from ``config_logger``.
    """
    from aim import Image, Run, Table

    if "aim_repo" in config_logger:
        aim_run = Run(repo=config_logger["aim_repo"])
    else:
        aim_run = Run()

    try:
        aim_run.experiment = config_logger["project"]
        aim_run.name = config_logger["experiment"] + "-" + sysname

        for tag in (config_logger["experiment"], "JobType-MLOps_demo"):
            aim_run.add_tag(tag)

        # Anything trackable can go in here: scalars, images, tables, ...
        tracking_data = {
            "numConfs": numConfs,
            "2D structure": Image(str(image)),
            "RMSD": Table(RMSD_dataframe),
        }

        # "step" and "epoch" are typically used in ML training; only the value
        # and the name are strictly necessary. A negative step means "not given".
        step_kwargs = {"step": step} if step >= 0 else {}
        for key, value in tracking_data.items():
            aim_run.track(value, name=key, epoch=0,
                          context={"subset": "filter"}, **step_kwargs)
    finally:
        # The run is local to this function, so nobody else can close it:
        # do it here, even when tracking fails, to avoid leaving runs open.
        aim_run.close()

In [None]:
from pathlib import Path
import pandas as pd

def Sampling(smiles: str, numConfs: int = 5, randomSeed: int = -1) -> Tuple[Path, Path, Path]:
    """
    Demo Simulation Algorithm Run: Generate conformers with a given molecule SMILES.

    All output files are written to the current working directory under fixed
    names, so each call overwrites the files of the previous one.

    Parameters:
    ----------
    smiles: str
        SMILES string of the molecule.
    numConfs: int
        Number of conformers requested from RDKit (default 5).
    randomSeed: int
        Seed forwarded to RDKit's embedding; pass a fixed non-negative value to
        get reproducible conformers. -1 is RDKit's own default for this argument.

    Returns:
    -------
    molecule: path
        .pdb file of the molecule (conformer 0)
    traj: path
        .npy file of the stacked conformer coordinates (numConfs × numAtoms × dims)
    image: path
        .png 2D structure image of the molecule

    Raises:
    ------
    ValueError
        If RDKit cannot parse ``smiles``.
    RuntimeError
        If RDKit could not embed any conformer.

    """
    import numpy as np
    from rdkit import Chem
    from rdkit.Chem import Draw
    from rdkit.Chem.AllChem import EmbedMultipleConfs, Compute2DCoords

    # MolFromSmiles signals a parse failure by returning None; fail here with a
    # clear message instead of letting AddHs choke on None.
    parsed = Chem.MolFromSmiles(smiles)
    if parsed is None:
        raise ValueError(f"Could not parse SMILES string: {smiles!r}")
    mol = Chem.AddHs(parsed)

    # 2D depiction.
    Compute2DCoords(mol)
    img = Draw.MolsToGridImage([mol], molsPerRow=1, subImgSize=(200, 200), returnPNG=False)
    img.save("molecule.png")

    # 3D conformers.
    EmbedMultipleConfs(mol, numConfs=numConfs, randomSeed=randomSeed)
    if mol.GetNumConformers() == 0:
        raise RuntimeError(f"RDKit could not embed any conformer for {smiles!r}")
    # One (numAtoms, dims) array per conformer, stacked along a new first axis.
    coordinates = np.stack([conf.GetPositions() for conf in mol.GetConformers()])

    Chem.MolToPDBFile(mol, "molecule.pdb", confId=0)
    np.save("traj.npy", coordinates)
    return Path("molecule.pdb"), Path("traj.npy"), Path("molecule.png")


def compute_metrics(molecule, traj) -> Tuple[int, Any, str, str]:
    """
    Compute per-frame RMSD for a set of conformers and export the results.

    Writes "rmsd.csv" and "traj.xtc" to the current working directory; both are
    overwritten on every call.

    Parameters
    ----------
    molecule: path
        .pdb file of the molecule, used as the topology
    traj: path
        .npy file with the conformer coordinates. Assumed to be frame-major,
        i.e. (numConfs × numAtoms × dims) -- confirm against the producer.

    Returns
    -------
    numConfs: int
        Number of conformers (frames in the trajectory)
    df: pd.DataFrame
        DataFrame of RMSD info with columns 'Frame', 'time', 'RMSD'
    rmsd_csv: str
        Name of the CSV file the DataFrame was written to
    traj_xtc: str
        Name of the .xtc file the trajectory was written to

    """
    import numpy as np
    import pandas as pd
    import MDAnalysis as mda
    from MDAnalysis.analysis import rms

    coordinates = np.load(traj)
    u = mda.Universe(molecule, coordinates)
    numConfs = u.trajectory.n_frames

    R = rms.RMSD(atomgroup=u.atoms).run()
    # Newer MDAnalysis releases expose the array as `R.results.rmsd` and
    # deprecate `R.rmsd`; fall back to the old attribute when `results` is absent.
    results = getattr(R, "results", None)
    rmsd_values = results.rmsd if results is not None else R.rmsd
    df = pd.DataFrame(rmsd_values, columns=['Frame', 'time', 'RMSD'])

    traj_xtc = "traj.xtc"
    rmsd_csv = "rmsd.csv"
    # index=False: otherwise the row index is written as an unnamed first column
    # and comes back as "Unnamed: 0" when the file is read with pd.read_csv.
    df.to_csv(rmsd_csv, index=False)
    u.atoms.write(traj_xtc, frames="all")
    return numConfs, df, rmsd_csv, traj_xtc

In [None]:
# Generate conformers for the SMILES "c1ccccc1"; returns the paths of the
# .pdb, .npy and .png files written by Sampling.
molecule, traj_npy, image = Sampling("c1ccccc1")

In [None]:
# Compute the conformer count and RMSD table for the files produced by Sampling.
numConfs, df, rmsd_csv, traj_xtc = compute_metrics(molecule, traj_npy)

In [None]:
# Benzene: sample conformers, compute RMSD metrics, then log the result to Aim.
molecule, traj_npy, image = Sampling("c1ccccc1")
numConfs, df, rmsd_csv, traj_xtc = compute_metrics(molecule, traj_npy)

report_sampler(
    config_logger={"project": "MLOps_demo", "experiment": "20230228-test"},
    sysname="benzene",
    numConfs=numConfs,  # the measured conformer count, not a hard-coded value
    image=image,
    molecule=molecule,
    traj_xtc=traj_xtc,
    RMSD_dataframe=df,  # pass the frame directly; re-reading the CSV adds nothing
)

In [None]:
# Caffeic acid: sample conformers, compute RMSD metrics, then log the result to Aim.
molecule, traj_npy, image = Sampling("C1=CC(=C(C=C1/C=C/C(=O)O)O)O")
numConfs, df, rmsd_csv, traj_xtc = compute_metrics(molecule, traj_npy)

report_sampler(
    config_logger={"project": "MLOps_demo", "experiment": "20230228-test"},
    sysname="caffeic acid",
    numConfs=numConfs,  # the measured conformer count, not a hard-coded value
    image=image,
    molecule=molecule,
    traj_xtc=traj_xtc,
    RMSD_dataframe=df,  # pass the frame directly; re-reading the CSV adds nothing
)

In [None]:
# Pyridine: sample conformers, compute RMSD metrics, then log the result to Aim.
molecule, traj_npy, image = Sampling("c1cccnc1")
numConfs, df, rmsd_csv, traj_xtc = compute_metrics(molecule, traj_npy)

report_sampler(
    config_logger={"project": "MLOps_demo", "experiment": "20230227-demo"},
    sysname="pyridine",  # was misspelled "pyrridine", which ended up in the run name
    numConfs=numConfs,  # the measured conformer count, not a hard-coded value
    image=image,
    molecule=molecule,
    traj_xtc=traj_xtc,
    RMSD_dataframe=df,  # pass the frame directly; re-reading the CSV adds nothing
)

In [None]:
# Furan: sample conformers, compute RMSD metrics, then log the result to Aim.
molecule, traj_npy, image = Sampling("c1ccoc1")
numConfs, df, rmsd_csv, traj_xtc = compute_metrics(molecule, traj_npy)

report_sampler(
    config_logger={"project": "MLOps_demo", "experiment": "20230227-demo"},
    sysname="furan",
    numConfs=numConfs,  # the measured conformer count, not a hard-coded value
    image=image,
    molecule=molecule,
    traj_xtc=traj_xtc,
    RMSD_dataframe=df,  # pass the frame directly; re-reading the CSV adds nothing
)