# ChemCharts JSON entrypoint

**TODO** - sections still to be written:

- What is the aim of ChemCharts?
- What can be done with it?
- How is it installed?

In [None]:
# load dependencies
import os
import json
import tempfile

# --------- change these path variables as required
chemcharts_dir = os.path.expanduser("~/Desktop/projects/ChemCharts")
chemcharts_env = os.path.expanduser("~/miniconda3/envs/chemcharts")
output_dir = os.path.expanduser("~/Desktop/chemcharts_json_test")

# --------- do not change
# get the notebook's root path; an already defined `ipynb_path` is kept, so
# re-running this cell does not overwrite it
try:
    ipynb_path
except NameError:
    ipynb_path = os.getcwd()

# if required, generate the folder to store the results; missing parent
# folders are created as well and an existing folder is left untouched
os.makedirs(output_dir, exist_ok=True)

In [None]:
# initialize the configuration dictionary: a single, still empty, top-level
# "chemcharts" section
conf = {
    "chemcharts": {},
}
    

In [None]:
# add header
# (the opening brace has to follow `conf =` on the same line; an assignment
# that ends at the `=` is a syntax error in Python)
conf = {
    "chemcharts": {
        "header": {}
    }
}

In [None]:
# with the execution step you can add the desired tasks, start with data loading
conf = {
    "chemcharts": {
        "header": {},
        "execution": [
            {"task": "data_loading",
             "input": "data/scaffold_memory.csv",
             "input_type": "csv",
             "columns": {"smiles_column": "SMILES",
                         "scores_column": "total_score",
                         "epochs_column": "Step"}}
        ]
    }
}

In [None]:
# then generate RDKit fingerprints
# (use the Python boolean `True`; it is written as `true` once the dictionary
# is serialized with the json module)
conf = {
    "chemcharts": {
        "header": {},
        "execution": [
            {"task": "data_loading",
             "input": "data/scaffold_memory.csv",
             "input_type": "csv",
             "columns": {"smiles_column": "SMILES",
                         "scores_column": "total_score",
                         "epochs_column": "Step"}},
            {"task": "generate_fingerprints",
             "type": "maccs",                        # you can choose between standard, morgan and maccs
             "parameters": {"useFeatures": True}}
        ]
    }
}

In [None]:
# reduce the dimensionality with UMAP
conf = {
    "chemcharts": {
        "header": {},
        "execution": [
            {"task": "data_loading",
             "input": "data/scaffold_memory.csv",
             "input_type": "csv",
             "columns": {"smiles_column": "SMILES",
                         "scores_column": "total_score",
                         "epochs_column": "Step"}},
            {"task": "generate_fingerprints",
             "type": "maccs",
             "parameters": {"useFeatures": True}},
            {"task": "dimensional_reduction",
             "type": "UMAP",
             "parameters": {}}
        ]
    }
}

In [None]:
# optional: filter the data within a given range
conf = {
    "chemcharts": {
        "header": {},
        "execution": [
            {"task": "data_loading",
             "input": "data/scaffold_memory.csv",
             "input_type": "csv",
             "columns": {"smiles_column": "SMILES",
                         "scores_column": "total_score",
                         "epochs_column": "Step"}},
            {"task": "generate_fingerprints",
             "type": "maccs",
             "parameters": {"useFeatures": True}},
            {"task": "dimensional_reduction",
             "type": "UMAP",
             "parameters": {}},
            {"task": "filtering_data",
             "type": "filtering",
             "parameters": {"range_dim1": [-100, 100],      # set range for UMAP_1
                            "range_dim2": [-100, 100]}}     # set range for UMAP_2
        ]
    }
}

In [None]:
# optional: cluster the data with KMeans
conf = {
    "chemcharts": {
        "header": {},
        "execution": [
            {"task": "data_loading",
             "input": "data/scaffold_memory.csv",
             "input_type": "csv",
             "columns": {"smiles_column": "SMILES",
                         "scores_column": "total_score",
                         "epochs_column": "Step"}},
            {"task": "generate_fingerprints",
             "type": "maccs",
             "parameters": {"useFeatures": True}},
            {"task": "dimensional_reduction",
             "type": "UMAP",
             "parameters": {}},
            {"task": "filtering_data",
             "type": "filtering",
             "parameters": {"range_dim1": [-100, 100],
                            "range_dim2": [-100, 100]}},
            {"task": "clustering_data",
             "type": "KMeans",
             "parameters": {"k": 10}}                # set the number of desired KMeans clusters
        ]
    }
}

In [None]:
# optional: bin the scores and return their median
conf = {
    "chemcharts": {
        "header": {},
        "execution": [
            {"task": "data_loading",
             "input": "data/scaffold_memory.csv",
             "input_type": "csv",
             "columns": {"smiles_column": "SMILES",
                         "scores_column": "total_score",
                         "epochs_column": "Step"}},
            {"task": "generate_fingerprints",
             "type": "maccs",
             "parameters": {"useFeatures": True}},
            {"task": "dimensional_reduction",
             "type": "UMAP",
             "parameters": {}},
            {"task": "filtering_data",
             "type": "filtering",
             "parameters": {"range_dim1": [-100, 100],
                            "range_dim2": [-100, 100]}},
            {"task": "clustering_data",
             "type": "KMeans",
             "parameters": {"k": 10}},
            {"task": "binning_scores",
             "type": "binning",
             "parameters": {"num_bins": 4}}          # set the number of desired bins
        ]
    }
}

In [None]:
# write out the result for efficiency reasons
conf = {
    "chemcharts": {
        "header": {},
        "execution": [
            {"task": "data_loading",
             "input": "data/scaffold_memory.csv",
             "input_type": "csv",
             "columns": {"smiles_column": "SMILES",
                         "scores_column": "total_score",
                         "epochs_column": "Step"}},
            {"task": "generate_fingerprints",
             "type": "maccs",
             "parameters": {"useFeatures": True}},
            {"task": "dimensional_reduction",
             "type": "UMAP",
             "parameters": {}},
            {"task": "filtering_data",
             "type": "filtering",
             "parameters": {"range_dim1": [-100, 100],
                            "range_dim2": [-100, 100]}},
            {"task": "clustering_data",
             "type": "KMeans",
             "parameters": {"k": 10}},
            {"task": "binning_scores",
             "type": "binning",
             "parameters": {"num_bins": 4}},
            {"task": "write_out",
             "format": "pkl",
             "path": "tests/junk/simple_test.pkl"}   # choose location
        ]
    }
}

In [None]:
# now you can start generating plots
conf = {
    "chemcharts": {
        "header": {},
        "execution": [
            {"task": "data_loading",
             "input": "data/scaffold_memory.csv",
             "input_type": "csv",
             "columns": {"smiles_column": "SMILES",
                         "scores_column": "total_score",
                         "epochs_column": "Step"}},
            {"task": "generate_fingerprints",
             "type": "maccs",
             "parameters": {"useFeatures": True}},
            {"task": "dimensional_reduction",
             "type": "UMAP",
             "parameters": {}},
            {"task": "filtering_data",
             "type": "filtering",
             "parameters": {"range_dim1": [-100, 100],
                            "range_dim2": [-100, 100]}},
            {"task": "clustering_data",
             "type": "KMeans",
             "parameters": {"k": 10}},
            {"task": "binning_scores",
             "type": "binning",
             "parameters": {"num_bins": 4}},
            {"task": "write_out",
             "format": "pkl",
             "path": "tests/junk/simple_test.pkl"},
            {"task": "generate_plot",
             "type": "hexagonal_plot",                                         # choose plot type (here: Hexagonal)
             "parameters": {"title": "Hexagonal ChemCharts Plot",              # set plot title (default: Hexagonal Chemcharts Plot)
                            "gridsize": 20,                                    # set gridsize (default: 20)
                            "fontsize": 14,                                    # set fontsize of title (default: 14)
                            "top": 0.9,                                        # set top margin (default: 0.9)
                            "color": "#4CB391"},                               # set marker color (default: #4CB391)
             "settings": {"path": "output/hexagonal_plot/hexagonal_plot.png",  # set output path including file name
                          "format": "png",                                     # set file format (default: png)
                          "dpi": 150}}                                         # set dpi (default: 150)
        ]
    }
}

In [None]:
# you can also add multiple plot generations
conf = {
    "chemcharts": {
        "header": {},
        "execution": [
            {"task": "data_loading",
             "input": "data/scaffold_memory.csv",
             "input_type": "csv",
             "columns": {"smiles_column": "SMILES",
                         "scores_column": "total_score",
                         "epochs_column": "Step"}},
            {"task": "generate_fingerprints",
             "type": "maccs",
             "parameters": {"useFeatures": True}},
            {"task": "dimensional_reduction",
             "type": "UMAP",
             "parameters": {}},
            {"task": "filtering_data",
             "type": "filtering",
             "parameters": {"range_dim1": [-100, 100],
                            "range_dim2": [-100, 100]}},
            {"task": "clustering_data",
             "type": "KMeans",
             "parameters": {"k": 10}},
            {"task": "binning_scores",
             "type": "binning",
             "parameters": {"num_bins": 4}},
            {"task": "write_out",
             "format": "pkl",
             "path": "tests/junk/simple_test.pkl"},
            {"task": "generate_plot",
             "type": "hexagonal_plot",
             "parameters": {"title": "Hexagonal ChemCharts Plot",
                            "gridsize": 20,
                            "fontsize": 14,
                            "top": 0.9,
                            "color": "#4CB391"},
             "settings": {"path": "output/hexagonal_plot/hexagonal_plot.png",
                          "format": "png",
                          "dpi": 150}},
            {"task": "generate_plot",
             "type": "histogram_plot",                                         # choose plot type (here: Histogram)
             "parameters": {"title": "Histogram ChemCharts Plot",              # set plot title (default: Histogram Chemcharts Plot)
                            "bins": 20,                                        # set bin number (default: 20)
                            "fontsize": 14,                                    # set fontsize of title (default: 14)
                            "top": 0.9,                                        # set top margin (default: 0.9)
                            "color": "#d11d80"},                               # set marker color (default: #d11d80)
             "settings": {"path": "output/histogram_plot/histogram_plot.png",  # set output path including file name
                          "format": "png",                                     # set file format (default: png)
                          "dpi": 300,                                          # set dpi (default: 300)
                          "figsize": "(17,17)"}}                               # set figure size (default: (17,17))
        ]
    }
}

In [None]:
# other plot variants ...
# Each entry is a complete "generate_plot" task dictionary in the same format
# as the plot tasks used in the "execution" list of `conf`.
other_plot_tasks = [

    # Scatter Boxplot
    {"task": "generate_plot",
     "type": "scatter_boxplot_plot",
     "parameters": {"title": "Scatter Boxplot ChemCharts Plot",                          # set plot title (default: Scatter Boxplot Chemcharts Plot)
                    "fontsize": 14,                                                      # set fontsize of title (default: 14)
                    "top": 0.9},                                                         # set top margin (default: 0.9)
     "settings": {"path": "output/scatter_boxplot_plot/scatter_boxplot_plot.png",        # set output path including file name
                  "format": "png",                                                       # set file format (default: png)
                  "dpi": 150,                                                            # set dpi (default: 150)
                  "figsize": "(17,17)"}},                                                # set figure size (default: (17,17))

    # Scatter Interactive
    {"task": "generate_plot",
     "type": "scatter_interactive_plot",
     "parameters": {"title": "Scatter Interactive ChemCharts Plot",                      # set plot title (default: Scatter Interactive Chemcharts Plot)
                    "marker_size": 1},                                                   # set marker size (default: 1)
     "settings": {"path": "output/scatter_interactive_plot/scatter_interactive_plot.png",  # set output path including file name
                  "view": False,                                                         # set to True if a pop-up window is desired (default: False)
                  "format": "png"}},                                                     # set file format (default: png)

    # Scatter Static
    {"task": "generate_plot",
     "type": "scatter_static_plot",
     "parameters": {"title": "Scatter Static ChemCharts Plot",                           # set plot title (default: Scatter Static Chemcharts Plot)
                    "color": "#0000ff",                                                  # set marker color (default: #0000ff)
                    "s": 6},                                                             # set marker size (default: 6)
     "settings": {"path": "output/scatter_static_plot/scatter_static_plot.png",          # set output path including file name
                  "format": "png",                                                       # set file format (default: png)
                  "dpi": 150,                                                            # set dpi (default: 150)
                  "figsize": "(17,17)"}},                                                # set figure size (default: (17,17))

    # Trisurf Interactive
    {"task": "generate_plot",
     "type": "trisurf_interactive_plot",
     "parameters": {"title": "Trisurf Interactive ChemCharts Plot",                      # set plot title (default: Trisurf Interactive Chemcharts Plot)
                    "color": "Portland"},                                                # set plotly built-in continuous color scale (default: Portland)
     "settings": {"path": "output/trisurf_interactive_plot/trisurf_interactive_plot.png",  # set output path including file name
                  "view": True,                                                          # set to True if a pop-up window is desired (default: False)
                  "format": "png"}},                                                     # set file format (default: png)

    # Trisurf Static
    {"task": "generate_plot",
     "type": "trisurf_static_plot",
     "parameters": {"title": "Trisurf Static ChemCharts Plot",                           # set plot title (default: Trisurf Static Chemcharts Plot)
                    "color": "gist_rainbow"},                                            # set matplotlib colormap (default: gist_rainbow)
     "settings": {"path": "output/trisurf_static_plot/trisurf_static_plot.png",          # set output path including file name
                  "format": "png",                                                       # set file format (default: png)
                  "dpi": 150,                                                            # set dpi (default: 150)
                  "figsize": "(9,9)"}},                                                  # set figure size (default: (9,9))
]
    