# ChemCharts JSON Movies

- **TODO:** What is the aim of this notebook?
- **TODO:** What can I do with it?
- **TODO:** How do I install ChemCharts?

In [1]:
# load dependencies
import os
import json
import tempfile

# change me: adjust these three paths to your machine
chemcharts_dir = os.path.expanduser("~/Documents/Projects/ChemCharts") # set project folder on your device
chemcharts_env = os.path.expanduser("~/miniconda3/envs/chemcharts")    # set environment path on your device
output_dir = os.path.expanduser("~/Desktop/chemcharts_json_test")      # set output path on your device

# do NOT change me
# Keep `ipynb_path` if an earlier run already defined it; otherwise fall back
# to the current working directory as the root path.
try:
    ipynb_path
except NameError:
    ipynb_path = os.getcwd()

# Make sure the output folder exists. `makedirs` also creates missing parent
# folders (plain `mkdir` fails with FileNotFoundError in that case), and
# `exist_ok=True` makes the cell safe to re-run.
os.makedirs(output_dir, exist_ok=True)

In [None]:
# start from an empty configuration: one top-level "chemcharts" section
conf = dict(chemcharts=dict())
    

In [None]:
# add the header section (it may stay empty if not needed)
conf = dict(
    chemcharts=dict(
        header={},
    )
)

In [None]:
# the "execution" section lists the desired tasks; start with data loading
conf = dict(
    chemcharts=dict(
        header={},
        execution=[
            dict(
                task="data_loading",
                input=os.path.join(chemcharts_dir, "data/scaffold_memory.csv"),
                input_type="csv",
                # names of the CSV columns to read
                columns=dict(
                    smiles_column="SMILES",
                    scores_column="total_score",
                    epochs_column="Step",
                ),
            ),
        ],
    )
)

In [None]:
# next task: generate RDKit fingerprints
conf = dict(
    chemcharts=dict(
        header={},
        execution=[
            dict(
                task="data_loading",
                input=os.path.join(chemcharts_dir, "data/scaffold_memory.csv"),
                input_type="csv",
                columns=dict(
                    smiles_column="SMILES",
                    scores_column="total_score",
                    epochs_column="Step",
                ),
            ),
            dict(
                task="generate_fingerprints",
                type="maccs",                       # choose between standard, morgan and maccs
                # Only one of the fingerprint types takes parameters; the
                # other two have none.
                # NOTE(review): `useFeatures` looks like a Morgan-fingerprint
                # option, yet the selected type is "maccs" - confirm which
                # type this parameter is meant for.
                parameters=dict(useFeatures=True),
            ),
        ],
    )
)

In [None]:
# next task: reduce the dimensionality with UMAP
conf = dict(
    chemcharts=dict(
        header={},
        execution=[
            dict(
                task="data_loading",
                input=os.path.join(chemcharts_dir, "data/scaffold_memory.csv"),
                input_type="csv",
                columns=dict(
                    smiles_column="SMILES",
                    scores_column="total_score",
                    epochs_column="Step",
                ),
            ),
            dict(
                task="generate_fingerprints",
                type="maccs",
                parameters=dict(useFeatures=True),
            ),
            dict(
                task="dimensional_reduction",
                type="UMAP",
                parameters={},
            ),
        ],
    )
)

In [None]:
# optional task: keep only the data inside a given range
conf = dict(
    chemcharts=dict(
        header={},
        execution=[
            dict(
                task="data_loading",
                input=os.path.join(chemcharts_dir, "data/scaffold_memory.csv"),
                input_type="csv",
                columns=dict(
                    smiles_column="SMILES",
                    scores_column="total_score",
                    epochs_column="Step",
                ),
            ),
            dict(
                task="generate_fingerprints",
                type="maccs",
                parameters=dict(useFeatures=True),
            ),
            dict(
                task="dimensional_reduction",
                type="UMAP",
                parameters={},
            ),
            dict(
                task="filtering_data",
                type="filtering",
                parameters=dict(
                    range_dim1=[-100, 100],         # range for UMAP_1
                    range_dim2=[-100, 100],         # range for UMAP_2
                ),
            ),
        ],
    )
)

In [None]:
# optional task: bin the scores and return their median
conf = dict(
    chemcharts=dict(
        header={},
        execution=[
            dict(
                task="data_loading",
                input=os.path.join(chemcharts_dir, "data/scaffold_memory.csv"),
                input_type="csv",
                columns=dict(
                    smiles_column="SMILES",
                    scores_column="total_score",
                    epochs_column="Step",
                ),
            ),
            dict(
                task="generate_fingerprints",
                type="maccs",
                parameters=dict(useFeatures=True),
            ),
            dict(
                task="dimensional_reduction",
                type="UMAP",
                parameters={},
            ),
            dict(
                task="filtering_data",
                type="filtering",
                parameters=dict(
                    range_dim1=[-100, 100],
                    range_dim2=[-100, 100],
                ),
            ),
            dict(
                task="binning_scores",
                type="binning",
                parameters=dict(num_bins=4),        # number of desired bins
            ),
        ],
    )
)

In [None]:
# next task: write out the result (for efficiency reasons)
conf = dict(
    chemcharts=dict(
        header={},
        execution=[
            dict(
                task="data_loading",
                input=os.path.join(chemcharts_dir, "data/scaffold_memory.csv"),
                input_type="csv",
                columns=dict(
                    smiles_column="SMILES",
                    scores_column="total_score",
                    epochs_column="Step",
                ),
            ),
            dict(
                task="generate_fingerprints",
                type="maccs",
                parameters=dict(useFeatures=True),
            ),
            dict(
                task="dimensional_reduction",
                type="UMAP",
                parameters={},
            ),
            dict(
                task="filtering_data",
                type="filtering",
                parameters=dict(
                    range_dim1=[-100, 100],
                    range_dim2=[-100, 100],
                ),
            ),
            dict(
                task="binning_scores",
                type="binning",
                parameters=dict(num_bins=4),
            ),
            dict(
                task="write_out",
                format="pkl",
                # choose the location of the ".pkl" file
                path=os.path.join(output_dir, "simple_movie_test.pkl"),
            ),
        ],
    )
)

In [2]:
# now you can start generating plots

# output path of the movie, including the file name
# NOTE(review): the path points into a sub-folder of output_dir that this
# notebook does not create itself - confirm that ChemCharts creates it.
hexagonal_movie_path = os.path.join(output_dir, "hexagonal_movie", "hexagonal_movie.mp4")

conf = dict(
    chemcharts=dict(
        header={},
        execution=[
            dict(
                task="data_loading",
                input=os.path.join(chemcharts_dir, "data/scaffold_memory.csv"),
                input_type="csv",
                columns=dict(
                    smiles_column="SMILES",
                    scores_column="total_score",
                    epochs_column="Step",
                ),
            ),
            dict(
                task="generate_fingerprints",
                type="maccs",
                parameters=dict(useFeatures=True),
            ),
            dict(
                task="dimensional_reduction",
                type="UMAP",
                parameters={},
            ),
            dict(
                task="filtering_data",
                type="filtering",
                parameters=dict(
                    range_dim1=[-100, 100],
                    range_dim2=[-100, 100],
                ),
            ),
            dict(
                task="binning_scores",
                type="binning",
                parameters=dict(num_bins=4),
            ),
            dict(
                task="write_out",
                format="pkl",
                path=os.path.join(output_dir, "simple_movie_test.pkl"),
            ),
            dict(
                task="generate_movie",
                type="hexagonal_plot",              # plot type (here: Hexagonal)
                parameters={},
                settings=dict(path=hexagonal_movie_path),
            ),
        ],
    )
)

In [8]:
# several plot generations can be chained in one configuration

# one output path (including file name) per movie
hexagonal_movie_path = os.path.join(output_dir, "hexagonal_movie", "hexagonal_movie.mp4")
scatter_boxplot_movie_path = os.path.join(output_dir, "scatter_boxplot_movie", "scatter_boxplot_movie.mp4")

conf = dict(
    chemcharts=dict(
        header={},
        execution=[
            dict(
                task="data_loading",
                input=os.path.join(chemcharts_dir, "data/scaffold_memory.csv"),
                input_type="csv",
                columns=dict(
                    smiles_column="SMILES",
                    scores_column="total_score",
                    epochs_column="Step",
                ),
            ),
            dict(
                task="generate_fingerprints",
                type="maccs",
                parameters=dict(useFeatures=True),
            ),
            dict(
                task="dimensional_reduction",
                type="UMAP",
                parameters={},
            ),
            dict(
                task="filtering_data",
                type="filtering",
                parameters=dict(
                    range_dim1=[-100, 100],
                    range_dim2=[-100, 100],
                ),
            ),
            dict(
                task="binning_scores",
                type="binning",
                parameters=dict(num_bins=4),
            ),
            dict(
                task="write_out",
                format="pkl",
                path=os.path.join(output_dir, "simple_movie_test.pkl"),
            ),
            dict(
                task="generate_movie",
                type="hexagonal_plot",              # plot type (here: Hexagonal)
                parameters={},
                settings=dict(path=hexagonal_movie_path),
            ),
            dict(
                task="generate_movie",
                type="scatter_boxplot_plot",        # plot type (here: Scatter Boxplot)
                parameters={},
                settings=dict(path=scatter_boxplot_movie_path),
            ),
        ],
    )
)


In [None]:
# other movie variants ...
#
# Each snippet below is a ready-made "generate_movie" task. This cell only
# defines the tasks and leaves `conf` untouched, so it is safe to run as is.
# To use a variant, add its task dictionary to the "execution" list of `conf`,
# e.g. conf["chemcharts"]["execution"].append(scatter_static_movie_task)
#
# (The snippets used to be indented at the top level, which made the whole
# cell fail with an IndentationError when executed.)

# Scatter Static
scatter_static_movie_path = os.path.join(output_dir, "scatter_static_movie", "scatter_static_movie.mp4")

scatter_static_movie_task = {
    "task": "generate_movie",
    "type": "scatter_static_plot",                                   # choose plot type (here: Scatter Static)
    "parameters": {},
    "settings": {"path": scatter_static_movie_path},
}

# Trisurf Static
trisurf_static_movie_path = os.path.join(output_dir, "trisurf_static_movie", "trisurf_static_movie.mp4")

trisurf_static_movie_task = {
    "task": "generate_movie",
    "type": "trisurf_static_plot",                                   # choose plot type (here: Trisurf Static)
    "parameters": {},
    "settings": {"path": trisurf_static_movie_path},
}


In [3]:
# write the configuration to a JSON file inside the output folder
# (keys sorted, 4-space indentation)
configuration_JSON_path = os.path.join(output_dir, "chemcharts_config_movies.json")
with open(configuration_JSON_path, mode="w") as config_file:
    config_file.write(json.dumps(conf, indent=4, sort_keys=True))

In [4]:
%%capture captured_std_stream --no-stdout
%%capture captured_err_stream --no-stderr

# execution: run chemcharts_json.py with the Python interpreter of the
# ChemCharts environment and hand it the configuration file written above.
# The interpolated paths are quoted so that folders containing spaces are
# passed to the shell as single arguments.
# NOTE(review): per the IPython %%capture documentation, `--no-stdout` means
# stdout is NOT captured, so `captured_std_stream` would hold stderr and
# `captured_err_stream` would hold stdout - the names look swapped; confirm
# before relying on them.
!"{chemcharts_env}/bin/python" "{chemcharts_dir}/chemcharts_json.py" -conf "{configuration_JSON_path}"