From 89ffbd320d8dce7160d450f3fd6e639b8f06d60d Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Mon, 27 May 2024 22:51:06 +0200 Subject: [PATCH 01/26] Data handling restructure --- chebai/preprocessing/datasets/chebi.py | 258 +++++++++++++++++++------ tests/testChebiDynamicDataSplits.py | 85 ++++++++ 2 files changed, 280 insertions(+), 63 deletions(-) create mode 100644 tests/testChebiDynamicDataSplits.py diff --git a/chebai/preprocessing/datasets/chebi.py b/chebai/preprocessing/datasets/chebi.py index d419f8ee..ecf3b5af 100644 --- a/chebai/preprocessing/datasets/chebi.py +++ b/chebai/preprocessing/datasets/chebi.py @@ -14,6 +14,10 @@ import os import pickle import queue +import yaml +from typing import List, Union +from torch.utils.data import DataLoader +import random from iterstrat.ml_stratifiers import ( MultilabelStratifiedKFold, @@ -134,6 +138,7 @@ def __init__( # use different version of chebi for training and validation (if not None) # (still uses self.chebi_version for test set) self.chebi_version_train = chebi_version_train + self.data_split_seed = self._get_seed_for_data_split() def extract_class_hierarchy(self, chebi_path): """ @@ -194,6 +199,9 @@ def graph_to_raw_dataset(self, g, split_name=None): def save_raw(self, data: pd.DataFrame, filename: str): pickle.dump(data, open(os.path.join(self.raw_dir, filename), "wb")) + def save_processed(self, data: pd.DataFrame, filename: str): + pickle.dump(data, open(os.path.join(self.processed_dir_main, filename), "wb")) + def _load_dict(self, input_file_path): """ Loads a dictionary from a pickled file, yielding individual dictionaries for each row. @@ -246,17 +254,23 @@ def _setup_pruned_test_set(self): ) def setup_processed(self): - print("Transform splits") + print("Transform data") os.makedirs(self.processed_dir, exist_ok=True) for k in self.processed_file_names_dict.keys(): - processed_name = ( - "test.pt" if k == "test" else self.processed_file_names_dict[k] - ) + # processed_name = ( + # "test.pt" if k == "test" else self.processed_file_names_dict[k] + # ) + processed_name = self.processed_file_names_dict[k] if not os.path.isfile(os.path.join(self.processed_dir, processed_name)): - print("transform", k) + print( + "Missing encoded data, transform processed data into encoded data", + k, + ) torch.save( self._load_data_from_file( - os.path.join(self.raw_dir, self.raw_file_names_dict[k]) + os.path.join( + self.processed_dir_main, self.raw_file_names_dict[k] + ) ), os.path.join(self.processed_dir, processed_name), ) @@ -267,22 +281,23 @@ def setup_processed(self): print("transform test (select classes)") self._setup_pruned_test_set() - def get_test_split(self, df: pd.DataFrame): + def get_test_split(self, df: pd.DataFrame, seed: int = None): print("Get test data split") - df_list = df.values.tolist() - df_list = [row[3:] for row in df_list] + # df_list = df.values.tolist() + # df_list = [row[1] for row in df_list] + labels_list = df["labels"].tolist() test_size = 1 - self.train_split - (1 - self.train_split) ** 2 msss = MultilabelStratifiedShuffleSplit( - n_splits=1, test_size=test_size, random_state=0 + n_splits=1, test_size=test_size, random_state=seed ) train_split = [] test_split = [] for train_split, test_split in msss.split( - df_list, - df_list, + labels_list, + labels_list, ): train_split = train_split test_split = test_split @@ -291,7 +306,9 @@ def get_test_split(self, df: pd.DataFrame): df_test = df.iloc[test_split] return df_train, df_test - def get_train_val_splits_given_test(self, df: pd.DataFrame, test_df: pd.DataFrame): + 
def get_train_val_splits_given_test( + self, df: pd.DataFrame, test_df: pd.DataFrame, seed: int = None + ): """ Split the dataset into train and validation sets, given a test set. Use test set (e.g., loaded from another chebi version or generated in get_test_split), avoid overlap @@ -307,19 +324,22 @@ def get_train_val_splits_given_test(self, df: pd.DataFrame, test_df: pd.DataFram print(f"Split dataset into train / val with given test set") df_trainval = df - test_ids = test_df["id"].tolist() - mask = [trainval_id not in test_ids for trainval_id in df_trainval["id"]] + test_ids = test_df["ident"].tolist() + mask = [trainval_id not in test_ids for trainval_id in df_trainval["ident"]] df_trainval = df_trainval[mask] - df_trainval_list = df_trainval.values.tolist() - df_trainval_list = [row[3:] for row in df_trainval_list] + # df_trainval_list = df_trainval.values.tolist() + # df_trainval_list = [row[3:] for row in df_trainval_list] + labels_list_trainval = df_trainval["labels"].tolist() if self.use_inner_cross_validation: folds = {} - kfold = MultilabelStratifiedKFold(n_splits=self.inner_k_folds) + kfold = MultilabelStratifiedKFold( + n_splits=self.inner_k_folds, random_state=seed + ) for fold, (train_ids, val_ids) in enumerate( kfold.split( - df_trainval_list, - df_trainval_list, + labels_list_trainval, + labels_list_trainval, ) ): df_validation = df_trainval.iloc[val_ids] @@ -334,27 +354,33 @@ def get_train_val_splits_given_test(self, df: pd.DataFrame, test_df: pd.DataFram # scale val set size by 1/self.train_split to compensate for (hypothetical) test set size (1-self.train_split) test_size = ((1 - self.train_split) ** 2) / self.train_split msss = MultilabelStratifiedShuffleSplit( - n_splits=1, test_size=test_size, random_state=0 + n_splits=1, test_size=test_size, random_state=seed ) train_split = [] validation_split = [] for train_split, validation_split in msss.split( - df_trainval_list, df_trainval_list + labels_list_trainval, labels_list_trainval ): train_split = train_split validation_split = validation_split df_validation = df_trainval.iloc[validation_split] df_train = df_trainval.iloc[train_split] - return { - self.raw_file_names_dict["train"]: df_train, - self.raw_file_names_dict["validation"]: df_validation, - } + return df_train, df_validation + + @property + def processed_dir_main(self): + return os.path.join( + self.base_dir, + self._name, + "processed", + ) @property def processed_dir(self): res = os.path.join( self.base_dir, + self._name, "processed", *self.identifier, ) @@ -365,14 +391,15 @@ def processed_dir(self): @property def base_dir(self): - return os.path.join("data", self._name, f"chebi_v{self.chebi_version}") + return os.path.join("data", f"chebi_v{self.chebi_version}") @property def processed_file_names_dict(self) -> dict: train_v_str = ( f"_v{self.chebi_version_train}" if self.chebi_version_train else "" ) - res = {"test": f"test{train_v_str}.pt"} + # res = {"test": f"test{train_v_str}.pt"} + res = {} for set in ["train", "validation"]: if self.use_inner_cross_validation: for i in range(self.inner_k_folds): @@ -380,7 +407,8 @@ def processed_file_names_dict(self) -> dict: self.fold_dir, f"fold_{i}_{set}{train_v_str}.pt" ) else: - res[set] = f"{set}{train_v_str}.pt" + # res[set] = f"{set}{train_v_str}.pt" + res["data"] = "data.pt" return res @property @@ -388,10 +416,11 @@ def raw_file_names_dict(self) -> dict: train_v_str = ( f"_v{self.chebi_version_train}" if self.chebi_version_train else "" ) - res = { - "test": f"test.pkl" - } # no extra raw test version for 
chebi_version_train - use default test set and only + # res = { + # "test": f"test.pkl" + # } # no extra raw test version for chebi_version_train - use default test set and only # adapt processed file + res = {} for set in ["train", "validation"]: if self.use_inner_cross_validation: for i in range(self.inner_k_folds): @@ -399,7 +428,8 @@ def raw_file_names_dict(self) -> dict: self.fold_dir, f"fold_{i}_{set}{train_v_str}.pkl" ) else: - res[set] = f"{set}{train_v_str}.pkl" + # res[set] = f"{set}{train_v_str}.pkl" + res["data"] = "data.pkl" return res @property @@ -447,44 +477,146 @@ def prepare_data(self, *args, **kwargs): Returns: None """ - print("Check for raw data in", self.raw_dir) + print("Check for processed data in", self.processed_dir_main) if any( - not os.path.isfile(os.path.join(self.raw_dir, f)) + not os.path.isfile(os.path.join(self.processed_dir_main, f)) for f in self.raw_file_names ): - os.makedirs(self.raw_dir, exist_ok=True) + os.makedirs(self.processed_dir_main, exist_ok=True) print("Missing raw data. Go fetch...") + + # -------- Commented the code for Data Handling Restructure for Issue No.10 + # -------- https://github.com/ChEB-AI/python-chebai/issues/10 # missing test set -> create - if not os.path.isfile( - os.path.join(self.raw_dir, self.raw_file_names_dict["test"]) - ): - chebi_path = self._load_chebi(self.chebi_version) - g = self.extract_class_hierarchy(chebi_path) - df = self.graph_to_raw_dataset(g, self.raw_file_names_dict["test"]) - _, test_df = self.get_test_split(df) - self.save_raw(test_df, self.raw_file_names_dict["test"]) - # load test_split from file - else: - with open( - os.path.join(self.raw_dir, self.raw_file_names_dict["test"]), "rb" - ) as input_file: - test_df = pickle.load(input_file) - # create train/val split based on test set - chebi_path = self._load_chebi( - self.chebi_version_train - if self.chebi_version_train is not None - else self.chebi_version - ) + # if not os.path.isfile( + # os.path.join(self.raw_dir, self.raw_file_names_dict["test"]) + # ): + # chebi_path = self._load_chebi(self.chebi_version) + # g = self.extract_class_hierarchy(chebi_path) + # df = self.graph_to_raw_dataset(g, self.raw_file_names_dict["test"]) + # _, test_df = self.get_test_split(df) + # self.save_raw(test_df, self.raw_file_names_dict["test"]) + # # load test_split from file + # else: + # with open( + # os.path.join(self.raw_dir, self.raw_file_names_dict["test"]), "rb" + # ) as input_file: + # test_df = pickle.load(input_file) + # # create train/val split based on test set + # chebi_path = self._load_chebi( + # self.chebi_version_train + # if self.chebi_version_train is not None + # else self.chebi_version + # ) + # g = self.extract_class_hierarchy(chebi_path) + # if self.use_inner_cross_validation: + # df = self.graph_to_raw_dataset( + # g, self.raw_file_names_dict[f"fold_0_train"] + # ) + # else: + # df = self.graph_to_raw_dataset(g, self.raw_file_names_dict["train"]) + # train_val_dict = self.get_train_val_splits_given_test(df, test_df) + # for name, df in train_val_dict.items(): + # self.save_raw(df, name) + + chebi_path = self._load_chebi(self.chebi_version) g = self.extract_class_hierarchy(chebi_path) - if self.use_inner_cross_validation: - df = self.graph_to_raw_dataset( - g, self.raw_file_names_dict[f"fold_0_train"] + df = self.graph_to_raw_dataset(g, self.raw_file_names_dict["data"]) + self.save_processed(df, self.raw_file_names_dict["data"]) + + @staticmethod + def _get_seed_for_data_split(): + # Get Seed (random_state) configuration in order generate 
same splits every time + __SEED_CONFIG_FILE_NAME = "chebiDataSplit_Seed.yml" + with open( + os.path.join("configs", "data", f"{__SEED_CONFIG_FILE_NAME}"), "r" + ) as yaml_file: + config = yaml.safe_load(yaml_file) + seed = int(config.get("seed", None)) + return seed + + def dataloader(self, data, **kwargs) -> DataLoader: + """ + Returns a DataLoader object for the specified kind (train, val or test) of data. + + Args: + data (str): Data to use. + + Returns: + DataLoader: A DataLoader object. + + """ + dataset = data + if "ids" in kwargs: + ids = kwargs.pop("ids") + _dataset = [] + for i in range(len(dataset)): + if i in ids: + _dataset.append(dataset[i]) + dataset = _dataset + if self.label_filter is not None: + original_len = len(dataset) + dataset = [self._filter_labels(r) for r in dataset] + positives = [r for r in dataset if r["labels"][0]] + negatives = [r for r in dataset if not r["labels"][0]] + if self.balance_after_filter is not None: + negative_length = min( + original_len, int(len(positives) * self.balance_after_filter) ) + dataset = positives + negatives[:negative_length] else: - df = self.graph_to_raw_dataset(g, self.raw_file_names_dict["train"]) - train_val_dict = self.get_train_val_splits_given_test(df, test_df) - for name, df in train_val_dict.items(): - self.save_raw(df, name) + dataset = positives + negatives + random.shuffle(dataset) + if self.data_limit is not None: + dataset = dataset[: self.data_limit] + return DataLoader( + dataset, + collate_fn=self.reader.collater, + batch_size=self.batch_size, + **kwargs, + ) + + def val_dataloader(self, *args, **kwargs) -> Union[DataLoader, List[DataLoader]]: + data = self.load_processed_data("data") + df = pd.DataFrame(data) + train_df, df_test = self.get_test_split(df, seed=self.data_split_seed) + _, df_val = self.get_train_val_splits_given_test( + train_df, df_test, seed=self.data_split_seed + ) + val_list_of_dicts = df_val.to_dict(orient="records") + + return self.dataloader( + val_list_of_dicts, + shuffle=False, + num_workers=self.num_workers, + persistent_workers=True, + **kwargs, + ) + + def train_dataloader(self, *args, **kwargs) -> DataLoader: + data = self.load_processed_data("data") + df = pd.DataFrame(data) + train_df, df_test = self.get_test_split(df, seed=self.data_split_seed) + df_train, _ = self.get_train_val_splits_given_test( + train_df, df_test, seed=self.data_split_seed + ) + train_list_of_dicts = df_train.to_dict(orient="records") + + return self.dataloader( + train_list_of_dicts, + shuffle=True, + num_workers=self.num_workers, + persistent_workers=True, + **kwargs, + ) + + def test_dataloader(self, *args, **kwargs) -> Union[DataLoader, List[DataLoader]]: + data = self.load_processed_data("data") + df = pd.DataFrame(data) + _, df_test = self.get_test_split(df, seed=self.data_split_seed) + test_list_of_dicts = df_test.to_dict(orient="records") + + return self.dataloader(test_list_of_dicts, shuffle=False, **kwargs) class JCIExtendedBase(_ChEBIDataExtractor): diff --git a/tests/testChebiDynamicDataSplits.py b/tests/testChebiDynamicDataSplits.py new file mode 100644 index 00000000..63369b60 --- /dev/null +++ b/tests/testChebiDynamicDataSplits.py @@ -0,0 +1,85 @@ +import unittest +import os +import yaml +import hashlib +import pandas as pd +import numpy as np + + +class TestChebiDynamicDataSplits(unittest.TestCase): + + @classmethod + def setUpClass(cls) -> None: + cls.chebi_class_object = cls.getChebiDataClassConfig() + + def testDynamicDataSplitsConsistency(self): + """Test Dynamic Data Splits consistency across 
every run""" + + # Dynamic Data Splits in Run 1 + train_data_1, val_data_1, test_data_1 = self.get_train_val_test_splits() + train_hash_1 = self.compute_hash(train_data_1) + val_hash_1 = self.compute_hash(val_data_1) + test_hash_1 = self.compute_hash(test_data_1) + + # Dynamic Data Splits in Run 2 + train_data_2, val_data_2, test_data_2 = self.get_train_val_test_splits() + train_hash_2 = self.compute_hash(train_data_2) + val_hash_2 = self.compute_hash(val_data_2) + test_hash_2 = self.compute_hash(test_data_2) + + # Check all splits are matching in both runs + self.assertEqual(train_hash_1, train_hash_2, "Train data hashes do not match.") + self.assertEqual(val_hash_1, val_hash_2, "Validation data hashes do not match.") + self.assertEqual(test_hash_1, test_hash_2, "Test data hashes do not match.") + + def get_train_val_test_splits(self): + """Returns Dynamic Data splits consisting of train, validation and test set""" + data = self.chebi_class_object.load_processed_data("data") + df = pd.DataFrame(data) + train_df, df_test = self.chebi_class_object.get_test_split( + df, seed=self.chebi_class_object.data_split_seed + ) + df_train, df_val = self.chebi_class_object.get_train_val_splits_given_test( + train_df, df_test, seed=self.chebi_class_object.data_split_seed + ) + return df_train, df_val, df_test + + @staticmethod + def compute_hash(data): + """Returns hash for the given data partition""" + data_for_hashing = data.applymap(TestChebiDynamicDataSplits.convert_to_hashable) + return hashlib.md5( + pd.util.hash_pandas_object(data_for_hashing, index=True).values + ).hexdigest() + + @staticmethod + def convert_to_hashable(item): + """To Convert lists and numpy arrays within the DataFrame to tuples for hashing""" + if isinstance(item, list): + return tuple(item) + elif isinstance(item, np.ndarray): + return tuple(item.tolist()) + else: + return item + + @staticmethod + def getChebiDataClassConfig(): + """Import the respective class and instantiate with given version from the config""" + CONFIG_FILE_NAME = "chebi50.yml" + with open( + os.path.join("configs", "data", f"{CONFIG_FILE_NAME}"), "r" + ) as yaml_file: + config = yaml.safe_load(yaml_file) + + class_path = config["class_path"] + init_args = config.get("init_args", {}) + + module, class_name = class_path.rsplit(".", 1) + module = __import__(module, fromlist=[class_name]) + class_ = getattr(module, class_name) + + return class_(**init_args) + + +if __name__ == "__main__": + unittest.main() From bd6382b6032cb9397f11512f8f28c7ee31c0eed4 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Wed, 5 Jun 2024 11:37:22 +0200 Subject: [PATCH 02/26] Update chebi tests for dynamic splits --- tests/testChebiData.py | 46 +++++--------------- tests/testChebiDynamicDataSplits.py | 67 ++++++++++------------------- 2 files changed, 34 insertions(+), 79 deletions(-) diff --git a/tests/testChebiData.py b/tests/testChebiData.py index aeab27e1..9513b217 100644 --- a/tests/testChebiData.py +++ b/tests/testChebiData.py @@ -1,7 +1,5 @@ import unittest -import os -import torch -import yaml +from chebai.preprocessing.datasets.chebi import ChEBIOver50 class TestChebiData(unittest.TestCase): @@ -10,48 +8,28 @@ class TestChebiData(unittest.TestCase): def setUpClass(cls) -> None: cls.getDataSplitsOverlaps() - @classmethod - def getChebiDataConfig(cls): - """Import the respective class and instantiate with given version from the config""" - CONFIG_FILE_NAME = "chebi50.yml" - with open( - os.path.join("configs", "data", f"{CONFIG_FILE_NAME}"), "r" - ) as yaml_file: - config = 
yaml.safe_load(yaml_file) - - class_path = config["class_path"] - init_args = config.get("init_args", {}) - - module, class_name = class_path.rsplit(".", 1) - module = __import__(module, fromlist=[class_name]) - class_ = getattr(module, class_name) - - return class_(**init_args) - @classmethod def getDataSplitsOverlaps(cls): """Get the overlap between data splits""" - processed_path = os.path.join( - os.getcwd(), cls.getChebiDataConfig().processed_dir - ) - print(f"Checking Data from - {processed_path}") + chebi_class_obj = ChEBIOver50() + # Get the raw/processed data if missing + chebi_class_obj.prepare_data() + chebi_class_obj.setup() - train_set = torch.load(os.path.join(processed_path, "train.pt")) - val_set = torch.load(os.path.join(processed_path, "validation.pt")) - test_set = torch.load(os.path.join(processed_path, "test.pt")) + train_set = chebi_class_obj.dynamic_split_class_variables_df["train"] + val_set = chebi_class_obj.dynamic_split_class_variables_df["validation"] + test_set = chebi_class_obj.dynamic_split_class_variables_df["test"] train_smiles, train_smiles_ids = cls.get_features_ids(train_set) val_smiles, val_smiles_ids = cls.get_features_ids(val_set) test_smiles, test_smiles_ids = cls.get_features_ids(test_set) # ----- Get the overlap between data splits based on smiles tokens/features ----- - cls.overlaps_train_val = cls.get_overlaps(train_smiles, val_smiles) cls.overlaps_train_test = cls.get_overlaps(train_smiles, test_smiles) cls.overlaps_val_test = cls.get_overlaps(val_smiles, test_smiles) # ----- Get the overlap between data splits based on IDs ----- - cls.overlaps_train_val_ids = cls.get_overlaps(train_smiles_ids, val_smiles_ids) cls.overlaps_train_test_ids = cls.get_overlaps( train_smiles_ids, test_smiles_ids @@ -59,12 +37,10 @@ def getDataSplitsOverlaps(cls): cls.overlaps_val_test_ids = cls.get_overlaps(val_smiles_ids, test_smiles_ids) @staticmethod - def get_features_ids(data_split): + def get_features_ids(data_split_df): """Returns SMILES features/tokens and SMILES IDs from the data""" - smiles_features, smiles_ids = [], [] - for entry in data_split: - smiles_features.append(entry["features"]) - smiles_ids.append(entry["ident"]) + smiles_features = data_split_df["features"].tolist() + smiles_ids = data_split_df["ident"].tolist() return smiles_features, smiles_ids diff --git a/tests/testChebiDynamicDataSplits.py b/tests/testChebiDynamicDataSplits.py index 63369b60..676d0e16 100644 --- a/tests/testChebiDynamicDataSplits.py +++ b/tests/testChebiDynamicDataSplits.py @@ -1,53 +1,50 @@ import unittest -import os -import yaml import hashlib import pandas as pd import numpy as np +from chebai.preprocessing.datasets.chebi import ChEBIOver50 class TestChebiDynamicDataSplits(unittest.TestCase): - @classmethod - def setUpClass(cls) -> None: - cls.chebi_class_object = cls.getChebiDataClassConfig() - def testDynamicDataSplitsConsistency(self): """Test Dynamic Data Splits consistency across every run""" # Dynamic Data Splits in Run 1 - train_data_1, val_data_1, test_data_1 = self.get_train_val_test_splits() - train_hash_1 = self.compute_hash(train_data_1) - val_hash_1 = self.compute_hash(val_data_1) - test_hash_1 = self.compute_hash(test_data_1) + train_hash_1, val_hash_1, test_hash_1 = self._get_hashed_splits() # Dynamic Data Splits in Run 2 - train_data_2, val_data_2, test_data_2 = self.get_train_val_test_splits() - train_hash_2 = self.compute_hash(train_data_2) - val_hash_2 = self.compute_hash(val_data_2) - test_hash_2 = self.compute_hash(test_data_2) + train_hash_2, 
val_hash_2, test_hash_2 = self._get_hashed_splits() # Check all splits are matching in both runs self.assertEqual(train_hash_1, train_hash_2, "Train data hashes do not match.") self.assertEqual(val_hash_1, val_hash_2, "Validation data hashes do not match.") self.assertEqual(test_hash_1, test_hash_2, "Test data hashes do not match.") - def get_train_val_test_splits(self): - """Returns Dynamic Data splits consisting of train, validation and test set""" - data = self.chebi_class_object.load_processed_data("data") - df = pd.DataFrame(data) - train_df, df_test = self.chebi_class_object.get_test_split( - df, seed=self.chebi_class_object.data_split_seed - ) - df_train, df_val = self.chebi_class_object.get_train_val_splits_given_test( - train_df, df_test, seed=self.chebi_class_object.data_split_seed - ) - return df_train, df_val, df_test + def _get_hashed_splits(self): + """Returns hashed dynamic data splits""" + + # Get the raw/processed data if missing + chebi_class_obj = ChEBIOver50(seed=42) + chebi_class_obj.prepare_data() + chebi_class_obj.setup() + + # Get dynamic splits from class variables + train_data = chebi_class_obj.dynamic_split_class_variables_df["train"] + val_data = chebi_class_obj.dynamic_split_class_variables_df["validation"] + test_data = chebi_class_obj.dynamic_split_class_variables_df["test"] + + # Get hashes for each split + train_hash = self.compute_hash(train_data) + val_hash = self.compute_hash(val_data) + test_hash = self.compute_hash(test_data) + + return train_hash, val_hash, test_hash @staticmethod def compute_hash(data): """Returns hash for the given data partition""" - data_for_hashing = data.applymap(TestChebiDynamicDataSplits.convert_to_hashable) + data_for_hashing = data.map(TestChebiDynamicDataSplits.convert_to_hashable) return hashlib.md5( pd.util.hash_pandas_object(data_for_hashing, index=True).values ).hexdigest() @@ -62,24 +59,6 @@ def convert_to_hashable(item): else: return item - @staticmethod - def getChebiDataClassConfig(): - """Import the respective class and instantiate with given version from the config""" - CONFIG_FILE_NAME = "chebi50.yml" - with open( - os.path.join("configs", "data", f"{CONFIG_FILE_NAME}"), "r" - ) as yaml_file: - config = yaml.safe_load(yaml_file) - - class_path = config["class_path"] - init_args = config.get("init_args", {}) - - module, class_name = class_path.rsplit(".", 1) - module = __import__(module, fromlist=[class_name]) - class_ = getattr(module, class_name) - - return class_(**init_args) - if __name__ == "__main__": unittest.main() From d8abee26bf0ad78ad847aa78a80790595a0811d8 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Sat, 8 Jun 2024 23:43:59 +0200 Subject: [PATCH 03/26] Dynamic split for chebi_version_train + changes --- chebai/preprocessing/datasets/chebi.py | 279 ++++++++++++++----------- 1 file changed, 161 insertions(+), 118 deletions(-) diff --git a/chebai/preprocessing/datasets/chebi.py b/chebai/preprocessing/datasets/chebi.py index 4c2804d3..a47659b7 100644 --- a/chebai/preprocessing/datasets/chebi.py +++ b/chebai/preprocessing/datasets/chebi.py @@ -9,25 +9,25 @@ "JCI_500_COLUMNS_INT", ] -from abc import ABC -from collections import OrderedDict import os import pickle import queue -import yaml -from typing import List, Union -from torch.utils.data import DataLoader import random +from abc import ABC +from collections import OrderedDict +from typing import List, Union -from iterstrat.ml_stratifiers import ( - MultilabelStratifiedKFold, - MultilabelStratifiedShuffleSplit, -) import fastobo import networkx as nx 
import pandas as pd import requests import torch +import yaml +from iterstrat.ml_stratifiers import ( + MultilabelStratifiedKFold, + MultilabelStratifiedShuffleSplit, +) +from torch.utils.data import DataLoader from chebai.preprocessing import reader as dr from chebai.preprocessing.datasets.base import XYBaseDataModule @@ -138,7 +138,11 @@ def __init__( # use different version of chebi for training and validation (if not None) # (still uses self.chebi_version for test set) self.chebi_version_train = chebi_version_train - self.data_split_seed = self._get_seed_for_data_split() + self.dynamic_data_split_seed = int(kwargs.get("seed", 42)) # default is 42 + # Class variables to store the dynamics splits + self.dynamic_df_train = None + self.dynamic_df_test = None + self.dynamic_df_val = None def extract_class_hierarchy(self, chebi_path): """ @@ -228,30 +232,47 @@ def _get_data_size(input_file_path): with open(input_file_path, "rb") as f: return len(pd.read_pickle(f)) - def _setup_pruned_test_set(self): - """Create test set with same leaf nodes, but use classes that appear in train set""" + def _setup_pruned_test_set( + self, df_test_chebi_version: pd.DataFrame + ) -> pd.DataFrame: + """Create a test set with the same leaf nodes, but use only classes that appear in the training set""" # TODO: find a more efficient way to do this filename_old = "classes.txt" filename_new = f"classes_v{self.chebi_version_train}.txt" - dataset = torch.load(os.path.join(self.processed_dir, "test.pt")) - with open(os.path.join(self.raw_dir, filename_old), "r") as file: + # dataset = torch.load(os.path.join(self.processed_dir, "test.pt")) + + # Load original classes (from the current ChEBI version - chebi_version) + with open(os.path.join(self.processed_dir_main, filename_old), "r") as file: orig_classes = file.readlines() - with open(os.path.join(self.raw_dir, filename_new), "r") as file: + + # Load new classes (from the training ChEBI version - chebi_version_train) + with open(os.path.join(self.processed_dir_main, filename_new), "r") as file: new_classes = file.readlines() + + # Create a mapping which give index of a class from chebi_version, if the corresponding + # class exists in chebi_version_train, Size = Number of classes in chebi_version mapping = [ None if or_class not in new_classes else new_classes.index(or_class) for or_class in orig_classes ] - for row in dataset: + + # Iterate over each data instance in the test set which is derived from chebi_version + for row in df_test_chebi_version: + # Size = Number of classes in chebi_version_train new_labels = [False for _ in new_classes] for ind, label in enumerate(row["labels"]): + # If the chebi_version class exists in the chebi_version_train and has a True label, + # set the corresponding label in new_labels to True if mapping[ind] is not None and label: new_labels[mapping[ind]] = label + # Update the labels from test instance from chebi_version to the new labels, which are compatible to both versions row["labels"] = new_labels - torch.save( - dataset, - os.path.join(self.processed_dir, self.processed_file_names_dict["test"]), - ) + + # torch.save( + # chebi_ver_test_data, + # os.path.join(self.processed_dir, self.processed_file_names_dict["test"]), + # ) + return df_test_chebi_version def setup_processed(self): print("Transform data") @@ -261,6 +282,10 @@ def setup_processed(self): # "test.pt" if k == "test" else self.processed_file_names_dict[k] # ) processed_name = self.processed_file_names_dict[k] + if k == "data_chebi_train" and self.chebi_version_train 
is None: + # To skip the encoding of data for "chebi_version_train", if it's not given + continue + if not os.path.isfile(os.path.join(self.processed_dir, processed_name)): print( "Missing encoded data, transform processed data into encoded data", @@ -274,12 +299,17 @@ def setup_processed(self): ), os.path.join(self.processed_dir, processed_name), ) - # create second test set with classes used in train - if self.chebi_version_train is not None and not os.path.isfile( - os.path.join(self.processed_dir, self.processed_file_names_dict["test"]) - ): - print("transform test (select classes)") - self._setup_pruned_test_set() + + # -------- Commented the code for Data Handling Restructure for Issue No.10 + # -------- https://github.com/ChEB-AI/python-chebai/issues/10 + # # create second test set with classes used in train + # if self.chebi_version_train is not None and not os.path.isfile( + # os.path.join( + # self.processed_dir, self.processed_file_names_dict["data_chebi_train"] + # ) + # ): + # print("transform test (select classes)") + # self._setup_pruned_test_set() def get_test_split(self, df: pd.DataFrame, seed: int = None): print("Get test data split") @@ -400,15 +430,18 @@ def processed_file_names_dict(self) -> dict: ) # res = {"test": f"test{train_v_str}.pt"} res = {} + for set in ["train", "validation"]: + # TODO: code will be modified into CV issue for dynamic splits if self.use_inner_cross_validation: for i in range(self.inner_k_folds): res[f"fold_{i}_{set}"] = os.path.join( self.fold_dir, f"fold_{i}_{set}{train_v_str}.pt" ) - else: - # res[set] = f"{set}{train_v_str}.pt" - res["data"] = "data.pt" + # else: + # res[set] = f"{set}{train_v_str}.pt" + res["data"] = "data.pt" + res["data_chebi_train"] = f"data{train_v_str}.pt" return res @property @@ -422,14 +455,16 @@ def raw_file_names_dict(self) -> dict: # adapt processed file res = {} for set in ["train", "validation"]: + # TODO: code will be modified into CV issue for dynamic splits if self.use_inner_cross_validation: for i in range(self.inner_k_folds): res[f"fold_{i}_{set}"] = os.path.join( self.fold_dir, f"fold_{i}_{set}{train_v_str}.pkl" ) - else: - # res[set] = f"{set}{train_v_str}.pkl" - res["data"] = "data.pkl" + # else: + # res[set] = f"{set}{train_v_str}.pkl" + res["data"] = "data.pkl" + res["data_chebi_train"] = f"data{train_v_str}.pkl" return res @property @@ -519,104 +554,112 @@ def prepare_data(self, *args, **kwargs): # for name, df in train_val_dict.items(): # self.save_raw(df, name) + # Data from chebi_version chebi_path = self._load_chebi(self.chebi_version) g = self.extract_class_hierarchy(chebi_path) df = self.graph_to_raw_dataset(g, self.raw_file_names_dict["data"]) - self.save_processed(df, self.raw_file_names_dict["data"]) - - @staticmethod - def _get_seed_for_data_split(): - # Get Seed (random_state) configuration in order generate same splits every time - __SEED_CONFIG_FILE_NAME = "chebiDataSplit_Seed.yml" - with open( - os.path.join("configs", "data", f"{__SEED_CONFIG_FILE_NAME}"), "r" - ) as yaml_file: - config = yaml.safe_load(yaml_file) - seed = int(config.get("seed", None)) - return seed - - def dataloader(self, data, **kwargs) -> DataLoader: - """ - Returns a DataLoader object for the specified kind (train, val or test) of data. + self.save_processed(df, filename=self.raw_file_names_dict["data"]) - Args: - data (str): Data to use. 
+ # Data from chebi_version_train + if self.chebi_version_train is not None and not os.path.isfile( + os.path.join( + self.processed_dir_main, + self.raw_file_names_dict["data_chebi_train"], + ) + ): + chebi_path = self._load_chebi(self.chebi_version_train) + g = self.extract_class_hierarchy(chebi_path) + df = self.graph_to_raw_dataset( + g, self.raw_file_names_dict["data_chebi_train"] + ) + self.save_processed( + df, filename=self.raw_file_names_dict["data_chebi_train"] + ) - Returns: - DataLoader: A DataLoader object. + def setup(self, **kwargs): + super().setup(**kwargs) + if not all([self.dynamic_df_train, self.dynamic_df_val, self.dynamic_df_test]): + self._get_dynamic_splits() - """ - dataset = data - if "ids" in kwargs: - ids = kwargs.pop("ids") - _dataset = [] - for i in range(len(dataset)): - if i in ids: - _dataset.append(dataset[i]) - dataset = _dataset - if self.label_filter is not None: - original_len = len(dataset) - dataset = [self._filter_labels(r) for r in dataset] - positives = [r for r in dataset if r["labels"][0]] - negatives = [r for r in dataset if not r["labels"][0]] - if self.balance_after_filter is not None: - negative_length = min( - original_len, int(len(positives) * self.balance_after_filter) - ) - dataset = positives + negatives[:negative_length] - else: - dataset = positives + negatives - random.shuffle(dataset) - if self.data_limit is not None: - dataset = dataset[: self.data_limit] - return DataLoader( - dataset, - collate_fn=self.reader.collater, - batch_size=self.batch_size, - **kwargs, - ) + def _get_dynamic_splits(self): + """Generate data splits during run-time and saves in class variables""" - def val_dataloader(self, *args, **kwargs) -> Union[DataLoader, List[DataLoader]]: - data = self.load_processed_data("data") - df = pd.DataFrame(data) - train_df, df_test = self.get_test_split(df, seed=self.data_split_seed) - _, df_val = self.get_train_val_splits_given_test( - train_df, df_test, seed=self.data_split_seed + # Load encoded data derived from "chebi_version" + data_chebi_version = torch.load( + os.path.join(self.processed_dir, self.processed_file_names_dict["data"]) ) - val_list_of_dicts = df_val.to_dict(orient="records") - - return self.dataloader( - val_list_of_dicts, - shuffle=False, - num_workers=self.num_workers, - persistent_workers=True, - **kwargs, + df_chebi_version = pd.DataFrame(data_chebi_version) + train_df_chebi_ver, df_test_chebi_ver = self.get_test_split( + df_chebi_version, seed=self.dynamic_data_split_seed ) - def train_dataloader(self, *args, **kwargs) -> DataLoader: - data = self.load_processed_data("data") - df = pd.DataFrame(data) - train_df, df_test = self.get_test_split(df, seed=self.data_split_seed) - df_train, _ = self.get_train_val_splits_given_test( - train_df, df_test, seed=self.data_split_seed - ) - train_list_of_dicts = df_train.to_dict(orient="records") - - return self.dataloader( - train_list_of_dicts, - shuffle=True, - num_workers=self.num_workers, - persistent_workers=True, - **kwargs, - ) + if self.chebi_version_train is not None: + # Load encoded data derived from "chebi_version_train" + data_chebi_train_version = torch.load( + os.path.join( + self.processed_dir, + self.processed_file_names_dict["data_chebi_train"], + ) + ) + # Get train/val split of data based on "chebi_version_train", but + # using test set from "chebi_version" + df_train, df_val = self.get_train_val_splits_given_test( + data_chebi_train_version, + df_test_chebi_ver, + seed=self.dynamic_data_split_seed, + ) + # Modify test set from 
"chebi_version" to only include the labels that + # exists in "chebi_version_train", all other entries remains same. + df_test = self._setup_pruned_test_set(df_test_chebi_ver) + else: + # Get all splits based on "chebi_version" + df_train, df_val = self.get_train_val_splits_given_test( + train_df_chebi_ver, + df_test_chebi_ver, + seed=self.dynamic_data_split_seed, + ) + df_test = df_test_chebi_ver + + self.dynamic_df_train = df_train + self.dynamic_df_val = df_val + self.dynamic_df_test = df_test + + @property + def dynamic_split_dfs(self): + return { + "train": self.dynamic_df_train, + "validation": self.dynamic_df_val, + "test": self.dynamic_df_test, + } + + def load_processed_data(self, kind: str = None) -> List: + """ + Load processed data from a file. - def test_dataloader(self, *args, **kwargs) -> Union[DataLoader, List[DataLoader]]: - data = self.load_processed_data("data") - df = pd.DataFrame(data) - _, df_test = self.get_test_split(df, seed=self.data_split_seed) - test_list_of_dicts = df_test.to_dict(orient="records") + Args: + kind (str, optional): The kind of dataset to load such as "train", "val" or "test". Defaults to None. - return self.dataloader(test_list_of_dicts, shuffle=False, **kwargs) + Returns: + List: The loaded processed data. + + Raises: + ValueError: If kind is None. + """ + if kind is None: + raise ValueError("kind is required to load the correct dataset") + # if both kind and filename are given, use filename + if kind is not None: + try: + # processed_file_names_dict is only implemented for _ChEBIDataExtractor + if self.use_inner_cross_validation and kind != "test": + filename = self.processed_file_names_dict[ + f"fold_{self.fold_index}_{kind}" + ] + else: + data_df = self.dynamic_split_dfs[kind] + except NotImplementedError: + filename = f"{kind}" + return data_df.to_dict(orient="records") class JCIExtendedBase(_ChEBIDataExtractor): From 91aa48460cda5e2d81f3bc333848f2425c9be706 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Sat, 8 Jun 2024 23:45:35 +0200 Subject: [PATCH 04/26] Update dynamic split tests --- tests/testChebiData.py | 7 ++- tests/testChebiDynamicDataSplits.py | 93 +++++++++++++++++++++++++++-- 2 files changed, 92 insertions(+), 8 deletions(-) diff --git a/tests/testChebiData.py b/tests/testChebiData.py index 9513b217..5bf8d388 100644 --- a/tests/testChebiData.py +++ b/tests/testChebiData.py @@ -1,4 +1,5 @@ import unittest + from chebai.preprocessing.datasets.chebi import ChEBIOver50 @@ -16,9 +17,9 @@ def getDataSplitsOverlaps(cls): chebi_class_obj.prepare_data() chebi_class_obj.setup() - train_set = chebi_class_obj.dynamic_split_class_variables_df["train"] - val_set = chebi_class_obj.dynamic_split_class_variables_df["validation"] - test_set = chebi_class_obj.dynamic_split_class_variables_df["test"] + train_set = chebi_class_obj.dynamic_split_dfs["train"] + val_set = chebi_class_obj.dynamic_split_dfs["validation"] + test_set = chebi_class_obj.dynamic_split_dfs["test"] train_smiles, train_smiles_ids = cls.get_features_ids(train_set) val_smiles, val_smiles_ids = cls.get_features_ids(val_set) diff --git a/tests/testChebiDynamicDataSplits.py b/tests/testChebiDynamicDataSplits.py index 676d0e16..02d47303 100644 --- a/tests/testChebiDynamicDataSplits.py +++ b/tests/testChebiDynamicDataSplits.py @@ -1,12 +1,22 @@ -import unittest import hashlib -import pandas as pd +import unittest + import numpy as np +import pandas as pd + from chebai.preprocessing.datasets.chebi import ChEBIOver50 class TestChebiDynamicDataSplits(unittest.TestCase): + def 
setUpClass(self): + self.chebi_50_v231 = ChEBIOver50(chebi_version=231) + self.chebi_50_v231_vt200 = ChEBIOver50( + chebi_version=231, chebi_version_train=200 + ) + self._generate_chebi_class_data(self.chebi_50_v231) + self._generate_chebi_class_data(self.chebi_50_v231_vt200) + def testDynamicDataSplitsConsistency(self): """Test Dynamic Data Splits consistency across every run""" @@ -21,6 +31,65 @@ def testDynamicDataSplitsConsistency(self): self.assertEqual(val_hash_1, val_hash_2, "Validation data hashes do not match.") self.assertEqual(test_hash_1, test_hash_2, "Test data hashes do not match.") + def test_same_ids_and_in_test_sets(self): + """Check if test sets of both classes have same IDs""" + + v231_ids = set(self.chebi_50_v231.dynamic_split_dfs["test"]["ident"]) + v231_vt200_ids = set( + self.chebi_50_v231_vt200.dynamic_split_dfs["test"]["ident"] + ) + + self.assertEqual( + v231_ids, v231_vt200_ids, "Test sets do not have the same IDs." + ) + + def test_labels_vector_size_in_test_sets(self): + """Check if test sets of both classes have different size/shape of labels""" + + v231_labels_shape = len( + self.chebi_50_v231.dynamic_split_dfs["test"]["ident"][0] + ) + v231_vt200_label_shape = len( + self.chebi_50_v231_vt200.dynamic_split_dfs["test"]["ident"][0] + ) + + self.assertNotEqual( + v231_labels_shape, + v231_vt200_label_shape, + "Test sets have the same size of labels", + ) + + def test_no_overlaps_in_chebi_v231_vt200(self): + """Test the overlaps for the ChEBIOver50(chebi_version=231, chebi_version_train=200)""" + train_set = self.chebi_50_v231_vt200.dynamic_split_dfs["train"] + val_set = self.chebi_50_v231_vt200.dynamic_split_dfs["validation"] + test_set = self.chebi_50_v231_vt200.dynamic_split_dfs["test"] + + train_set_ids = train_set["ident"].tolist() + val_set_ids = val_set["ident"].tolist() + test_set_ids = test_set["ident"].tolist() + + # ----- Get the overlap between data splits based on IDs ----- + self.overlaps_train_val_ids = self.get_overlaps(train_set_ids, val_set_ids) + self.overlaps_train_test_ids = self.get_overlaps(train_set_ids, test_set_ids) + self.overlaps_val_test_ids = self.get_overlaps(val_set_ids, test_set_ids) + + self.assertEqual( + len(self.overlaps_train_val_ids), + 0, + "Duplicate entities present in Train and Validation set based on IDs", + ) + self.assertEqual( + len(self.overlaps_train_test_ids), + 0, + "Duplicate entities present in Train and Test set based on IDs", + ) + self.assertEqual( + len(self.overlaps_val_test_ids), + 0, + "Duplicate entities present in Validation and Test set based on IDs", + ) + def _get_hashed_splits(self): """Returns hashed dynamic data splits""" @@ -30,9 +99,9 @@ def _get_hashed_splits(self): chebi_class_obj.setup() # Get dynamic splits from class variables - train_data = chebi_class_obj.dynamic_split_class_variables_df["train"] - val_data = chebi_class_obj.dynamic_split_class_variables_df["validation"] - test_data = chebi_class_obj.dynamic_split_class_variables_df["test"] + train_data = chebi_class_obj.dynamic_split_dfs["train"] + val_data = chebi_class_obj.dynamic_split_dfs["validation"] + test_data = chebi_class_obj.dynamic_split_dfs["test"] # Get hashes for each split train_hash = self.compute_hash(train_data) @@ -59,6 +128,20 @@ def convert_to_hashable(item): else: return item + @staticmethod + def _generate_chebi_class_data(chebi_class_obj): + # Get the raw/processed data if missing + chebi_class_obj.prepare_data() + chebi_class_obj.setup() + + @staticmethod + def get_overlaps(list_1, list_2): + overlap = [] + for 
element in list_1: + if element in list_2: + overlap.append(element) + return overlap + if __name__ == "__main__": unittest.main() From 22f882c082b309d5585b0aedfa924988141e07f1 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Mon, 10 Jun 2024 18:31:20 +0200 Subject: [PATCH 05/26] Update chebi + dynamic test --- chebai/preprocessing/datasets/chebi.py | 12 +++++++----- tests/testChebiDynamicDataSplits.py | 23 ++++++++++++----------- 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/chebai/preprocessing/datasets/chebi.py b/chebai/preprocessing/datasets/chebi.py index a47659b7..eacd0107 100644 --- a/chebai/preprocessing/datasets/chebi.py +++ b/chebai/preprocessing/datasets/chebi.py @@ -257,7 +257,7 @@ def _setup_pruned_test_set( ] # Iterate over each data instance in the test set which is derived from chebi_version - for row in df_test_chebi_version: + for _, row in df_test_chebi_version.iterrows(): # Size = Number of classes in chebi_version_train new_labels = [False for _ in new_classes] for ind, label in enumerate(row["labels"]): @@ -601,10 +601,11 @@ def _get_dynamic_splits(self): self.processed_file_names_dict["data_chebi_train"], ) ) + df_chebi_train_version = pd.DataFrame(data_chebi_train_version) # Get train/val split of data based on "chebi_version_train", but # using test set from "chebi_version" df_train, df_val = self.get_train_val_splits_given_test( - data_chebi_train_version, + df_chebi_train_version, df_test_chebi_ver, seed=self.dynamic_data_split_seed, ) @@ -730,11 +731,12 @@ def select_classes(self, g, split_name, *args, **kwargs): ) filename = "classes.txt" if ( - self.chebi_version_train is not None - and self.raw_file_names_dict["test"] != split_name + self.chebi_version_train + is not None + # and self.raw_file_names_dict["test"] != split_name ): filename = f"classes_v{self.chebi_version_train}.txt" - with open(os.path.join(self.raw_dir, filename), "wt") as fout: + with open(os.path.join(self.processed_dir_main, filename), "wt") as fout: fout.writelines(str(node) + "\n" for node in nodes) return nodes diff --git a/tests/testChebiDynamicDataSplits.py b/tests/testChebiDynamicDataSplits.py index 02d47303..efc53420 100644 --- a/tests/testChebiDynamicDataSplits.py +++ b/tests/testChebiDynamicDataSplits.py @@ -8,14 +8,16 @@ class TestChebiDynamicDataSplits(unittest.TestCase): + """Test dynamic splits implementation's consistency""" - def setUpClass(self): - self.chebi_50_v231 = ChEBIOver50(chebi_version=231) - self.chebi_50_v231_vt200 = ChEBIOver50( + @classmethod + def setUpClass(cls): + cls.chebi_50_v231 = ChEBIOver50(chebi_version=231) + cls.chebi_50_v231_vt200 = ChEBIOver50( chebi_version=231, chebi_version_train=200 ) - self._generate_chebi_class_data(self.chebi_50_v231) - self._generate_chebi_class_data(self.chebi_50_v231_vt200) + cls._generate_chebi_class_data(cls.chebi_50_v231) + cls._generate_chebi_class_data(cls.chebi_50_v231_vt200) def testDynamicDataSplitsConsistency(self): """Test Dynamic Data Splits consistency across every run""" @@ -47,16 +49,16 @@ def test_labels_vector_size_in_test_sets(self): """Check if test sets of both classes have different size/shape of labels""" v231_labels_shape = len( - self.chebi_50_v231.dynamic_split_dfs["test"]["ident"][0] + self.chebi_50_v231.dynamic_split_dfs["test"]["labels"].iloc[0] ) v231_vt200_label_shape = len( - self.chebi_50_v231_vt200.dynamic_split_dfs["test"]["ident"][0] + self.chebi_50_v231_vt200.dynamic_split_dfs["test"]["labels"].iloc[0] ) - self.assertNotEqual( + self.assertEqual( v231_labels_shape, 
v231_vt200_label_shape, - "Test sets have the same size of labels", + "Test sets have the different size of labels", ) def test_no_overlaps_in_chebi_v231_vt200(self): @@ -95,8 +97,7 @@ def _get_hashed_splits(self): # Get the raw/processed data if missing chebi_class_obj = ChEBIOver50(seed=42) - chebi_class_obj.prepare_data() - chebi_class_obj.setup() + self._generate_chebi_class_data(chebi_class_obj) # Get dynamic splits from class variables train_data = chebi_class_obj.dynamic_split_dfs["train"] From dde4196724707608889f10958384f8d8c932b433 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Tue, 11 Jun 2024 10:58:46 +0200 Subject: [PATCH 06/26] Update setup.py --- setup.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 5adb27a3..b25712a3 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,10 @@ author_email="martin.glauer@ovgu.de", description="", zip_safe=False, - python_requires="<=3.11.8", + # `|` operator used for type hint in chebai/loss/semantic.py is only supported for python_version >= 3.10. + # https://stackoverflow.com/questions/76712720/typeerror-unsupported-operand-types-for-type-and-nonetype + # python_requires="<=3.11.8", + python_requires=">=3.10.0, <3.11.8", install_requires=[ "certifi", "idna", @@ -48,7 +51,8 @@ "iterative-stratification", "wandb", "chardet", - "yaml", + # --- commented below due to strange dependency error while setting up new env + # "yaml",` "torchmetrics", ], extras_require={"dev": ["black", "isort", "pre-commit"]}, From aecb7e66260e719ae3e0dcd4d06ac7cf25b90110 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Wed, 12 Jun 2024 16:19:46 +0200 Subject: [PATCH 07/26] Update Evaluation notebook + rel. code Updates - Evaluation notebook - classification.py - utils.py - pre-commit + some suggestions --- .pre-commit-config.yaml | 24 +++- chebai/result/classification.py | 9 +- chebai/result/utils.py | 10 +- tutorials/eval_model_basic.ipynb | 204 +++++++++++++++++++++++++------ 4 files changed, 204 insertions(+), 43 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 77b2dfa5..8ce58be4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,4 +6,26 @@ repos: - repo: https://github.com/psf/black rev: "24.2.0" hooks: - - id: black \ No newline at end of file + - id: black + - id: black-jupyter # for formatting jupyter-notebook + +# -------- below works for me --- locally +#- repo: https://github.com/pycqa/isort +# rev: 5.13.2 +# hooks: +# - id: isort +# name: isort (python) +# args: ["--profile=black"] +# +#- repo: https://github.com/asottile/seed-isort-config +# rev: v2.2.0 +# hooks: +# - id: seed-isort-config +# +#- repo: https://github.com/pre-commit/pre-commit-hooks +# rev: v4.6.0 +# hooks: +# - id: check-yaml +# - id: end-of-file-fixer +# - id: trailing-whitespace + diff --git a/chebai/result/classification.py b/chebai/result/classification.py index b3c6ec36..6ce5fc9a 100644 --- a/chebai/result/classification.py +++ b/chebai/result/classification.py @@ -12,6 +12,7 @@ import tqdm from chebai.callbacks.epoch_metrics import MacroF1 +from chebai.callbacks.epoch_metrics import BalancedAccuracy from chebai.models import ChebaiBaseNet from chebai.models.electra import Electra @@ -39,9 +40,11 @@ def print_metrics(preds, labels, device, classes=None, top_k=10, markdown_output """Prints relevant metrics, including micro and macro F1, recall and precision, best k classes and worst classes.""" f1_micro = MultilabelF1Score(preds.shape[1], average="micro").to(device=device) 
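    # Both the torchmetrics classes and the custom metrics imported from
    # chebai.callbacks.epoch_metrics are used the same way here: instantiate
    # with the number of label classes (preds.shape[1]), move to the target
    # device, then call metric(preds, labels) on the full prediction and
    # label tensors to obtain a single score.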
my_f1_macro = MacroF1(preds.shape[1]).to(device=device) + my_bal_acc = BalancedAccuracy(preds.shape[1]).to(device=device) print(f"Macro-F1: {my_f1_macro(preds, labels):3f}") print(f"Micro-F1: {f1_micro(preds, labels):3f}") + print(f"Balanced Accuracy: {my_bal_acc(preds, labels):3f}") precision_macro = MultilabelPrecision(preds.shape[1], average="macro").to( device=device ) @@ -57,13 +60,13 @@ def print_metrics(preds, labels, device, classes=None, top_k=10, markdown_output print(f"Micro-Recall: {recall_micro(preds, labels):3f}") if markdown_output: print( - f"| Model | Macro-F1 | Micro-F1 | Macro-Precision | Micro-Precision | Macro-Recall | Micro-Recall |" + f"| Model | Macro-F1 | Micro-F1 | Macro-Precision | Micro-Precision | Macro-Recall | Micro-Recall | Balanced Accuracy" ) - print(f"| --- | --- | --- | --- | --- | --- | --- |") + print(f"| --- | --- | --- | --- | --- | --- | --- | --- |") print( f"| | {my_f1_macro(preds, labels):3f} | {f1_micro(preds, labels):3f} | {precision_macro(preds, labels):3f} | " f"{precision_micro(preds, labels):3f} | {recall_macro(preds, labels):3f} | " - f"{recall_micro(preds, labels):3f} |" + f"{recall_micro(preds, labels):3f} | {my_bal_acc(preds, labels):3f} |" ) classwise_f1_fn = MultilabelF1Score(preds.shape[1], average=None).to(device=device) diff --git a/chebai/result/utils.py b/chebai/result/utils.py index 57912614..4678ca54 100644 --- a/chebai/result/utils.py +++ b/chebai/result/utils.py @@ -2,6 +2,7 @@ from chebai.models.electra import Electra from chebai.models.base import ChebaiBaseNet from chebai.preprocessing.datasets.base import XYBaseDataModule +from chebai.preprocessing.datasets.chebi import _ChEBIDataExtractor import os import wandb import tqdm @@ -39,10 +40,12 @@ def get_checkpoint_from_wandb( def evaluate_model( model: ChebaiBaseNet, data_module: XYBaseDataModule, + # No need to provide "filename" parameter for Chebi dataset, "kind" parameter should be provided filename=None, buffer_dir=None, batch_size: int = 32, skip_existing_preds=False, + kind: str = "test", ): """Runs model on test set of data_module (or, if filename is not None, on data set found in that file). If buffer_dir is set, results will be saved in buffer_dir. Returns tensors with predictions and labels. 
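For orientation, a minimal end-to-end call of the updated evaluate_model could look as follows. This is a sketch only: the checkpoint path is a placeholder, and it assumes a model trained with the Electra class used elsewhere in this PR.

    from chebai.models.electra import Electra
    from chebai.preprocessing.datasets.chebi import ChEBIOver50
    from chebai.result.utils import evaluate_model

    data_module = ChEBIOver50(chebi_version=231)
    data_module.prepare_data()  # fetch and encode ChEBI data if missing
    data_module.setup()         # derive the dynamic train/val/test splits

    model = Electra.load_from_checkpoint("path/to/checkpoint.ckpt")  # placeholder path
    # "kind" selects one of the dynamic splits; "filename" is only needed for
    # non-ChEBI data modules that still read a processed test file from disk.
    preds, labels = evaluate_model(model, data_module, kind="test", batch_size=32)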
@@ -50,7 +53,12 @@ def evaluate_model( model.eval() collate = data_module.reader.COLLATER() - data_list = data_module.load_processed_data("test", filename) + if isinstance(data_module, _ChEBIDataExtractor): + # As the dynamic split change is implemented only for chebi-dataset as of now + data_df = data_module.dynamic_split_dfs[kind] + data_list = data_df.to_dict(orient="records") + else: + data_list = data_module.load_processed_data("test", filename) data_list = data_list[: data_module.data_limit] preds_list = [] labels_list = [] diff --git a/tutorials/eval_model_basic.ipynb b/tutorials/eval_model_basic.ipynb index db9d2a1d..b92d78f1 100644 --- a/tutorials/eval_model_basic.ipynb +++ b/tutorials/eval_model_basic.ipynb @@ -3,20 +3,53 @@ { "cell_type": "code", "execution_count": 1, + "id": "726ada05-9a23-46bc-a04a-c951ccd29807", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Current working directory: C:\\Users\\HP\\Desktop\\github-aditya0by0\\python-chebai\n" + ] + } + ], + "source": [ + "# ---------------- For testing only : comment afterwards\n", + "# import os\n", + "\n", + "# # Set the root directory\n", + "# root_directory = r\"C:\\Users\\HP\\Desktop\\github-aditya0by0\\python-chebai\"\n", + "# os.chdir(root_directory)\n", + "\n", + "# # Verify the current working directory\n", + "# print(\"Current working directory:\", os.getcwd())" + ] + }, + { + "cell_type": "code", + "execution_count": 2, "id": "initial_id", "metadata": { - "collapsed": true, "ExecuteTime": { "end_time": "2024-04-02T13:47:31.150545Z", "start_time": "2024-04-02T13:47:27.181585Z" } }, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\HP\\anaconda3\\envs\\env_chebai\\lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ - "cuda:0\n" + "cpu\n" ] } ], @@ -41,16 +74,39 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "id": "bdb5fc6919cf72be", "metadata": { - "collapsed": false, "ExecuteTime": { "end_time": "2024-04-02T13:47:35.484307Z", "start_time": "2024-04-02T13:47:35.477111Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false } }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Check for processed data in data\\chebi_v231\\ChEBI50\\processed\\smiles_token\n", + "Cross-validation enabled: False\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Check for processed data in data\\chebi_v231\\ChEBI50\\processed\n", + "saving 771 tokens to C:\\Users\\HP\\Desktop\\github-aditya0by0\\python-chebai\\chebai\\preprocessing\\bin\\smiles_token\\tokens.txt...\n", + "first 10 tokens: ['[*-]', '[Al-]', '[F-]', '.', '[H]', '[N]', '(', ')', '[Ag+]', 'C']\n", + "Get test data split\n", + "Split dataset into train / val with given test set\n" + ] + } + ], "source": [ "# specify the checkpoint name\n", "checkpoint_name = \"my_trained_model\"\n", @@ -59,77 +115,149 @@ "buffer_dir = os.path.join(\"results_buffer\", checkpoint_name, kind)\n", "# make sure to use the same data module and model class that were used during training\n", "data_module = ChEBIOver50(\n", - " chebi_version=227, \n", + " chebi_version=231,\n", ")\n", + "# load chebi data if missing and perform dynamic splits\n", + "data_module.prepare_data()\n", + "data_module.setup()\n", + "\n", "model_class = Electra" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, + "id": "42642a53-511d-4cbc-a799-56641c89aebe", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "C:\\Users\\HP\\Desktop\\github-aditya0by0\\python-chebai\\logs\\chebi50_bce_unweighted\\version_27\\checkpoints\\per_epoch=99_val_loss=0.1377_val_macro-f1=0.0200_val_micro-f1=0.2947.ckpt\n" + ] + } + ], + "source": [ + "# --------------- For testing only : comment afterwards\n", + "# data_module.data_limit = 100\n", + "# main_directory = r\"C:\\Users\\HP\\Desktop\\github-aditya0by0\\python-chebai\"\n", + "# checkpoint_name = r\"logs\\chebi50_bce_unweighted\\version_27\\checkpoints\\per_epoch=99_val_loss=0.1377_val_macro-f1=0.0200_val_micro-f1=0.2947\"\n", + "# checkpoint_path = os.path.join(main_directory, f\"{checkpoint_name}.ckpt\")\n", + "# print(checkpoint_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, "id": "fa1276b47def696c", "metadata": { - "collapsed": false, "ExecuteTime": { "end_time": "2024-04-02T13:47:38.418564Z", "start_time": "2024-04-02T13:47:37.861168Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false } }, "outputs": [ { - "ename": "FileNotFoundError", - "evalue": "[Errno 2] No such file or directory: 'C:/Users/Simon Flügel/Desktop/chebai/tutorials/logs/best_epoch=99_val_loss=0.0096_val_macro-f1=0.5358_val_micro-f1=0.8968.ckpt'", - "output_type": "error", - "traceback": [ - "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m", - "\u001B[1;31mFileNotFoundError\u001B[0m Traceback (most recent call last)", - "Cell \u001B[1;32mIn[3], line 2\u001B[0m\n\u001B[0;32m 1\u001B[0m \u001B[38;5;66;03m# evaluates model, stores results in 
buffer_dir\u001B[39;00m\n\u001B[1;32m----> 2\u001B[0m model \u001B[38;5;241m=\u001B[39m \u001B[43mmodel_class\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mload_from_checkpoint\u001B[49m\u001B[43m(\u001B[49m\u001B[43mcheckpoint_path\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 3\u001B[0m preds, labels \u001B[38;5;241m=\u001B[39m evaluate_model(\n\u001B[0;32m 4\u001B[0m model,\n\u001B[0;32m 5\u001B[0m data_module,\n\u001B[1;32m (...)\u001B[0m\n\u001B[0;32m 8\u001B[0m batch_size\u001B[38;5;241m=\u001B[39m\u001B[38;5;241m10\u001B[39m,\n\u001B[0;32m 9\u001B[0m )\n", - "File \u001B[1;32m~\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\lightning\\pytorch\\core\\module.py:1552\u001B[0m, in \u001B[0;36mLightningModule.load_from_checkpoint\u001B[1;34m(cls, checkpoint_path, map_location, hparams_file, strict, **kwargs)\u001B[0m\n\u001B[0;32m 1471\u001B[0m \u001B[38;5;129m@_restricted_classmethod\u001B[39m\n\u001B[0;32m 1472\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mload_from_checkpoint\u001B[39m(\n\u001B[0;32m 1473\u001B[0m \u001B[38;5;28mcls\u001B[39m,\n\u001B[1;32m (...)\u001B[0m\n\u001B[0;32m 1478\u001B[0m \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs: Any,\n\u001B[0;32m 1479\u001B[0m ) \u001B[38;5;241m-\u001B[39m\u001B[38;5;241m>\u001B[39m Self:\n\u001B[0;32m 1480\u001B[0m \u001B[38;5;250m \u001B[39m\u001B[38;5;124mr\u001B[39m\u001B[38;5;124;03m\"\"\"Primary way of loading a model from a checkpoint. When Lightning saves a checkpoint it stores the arguments\u001B[39;00m\n\u001B[0;32m 1481\u001B[0m \u001B[38;5;124;03m passed to ``__init__`` in the checkpoint under ``\"hyper_parameters\"``.\u001B[39;00m\n\u001B[0;32m 1482\u001B[0m \n\u001B[1;32m (...)\u001B[0m\n\u001B[0;32m 1550\u001B[0m \n\u001B[0;32m 1551\u001B[0m \u001B[38;5;124;03m \"\"\"\u001B[39;00m\n\u001B[1;32m-> 1552\u001B[0m loaded \u001B[38;5;241m=\u001B[39m \u001B[43m_load_from_checkpoint\u001B[49m\u001B[43m(\u001B[49m\n\u001B[0;32m 1553\u001B[0m \u001B[43m \u001B[49m\u001B[38;5;28;43mcls\u001B[39;49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;66;43;03m# type: ignore[arg-type]\u001B[39;49;00m\n\u001B[0;32m 1554\u001B[0m \u001B[43m \u001B[49m\u001B[43mcheckpoint_path\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 1555\u001B[0m \u001B[43m \u001B[49m\u001B[43mmap_location\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 1556\u001B[0m \u001B[43m \u001B[49m\u001B[43mhparams_file\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 1557\u001B[0m \u001B[43m \u001B[49m\u001B[43mstrict\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 1558\u001B[0m \u001B[43m \u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43mkwargs\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 1559\u001B[0m \u001B[43m \u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 1560\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m cast(Self, loaded)\n", - "File \u001B[1;32m~\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\lightning\\pytorch\\core\\saving.py:61\u001B[0m, in \u001B[0;36m_load_from_checkpoint\u001B[1;34m(cls, checkpoint_path, map_location, hparams_file, strict, **kwargs)\u001B[0m\n\u001B[0;32m 59\u001B[0m map_location \u001B[38;5;241m=\u001B[39m map_location \u001B[38;5;129;01mor\u001B[39;00m _default_map_location\n\u001B[0;32m 60\u001B[0m \u001B[38;5;28;01mwith\u001B[39;00m pl_legacy_patch():\n\u001B[1;32m---> 61\u001B[0m checkpoint \u001B[38;5;241m=\u001B[39m \u001B[43mpl_load\u001B[49m\u001B[43m(\u001B[49m\u001B[43mcheckpoint_path\u001B[49m\u001B[43m,\u001B[49m\u001B[43m 
\u001B[49m\u001B[43mmap_location\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mmap_location\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 63\u001B[0m \u001B[38;5;66;03m# convert legacy checkpoints to the new format\u001B[39;00m\n\u001B[0;32m 64\u001B[0m checkpoint \u001B[38;5;241m=\u001B[39m _pl_migrate_checkpoint(\n\u001B[0;32m 65\u001B[0m checkpoint, checkpoint_path\u001B[38;5;241m=\u001B[39m(checkpoint_path \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(checkpoint_path, (\u001B[38;5;28mstr\u001B[39m, Path)) \u001B[38;5;28;01melse\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m)\n\u001B[0;32m 66\u001B[0m )\n", - "File \u001B[1;32m~\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\lightning\\fabric\\utilities\\cloud_io.py:54\u001B[0m, in \u001B[0;36m_load\u001B[1;34m(path_or_url, map_location)\u001B[0m\n\u001B[0;32m 49\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m torch\u001B[38;5;241m.\u001B[39mhub\u001B[38;5;241m.\u001B[39mload_state_dict_from_url(\n\u001B[0;32m 50\u001B[0m \u001B[38;5;28mstr\u001B[39m(path_or_url),\n\u001B[0;32m 51\u001B[0m map_location\u001B[38;5;241m=\u001B[39mmap_location, \u001B[38;5;66;03m# type: ignore[arg-type]\u001B[39;00m\n\u001B[0;32m 52\u001B[0m )\n\u001B[0;32m 53\u001B[0m fs \u001B[38;5;241m=\u001B[39m get_filesystem(path_or_url)\n\u001B[1;32m---> 54\u001B[0m \u001B[38;5;28;01mwith\u001B[39;00m \u001B[43mfs\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mopen\u001B[49m\u001B[43m(\u001B[49m\u001B[43mpath_or_url\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43mrb\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m)\u001B[49m \u001B[38;5;28;01mas\u001B[39;00m f:\n\u001B[0;32m 55\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m torch\u001B[38;5;241m.\u001B[39mload(f, map_location\u001B[38;5;241m=\u001B[39mmap_location)\n", - "File \u001B[1;32m~\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\fsspec\\spec.py:1307\u001B[0m, in \u001B[0;36mAbstractFileSystem.open\u001B[1;34m(self, path, mode, block_size, cache_options, compression, **kwargs)\u001B[0m\n\u001B[0;32m 1305\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[0;32m 1306\u001B[0m ac \u001B[38;5;241m=\u001B[39m kwargs\u001B[38;5;241m.\u001B[39mpop(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mautocommit\u001B[39m\u001B[38;5;124m\"\u001B[39m, \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_intrans)\n\u001B[1;32m-> 1307\u001B[0m f \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_open\u001B[49m\u001B[43m(\u001B[49m\n\u001B[0;32m 1308\u001B[0m \u001B[43m \u001B[49m\u001B[43mpath\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 1309\u001B[0m \u001B[43m \u001B[49m\u001B[43mmode\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mmode\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 1310\u001B[0m \u001B[43m \u001B[49m\u001B[43mblock_size\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mblock_size\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 1311\u001B[0m \u001B[43m \u001B[49m\u001B[43mautocommit\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mac\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 1312\u001B[0m \u001B[43m \u001B[49m\u001B[43mcache_options\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mcache_options\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 1313\u001B[0m \u001B[43m 
\u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43mkwargs\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 1314\u001B[0m \u001B[43m \u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 1315\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m compression \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[0;32m 1316\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mfsspec\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mcompression\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m compr\n", - "File \u001B[1;32m~\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\fsspec\\implementations\\local.py:180\u001B[0m, in \u001B[0;36mLocalFileSystem._open\u001B[1;34m(self, path, mode, block_size, **kwargs)\u001B[0m\n\u001B[0;32m 178\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mauto_mkdir \u001B[38;5;129;01mand\u001B[39;00m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mw\u001B[39m\u001B[38;5;124m\"\u001B[39m \u001B[38;5;129;01min\u001B[39;00m mode:\n\u001B[0;32m 179\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mmakedirs(\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_parent(path), exist_ok\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mTrue\u001B[39;00m)\n\u001B[1;32m--> 180\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mLocalFileOpener\u001B[49m\u001B[43m(\u001B[49m\u001B[43mpath\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mmode\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mfs\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43mkwargs\u001B[49m\u001B[43m)\u001B[49m\n", - "File \u001B[1;32m~\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\fsspec\\implementations\\local.py:302\u001B[0m, in \u001B[0;36mLocalFileOpener.__init__\u001B[1;34m(self, path, mode, autocommit, fs, compression, **kwargs)\u001B[0m\n\u001B[0;32m 300\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mcompression \u001B[38;5;241m=\u001B[39m get_compression(path, compression)\n\u001B[0;32m 301\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mblocksize \u001B[38;5;241m=\u001B[39m io\u001B[38;5;241m.\u001B[39mDEFAULT_BUFFER_SIZE\n\u001B[1;32m--> 302\u001B[0m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_open\u001B[49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\n", - "File \u001B[1;32m~\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\fsspec\\implementations\\local.py:307\u001B[0m, in \u001B[0;36mLocalFileOpener._open\u001B[1;34m(self)\u001B[0m\n\u001B[0;32m 305\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mf \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mf\u001B[38;5;241m.\u001B[39mclosed:\n\u001B[0;32m 306\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mautocommit \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mw\u001B[39m\u001B[38;5;124m\"\u001B[39m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;129;01min\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mmode:\n\u001B[1;32m--> 307\u001B[0m 
\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mf \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mopen\u001B[39;49m\u001B[43m(\u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mpath\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mmode\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mmode\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 308\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mcompression:\n\u001B[0;32m 309\u001B[0m compress \u001B[38;5;241m=\u001B[39m compr[\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mcompression]\n", - "\u001B[1;31mFileNotFoundError\u001B[0m: [Errno 2] No such file or directory: 'C:/Users/Simon Flügel/Desktop/chebai/tutorials/logs/best_epoch=99_val_loss=0.0096_val_macro-f1=0.5358_val_micro-f1=0.8968.ckpt'" + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|████████████████████████████████| 10/10 [00:06<00:00, 1.54it/s]\n" ] } ], "source": [ "# evaluates model, stores results in buffer_dir\n", "model = model_class.load_from_checkpoint(checkpoint_path)\n", - "preds, labels = evaluate_model(\n", + "if buffer_dir is None:\n", + " preds, labels = evaluate_model(\n", + " model,\n", + " data_module,\n", + " buffer_dir=buffer_dir,\n", + " # No need to provide this parameter for Chebi dataset, \"kind\" parameter should be provided\n", + " # filename=data_module.processed_file_names_dict[kind],\n", + " batch_size=10,\n", + " kind=kind,\n", + " )\n", + "else:\n", + " evaluate_model(\n", " model,\n", " data_module,\n", " buffer_dir=buffer_dir,\n", - " filename=data_module.processed_file_names_dict[kind],\n", + " # No need to provide this parameter for Chebi dataset, \"kind\" parameter should be provided\n", + " # filename=data_module.processed_file_names_dict[kind],\n", " batch_size=10,\n", - " )" + " kind=kind,\n", + " )\n", + " # load data from buffer_dir\n", + " preds, labels = load_results_from_buffer(buffer_dir, device=DEVICE)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "201f750c475b4677", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "outputs": [], "source": [ - "# load data from buffer_dir\n", - "load_results_from_buffer(buffer_dir, device=DEVICE)\n", - "with open(os.path.join(data_module.raw_dir, \"classes.txt\"), \"r\") as f:\n", + "# Load classes from the classes.txt\n", + "with open(os.path.join(data_module.processed_dir_main, \"classes.txt\"), \"r\") as f:\n", " classes = [line.strip() for line in f.readlines()]" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "e567cd2fb1718baf", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Macro-F1: 0.290936\n", + "Micro-F1: 0.890380\n", + "Balanced Accuracy: 0.507610\n", + "Macro-Precision: 0.021964\n", + "Micro-Precision: 0.908676\n", + "Macro-Recall: 0.020987\n", + "Micro-Recall: 0.872807\n", + "Top 10 classes (F1-score):\n", + "1. 23367 - F1: 1.000000\n", + "2. 33259 - F1: 1.000000\n", + "3. 36914 - F1: 1.000000\n", + "4. 24431 - F1: 1.000000\n", + "5. 33238 - F1: 1.000000\n", + "6. 36357 - F1: 1.000000\n", + "7. 37577 - F1: 1.000000\n", + "8. 
24867 - F1: 1.000000\n",
+      "9. 33579 - F1: 0.974026\n",
+      "10. 24866 - F1: 0.973684\n",
+      "Found 63 classes with F1-score == 0 (and non-zero labels): 17792, 22563, 22632, 22712, 24062, 24834, 25108, 25693, 25697, 25698, 25699, 25806, 26151, 26217, 26218, 26421, 26469, 29347, 32988, 33240, 33256, 33296, 33299, 33304, 33597, 33598, 33635, 33655, 33659, 33661, 33670, 33671, 33836, 33976, 35217, 35273, 35479, 35618, 36364, 36562, 36916, 36962, 36963, 37141, 37143, 37622, 37929, 37960, 38101, 38104, 38166, 38835, 39203, 46850, 47704, 47916, 48592, 50047, 50995, 72544, 79389, 83565, 139358\n"
+     ]
+    }
+   ],
    "source": [
     "# output relevant metrics\n",
     "print_metrics(\n",
@@ -145,21 +273,21 @@
 ],
 "metadata": {
  "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
     "name": "ipython",
-    "version": 2
+    "version": 3
    },
    "file_extension": ".py",
    "mimetype": "text/x-python",
    "name": "python",
    "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython2",
-   "version": "3.8.13"
+   "pygments_lexer": "ipython3",
+   "version": "3.10.14"
   }
 },
 "nbformat": 4,

From 98342af4b478f0a5a102e0e79551e012fe18f26a Mon Sep 17 00:00:00 2001
From: sfluegel
Date: Thu, 13 Jun 2024 17:22:50 +0200
Subject: [PATCH 08/26] set split variables when required instead of during
 setup

---
 chebai/preprocessing/datasets/chebi.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/chebai/preprocessing/datasets/chebi.py b/chebai/preprocessing/datasets/chebi.py
index eacd0107..85572024 100644
--- a/chebai/preprocessing/datasets/chebi.py
+++ b/chebai/preprocessing/datasets/chebi.py
@@ -578,8 +578,6 @@ def prepare_data(self, *args, **kwargs):
 
     def setup(self, **kwargs):
         super().setup(**kwargs)
-        if not all([self.dynamic_df_train, self.dynamic_df_val, self.dynamic_df_test]):
-            self._get_dynamic_splits()
 
     def _get_dynamic_splits(self):
         """Generates data splits at runtime and saves them in class variables"""
@@ -627,6 +625,15 @@ def _get_dynamic_splits(self):
 
     @property
     def dynamic_split_dfs(self):
+        if any(
+            split is None
+            for split in [
+                self.dynamic_df_test,
+                self.dynamic_df_val,
+                self.dynamic_df_train,
+            ]
+        ):
+            self._get_dynamic_splits()
         return {
             "train": self.dynamic_df_train,
             "validation": self.dynamic_df_val,
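Patch 08 above makes split creation lazy: setup() no longer builds the splits,
and the dynamic_split_dfs property triggers _get_dynamic_splits() on first
access instead. A minimal illustration of the same lazy-initialisation pattern,
with illustrative names only, not the repository's code:

    import pandas as pd


    class LazySplitsExample:
        """Toy stand-in for the data module's split handling."""

        def __init__(self):
            # mirrors dynamic_df_train / dynamic_df_val / dynamic_df_test
            self._train = self._val = self._test = None

        def _get_dynamic_splits(self):
            # the expensive work runs only once, on demand
            self._train = pd.DataFrame({"ident": [1, 2]})
            self._val = pd.DataFrame({"ident": [3]})
            self._test = pd.DataFrame({"ident": [4]})

        @property
        def dynamic_split_dfs(self):
            # create the splits on first access instead of during setup()
            if any(df is None for df in (self._train, self._val, self._test)):
                self._get_dynamic_splits()
            return {"train": self._train, "validation": self._val, "test": self._test}

With this, callers such as evaluate_model can read dynamic_split_dfs directly
without depending on setup() having been run first.

From 89cbdb6d3680ae8a7f0f04c8525afe8aa74bfa6d Mon Sep 17 00:00:00 2001
From: sfluegel
Date: Thu, 13 Jun 2024 17:23:28 +0200
Subject: [PATCH 09/26] remove unnecessary class instantiation

---
 tests/testChebiDynamicDataSplits.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/testChebiDynamicDataSplits.py b/tests/testChebiDynamicDataSplits.py
index efc53420..7efaf430 100644
--- a/tests/testChebiDynamicDataSplits.py
+++ b/tests/testChebiDynamicDataSplits.py
@@ -96,8 +96,7 @@ def _get_hashed_splits(self):
         """Returns hashed dynamic data splits"""
 
         # Get the raw/processed data if missing
-        chebi_class_obj = ChEBIOver50(seed=42)
-        self._generate_chebi_class_data(chebi_class_obj)
+        chebi_class_obj = self.chebi_50_v231
 
         # Get dynamic splits from class variables
         train_data = chebi_class_obj.dynamic_split_dfs["train"]

From b2439f80bafef3a1b072b1b40b1bf65ca0aa40cc Mon Sep 17 00:00:00 2001
From: sfluegel
Date: Thu, 13 Jun 2024 17:32:48 +0200
Subject: [PATCH 10/26] add isort to pre-commit, reformat with isort

---
 .github/workflows/black.yml                 |  2 +-
 .pre-commit-config.yaml                     | 40 +++++++++------------
 README.md                                   | 12 +++----
 chebai/callbacks.py                         |  2 +-
 chebai/callbacks/prediction_callback.py     | 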
5 +-- chebai/loggers/custom.py | 6 ++-- chebai/loss/bce_weighted.py | 8 +++-- chebai/loss/semantic.py | 8 ++--- chebai/models/base.py | 4 +-- chebai/models/chemberta.py | 4 +-- chebai/models/chemyk.py | 4 +-- chebai/models/electra.py | 4 +-- chebai/models/lnn_model.py | 4 +-- chebai/models/recursive.py | 2 +- chebai/preprocessing/bin/BPE_SWJ/vocab.json | 2 +- chebai/preprocessing/collate.py | 2 +- chebai/preprocessing/collect_all.py | 6 ++-- chebai/preprocessing/datasets/base.py | 8 ++--- chebai/preprocessing/datasets/pubchem.py | 16 ++++----- chebai/preprocessing/datasets/tox21.py | 8 ++--- chebai/preprocessing/reader.py | 4 +-- chebai/preprocessing/structures.py | 2 +- chebai/result/analyse_sem.py | 17 ++++----- chebai/result/base.py | 2 +- chebai/result/classification.py | 14 ++++---- chebai/result/molplot.py | 10 +++--- chebai/result/pretraining.py | 2 +- chebai/result/utils.py | 12 ++++--- chebai/train.py | 10 +++--- chebai/trainer/CustomTrainer.py | 8 ++--- configs/data/chebi100.yml | 2 +- configs/data/chebi100_SELFIES.yml | 2 +- configs/data/chebi100_deepSMILES.yml | 2 +- configs/data/chebi100_mixed.yml | 2 +- configs/data/chebi50_mixed.yml | 2 +- configs/data/tox21_moleculenet.yml | 2 +- configs/default_prediction_callback.yml | 2 +- configs/loss/weighting_chebi100.yml | 2 +- configs/metrics/balanced-accuracy.yml | 2 +- configs/metrics/micro-macro-f1.yml | 2 +- configs/model/electra-for-pretraining.yml | 2 +- configs/model/electra.yml | 1 + configs/model/electra_pretraining.yml | 2 +- configs/training/csv_logger.yml | 2 +- configs/training/default_trainer.yml | 2 +- configs/training/early_stop_callbacks.yml | 2 +- configs/training/pretraining_callbacks.yml | 2 +- configs/training/pretraining_trainer.yml | 2 +- configs/training/single_class_callbacks.yml | 2 +- configs/training/wandb_logger.yml | 2 +- docs/source/experiment.rst | 2 +- docs/source/model.rst | 2 +- setup.cfg | 2 +- tests/testCustomBalancedAccuracyMetric.py | 6 ++-- tests/testCustomMacroF1Metric.py | 8 +++-- tests/testPubChemData.py | 4 ++- tests/testTox21MolNetData.py | 4 ++- 57 files changed, 151 insertions(+), 144 deletions(-) diff --git a/.github/workflows/black.yml b/.github/workflows/black.yml index 98b2a668..b04fb15c 100644 --- a/.github/workflows/black.yml +++ b/.github/workflows/black.yml @@ -7,4 +7,4 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - - uses: psf/black@stable \ No newline at end of file + - uses: psf/black@stable diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8ce58be4..108b91d5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,31 +1,25 @@ repos: -#- repo: https://github.com/PyCQA/isort -# rev: "5.12.0" -# hooks: -# - id: isort - repo: https://github.com/psf/black rev: "24.2.0" hooks: - id: black - id: black-jupyter # for formatting jupyter-notebook -# -------- below works for me --- locally -#- repo: https://github.com/pycqa/isort -# rev: 5.13.2 -# hooks: -# - id: isort -# name: isort (python) -# args: ["--profile=black"] -# -#- repo: https://github.com/asottile/seed-isort-config -# rev: v2.2.0 -# hooks: -# - id: seed-isort-config -# -#- repo: https://github.com/pre-commit/pre-commit-hooks -# rev: v4.6.0 -# hooks: -# - id: check-yaml -# - id: end-of-file-fixer -# - id: trailing-whitespace +- repo: https://github.com/pycqa/isort + rev: 5.13.2 + hooks: + - id: isort + name: isort (python) + args: ["--profile=black"] + +- repo: https://github.com/asottile/seed-isort-config + rev: v2.2.0 + hooks: + - id: seed-isort-config +- repo: 
https://github.com/pre-commit/pre-commit-hooks + rev: v4.6.0 + hooks: + - id: check-yaml + - id: end-of-file-fixer + - id: trailing-whitespace diff --git a/README.md b/README.md index 2082f3e0..3c0817ee 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # ChEBai -ChEBai is a deep learning library designed for the integration of deep learning methods with chemical ontologies, particularly ChEBI. +ChEBai is a deep learning library designed for the integration of deep learning methods with chemical ontologies, particularly ChEBI. The library emphasizes the incorporation of the semantic qualities of the ontology into the learning process. ## Installation @@ -21,7 +21,7 @@ pip install . ## Usage -The training and inference is abstracted using the Pytorch Lightning modules. +The training and inference is abstracted using the Pytorch Lightning modules. Here are some CLI commands for the standard functionalities of pretraining, ontology extension, fine-tuning for toxicity and prediction. For further details, see the [wiki](https://github.com/ChEB-AI/python-chebai/wiki). If you face any problems, please open a new [issue](https://github.com/ChEB-AI/python-chebai/issues/new). @@ -55,18 +55,18 @@ The `classes_path` is the path to the dataset's `raw/classes.txt` file that cont ## Evaluation -An example for evaluating a model trained on the ontology extension task is given in `tutorials/eval_model_basic.ipynb`. +An example for evaluating a model trained on the ontology extension task is given in `tutorials/eval_model_basic.ipynb`. It takes in the finetuned model as input for performing the evaluation. ## Cross-validation -You can do inner k-fold cross-validation, i.e., train models on k train-validation splits that all use the same test +You can do inner k-fold cross-validation, i.e., train models on k train-validation splits that all use the same test set. For that, you need to specify the total_number of folds as ``` --data.init_args.inner_k_folds=K ``` and the fold to be used in the current optimisation run as -``` +``` --data.init_args.fold_index=I ``` -To train K models, you need to do K such calls, each with a different `fold_index`. On the first call with a given +To train K models, you need to do K such calls, each with a different `fold_index`. 
On the first call with a given `inner_k_folds`, all folds will be created and stored in the data directory diff --git a/chebai/callbacks.py b/chebai/callbacks.py index ede0bac0..af306ccb 100644 --- a/chebai/callbacks.py +++ b/chebai/callbacks.py @@ -1,8 +1,8 @@ import json import os -from lightning.pytorch.callbacks import BasePredictionWriter import torch +from lightning.pytorch.callbacks import BasePredictionWriter class ChebaiPredictionWriter(BasePredictionWriter): diff --git a/chebai/callbacks/prediction_callback.py b/chebai/callbacks/prediction_callback.py index a0b34262..07e8b82c 100644 --- a/chebai/callbacks/prediction_callback.py +++ b/chebai/callbacks/prediction_callback.py @@ -1,8 +1,9 @@ -from lightning.pytorch.callbacks import BasePredictionWriter -import torch import os import pickle +import torch +from lightning.pytorch.callbacks import BasePredictionWriter + class PredictionWriter(BasePredictionWriter): def __init__(self, output_dir, write_interval): diff --git a/chebai/loggers/custom.py b/chebai/loggers/custom.py index 121ad08b..bb11ea66 100644 --- a/chebai/loggers/custom.py +++ b/chebai/loggers/custom.py @@ -1,11 +1,11 @@ -from datetime import datetime -from typing import Literal, Optional, Union, List import os +from datetime import datetime +from typing import List, Literal, Optional, Union +import wandb from lightning.fabric.utilities.types import _PATH from lightning.pytorch.callbacks import ModelCheckpoint from lightning.pytorch.loggers import WandbLogger -import wandb class CustomLogger(WandbLogger): diff --git a/chebai/loss/bce_weighted.py b/chebai/loss/bce_weighted.py index 2148b644..09ed7276 100644 --- a/chebai/loss/bce_weighted.py +++ b/chebai/loss/bce_weighted.py @@ -1,9 +1,11 @@ +import os +import pickle + +import pandas as pd import torch + from chebai.preprocessing.datasets.base import XYBaseDataModule from chebai.preprocessing.datasets.pubchem import LabeledUnlabeledMixed -import pandas as pd -import os -import pickle class BCEWeighted(torch.nn.BCEWithLogitsLoss): diff --git a/chebai/loss/semantic.py b/chebai/loss/semantic.py index 89db2af0..c4ebeb54 100644 --- a/chebai/loss/semantic.py +++ b/chebai/loss/semantic.py @@ -1,14 +1,14 @@ import csv +import math import os import pickle +from typing import Literal -import math import torch -from typing import Literal -from chebai.preprocessing.datasets.chebi import _ChEBIDataExtractor, ChEBIOver100 -from chebai.preprocessing.datasets.pubchem import LabeledUnlabeledMixed from chebai.loss.bce_weighted import BCEWeighted +from chebai.preprocessing.datasets.chebi import ChEBIOver100, _ChEBIDataExtractor +from chebai.preprocessing.datasets.pubchem import LabeledUnlabeledMixed class ImplicationLoss(torch.nn.Module): diff --git a/chebai/models/base.py b/chebai/models/base.py index b62e1bf8..8b7b65c1 100644 --- a/chebai/models/base.py +++ b/chebai/models/base.py @@ -1,9 +1,9 @@ -from typing import Optional import logging import typing +from typing import Optional -from lightning.pytorch.core.module import LightningModule import torch +from lightning.pytorch.core.module import LightningModule from chebai.preprocessing.structures import XYData diff --git a/chebai/models/chemberta.py b/chebai/models/chemberta.py index 8b3b6175..b601542a 100644 --- a/chebai/models/chemberta.py +++ b/chebai/models/chemberta.py @@ -1,7 +1,8 @@ -from tempfile import TemporaryDirectory import logging import random +from tempfile import TemporaryDirectory +import torch from torch import nn from torch.nn.functional import one_hot from 
torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence @@ -11,7 +12,6 @@ RobertaModel, RobertaTokenizer, ) -import torch from chebai.models.base import ChebaiBaseNet diff --git a/chebai/models/chemyk.py b/chebai/models/chemyk.py index 4705aa1a..13bbea7c 100644 --- a/chebai/models/chemyk.py +++ b/chebai/models/chemyk.py @@ -3,11 +3,11 @@ import pickle import sys +import networkx as nx +import torch from torch import nn from torch.nn import functional as F from torch.nn.functional import pad -import networkx as nx -import torch from chebai.models.base import ChebaiBaseNet diff --git a/chebai/models/electra.py b/chebai/models/electra.py index 76f2711e..4377c29f 100644 --- a/chebai/models/electra.py +++ b/chebai/models/electra.py @@ -1,7 +1,8 @@ +import logging from math import pi from tempfile import TemporaryDirectory -import logging +import torch from torch import nn from torch.nn.utils.rnn import pad_sequence from transformers import ( @@ -10,7 +11,6 @@ ElectraForPreTraining, ElectraModel, ) -import torch from chebai.loss.pretraining import ElectraPreLoss # noqa from chebai.models.base import ChebaiBaseNet diff --git a/chebai/models/lnn_model.py b/chebai/models/lnn_model.py index fdfcdb42..3d61c5af 100644 --- a/chebai/models/lnn_model.py +++ b/chebai/models/lnn_model.py @@ -1,8 +1,8 @@ -from lnn import Implies, Model, Not, Predicate, Variable, World -from owlready2 import get_ontology import fastobo import pyhornedowl import tqdm +from lnn import Implies, Model, Not, Predicate, Variable, World +from owlready2 import get_ontology def get_name(iri: str): diff --git a/chebai/models/recursive.py b/chebai/models/recursive.py index 9e69e5b1..fb408039 100644 --- a/chebai/models/recursive.py +++ b/chebai/models/recursive.py @@ -1,9 +1,9 @@ import logging -from torch import exp, nn, tensor import networkx as nx import torch import torch.nn.functional as F +from torch import exp, nn, tensor from chebai.models.base import ChebaiBaseNet diff --git a/chebai/preprocessing/bin/BPE_SWJ/vocab.json b/chebai/preprocessing/bin/BPE_SWJ/vocab.json index 7e984775..afc12714 100644 --- a/chebai/preprocessing/bin/BPE_SWJ/vocab.json +++ b/chebai/preprocessing/bin/BPE_SWJ/vocab.json @@ -1 +1 @@ 
-{"":0,"":1,"":2,"":3,"":4,"!":5,"\"":6,"#":7,"$":8,"%":9,"&":10,"'":11,"(":12,")":13,"*":14,"+":15,",":16,"-":17,".":18,"/":19,"0":20,"1":21,"2":22,"3":23,"4":24,"5":25,"6":26,"7":27,"8":28,"9":29,":":30,";":31,"<":32,"=":33,">":34,"?":35,"@":36,"A":37,"B":38,"C":39,"D":40,"E":41,"F":42,"G":43,"H":44,"I":45,"J":46,"K":47,"L":48,"M":49,"N":50,"O":51,"P":52,"Q":53,"R":54,"S":55,"T":56,"U":57,"V":58,"W":59,"X":60,"Y":61,"Z":62,"[":63,"\\":64,"]":65,"^":66,"_":67,"`":68,"a":69,"b":70,"c":71,"d":72,"e":73,"f":74,"g":75,"h":76,"i":77,"j":78,"k":79,"l":80,"m":81,"n":82,"o":83,"p":84,"q":85,"r":86,"s":87,"t":88,"u":89,"v":90,"w":91,"x":92,"y":93,"z":94,"{":95,"|":96,"}":97,"~":98,"¡":99,"¢":100,"£":101,"¤":102,"¥":103,"¦":104,"§":105,"¨":106,"©":107,"ª":108,"«":109,"¬":110,"®":111,"¯":112,"°":113,"±":114,"²":115,"³":116,"´":117,"µ":118,"¶":119,"·":120,"¸":121,"¹":122,"º":123,"»":124,"¼":125,"½":126,"¾":127,"¿":128,"À":129,"Á":130,"Â":131,"Ã":132,"Ä":133,"Å":134,"Æ":135,"Ç":136,"È":137,"É":138,"Ê":139,"Ë":140,"Ì":141,"Í":142,"Î":143,"Ï":144,"Ð":145,"Ñ":146,"Ò":147,"Ó":148,"Ô":149,"Õ":150,"Ö":151,"×":152,"Ø":153,"Ù":154,"Ú":155,"Û":156,"Ü":157,"Ý":158,"Þ":159,"ß":160,"à":161,"á":162,"â":163,"ã":164,"ä":165,"å":166,"æ":167,"ç":168,"è":169,"é":170,"ê":171,"ë":172,"ì":173,"í":174,"î":175,"ï":176,"ð":177,"ñ":178,"ò":179,"ó":180,"ô":181,"õ":182,"ö":183,"÷":184,"ø":185,"ù":186,"ú":187,"û":188,"ü":189,"ý":190,"þ":191,"ÿ":192,"Ā":193,"ā":194,"Ă":195,"ă":196,"Ą":197,"ą":198,"Ć":199,"ć":200,"Ĉ":201,"ĉ":202,"Ċ":203,"ċ":204,"Č":205,"č":206,"Ď":207,"ď":208,"Đ":209,"đ":210,"Ē":211,"ē":212,"Ĕ":213,"ĕ":214,"Ė":215,"ė":216,"Ę":217,"ę":218,"Ě":219,"ě":220,"Ĝ":221,"ĝ":222,"Ğ":223,"ğ":224,"Ġ":225,"ġ":226,"Ģ":227,"ģ":228,"Ĥ":229,"ĥ":230,"Ħ":231,"ħ":232,"Ĩ":233,"ĩ":234,"Ī":235,"ī":236,"Ĭ":237,"ĭ":238,"Į":239,"į":240,"İ":241,"ı":242,"IJ":243,"ij":244,"Ĵ":245,"ĵ":246,"Ķ":247,"ķ":248,"ĸ":249,"Ĺ":250,"ĺ":251,"Ļ":252,"ļ":253,"Ľ":254,"ľ":255,"Ŀ":256,"ŀ":257,"Ł":258,"ł":259,"Ń":260,"CC":261,"(=":262,"](":263,"cc":264,"@@":265,")[":266,"CO":267,"NC":268,"CCCC":269,"ccc":270,"OC":271,"]([":272,")(":273,"CN":274,")=":275,"Cl":276,"CCC":277,"-]":278,"([":279,"ccccc":280,")(=":281,"CCCCCCCC":282,"nc":283,"COC":284,"CCN":285,"+]":286,"OCC":287,"Cc":288,"@]":289,"Br":290,"12":291,"]%":292,"@@]":293,".[":294,"cccc":295,"-])":296,"NCC":297,"CS":298,"CCCCC":299,"Nc":300,"COc":301,"nH":302,"NN":303,")/":304,"])":305,"CCOC":306,"CNC":307,"(-":308,"SC":309,")([":310,"Oc":311,"+](=":312,"OP":313,"CCCN":314,"(/":315,"-])=":316,"COP":317,"CCNC":318,"=[":319,"10":320,"21":321,"][":322,"+](":323,"13":324,"-])(=":325,"CCOCC":326,"Na":327,"])[":328,"11":329,"CCCCCC":330,"cnc":331,"NS":332,"NH":333,"CCCCCCCCCCCCCCCC":334,"CCO":335,"@@](":336,"cn":337,"CSC":338,"(\\":339,")\\":340,"CCCCCCC":341,"-].[":342,"CCCCN":343,"Si":344,"CCc":345,"+].[":346,"23":347,")*":348,"SCC":349,"sc":350,"@](":351,"ncc":352,"NCCC":353,"CCCNC":354,"oc":355,"CCCCO":356,"*]":357,"34":358,"OCc":359,"+]([":360,"@]([":361,"-])[":362,"OCO":363,"nnc":364,"14":365,"NNC":366,"CCNCC":367,"])(":368,"].[":369,"NO":370,"nn":371,"32":372,"Cn":373,"ncnc":374,"])([":375,"CCOc":376,"@]%":377,"-].":378,"-])([":379,"COCC":380,"*)=":381,"CCl":382,"CH":383,"([*]":384,"CCS":385,"([*])=":386,"cccnc":387,"CCCCCCCCC":388,"15":389,"OCCO":390,"OS":391,"ccnc":392,"+]=[":393,"Clc":394,"CBr":395,"(*)=":396,"OCCN":397,"*)":398,"43":399,")-":400,"CSc":401,"CNCC":402,"NCc":403,"CCCO":404,"CCCCCCCCCCCCC":405,"CCn":406,"\\[":407,"CCCCCCCCCCC":408,"OCCOCC":409,"FC":410,"@@]([":411,"ON":412,"CCCCCCCCCCCCCCC"
:413,"ccccn":414,"/[":415,"CCCCCCCCCC":416,"NOC":417,"NCCN":418,"SCCNC":419,"CCCCCCCCCCCC":420,"Fc":421,"ccncc":422,"ccn":423,")*)":424,"CCCCCCCCCCCCCC":425,"OB":426,"OCCC":427,"+])":428,"-](":429,"CCCOC":430,"COCCN":431,"16":432,"CCSC":433,"csc":434,"OCCCC":435,"*])":436,"Brc":437,"])=":438,"](/":439,"CCCCCCCCCCCCCCCCCC":440,"NCCCC":441,"#[":442,"cccs":443,"[*]":444,"CNc":445,"17":446,"45":447,"Sc":448,"ccco":449,"NCCCN":450,"-][":451,"cH":452,"CCOP":453,"CCCc":454,"nccc":455,"+][":456,"CCCCCCCCCCCCCCCCC":457,"COS":458,"31":459,"(=[":460,"cnn":461,"CCCCNC":462,"cs":463,"-])(":464,"](=":465,"NCCSC":466,"OO":467,"18":468,"ncn":469,"Sn":470,"ncccc":471,"cncc":472,"CCCCOC":473,"OCCOC":474,"CSCC":475,"54":476,"Fe":477,"+].":478,"NCCc":479,"Mg":480,")*)[":481,"CCCS":482,"-]#[":483,"CCCl":484,"[*])":485,"Cu":486,"Li":487,"Zn":488,"SCc":489,"CNS":490,"].":491,")*)(":492,"NCCO":493,"no":494,"COCCC":495,"OH":496,"Ca":497,"24":498,"CP":499,"cccn":500,"ClC":501,"CCCOc":502,"*)(=":503,"Co":504,"OCOc":505,"Al":506,"CCCCOc":507,"CCNc":508,"*)[":509,"NCCOC":510,"+]=":511,"CNCCC":512,"OCCOCCOCCOCC":513,"CCCCCN":514,"NCCS":515,"Se":516,"CCNS":517,"NCCNC":518,"As":519,"CNCCN":520,"Ni":521,"coc":522,"ccsc":523,"Ru":524,"NNc":525,"Cr":526,"noc":527,"19":528,"OCCNC":529,"OCOC":530,"56":531,"41":532,")*)*":533,"([*])":534,"ccoc":535,"42":536,"CON":537,"*])[":538,")=[":539,"CCCNCC":540,"SCCC":541,"CCCCc":542,"](*)":543,"OCCCN":544,"Pb":545,"CF":546,"CCCCCCCCCCCCCCCCCCC":547,"cncn":548,"COCCOC":549,"OCCCCCC":550,"sccc":551,"-])/":552,"Pd":553,"CCSCC":554,"CNCc":555,"35":556,"cnccc":557,"-]#":558,"65":559,"*)(":560,"CCBr":561,"+])[":562,"CSSC":563,"Ti":564,"22":565,"Rh":566,"on":567,"CCCCS":568,"COCCOCC":569,"CCCCCCN":570,"ClCc":571,"CCCCCCCCCCCCCCCCCCCC":572,"(\\[":573,"Ba":574,"-]=[":575,"COCc":576,"NNN":577,"CI":578,"SS":579,")*)([":580,"OCCCO":581,"CNN":582,")/[":583,"Mo":584,"Zr":585,"Hg":586,"53":587,"Mn":588,"Pt":589,"COCCNC":590,"CNCCO":591,"CCCCCCCCCCCCCCCCCCCCCC":592,"cnccn":593,"]=":594,"CCP":595,"](\\":596,"CCCCCCCCCCCCCCCCCCCCCCCC":597,"NP":598,"COCCO":599,"CSCCC":600,"OCCCCCCCC":601,"SiH":602,"OOC":603,"CCCCCCCCCCCCCCCCCCCCC":604,"(-*)=":605,"CCOS":606,"OCCc":607,"20":608,"OCCOCCO":609,"CCCCCNC":610,"25":611,"(*)(=":612,"*)([":613,"-]=":614,"CCCCCCOC":615,"CCCn":616,"NSC":617,"OCCCCC":618,"Sb":619,"@](=":620,"-*)":621,"SN":622,"nsc":623,"CCCCCO":624,"CCCCCOC":625,"NON":626,"CCSc":627,"BrCc":628,"co":629,"@@](=":630,"nccn":631,"nccs":632,"Ic":633,"Ag":634,"ncccn":635,"CCCCCCCCN":636,"CCCSC":637,"+]#":638,"NCCCCCC":639,"CCCCl":640,"nccnc":641,"Ir":642,"[*])[":643,"ccnn":644,"Au":645,"*)*":646,"NCCCCC":647,"+]#[":648,")\\[":649,"123":650,"HH":651,")[*]":652,"CSSCC":653,"67":654,"CCCCCCCCCCCCCCCCCCCCCCC":655,"BrC":656,")(/":657,"PH":658,"Te":659,"cnnc":660,"ClCC":661,"nnnn":662,"]=[":663,"occc":664,"CCCCCCCCOC":665,"CCNCCN":666,"(/[":667,"--]":668,"NOCC":669,"76":670,"ns":671,"CCCCn":672,"OCCOc":673,"NCCCOC":674,"onc":675,"COCCCN":676,"CCCCCCCCCCCCCCCCCCCCCCCCC":677,"OCCCOC":678,"CCCCCCCCCCCCCCCCCCCCCCCCCC":679,"SSC":680,"CCOCCO":681,"nnn":682,"CCCOCC":683,"+])=":684,"NSN":685,")*)(=":686,"nonc":687,"Cd":688,"NCCCCN":689,"SCCN":690,"occ":691,"46":692,"CSCCN":693,"CCCCCCCN":694,"64":695,"*[":696,"CCCCCOc":697,"Ge":698,"OCCCl":699,"CCCCCCCCCCCCc":700,"NCCCO":701,"cncnc":702,"(*)[":703,"OCCCCCCC":704,"OCCOCCOCCOCCOCCOCCOCCOCC":705,"33":706,"CNNC":707,"52":708,"@@]%":709,"CCCBr":710,"OCN":711,"OCCCCCCCCCC":712,"])(=":713,"+])(=":714,"SCCS":715,"CCCCCCCOC":716,"CCCCCCCCCCCCCCCCOC":717,"NCCCNC":718,"36":719,"CC
OCCN":720,")*)*)*)*":721,"Nn":722,"CNCCCC":723,"BrCC":724,")*)=":725,"COCCCC":726,"CCCCCCCCCc":727,"CCON":728,"26":729,"CCNCc":730,"CCOCCOC":731,"cnnn":732,"75":733,"OCCS":734,"CCCCCCCc":735,"OCCOCCN":736,"CCCCSC":737,"CCCCCCCCCCCCN":738,"++]":739,"Ce":740,"OCCCCCCCCC":741,"scc":742,"(*)":743,"78":744,"OCCCCCCCCCCCCCCCC":745,")(*)[":746,"++].[":747,"86":748,"87":749,"ClH":750,"OCCNCCO":751,"Gd":752,"OCCCNC":753,"CCCCCc":754,"CCOCCC":755,"ncsc":756,"CCCCCCCCCCCCOC":757,"98":758,"Bi":759,"NCN":760,"]([*])":761,"]([*])[":762,"ncnn":763,"NCCOP":764,"Cs":765,"Sr":766,"CCNCCC":767,"=%":768,"La":769,"CNCCOC":770,"CCCCCCCCOc":771,"CCCCCCc":772,"CCCCCCCCCCCCCCCCCCOC":773,"57":774,"OCCCCCCCCCCCC":775,"SCCCC":776,"scnc":777,"CCCCCCCCCCOC":778,"89":779,"97":780,"CCCCCl":781,"CSCc":782,"CCCNc":783,"[*])(":784,"@+]":785,"SSc":786,")(*)":787,"44":788,"Hf":789,"OCCCCCCCCCCC":790,"COCCCNC":791,"CCCCCCNC":792,"CNCCCN":793,"OCCl":794,"OCCOCCOC":795,"OCCCCOC":796,"cscn":797,"-*)=":798,"51":799,"Tl":800,"NCCCOCC":801,"-]([":802,"--](":803,"ONC":804,"Pr":805,"]\\":806,"])\\":807,":*)":808,"CCCCCCCCO":809,"CCCCCCO":810,"NCCCn":811,"Ga":812,"ncoc":813,"NCCOCCOCC":814,"27":815,"314":816,"CCF":817,"CONC":818,"CNCCc":819,"CCCCCCCCCCCCCCCCCCN":820,"OCCCCCN":821,"NCCCCNC":822,"nsnc":823,"Nd":824,"CNCCNC":825,"CCCCCCCCCCN":826,"CCCNS":827,"CCCCCCOc":828,"NCCCS":829,"COCCn":830,"OCCCc":831,"(:*)":832,"COCN":833,"OCCBr":834,"cscc":835,"CCCCCCCCCCCCO":836,"CCOCCCNC":837,"-])(-*)=":838,"In":839,"CCI":840,"cccnn":841,"NCCCc":842,"91":843,"Nb":844,"OCCCCN":845,"COCOC":846,"CSCN":847,"nncn":848,"OCCOCCOCCO":849,"NCCNc":850,"NNCC":851,"OCCCCCCCCCCCCCC":852,"@@+]":853,"([*])[":854,"ncncc":855,"1234":856,"CSN":857,"OCCOCCOCCOCCO":858,"NCCCCCCNC":859,"IC":860,"COCCOc":861,"CCCCOCC":862,"CCCP":863,"NCCSCC":864,"CCOCCCN":865,"OCCCCCCCCCCCCC":866,"OCCCCCCCCCCCCCCCCCC":867,"PC":868,"SH":869,"CCCCCCCCc":870,"CCCCCCCCNC":871,"NCCOCC":872,"NCCOc":873,"CCOCCOCCOCCOCCOCC":874,"(#":875,"++].":876,"29":877,"OCCCCO":878,"NNS":879,"+](-":880,"cncs":881,"cnoc":882,"NOS":883,"ccncn":884,"OCCOCCOCCOC":885,"28":886,"SCCO":887,"nnnc":888,"*)*)(":889,"Eu":890,"NCCCCCCCC":891,"Rb":892,"]#[":893,"])/[":894,"CCCCP":895,"81":896,"Re":897,"nncc":898,"CCOCc":899,"CCNCS":900,"CCSS":901,"NCCCCCN":902,"Sm":903,"]/":904,")(\\":905,"NCCOCCO":906,"NCCCCCCN":907,"37":908,"CB":909,"NCCCCO":910,"Os":911,"]#":912,"COCCc":913,"CCCCNc":914,"CCCCCCCCCN":915,"-])\\":916,"NCCCl":917,"([*])[*]":918,"*)*)*":919,"])/":920,"ocnc":921,"ccnnc":922,"NCNC":923,"CCCCBr":924,"CCCCCS":925,"CCCCCCCCCOC":926,"+])(":927,"OCCF":928,"NCCn":929,"CCCCCCSC":930,"CCCCCCCCCCCCCCCCCCCCCCCCCCC":931,"ccno":932,"Er":933,"IH":934,"OI":935,"Po":936,"ccon":937,"COCCNCC":938,"CCCCNCC":939,"CSCCNC":940,"CCCCCCCCCCCCOS":941,"-*)[":942,"30":943,"61":944,"NCCNCC":945,"SP":946,"Yb":947,"oncc":948,"sn":949,"CCCOP":950,"COB":951,"COCCCCCCCCCCCCCCCC":952,"OCl":953,"CCCNCCC":954,"CCCCCCCCCCCCCCCCCCCCCCCCCCCCCC":955,"OCCCCCOC":956,"OOO":957,"SeH":958,"*)*)*)*)*":959,"68":960,"NCCCCCCC":961,"Ta":962,"sccn":963,"CCCCCBr":964,"CCOCCNC":965,"NCCNS":966,"NCCNCCNCCN":967,"CCCCCCCCCCCCNC":968,"CCCCCCCCCCCCCCOC":969,"OCCCCCCOC":970,"*)*)=":971,"63":972,"735":973,"Ho":974,"SNC":975,"Tb":976,"CCCCCSC":977,"-]/":978,"CSS":979,")([*])":980,"ncco":981,"CCCOS":982,"47":983,"Dy":984,"COCO":985,"CCCCOP":986,"CCCCCOP":987,"CCCF":988,"CCCSCC":989,"CCCCCCCCOP":990,"CCCCCCCCCCO":991,"CCCCCCCCCCOc":992,"+])([":993,"OCCSC":994,"CSCCCC":995,"CCCCCn":996,"CCCCCCn":997,"CCCCCCCCCCCCCCCCCCCCCCCCCCCC":998,"2345":999,"nnsc":1000,"COCCC
OC":1001,"OCCCS":1002,"nocc":1003,"SbH":1004,"NOCc":1005,"SCCOC":1006,"+]\\":1007,"CCOCCOCCN":1008,"cnco":1009,"CCCCCCCCCCCCCCCCN":1010,"CCCCCCCCCCCCCCCCS":1011,"OCCOS":1012,"\\[*]":1013,"conc":1014,"79":1015,"753":1016,"Be":1017,"FS":1018,"[*":1019,"NCl":1020,"OCP":1021,"ClP":1022,"CCCCCCCCCCCCCCCCCCNC":1023,"@](/":1024,"NCCNCCN":1025,"OCCCCl":1026,"NCCCCCO":1027,"TlH":1028,"(*)(":1029,"++]([":1030,"NCO":1031,"On":1032,"SCN":1033,"SCCCCCC":1034,"SCCCS":1035,"Tm":1036,"CCCCNS":1037,"CCCCOCCOC":1038,"OCF":1039,"CCCSc":1040,"CSCCO":1041,"SCF":1042,"+](\\":1043,"CCONC":1044,"@@](/":1045,"nncs":1046,"NNCc":1047,"/[*])":1048,"SSS":1049,"cocn":1050,"(*)([":1051,"48":1052,"55":1053,"FB":1054,"He":1055,"NCCCCCNC":1056,"OCCCCCCCCCCCCCCC":1057,"OCCCCCCCCCCCCCCCCC":1058,"se":1059,")[*])":1060,"CNCCS":1061,"OCCCOCC":1062,"NCCSCc":1063,"CCOCN":1064,"CCCNCCN":1065,"CCOCCOCC":1066,"cnsc":1067,")*)*)":1068,"OCOCC":1069,"OCOCCOC":1070,"([*])([*])":1071,"CCSCCC":1072,"OCCOCCCC":1073,"OCCOCCOCCOCCOCCO":1074,"COCCOCCOC":1075,"74":1076,"BH":1077,"CCB":1078,")[*])[":1079,"ClCCN":1080,"CCCCCCCCS":1081,"CCNN":1082,"OCCn":1083,"CSP":1084,"234":1085,"nnco":1086,"CCSSC":1087,"CCCCCCCCCCCc":1088,"CCCCCCCCCCCCS":1089,"(*":1090,"-*":1091,"73":1092,"NCS":1093,"CCCCCCCCOCC":1094,"BrCCC":1095,"CCOCCOCCOCC":1096,"CCCCCCCCCCCCOc":1097,"CCCCCCCCCCCCOP":1098,"NCCCNCCCCN":1099,"AlH":1100,"*)*)(*)*)(":1101,"23451234":1102,"SO":1103,"SCCc":1104,"SOOO":1105,"snc":1106,"OCNC":1107,")(*)=":1108,"CCCCCCCCCCc":1109,"CCNCCO":1110,"OCCSCC":1111,"CCOCCOCCNC":1112,"COCCNc":1113,"COCCOCCNC":1114,"NCCOCCOCCNC":1115,"*)*)":1116,"Lu":1117,"NCCCCCCCCN":1118,"OCOP":1119,"OCCNCC":1120,"COCCCCC":1121,"CCCI":1122,"CCCCCCCCn":1123,"BrCCc":1124,"CSCCS":1125,"CNCP":1126,"CCCNCCCN":1127,"+](\\[":1128,"CCOCCOCCOCCO":1129,"CCCCCCl":1130,"CCCCCCCO":1131,"CCCCCCBr":1132,"CCCCCCCOc":1133,"COCCS":1134,"OCCNc":1135,"CCCCCCCCCCCOC":1136,"CCCCCCCCCCCCn":1137,"OCCCCCCO":1138,"OCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCC":1139,"(*)*":1140,"/*)":1141,"96":1142,"ONc":1143,"Xe":1144,"COO":1145,"CNCCNCc":1146,"CCCCCCCCSC":1147,"CCCCCCCCCCNC":1148,"OCCCOc":1149,"CSSCCC":1150,"])*":1151,"CCOCCOCCOC":1152,"CCCCCCS":1153,"CCCCCCCCCCCCCCCCOc":1154,"CCCCCCCCCCCCCCCCOP":1155,"CCCCCCCCCCCCCCCCCCOCC":1156,"CCCCCCCCCCCCCCCCCCCCCCCCCCCCC":1157,"CCCCCCCNC":1158,"NCCCSC":1159,"COCCCOc":1160,"CCSSCC":1161,"CNCCCO":1162,"CCCOCCC":1163,"/[*]":1164,"CCCCCCCCCCOP":1165,"NCCNCCO":1166,"ccns":1167,"OCCCCc":1168,"456":1169,"OCCCCCCS":1170,"OCCCCCCOc":1171,"94":1172,"FCC":1173,"NI":1174,"NCOC":1175,"OOCC":1176,"SSCC":1177,"](*)[":1178,"COOC":1179,"CCCCCCOCC":1180,"CCCCCNc":1181,")(*)*":1182,"CNCCCNC":1183,"CCCCCCCCCCS":1184,"CCCCCCCCCCn":1185,"+]/":1186,"NCCOCCOCCO":1187,"NCCOCCOCCN":1188,"-])=[":1189,"CCCCCCCOP":1190,"CCSCc":1191,"OCCOCCC":1192,"CCCOCCOC":1193,"OCCOCCOCC":1194,"CCCCCCCCCCCCCOC":1195,"CCCCCCCCCCCCCCCO":1196,"OCCCCNC":1197,"CCCCCCCCCCCCCCCCCCOP":1198,"NCCCCNCCCN":1199,"COCCOCCO":1200,"COCCOCCOCC":1201,"NCCCCCCO":1202,"CCOCCOCCOCCOCCOCCOCCNC":1203,"--].[":1204,"/*":1205,"62":1206,"83":1207,"OOS":1208,"OCCCCCCCCCCCCCCCCCCCC":1209,"Pc":1210,"SCl":1211,"SCS":1212,"CNOC":1213,"CNCCCCN":1214,"ClCCc":1215,"CCCCCCCCCCCN":1216,"CCCCCCCCCCCO":1217,"ncon":1218,"CCNCCCC":1219,"+])/":1220,"OCCOP":1221,"BrBr":1222,"-])*)[":1223,"NCCF":1224,"CSCCCCC":1225,"CSSSC":1226,"CCCCCCCCCCCCCCCCCOC":1227,"CCCCCCCSC":1228,"CCCNCc":1229,"345":1230,"COCCCO":1231,"CCSCCN":1232,"CCCCCCCCCBr":1233,"OCCNS":1234,"CCCCCCCCCCCCSC":1235,"CCCCCCCCCCCCCOP":1236,"OCCCCCl":1237,"csnn":1238,"*)(*)*":1239,"Bi
H":1240,"[*][":1241,"--][":1242,"59":1243,"71":1244,"NOCCO":1245,"SCCCN":1246,"](-*)":1247,"NCn":1248,"CCCCOCCOCC":1249,"CCCCCCNc":1250,"OCn":1251,"CNCCCCC":1252,"ClCCC":1253,"ClCCCN":1254,"CCCCCCCCOS":1255,"NCCP":1256,"CSCCCCCC":1257,"])\\[":1258,"(-[":1259,"CCCCCCCCCCCCCCCCOCC":1260,"CCCCCCCBr":1261,"CCNCCP":1262,"CCCOCCOCC":1263,"357":1264,"cocc":1265,")#":1266,"*=[":1267,"-[":1268,"58":1269,"72":1270,"85":1271,"=*":1272,"OF":1273,"OON":1274,"ccsn":1275,"COCOc":1276,"CCCCSCC":1277,"CCCCCNS":1278,"CNO":1279,"CCCCCCCCP":1280,"CCCCCCCCCO":1281,"CCCCCCCCCS":1282,"CCCCCCCCCCOCC":1283,"CCNCCCN":1284,"CCNCCNC":1285,"OCCP":1286,"-])*":1287,"NCCCNCC":1288,"NCCCOCCOCC":1289,"CCOCCS":1290,"CCOCCOCCOCCN":1291,"CCOCCCn":1292,"CCCCNCCC":1293,"SCCSC":1294,"COCCCl":1295,"CCSP":1296,"CCSSCCC":1297,"CCCON":1298,"CCCCCCCCCCCCCCN":1299,"CCCCCCCCCCCCOCC":1300,"NCCCCSC":1301,"cnns":1302,"AsH":1303,"CCNCCNCCNCC":1304,"COCCCNc":1305,"*)*)*)*)*)":1306,"++][":1307,"39":1308,"312":1309,"69":1310,"82":1311,"BOB":1312,"FP":1313,"IN":1314,"ICC":1315,"NOc":1316,"OSC":1317,"SCCCCC":1318,"Tc":1319,"]/[":1320,"ssc":1321,"sncc":1322,"snnc":1323,"COCCCCCCCCCCCCCCCCCC":1324,"NCP":1325,"CCCCB":1326,"CCCCNCCCN":1327,"-]/[":1328,"CCNCCOC":1329,"OCCCNCC":1330,"OCCSS":1331,"BrCCCC":1332,"BrCCOc":1333,"CSCCCCCCCCC":1334,"CNCN":1335,"CCCNN":1336,"+](/[":1337,"118":1338,"CCCCCCOP":1339,"CCCCCCCCCCCCCCCCCCCCOC":1340,"CCOO":1341,"SCCCO":1342,"SCCSCC":1343,"CCCCOCCCNC":1344,"341":1345,"NOCCN":1346,"CNCCCOC":1347,"CCCCCCCCCCCBr":1348,"CCCCCCCCCCBr":1349,"CCCCCCCCCCCCCCc":1350,"OCCCCCCN":1351,"NCCCCc":1352,"NCCCCOC":1353,"[*]\\":1354,"NCCCNS":1355,"OCCOCn":1356,"CCCSP":1357,"COCCCn":1358,"OCCOCCOCCOCCOC":1359,"OCCOCCOCCOCCOCCOCCOCCO":1360,"COCCOCc":1361,"COCCOCCOCCOC":1362,"CCCCCCCCCCCCCCCCCCCCCCOC":1363,"SSSSC":1364,"*)*)*)":1365,"nscc":1366,"CNCCCCCCOCC":1367,"*)*)*)*":1368,"*)*)*)*)*)*)*)*)*":1369,"NCCNCCNCCNCCN":1370,"NCCCCCCCCNCCCCCCCCN":1371,"2123":1372,"38":1373,"93":1374,"NCCCCCCCCCC":1375,"NOCOCCOC":1376,"OCCCCCCCCCCCCCCCCCCC":1377,"SOC":1378,"SCSc":1379,"Th":1380,"\\*)(":1381,"ss":1382,"](*)(*)*":1383,"COCCCCCCCC":1384,"CCCCCNCC":1385,")(-*)=":1386,"ClB":1387,"CCCCCCCCBr":1388,"CCCCCCCCCCCNC":1389,"CCNCCc":1390,"cccccc":1391,"NCCCOc":1392,"NCCOCCN":1393,"NCCSc":1394,"NCCOCCOC":1395,"NCCCOCCOC":1396,"CSCCCCCCC":1397,"CSCCl":1398,"CCCNCCOC":1399,"CCOCCn":1400,"CCOCCCO":1401,"CCCCCCP":1402,"CCCCCCCCCCCCCCCCO":1403,"CCCCCCCCCCCCCCCCNC":1404,"CCOCO":1405,"CCOCOC":1406,"cnncc":1407,"SCCn":1408,"NCCCF":1409,"COCCSC":1410,"OCCON":1411,"CCCOCCN":1412,"OCCOCCOCCNC":1413,"CCCCCCCCCCCCCCCc":1414,"CCCCCCCCCCSC":1415,"CCCCCCCCCCCCCN":1416,"OCCCBr":1417,"OCCCSC":1418,"-](/":1419,"OCCCCS":1420,"OCCCCBr":1421,"OCCCCOc":1422,"NCCCNCCCCNC":1423,"NCCCNCCCCNCCCN":1424,"csnc":1425,"ncno":1426,"CNCCNCCNCCN":1427,"567":1428,"*])[*]":1429,"COCCOCCOCCOCC":1430,"COCCCNS":1431,"NCCCCNCCCNC":1432,"OCCCCCCCCCCCOC":1433,"*)*)*)*)*)=":1434,"/*)/":1435,"92":1436,"95":1437,"99":1438,"At":1439,"NB":1440,"NCCCCCCCCCCC":1441,"ONCC":1442,"SCNC":1443,"SOO":1444,"](=[":1445,"al":1446,"COCOCC":1447,"COCCCCCCCCCC":1448,"COCCCS":1449,"NCSC":1450,"CCCCI":1451,"CCCCCOCC":1452,"CCCCOCCCN":1453,"CNSC":1454,"CNCCNCC":1455,"CCCCCCCCNCCCN":1456,"COCn":1457,"COCNC":1458,"OCCI":1459,"@]/":1460,"BrCCN":1461,"NCCBr":1462,"NCCCNc":1463,"NCCCOCCO":1464,"CSCCCCCCCC":1465,"CCCCCI":1466,"CCOCP":1467,"CCOCn":1468,"SCn":1469,"+](=[":1470,"CCCNCCNC":1471,"101":1472,"+](/":1473,"CCOCCCl":1474,"CCOCCOCCOCCOCCOCCOCCOCCOCCOCC":1475,"CCOCCOCCOCCOCCO":1476,"CCCCCCCl":1477,"CCOB":1478,"CCCCCCCS
":1479,"CCCCNCCCCN":1480,"SCCSCCS":1481,"CCCCON":1482,"nnccc":1483,"COCCBr":1484,")-[":1485,"CCCOCCO":1486,"CCCCCCCCCCCCCCO":1487,"CCCCCCCCCCCCCCOc":1488,"OCCCCCOc":1489,"NCCCCl":1490,"NCCCCCCCN":1491,"NCCCCNS":1492,"NCCSSCCNC":1493,"COCCOCCOCCO":1494,"COCCOCCOCCOc":1495,"COCCOCCOCCOCCOCCO":1496,"SSSC":1497,"SSSS":1498,"COCCOCCCC":1499,"COCCOCCC":1500,"COCCOCCN":1501,"OCCCCCCCCOC":1502,"*)*)(*)*":1503,"NCCCCCCCCCCNC":1504,"NCCCNCCCCNCCCNC":1505,"(*)*)*":1506,"(*)*)=":1507,"*-":1508,"66":1509,"656":1510,"77":1511,"711":1512,"84":1513,"88":1514,"@+](":1515,"BO":1516,"NCCCCCCCCCCCCCCCCCC":1517,"NNCCO":1518,"OBr":1519,"OOP":1520,"Rn":1521,"SI":1522,"SCSC":1523,"cco":1524,"COn":1525,"COCCCCN":1526,"NCNc":1527,"CCCCF":1528,"CCCCCOS":1529,"CCCCCCNS":1530,"OCI":1531,"OCS":1532,"CNP":1533,"CNCCCCCC":1534,"CNCCNS":1535,"CNCCOCCO":1536,"CCCCCCCCCCOS":1537,"CCNP":1538,"+][*]":1539,"OCCNCc":1540,"NCCCOCCC":1541,"CSNC":1542,"CSCSC":1543,"CSCCSC":1544,"CCCCCP":1545,"CCCNCCCCN":1546,"CCCNCCNCCC":1547,"=[*]":1548,"CCOCCOc":1549,"NSc":1550,"CCCCCCCCCCCCCCCCCc":1551,"CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC":1552,"CCCCCCCCCCCCCCCCCCOc":1553,"CCOCCCC":1554,"CCOCCCCNC":1555,"(\\[*])":1556,"SCCl":1557,"])(\\":1558,"ncncn":1559,"CCSCCNC":1560,"CCCOCCCC":1561,"CCCCCCCCCCCS":1562,"CCCCCCCCCCCCCc":1563,"CCCCCCCCCCCCCCCN":1564,"COCCNS":1565,"454":1566,"OCCOCCOCCOCCOCCOCC":1567,"SCCCSC":1568,"229":1569,"CCCCCCNCCC":1570,"COCCNCc":1571,"COCCOS":1572,"CSCCCSC":1573,"OCCCCCCCCCCOC":1574,"OCCOCCOCCN":1575,")*)*)*)*)*)*":1576,"NCCOCCOCCOCCNC":1577,"CCOCCOCCOCCOCCN":1578,"CNCCCCCCOCCOCc":1579,"*:":1580,"*=":1581,"*#[":1582,"++](":1583,"40":1584,"49":1585,"467":1586,"50":1587,"60":1588,"70":1589,"80":1590,"Ac":1591,"Ar":1592,"FCc":1593,"NBr":1594,"NCCCCCCCCC":1595,"NOCCOC":1596,"OCCCCCCCCCCCCCCCCCCCCCC":1597,"OCCCCCCCCCCCCCCCCCCCCC":1598,"PN":1599,"Ra":1600,"SB":1601,"SCCNCC":1602,"SCCCCCCCCCCCC":1603,"SSCCC":1604,"\\*":1605,"op":1606,"sscc":1607,"@@+](":1608,"COOCC":1609,"COCCCCCC":1610,"COCCCCNC":1611,"COCCCCOC":1612,"COCCCCCCCCCCCCCCCCCCCC":1613,"NCNCCCC":1614,"CCCCOCCO":1615,"CCCCOCCNC":1616,"OCSC":1617,"]([*]":1618,"CNCOC":1619,"CNOCC":1620,"CNNN":1621,"CNCCOc":1622,"CNCCCc":1623,"CNCCCS":1624,"ClCCn":1625,"ClCCCc":1626,"ClOCl":1627,"-](=":1628,"CCCCCCCCCl":1629,"CCCCCCCCCNC":1630,"CCCCCCCCCOP":1631,"CCCCCCCCNS":1632,"CCCCCCCCCSC":1633,"CCCCCCCCCOCC":1634,"CCCCCCCCON":1635,"CCNCCCNCC":1636,"+]*)":1637,"OCCCOP":1638,"OCCSSC":1639,"BrP":1640,"BrCCOC":1641,"BrCCn":1642,"125":1643,"-])-":1644,"NCCOCc":1645,"NCCNCc":1646,"NCCON":1647,"NCCOCCNC":1648,"NCCOCCOCCOCCOCCOCCOCCOCCOCC":1649,"CSCCCN":1650,"CSCCCO":1651,"CCCCCF":1652,"NNCCN":1653,"])-":1654,")([*])[*]":1655,"102":1656,"131":1657,"CCOCCc":1658,"CCOCCCOC":1659,"CCOCCOCCO":1660,"CCOCCCNCC":1661,"CCOCCOCCOCCOC":1662,"CCCCCCI":1663,"CCCCCCCOCCCN":1664,"NSS":1665,"NSCC":1666,"CCCCCCCCCCCCCCCCCN":1667,"CCCCCCCCCCCCCCCCCCc":1668,"CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC":1669,"CCCCCCCCCCCCCCCCSc":1670,"CCCCCCCCCCCCCCCCCCCCCCN":1671,"CCOOC":1672,"cnon":1673,"cnsn":1674,")\\*":1675,"CCCCNO":1676,"SCCCOC":1677,"NCCCBr":1678,"CCCCOS":1679,"CCCCOCCN":1680,"+]([*])":1681,"COCCl":1682,"COCCCOCC":1683,"CCSN":1684,"CCSCCCC":1685,"CCSCCO":1686,"CCSSc":1687,"OCCNCCC":1688,"OCCNCCOC":1689,"CNCCCl":1690,"CNCCSC":1691,"CNCCOP":1692,"CCCOCCCN":1693,"CCCOCCCNC":1694,"OCCOCCOc":1695,"CCCCCCCCCCP":1696,"CCCCCCCCCCCl":1697,"CCCCCCCCCCCOP":1698,"NCCNCCNC":1699,"CCCCCCCCCCCCCCOCC":1700,"CCCCCCCCCCCCCOS":1701,"CCCCCCCCCCCCCCCOC":1702,"OCCCn":1703,"CCCOCc":1704,"OCCCCOP":1705,"OCCCCCCNC":1706,"])=[":1707,"CCCCC
CCCCCCCCCCCCCP":1708,"NCCCCCOC":1709,"NCCCCOCC":1710,"NCCCCCCNc":1711,"NCCCNCCCN":1712,"CSCCCl":1713,"CCCSCCC":1714,"NCCSSC":1715,"NCCSSCC":1716,"NCCSSCCN":1717,"CNCCNCCN":1718,")*)*)=":1719,"OCCCCCCCl":1720,"COCCOCCOCCN":1721,"COCCOCCOCCOCCN":1722,"COCCOCCOCCOCCOC":1723,"CCCCCCNCCCC":1724,"]=*":1725,"OCCOCCOS":1726,"GeH":1727,"OCCOCCOCCOCCOCCOCCOCCOCCOCCO":1728,"OCCOCCOCCOCCOCCOCCOCCOCCOCCOC":1729,")*)*)*)*)*":1730,"OCCSCCOC":1731,"579":1732,"([*])[*])":1733,"CCOCCOCCOCCOCCOCCOC":1734,"ClCCNP":1735,"CCOCCOCCOCCOCCNC":1736,"NCCOCCOCCOCCC":1737,"NCCOCCOCCOCCN":1738,"NCCCOCCOCCOCCCN":1739,"NCCCOCCOCCOCCCNC":1740,"OCCSSCCOC":1741,"OCCSSCCO":1742,"*)*)*)*)*)*)*)*)*)*)":1743,"#*":1744,"(*)*)(*)*)(":1745,"*)*)[":1746,".*:":1747,"113":1748,"4123":1749,"557":1750,"90":1751,"=*)*)*)*":1752,"BB":1753,"Cm":1754,"FN":1755,"FCl":1756,"FCCC":1757,"FCCOC":1758,"NF":1759,"NCCCCCCCCCCCCCCCC":1760,"NCCCCCCCCCCCC":1761,"NNCCc":1762,"NCCCCCCCCCCCCN":1763,"OCCCCCCCCCCCCOC":1764,"Pu":1765,"SF":1766,"SCCCCCCCCCC":1767,"SCCCc":1768,"\\*)":1769,"]*":1770,"]\\[":1771,"(=*)":1772,"COSC":1773,"COCCCCCCCCC":1774,"COCCCCCCCCCCCC":1775,"NCNCCCCC":1776,"CCCCSc":1777,"OCBr":1778,")(*)(":1779,")(*)*)(*)*)(":1780,"CNCCCCO":1781,"CNNCc":1782,"CNCCCCNC":1783,"CNCCCNCC":1784,"CNCCCCCCN":1785,"ClS":1786,"ClCl":1787,"ClCCOP":1788,"ClCCCl":1789,"ClSS":1790,"-]%":1791,"CCCCCCCCl":1792,"CCCCCCCCCCl":1793,"CCCCCCCCNCCCC":1794,"CCCCCCCCCCOCCO":1795,"CCCCCCCCCCOCCC":1796,"CCCCCCCCOCCOS":1797,"CCNNC":1798,"CCNCCNCC":1799,"+]/[":1800,"OCCSc":1801,"OCCCON":1802,"OCCCNCCCO":1803,"@]\\":1804,"BrCCCCN":1805,"BrCCCc":1806,"-])/[":1807,"NCCOCCC":1808,"NCCOCCOCCOCCOCC":1809,"NCCOCCOCCOCCOCCOCCO":1810,"CSI":1811,"CSO":1812,"CSCO":1813,"CSCOC":1814,"CSCS":1815,"CSCNC":1816,"CSCCCNC":1817,"CSCSCC":1818,"CSCCCS":1819,"CSCCNc":1820,"NNNC":1821,"])*)=":1822,"CNCNC":1823,"(-*)":1824,"(-*)(=":1825,"(/[*])[":1826,"10467":1827,"213":1828,"CCOCCl":1829,"CCOCCBr":1830,"CCOCCCNc":1831,"CCCCCCCCCCCCCCCCc":1832,"CCCCCCCCCCCCCCCCNS":1833,"CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC":1834,"CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCOC":1835,"CCOCCCCCC":1836,"CCOCCNc":1837,"CCCCCCCn":1838,"CCCCNP":1839,"CCCCNCCCC":1840,"@](\\":1841,"NCCCSCC":1842,"CCCCOB":1843,"CCCCOCCC":1844,"*])=":1845,"343":1846,"OCOCCOCCO":1847,"nncnn":1848,"CCNCCCO":1849,"])(*)*":1850,"COCCCOCCOC":1851,"CCSNC":1852,"CCSCCOC":1853,"CCSSCCNC":1854,"CCSSSC":1855,"CCCCCCCCCI":1856,"OCCOO":1857,"CNCCCOc":1858,"CNCCSCC":1859,"CNCCCOCC":1860,"CCCOCCOCCO":1861,"CCCOCCOCCOCCOCCOCCOCC":1862,"CCCCCCCCCCCCCNC":1863,"OCCOCCOCCS":1864,"CCCCCCCCCCCCCCCBr":1865,"CCCCCCCCCCI":1866,"NCCNCCCNCCN":1867,"CCCCCCCCCCCCP":1868,"CCCCCCCCCCCCCO":1869,"CCCCCCCCCCCCNCC":1870,"CCCCCCCCCCCCCOCC":1871,"CCCCCCCCCCCCSCCC":1872,"CCCCCCCCCCCCOCCOCCOCCO":1873,"CCCCCCCCCCCCOCCOS":1874,"CCCCCCCCCCCCCCS":1875,"CCCCCCCCCCCCCCCOCC":1876,"OCCCF":1877,"OCCCCI":1878,"OCCCCCS":1879,"OCCCCOCCCO":1880,"CCCCCCCCCCCCCCCCCCBr":1881,"[*])=":1882,"NCCCNCCN":1883,"318":1884,"OOOC":1885,"ncns":1886,"CCCCOCN":1887,"CCCSSC":1888,"CCCSSCCCC":1889,"[*])-":1890,"OCCOCCOCCOCCOc":1891,"OCCOCCOCCOCCOCCOCCOCCOCCO":1892,"NCCSS":1893,"197":1894,"5678":1895,"4253":1896,"OCCCNCCO":1897,"OCCCCCCl":1898,"*)(*)=":1899,"CCCCSCCCC":1900,"COCCOCCOCCOCCOCCOCCOCCOCCOCCOCC":1901,"COCCOCCOCCOCCOCCOCCO":1902,"COCCOCCOCCOCCOCCOC":1903,"COCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCC":1904,"COCCOCCOCCOCCOCC":1905,"CCCCCCCCCCCCCCCCCCCCCO":1906,"(\\[*])[*]":1907,"SSN":1908,"SSCCCCCCO":1909,"537":1910,"CCCCCCCCCCCCCCCCCCCCCCCCCO":1911,"CCCCCCCCCCCCCCCCCCCCCCCCOC":1912,"CCCCCCCCCCCCCC
CCCCCCCCCCCCCO":1913,"COCCOCCCNC":1914,"OCCCCCCCCCCOc":1915,"OOCl":1916,"201":1917,"CCCCCCCCNCCCCCCCC":1918,"NCCCCCCBr":1919,"CCNCCNCCN":1920,"468":1921,"OCCCCCCCc":1922,")*)*)*)*)*)*)*)*)*)*":1923,"CCCCCCCCCCCCNCCC":1924,"869":1925,"OCCCNCc":1926,"8915":1927,"@+]%":1928,"OCCOCCOCCOCCC":1929,"12345678":1930,"OCCOCCOCCOCCOS":1931,"NCCSCCCO":1932,"CCOCCOCCOCCOCCOCCNC":1933,"CCOCCOCCOCCOCCOCCOCCO":1934,"CCOCCOCCOCCOCCOCCOCCOCCOCCNC":1935,"234567":1936,"[*+":1937,"[*-]":1938,"CCOCCOCCOCCOCCC":1939,"(*)*)*)(":1940,"CCCCOCCOCCOP":1941,"CCCOCCOCCOCCOCCOCCOC":1942,"CCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOC":1943,"@]/%":1944,"CNCCCCCCOCCCCc":1945,"CNCCCCCCNCCCCCCCCNCCCC":1946,"ClSSCl":1947,"(/[*])[*]":1948,"CCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOC":1949,"123456789":1950,"(#[":1951,"*(":1952,"**":1953,"*/":1954,"++])=":1955,"--]([":1956,"/*)(=":1957,"212":1958,"245":1959,"321":1960,"642":1961,"6123":1962,"696":1963,"713":1964,"822":1965,"846":1966,"912":1967,"933":1968,"=*)":1969,"Bc":1970,"FCOC":1971,"IB":1972,"IP":1973,"ICCC":1974,"ICc":1975,"Kr":1976,"Ne":1977,"NNCCCN":1978,"NOCCCN":1979,"NCOCCCC":1980,"NCCCCCCCCO":1981,"NCCCCCCCCCCN":1982,"Op":1983,"ONO":1984,"ONCc":1985,"OOCCCC":1986,"OCCCCCNC":1987,"PO":1988,"Pa":1989,"PCC":1990,"PCCP":1991,"Rf":1992,"SNCC":1993,"SCCOCC":1994,"SCCCCCCC":1995,"SCCCCCCCCCCC":1996,"SCCCCl":1997,"SCCOCCO":1998,"SCCOCCN":1999,"SCCCCSC":2000,"]*)[":2001,"(=*)([":2002,"](*)(":2003,"](*)([":2004,"](-*)[":2005,"ccs":2006,"ccocc":2007,"@@+]([":2008,"COI":2009,"COCCCCCCC":2010,"CONCCC":2011,"CONCc":2012,"COCCCCCCCCCCCCCC":2013,"COCCCc":2014,"COCOCCC":2015,"COCCCCS":2016,"COCCCCCNC":2017,"NCNS":2018,"NCSCC":2019,"CCCCNN":2020,"CCCCOCc":2021,"CCCCCSc":2022,"CCCCCSCC":2023,"CCCCCON":2024,"CCCCCCCNCC":2025,"CCCCCOCCOC":2026,"CCCCOCCOCCOC":2027,"CCCCCCOCCCN":2028,"CCCCOCCOCCCC":2029,")(*)(*)*":2030,"CNn":2031,"CNCO":2032,"CNNc":2033,"CNOCc":2034,"CNCCCCOC":2035,"CNCCCCc":2036,"CNCCCCCO":2037,"CNCCCCl":2038,"CNCCOCCOC":2039,"CNCCCCCCO":2040,"ClI":2041,"Cln":2042,"ClCOC":2043,"ClCCNC":2044,"ClCCO":2045,"ClCn":2046,"ClCCl":2047,"ClCSc":2048,"ClCCCCc":2049,"ClCOCc":2050,"ClCCSc":2051,"-]*":2052,")(=[":2053,"CCCCCCCCCOc":2054,"CCCCCCCCOCCO":2055,"CCCCCCCCOCCOCC":2056,"CCCCCCCCSc":2057,"CCCCCCCCCNS":2058,"CCCCCCCCCCNS":2059,"CCCCCCCCCON":2060,"CCCCCCCCCCOCCCN":2061,"CCCCCCCCCCSS":2062,"CCCCCCCCCSCCCC":2063,"CCCCCCCCCOCCOS":2064,"CCCCCCCCCCOCCOCCOCCOCCOCCOCCO":2065,"nco":2066,"ncs":2067,"ncnnc":2068,"COCP":2069,"COCS":2070,"COCBr":2071,"CCNO":2072,"CCNNCC":2073,"CCNSC":2074,"CCNCCCCNCC":2075,"CCNCCCCNCCCCN":2076,"+]%":2077,"+])=[":2078,"OCCOCc":2079,"OCCOCO":2080,"OCCCOCCN":2081,"OCCCOS":2082,"OCCCNS":2083,"OCCSCCC":2084,"OCCCOCCOC":2085,"OCCCOCc":2086,"OCCSCCS":2087,"@]=":2088,"BrB":2089,"BrCCCN":2090,"BrCCCn":2091,"121":2092,"127":2093,"@@]/":2094,"@@]\\":2095,"-])*)*":2096,"NCCCOP":2097,"NCCCOCCN":2098,"NCCOCCS":2099,"NCCOCCOCCOC":2100,"NCCOCCBr":2101,"NCCCOCCOCCO":2102,"NCCOCCOCCOCCOCCOCCOCCOCCO":2103,"CSCCOC":2104,"CSCCCCN":2105,"CSSc":2106,"CSCCCCCN":2107,"CSSCCN":2108,"CSSCCO":2109,"CSOOO":2110,"CSSSSSC":2111,"CSSSSC":2112,"NNCCCC":2113,"NNNN":2114,")/*":2115,")([*])[":2116,"OPO":2117,"OPOC":2118,"CCCNO":2119,"CCCNNC":2120,"CCCNCCCC":2121,"CCCNCCCNC":2122,"CCCNCCS":2123,"100":2124,"103":2125,"1045":2126,"10345":2127,"10642":2128,"+](*)":2129,"133":2130,"CCOCCI":2131,"CCOCCOS":2132,"CCOCCOCCC":2133,"CCOCCOCCCC":2134,"CCOCCCNCCN":2135,"CCOCCOCCOCCOCCOCCO":2136,"NaH":2137,"111":2138,"CCCCCCF":2139,"CCCCCCOS":2140,"CCCCCCOCCC":2141,"CCCCCCCOCCC":2142,"CCCCCCSCCC":2143,"CCCCCCCOCCO":2144,"NSN
S":2145,"CCCCCCCCCCCCCCCCP":2146,"CCCCCCCCCCCCCCCCCO":2147,"CCCCCCCCCCCCCCCCNc":2148,"CCCCCCCCCCCCCCCCCCO":2149,"CCCCCCCCCCCCCCCCCCCCN":2150,"CCCCCCCCCCCCCCCCSCC":2151,"CCCCCCCCCCCCCCCCCOCC":2152,"CCCCCCCCCCCCCCCCCCS":2153,"CCCCCCCCCCCCCCCCOCCO":2154,"CCCCCCCCCCCCCCCCCCSC":2155,"CCCCCCCCCCCCCCCCCCCc":2156,"CCCCCCCCCCCCCCCCCCCCc":2157,"CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC":2158,"CCOOCC":2159,"CCOCCCCO":2160,"CCOCCNCC":2161,"CCOCOCC":2162,"CCOCCCS":2163,"CCOCCNS":2164,"@@](*)(":2165,"(\\[*])=":2166,"CCCCCCCP":2167,"CCCCNCCCCNCC":2168,"238":2169,"SCCCl":2170,"SCCBr":2171,"SCCOP":2172,"NCCCNCc":2173,"CCCCOCCCC":2174,"CCCCOOC":2175,"CCCCOCOCCCC":2176,"346":2177,"OCON":2178,"OCOS":2179,"nncnc":2180,"NNCNNC":2181,"CCNCCCOC":2182,"CCNCCSC":2183,"CCNCCSS":2184,"])(*)[":2185,"COCCF":2186,"([*])(":2187,"([*])(=":2188,"CCSCCCCCCCCCCCC":2189,"CCSSCCN":2190,"CCCCCCCCCF":2191,"151":2192,"OCCOCCCCCC":2193,"OSOO":2194,"OCCNCCCN":2195,"OCCNCCCS":2196,"CCCOO":2197,"CCCOCCCCC":2198,"CCCOCCNC":2199,"\\[*])":2200,"CCCCCCCCCCCI":2201,"CCCCCCCCCCCP":2202,"CCCCCCCCCCCSCC":2203,"OCCOCCF":2204,"OCCOCCS":2205,"OCCOCCCOC":2206,"OCCOCCSC":2207,"OCCOCCOCCOc":2208,"OCCOCCOCCF":2209,"CCCCCCCCCCF":2210,"NCCNCCCC":2211,"NCCNCCC":2212,"CCCCCCCCCCCCl":2213,"CCCCCCCCCCCCCl":2214,"CCCCCCCCCCCCBr":2215,"CCCCCCCCCCCCNc":2216,"CCCCCCCCCCCCSCC":2217,"CCCCCCCCCCCCOCCO":2218,"CCCCCCCCCCCCCNc":2219,"CCCCCCCCCCCCOCCOC":2220,"CCCCCCCCCCCCCOCCO":2221,")*)#":2222,"CCCCCCCCCCCCCCBr":2223,"CCCCCCCCCCCCCCOP":2224,"CCCCCCCCCCCCCCOS":2225,"OCCCI":2226,"OCCCP":2227,"COCCNN":2228,"COCCNCCNC":2229,"COCCNCCNS":2230,"OCCCCCO":2231,"OCCCCOCC":2232,"OCCCCCc":2233,"OCCCCCCOCC":2234,"OCCCCNCc":2235,"NCCCCI":2236,"NCCCCn":2237,"NCCCCBr":2238,"NCCCCCS":2239,"NCCCCNc":2240,"NCCCCCCOC":2241,"NCCCCOCCOC":2242,"NCCCCCCNS":2243,"4567":2244,"NCCCNCCS":2245,"NCCCNCCCNCCCN":2246,"NCCCNCCSP":2247,"+][*])[":2248,"-])(*)=":2249,"](=*)*":2250,"OOCCCCO":2251,"OOOO":2252,"CSCCSCC":2253,"CSCCSCCS":2254,"CCCSN":2255,"CCCSSc":2256,"CCCSSCCC":2257,"CNCCCP":2258,"CNCCCn":2259,"CNCCCBr":2260,"CNCCCSC":2261,"OCCOCCOCCOCCNC":2262,"OCCOCCOCCOCCOCCOC":2263,"OCCOCCOCCOCCOCCOCCO":2264,"OCCOCCOCCOCCOCCOCCN":2265,"OCCOCCOCCOCCOCCOCCOCCOC":2266,"NCCSP":2267,"NCCSCCN":2268,"CNCCNCCCNCCNC":2269,"OCOCc":2270,"564":2271,")*)*)*":2272,")*)*)*)":2273,"SCCCSCC":2274,"PbH":2275,"OCCCCCCSSCCCCCCO":2276,"CCSCCS":2277,"CCSCCCO":2278,"CCSCCCl":2279,"CCSCCSCC":2280,"CCSCCSP":2281,"*)(*)*)(*)*)(":2282,"228":2283,"CCCCSP":2284,"CCCCSCCO":2285,"COCCOCCl":2286,"COCCOCCCOC":2287,"COCCOCCOc":2288,"COCCOCCOS":2289,"COCCOCCOCCC":2290,"COCCOCCOCCCN":2291,"COCCOCCOCCOCCO":2292,"COCCOCCOCCS":2293,"CCCCCCNO":2294,"CCCCCCNCCCCC":2295,"SSCCCC":2296,"SSCCCCCCCC":2297,"CNCCOCCC":2298,"CCCCCCCCCCCCCCCCCCCCCCCO":2299,"CCPC":2300,"CCPCC":2301,"CCCCCCCCCCCCCCCCCCCCCCCCCOC":2302,"COCCOO":2303,"COCCOCCNCC":2304,"CSCCCSCC":2305,"OCCCCCCCCNC":2306,"OCCCCCCCCCOC":2307,"CCOSOOO":2308,"202":2309,"203":2310,"252":2311,"CCCCCCOCc":2312,"OCCCCCBr":2313,"CCNCCNCCNCCN":2314,"NOCCS":2315,"NCCCCNP":2316,"CCCCCCCNS":2317,"NCCCOCCOc":2318,"OCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCO":2319,"OCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCO":2320,"CNCCCCCCNC":2321,"CNCCCCCCNCC":2322,"COCCCCCl":2323,"COCCCCOCC":2324,"785":2325,"OCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC":2326,"573":2327,"OCCCCCCCCCCCS":2328,"OCCCCCCCCCCCSS":2329,"NCCOCCOCCOCCO":2330,"NCCOCCOCCOCCOCCC":2331,"CCCCCCCCCCNCCCCCCCCC":2332,"12345":2333,"CCCCOCCOP":2334,"*)*)(*)*)=":2335,"NCCCCCCCCCCO":2336,"CCSSCCCCCCCCCCCCCCC":2337,"NCCOCCON":2338,"CCCCCCCCCNCCCCCCCCC":2339,"*)*)*)*)":2340,"C
CCCCSCCO":2341,"CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCO":2342,"*)*)*)*)*)*)=":2343,"474":2344,"CCCSCCSCCC":2345,"CCCCCCCCCCCCCCCCCCCCCCCCCCCCCOC":2346,"SSSSCC":2347,"OCCCOCCCO":2348,"CCOCCOCCOP":2349,"CCCCCCCCCCCCSCCCCCCCCCCCC":2350,"(*)*)":2351,"(*)*)[":2352,"CCCCCCCCOCCOP":2353,"CCOCCOCCOCCNC":2354,"CCOCCOCCOCCOCC":2355,"CCCCCCSSCCS":2356,"CCCCCCCCCCCCCCCCCCOCCCOC":2357,"CCSSCCn":2358,"NCOCNC":2359,"CCCCCCOCCOP":2360,"NCCCCNCCCNCCCN":2361,"/*)\\[":2362,"CCCCCCCCCCCOCCO":2363,"CCCCOCCOCCNC":2364,"NOCCNB":2365,"CNCCNCCNCCNCCNCCN":2366,"COCCOCCOCCOCCNC":2367,"COCCOCCOCCOCCCN":2368,"/*)/*":2369,"CCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCNC":2370,"CCOCCOCCOCCOCCOCCC":2371,"SBS":2372,"NCCCNCCCNCCCCCNC":2373,"(*)*)(*)*)(*)*)(*)*)(":2374,"NCCOCCOCCOCCOCCOCCOCCC":2375,"CCCCCCCCCCCCOCCOCCOCCOS":2376,"CNCCCCCCNCCCCCCCCNCCCCCCNCc":2377,"SCCCCCCCCCCCn":2378,"ClCCOCOCCCl":2379,"CCNCCCCNCCCCNCCCCNCCCCNCC":2380,"CCNCCSSCCNCCC":2381,"(*)*)(":2382,"(*)*)(*)*":2383,"*#":2384,"*\\":2385,"*([":2386,"*)/":2387,"*)\\":2388,"*)\\*":2389,"*(*)*)":2390,"++](=":2391,"--]=[":2392,"-]([*])[":2393,".*":2394,"/*)=":2395,"/*)[":2396,"1123":2397,"112345":2398,"210":2399,"224":2400,"256":2401,"265":2402,"356":2403,"375":2404,"3345":2405,"412":2406,"532":2407,"613":2408,"686":2409,"718":2410,"824":2411,"=*)=":2412,"@+]([":2413,"Am":2414,"Bh":2415,"Bk":2416,"Cf":2417,"Db":2418,"Ds":2419,"Es":2420,"FF":2421,"FI":2422,"Fm":2423,"FCCN":2424,"FCCOc":2425,"FCCl":2426,"FCBr":2427,"FCCBr":2428,"FCCCBr":2429,"FCCI":2430,"FCCCCBr":2431,"FCCCCCBr":2432,"FCCCCCCBr":2433,"FCCCCF":2434,"FCCCCCCCCCCF":2435,"Hs":2436,"II":2437,"IO":2438,"ICCCC":2439,"IOC":2440,"ICCCCCCCC":2441,"ICCN":2442,"ICI":2443,"ICCI":2444,"ICCOCc":2445,"ICCOCCOCC":2446,"ICCCI":2447,"ICCCCCI":2448,"ICCCCCCI":2449,"ICCCCCCCCCI":2450,"ICCCCCCCCCCI":2451,"ICCOCCI":2452,"ICCCCCCCCCCCI":2453,"KH":2454,"Lr":2455,"Md":2456,"Mt":2457,"No":2458,"NCOCC":2459,"NCCCCCCCCCCCCCCC":2460,"NCCCCCCCCCCCCCC":2461,"NNCCS":2462,"NCOCCOC":2463,"NOCCCO":2464,"NOCCc":2465,"NCCCCCCCCOC":2466,"NNCCCO":2467,"NCCCCCCCCCCCCCCCCCCN":2468,"NCOCN":2469,"NCCCCCCCCNC":2470,"NCCCCCCCCCCCCNC":2471,"NCCCCCCCCCCCCCCN":2472,"ONN":2473,"OOc":2474,"ONS":2475,"ONNC":2476,"ONCCCN":2477,"ONCCc":2478,"OCCCCCCCCCCCCCCCCCCCCCCCC":2479,"OCCCCCCCCCCCCCCCCCCCCCCCCC":2480,"OCCCCCCCCCCCCCCCCCCCCCCCCCC":2481,"OOCCCOCC":2482,"PI":2483,"PP":2484,"PS":2485,"Pm":2486,"PCl":2487,"POCCC":2488,"PNP":2489,"POOP":2490,"Rg":2491,"SCO":2492,"SCOC":2493,"SBr":2494,"SnH":2495,"SNN":2496,"SOP":2497,"SNCCC":2498,"SCCCNC":2499,"SCCCCO":2500,"SCCCCCCCCC":2501,"SOS":2502,"SSCCNC":2503,"SNCCCC":2504,"SCCCCOC":2505,"SCSCC":2506,"SSCc":2507,"SCCCCOc":2508,"SCCCCCN":2509,"SCCNS":2510,"SCCCCS":2511,"SOOC":2512,"SCCCCCNC":2513,"SCCON":2514,"SCCCCCCCCCCCCN":2515,"SCCCCCCO":2516,"SCCCCCCCCCCN":2517,"SNCCOCC":2518,"SCCOCCOCCOCCOCCOCC":2519,"SCCCCCCCCCCCCCCCCS":2520,"SCSP":2521,"SCCCCCCCCCCCc":2522,"SCCOCCOCCOCC":2523,"SCCCCCCCCCBr":2524,"SCCCCCCCCCS":2525,"SCCOCCS":2526,"SCCCCCCCCCCCBr":2527,"SNSNS":2528,"\\*)=":2529,"onnc":2530,"pH":2531,"scnn":2532,"te":2533,"(=*)=":2534,"(=*)*)=":2535,"](-":2536,"](*)(=":2537,"](-[":2538,"](*)*)(*)*":2539,"ccccccc":2540,"ccss":2541,"COF":2542,"COOP":2543,"CONS":2544,"COCCCCO":2545,"CONNC":2546,"COOS":2547,"COOCCN":2548,"COCCCCCCCCCCC":2549,"COCCCCCCCCCCCCCCC":2550,"COCNc":2551,"COCCCCCN":2552,"COCCCNCC":2553,"COCCCCc":2554,"COCOCCOC":2555,"COCCCCCCOC":2556,"COCCCCCO":2557,"COCCCCCOC":2558,"COCCCCCCO":2559,"COCOCOC":2560,"CONCCCCCCNC":2561,"COCCCCCCS":2562,"COCCCCOCCC":2563,"NCNN":2564,"NCSc":2565,"NCSCCC":2566,"NCNCN":2
567,"NCSCN":2568,"CCCCNCc":2569,"CCCCNCCO":2570,"CCCCCOCCC":2571,"CCCCNCCOC":2572,"CCCCCNCCC":2573,"CCCCNCCS":2574,"CCCCCSSC":2575,"CCCCOCCOCCO":2576,"CCCCOCCCOC":2577,"CCCCCCOCCO":2578,"CCCCOCCCl":2579,"CCCCOCCS":2580,"CCCCOCCOCCOCCO":2581,"CCCCOCCOCCOCCOCCO":2582,"CCCCOCCOS":2583,"CCCCCCOCCOCCOCCO":2584,"CCCCCOCCOCCO":2585,"CCCCOCCOCCOS":2586,"CCCCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCC":2587,"cccoc":2588,"OCSOC":2589,")(\\[":2590,")(*)*)(":2591,")(*)*)=":2592,")(*)*)":2593,")(*)*)(*)*":2594,")(*)*)[":2595,"CNI":2596,"CNNCC":2597,"CNOP":2598,"CNCCOCC":2599,"CNNNC":2600,"CNCOCC":2601,"CNCCCCCCCCCCCC":2602,"CNOCCOC":2603,"CNCCCCCNC":2604,"CNCCCCCCOC":2605,"CNCCCCCCCCNC":2606,"CNCCOCCOCC":2607,"CNCCCCCCNCCC":2608,"CNCOOCC":2609,")=*":2610,")=*)=":2611,")=*)":2612,"ClN":2613,"ClO":2614,"Clp":2615,"ClCCCC":2616,"ClBr":2617,"ClCCCCC":2618,"ClCOc":2619,"ClCCOC":2620,"ClCCOCC":2621,"ClCCCCN":2622,"ClCCNCC":2623,"ClCCOc":2624,"ClCOCC":2625,"ClCBr":2626,"ClCCSC":2627,"ClCCCOc":2628,"ClCCNc":2629,"ClCCBr":2630,"ClCCCn":2631,"ClCCCCl":2632,"ClCCCCCl":2633,"ClCSCc":2634,"ClCCOCc":2635,"ClNCNC":2636,"ClCCCCBr":2637,"ClCCCCCBr":2638,"ClCCCSc":2639,"ClCCOCCOCCOCC":2640,"ClCCCCCCl":2641,"ClCCCCCCBr":2642,"ClCCSSCC":2643,"ClCCSCc":2644,"ClSCl":2645,"ClCCCCCCCBr":2646,"ClCSCCl":2647,"ClCCCCI":2648,"ClCCOCCCl":2649,"ClCCCCCCCl":2650,"ClCCCCCCCCCl":2651,"ClCCCCCCI":2652,"ClCCCCCCCCl":2653,"ClCCCCCCCCCCl":2654,"ClCCOCCl":2655,"ClCCCCCCCCCI":2656,"ClOOCl":2657,"ClCCSCCCl":2658,"ClCOCCOCCl":2659,"-]\\[":2660,"-][*]":2661,"CCCCCCCCB":2662,"CCCCCCCCF":2663,"CCCCCCCCNCC":2664,"CCCCCCCCNc":2665,"CCCCCCCCSCC":2666,"CCCCCCCCNCCC":2667,"CCCCCCCCCCNCC":2668,"CCCCCCCCCn":2669,"CCCCCCCCNOC":2670,"CCCCCCCCOB":2671,"CCCCCCCCOCCC":2672,"CCCCCCCCOCCCC":2673,"CCCCCCCCCOS":2674,"CCCCCCCCOO":2675,"CCCCCCCCCSCC":2676,"CCCCCCCCNCCO":2677,"CCCCCCCCCCNc":2678,"CCCCCCCCNCCOC":2679,"CCCCCCCCCOCCOC":2680,"CCCCCCCCCOCCO":2681,"CCCCCCCCOCCCCCCCC":2682,"CCCCCCCCOCCCCC":2683,"CCCCCCCCCCSc":2684,"CCCCCCCCCCNCCN":2685,"CCCCCCCCNCCCNC":2686,"CCCCCCCCCCOCCOC":2687,"CCCCCCCCOCCCCCCCCC":2688,"CCCCCCCCCCNCCC":2689,"CCCCCCCCCCCNc":2690,"CCCCCCCCSSc":2691,"CCCCCCCCOCCl":2692,"CCCCCCCCOCCOCCOCCOCCO":2693,"CCCCCCCCSCCO":2694,"CCCCCCCCCCCOS":2695,"CCCCCCCCCCOCCOCC":2696,"CCCCCCCCCCSCCC":2697,"CCCCCCCCOCCOCCOCCOCCOCCO":2698,"CCCCCCCCCCOCCOCCOCCO":2699,"CCCCCCCCSSSS":2700,"CCCCCCCCSSCCCCCCCC":2701,"ncnnn":2702,"ncsn":2703,"COCF":2704,"COCI":2705,"COCSC":2706,"CCNOC":2707,"CCNOCC":2708,"CCNCCCCC":2709,"CCNOP":2710,"CCNCCOCC":2711,"CCNCCCCCC":2712,"CCNCCCCCCC":2713,"CCNCCCCN":2714,"CCNCCCNC":2715,"CCNCCCCO":2716,"CCNOCc":2717,"CCNOS":2718,"CCNCCCCOC":2719,"CCNCCCS":2720,"CCNCCNS":2721,"CCNCCCCS":2722,"CCNCCCCCCN":2723,"CCNCCCCCOC":2724,"CCNCCOCCO":2725,"CCNCCOCCN":2726,"CCNCCON":2727,"CCNCCCCCCNC":2728,"CCNCCCCCCCCCCN":2729,"CCNCCCCCCl":2730,"+]*":2731,"+])-":2732,"+])*)":2733,"+]*)*":2734,"+])*)=":2735,"OCCSCc":2736,"OCCCOCCOCC":2737,"OCCPC":2738,"BrI":2739,"BrN":2740,"BrO":2741,"Brn":2742,"BrCCOCC":2743,"BrCCCCCCC":2744,"BrCCCCO":2745,"BrCn":2746,"BrCCCO":2747,"BrCCCOC":2748,"BrSc":2749,"BrCCCOc":2750,"BrCCCCOc":2751,"BrCCNc":2752,"BrCCCCc":2753,"BrCCBr":2754,"BrCOCc":2755,"BrCI":2756,"BrCCCCn":2757,"BrCCCBr":2758,"BrCCCCCc":2759,"BrCCCCCCCCOc":2760,"BrCCCCCCc":2761,"BrCCCCCCOc":2762,"BrCCCCCCCCc":2763,"BrCCOCc":2764,"BrCCCCBr":2765,"BrCCCCCBr":2766,"BrCCCCCCn":2767,"BrCCCCCCCCCCc":2768,"BrCCCCCCBr":2769,"BrCCCCCCCCCBr":2770,"BrCCCCCCCCCCBr":2771,"BrCCCCCCCCBr":2772,"BrCCCOCc":2773,"BrCCOCCBr":2774,"BrCCCCCCCCCCCCCCCCCCBr":2775,"BrCCCCCCCCCCCCBr":2776,"BrCCCCCC
CCCCCCCCBr":2777,"BrCCCCCCOCc":2778,"120":2779,"129":2780,"@@]=":2781,"cccco":2782,"-])\\[":2783,"-])*)*)":2784,"NCCI":2785,"NCCOCCCC":2786,"NCCCON":2787,"NCCOCCCN":2788,"NCCOCCOc":2789,"NCCCOCCCN":2790,"NCCOCCCOC":2791,"NCCOCCCl":2792,"NCCOCCNCCO":2793,"NCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCC":2794,"NCCOCCOCCOCCOCCOCCOCC":2795,"CSOCC":2796,"CSNN":2797,"CSCCOCC":2798,"CSCCc":2799,"CSCCCCO":2800,"CSCSc":2801,"CSCNCC":2802,"CSCCCCCCCCCCCC":2803,"CSCCCOC":2804,"CSCCCCNC":2805,"CSOO":2806,"CSCCCCOC":2807,"CSSCc":2808,"CSCCCCOc":2809,"CSCON":2810,"CSCSSC":2811,"CSSS":2812,"CSCCCCCNC":2813,"CSCCOCCO":2814,"CSSCCCC":2815,"CSOCCOCCOCCO":2816,"CSCCCCCS":2817,"CSCCCCCCSC":2818,"CSCCCOS":2819,"CSCCCCCCn":2820,"CSCOCCCCC":2821,"CSCCCCCCCCCCCCCOC":2822,"CSCCCCCCCCCCCO":2823,"CSSSSS":2824,"CSSSSSCC":2825,"CCCCCB":2826,"CCCCCPC":2827,"NNI":2828,"NNCCC":2829,"NNBr":2830,"NNNCC":2831,"NNCCCc":2832,"])*)[":2833,"])*)(":2834,"])*[":2835,"])*)*)*)*)*":2836,"CCOCS":2837,"CCOCBr":2838,"CNCS":2839,"CNCl":2840,"CNCn":2841,"CNCSC":2842,"SCI":2843,"SCBr":2844,")([*])=":2845,"OPC":2846,"OPP":2847,"OPOCC":2848,"CCCNI":2849,"CCCNP":2850,"CCCNSC":2851,"CCCNCCOCC":2852,"CCCNCCCCCC":2853,"CCCNNS":2854,"CCCNCCCCCCC":2855,"CCCNSCC":2856,"CCCNNCCC":2857,"CCCNOCc":2858,"CCCNCCNCC":2859,"CCCNCCCO":2860,"CCCNCCCOC":2861,"CCCNCCSC":2862,"CCCNCCCCNC":2863,"CCCNCCCS":2864,"CCCNCCCCS":2865,"CCCNCCCCCCOC":2866,"CCCNCCNCCN":2867,"CCCNCCOCCC":2868,"CCCNCCCNCCC":2869,"CCCNCCCOS":2870,"CCCNCCCCCCNCCC":2871,"(/*)=":2872,"(/*)\\":2873,"COPC":2874,"COPN":2875,"COPNP":2876,"CCNCN":2877,"CCNCSC":2878,"109":2879,"10121":2880,"211":2881,"214":2882,"215":2883,"216":2884,"217":2885,"218":2886,"219":2887,"+](*)([":2888,"+](\\[*]":2889,"+](*)(*)*":2890,"132":2891,"134":2892,"139":2893,"1336":2894,"13468":2895,"CCOCCF":2896,"CCOCCCOCC":2897,"CCOCCNCc":2898,"CCOCCON":2899,"CCOCCCNS":2900,"CCOCCOCCNCCO":2901,"CCOCCCNCCOC":2902,"CCOCCOCCBr":2903,"CCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCC":2904,"CCOCCSCCSCC":2905,"CCOCCPCCP":2906,"115":2907,"116":2908,"117":2909,"119":2910,"CCCCCCB":2911,"CCCCCCCOCC":2912,"CCCCCCON":2913,"CCCCCCOB":2914,"CCCCCCOCCCC":2915,"CCCCCCSc":2916,"CCCCCCCOS":2917,"CCCCCCSCc":2918,"CCCCCCOCCCCCC":2919,"CCCCCCCOCCOCC":2920,"CCCCCCSS":2921,"CCCCCCOCCCCC":2922,"CCCCCCSCCO":2923,"CCCCCCSCCCCCC":2924,"CCCCCCCOCCOCCCC":2925,"CCCCCCSSCCCCCCO":2926,"CCCCCCCOCCOCCOCCOCCCN":2927,"NSO":2928,"NSNC":2929,"NSCCC":2930,"NSCCN":2931,"NSNSN":2932,"CCCCCCCCCCCCCCCCn":2933,"CCCCCCCCCCCCCCCCBr":2934,"CCCCCCCCCCCCCCCCCS":2935,"CCCCCCCCCCCCCCCCCOc":2936,"CCCCCCCCCCCCCCCCCCCN":2937,"CCCCCCCCCCCCCCCCCOP":2938,"CCCCCCCCCCCCCCCCCCCCO":2939,"CCCCCCCCCCCCCCCCCCn":2940,"CCCCCCCCCCCCCCCCOCCOCC":2941,"CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC":2942,"CCCCCCCCCCCCCCCCOB":2943,"CCCCCCCCCCCCCCCCCCCOC":2944,"CCCCCCCCCCCCCCCCCCCCCN":2945,"CCCCCCCCCCCCCCCCSCCC":2946,"CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC":2947,"CCCCCCCCCCCCCCCCCCCCS":2948,"CCCCCCCCCCCCCCCCSS":2949,"CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC":2950,"CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC":2951,"CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC":2952,"CCCCCCCCCCCCCCCCCCOS":2953,"CCCCCCCCCCCCCCCCCCSc":2954,"CCCCCCCCCCCCCCCCCCOCCO":2955,"CCCCCCCCCCCCCCCCOCCCCCCCCCCCCCCCC":2956,"CCCCCCCCCCCCCCCCCCOCCC":2957,"CCCCCCCCCCCCCCCCCCNCCC":2958,"CCCCCCCCCCCCCCCCCCOCCCN":2959,"CCCCCCCCCCCCCCCCCCOCCOCCOCCOCCOCC":2960,"CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC":2961,"CCCCCCCCCCCCCCCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCC":2962,"CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC":2963,"CCCCCCCCCCCCCCCCOCCOP":29
64,"CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCO":2965,"CCCCCCCCCCCCCCCCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCC":2966,"CCCCCCCCCCCCCCCCCCNCCNCCN":2967,"CCOF":2968,"CCOn":2969,"CCOCCCCCCCC":2970,"CCONCC":2971,"CCOCCCCC":2972,"CCOCOc":2973,"CCOSC":2974,"CCOOP":2975,"CCONS":2976,"CCOCCCCCCCCCC":2977,"CCOCOCCN":2978,"CCOOO":2979,"CCOCCCCCN":2980,"CCOCCCCCCCCN":2981,"CCOCCCCl":2982,"CCOCCCBr":2983,"CCOCCCCCCCCCCCCN":2984,"CCOCCCCCCCCO":2985,"CCOCCCCCCCCCCCCCCCCCCN":2986,"CCOCCCCNCC":2987,"CCOOCCCCCCO":2988,"CCOOOCC":2989,"CCOCCCSP":2990,"CCOCCCCCCCCCCCCCN":2991,"@@](\\":2992,"cnnnc":2993,"cnncn":2994,"cnnnn":2995,"CSCn":2996,"CCCCCCCF":2997,"CCCCCCCSc":2998,"CCCCCCCSCCO":2999,"-].[*]":3000,"CCCCNNC":3001,"CCCCNOC":3002,"CCCCNCCN":3003,"CCCCNOCC":3004,"CCCCNSC":3005,"CCCCNCCCNC":3006,"CCCCNOCc":3007,"CCCCNCCCCOC":3008,"CCCCNCCCCCCNCCCC":3009,"231":3010,")*)*)[":3011,")*)\\[":3012,")*.*":3013,"SCCF":3014,"SCCP":3015,"SCCCOCC":3016,"SCCSc":3017,"SCCSCCC":3018,"SCCSS":3019,"@](*)(":3020,"NCCCB":3021,"NCCCP":3022,"CCCCOCOC":3023,"CCCCOCCCCC":3024,"CCCCOCCCCN":3025,"CCCCOCCCCO":3026,"CCCCOCCCCCCCCCBr":3027,"*])([*])":3028,"*])([*])[*]":3029,"342":3030,"3456":3031,"+]([*])=":3032,"+]([*])([*])[*]":3033,"@]([*])([*])":3034,"OCOO":3035,"OCOCCC":3036,"OCOCOC":3037,"OCOCCCCC":3038,"OCOCCS":3039,"OCOCOCO":3040,"OCOCCOCO":3041,"142":3042,"144":3043,"1414":3044,"1448":3045,"NNCN":3046,"NNCS":3047,"NNCl":3048,"CCNCCOc":3049,"CCNCCCOCC":3050,"CCNCCSCCCO":3051,"])(/":3052,"NOO":3053,"NONC":3054,"NOCCCCCC":3055,"NOON":3056,"NOCCCCNC":3057,"NOCCCS":3058,"NOOSC":3059,"nnoc":3060,"nnns":3061,"])([*])":3062,"-])([*])=":3063,"COCCB":3064,"COCCI":3065,"COCCP":3066,"COCCCOP":3067,"COCCSCC":3068,"COCCCOCCN":3069,"COCCSc":3070,"COCCSCCN":3071,"COCCCOCCCC":3072,"COCCSCCCC":3073,"COCCSNC":3074,"COCCCOCCCOC":3075,"COCCSCCl":3076,"([*])([*])[*]":3077,"([*])([*])=":3078,"CCSCN":3079,"CCSCS":3080,"CCSCCCN":3081,"CCSCCCCCC":3082,"CCSCCCNC":3083,"CCSCCCCCCCCC":3084,"CCSCCCNCC":3085,"CCSCCCCS":3086,"CCSSS":3087,"CCSCCCCl":3088,"CCSOOO":3089,"CCSCCOCCOCC":3090,"CCSCCCCSCC":3091,"CCSCCCCCCCCCCSCCC":3092,"CCCCCCCCCP":3093,"153":3094,"155":3095,"1585":3096,"OCCONC":3097,"OCCOOC":3098,"OCCOCCCCCCCC":3099,"OCCOOCC":3100,"OCCOCCCCCCCCCCCCCCCCCC":3101,"OCCOCCCS":3102,"OCCOCCNS":3103,"OCCOCCCCS":3104,"OCCOCCCCCCCCCCCCCCCCCCCCCC":3105,"OCCOCCCCBr":3106,"OCCOCCCCOCCOC":3107,"OSS":3108,"OSc":3109,"OCCNP":3110,"OCCNCO":3111,"OCCNNC":3112,"OCCNCCCCC":3113,"OCCNCCc":3114,"OCCNCCCF":3115,"OCCNCCNCCO":3116,"*)#":3117,"*)-":3118,"CNCCF":3119,"CNCCI":3120,"CNCCBr":3121,"CNCCCOCCO":3122,"CCCOB":3123,"CCCONC":3124,"CCCOCOC":3125,"CCCOCCCCCCC":3126,"CCCOCCCS":3127,"CCCOCOCCC":3128,"CCCOCCNc":3129,"CCCOCCOCCC":3130,"CCCOOCCCCOC":3131,"CCCOCCOCCOCCOCCO":3132,"CCCCCCCCCCCSC":3133,"OCCOCCI":3134,"OCCOCCNC":3135,"OCCOCCCl":3136,"OCCOCCBr":3137,"OCCOCCOP":3138,"OCCOCCCOCCO":3139,"OCCOCCOCCCl":3140,"OCCOCCOCCOCCN":3141,"FCI":3142,"ONP":3143,"ONCCCC":3144,"CCCCCCCCCCCCCCCS":3145,"CCCCCCCCCCCOc":3146,"CCCCCCCCCCSCC":3147,"CCCCCCCCCCCOCC":3148,"NOCS":3149,"NCCNN":3150,"NCCNP":3151,"NCCNCCOC":3152,"NCCNCCCN":3153,"CCCCCCCCCCCCCS":3154,"CCCCCCCCCCCCOCCOCC":3155,"CCCCCCCCCCCCSc":3156,"CCCCCCCCCCCCNCCCN":3157,"CCCCCCCCCCCCSCc":3158,"CCCCCCCCCCCCCNS":3159,"CCCCCCCCCCCCCOCCC":3160,"CCCCCCCCCCCCCOCCOCC":3161,"CCCCCCCCCCCCOCCOCCO":3162,"CCCCCCCCCCCCCCNCCN":3163,"CCCCCCCCCCCCCOCCCN":3164,"CCCCCCCCCCCCSSC":3165,"CCCCCCCCCCCCCCOCCO":3166,"CCCCCCCCCCCCOCCOCCOCCOCCOCCOCCOCCOCC":3167,"CCCCCCCCCCCCOCCOCCN":3168,"CCCCCCCCCCCCCNCCNC":3169,"CCCCCCCCCCCCOCCOCCOCCOCCO":3170,"CCCCCCCCCCCCCCOCCOCCOCCOC
COCC":3171,"CCCCCCCCCCCCOCCOCCOCCOC":3172,"CCCCCCCCCCCCOCCOCCOCCOCCOCCO":3173,"CCCCCCCCCCCCCCOCCOCCOCCO":3174,"CCCCCCCCCCCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCC":3175,"CCCCCCCCCCCCCCNCCCC":3176,"CCCCCCCCCCCCOCCOP":3177,"CCCCCCCCCCCCNCCCNCC":3178,"CCCCCCCCCCCCOCCOCCOCCOCCOCCOCCOCCO":3179,"CCCCCCCCCCCCCCOCCCO":3180,"CCCCCCCCCCCCOCCOCCOCCOCCOCCOCC":3181,"CCCCCCCCCCCCCOCCOS":3182,"CCCCCCCCCCCCOCCOCCOS":3183,"CCCCCCCCCCCCOCCOCCOCCOCCOCCOCCOCCOCCO":3184,"CCCCCCCCCCCCCCF":3185,"CCCCCCCCCCCCCCI":3186,"CCCCCCCCCCCCCCP":3187,"CCCCCCCCCCCCCCl":3188,"CCCCCCCCCCCCCCCl":3189,"CCCCCCCCCCCCCCSCC":3190,"CCCCCCCCCCCCCCCOS":3191,"CCCCCCCCCCCCCCOCCCCCCCCCCCCCC":3192,"OBO":3193,"OBOBO":3194,"OCCCSCC":3195,"OCCCSCCNC":3196,"OCCCSCCN":3197,"OCCCSCCCOC":3198,"COCCNCCC":3199,"COCCNCCN":3200,"COCCNCCOC":3201,"COCCNCCO":3202,"COCCNCCCCSCC":3203,"COCCNCCOCCOCCOCCN":3204,"OCCCCF":3205,"OCCCCCCCN":3206,"OCCCCCOP":3207,"OCCCCOCc":3208,"OCCCCCCNCC":3209,"OCCCCOCCO":3210,"OCCCCCSc":3211,"OCCCCCOCc":3212,"OCCCCCOCCO":3213,"OCCCCCCNCc":3214,"CCCCCCCCCCCCCCCCCCI":3215,"CCCCCCCCCCCCCCCCCCCl":3216,"NCCCCF":3217,"NCCCCP":3218,"NCCCCS":3219,"NCCCCCl":3220,"NCCCCCSC":3221,"NCCCCOCc":3222,"NCCCCCNCC":3223,"NCCCCCNS":3224,"NCCCCCNCCCN":3225,"NCCCCOCCI":3226,"NCCCCCOCCOCCC":3227,"1721":3228,"457":3229,"4545":3230,"NCCCNO":3231,"NCCCNCCCCCCCC":3232,"NCCCNCCCNC":3233,"NCCCNCCCCCCCCC":3234,"NCCCNCCCCCNC":3235,"NCCCNCCNCCCN":3236,"COSN":3237,"COSOC":3238,"3191":3239,"-])(*)":3240,"-])(*)*":3241,"OOSC":3242,"CCCCOCSSCCNC":3243,"OCCOCN":3244,"OCCOCNC":3245,"CSCCB":3246,"CSCCBr":3247,"CSCCCOc":3248,"CSCCSCCSC":3249,"CSCCSCCSCC":3250,"CSCCSCCSCCS":3251,"CCCSO":3252,"CCCSNC":3253,"CCCSCCO":3254,"CCCSON":3255,"CCCSCCCSC":3256,"CCCSCCON":3257,"CCCSSCCCS":3258,"CCCSCCCCCCCCCBr":3259,"CCCSSSSC":3260,"CCCSSCCCSC":3261,"[*])\\":3262,"LiH":3263,")*)(*)*)=":3264,"NCCONC":3265,"COCCCF":3266,"COCCCI":3267,"COCCCBr":3268,"246":3269,"CPC":3270,"CPP":3271,"CPc":3272,"CPCC":3273,"CPOC":3274,"CPCc":3275,"CPCCP":3276,"+]=*":3277,"CNCCCF":3278,"OCCOCCOCCOCCS":3279,"OCCOCCOCCOCCCl":3280,"OCCOCCOCCOCCOCc":3281,"OCCOCCOCCOCCOCCNC":3282,"OCCOCCOCCOCCOCCBr":3283,"OCCOCCOCCOCCOCCOS":3284,"OCCOCCOCCOCCOCCOCCOCCNC":3285,"OCCOCCOCCOCCOCCOCCOS":3286,"CCCCCNN":3287,"CCCCCNNC":3288,"CCCCCNOCc":3289,"NCCSN":3290,"NCCSCCO":3291,"NCCSSCCC":3292,"CCNSSNCCC":3293,"CNCCNCCC":3294,"CNCCNCCCN":3295,"CNCCNCCO":3296,"CNCCNCCNCCC":3297,"NiH":3298,"190":3299,"191":3300,"193":3301,"194":3302,"198":3303,"199":3304,"569":3305,")*)*)*)*)":3306,")*)*)*)(*)*)=":3307,"*])[*])":3308,"SCCCBr":3309,"OCCCNCCCC":3310,"OCCCNCCCCN":3311,"OCCCCCCI":3312,"OCCCCCCCO":3313,"OCCCCCCBr":3314,"OCCCCCCCOP":3315,"CCSCCBr":3316,"CCSCCOP":3317,"CCSCCSCCN":3318,"CCSCCSCCSCC":3319,"3535":3320,"*)(\\":3321,"*)(*)[":3322,"*)(*)*)*)*)*":3323,"*)(*)*)(*)*":3324,"+])[*])[":3325,"220":3326,"222":3327,"226":3328,"CCCCSN":3329,"CCCCSNC":3330,"CCCCSCSC":3331,"CCCCSCCCCCCCCCCC":3332,"CCCCSCCCCCN":3333,"COCCOCCI":3334,"COCCOCCS":3335,"COCCOCCn":3336,"COCCOCCCN":3337,"COCCOCCBr":3338,"COCCOCCCNCC":3339,"COCCOCCON":3340,"COCCOCCOCCNC":3341,"COCCOCCSS":3342,"COCCOCCOCCBr":3343,"COCCOCCOCCOCCOCCOCCOCCO":3344,"COCCOCCOCCn":3345,"COCCOCCOCCOCCOCCOCCOCCOCCOCCO":3346,"COCCOCCOCCOCCOCCN":3347,"CCCCCCNN":3348,"CCCCCCNP":3349,"CCCCCCNCCOC":3350,"CCCCCCNCCCCCC":3351,"CCCCCCNCCCCCCC":3352,"CCCCCCNCCCCCBr":3353,"CCCCCCCCCCCCCCCCCCCCP":3354,"CCCCCCCCCCCCCCCCCCCCOCC":3355,"CCCCCCCCCCCCCCCCCCCCBr":3356,"CCCCCCCCCCCCCCCCCCCCCCNC":3357,"(\\[*])[*])":3358,"SSP":3359,"SSSCC":3360,"SSCOCC":3361,"SSSSc":3362,"SSSSS":3363,"SSCCCCCCC
CCCCN":3364,"SSCCCCCCCCCCCCCCCCO":3365,"SSCSSS":3366,"OCCCOOC":3367,"OCCCOCCCCl":3368,"OCCCOCCOCCCCl":3369,"CNCCOCCN":3370,"CCCCCCCCCCCCCCCCCCCCCCO":3371,"CCCCCCCCCCCCCCCCCCCCCCc":3372,"CCCCCCCCCCCCCCCCCCCCCCBr":3373,"CCPP":3374,"CCCCCCCCCCCCCCCCCCCCCCCCO":3375,"CCCCCCCCCCCCCCCCCCCCCCCCCCN":3376,"CCCCCCCCCCCCCCCCCCCCCCCCCCOC":3377,"CCCCCCCCCCCCCCCCCCCCCCCCCCO":3378,"CCCCCCCCCCCCCCCCCCCCCCCCCCCCO":3379,"NPN":3380,"NPOCCOC":3381,"NPOCCCN":3382,"COCCON":3383,"COCCOCOC":3384,"COCCOCCCBr":3385,"COCCOCCCCCCBr":3386,"CSCCCBr":3387,"OCCCCCCCCO":3388,"OCCCCCCCCS":3389,"OCCCCCCCCCO":3390,"OCCCCCCCCCl":3391,"OCCCCCCCCCS":3392,"OCCCCCCCCOP":3393,"OCCCCCCCCCCCN":3394,"OCCCCCCCCCCOCC":3395,"OCCCCCCCCCSCCCC":3396,"204":3397,"205":3398,"206":3399,"207":3400,"208":3401,"209":3402,"OCCOCCOCCCC":3403,"OCCOCCOCCC":3404,"OCCOCCOCCCCOCC":3405,"OCCOCCOCCCCOCCOCCOC":3406,"(*)(=[":3407,"CCCCCOB":3408,"CCCCCONC":3409,"CCCCCOCCCC":3410,"CCCCCOCCCCC":3411,"CCCCCOCCCN":3412,"CCCCCOOOO":3413,"coccc":3414,"NCCCCCCS":3415,"NCCCCCCc":3416,"NCCCCCCn":3417,"NCCCCCCOP":3418,"NCCCCCCOCCCC":3419,"NCCCCCCSc":3420,"ccnnn":3421,"*)*)*)[":3422,"678":3423,"TeH":3424,"ClCCI":3425,"nnnnn":3426,"CCNCCNCCC":3427,"NOCCBr":3428,"NOCCCON":3429,"COCCCNN":3430,"COCCCNCCCC":3431,"COCCCNCCOC":3432,"COCCCNCCCOC":3433,"CCCCCCCCCCCCCCCCCCCCCCCCCCCOC":3434,"CCCOCCBr":3435,"CCCOCCCOS":3436,"NCCCCNO":3437,"NCCCCNCCCC":3438,"NCCCCNCCN":3439,"NCCCCNCCCCN":3440,"SCCNCCO":3441,"occo":3442,"4681":3443,"CSCCNCCNS":3444,"CCCCCCCNCCCCCCC":3445,"CCCCCCCNCCCCCCCCCCC":3446,"NCCCOOC":3447,"OCCCCCCCF":3448,"OCCCCCCCS":3449,"OCCCCCCCBr":3450,"OCCOCCOCCOCCOCCOCCOCCOCCOC":3451,"OCCOCCOCCOCCOCCOCCOCCOCCOCCOCCO":3452,"OCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCO":3453,"OCCOCCOCCOCCOCCOCCOCCOCCOCCOCCC":3454,"OCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCO":3455,"OCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOP":3456,"CNNCN":3457,"OCCCCCCCCCCCl":3458,"OCCCCCCCCCCBr":3459,"SCCSCCOCC":3460,"SCCSCCO":3461,"CCOCCNN":3462,"CNCCCCBr":3463,"CNCCCCNCC":3464,"CNCCCCSC":3465,"BrCCI":3466,")*)=*":3467,"COCCCCBr":3468,"COCCCCCCNC":3469,"COCCCCCNCC":3470,"758":3471,"OCCSCCCN":3472,"OCCSSCC":3473,"OCCSSc":3474,"OCCOCCNCCOCCO":3475,"CCCCCCCCCCCCNCCCCCCCCCCCC":3476,"CCCCCCCCCCCCNCCNCCNCC":3477,"OCCCCCCCCCBr":3478,"OCCCCCCCCCCCCCCCCO":3479,"OCCCCCCCCCCCCCCCCBr":3480,"OCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC":3481,"OCCCCCCCCCCCCCCCCCCCCCCCCCCCCC":3482,"OCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC":3483,"OCCCCCCCCCCCCCCCCCCCCCCCCCCCC":3484,"OCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC":3485,"OCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC":3486,"OCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC":3487,"OCCCCCCCCCCCCCCCCSSCCCCCCCCCCCCCCCCO":3488,"CCCCCCCCCCCCCCCCCCOCN":3489,"CCCCCCCCCCCCCCCCCCOCc":3490,"OCCCCCCCCCCCCNCc":3491,"SCCCCP":3492,"SCCCCBr":3493,"SCCCCCS":3494,"CCCCCCNCl":3495,"CNCCCNCCCNC":3496,"CNCCCNCCNCCCNC":3497,"CCCCCCOCCNCC":3498,"NCCOCCOCCOCC":3499,"NCCOCCOCCOCCOC":3500,"NCCOCCOCCOCCOCCO":3501,"NCCOCCOCCOCCOCCN":3502,"NCCOCCOCCOCCP":3503,"NCCOCCOCCOCCOCCOCCN":3504,"CNCCNCN":3505,"CCCCCCCCCCNCCCCCCCCCC":3506,"NCCCSCCCN":3507,"CCCCCCCCCCCCOCCC":3508,"CCCCCCCCCCCCOCCCNC":3509,"CCCCCCCCCCCCOCCCCCCCCCCCC":3510,"CCCCCCCCCCCCOCCCCCCCCCCCCc":3511,"NNCCF":3512,"NNCCON":3513,"NNCCCOCc":3514,"OCCCCCCCCCCCCCCc":3515,"OCCCCCCCCCCCCCCBr":3516,"OCCOCCOCCOCCOCCN":3517,"CCCCOCCCNCC":3518,"CCCCOCCCNS":3519,"NCCSCCSC":3520,"OCCCCCCCCCCCCCBr":3521,"NCCOCCP":3522,"CCOCCOCCOCCOCCOCCOCCOCCOCCO":3523,"(#*)":3524,"OCCCCON":3525,"OCCCCOS":3526,"OCCCCOOC":3527,"OCCCCOCCC":3528,"285":3529,"*)*)(*)=":3530,"NCCCCCCCCS":3531,"NCCCCCCCCCN":3532,"NCCCCCCCCCCCN":3533,"NCCCCCCCCCC
CO":3534,"NCCCCCCCCNP":3535,"CCSSOC":3536,"CCSSSCC":3537,"NCCCCCNO":3538,"NCCCCCNCCCNC":3539,"NCCOCCOCCC":3540,"NCCOCCOCCNS":3541,"NCCOCCOCCCCCCCl":3542,"NCCCCCCNCCCCCCN":3543,"NCCCCON":3544,"NCCCCOCCC":3545,"NCCCCOCCCCOCCCCN":3546,"CCCCCCCCCNCCCNC":3547,"*)*)*)*)=":3548,"*)*)*)=*)":3549,"NCNCNCN":3550,"CCCCCSSCCCCC":3551,"CCCCNCCCNCC":3552,"NCCNCCS":3553,"NCCNCCCOC":3554,"COCCCCCCCCCCCCCCCCCCCCOC":3555,"CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCO":3556,"CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCOC":3557,"*)*)*)*)*)*":3558,"*)*)*)*)*)*)":3559,"*)*)*)*)*)*)(":3560,"684":3561,"NCCCCCCCNC":3562,"NCCNCCNCCNCCNC":3563,"NCCNCCNCCNCCNCCN":3564,"NCCNCCNCCNCCNCCNCCN":3565,"CSSCS":3566,"479":3567,"COCOCOCOC":3568,"COCOCCCCCCCCCCCCCNC":3569,"CCCSCCCO":3570,"CCCSCCSC":3571,"CCCSCCSCCS":3572,"CCCCCCCCCCOB":3573,"CCCCCCCCCCOCCCC":3574,"CCCCCCCCCCOOC":3575,"CCCCCCCCCCOCCCCCCCCCCOP":3576,"CSCCCCNS":3577,"CSCCCCSCC":3578,"CSCCCCCCNCC":3579,"CSCCCCCNCC":3580,"CSCCCCCNS":3581,"CCCCCCCCCCCCCCCCCCCCCCCCCCCCCO":3582,"CCCCCCCCCCCCCCCCNCCCCCCCCCCCCCCCC":3583,"FSI":3584,"[*-":3585,"[*+]":3586,"[*++]":3587,"OCPOCOC":3588,"(*)(*)(=":3589,"(*)(*)[":3590,"(*)(*)*":3591,"(*)(*)*)":3592,"(*)(*)(*)[":3593,"NCONCCCOOC":3594,"SCCCCCCS":3595,"CSCCOOCCO":3596,"OCCCCCCCCCCCCCCCBr":3597,"OCCCOCCCOCCCO":3598,"CCCNCCNCCCNCC":3599,"OCCOCCCCCCOCCOC":3600,"CCCCCCCCSCCCCCCCC":3601,"CCCCCCCCSNS":3602,"CCCCCCCCSOS":3603,"CCCCCCCCSCCOCCO":3604,"CCCCCCCCSCSP":3605,"CCCCCCCCCCCCSCCCCCCCCCCCCCCCCCCCCCCC":3606,"CCCCCCCCCCCCSCCOCCO":3607,"CCCCCCCCOCCCNCCC":3608,"BrCCCB":3609,"NCCCNCCCCNCCC":3610,"SOSC":3611,"OCCSCCCO":3612,"OCCSCCCl":3613,"OCCSCCSCCSCCS":3614,"OCCSCCSCCSCCO":3615,"NCCCCCCCCNCCCCCCCCNC":3616,"CSCCSCCNC":3617,"CSCCSCCCSCCSC":3618,"CCOCCOCCOCCOCCNCC":3619,"CCCCCCCOCCCCCCC":3620,"CCCCCCCOOCCCCCC":3621,"OCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCO":3622,"OCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCO":3623,"OCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCO":3624,"9669":3625,"COOSC":3626,"CCCCCCCCCCCCCCCCCCOCCOP":3627,"CCCCCCCCCCCCCCCCCCOCCOCCO":3628,"CCCCCCCCCCCCCCCCCCOCCOCCOCCOCCO":3629,"CCCCCCCCCCCCCCCCCCOCCOCCOCCOCCOCCO":3630,"CCCCCCCCCCCCCCCCCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCO":3631,"CCCCCCOCCOCc":3632,"CCCCCCOCCOCCO":3633,"OCCOCCOCCI":3634,"/*)\\*":3635,"SCSCS":3636,"CNCCCCNCCC":3637,"CCCCCCCCCCCNCCCCCCCCCCC":3638,"CCCCCCCCCCCON":3639,"CCCCCCCCCCCOCCOCCOCC":3640,"CCNCCCCCCNCC":3641,"CCCCOCCOCCSC":3642,"CCCCOCCOCCOCc":3643,"CCCCOCCOCCSCCSCCS":3644,"CNCCCCCI":3645,"ClCCCNCCCl":3646,"CCCOCCOCCOCCC":3647,"CCCOCCOCCOCCCN":3648,"CCCOCCOCCOCCOCCOCCOCCOCCN":3649,"CCCCCCCCCCOCCOS":3650,"CCCCCCCCCCOCCOCCOCCOCCOCCO":3651,"CCCCCCCCCCOCCOCCOCCC":3652,"NCCCNCCCl":3653,"NCCCOCCOCCCN":3654,"CCCCCCCCCCCCCCNCCCCCCCCCCCCCC":3655,"OCCCNCCCl":3656,"OCCCNCCSC":3657,"OCCCNCCCOB":3658,"BrCCCCOCc":3659,"BrCCCCCOCc":3660,"SCCSCCCl":3661,"CNCCCCCCOCCCOC":3662,"*)*)*)*)*)*)*)*)*)*)[":3663,"ssss":3664,"CCCCCCCCCCCCCNCCCCCCCCCCCCC":3665,"CNCCNCCNCCNCCNCCNC":3666,"NBN":3667,"NBNBN":3668,"NCCCCCCCCCCCS":3669,"NCCCCCCCCCCCSSCCCCCCCCCCCN":3670,"SCNCS":3671,"SOONC":3672,"CCCCCOCCPCC":3673,"CCCCCCCCNCCCNCCCN":3674,"CCCCCCCCNCCCNCCCNCCCN":3675,"CCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOC":3676,"CCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCN":3677,"CCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCS":3678,"CCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOC":3679,"CCCCCCCSCCCCCCC":3680,"CCCCCCCSCCCCOC":3681,"CCCOCCOCCCCCCCl":3682,"COCCOCCOCCOB":3683,"COCCOCCOCCOCCOCCOCCC":3684,"COCCOCCNCCOC
COCCN":3685,"COCCOCCNCCOCCOCCOCCN":3686,"(*)*)*)=":3687,"CNCCCCCCOCCCCC":3688,"+][*])=":3689,"CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCO":3690,"CCCOCCCCCCNCC":3691,"OCCOCCOCCOCCOCCOCCNC":3692,"OCCOCCOCCOCCOCCOCCOP":3693,"OCCOCCOCCOCCOCCOCCOCO":3694,"SBSBS":3695,"\\*)\\*":3696,"CCCCOCCOCCCS":3697,"]([*])([*])":3698,"]([*])([*])[*]":3699,"]([*])([*])[":3700,"CCCCCCCCCOCCOP":3701,"NCCOCCOCCOCCOCCOCCOCCOCCOCCNC":3702,"NCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCN":3703,"NCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCN":3704,"NCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCC":3705,"CSCCCONN":3706,"CCOCCOCCOCCCCCC":3707,"CCOCCOCCOCCNCC":3708,"CCCCCCCOCCCNCCCN":3709,"CCCCCCCCCCCCCCCCCCCCCCNCCCCCCCCCCCCCCCCCCCCCC":3710,"(*)*)(*)*)(*)*":3711,"BBB":3712,"NCCCCCCCCCCCCNc":3713,")(*)*)(*)*)(*)*)=":3714,")(*)*)(*)*)(*)*)(*)*":3715,")(*)*)(*)*)(*)*)(*)*)=":3716,"NCCOCCOCCOCCOCCOCCNC":3717,"NCCOCCOCCOCCOCCOCCSC":3718,"CSCCCSCc":3719,"CSCCCSCCCSCCCSC":3720,"(-*)-*":3721,"COCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCCC":3722,"COCCOCCOCCOCCOCCOCCOCCC":3723,"COCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCO":3724,"COCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCN":3725,"COCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCO":3726,"23456789":3727,"CNCCCCCCNCCCCCCCCNCCCCCCNCC":3728,"PCCPc":3729,"SCCOCCOCCS":3730,"](*)(*)[":3731,"](*)(*)*)(*)=":3732,"CCCCCCCCCCSSCCCCCCCCCCC":3733,"CCCCCCCCCCSSCCCCCCCCCCc":3734,"OCCOCOCCOc":3735,"OCCOCOCCSCC":3736,"OCCSCCSCCO":3737,"NCCOCCOCCOCCOCCOCCOCCOCCOCCC":3738,"+](*)#[":3739,"CCOCCOCCOCCOCCOCCOCCC":3740,"CCCCCCCCCCCCCCCCCCOCCCCCCCCCCCCCCCCCC":3741,"CCCCCCCCCCCCCCCCCCSCCCCl":3742,"CCOCCCCOCCCCO":3743,"CCCCOCCCCCCNCc":3744,"OCCNCCCNCCO":3745,"NCCCNCCCNCCCNCCCN":3746,"OOOOOOC":3747,"*)(*)*)(*)*)(*)*)=":3748,"*)(*)*)(*)*)(*)*)(*)*)=":3749,"OCCCCCCCCCCCSSCCCCCCCCCCCOC":3750,"OCCCCCCCCCCCSSCCCCCCCCCCCO":3751,"(*)*)(*)*)(*)*)(*)*)(*)*)=":3752,"(*)*)(*)*)(*)*)(*)*)(*)*)(*)*":3753,"(*)*)(*)*)*)[":3754,"6868":3755,"=*)=*":3756,"ICCCCCCCCI":3757,"ICCOCCOCCI":3758,"NCCCCCCCCCCCCCCCI":3759,"SNCCCCCNCCSCC":3760,"SCCCCCNCCNC":3761,"SCCOCCOCCOCCOCCOCCS":3762,"SCCOCCOCCOCCS":3763,"SNSNSNS":3764,"(=*)*)=*)(":3765,"COCCCCCOCOOC":3766,"CCCCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCO":3767,")(*)*)(*)*)*":3768,")(*)*)=*)":3769,"CNCCCCCCCCCCCCNCc":3770,"CNCCOCCOCCNCc":3771,"ClCCOCCOCCCl":3772,"ClCCNCCCl":3773,"ClCCOCCOCCOCCCl":3774,"ClCCSSCCCl":3775,"CCCCCCCCOCCCCCOCCC":3776,"CCCCCCCCSSSSSSSSS":3777,"CCNCCOCCOCCCOC":3778,"OCCCOCCOCCOCCOCCOCCOCCOCCOCCOCO":3779,"NCCOCCCCCCOCCO":3780,"NCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOC":3781,"NCCOCCOCCOCCOCCOCCOCCOCCOCCN":3782,"CSCCOCCSCC":3783,"CSOCCOCCOCCOSC":3784,"CSCCCCCCCCCCCOCCOCCOCCOCC":3785,"+](\\[*])=":3786,"CCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCNC":3787,"CCOCCSCCSCCSCCSCCOCC":3788,"CCOCCPCCPCCO":3789,"CCCCCCSSCCCCCC":3790,"CCCCCCCCCCCCCCCCSSCCCCCCCCCCCCCCCC":3791,"CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCOC":3792,"CCCCCCCCCCCCCCCCCCOCCOCCOCCOCCOCCOCCOCCO":3793,"CCCCCCCCCCCCCCCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCO":3794,"CCCCCCCCCCCCCCCCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCO":3795,"CCOOCCCCCCOOCC":3796,"SCCSCCCSCCS":3797,"COCCSCCNCCC":3798,"COCCCOCCCCOCCCOC":3799,"CCSCCCNCCCOCC":3800,"CCSSSOC":3801,"CCSCCOCCOCCSCC":3802,"158591":3803,"CCCCCCCCCCCCCOCCOCCOCCOS":3804,"CCCCCCCCCCCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCO":3805,"CCCCCCCCCCCCCCOCCOCCOCCOS":3806,"CCCCCCCCCCCCOCCOCCOCCOCC
OCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCO":3807,"CSCCSCCSCCSCCSCCS":3808,"CPCCPCCOCCPCCPCCO":3809,"COCCOCCSSCCOCCO":3810,"OCCOCCOCCCCOCCCOCCOC":3811,"OCCSCCSCCSCCSCCO":3812,"CCCCCCCCCCCCCCCCCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCCS":3813,"CCCCOCCOCCSCCSCCSCCOCCOCCCC":3814,"CCCCCCCCSSSSSSSSSCCCCCCC":3815}
\ No newline at end of file
+{"<s>":0,"<pad>":1,"</s>":2,"<unk>":3,"<mask>":4,"!":5,"\"":6,"#":7,"$":8,"%":9,"&":10,"'":11,"(":12,")":13,"*":14,"+":15,",":16,"-":17,".":18,"/":19,"0":20,"1":21,"2":22,"3":23,"4":24,"5":25,"6":26,"7":27,"8":28,"9":29,":":30,";":31,"<":32,"=":33,">":34,"?":35,"@":36,"A":37,"B":38,"C":39,"D":40,"E":41,"F":42,"G":43,"H":44,"I":45,"J":46,"K":47,"L":48,"M":49,"N":50,"O":51,"P":52,"Q":53,"R":54,"S":55,"T":56,"U":57,"V":58,"W":59,"X":60,"Y":61,"Z":62,"[":63,"\\":64,"]":65,"^":66,"_":67,"`":68,"a":69,"b":70,"c":71,"d":72,"e":73,"f":74,"g":75,"h":76,"i":77,"j":78,"k":79,"l":80,"m":81,"n":82,"o":83,"p":84,"q":85,"r":86,"s":87,"t":88,"u":89,"v":90,"w":91,"x":92,"y":93,"z":94,"{":95,"|":96,"}":97,"~":98,"¡":99,"¢":100,"£":101,"¤":102,"¥":103,"¦":104,"§":105,"¨":106,"©":107,"ª":108,"«":109,"¬":110,"®":111,"¯":112,"°":113,"±":114,"²":115,"³":116,"´":117,"µ":118,"¶":119,"·":120,"¸":121,"¹":122,"º":123,"»":124,"¼":125,"½":126,"¾":127,"¿":128,"À":129,"Á":130,"Â":131,"Ã":132,"Ä":133,"Å":134,"Æ":135,"Ç":136,"È":137,"É":138,"Ê":139,"Ë":140,"Ì":141,"Í":142,"Î":143,"Ï":144,"Ð":145,"Ñ":146,"Ò":147,"Ó":148,"Ô":149,"Õ":150,"Ö":151,"×":152,"Ø":153,"Ù":154,"Ú":155,"Û":156,"Ü":157,"Ý":158,"Þ":159,"ß":160,"à":161,"á":162,"â":163,"ã":164,"ä":165,"å":166,"æ":167,"ç":168,"è":169,"é":170,"ê":171,"ë":172,"ì":173,"í":174,"î":175,"ï":176,"ð":177,"ñ":178,"ò":179,"ó":180,"ô":181,"õ":182,"ö":183,"÷":184,"ø":185,"ù":186,"ú":187,"û":188,"ü":189,"ý":190,"þ":191,"ÿ":192,"Ā":193,"ā":194,"Ă":195,"ă":196,"Ą":197,"ą":198,"Ć":199,"ć":200,"Ĉ":201,"ĉ":202,"Ċ":203,"ċ":204,"Č":205,"č":206,"Ď":207,"ď":208,"Đ":209,"đ":210,"Ē":211,"ē":212,"Ĕ":213,"ĕ":214,"Ė":215,"ė":216,"Ę":217,"ę":218,"Ě":219,"ě":220,"Ĝ":221,"ĝ":222,"Ğ":223,"ğ":224,"Ġ":225,"ġ":226,"Ģ":227,"ģ":228,"Ĥ":229,"ĥ":230,"Ħ":231,"ħ":232,"Ĩ":233,"ĩ":234,"Ī":235,"ī":236,"Ĭ":237,"ĭ":238,"Į":239,"į":240,"İ":241,"ı":242,"IJ":243,"ij":244,"Ĵ":245,"ĵ":246,"Ķ":247,"ķ":248,"ĸ":249,"Ĺ":250,"ĺ":251,"Ļ":252,"ļ":253,"Ľ":254,"ľ":255,"Ŀ":256,"ŀ":257,"Ł":258,"ł":259,"Ń":260,"CC":261,"(=":262,"](":263,"cc":264,"@@":265,")[":266,"CO":267,"NC":268,"CCCC":269,"ccc":270,"OC":271,"]([":272,")(":273,"CN":274,")=":275,"Cl":276,"CCC":277,"-]":278,"([":279,"ccccc":280,")(=":281,"CCCCCCCC":282,"nc":283,"COC":284,"CCN":285,"+]":286,"OCC":287,"Cc":288,"@]":289,"Br":290,"12":291,"]%":292,"@@]":293,".[":294,"cccc":295,"-])":296,"NCC":297,"CS":298,"CCCCC":299,"Nc":300,"COc":301,"nH":302,"NN":303,")/":304,"])":305,"CCOC":306,"CNC":307,"(-":308,"SC":309,")([":310,"Oc":311,"+](=":312,"OP":313,"CCCN":314,"(/":315,"-])=":316,"COP":317,"CCNC":318,"=[":319,"10":320,"21":321,"][":322,"+](":323,"13":324,"-])(=":325,"CCOCC":326,"Na":327,"])[":328,"11":329,"CCCCCC":330,"cnc":331,"NS":332,"NH":333,"CCCCCCCCCCCCCCCC":334,"CCO":335,"@@](":336,"cn":337,"CSC":338,"(\\":339,")\\":340,"CCCCCCC":341,"-].[":342,"CCCCN":343,"Si":344,"CCc":345,"+].[":346,"23":347,")*":348,"SCC":349,"sc":350,"@](":351,"ncc":352,"NCCC":353,"CCCNC":354,"oc":355,"CCCCO":356,"*]":357,"34":358,"OCc":359,"+]([":360,"@]([":361,"-])[":362,"OCO":363,"nnc":364,"14":365,"NNC":366,"CCNCC":367,"])(":368,"].[":369,"NO":370,"nn":371,"32":372,"Cn":373,"ncnc":374,"])([":375,"CCOc":376,"@]%":377,"-].":378,"-])([":379,"COCC":380,"*)
=":381,"CCl":382,"CH":383,"([*]":384,"CCS":385,"([*])=":386,"cccnc":387,"CCCCCCCCC":388,"15":389,"OCCO":390,"OS":391,"ccnc":392,"+]=[":393,"Clc":394,"CBr":395,"(*)=":396,"OCCN":397,"*)":398,"43":399,")-":400,"CSc":401,"CNCC":402,"NCc":403,"CCCO":404,"CCCCCCCCCCCCC":405,"CCn":406,"\\[":407,"CCCCCCCCCCC":408,"OCCOCC":409,"FC":410,"@@]([":411,"ON":412,"CCCCCCCCCCCCCCC":413,"ccccn":414,"/[":415,"CCCCCCCCCC":416,"NOC":417,"NCCN":418,"SCCNC":419,"CCCCCCCCCCCC":420,"Fc":421,"ccncc":422,"ccn":423,")*)":424,"CCCCCCCCCCCCCC":425,"OB":426,"OCCC":427,"+])":428,"-](":429,"CCCOC":430,"COCCN":431,"16":432,"CCSC":433,"csc":434,"OCCCC":435,"*])":436,"Brc":437,"])=":438,"](/":439,"CCCCCCCCCCCCCCCCCC":440,"NCCCC":441,"#[":442,"cccs":443,"[*]":444,"CNc":445,"17":446,"45":447,"Sc":448,"ccco":449,"NCCCN":450,"-][":451,"cH":452,"CCOP":453,"CCCc":454,"nccc":455,"+][":456,"CCCCCCCCCCCCCCCCC":457,"COS":458,"31":459,"(=[":460,"cnn":461,"CCCCNC":462,"cs":463,"-])(":464,"](=":465,"NCCSC":466,"OO":467,"18":468,"ncn":469,"Sn":470,"ncccc":471,"cncc":472,"CCCCOC":473,"OCCOC":474,"CSCC":475,"54":476,"Fe":477,"+].":478,"NCCc":479,"Mg":480,")*)[":481,"CCCS":482,"-]#[":483,"CCCl":484,"[*])":485,"Cu":486,"Li":487,"Zn":488,"SCc":489,"CNS":490,"].":491,")*)(":492,"NCCO":493,"no":494,"COCCC":495,"OH":496,"Ca":497,"24":498,"CP":499,"cccn":500,"ClC":501,"CCCOc":502,"*)(=":503,"Co":504,"OCOc":505,"Al":506,"CCCCOc":507,"CCNc":508,"*)[":509,"NCCOC":510,"+]=":511,"CNCCC":512,"OCCOCCOCCOCC":513,"CCCCCN":514,"NCCS":515,"Se":516,"CCNS":517,"NCCNC":518,"As":519,"CNCCN":520,"Ni":521,"coc":522,"ccsc":523,"Ru":524,"NNc":525,"Cr":526,"noc":527,"19":528,"OCCNC":529,"OCOC":530,"56":531,"41":532,")*)*":533,"([*])":534,"ccoc":535,"42":536,"CON":537,"*])[":538,")=[":539,"CCCNCC":540,"SCCC":541,"CCCCc":542,"](*)":543,"OCCCN":544,"Pb":545,"CF":546,"CCCCCCCCCCCCCCCCCCC":547,"cncn":548,"COCCOC":549,"OCCCCCC":550,"sccc":551,"-])/":552,"Pd":553,"CCSCC":554,"CNCc":555,"35":556,"cnccc":557,"-]#":558,"65":559,"*)(":560,"CCBr":561,"+])[":562,"CSSC":563,"Ti":564,"22":565,"Rh":566,"on":567,"CCCCS":568,"COCCOCC":569,"CCCCCCN":570,"ClCc":571,"CCCCCCCCCCCCCCCCCCCC":572,"(\\[":573,"Ba":574,"-]=[":575,"COCc":576,"NNN":577,"CI":578,"SS":579,")*)([":580,"OCCCO":581,"CNN":582,")/[":583,"Mo":584,"Zr":585,"Hg":586,"53":587,"Mn":588,"Pt":589,"COCCNC":590,"CNCCO":591,"CCCCCCCCCCCCCCCCCCCCCC":592,"cnccn":593,"]=":594,"CCP":595,"](\\":596,"CCCCCCCCCCCCCCCCCCCCCCCC":597,"NP":598,"COCCO":599,"CSCCC":600,"OCCCCCCCC":601,"SiH":602,"OOC":603,"CCCCCCCCCCCCCCCCCCCCC":604,"(-*)=":605,"CCOS":606,"OCCc":607,"20":608,"OCCOCCO":609,"CCCCCNC":610,"25":611,"(*)(=":612,"*)([":613,"-]=":614,"CCCCCCOC":615,"CCCn":616,"NSC":617,"OCCCCC":618,"Sb":619,"@](=":620,"-*)":621,"SN":622,"nsc":623,"CCCCCO":624,"CCCCCOC":625,"NON":626,"CCSc":627,"BrCc":628,"co":629,"@@](=":630,"nccn":631,"nccs":632,"Ic":633,"Ag":634,"ncccn":635,"CCCCCCCCN":636,"CCCSC":637,"+]#":638,"NCCCCCC":639,"CCCCl":640,"nccnc":641,"Ir":642,"[*])[":643,"ccnn":644,"Au":645,"*)*":646,"NCCCCC":647,"+]#[":648,")\\[":649,"123":650,"HH":651,")[*]":652,"CSSCC":653,"67":654,"CCCCCCCCCCCCCCCCCCCCCCC":655,"BrC":656,")(/":657,"PH":658,"Te":659,"cnnc":660,"ClCC":661,"nnnn":662,"]=[":663,"occc":664,"CCCCCCCCOC":665,"CCNCCN":666,"(/[":667,"--]":668,"NOCC":669,"76":670,"ns":671,"CCCCn":672,"OCCOc":673,"NCCCOC":674,"onc":675,"COCCCN":676,"CCCCCCCCCCCCCCCCCCCCCCCCC":677,"OCCCOC":678,"CCCCCCCCCCCCCCCCCCCCCCCCCC":679,"SSC":680,"CCOCCO":681,"nnn":682,"CCCOCC":683,"+])=":684,"NSN":685,")*)(=":686,"nonc":687,"Cd":688,"NCCCCN":689,"SCCN":690,"occ":691,"4
6":692,"CSCCN":693,"CCCCCCCN":694,"64":695,"*[":696,"CCCCCOc":697,"Ge":698,"OCCCl":699,"CCCCCCCCCCCCc":700,"NCCCO":701,"cncnc":702,"(*)[":703,"OCCCCCCC":704,"OCCOCCOCCOCCOCCOCCOCCOCC":705,"33":706,"CNNC":707,"52":708,"@@]%":709,"CCCBr":710,"OCN":711,"OCCCCCCCCCC":712,"])(=":713,"+])(=":714,"SCCS":715,"CCCCCCCOC":716,"CCCCCCCCCCCCCCCCOC":717,"NCCCNC":718,"36":719,"CCOCCN":720,")*)*)*)*":721,"Nn":722,"CNCCCC":723,"BrCC":724,")*)=":725,"COCCCC":726,"CCCCCCCCCc":727,"CCON":728,"26":729,"CCNCc":730,"CCOCCOC":731,"cnnn":732,"75":733,"OCCS":734,"CCCCCCCc":735,"OCCOCCN":736,"CCCCSC":737,"CCCCCCCCCCCCN":738,"++]":739,"Ce":740,"OCCCCCCCCC":741,"scc":742,"(*)":743,"78":744,"OCCCCCCCCCCCCCCCC":745,")(*)[":746,"++].[":747,"86":748,"87":749,"ClH":750,"OCCNCCO":751,"Gd":752,"OCCCNC":753,"CCCCCc":754,"CCOCCC":755,"ncsc":756,"CCCCCCCCCCCCOC":757,"98":758,"Bi":759,"NCN":760,"]([*])":761,"]([*])[":762,"ncnn":763,"NCCOP":764,"Cs":765,"Sr":766,"CCNCCC":767,"=%":768,"La":769,"CNCCOC":770,"CCCCCCCCOc":771,"CCCCCCc":772,"CCCCCCCCCCCCCCCCCCOC":773,"57":774,"OCCCCCCCCCCCC":775,"SCCCC":776,"scnc":777,"CCCCCCCCCCOC":778,"89":779,"97":780,"CCCCCl":781,"CSCc":782,"CCCNc":783,"[*])(":784,"@+]":785,"SSc":786,")(*)":787,"44":788,"Hf":789,"OCCCCCCCCCCC":790,"COCCCNC":791,"CCCCCCNC":792,"CNCCCN":793,"OCCl":794,"OCCOCCOC":795,"OCCCCOC":796,"cscn":797,"-*)=":798,"51":799,"Tl":800,"NCCCOCC":801,"-]([":802,"--](":803,"ONC":804,"Pr":805,"]\\":806,"])\\":807,":*)":808,"CCCCCCCCO":809,"CCCCCCO":810,"NCCCn":811,"Ga":812,"ncoc":813,"NCCOCCOCC":814,"27":815,"314":816,"CCF":817,"CONC":818,"CNCCc":819,"CCCCCCCCCCCCCCCCCCN":820,"OCCCCCN":821,"NCCCCNC":822,"nsnc":823,"Nd":824,"CNCCNC":825,"CCCCCCCCCCN":826,"CCCNS":827,"CCCCCCOc":828,"NCCCS":829,"COCCn":830,"OCCCc":831,"(:*)":832,"COCN":833,"OCCBr":834,"cscc":835,"CCCCCCCCCCCCO":836,"CCOCCCNC":837,"-])(-*)=":838,"In":839,"CCI":840,"cccnn":841,"NCCCc":842,"91":843,"Nb":844,"OCCCCN":845,"COCOC":846,"CSCN":847,"nncn":848,"OCCOCCOCCO":849,"NCCNc":850,"NNCC":851,"OCCCCCCCCCCCCCC":852,"@@+]":853,"([*])[":854,"ncncc":855,"1234":856,"CSN":857,"OCCOCCOCCOCCO":858,"NCCCCCCNC":859,"IC":860,"COCCOc":861,"CCCCOCC":862,"CCCP":863,"NCCSCC":864,"CCOCCCN":865,"OCCCCCCCCCCCCC":866,"OCCCCCCCCCCCCCCCCCC":867,"PC":868,"SH":869,"CCCCCCCCc":870,"CCCCCCCCNC":871,"NCCOCC":872,"NCCOc":873,"CCOCCOCCOCCOCCOCC":874,"(#":875,"++].":876,"29":877,"OCCCCO":878,"NNS":879,"+](-":880,"cncs":881,"cnoc":882,"NOS":883,"ccncn":884,"OCCOCCOCCOC":885,"28":886,"SCCO":887,"nnnc":888,"*)*)(":889,"Eu":890,"NCCCCCCCC":891,"Rb":892,"]#[":893,"])/[":894,"CCCCP":895,"81":896,"Re":897,"nncc":898,"CCOCc":899,"CCNCS":900,"CCSS":901,"NCCCCCN":902,"Sm":903,"]/":904,")(\\":905,"NCCOCCO":906,"NCCCCCCN":907,"37":908,"CB":909,"NCCCCO":910,"Os":911,"]#":912,"COCCc":913,"CCCCNc":914,"CCCCCCCCCN":915,"-])\\":916,"NCCCl":917,"([*])[*]":918,"*)*)*":919,"])/":920,"ocnc":921,"ccnnc":922,"NCNC":923,"CCCCBr":924,"CCCCCS":925,"CCCCCCCCCOC":926,"+])(":927,"OCCF":928,"NCCn":929,"CCCCCCSC":930,"CCCCCCCCCCCCCCCCCCCCCCCCCCC":931,"ccno":932,"Er":933,"IH":934,"OI":935,"Po":936,"ccon":937,"COCCNCC":938,"CCCCNCC":939,"CSCCNC":940,"CCCCCCCCCCCCOS":941,"-*)[":942,"30":943,"61":944,"NCCNCC":945,"SP":946,"Yb":947,"oncc":948,"sn":949,"CCCOP":950,"COB":951,"COCCCCCCCCCCCCCCCC":952,"OCl":953,"CCCNCCC":954,"CCCCCCCCCCCCCCCCCCCCCCCCCCCCCC":955,"OCCCCCOC":956,"OOO":957,"SeH":958,"*)*)*)*)*":959,"68":960,"NCCCCCCC":961,"Ta":962,"sccn":963,"CCCCCBr":964,"CCOCCNC":965,"NCCNS":966,"NCCNCCNCCN":967,"CCCCCCCCCCCCNC":968,"CCCCCCCCCCCCCCOC":969,"OCCCCCCOC":970,"*)*)=":971,"63":972,"7
35":973,"Ho":974,"SNC":975,"Tb":976,"CCCCCSC":977,"-]/":978,"CSS":979,")([*])":980,"ncco":981,"CCCOS":982,"47":983,"Dy":984,"COCO":985,"CCCCOP":986,"CCCCCOP":987,"CCCF":988,"CCCSCC":989,"CCCCCCCCOP":990,"CCCCCCCCCCO":991,"CCCCCCCCCCOc":992,"+])([":993,"OCCSC":994,"CSCCCC":995,"CCCCCn":996,"CCCCCCn":997,"CCCCCCCCCCCCCCCCCCCCCCCCCCCC":998,"2345":999,"nnsc":1000,"COCCCOC":1001,"OCCCS":1002,"nocc":1003,"SbH":1004,"NOCc":1005,"SCCOC":1006,"+]\\":1007,"CCOCCOCCN":1008,"cnco":1009,"CCCCCCCCCCCCCCCCN":1010,"CCCCCCCCCCCCCCCCS":1011,"OCCOS":1012,"\\[*]":1013,"conc":1014,"79":1015,"753":1016,"Be":1017,"FS":1018,"[*":1019,"NCl":1020,"OCP":1021,"ClP":1022,"CCCCCCCCCCCCCCCCCCNC":1023,"@](/":1024,"NCCNCCN":1025,"OCCCCl":1026,"NCCCCCO":1027,"TlH":1028,"(*)(":1029,"++]([":1030,"NCO":1031,"On":1032,"SCN":1033,"SCCCCCC":1034,"SCCCS":1035,"Tm":1036,"CCCCNS":1037,"CCCCOCCOC":1038,"OCF":1039,"CCCSc":1040,"CSCCO":1041,"SCF":1042,"+](\\":1043,"CCONC":1044,"@@](/":1045,"nncs":1046,"NNCc":1047,"/[*])":1048,"SSS":1049,"cocn":1050,"(*)([":1051,"48":1052,"55":1053,"FB":1054,"He":1055,"NCCCCCNC":1056,"OCCCCCCCCCCCCCCC":1057,"OCCCCCCCCCCCCCCCCC":1058,"se":1059,")[*])":1060,"CNCCS":1061,"OCCCOCC":1062,"NCCSCc":1063,"CCOCN":1064,"CCCNCCN":1065,"CCOCCOCC":1066,"cnsc":1067,")*)*)":1068,"OCOCC":1069,"OCOCCOC":1070,"([*])([*])":1071,"CCSCCC":1072,"OCCOCCCC":1073,"OCCOCCOCCOCCOCCO":1074,"COCCOCCOC":1075,"74":1076,"BH":1077,"CCB":1078,")[*])[":1079,"ClCCN":1080,"CCCCCCCCS":1081,"CCNN":1082,"OCCn":1083,"CSP":1084,"234":1085,"nnco":1086,"CCSSC":1087,"CCCCCCCCCCCc":1088,"CCCCCCCCCCCCS":1089,"(*":1090,"-*":1091,"73":1092,"NCS":1093,"CCCCCCCCOCC":1094,"BrCCC":1095,"CCOCCOCCOCC":1096,"CCCCCCCCCCCCOc":1097,"CCCCCCCCCCCCOP":1098,"NCCCNCCCCN":1099,"AlH":1100,"*)*)(*)*)(":1101,"23451234":1102,"SO":1103,"SCCc":1104,"SOOO":1105,"snc":1106,"OCNC":1107,")(*)=":1108,"CCCCCCCCCCc":1109,"CCNCCO":1110,"OCCSCC":1111,"CCOCCOCCNC":1112,"COCCNc":1113,"COCCOCCNC":1114,"NCCOCCOCCNC":1115,"*)*)":1116,"Lu":1117,"NCCCCCCCCN":1118,"OCOP":1119,"OCCNCC":1120,"COCCCCC":1121,"CCCI":1122,"CCCCCCCCn":1123,"BrCCc":1124,"CSCCS":1125,"CNCP":1126,"CCCNCCCN":1127,"+](\\[":1128,"CCOCCOCCOCCO":1129,"CCCCCCl":1130,"CCCCCCCO":1131,"CCCCCCBr":1132,"CCCCCCCOc":1133,"COCCS":1134,"OCCNc":1135,"CCCCCCCCCCCOC":1136,"CCCCCCCCCCCCn":1137,"OCCCCCCO":1138,"OCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCC":1139,"(*)*":1140,"/*)":1141,"96":1142,"ONc":1143,"Xe":1144,"COO":1145,"CNCCNCc":1146,"CCCCCCCCSC":1147,"CCCCCCCCCCNC":1148,"OCCCOc":1149,"CSSCCC":1150,"])*":1151,"CCOCCOCCOC":1152,"CCCCCCS":1153,"CCCCCCCCCCCCCCCCOc":1154,"CCCCCCCCCCCCCCCCOP":1155,"CCCCCCCCCCCCCCCCCCOCC":1156,"CCCCCCCCCCCCCCCCCCCCCCCCCCCCC":1157,"CCCCCCCNC":1158,"NCCCSC":1159,"COCCCOc":1160,"CCSSCC":1161,"CNCCCO":1162,"CCCOCCC":1163,"/[*]":1164,"CCCCCCCCCCOP":1165,"NCCNCCO":1166,"ccns":1167,"OCCCCc":1168,"456":1169,"OCCCCCCS":1170,"OCCCCCCOc":1171,"94":1172,"FCC":1173,"NI":1174,"NCOC":1175,"OOCC":1176,"SSCC":1177,"](*)[":1178,"COOC":1179,"CCCCCCOCC":1180,"CCCCCNc":1181,")(*)*":1182,"CNCCCNC":1183,"CCCCCCCCCCS":1184,"CCCCCCCCCCn":1185,"+]/":1186,"NCCOCCOCCO":1187,"NCCOCCOCCN":1188,"-])=[":1189,"CCCCCCCOP":1190,"CCSCc":1191,"OCCOCCC":1192,"CCCOCCOC":1193,"OCCOCCOCC":1194,"CCCCCCCCCCCCCOC":1195,"CCCCCCCCCCCCCCCO":1196,"OCCCCNC":1197,"CCCCCCCCCCCCCCCCCCOP":1198,"NCCCCNCCCN":1199,"COCCOCCO":1200,"COCCOCCOCC":1201,"NCCCCCCO":1202,"CCOCCOCCOCCOCCOCCOCCNC":1203,"--].[":1204,"/*":1205,"62":1206,"83":1207,"OOS":1208,"OCCCCCCCCCCCCCCCCCCCC":1209,"Pc":1210,"SCl":1211,"SCS":1212,"CNOC":1213,"CNCCCCN":1214,"ClCCc":1215,"CCCCCCC
CCCCN":1216,"CCCCCCCCCCCO":1217,"ncon":1218,"CCNCCCC":1219,"+])/":1220,"OCCOP":1221,"BrBr":1222,"-])*)[":1223,"NCCF":1224,"CSCCCCC":1225,"CSSSC":1226,"CCCCCCCCCCCCCCCCCOC":1227,"CCCCCCCSC":1228,"CCCNCc":1229,"345":1230,"COCCCO":1231,"CCSCCN":1232,"CCCCCCCCCBr":1233,"OCCNS":1234,"CCCCCCCCCCCCSC":1235,"CCCCCCCCCCCCCOP":1236,"OCCCCCl":1237,"csnn":1238,"*)(*)*":1239,"BiH":1240,"[*][":1241,"--][":1242,"59":1243,"71":1244,"NOCCO":1245,"SCCCN":1246,"](-*)":1247,"NCn":1248,"CCCCOCCOCC":1249,"CCCCCCNc":1250,"OCn":1251,"CNCCCCC":1252,"ClCCC":1253,"ClCCCN":1254,"CCCCCCCCOS":1255,"NCCP":1256,"CSCCCCCC":1257,"])\\[":1258,"(-[":1259,"CCCCCCCCCCCCCCCCOCC":1260,"CCCCCCCBr":1261,"CCNCCP":1262,"CCCOCCOCC":1263,"357":1264,"cocc":1265,")#":1266,"*=[":1267,"-[":1268,"58":1269,"72":1270,"85":1271,"=*":1272,"OF":1273,"OON":1274,"ccsn":1275,"COCOc":1276,"CCCCSCC":1277,"CCCCCNS":1278,"CNO":1279,"CCCCCCCCP":1280,"CCCCCCCCCO":1281,"CCCCCCCCCS":1282,"CCCCCCCCCCOCC":1283,"CCNCCCN":1284,"CCNCCNC":1285,"OCCP":1286,"-])*":1287,"NCCCNCC":1288,"NCCCOCCOCC":1289,"CCOCCS":1290,"CCOCCOCCOCCN":1291,"CCOCCCn":1292,"CCCCNCCC":1293,"SCCSC":1294,"COCCCl":1295,"CCSP":1296,"CCSSCCC":1297,"CCCON":1298,"CCCCCCCCCCCCCCN":1299,"CCCCCCCCCCCCOCC":1300,"NCCCCSC":1301,"cnns":1302,"AsH":1303,"CCNCCNCCNCC":1304,"COCCCNc":1305,"*)*)*)*)*)":1306,"++][":1307,"39":1308,"312":1309,"69":1310,"82":1311,"BOB":1312,"FP":1313,"IN":1314,"ICC":1315,"NOc":1316,"OSC":1317,"SCCCCC":1318,"Tc":1319,"]/[":1320,"ssc":1321,"sncc":1322,"snnc":1323,"COCCCCCCCCCCCCCCCCCC":1324,"NCP":1325,"CCCCB":1326,"CCCCNCCCN":1327,"-]/[":1328,"CCNCCOC":1329,"OCCCNCC":1330,"OCCSS":1331,"BrCCCC":1332,"BrCCOc":1333,"CSCCCCCCCCC":1334,"CNCN":1335,"CCCNN":1336,"+](/[":1337,"118":1338,"CCCCCCOP":1339,"CCCCCCCCCCCCCCCCCCCCOC":1340,"CCOO":1341,"SCCCO":1342,"SCCSCC":1343,"CCCCOCCCNC":1344,"341":1345,"NOCCN":1346,"CNCCCOC":1347,"CCCCCCCCCCCBr":1348,"CCCCCCCCCCBr":1349,"CCCCCCCCCCCCCCc":1350,"OCCCCCCN":1351,"NCCCCc":1352,"NCCCCOC":1353,"[*]\\":1354,"NCCCNS":1355,"OCCOCn":1356,"CCCSP":1357,"COCCCn":1358,"OCCOCCOCCOCCOC":1359,"OCCOCCOCCOCCOCCOCCOCCO":1360,"COCCOCc":1361,"COCCOCCOCCOC":1362,"CCCCCCCCCCCCCCCCCCCCCCOC":1363,"SSSSC":1364,"*)*)*)":1365,"nscc":1366,"CNCCCCCCOCC":1367,"*)*)*)*":1368,"*)*)*)*)*)*)*)*)*":1369,"NCCNCCNCCNCCN":1370,"NCCCCCCCCNCCCCCCCCN":1371,"2123":1372,"38":1373,"93":1374,"NCCCCCCCCCC":1375,"NOCOCCOC":1376,"OCCCCCCCCCCCCCCCCCCC":1377,"SOC":1378,"SCSc":1379,"Th":1380,"\\*)(":1381,"ss":1382,"](*)(*)*":1383,"COCCCCCCCC":1384,"CCCCCNCC":1385,")(-*)=":1386,"ClB":1387,"CCCCCCCCBr":1388,"CCCCCCCCCCCNC":1389,"CCNCCc":1390,"cccccc":1391,"NCCCOc":1392,"NCCOCCN":1393,"NCCSc":1394,"NCCOCCOC":1395,"NCCCOCCOC":1396,"CSCCCCCCC":1397,"CSCCl":1398,"CCCNCCOC":1399,"CCOCCn":1400,"CCOCCCO":1401,"CCCCCCP":1402,"CCCCCCCCCCCCCCCCO":1403,"CCCCCCCCCCCCCCCCNC":1404,"CCOCO":1405,"CCOCOC":1406,"cnncc":1407,"SCCn":1408,"NCCCF":1409,"COCCSC":1410,"OCCON":1411,"CCCOCCN":1412,"OCCOCCOCCNC":1413,"CCCCCCCCCCCCCCCc":1414,"CCCCCCCCCCSC":1415,"CCCCCCCCCCCCCN":1416,"OCCCBr":1417,"OCCCSC":1418,"-](/":1419,"OCCCCS":1420,"OCCCCBr":1421,"OCCCCOc":1422,"NCCCNCCCCNC":1423,"NCCCNCCCCNCCCN":1424,"csnc":1425,"ncno":1426,"CNCCNCCNCCN":1427,"567":1428,"*])[*]":1429,"COCCOCCOCCOCC":1430,"COCCCNS":1431,"NCCCCNCCCNC":1432,"OCCCCCCCCCCCOC":1433,"*)*)*)*)*)=":1434,"/*)/":1435,"92":1436,"95":1437,"99":1438,"At":1439,"NB":1440,"NCCCCCCCCCCC":1441,"ONCC":1442,"SCNC":1443,"SOO":1444,"](=[":1445,"al":1446,"COCOCC":1447,"COCCCCCCCCCC":1448,"COCCCS":1449,"NCSC":1450,"CCCCI":1451,"CCCCCOCC":1452,"CCCCOCCCN":1453,"CNSC":1454,"CNCC
NCC":1455,"CCCCCCCCNCCCN":1456,"COCn":1457,"COCNC":1458,"OCCI":1459,"@]/":1460,"BrCCN":1461,"NCCBr":1462,"NCCCNc":1463,"NCCCOCCO":1464,"CSCCCCCCCC":1465,"CCCCCI":1466,"CCOCP":1467,"CCOCn":1468,"SCn":1469,"+](=[":1470,"CCCNCCNC":1471,"101":1472,"+](/":1473,"CCOCCCl":1474,"CCOCCOCCOCCOCCOCCOCCOCCOCCOCC":1475,"CCOCCOCCOCCOCCO":1476,"CCCCCCCl":1477,"CCOB":1478,"CCCCCCCS":1479,"CCCCNCCCCN":1480,"SCCSCCS":1481,"CCCCON":1482,"nnccc":1483,"COCCBr":1484,")-[":1485,"CCCOCCO":1486,"CCCCCCCCCCCCCCO":1487,"CCCCCCCCCCCCCCOc":1488,"OCCCCCOc":1489,"NCCCCl":1490,"NCCCCCCCN":1491,"NCCCCNS":1492,"NCCSSCCNC":1493,"COCCOCCOCCO":1494,"COCCOCCOCCOc":1495,"COCCOCCOCCOCCOCCO":1496,"SSSC":1497,"SSSS":1498,"COCCOCCCC":1499,"COCCOCCC":1500,"COCCOCCN":1501,"OCCCCCCCCOC":1502,"*)*)(*)*":1503,"NCCCCCCCCCCNC":1504,"NCCCNCCCCNCCCNC":1505,"(*)*)*":1506,"(*)*)=":1507,"*-":1508,"66":1509,"656":1510,"77":1511,"711":1512,"84":1513,"88":1514,"@+](":1515,"BO":1516,"NCCCCCCCCCCCCCCCCCC":1517,"NNCCO":1518,"OBr":1519,"OOP":1520,"Rn":1521,"SI":1522,"SCSC":1523,"cco":1524,"COn":1525,"COCCCCN":1526,"NCNc":1527,"CCCCF":1528,"CCCCCOS":1529,"CCCCCCNS":1530,"OCI":1531,"OCS":1532,"CNP":1533,"CNCCCCCC":1534,"CNCCNS":1535,"CNCCOCCO":1536,"CCCCCCCCCCOS":1537,"CCNP":1538,"+][*]":1539,"OCCNCc":1540,"NCCCOCCC":1541,"CSNC":1542,"CSCSC":1543,"CSCCSC":1544,"CCCCCP":1545,"CCCNCCCCN":1546,"CCCNCCNCCC":1547,"=[*]":1548,"CCOCCOc":1549,"NSc":1550,"CCCCCCCCCCCCCCCCCc":1551,"CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC":1552,"CCCCCCCCCCCCCCCCCCOc":1553,"CCOCCCC":1554,"CCOCCCCNC":1555,"(\\[*])":1556,"SCCl":1557,"])(\\":1558,"ncncn":1559,"CCSCCNC":1560,"CCCOCCCC":1561,"CCCCCCCCCCCS":1562,"CCCCCCCCCCCCCc":1563,"CCCCCCCCCCCCCCCN":1564,"COCCNS":1565,"454":1566,"OCCOCCOCCOCCOCCOCC":1567,"SCCCSC":1568,"229":1569,"CCCCCCNCCC":1570,"COCCNCc":1571,"COCCOS":1572,"CSCCCSC":1573,"OCCCCCCCCCCOC":1574,"OCCOCCOCCN":1575,")*)*)*)*)*)*":1576,"NCCOCCOCCOCCNC":1577,"CCOCCOCCOCCOCCN":1578,"CNCCCCCCOCCOCc":1579,"*:":1580,"*=":1581,"*#[":1582,"++](":1583,"40":1584,"49":1585,"467":1586,"50":1587,"60":1588,"70":1589,"80":1590,"Ac":1591,"Ar":1592,"FCc":1593,"NBr":1594,"NCCCCCCCCC":1595,"NOCCOC":1596,"OCCCCCCCCCCCCCCCCCCCCCC":1597,"OCCCCCCCCCCCCCCCCCCCCC":1598,"PN":1599,"Ra":1600,"SB":1601,"SCCNCC":1602,"SCCCCCCCCCCCC":1603,"SSCCC":1604,"\\*":1605,"op":1606,"sscc":1607,"@@+](":1608,"COOCC":1609,"COCCCCCC":1610,"COCCCCNC":1611,"COCCCCOC":1612,"COCCCCCCCCCCCCCCCCCCCC":1613,"NCNCCCC":1614,"CCCCOCCO":1615,"CCCCOCCNC":1616,"OCSC":1617,"]([*]":1618,"CNCOC":1619,"CNOCC":1620,"CNNN":1621,"CNCCOc":1622,"CNCCCc":1623,"CNCCCS":1624,"ClCCn":1625,"ClCCCc":1626,"ClOCl":1627,"-](=":1628,"CCCCCCCCCl":1629,"CCCCCCCCCNC":1630,"CCCCCCCCCOP":1631,"CCCCCCCCNS":1632,"CCCCCCCCCSC":1633,"CCCCCCCCCOCC":1634,"CCCCCCCCON":1635,"CCNCCCNCC":1636,"+]*)":1637,"OCCCOP":1638,"OCCSSC":1639,"BrP":1640,"BrCCOC":1641,"BrCCn":1642,"125":1643,"-])-":1644,"NCCOCc":1645,"NCCNCc":1646,"NCCON":1647,"NCCOCCNC":1648,"NCCOCCOCCOCCOCCOCCOCCOCCOCC":1649,"CSCCCN":1650,"CSCCCO":1651,"CCCCCF":1652,"NNCCN":1653,"])-":1654,")([*])[*]":1655,"102":1656,"131":1657,"CCOCCc":1658,"CCOCCCOC":1659,"CCOCCOCCO":1660,"CCOCCCNCC":1661,"CCOCCOCCOCCOC":1662,"CCCCCCI":1663,"CCCCCCCOCCCN":1664,"NSS":1665,"NSCC":1666,"CCCCCCCCCCCCCCCCCN":1667,"CCCCCCCCCCCCCCCCCCc":1668,"CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC":1669,"CCCCCCCCCCCCCCCCSc":1670,"CCCCCCCCCCCCCCCCCCCCCCN":1671,"CCOOC":1672,"cnon":1673,"cnsn":1674,")\\*":1675,"CCCCNO":1676,"SCCCOC":1677,"NCCCBr":1678,"CCCCOS":1679,"CCCCOCCN":1680,"+]([*])":1681,"COCCl":1682,"COCCCOCC":1683,"CCSN":1684,"CCSCCCC":1685,"CCSCCO
":1686,"CCSSc":1687,"OCCNCCC":1688,"OCCNCCOC":1689,"CNCCCl":1690,"CNCCSC":1691,"CNCCOP":1692,"CCCOCCCN":1693,"CCCOCCCNC":1694,"OCCOCCOc":1695,"CCCCCCCCCCP":1696,"CCCCCCCCCCCl":1697,"CCCCCCCCCCCOP":1698,"NCCNCCNC":1699,"CCCCCCCCCCCCCCOCC":1700,"CCCCCCCCCCCCCOS":1701,"CCCCCCCCCCCCCCCOC":1702,"OCCCn":1703,"CCCOCc":1704,"OCCCCOP":1705,"OCCCCCCNC":1706,"])=[":1707,"CCCCCCCCCCCCCCCCCCP":1708,"NCCCCCOC":1709,"NCCCCOCC":1710,"NCCCCCCNc":1711,"NCCCNCCCN":1712,"CSCCCl":1713,"CCCSCCC":1714,"NCCSSC":1715,"NCCSSCC":1716,"NCCSSCCN":1717,"CNCCNCCN":1718,")*)*)=":1719,"OCCCCCCCl":1720,"COCCOCCOCCN":1721,"COCCOCCOCCOCCN":1722,"COCCOCCOCCOCCOC":1723,"CCCCCCNCCCC":1724,"]=*":1725,"OCCOCCOS":1726,"GeH":1727,"OCCOCCOCCOCCOCCOCCOCCOCCOCCO":1728,"OCCOCCOCCOCCOCCOCCOCCOCCOCCOC":1729,")*)*)*)*)*":1730,"OCCSCCOC":1731,"579":1732,"([*])[*])":1733,"CCOCCOCCOCCOCCOCCOC":1734,"ClCCNP":1735,"CCOCCOCCOCCOCCNC":1736,"NCCOCCOCCOCCC":1737,"NCCOCCOCCOCCN":1738,"NCCCOCCOCCOCCCN":1739,"NCCCOCCOCCOCCCNC":1740,"OCCSSCCOC":1741,"OCCSSCCO":1742,"*)*)*)*)*)*)*)*)*)*)":1743,"#*":1744,"(*)*)(*)*)(":1745,"*)*)[":1746,".*:":1747,"113":1748,"4123":1749,"557":1750,"90":1751,"=*)*)*)*":1752,"BB":1753,"Cm":1754,"FN":1755,"FCl":1756,"FCCC":1757,"FCCOC":1758,"NF":1759,"NCCCCCCCCCCCCCCCC":1760,"NCCCCCCCCCCCC":1761,"NNCCc":1762,"NCCCCCCCCCCCCN":1763,"OCCCCCCCCCCCCOC":1764,"Pu":1765,"SF":1766,"SCCCCCCCCCC":1767,"SCCCc":1768,"\\*)":1769,"]*":1770,"]\\[":1771,"(=*)":1772,"COSC":1773,"COCCCCCCCCC":1774,"COCCCCCCCCCCCC":1775,"NCNCCCCC":1776,"CCCCSc":1777,"OCBr":1778,")(*)(":1779,")(*)*)(*)*)(":1780,"CNCCCCO":1781,"CNNCc":1782,"CNCCCCNC":1783,"CNCCCNCC":1784,"CNCCCCCCN":1785,"ClS":1786,"ClCl":1787,"ClCCOP":1788,"ClCCCl":1789,"ClSS":1790,"-]%":1791,"CCCCCCCCl":1792,"CCCCCCCCCCl":1793,"CCCCCCCCNCCCC":1794,"CCCCCCCCCCOCCO":1795,"CCCCCCCCCCOCCC":1796,"CCCCCCCCOCCOS":1797,"CCNNC":1798,"CCNCCNCC":1799,"+]/[":1800,"OCCSc":1801,"OCCCON":1802,"OCCCNCCCO":1803,"@]\\":1804,"BrCCCCN":1805,"BrCCCc":1806,"-])/[":1807,"NCCOCCC":1808,"NCCOCCOCCOCCOCC":1809,"NCCOCCOCCOCCOCCOCCO":1810,"CSI":1811,"CSO":1812,"CSCO":1813,"CSCOC":1814,"CSCS":1815,"CSCNC":1816,"CSCCCNC":1817,"CSCSCC":1818,"CSCCCS":1819,"CSCCNc":1820,"NNNC":1821,"])*)=":1822,"CNCNC":1823,"(-*)":1824,"(-*)(=":1825,"(/[*])[":1826,"10467":1827,"213":1828,"CCOCCl":1829,"CCOCCBr":1830,"CCOCCCNc":1831,"CCCCCCCCCCCCCCCCc":1832,"CCCCCCCCCCCCCCCCNS":1833,"CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC":1834,"CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCOC":1835,"CCOCCCCCC":1836,"CCOCCNc":1837,"CCCCCCCn":1838,"CCCCNP":1839,"CCCCNCCCC":1840,"@](\\":1841,"NCCCSCC":1842,"CCCCOB":1843,"CCCCOCCC":1844,"*])=":1845,"343":1846,"OCOCCOCCO":1847,"nncnn":1848,"CCNCCCO":1849,"])(*)*":1850,"COCCCOCCOC":1851,"CCSNC":1852,"CCSCCOC":1853,"CCSSCCNC":1854,"CCSSSC":1855,"CCCCCCCCCI":1856,"OCCOO":1857,"CNCCCOc":1858,"CNCCSCC":1859,"CNCCCOCC":1860,"CCCOCCOCCO":1861,"CCCOCCOCCOCCOCCOCCOCC":1862,"CCCCCCCCCCCCCNC":1863,"OCCOCCOCCS":1864,"CCCCCCCCCCCCCCCBr":1865,"CCCCCCCCCCI":1866,"NCCNCCCNCCN":1867,"CCCCCCCCCCCCP":1868,"CCCCCCCCCCCCCO":1869,"CCCCCCCCCCCCNCC":1870,"CCCCCCCCCCCCCOCC":1871,"CCCCCCCCCCCCSCCC":1872,"CCCCCCCCCCCCOCCOCCOCCO":1873,"CCCCCCCCCCCCOCCOS":1874,"CCCCCCCCCCCCCCS":1875,"CCCCCCCCCCCCCCCOCC":1876,"OCCCF":1877,"OCCCCI":1878,"OCCCCCS":1879,"OCCCCOCCCO":1880,"CCCCCCCCCCCCCCCCCCBr":1881,"[*])=":1882,"NCCCNCCN":1883,"318":1884,"OOOC":1885,"ncns":1886,"CCCCOCN":1887,"CCCSSC":1888,"CCCSSCCCC":1889,"[*])-":1890,"OCCOCCOCCOCCOc":1891,"OCCOCCOCCOCCOCCOCCOCCOCCO":1892,"NCCSS":1893,"197":1894,"5678":1895,"4253":1896,"OCCCNCCO":1897,"OCCCCCCl":1898,"*)(*)=":1899
,"CCCCSCCCC":1900,"COCCOCCOCCOCCOCCOCCOCCOCCOCCOCC":1901,"COCCOCCOCCOCCOCCOCCO":1902,"COCCOCCOCCOCCOCCOC":1903,"COCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCC":1904,"COCCOCCOCCOCCOCC":1905,"CCCCCCCCCCCCCCCCCCCCCO":1906,"(\\[*])[*]":1907,"SSN":1908,"SSCCCCCCO":1909,"537":1910,"CCCCCCCCCCCCCCCCCCCCCCCCCO":1911,"CCCCCCCCCCCCCCCCCCCCCCCCOC":1912,"CCCCCCCCCCCCCCCCCCCCCCCCCCCO":1913,"COCCOCCCNC":1914,"OCCCCCCCCCCOc":1915,"OOCl":1916,"201":1917,"CCCCCCCCNCCCCCCCC":1918,"NCCCCCCBr":1919,"CCNCCNCCN":1920,"468":1921,"OCCCCCCCc":1922,")*)*)*)*)*)*)*)*)*)*":1923,"CCCCCCCCCCCCNCCC":1924,"869":1925,"OCCCNCc":1926,"8915":1927,"@+]%":1928,"OCCOCCOCCOCCC":1929,"12345678":1930,"OCCOCCOCCOCCOS":1931,"NCCSCCCO":1932,"CCOCCOCCOCCOCCOCCNC":1933,"CCOCCOCCOCCOCCOCCOCCO":1934,"CCOCCOCCOCCOCCOCCOCCOCCOCCNC":1935,"234567":1936,"[*+":1937,"[*-]":1938,"CCOCCOCCOCCOCCC":1939,"(*)*)*)(":1940,"CCCCOCCOCCOP":1941,"CCCOCCOCCOCCOCCOCCOC":1942,"CCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOC":1943,"@]/%":1944,"CNCCCCCCOCCCCc":1945,"CNCCCCCCNCCCCCCCCNCCCC":1946,"ClSSCl":1947,"(/[*])[*]":1948,"CCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOC":1949,"123456789":1950,"(#[":1951,"*(":1952,"**":1953,"*/":1954,"++])=":1955,"--]([":1956,"/*)(=":1957,"212":1958,"245":1959,"321":1960,"642":1961,"6123":1962,"696":1963,"713":1964,"822":1965,"846":1966,"912":1967,"933":1968,"=*)":1969,"Bc":1970,"FCOC":1971,"IB":1972,"IP":1973,"ICCC":1974,"ICc":1975,"Kr":1976,"Ne":1977,"NNCCCN":1978,"NOCCCN":1979,"NCOCCCC":1980,"NCCCCCCCCO":1981,"NCCCCCCCCCCN":1982,"Op":1983,"ONO":1984,"ONCc":1985,"OOCCCC":1986,"OCCCCCNC":1987,"PO":1988,"Pa":1989,"PCC":1990,"PCCP":1991,"Rf":1992,"SNCC":1993,"SCCOCC":1994,"SCCCCCCC":1995,"SCCCCCCCCCCC":1996,"SCCCCl":1997,"SCCOCCO":1998,"SCCOCCN":1999,"SCCCCSC":2000,"]*)[":2001,"(=*)([":2002,"](*)(":2003,"](*)([":2004,"](-*)[":2005,"ccs":2006,"ccocc":2007,"@@+]([":2008,"COI":2009,"COCCCCCCC":2010,"CONCCC":2011,"CONCc":2012,"COCCCCCCCCCCCCCC":2013,"COCCCc":2014,"COCOCCC":2015,"COCCCCS":2016,"COCCCCCNC":2017,"NCNS":2018,"NCSCC":2019,"CCCCNN":2020,"CCCCOCc":2021,"CCCCCSc":2022,"CCCCCSCC":2023,"CCCCCON":2024,"CCCCCCCNCC":2025,"CCCCCOCCOC":2026,"CCCCOCCOCCOC":2027,"CCCCCCOCCCN":2028,"CCCCOCCOCCCC":2029,")(*)(*)*":2030,"CNn":2031,"CNCO":2032,"CNNc":2033,"CNOCc":2034,"CNCCCCOC":2035,"CNCCCCc":2036,"CNCCCCCO":2037,"CNCCCCl":2038,"CNCCOCCOC":2039,"CNCCCCCCO":2040,"ClI":2041,"Cln":2042,"ClCOC":2043,"ClCCNC":2044,"ClCCO":2045,"ClCn":2046,"ClCCl":2047,"ClCSc":2048,"ClCCCCc":2049,"ClCOCc":2050,"ClCCSc":2051,"-]*":2052,")(=[":2053,"CCCCCCCCCOc":2054,"CCCCCCCCOCCO":2055,"CCCCCCCCOCCOCC":2056,"CCCCCCCCSc":2057,"CCCCCCCCCNS":2058,"CCCCCCCCCCNS":2059,"CCCCCCCCCON":2060,"CCCCCCCCCCOCCCN":2061,"CCCCCCCCCCSS":2062,"CCCCCCCCCSCCCC":2063,"CCCCCCCCCOCCOS":2064,"CCCCCCCCCCOCCOCCOCCOCCOCCOCCO":2065,"nco":2066,"ncs":2067,"ncnnc":2068,"COCP":2069,"COCS":2070,"COCBr":2071,"CCNO":2072,"CCNNCC":2073,"CCNSC":2074,"CCNCCCCNCC":2075,"CCNCCCCNCCCCN":2076,"+]%":2077,"+])=[":2078,"OCCOCc":2079,"OCCOCO":2080,"OCCCOCCN":2081,"OCCCOS":2082,"OCCCNS":2083,"OCCSCCC":2084,"OCCCOCCOC":2085,"OCCCOCc":2086,"OCCSCCS":2087,"@]=":2088,"BrB":2089,"BrCCCN":2090,"BrCCCn":2091,"121":2092,"127":2093,"@@]/":2094,"@@]\\":2095,"-])*)*":2096,"NCCCOP":2097,"NCCCOCCN":2098,"NCCOCCS":2099,"NCCOCCOCCOC":2100,"NCCOCCBr":2101,"NCCCOCCOCCO":2102,"NCCOCCOCCOCCOCCOCCOCCOCCO":2103,"CSCCOC":2104,"CSCCCCN":2105,"CSSc":2106,"CSCCCCCN":2107,"CSSCCN":2108,"CSSCCO":2109,"CSOOO":2110,"CSSSSSC":2111,"CSSSSC":2112,"NNCCCC":2113,"NNNN":2114,")/*":2115,")([*])[":2116,"OPO":2117,"OPOC":2118,"CCCNO":2119,"CCCNNC":2120,"CC
CNCCCC":2121,"CCCNCCCNC":2122,"CCCNCCS":2123,"100":2124,"103":2125,"1045":2126,"10345":2127,"10642":2128,"+](*)":2129,"133":2130,"CCOCCI":2131,"CCOCCOS":2132,"CCOCCOCCC":2133,"CCOCCOCCCC":2134,"CCOCCCNCCN":2135,"CCOCCOCCOCCOCCOCCO":2136,"NaH":2137,"111":2138,"CCCCCCF":2139,"CCCCCCOS":2140,"CCCCCCOCCC":2141,"CCCCCCCOCCC":2142,"CCCCCCSCCC":2143,"CCCCCCCOCCO":2144,"NSNS":2145,"CCCCCCCCCCCCCCCCP":2146,"CCCCCCCCCCCCCCCCCO":2147,"CCCCCCCCCCCCCCCCNc":2148,"CCCCCCCCCCCCCCCCCCO":2149,"CCCCCCCCCCCCCCCCCCCCN":2150,"CCCCCCCCCCCCCCCCSCC":2151,"CCCCCCCCCCCCCCCCCOCC":2152,"CCCCCCCCCCCCCCCCCCS":2153,"CCCCCCCCCCCCCCCCOCCO":2154,"CCCCCCCCCCCCCCCCCCSC":2155,"CCCCCCCCCCCCCCCCCCCc":2156,"CCCCCCCCCCCCCCCCCCCCc":2157,"CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC":2158,"CCOOCC":2159,"CCOCCCCO":2160,"CCOCCNCC":2161,"CCOCOCC":2162,"CCOCCCS":2163,"CCOCCNS":2164,"@@](*)(":2165,"(\\[*])=":2166,"CCCCCCCP":2167,"CCCCNCCCCNCC":2168,"238":2169,"SCCCl":2170,"SCCBr":2171,"SCCOP":2172,"NCCCNCc":2173,"CCCCOCCCC":2174,"CCCCOOC":2175,"CCCCOCOCCCC":2176,"346":2177,"OCON":2178,"OCOS":2179,"nncnc":2180,"NNCNNC":2181,"CCNCCCOC":2182,"CCNCCSC":2183,"CCNCCSS":2184,"])(*)[":2185,"COCCF":2186,"([*])(":2187,"([*])(=":2188,"CCSCCCCCCCCCCCC":2189,"CCSSCCN":2190,"CCCCCCCCCF":2191,"151":2192,"OCCOCCCCCC":2193,"OSOO":2194,"OCCNCCCN":2195,"OCCNCCCS":2196,"CCCOO":2197,"CCCOCCCCC":2198,"CCCOCCNC":2199,"\\[*])":2200,"CCCCCCCCCCCI":2201,"CCCCCCCCCCCP":2202,"CCCCCCCCCCCSCC":2203,"OCCOCCF":2204,"OCCOCCS":2205,"OCCOCCCOC":2206,"OCCOCCSC":2207,"OCCOCCOCCOc":2208,"OCCOCCOCCF":2209,"CCCCCCCCCCF":2210,"NCCNCCCC":2211,"NCCNCCC":2212,"CCCCCCCCCCCCl":2213,"CCCCCCCCCCCCCl":2214,"CCCCCCCCCCCCBr":2215,"CCCCCCCCCCCCNc":2216,"CCCCCCCCCCCCSCC":2217,"CCCCCCCCCCCCOCCO":2218,"CCCCCCCCCCCCCNc":2219,"CCCCCCCCCCCCOCCOC":2220,"CCCCCCCCCCCCCOCCO":2221,")*)#":2222,"CCCCCCCCCCCCCCBr":2223,"CCCCCCCCCCCCCCOP":2224,"CCCCCCCCCCCCCCOS":2225,"OCCCI":2226,"OCCCP":2227,"COCCNN":2228,"COCCNCCNC":2229,"COCCNCCNS":2230,"OCCCCCO":2231,"OCCCCOCC":2232,"OCCCCCc":2233,"OCCCCCCOCC":2234,"OCCCCNCc":2235,"NCCCCI":2236,"NCCCCn":2237,"NCCCCBr":2238,"NCCCCCS":2239,"NCCCCNc":2240,"NCCCCCCOC":2241,"NCCCCOCCOC":2242,"NCCCCCCNS":2243,"4567":2244,"NCCCNCCS":2245,"NCCCNCCCNCCCN":2246,"NCCCNCCSP":2247,"+][*])[":2248,"-])(*)=":2249,"](=*)*":2250,"OOCCCCO":2251,"OOOO":2252,"CSCCSCC":2253,"CSCCSCCS":2254,"CCCSN":2255,"CCCSSc":2256,"CCCSSCCC":2257,"CNCCCP":2258,"CNCCCn":2259,"CNCCCBr":2260,"CNCCCSC":2261,"OCCOCCOCCOCCNC":2262,"OCCOCCOCCOCCOCCOC":2263,"OCCOCCOCCOCCOCCOCCO":2264,"OCCOCCOCCOCCOCCOCCN":2265,"OCCOCCOCCOCCOCCOCCOCCOC":2266,"NCCSP":2267,"NCCSCCN":2268,"CNCCNCCCNCCNC":2269,"OCOCc":2270,"564":2271,")*)*)*":2272,")*)*)*)":2273,"SCCCSCC":2274,"PbH":2275,"OCCCCCCSSCCCCCCO":2276,"CCSCCS":2277,"CCSCCCO":2278,"CCSCCCl":2279,"CCSCCSCC":2280,"CCSCCSP":2281,"*)(*)*)(*)*)(":2282,"228":2283,"CCCCSP":2284,"CCCCSCCO":2285,"COCCOCCl":2286,"COCCOCCCOC":2287,"COCCOCCOc":2288,"COCCOCCOS":2289,"COCCOCCOCCC":2290,"COCCOCCOCCCN":2291,"COCCOCCOCCOCCO":2292,"COCCOCCOCCS":2293,"CCCCCCNO":2294,"CCCCCCNCCCCC":2295,"SSCCCC":2296,"SSCCCCCCCC":2297,"CNCCOCCC":2298,"CCCCCCCCCCCCCCCCCCCCCCCO":2299,"CCPC":2300,"CCPCC":2301,"CCCCCCCCCCCCCCCCCCCCCCCCCOC":2302,"COCCOO":2303,"COCCOCCNCC":2304,"CSCCCSCC":2305,"OCCCCCCCCNC":2306,"OCCCCCCCCCOC":2307,"CCOSOOO":2308,"202":2309,"203":2310,"252":2311,"CCCCCCOCc":2312,"OCCCCCBr":2313,"CCNCCNCCNCCN":2314,"NOCCS":2315,"NCCCCNP":2316,"CCCCCCCNS":2317,"NCCCOCCOc":2318,"OCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCO":2319,"OCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCO":2320,"CNCCCCCCNC":2321,"CNCCCCCCNCC":232
2,"COCCCCCl":2323,"COCCCCOCC":2324,"785":2325,"OCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC":2326,"573":2327,"OCCCCCCCCCCCS":2328,"OCCCCCCCCCCCSS":2329,"NCCOCCOCCOCCO":2330,"NCCOCCOCCOCCOCCC":2331,"CCCCCCCCCCNCCCCCCCCC":2332,"12345":2333,"CCCCOCCOP":2334,"*)*)(*)*)=":2335,"NCCCCCCCCCCO":2336,"CCSSCCCCCCCCCCCCCCC":2337,"NCCOCCON":2338,"CCCCCCCCCNCCCCCCCCC":2339,"*)*)*)*)":2340,"CCCCCSCCO":2341,"CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCO":2342,"*)*)*)*)*)*)=":2343,"474":2344,"CCCSCCSCCC":2345,"CCCCCCCCCCCCCCCCCCCCCCCCCCCCCOC":2346,"SSSSCC":2347,"OCCCOCCCO":2348,"CCOCCOCCOP":2349,"CCCCCCCCCCCCSCCCCCCCCCCCC":2350,"(*)*)":2351,"(*)*)[":2352,"CCCCCCCCOCCOP":2353,"CCOCCOCCOCCNC":2354,"CCOCCOCCOCCOCC":2355,"CCCCCCSSCCS":2356,"CCCCCCCCCCCCCCCCCCOCCCOC":2357,"CCSSCCn":2358,"NCOCNC":2359,"CCCCCCOCCOP":2360,"NCCCCNCCCNCCCN":2361,"/*)\\[":2362,"CCCCCCCCCCCOCCO":2363,"CCCCOCCOCCNC":2364,"NOCCNB":2365,"CNCCNCCNCCNCCNCCN":2366,"COCCOCCOCCOCCNC":2367,"COCCOCCOCCOCCCN":2368,"/*)/*":2369,"CCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCNC":2370,"CCOCCOCCOCCOCCOCCC":2371,"SBS":2372,"NCCCNCCCNCCCCCNC":2373,"(*)*)(*)*)(*)*)(*)*)(":2374,"NCCOCCOCCOCCOCCOCCOCCC":2375,"CCCCCCCCCCCCOCCOCCOCCOS":2376,"CNCCCCCCNCCCCCCCCNCCCCCCNCc":2377,"SCCCCCCCCCCCn":2378,"ClCCOCOCCCl":2379,"CCNCCCCNCCCCNCCCCNCCCCNCC":2380,"CCNCCSSCCNCCC":2381,"(*)*)(":2382,"(*)*)(*)*":2383,"*#":2384,"*\\":2385,"*([":2386,"*)/":2387,"*)\\":2388,"*)\\*":2389,"*(*)*)":2390,"++](=":2391,"--]=[":2392,"-]([*])[":2393,".*":2394,"/*)=":2395,"/*)[":2396,"1123":2397,"112345":2398,"210":2399,"224":2400,"256":2401,"265":2402,"356":2403,"375":2404,"3345":2405,"412":2406,"532":2407,"613":2408,"686":2409,"718":2410,"824":2411,"=*)=":2412,"@+]([":2413,"Am":2414,"Bh":2415,"Bk":2416,"Cf":2417,"Db":2418,"Ds":2419,"Es":2420,"FF":2421,"FI":2422,"Fm":2423,"FCCN":2424,"FCCOc":2425,"FCCl":2426,"FCBr":2427,"FCCBr":2428,"FCCCBr":2429,"FCCI":2430,"FCCCCBr":2431,"FCCCCCBr":2432,"FCCCCCCBr":2433,"FCCCCF":2434,"FCCCCCCCCCCF":2435,"Hs":2436,"II":2437,"IO":2438,"ICCCC":2439,"IOC":2440,"ICCCCCCCC":2441,"ICCN":2442,"ICI":2443,"ICCI":2444,"ICCOCc":2445,"ICCOCCOCC":2446,"ICCCI":2447,"ICCCCCI":2448,"ICCCCCCI":2449,"ICCCCCCCCCI":2450,"ICCCCCCCCCCI":2451,"ICCOCCI":2452,"ICCCCCCCCCCCI":2453,"KH":2454,"Lr":2455,"Md":2456,"Mt":2457,"No":2458,"NCOCC":2459,"NCCCCCCCCCCCCCCC":2460,"NCCCCCCCCCCCCCC":2461,"NNCCS":2462,"NCOCCOC":2463,"NOCCCO":2464,"NOCCc":2465,"NCCCCCCCCOC":2466,"NNCCCO":2467,"NCCCCCCCCCCCCCCCCCCN":2468,"NCOCN":2469,"NCCCCCCCCNC":2470,"NCCCCCCCCCCCCNC":2471,"NCCCCCCCCCCCCCCN":2472,"ONN":2473,"OOc":2474,"ONS":2475,"ONNC":2476,"ONCCCN":2477,"ONCCc":2478,"OCCCCCCCCCCCCCCCCCCCCCCCC":2479,"OCCCCCCCCCCCCCCCCCCCCCCCCC":2480,"OCCCCCCCCCCCCCCCCCCCCCCCCCC":2481,"OOCCCOCC":2482,"PI":2483,"PP":2484,"PS":2485,"Pm":2486,"PCl":2487,"POCCC":2488,"PNP":2489,"POOP":2490,"Rg":2491,"SCO":2492,"SCOC":2493,"SBr":2494,"SnH":2495,"SNN":2496,"SOP":2497,"SNCCC":2498,"SCCCNC":2499,"SCCCCO":2500,"SCCCCCCCCC":2501,"SOS":2502,"SSCCNC":2503,"SNCCCC":2504,"SCCCCOC":2505,"SCSCC":2506,"SSCc":2507,"SCCCCOc":2508,"SCCCCCN":2509,"SCCNS":2510,"SCCCCS":2511,"SOOC":2512,"SCCCCCNC":2513,"SCCON":2514,"SCCCCCCCCCCCCN":2515,"SCCCCCCO":2516,"SCCCCCCCCCCN":2517,"SNCCOCC":2518,"SCCOCCOCCOCCOCCOCC":2519,"SCCCCCCCCCCCCCCCCS":2520,"SCSP":2521,"SCCCCCCCCCCCc":2522,"SCCOCCOCCOCC":2523,"SCCCCCCCCCBr":2524,"SCCCCCCCCCS":2525,"SCCOCCS":2526,"SCCCCCCCCCCCBr":2527,"SNSNS":2528,"\\*)=":2529,"onnc":2530,"pH":2531,"scnn":2532,"te":2533,"(=*)=":2534,"(=*)*)=":2535,"](-":2536,"](*)(=":2537,"](-[":2538,"](*)*)(*)*":2539,"ccccccc":2540,"ccss":2541,"COF":2542,"COOP":2543,"CON
S":2544,"COCCCCO":2545,"CONNC":2546,"COOS":2547,"COOCCN":2548,"COCCCCCCCCCCC":2549,"COCCCCCCCCCCCCCCC":2550,"COCNc":2551,"COCCCCCN":2552,"COCCCNCC":2553,"COCCCCc":2554,"COCOCCOC":2555,"COCCCCCCOC":2556,"COCCCCCO":2557,"COCCCCCOC":2558,"COCCCCCCO":2559,"COCOCOC":2560,"CONCCCCCCNC":2561,"COCCCCCCS":2562,"COCCCCOCCC":2563,"NCNN":2564,"NCSc":2565,"NCSCCC":2566,"NCNCN":2567,"NCSCN":2568,"CCCCNCc":2569,"CCCCNCCO":2570,"CCCCCOCCC":2571,"CCCCNCCOC":2572,"CCCCCNCCC":2573,"CCCCNCCS":2574,"CCCCCSSC":2575,"CCCCOCCOCCO":2576,"CCCCOCCCOC":2577,"CCCCCCOCCO":2578,"CCCCOCCCl":2579,"CCCCOCCS":2580,"CCCCOCCOCCOCCO":2581,"CCCCOCCOCCOCCOCCO":2582,"CCCCOCCOS":2583,"CCCCCCOCCOCCOCCO":2584,"CCCCCOCCOCCO":2585,"CCCCOCCOCCOS":2586,"CCCCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCC":2587,"cccoc":2588,"OCSOC":2589,")(\\[":2590,")(*)*)(":2591,")(*)*)=":2592,")(*)*)":2593,")(*)*)(*)*":2594,")(*)*)[":2595,"CNI":2596,"CNNCC":2597,"CNOP":2598,"CNCCOCC":2599,"CNNNC":2600,"CNCOCC":2601,"CNCCCCCCCCCCCC":2602,"CNOCCOC":2603,"CNCCCCCNC":2604,"CNCCCCCCOC":2605,"CNCCCCCCCCNC":2606,"CNCCOCCOCC":2607,"CNCCCCCCNCCC":2608,"CNCOOCC":2609,")=*":2610,")=*)=":2611,")=*)":2612,"ClN":2613,"ClO":2614,"Clp":2615,"ClCCCC":2616,"ClBr":2617,"ClCCCCC":2618,"ClCOc":2619,"ClCCOC":2620,"ClCCOCC":2621,"ClCCCCN":2622,"ClCCNCC":2623,"ClCCOc":2624,"ClCOCC":2625,"ClCBr":2626,"ClCCSC":2627,"ClCCCOc":2628,"ClCCNc":2629,"ClCCBr":2630,"ClCCCn":2631,"ClCCCCl":2632,"ClCCCCCl":2633,"ClCSCc":2634,"ClCCOCc":2635,"ClNCNC":2636,"ClCCCCBr":2637,"ClCCCCCBr":2638,"ClCCCSc":2639,"ClCCOCCOCCOCC":2640,"ClCCCCCCl":2641,"ClCCCCCCBr":2642,"ClCCSSCC":2643,"ClCCSCc":2644,"ClSCl":2645,"ClCCCCCCCBr":2646,"ClCSCCl":2647,"ClCCCCI":2648,"ClCCOCCCl":2649,"ClCCCCCCCl":2650,"ClCCCCCCCCCl":2651,"ClCCCCCCI":2652,"ClCCCCCCCCl":2653,"ClCCCCCCCCCCl":2654,"ClCCOCCl":2655,"ClCCCCCCCCCI":2656,"ClOOCl":2657,"ClCCSCCCl":2658,"ClCOCCOCCl":2659,"-]\\[":2660,"-][*]":2661,"CCCCCCCCB":2662,"CCCCCCCCF":2663,"CCCCCCCCNCC":2664,"CCCCCCCCNc":2665,"CCCCCCCCSCC":2666,"CCCCCCCCNCCC":2667,"CCCCCCCCCCNCC":2668,"CCCCCCCCCn":2669,"CCCCCCCCNOC":2670,"CCCCCCCCOB":2671,"CCCCCCCCOCCC":2672,"CCCCCCCCOCCCC":2673,"CCCCCCCCCOS":2674,"CCCCCCCCOO":2675,"CCCCCCCCCSCC":2676,"CCCCCCCCNCCO":2677,"CCCCCCCCCCNc":2678,"CCCCCCCCNCCOC":2679,"CCCCCCCCCOCCOC":2680,"CCCCCCCCCOCCO":2681,"CCCCCCCCOCCCCCCCC":2682,"CCCCCCCCOCCCCC":2683,"CCCCCCCCCCSc":2684,"CCCCCCCCCCNCCN":2685,"CCCCCCCCNCCCNC":2686,"CCCCCCCCCCOCCOC":2687,"CCCCCCCCOCCCCCCCCC":2688,"CCCCCCCCCCNCCC":2689,"CCCCCCCCCCCNc":2690,"CCCCCCCCSSc":2691,"CCCCCCCCOCCl":2692,"CCCCCCCCOCCOCCOCCOCCO":2693,"CCCCCCCCSCCO":2694,"CCCCCCCCCCCOS":2695,"CCCCCCCCCCOCCOCC":2696,"CCCCCCCCCCSCCC":2697,"CCCCCCCCOCCOCCOCCOCCOCCO":2698,"CCCCCCCCCCOCCOCCOCCO":2699,"CCCCCCCCSSSS":2700,"CCCCCCCCSSCCCCCCCC":2701,"ncnnn":2702,"ncsn":2703,"COCF":2704,"COCI":2705,"COCSC":2706,"CCNOC":2707,"CCNOCC":2708,"CCNCCCCC":2709,"CCNOP":2710,"CCNCCOCC":2711,"CCNCCCCCC":2712,"CCNCCCCCCC":2713,"CCNCCCCN":2714,"CCNCCCNC":2715,"CCNCCCCO":2716,"CCNOCc":2717,"CCNOS":2718,"CCNCCCCOC":2719,"CCNCCCS":2720,"CCNCCNS":2721,"CCNCCCCS":2722,"CCNCCCCCCN":2723,"CCNCCCCCOC":2724,"CCNCCOCCO":2725,"CCNCCOCCN":2726,"CCNCCON":2727,"CCNCCCCCCNC":2728,"CCNCCCCCCCCCCN":2729,"CCNCCCCCCl":2730,"+]*":2731,"+])-":2732,"+])*)":2733,"+]*)*":2734,"+])*)=":2735,"OCCSCc":2736,"OCCCOCCOCC":2737,"OCCPC":2738,"BrI":2739,"BrN":2740,"BrO":2741,"Brn":2742,"BrCCOCC":2743,"BrCCCCCCC":2744,"BrCCCCO":2745,"BrCn":2746,"BrCCCO":2747,"BrCCCOC":2748,"BrSc":2749,"BrCCCOc":2750,"BrCCCCOc":2751,"BrCCNc":2752,"BrCCCCc":2753,"BrCCBr":2754,"BrCOCc":2755,"BrCI":2756,"BrCCCCn":2757,
"BrCCCBr":2758,"BrCCCCCc":2759,"BrCCCCCCCCOc":2760,"BrCCCCCCc":2761,"BrCCCCCCOc":2762,"BrCCCCCCCCc":2763,"BrCCOCc":2764,"BrCCCCBr":2765,"BrCCCCCBr":2766,"BrCCCCCCn":2767,"BrCCCCCCCCCCc":2768,"BrCCCCCCBr":2769,"BrCCCCCCCCCBr":2770,"BrCCCCCCCCCCBr":2771,"BrCCCCCCCCBr":2772,"BrCCCOCc":2773,"BrCCOCCBr":2774,"BrCCCCCCCCCCCCCCCCCCBr":2775,"BrCCCCCCCCCCCCBr":2776,"BrCCCCCCCCCCCCCCBr":2777,"BrCCCCCCOCc":2778,"120":2779,"129":2780,"@@]=":2781,"cccco":2782,"-])\\[":2783,"-])*)*)":2784,"NCCI":2785,"NCCOCCCC":2786,"NCCCON":2787,"NCCOCCCN":2788,"NCCOCCOc":2789,"NCCCOCCCN":2790,"NCCOCCCOC":2791,"NCCOCCCl":2792,"NCCOCCNCCO":2793,"NCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCC":2794,"NCCOCCOCCOCCOCCOCCOCC":2795,"CSOCC":2796,"CSNN":2797,"CSCCOCC":2798,"CSCCc":2799,"CSCCCCO":2800,"CSCSc":2801,"CSCNCC":2802,"CSCCCCCCCCCCCC":2803,"CSCCCOC":2804,"CSCCCCNC":2805,"CSOO":2806,"CSCCCCOC":2807,"CSSCc":2808,"CSCCCCOc":2809,"CSCON":2810,"CSCSSC":2811,"CSSS":2812,"CSCCCCCNC":2813,"CSCCOCCO":2814,"CSSCCCC":2815,"CSOCCOCCOCCO":2816,"CSCCCCCS":2817,"CSCCCCCCSC":2818,"CSCCCOS":2819,"CSCCCCCCn":2820,"CSCOCCCCC":2821,"CSCCCCCCCCCCCCCOC":2822,"CSCCCCCCCCCCCO":2823,"CSSSSS":2824,"CSSSSSCC":2825,"CCCCCB":2826,"CCCCCPC":2827,"NNI":2828,"NNCCC":2829,"NNBr":2830,"NNNCC":2831,"NNCCCc":2832,"])*)[":2833,"])*)(":2834,"])*[":2835,"])*)*)*)*)*":2836,"CCOCS":2837,"CCOCBr":2838,"CNCS":2839,"CNCl":2840,"CNCn":2841,"CNCSC":2842,"SCI":2843,"SCBr":2844,")([*])=":2845,"OPC":2846,"OPP":2847,"OPOCC":2848,"CCCNI":2849,"CCCNP":2850,"CCCNSC":2851,"CCCNCCOCC":2852,"CCCNCCCCCC":2853,"CCCNNS":2854,"CCCNCCCCCCC":2855,"CCCNSCC":2856,"CCCNNCCC":2857,"CCCNOCc":2858,"CCCNCCNCC":2859,"CCCNCCCO":2860,"CCCNCCCOC":2861,"CCCNCCSC":2862,"CCCNCCCCNC":2863,"CCCNCCCS":2864,"CCCNCCCCS":2865,"CCCNCCCCCCOC":2866,"CCCNCCNCCN":2867,"CCCNCCOCCC":2868,"CCCNCCCNCCC":2869,"CCCNCCCOS":2870,"CCCNCCCCCCNCCC":2871,"(/*)=":2872,"(/*)\\":2873,"COPC":2874,"COPN":2875,"COPNP":2876,"CCNCN":2877,"CCNCSC":2878,"109":2879,"10121":2880,"211":2881,"214":2882,"215":2883,"216":2884,"217":2885,"218":2886,"219":2887,"+](*)([":2888,"+](\\[*]":2889,"+](*)(*)*":2890,"132":2891,"134":2892,"139":2893,"1336":2894,"13468":2895,"CCOCCF":2896,"CCOCCCOCC":2897,"CCOCCNCc":2898,"CCOCCON":2899,"CCOCCCNS":2900,"CCOCCOCCNCCO":2901,"CCOCCCNCCOC":2902,"CCOCCOCCBr":2903,"CCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCC":2904,"CCOCCSCCSCC":2905,"CCOCCPCCP":2906,"115":2907,"116":2908,"117":2909,"119":2910,"CCCCCCB":2911,"CCCCCCCOCC":2912,"CCCCCCON":2913,"CCCCCCOB":2914,"CCCCCCOCCCC":2915,"CCCCCCSc":2916,"CCCCCCCOS":2917,"CCCCCCSCc":2918,"CCCCCCOCCCCCC":2919,"CCCCCCCOCCOCC":2920,"CCCCCCSS":2921,"CCCCCCOCCCCC":2922,"CCCCCCSCCO":2923,"CCCCCCSCCCCCC":2924,"CCCCCCCOCCOCCCC":2925,"CCCCCCSSCCCCCCO":2926,"CCCCCCCOCCOCCOCCOCCCN":2927,"NSO":2928,"NSNC":2929,"NSCCC":2930,"NSCCN":2931,"NSNSN":2932,"CCCCCCCCCCCCCCCCn":2933,"CCCCCCCCCCCCCCCCBr":2934,"CCCCCCCCCCCCCCCCCS":2935,"CCCCCCCCCCCCCCCCCOc":2936,"CCCCCCCCCCCCCCCCCCCN":2937,"CCCCCCCCCCCCCCCCCOP":2938,"CCCCCCCCCCCCCCCCCCCCO":2939,"CCCCCCCCCCCCCCCCCCn":2940,"CCCCCCCCCCCCCCCCOCCOCC":2941,"CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC":2942,"CCCCCCCCCCCCCCCCOB":2943,"CCCCCCCCCCCCCCCCCCCOC":2944,"CCCCCCCCCCCCCCCCCCCCCN":2945,"CCCCCCCCCCCCCCCCSCCC":2946,"CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC":2947,"CCCCCCCCCCCCCCCCCCCCS":2948,"CCCCCCCCCCCCCCCCSS":2949,"CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC":2950,"CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC":2951,"CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC":2952,"CCCCCCCCCCCCCCCCCCOS":2953,"CCCCCCCCCCCCCCCCCCSc":2954,"CCCCCCCCCCCCCCCCCCOCCO":2955,"CCCCCCC
CCCCCCCCCOCCCCCCCCCCCCCCCC":2956,"CCCCCCCCCCCCCCCCCCOCCC":2957,"CCCCCCCCCCCCCCCCCCNCCC":2958,"CCCCCCCCCCCCCCCCCCOCCCN":2959,"CCCCCCCCCCCCCCCCCCOCCOCCOCCOCCOCC":2960,"CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC":2961,"CCCCCCCCCCCCCCCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCC":2962,"CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC":2963,"CCCCCCCCCCCCCCCCOCCOP":2964,"CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCO":2965,"CCCCCCCCCCCCCCCCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCC":2966,"CCCCCCCCCCCCCCCCCCNCCNCCN":2967,"CCOF":2968,"CCOn":2969,"CCOCCCCCCCC":2970,"CCONCC":2971,"CCOCCCCC":2972,"CCOCOc":2973,"CCOSC":2974,"CCOOP":2975,"CCONS":2976,"CCOCCCCCCCCCC":2977,"CCOCOCCN":2978,"CCOOO":2979,"CCOCCCCCN":2980,"CCOCCCCCCCCN":2981,"CCOCCCCl":2982,"CCOCCCBr":2983,"CCOCCCCCCCCCCCCN":2984,"CCOCCCCCCCCO":2985,"CCOCCCCCCCCCCCCCCCCCCN":2986,"CCOCCCCNCC":2987,"CCOOCCCCCCO":2988,"CCOOOCC":2989,"CCOCCCSP":2990,"CCOCCCCCCCCCCCCCN":2991,"@@](\\":2992,"cnnnc":2993,"cnncn":2994,"cnnnn":2995,"CSCn":2996,"CCCCCCCF":2997,"CCCCCCCSc":2998,"CCCCCCCSCCO":2999,"-].[*]":3000,"CCCCNNC":3001,"CCCCNOC":3002,"CCCCNCCN":3003,"CCCCNOCC":3004,"CCCCNSC":3005,"CCCCNCCCNC":3006,"CCCCNOCc":3007,"CCCCNCCCCOC":3008,"CCCCNCCCCCCNCCCC":3009,"231":3010,")*)*)[":3011,")*)\\[":3012,")*.*":3013,"SCCF":3014,"SCCP":3015,"SCCCOCC":3016,"SCCSc":3017,"SCCSCCC":3018,"SCCSS":3019,"@](*)(":3020,"NCCCB":3021,"NCCCP":3022,"CCCCOCOC":3023,"CCCCOCCCCC":3024,"CCCCOCCCCN":3025,"CCCCOCCCCO":3026,"CCCCOCCCCCCCCCBr":3027,"*])([*])":3028,"*])([*])[*]":3029,"342":3030,"3456":3031,"+]([*])=":3032,"+]([*])([*])[*]":3033,"@]([*])([*])":3034,"OCOO":3035,"OCOCCC":3036,"OCOCOC":3037,"OCOCCCCC":3038,"OCOCCS":3039,"OCOCOCO":3040,"OCOCCOCO":3041,"142":3042,"144":3043,"1414":3044,"1448":3045,"NNCN":3046,"NNCS":3047,"NNCl":3048,"CCNCCOc":3049,"CCNCCCOCC":3050,"CCNCCSCCCO":3051,"])(/":3052,"NOO":3053,"NONC":3054,"NOCCCCCC":3055,"NOON":3056,"NOCCCCNC":3057,"NOCCCS":3058,"NOOSC":3059,"nnoc":3060,"nnns":3061,"])([*])":3062,"-])([*])=":3063,"COCCB":3064,"COCCI":3065,"COCCP":3066,"COCCCOP":3067,"COCCSCC":3068,"COCCCOCCN":3069,"COCCSc":3070,"COCCSCCN":3071,"COCCCOCCCC":3072,"COCCSCCCC":3073,"COCCSNC":3074,"COCCCOCCCOC":3075,"COCCSCCl":3076,"([*])([*])[*]":3077,"([*])([*])=":3078,"CCSCN":3079,"CCSCS":3080,"CCSCCCN":3081,"CCSCCCCCC":3082,"CCSCCCNC":3083,"CCSCCCCCCCCC":3084,"CCSCCCNCC":3085,"CCSCCCCS":3086,"CCSSS":3087,"CCSCCCCl":3088,"CCSOOO":3089,"CCSCCOCCOCC":3090,"CCSCCCCSCC":3091,"CCSCCCCCCCCCCSCCC":3092,"CCCCCCCCCP":3093,"153":3094,"155":3095,"1585":3096,"OCCONC":3097,"OCCOOC":3098,"OCCOCCCCCCCC":3099,"OCCOOCC":3100,"OCCOCCCCCCCCCCCCCCCCCC":3101,"OCCOCCCS":3102,"OCCOCCNS":3103,"OCCOCCCCS":3104,"OCCOCCCCCCCCCCCCCCCCCCCCCC":3105,"OCCOCCCCBr":3106,"OCCOCCCCOCCOC":3107,"OSS":3108,"OSc":3109,"OCCNP":3110,"OCCNCO":3111,"OCCNNC":3112,"OCCNCCCCC":3113,"OCCNCCc":3114,"OCCNCCCF":3115,"OCCNCCNCCO":3116,"*)#":3117,"*)-":3118,"CNCCF":3119,"CNCCI":3120,"CNCCBr":3121,"CNCCCOCCO":3122,"CCCOB":3123,"CCCONC":3124,"CCCOCOC":3125,"CCCOCCCCCCC":3126,"CCCOCCCS":3127,"CCCOCOCCC":3128,"CCCOCCNc":3129,"CCCOCCOCCC":3130,"CCCOOCCCCOC":3131,"CCCOCCOCCOCCOCCO":3132,"CCCCCCCCCCCSC":3133,"OCCOCCI":3134,"OCCOCCNC":3135,"OCCOCCCl":3136,"OCCOCCBr":3137,"OCCOCCOP":3138,"OCCOCCCOCCO":3139,"OCCOCCOCCCl":3140,"OCCOCCOCCOCCN":3141,"FCI":3142,"ONP":3143,"ONCCCC":3144,"CCCCCCCCCCCCCCCS":3145,"CCCCCCCCCCCOc":3146,"CCCCCCCCCCSCC":3147,"CCCCCCCCCCCOCC":3148,"NOCS":3149,"NCCNN":3150,"NCCNP":3151,"NCCNCCOC":3152,"NCCNCCCN":3153,"CCCCCCCCCCCCCS":3154,"CCCCCCCCCCCCOCCOCC":3155,"CCCCCCCCCCCCSc":3156,"CCCCCCCCCCCCNCCCN":3157,"CCCCCCCCCCCCS
Cc":3158,"CCCCCCCCCCCCCNS":3159,"CCCCCCCCCCCCCOCCC":3160,"CCCCCCCCCCCCCOCCOCC":3161,"CCCCCCCCCCCCOCCOCCO":3162,"CCCCCCCCCCCCCCNCCN":3163,"CCCCCCCCCCCCCOCCCN":3164,"CCCCCCCCCCCCSSC":3165,"CCCCCCCCCCCCCCOCCO":3166,"CCCCCCCCCCCCOCCOCCOCCOCCOCCOCCOCCOCC":3167,"CCCCCCCCCCCCOCCOCCN":3168,"CCCCCCCCCCCCCNCCNC":3169,"CCCCCCCCCCCCOCCOCCOCCOCCO":3170,"CCCCCCCCCCCCCCOCCOCCOCCOCCOCC":3171,"CCCCCCCCCCCCOCCOCCOCCOC":3172,"CCCCCCCCCCCCOCCOCCOCCOCCOCCO":3173,"CCCCCCCCCCCCCCOCCOCCOCCO":3174,"CCCCCCCCCCCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCC":3175,"CCCCCCCCCCCCCCNCCCC":3176,"CCCCCCCCCCCCOCCOP":3177,"CCCCCCCCCCCCNCCCNCC":3178,"CCCCCCCCCCCCOCCOCCOCCOCCOCCOCCOCCO":3179,"CCCCCCCCCCCCCCOCCCO":3180,"CCCCCCCCCCCCOCCOCCOCCOCCOCCOCC":3181,"CCCCCCCCCCCCCOCCOS":3182,"CCCCCCCCCCCCOCCOCCOS":3183,"CCCCCCCCCCCCOCCOCCOCCOCCOCCOCCOCCOCCO":3184,"CCCCCCCCCCCCCCF":3185,"CCCCCCCCCCCCCCI":3186,"CCCCCCCCCCCCCCP":3187,"CCCCCCCCCCCCCCl":3188,"CCCCCCCCCCCCCCCl":3189,"CCCCCCCCCCCCCCSCC":3190,"CCCCCCCCCCCCCCCOS":3191,"CCCCCCCCCCCCCCOCCCCCCCCCCCCCC":3192,"OBO":3193,"OBOBO":3194,"OCCCSCC":3195,"OCCCSCCNC":3196,"OCCCSCCN":3197,"OCCCSCCCOC":3198,"COCCNCCC":3199,"COCCNCCN":3200,"COCCNCCOC":3201,"COCCNCCO":3202,"COCCNCCCCSCC":3203,"COCCNCCOCCOCCOCCN":3204,"OCCCCF":3205,"OCCCCCCCN":3206,"OCCCCCOP":3207,"OCCCCOCc":3208,"OCCCCCCNCC":3209,"OCCCCOCCO":3210,"OCCCCCSc":3211,"OCCCCCOCc":3212,"OCCCCCOCCO":3213,"OCCCCCCNCc":3214,"CCCCCCCCCCCCCCCCCCI":3215,"CCCCCCCCCCCCCCCCCCCl":3216,"NCCCCF":3217,"NCCCCP":3218,"NCCCCS":3219,"NCCCCCl":3220,"NCCCCCSC":3221,"NCCCCOCc":3222,"NCCCCCNCC":3223,"NCCCCCNS":3224,"NCCCCCNCCCN":3225,"NCCCCOCCI":3226,"NCCCCCOCCOCCC":3227,"1721":3228,"457":3229,"4545":3230,"NCCCNO":3231,"NCCCNCCCCCCCC":3232,"NCCCNCCCNC":3233,"NCCCNCCCCCCCCC":3234,"NCCCNCCCCCNC":3235,"NCCCNCCNCCCN":3236,"COSN":3237,"COSOC":3238,"3191":3239,"-])(*)":3240,"-])(*)*":3241,"OOSC":3242,"CCCCOCSSCCNC":3243,"OCCOCN":3244,"OCCOCNC":3245,"CSCCB":3246,"CSCCBr":3247,"CSCCCOc":3248,"CSCCSCCSC":3249,"CSCCSCCSCC":3250,"CSCCSCCSCCS":3251,"CCCSO":3252,"CCCSNC":3253,"CCCSCCO":3254,"CCCSON":3255,"CCCSCCCSC":3256,"CCCSCCON":3257,"CCCSSCCCS":3258,"CCCSCCCCCCCCCBr":3259,"CCCSSSSC":3260,"CCCSSCCCSC":3261,"[*])\\":3262,"LiH":3263,")*)(*)*)=":3264,"NCCONC":3265,"COCCCF":3266,"COCCCI":3267,"COCCCBr":3268,"246":3269,"CPC":3270,"CPP":3271,"CPc":3272,"CPCC":3273,"CPOC":3274,"CPCc":3275,"CPCCP":3276,"+]=*":3277,"CNCCCF":3278,"OCCOCCOCCOCCS":3279,"OCCOCCOCCOCCCl":3280,"OCCOCCOCCOCCOCc":3281,"OCCOCCOCCOCCOCCNC":3282,"OCCOCCOCCOCCOCCBr":3283,"OCCOCCOCCOCCOCCOS":3284,"OCCOCCOCCOCCOCCOCCOCCNC":3285,"OCCOCCOCCOCCOCCOCCOS":3286,"CCCCCNN":3287,"CCCCCNNC":3288,"CCCCCNOCc":3289,"NCCSN":3290,"NCCSCCO":3291,"NCCSSCCC":3292,"CCNSSNCCC":3293,"CNCCNCCC":3294,"CNCCNCCCN":3295,"CNCCNCCO":3296,"CNCCNCCNCCC":3297,"NiH":3298,"190":3299,"191":3300,"193":3301,"194":3302,"198":3303,"199":3304,"569":3305,")*)*)*)*)":3306,")*)*)*)(*)*)=":3307,"*])[*])":3308,"SCCCBr":3309,"OCCCNCCCC":3310,"OCCCNCCCCN":3311,"OCCCCCCI":3312,"OCCCCCCCO":3313,"OCCCCCCBr":3314,"OCCCCCCCOP":3315,"CCSCCBr":3316,"CCSCCOP":3317,"CCSCCSCCN":3318,"CCSCCSCCSCC":3319,"3535":3320,"*)(\\":3321,"*)(*)[":3322,"*)(*)*)*)*)*":3323,"*)(*)*)(*)*":3324,"+])[*])[":3325,"220":3326,"222":3327,"226":3328,"CCCCSN":3329,"CCCCSNC":3330,"CCCCSCSC":3331,"CCCCSCCCCCCCCCCC":3332,"CCCCSCCCCCN":3333,"COCCOCCI":3334,"COCCOCCS":3335,"COCCOCCn":3336,"COCCOCCCN":3337,"COCCOCCBr":3338,"COCCOCCCNCC":3339,"COCCOCCON":3340,"COCCOCCOCCNC":3341,"COCCOCCSS":3342,"COCCOCCOCCBr":3343,"COCCOCCOCCOCCOCCOCCOCCO":3344,"COCCOCCOCCn":3345,"COCCOCCOCCOCCOCCOCCOCCOC
COCCO":3346,"COCCOCCOCCOCCOCCN":3347,"CCCCCCNN":3348,"CCCCCCNP":3349,"CCCCCCNCCOC":3350,"CCCCCCNCCCCCC":3351,"CCCCCCNCCCCCCC":3352,"CCCCCCNCCCCCBr":3353,"CCCCCCCCCCCCCCCCCCCCP":3354,"CCCCCCCCCCCCCCCCCCCCOCC":3355,"CCCCCCCCCCCCCCCCCCCCBr":3356,"CCCCCCCCCCCCCCCCCCCCCCNC":3357,"(\\[*])[*])":3358,"SSP":3359,"SSSCC":3360,"SSCOCC":3361,"SSSSc":3362,"SSSSS":3363,"SSCCCCCCCCCCCN":3364,"SSCCCCCCCCCCCCCCCCO":3365,"SSCSSS":3366,"OCCCOOC":3367,"OCCCOCCCCl":3368,"OCCCOCCOCCCCl":3369,"CNCCOCCN":3370,"CCCCCCCCCCCCCCCCCCCCCCO":3371,"CCCCCCCCCCCCCCCCCCCCCCc":3372,"CCCCCCCCCCCCCCCCCCCCCCBr":3373,"CCPP":3374,"CCCCCCCCCCCCCCCCCCCCCCCCO":3375,"CCCCCCCCCCCCCCCCCCCCCCCCCCN":3376,"CCCCCCCCCCCCCCCCCCCCCCCCCCOC":3377,"CCCCCCCCCCCCCCCCCCCCCCCCCCO":3378,"CCCCCCCCCCCCCCCCCCCCCCCCCCCCO":3379,"NPN":3380,"NPOCCOC":3381,"NPOCCCN":3382,"COCCON":3383,"COCCOCOC":3384,"COCCOCCCBr":3385,"COCCOCCCCCCBr":3386,"CSCCCBr":3387,"OCCCCCCCCO":3388,"OCCCCCCCCS":3389,"OCCCCCCCCCO":3390,"OCCCCCCCCCl":3391,"OCCCCCCCCCS":3392,"OCCCCCCCCOP":3393,"OCCCCCCCCCCCN":3394,"OCCCCCCCCCCOCC":3395,"OCCCCCCCCCSCCCC":3396,"204":3397,"205":3398,"206":3399,"207":3400,"208":3401,"209":3402,"OCCOCCOCCCC":3403,"OCCOCCOCCC":3404,"OCCOCCOCCCCOCC":3405,"OCCOCCOCCCCOCCOCCOC":3406,"(*)(=[":3407,"CCCCCOB":3408,"CCCCCONC":3409,"CCCCCOCCCC":3410,"CCCCCOCCCCC":3411,"CCCCCOCCCN":3412,"CCCCCOOOO":3413,"coccc":3414,"NCCCCCCS":3415,"NCCCCCCc":3416,"NCCCCCCn":3417,"NCCCCCCOP":3418,"NCCCCCCOCCCC":3419,"NCCCCCCSc":3420,"ccnnn":3421,"*)*)*)[":3422,"678":3423,"TeH":3424,"ClCCI":3425,"nnnnn":3426,"CCNCCNCCC":3427,"NOCCBr":3428,"NOCCCON":3429,"COCCCNN":3430,"COCCCNCCCC":3431,"COCCCNCCOC":3432,"COCCCNCCCOC":3433,"CCCCCCCCCCCCCCCCCCCCCCCCCCCOC":3434,"CCCOCCBr":3435,"CCCOCCCOS":3436,"NCCCCNO":3437,"NCCCCNCCCC":3438,"NCCCCNCCN":3439,"NCCCCNCCCCN":3440,"SCCNCCO":3441,"occo":3442,"4681":3443,"CSCCNCCNS":3444,"CCCCCCCNCCCCCCC":3445,"CCCCCCCNCCCCCCCCCCC":3446,"NCCCOOC":3447,"OCCCCCCCF":3448,"OCCCCCCCS":3449,"OCCCCCCCBr":3450,"OCCOCCOCCOCCOCCOCCOCCOCCOC":3451,"OCCOCCOCCOCCOCCOCCOCCOCCOCCOCCO":3452,"OCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCO":3453,"OCCOCCOCCOCCOCCOCCOCCOCCOCCOCCC":3454,"OCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCO":3455,"OCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOP":3456,"CNNCN":3457,"OCCCCCCCCCCCl":3458,"OCCCCCCCCCCBr":3459,"SCCSCCOCC":3460,"SCCSCCO":3461,"CCOCCNN":3462,"CNCCCCBr":3463,"CNCCCCNCC":3464,"CNCCCCSC":3465,"BrCCI":3466,")*)=*":3467,"COCCCCBr":3468,"COCCCCCCNC":3469,"COCCCCCNCC":3470,"758":3471,"OCCSCCCN":3472,"OCCSSCC":3473,"OCCSSc":3474,"OCCOCCNCCOCCO":3475,"CCCCCCCCCCCCNCCCCCCCCCCCC":3476,"CCCCCCCCCCCCNCCNCCNCC":3477,"OCCCCCCCCCBr":3478,"OCCCCCCCCCCCCCCCCO":3479,"OCCCCCCCCCCCCCCCCBr":3480,"OCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC":3481,"OCCCCCCCCCCCCCCCCCCCCCCCCCCCCC":3482,"OCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC":3483,"OCCCCCCCCCCCCCCCCCCCCCCCCCCCC":3484,"OCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC":3485,"OCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC":3486,"OCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC":3487,"OCCCCCCCCCCCCCCCCSSCCCCCCCCCCCCCCCCO":3488,"CCCCCCCCCCCCCCCCCCOCN":3489,"CCCCCCCCCCCCCCCCCCOCc":3490,"OCCCCCCCCCCCCNCc":3491,"SCCCCP":3492,"SCCCCBr":3493,"SCCCCCS":3494,"CCCCCCNCl":3495,"CNCCCNCCCNC":3496,"CNCCCNCCNCCCNC":3497,"CCCCCCOCCNCC":3498,"NCCOCCOCCOCC":3499,"NCCOCCOCCOCCOC":3500,"NCCOCCOCCOCCOCCO":3501,"NCCOCCOCCOCCOCCN":3502,"NCCOCCOCCOCCP":3503,"NCCOCCOCCOCCOCCOCCN":3504,"CNCCNCN":3505,"CCCCCCCCCCNCCCCCCCCCC":3506,"NCCCSCCCN":3507,"CCCCCCCCCCCCOCCC":3508,"CCCCCCCCCCCCOCCCNC":3509,"CCCCCCCCCCCCOCCCCCCCCCCCC":3510,"CCCCCCCCCCCCOCCCCCCCCCCCCc":3511,"NNCCF":3512,"NNCCON":3513,"NNCCCOCc":3514,"OCC
CCCCCCCCCCCCc":3515,"OCCCCCCCCCCCCCCBr":3516,"OCCOCCOCCOCCOCCN":3517,"CCCCOCCCNCC":3518,"CCCCOCCCNS":3519,"NCCSCCSC":3520,"OCCCCCCCCCCCCCBr":3521,"NCCOCCP":3522,"CCOCCOCCOCCOCCOCCOCCOCCOCCO":3523,"(#*)":3524,"OCCCCON":3525,"OCCCCOS":3526,"OCCCCOOC":3527,"OCCCCOCCC":3528,"285":3529,"*)*)(*)=":3530,"NCCCCCCCCS":3531,"NCCCCCCCCCN":3532,"NCCCCCCCCCCCN":3533,"NCCCCCCCCCCCO":3534,"NCCCCCCCCNP":3535,"CCSSOC":3536,"CCSSSCC":3537,"NCCCCCNO":3538,"NCCCCCNCCCNC":3539,"NCCOCCOCCC":3540,"NCCOCCOCCNS":3541,"NCCOCCOCCCCCCCl":3542,"NCCCCCCNCCCCCCN":3543,"NCCCCON":3544,"NCCCCOCCC":3545,"NCCCCOCCCCOCCCCN":3546,"CCCCCCCCCNCCCNC":3547,"*)*)*)*)=":3548,"*)*)*)=*)":3549,"NCNCNCN":3550,"CCCCCSSCCCCC":3551,"CCCCNCCCNCC":3552,"NCCNCCS":3553,"NCCNCCCOC":3554,"COCCCCCCCCCCCCCCCCCCCCOC":3555,"CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCO":3556,"CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCOC":3557,"*)*)*)*)*)*":3558,"*)*)*)*)*)*)":3559,"*)*)*)*)*)*)(":3560,"684":3561,"NCCCCCCCNC":3562,"NCCNCCNCCNCCNC":3563,"NCCNCCNCCNCCNCCN":3564,"NCCNCCNCCNCCNCCNCCN":3565,"CSSCS":3566,"479":3567,"COCOCOCOC":3568,"COCOCCCCCCCCCCCCCNC":3569,"CCCSCCCO":3570,"CCCSCCSC":3571,"CCCSCCSCCS":3572,"CCCCCCCCCCOB":3573,"CCCCCCCCCCOCCCC":3574,"CCCCCCCCCCOOC":3575,"CCCCCCCCCCOCCCCCCCCCCOP":3576,"CSCCCCNS":3577,"CSCCCCSCC":3578,"CSCCCCCCNCC":3579,"CSCCCCCNCC":3580,"CSCCCCCNS":3581,"CCCCCCCCCCCCCCCCCCCCCCCCCCCCCO":3582,"CCCCCCCCCCCCCCCCNCCCCCCCCCCCCCCCC":3583,"FSI":3584,"[*-":3585,"[*+]":3586,"[*++]":3587,"OCPOCOC":3588,"(*)(*)(=":3589,"(*)(*)[":3590,"(*)(*)*":3591,"(*)(*)*)":3592,"(*)(*)(*)[":3593,"NCONCCCOOC":3594,"SCCCCCCS":3595,"CSCCOOCCO":3596,"OCCCCCCCCCCCCCCCBr":3597,"OCCCOCCCOCCCO":3598,"CCCNCCNCCCNCC":3599,"OCCOCCCCCCOCCOC":3600,"CCCCCCCCSCCCCCCCC":3601,"CCCCCCCCSNS":3602,"CCCCCCCCSOS":3603,"CCCCCCCCSCCOCCO":3604,"CCCCCCCCSCSP":3605,"CCCCCCCCCCCCSCCCCCCCCCCCCCCCCCCCCCCC":3606,"CCCCCCCCCCCCSCCOCCO":3607,"CCCCCCCCOCCCNCCC":3608,"BrCCCB":3609,"NCCCNCCCCNCCC":3610,"SOSC":3611,"OCCSCCCO":3612,"OCCSCCCl":3613,"OCCSCCSCCSCCS":3614,"OCCSCCSCCSCCO":3615,"NCCCCCCCCNCCCCCCCCNC":3616,"CSCCSCCNC":3617,"CSCCSCCCSCCSC":3618,"CCOCCOCCOCCOCCNCC":3619,"CCCCCCCOCCCCCCC":3620,"CCCCCCCOOCCCCCC":3621,"OCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCO":3622,"OCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCO":3623,"OCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCO":3624,"9669":3625,"COOSC":3626,"CCCCCCCCCCCCCCCCCCOCCOP":3627,"CCCCCCCCCCCCCCCCCCOCCOCCO":3628,"CCCCCCCCCCCCCCCCCCOCCOCCOCCOCCO":3629,"CCCCCCCCCCCCCCCCCCOCCOCCOCCOCCOCCO":3630,"CCCCCCCCCCCCCCCCCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCO":3631,"CCCCCCOCCOCc":3632,"CCCCCCOCCOCCO":3633,"OCCOCCOCCI":3634,"/*)\\*":3635,"SCSCS":3636,"CNCCCCNCCC":3637,"CCCCCCCCCCCNCCCCCCCCCCC":3638,"CCCCCCCCCCCON":3639,"CCCCCCCCCCCOCCOCCOCC":3640,"CCNCCCCCCNCC":3641,"CCCCOCCOCCSC":3642,"CCCCOCCOCCOCc":3643,"CCCCOCCOCCSCCSCCS":3644,"CNCCCCCI":3645,"ClCCCNCCCl":3646,"CCCOCCOCCOCCC":3647,"CCCOCCOCCOCCCN":3648,"CCCOCCOCCOCCOCCOCCOCCOCCN":3649,"CCCCCCCCCCOCCOS":3650,"CCCCCCCCCCOCCOCCOCCOCCOCCO":3651,"CCCCCCCCCCOCCOCCOCCC":3652,"NCCCNCCCl":3653,"NCCCOCCOCCCN":3654,"CCCCCCCCCCCCCCNCCCCCCCCCCCCCC":3655,"OCCCNCCCl":3656,"OCCCNCCSC":3657,"OCCCNCCCOB":3658,"BrCCCCOCc":3659,"BrCCCCCOCc":3660,"SCCSCCCl":3661,"CNCCCCCCOCCCOC":3662,"*)*)*)*)*)*)*)*)*)*)[":3663,"ssss":3664,"CCCCCCCCCCCCCNCCCCCCCCCCCCC":3665,"CNCCNCCNCCNCCNCCNC":3666,"NBN":3667,"NBNBN":3668,"NCCCCCCCCCCCS":3669,"NCCCCCCCCCCCSSCCCCCCCCCCCN":3670,"SCNCS":3671,"SOONC":3672,"CCCCCOCCPCC":3673,"CCCCCCCCNCCCN
CCCN":3674,"CCCCCCCCNCCCNCCCNCCCN":3675,"CCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOC":3676,"CCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCN":3677,"CCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCS":3678,"CCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOC":3679,"CCCCCCCSCCCCCCC":3680,"CCCCCCCSCCCCOC":3681,"CCCOCCOCCCCCCCl":3682,"COCCOCCOCCOB":3683,"COCCOCCOCCOCCOCCOCCC":3684,"COCCOCCNCCOCCOCCN":3685,"COCCOCCNCCOCCOCCOCCN":3686,"(*)*)*)=":3687,"CNCCCCCCOCCCCC":3688,"+][*])=":3689,"CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCO":3690,"CCCOCCCCCCNCC":3691,"OCCOCCOCCOCCOCCOCCNC":3692,"OCCOCCOCCOCCOCCOCCOP":3693,"OCCOCCOCCOCCOCCOCCOCO":3694,"SBSBS":3695,"\\*)\\*":3696,"CCCCOCCOCCCS":3697,"]([*])([*])":3698,"]([*])([*])[*]":3699,"]([*])([*])[":3700,"CCCCCCCCCOCCOP":3701,"NCCOCCOCCOCCOCCOCCOCCOCCOCCNC":3702,"NCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCN":3703,"NCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCN":3704,"NCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCC":3705,"CSCCCONN":3706,"CCOCCOCCOCCCCCC":3707,"CCOCCOCCOCCNCC":3708,"CCCCCCCOCCCNCCCN":3709,"CCCCCCCCCCCCCCCCCCCCCCNCCCCCCCCCCCCCCCCCCCCCC":3710,"(*)*)(*)*)(*)*":3711,"BBB":3712,"NCCCCCCCCCCCCNc":3713,")(*)*)(*)*)(*)*)=":3714,")(*)*)(*)*)(*)*)(*)*":3715,")(*)*)(*)*)(*)*)(*)*)=":3716,"NCCOCCOCCOCCOCCOCCNC":3717,"NCCOCCOCCOCCOCCOCCSC":3718,"CSCCCSCc":3719,"CSCCCSCCCSCCCSC":3720,"(-*)-*":3721,"COCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCCC":3722,"COCCOCCOCCOCCOCCOCCOCCC":3723,"COCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCO":3724,"COCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCN":3725,"COCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCO":3726,"23456789":3727,"CNCCCCCCNCCCCCCCCNCCCCCCNCC":3728,"PCCPc":3729,"SCCOCCOCCS":3730,"](*)(*)[":3731,"](*)(*)*)(*)=":3732,"CCCCCCCCCCSSCCCCCCCCCCC":3733,"CCCCCCCCCCSSCCCCCCCCCCc":3734,"OCCOCOCCOc":3735,"OCCOCOCCSCC":3736,"OCCSCCSCCO":3737,"NCCOCCOCCOCCOCCOCCOCCOCCOCCC":3738,"+](*)#[":3739,"CCOCCOCCOCCOCCOCCOCCC":3740,"CCCCCCCCCCCCCCCCCCOCCCCCCCCCCCCCCCCCC":3741,"CCCCCCCCCCCCCCCCCCSCCCCl":3742,"CCOCCCCOCCCCO":3743,"CCCCOCCCCCCNCc":3744,"OCCNCCCNCCO":3745,"NCCCNCCCNCCCNCCCN":3746,"OOOOOOC":3747,"*)(*)*)(*)*)(*)*)=":3748,"*)(*)*)(*)*)(*)*)(*)*)=":3749,"OCCCCCCCCCCCSSCCCCCCCCCCCOC":3750,"OCCCCCCCCCCCSSCCCCCCCCCCCO":3751,"(*)*)(*)*)(*)*)(*)*)(*)*)=":3752,"(*)*)(*)*)(*)*)(*)*)(*)*)(*)*":3753,"(*)*)(*)*)*)[":3754,"6868":3755,"=*)=*":3756,"ICCCCCCCCI":3757,"ICCOCCOCCI":3758,"NCCCCCCCCCCCCCCCI":3759,"SNCCCCCNCCSCC":3760,"SCCCCCNCCNC":3761,"SCCOCCOCCOCCOCCOCCS":3762,"SCCOCCOCCOCCS":3763,"SNSNSNS":3764,"(=*)*)=*)(":3765,"COCCCCCOCOOC":3766,"CCCCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCO":3767,")(*)*)(*)*)*":3768,")(*)*)=*)":3769,"CNCCCCCCCCCCCCNCc":3770,"CNCCOCCOCCNCc":3771,"ClCCOCCOCCCl":3772,"ClCCNCCCl":3773,"ClCCOCCOCCOCCCl":3774,"ClCCSSCCCl":3775,"CCCCCCCCOCCCCCOCCC":3776,"CCCCCCCCSSSSSSSSS":3777,"CCNCCOCCOCCCOC":3778,"OCCCOCCOCCOCCOCCOCCOCCOCCOCCOCO":3779,"NCCOCCCCCCOCCO":3780,"NCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOC":3781,"NCCOCCOCCOCCOCCOCCOCCOCCOCCN":3782,"CSCCOCCSCC":3783,"CSOCCOCCOCCOSC":3784,"CSCCCCCCCCCCCOCCOCCOCCOCC":3785,"+](\\[*])=":3786,"CCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCNC":3787,"CCOCCSCCSCCSCCSCCOCC":3788,"CCOCCPCCPCCO":3789,"CCCCCCSSCCCCCC":3790,"CCCCCCCCCCCCCCCCSSCCCCCCCCCCCCCCCC":3791,"CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCOC":3792,"CCCCCCCCCCCCCCCCCCOCCOCCOCCOCCOCCOCCOCCO":3793,"CCCCCCCCCCCCCCCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCO":3794,"CCCCCCCCCCC
CCCCCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCO":3795,"CCOOCCCCCCOOCC":3796,"SCCSCCCSCCS":3797,"COCCSCCNCCC":3798,"COCCCOCCCCOCCCOC":3799,"CCSCCCNCCCOCC":3800,"CCSSSOC":3801,"CCSCCOCCOCCSCC":3802,"158591":3803,"CCCCCCCCCCCCCOCCOCCOCCOS":3804,"CCCCCCCCCCCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCO":3805,"CCCCCCCCCCCCCCOCCOCCOCCOS":3806,"CCCCCCCCCCCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCO":3807,"CSCCSCCSCCSCCSCCS":3808,"CPCCPCCOCCPCCPCCO":3809,"COCCOCCSSCCOCCO":3810,"OCCOCCOCCCCOCCCOCCOC":3811,"OCCSCCSCCSCCSCCO":3812,"CCCCCCCCCCCCCCCCCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCOCCCS":3813,"CCCCOCCOCCSCCSCCSCCOCCOCCCC":3814,"CCCCCCCCSSSSSSSSSCCCCCCC":3815}
diff --git a/chebai/preprocessing/collate.py b/chebai/preprocessing/collate.py
index 181b3afd..8e0a703d 100644
--- a/chebai/preprocessing/collate.py
+++ b/chebai/preprocessing/collate.py
@@ -1,5 +1,5 @@
-from torch.nn.utils.rnn import pad_sequence
 import torch
+from torch.nn.utils.rnn import pad_sequence
 
 from chebai.preprocessing.structures import XYData
 
diff --git a/chebai/preprocessing/collect_all.py b/chebai/preprocessing/collect_all.py
index f82ce71c..62e140f8 100644
--- a/chebai/preprocessing/collect_all.py
+++ b/chebai/preprocessing/collect_all.py
@@ -2,6 +2,9 @@
 import os
 import sys
 
+import pytorch_lightning as pl
+import torch
+import torch.nn.functional as F
 from pytorch_lightning import loggers as pl_loggers
 from pytorch_lightning.callbacks import ModelCheckpoint
 from pytorch_lightning.metrics import F1
@@ -9,9 +12,6 @@
 from torch import nn
 from torch_geometric import nn as tgnn
 from torch_geometric.data import DataLoader
-import pytorch_lightning as pl
-import torch
-import torch.nn.functional as F
 
 from data import ClassificationData, JCIClassificationData
 
diff --git a/chebai/preprocessing/datasets/base.py b/chebai/preprocessing/datasets/base.py
index 97d322b2..1d7da9d5 100644
--- a/chebai/preprocessing/datasets/base.py
+++ b/chebai/preprocessing/datasets/base.py
@@ -1,14 +1,14 @@
-from typing import List, Union
 import os
 import random
 import typing
+from typing import List, Union
 
-from lightning.pytorch.core.datamodule import LightningDataModule
-from lightning_utilities.core.rank_zero import rank_zero_info
-from torch.utils.data import DataLoader
 import lightning as pl
 import torch
 import tqdm
+from lightning.pytorch.core.datamodule import LightningDataModule
+from lightning_utilities.core.rank_zero import rank_zero_info
+from torch.utils.data import DataLoader
 
 from chebai.preprocessing import reader as dr
 
diff --git a/chebai/preprocessing/datasets/pubchem.py b/chebai/preprocessing/datasets/pubchem.py
index 000ab8f1..5b18add0 100644
--- a/chebai/preprocessing/datasets/pubchem.py
+++ b/chebai/preprocessing/datasets/pubchem.py
@@ -12,16 +12,19 @@
 import random
 import shutil
 import tempfile
-from scipy import spatial
+import time
+from datetime import datetime
 
+import numpy as np
 import pandas as pd
-from sklearn.model_selection import train_test_split
 import requests
 import torch
-import time
-import numpy as np
 import tqdm
-from datetime import datetime
+from rdkit import Chem, DataStructs
+from rdkit.Chem import AllChem
+from scipy import spatial
+from sklearn.cluster import KMeans
+from sklearn.model_selection import train_test_split
 
 from chebai.preprocessing import reader as dr
 from chebai.preprocessing.datasets.base import DataLoader, XYBaseDataModule
@@ -31,9 +34,6 @@
     ChEBIOverX,
     _ChEBIDataExtractor,
 )
-from rdkit import Chem, DataStructs
-from rdkit.Chem import AllChem
-from sklearn.cluster import KMeans
 
 
 class PubChem(XYBaseDataModule):
diff --git a/chebai/preprocessing/datasets/tox21.py b/chebai/preprocessing/datasets/tox21.py
index 8208ffe4..ba101ff5 100644
--- a/chebai/preprocessing/datasets/tox21.py
+++ b/chebai/preprocessing/datasets/tox21.py
@@ -1,17 +1,17 @@
-from tempfile import NamedTemporaryFile, TemporaryDirectory
-from urllib import request
 import csv
 import gzip
 import os
 import random
 import shutil
 import zipfile
+from tempfile import NamedTemporaryFile, TemporaryDirectory
+from urllib import request
 
-from rdkit import Chem
-from sklearn.model_selection import GroupShuffleSplit, train_test_split
 import numpy as np
 import pysmiles
 import torch
+from rdkit import Chem
+from sklearn.model_selection import GroupShuffleSplit, train_test_split
 
 from chebai.preprocessing import reader as dr
 from chebai.preprocessing.datasets.base import MergedDataset, XYBaseDataModule
diff --git a/chebai/preprocessing/reader.py b/chebai/preprocessing/reader.py
index 3c2ee548..120011df 100644
--- a/chebai/preprocessing/reader.py
+++ b/chebai/preprocessing/reader.py
@@ -1,9 +1,9 @@
 import os
 
-from pysmiles.read_smiles import _tokenize
-from transformers import RobertaTokenizerFast
 import deepsmiles
 import selfies as sf
+from pysmiles.read_smiles import _tokenize
+from transformers import RobertaTokenizerFast
 
 from chebai.preprocessing.collate import DefaultCollater, RaggedCollater
 
diff --git a/chebai/preprocessing/structures.py b/chebai/preprocessing/structures.py
index 37a55870..eb54fd41 100644
--- a/chebai/preprocessing/structures.py
+++ b/chebai/preprocessing/structures.py
@@ -1,6 +1,6 @@
-from torch.utils.data.dataset import T_co
 import networkx as nx
 import torch
+from torch.utils.data.dataset import T_co
 
 
 class XYData(torch.utils.data.Dataset):
diff --git a/chebai/result/analyse_sem.py b/chebai/result/analyse_sem.py
index 4b270e12..291de685 100644
--- a/chebai/result/analyse_sem.py
+++ b/chebai/result/analyse_sem.py
@@ -1,18 +1,19 @@
-import pandas as pd
+import gc
+import os
 import sys
 import traceback
 from datetime import datetime
-from chebai.loss.semantic import DisjointLoss
-from chebai.preprocessing.datasets.chebi import ChEBIOver100
-from chebai.preprocessing.datasets.pubchem import Hazardous
-import os
+
+import pandas as pd
 import torch
-from torchmetrics.functional.classification import multilabel_auroc
-from torchmetrics.functional.classification import multilabel_f1_score
 import wandb
-import gc
+from torchmetrics.functional.classification import multilabel_auroc, multilabel_f1_score
 from utils import *
 
+from chebai.loss.semantic import DisjointLoss
+from chebai.preprocessing.datasets.chebi import ChEBIOver100
+from chebai.preprocessing.datasets.pubchem import Hazardous
+
 DEVICE = "cpu"  # torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
diff --git a/chebai/result/base.py b/chebai/result/base.py
index 7983167d..1b8a9940 100644
--- a/chebai/result/base.py
+++ b/chebai/result/base.py
@@ -1,6 +1,6 @@
-from typing import Iterable
 import abc
 import multiprocessing as mp
+from typing import Iterable
 
 import torch
 import tqdm
diff --git a/chebai/result/classification.py b/chebai/result/classification.py
index 6ce5fc9a..69ccacce 100644
--- a/chebai/result/classification.py
+++ b/chebai/result/classification.py
@@ -1,19 +1,17 @@
 import os
-from torchmetrics.classification import (
-    MultilabelF1Score,
-    MultilabelPrecision,
-    MultilabelRecall,
-)
 
 import matplotlib.pyplot as plt
 import pandas as pd
 import seaborn as sns
 import torch
 import tqdm
+from torchmetrics.classification import (
+    MultilabelF1Score,
+    MultilabelPrecision,
+    MultilabelRecall,
+)
 
-from chebai.callbacks.epoch_metrics import MacroF1
-from chebai.callbacks.epoch_metrics import BalancedAccuracy
-
+from chebai.callbacks.epoch_metrics import BalancedAccuracy, MacroF1
 from chebai.models import ChebaiBaseNet
 from chebai.models.electra import Electra
 from chebai.preprocessing.datasets import XYBaseDataModule
diff --git a/chebai/result/molplot.py b/chebai/result/molplot.py
index 9fd19589..8fdbc77d 100644
--- a/chebai/result/molplot.py
+++ b/chebai/result/molplot.py
@@ -1,7 +1,11 @@
+import abc
 from os import makedirs
 from tempfile import NamedTemporaryFile
-import abc
 
+import networkx as nx
+import numpy as np
+import pandas as pd
+import torch
 from matplotlib import cm, colors
 from matplotlib import pyplot as plt
 from matplotlib import rc
@@ -11,10 +15,6 @@
 from pysmiles.read_smiles import _tokenize
 from rdkit import Chem
 from rdkit.Chem.Draw import MolToMPL, rdMolDraw2D
-import networkx as nx
-import numpy as np
-import pandas as pd
-import torch
 
 from chebai.preprocessing.datasets import JCI_500_COLUMNS, JCI_500_COLUMNS_INT
 from chebai.result.base import ResultProcessor
diff --git a/chebai/result/pretraining.py b/chebai/result/pretraining.py
index 20822d12..7c469674 100644
--- a/chebai/result/pretraining.py
+++ b/chebai/result/pretraining.py
@@ -6,9 +6,9 @@
 import torch
 import tqdm
 
+import chebai.models.electra as electra
 from chebai.loss.pretraining import ElectraPreLoss
 from chebai.result.base import ResultProcessor
-import chebai.models.electra as electra
 
 
 def visualise_loss(logs_path):
diff --git a/chebai/result/utils.py b/chebai/result/utils.py
index 4678ca54..08860d76 100644
--- a/chebai/result/utils.py
+++ b/chebai/result/utils.py
@@ -1,12 +1,14 @@
+import os
+
+import torch
+import tqdm
+import wandb
 import wandb.util as wandb_util
-from chebai.models.electra import Electra
+
 from chebai.models.base import ChebaiBaseNet
+from chebai.models.electra import Electra
 from chebai.preprocessing.datasets.base import XYBaseDataModule
 from chebai.preprocessing.datasets.chebi import _ChEBIDataExtractor
-import os
-import wandb
-import tqdm
-import torch
 
 
 def get_checkpoint_from_wandb(
diff --git a/chebai/train.py b/chebai/train.py
index 060db560..f6638733 100644
--- a/chebai/train.py
+++ b/chebai/train.py
@@ -3,16 +3,16 @@
 import os
 import pickle
 
-from model import ChEBIRecNN
-from molecule import Molecule
-from pytorch_lightning import loggers as pl_loggers
-from sklearn.metrics import f1_score
-from torch.utils import data
 import numpy as np
 import pandas as pd
 import pytorch_lightning as pl
 import torch
 import torch.nn as nn
+from model import ChEBIRecNN
+from molecule import Molecule
+from pytorch_lightning import loggers as pl_loggers
+from sklearn.metrics import f1_score
+from torch.utils import data
 
 BATCH_SIZE = 100
 NUM_EPOCHS = 100
diff --git a/chebai/trainer/CustomTrainer.py b/chebai/trainer/CustomTrainer.py
index a4369678..d23df782 100644
--- a/chebai/trainer/CustomTrainer.py
+++ b/chebai/trainer/CustomTrainer.py
@@ -1,15 +1,15 @@
-from typing import List, Optional
 import logging
+from typing import List, Optional
 
+import pandas as pd
+import torch
 from lightning import LightningModule, Trainer
 from lightning.fabric.utilities.types import _PATH
 from lightning.pytorch.loggers import WandbLogger
 from torch.nn.utils.rnn import pad_sequence
-import pandas as pd
-import torch
-from chebai.preprocessing.reader import CLS_TOKEN, ChemDataReader
 from chebai.loggers.custom import CustomLogger
+from chebai.preprocessing.reader import CLS_TOKEN, ChemDataReader
 
 log = logging.getLogger(__name__)
diff --git a/configs/data/chebi100.yml b/configs/data/chebi100.yml
index ac8246dd..ebc59974 100644
--- a/configs/data/chebi100.yml
+++ b/configs/data/chebi100.yml
@@ -1 +1 @@
-class_path: chebai.preprocessing.datasets.chebi.ChEBIOver100
\ No newline at end of file
+class_path: chebai.preprocessing.datasets.chebi.ChEBIOver100
diff --git a/configs/data/chebi100_SELFIES.yml b/configs/data/chebi100_SELFIES.yml
index fbdfeafa..0f62bcbc 100644
--- a/configs/data/chebi100_SELFIES.yml
+++ b/configs/data/chebi100_SELFIES.yml
@@ -1 +1 @@
-class_path: chebai.preprocessing.datasets.chebi.ChEBIOver100SELFIES
\ No newline at end of file
+class_path: chebai.preprocessing.datasets.chebi.ChEBIOver100SELFIES
diff --git a/configs/data/chebi100_deepSMILES.yml b/configs/data/chebi100_deepSMILES.yml
index 943f0e17..901db031 100644
--- a/configs/data/chebi100_deepSMILES.yml
+++ b/configs/data/chebi100_deepSMILES.yml
@@ -1 +1 @@
-class_path: chebai.preprocessing.datasets.chebi.ChEBIOver100DeepSMILES
\ No newline at end of file
+class_path: chebai.preprocessing.datasets.chebi.ChEBIOver100DeepSMILES
diff --git a/configs/data/chebi100_mixed.yml b/configs/data/chebi100_mixed.yml
index 48f2757f..3b92a774 100644
--- a/configs/data/chebi100_mixed.yml
+++ b/configs/data/chebi100_mixed.yml
@@ -1,4 +1,4 @@
 class_path: chebai.preprocessing.datasets.pubchem.LabeledUnlabeledMixed
 init_args:
   labeled: chebi100.yml
-  unlabeled: pubchem_dissimilar.yml
\ No newline at end of file
+  unlabeled: pubchem_dissimilar.yml
diff --git a/configs/data/chebi50_mixed.yml b/configs/data/chebi50_mixed.yml
index deb1aa6d..0586cc5d 100644
--- a/configs/data/chebi50_mixed.yml
+++ b/configs/data/chebi50_mixed.yml
@@ -1 +1 @@
-class_path: chebai.preprocessing.datasets.pubchem.PubToxAndChebi50
\ No newline at end of file
+class_path: chebai.preprocessing.datasets.pubchem.PubToxAndChebi50
diff --git a/configs/data/tox21_moleculenet.yml b/configs/data/tox21_moleculenet.yml
index 41c1c5c6..5579a829 100644
--- a/configs/data/tox21_moleculenet.yml
+++ b/configs/data/tox21_moleculenet.yml
@@ -1,3 +1,3 @@
 class_path: chebai.preprocessing.datasets.tox21.Tox21MolNetChem
 init_args:
-  batch_size: 10
\ No newline at end of file
+  batch_size: 10
diff --git a/configs/default_prediction_callback.yml b/configs/default_prediction_callback.yml
index 127f4153..152b5d10 100644
--- a/configs/default_prediction_callback.yml
+++ b/configs/default_prediction_callback.yml
@@ -1,4 +1,4 @@
 class_path: chebai.callbacks.prediction_callback.PredictionWriter
 init_args:
   output_dir: pred
-  write_interval: epoch
\ No newline at end of file
+  write_interval: epoch
diff --git a/configs/loss/weighting_chebi100.yml b/configs/loss/weighting_chebi100.yml
index 07b87672..15471d38 100644
--- a/configs/loss/weighting_chebi100.yml
+++ b/configs/loss/weighting_chebi100.yml
@@ -3,4 +3,4 @@ init_args:
   pos_weight:
     class_path: chebai.CustomTensor
     init_args:
-      data: [0.7269214993582873, 0.8409484012184109, 0.27551414429639587, 1.2614226018276162, 0.06103657750778788, 1.1091819429863523, 0.004043529396179034, 1.4961058765862425, 0.5230288836846214, 1.4961058765862425, 0.6433255269320843, 0.41639192681688303, 0.1760124560689697, 1.0997017554394604, 0.9060922914536399, 0.15154900516656875, 0.1883822919274039, 0.03447618043580302, 0.6954870561427938, 1.5137071221931395, 0.24790964428982054, 1.1487955838072934, 0.030583576274403817, 0.015282706424327932, 0.7704497328527956,
0.03311843124489495, 0.8873455543890818, 0.615622513810607, 1.029320843091335, 0.21881820643948446, 0.6736392952168422, 0.10235887461131016, 0.03832740702604017, 0.11612374132348093, 0.19855726139879146, 0.2335119879971268, 0.007641807055082073, 0.615622513810607, 0.09054546473357977, 1.0633479784001394, 0.9125184779178501, 0.2992211753172485, 0.40588361320636235, 0.07559641914595586, 1.3687777168767752, 0.9974039177241617, 1.1091819429863523, 0.3092911187173482, 1.4621034703001916, 0.1745795188418139, 0.47477898666574486, 0.1967356351474264, 0.18224519176546297, 1.413902256993592, 0.4713007523311973, 0.06615172513440455, 0.49296975243837876, 0.21196887213577736, 0.5618563553992003, 0.5025980679156908, 0.11316192206369118, 0.7568535610965698, 0.22415523586483774, 0.48009367681498827, 0.01856371452696824, 0.8354876973143952, 0.1706433758440542, 1.4789092573151363, 0.1769808877392254, 0.023848953732422032, 0.41238815828979764, 1.0997017554394604, 1.413902256993592, 0.17270483944485485, 0.0967406807416668, 0.08922684146076065, 1.2371644748693928, 0.20719018580743456, 0.12764395375636595, 1.2739119345189789, 0.3496334385500458, 0.5743977919036467, 1.090382249037431, 1.3402615144418424, 0.6401249024199844, 1.3402615144418424, 0.2852884820097935, 0.05942960987825259, 0.14343935940514702, 0.08526514604798997, 0.08520867906385222, 0.28719889595182335, 0.8635241972242742, 0.0011988251251925615, 0.40588361320636235, 0.2985269266506192, 0.019120984601934444, 0.6276346604215457, 0.7524275168796307, 1.3543695303833354, 0.8300974541059153, 0.7148061410356492, 1.2614226018276162, 0.15190685405716275, 1.4456753414204142, 0.3048936146597556, 0.32166276346604217, 0.1376097383811945, 0.30345543723211527, 1.1696827762401534, 1.3687777168767752, 1.0997017554394604, 0.8520867906385222, 0.7893564747632936, 0.28340331582911205, 0.10940910321974223, 1.4456753414204142, 0.25630499080959535, 1.2866510538641687, 0.18566393273653226, 0.5821950469973614, 1.0997017554394604, 0.36761458681833387, 0.5569917982095968, 0.8409484012184109, 1.0376218176323941, 0.7524275168796307, 1.2614226018276162, 0.043704179818755726, 0.1373160142864641, 0.5230288836846214, 0.00117096018735363, 0.05611212620428123, 0.8195229642446934, 0.031974429767996235, 1.0546320113640726, 0.3910793476790786, 0.004047217935466542, 0.7394546286575682, 1.4961058765862425, 0.14392069953737904, 0.008958094088032923, 0.23099659853934804, 1.413902256993592, 1.213821748928461, 0.13075722092115535, 0.953074854714199, 1.0460577673692428, 0.7352291736366677, 0.35444932613337976, 1.2739119345189789, 0.35444932613337976, 0.761331984535011, 0.09799322573222914, 0.017727349874127427, 1.0722092115534738, 0.18354508614324802, 1.3985337542001832, 0.8300974541059153, 0.09645060373794367, 0.9821763769955486, 0.12047294511836784, 0.013122397285713091, 1.0131110660347784, 0.8409484012184109, 0.03119154069973742, 0.4765374273570995, 0.28978627339283075, 0.01912382660321297, 0.06924924940065492, 0.6338182531350585, 0.43467941008924615, 0.9190364670458347, 0.5769735667552326, 0.8195229642446934, 0.23915447097847, 0.002221965001665058, 0.03485914532278972, 0.26805230288836845, 0.6598210532636762, 0.16974288309553676, 0.12230523325705026, 0.20455501651258642, 0.5025980679156908, 0.4137141652296362, 0.12218908393771782, 0.5743977919036467, 0.0489220933028201, 0.016487071423169766, 0.019491759640420672, 0.05249494303811377, 0.02473377650642385, 0.01422185314318745, 0.022768555191367345, 0.20955228890295907, 0.02262044750112814, 1.3543695303833354, 0.033332928856584676, 
[… one long line of several hundred per-class weight values, removed and re-added as "data: […]" with identical values; the only effective change is the added trailing newline …]
\ No newline at end of file
diff --git a/configs/metrics/balanced-accuracy.yml b/configs/metrics/balanced-accuracy.yml
index 66c402d0..eb079ed1 100644
--- a/configs/metrics/balanced-accuracy.yml
+++ b/configs/metrics/balanced-accuracy.yml
@@ -2,4 +2,4 @@ class_path: torchmetrics.MetricCollection
 init_args:
   metrics:
     balanced-accuracy:
-      class_path: chebai.callbacks.epoch_metrics.BalancedAccuracy
\ No newline at end of file
+      class_path: chebai.callbacks.epoch_metrics.BalancedAccuracy
diff --git a/configs/metrics/micro-macro-f1.yml b/configs/metrics/micro-macro-f1.yml
index 7273bd4c..9cae1093 100644
--- a/configs/metrics/micro-macro-f1.yml
+++ b/configs/metrics/micro-macro-f1.yml
@@ -6,4 +6,4 @@ init_args:
       init_args:
         average: micro
     macro-f1:
-      class_path: chebai.callbacks.epoch_metrics.MacroF1
\ No newline at end of file
+      class_path: chebai.callbacks.epoch_metrics.MacroF1
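For reference, both metric configs above follow the Lightning CLI class_path/init_args convention and resolve to a torchmetrics.MetricCollection. A rough Python sketch of what micro-macro-f1.yml builds — with the stock MultilabelF1Score standing in for the custom chebai metric classes, and num_labels as an invented placeholder — looks like this:

    from torchmetrics import MetricCollection
    from torchmetrics.classification import MultilabelF1Score

    # Sketch only: the real config points "macro-f1" at
    # chebai.callbacks.epoch_metrics.MacroF1; num_labels=854 is a placeholder.
    metrics = MetricCollection(
        {
            "micro-f1": MultilabelF1Score(num_labels=854, average="micro"),
            "macro-f1": MultilabelF1Score(num_labels=854, average="macro"),
        }
    )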
diff --git a/configs/model/electra-for-pretraining.yml b/configs/model/electra-for-pretraining.yml
index bbdb4ead..80acd9a1 100644
--- a/configs/model/electra-for-pretraining.yml
+++ b/configs/model/electra-for-pretraining.yml
@@ -17,4 +17,4 @@ init_args:
     max_position_embeddings: 1800
     num_attention_heads: 8
     num_hidden_layers: 6
-    type_vocab_size: 1
\ No newline at end of file
+    type_vocab_size: 1
diff --git a/configs/model/electra.yml b/configs/model/electra.yml
index c7117b9c..c3cf2fdf 100644
--- a/configs/model/electra.yml
+++ b/configs/model/electra.yml
@@ -8,3 +8,4 @@ init_args:
     num_attention_heads: 8
     num_hidden_layers: 6
     type_vocab_size: 1
+    hidden_size: 256
diff --git a/configs/model/electra_pretraining.yml b/configs/model/electra_pretraining.yml
index 7b78e48d..f480a792 100644
--- a/configs/model/electra_pretraining.yml
+++ b/configs/model/electra_pretraining.yml
@@ -15,4 +15,4 @@ init_args:
     max_position_embeddings: 1800
     num_attention_heads: 8
     num_hidden_layers: 6
-    type_vocab_size: 1
\ No newline at end of file
+    type_vocab_size: 1
diff --git a/configs/training/csv_logger.yml b/configs/training/csv_logger.yml
index ed14c4e7..86a94baa 100644
--- a/configs/training/csv_logger.yml
+++ b/configs/training/csv_logger.yml
@@ -1,3 +1,3 @@
 class_path: lightning.pytorch.loggers.CSVLogger
 init_args:
-  save_dir: logs
\ No newline at end of file
+  save_dir: logs
diff --git a/configs/training/default_trainer.yml b/configs/training/default_trainer.yml
index ea2d0be9..91aa4244 100644
--- a/configs/training/default_trainer.yml
+++ b/configs/training/default_trainer.yml
@@ -2,4 +2,4 @@ min_epochs: 100
 max_epochs: 100
 default_root_dir: &default_root_dir logs
 logger: csv_logger.yml
-callbacks: default_callbacks.yml
\ No newline at end of file
+callbacks: default_callbacks.yml
diff --git a/configs/training/early_stop_callbacks.yml b/configs/training/early_stop_callbacks.yml
index d766fce7..75c4597d 100644
--- a/configs/training/early_stop_callbacks.yml
+++ b/configs/training/early_stop_callbacks.yml
@@ -16,4 +16,4 @@
       min_delta: 0.0
       patience: 3
       verbose: False
-      mode: "min"
\ No newline at end of file
+      mode: "min"
diff --git a/configs/training/pretraining_callbacks.yml b/configs/training/pretraining_callbacks.yml
index 3d29d5a0..0862433e 100644
--- a/configs/training/pretraining_callbacks.yml
+++ b/configs/training/pretraining_callbacks.yml
@@ -9,4 +9,4 @@ init_args:
       filename: 'per_{epoch}_{val_loss:.4f}'
       every_n_epochs: 25
-      save_top_k: -1
\ No newline at end of file
+      save_top_k: -1
diff --git a/configs/training/pretraining_trainer.yml b/configs/training/pretraining_trainer.yml
index 7390b29f..6c56870d 100644
--- a/configs/training/pretraining_trainer.yml
+++ b/configs/training/pretraining_trainer.yml
@@ -4,4 +4,4 @@ max_epochs: 100
 default_root_dir: &default_root_dir logs
 logger: csv_logger.yml
-callbacks: pretraining_callbacks.yml
\ No newline at end of file
+callbacks: pretraining_callbacks.yml
diff --git a/configs/training/single_class_callbacks.yml b/configs/training/single_class_callbacks.yml
index 188f1fc5..73f4a720 100644
--- a/configs/training/single_class_callbacks.yml
+++ b/configs/training/single_class_callbacks.yml
@@ -10,4 +10,4 @@
       filename: 'per_{epoch:02d}_{val_loss:.4f}_{val_f1:.4f}'
       every_n_epochs: 25
       save_top_k: -1
-# difference to default_callbacks.yml: no macro-f1
\ No newline at end of file
+# difference to default_callbacks.yml: no macro-f1
diff --git a/configs/training/wandb_logger.yml b/configs/training/wandb_logger.yml
index b7c51418..b0dd8870 100644
--- a/configs/training/wandb_logger.yml
+++ b/configs/training/wandb_logger.yml
@@ -3,4 +3,4 @@ init_args:
   save_dir: logs
   project: 'chebai'
   entity: 'chebai'
-  log_model: 'all'
\ No newline at end of file
+  log_model: 'all'
diff --git a/docs/source/experiment.rst b/docs/source/experiment.rst
index 81f36b31..59aced74 100644
--- a/docs/source/experiment.rst
+++ b/docs/source/experiment.rst
@@ -1 +1 @@
-.. autoclass:: chebai.experiments.Experiment
\ No newline at end of file
+.. autoclass:: chebai.experiments.Experiment
diff --git a/docs/source/model.rst b/docs/source/model.rst
index 81f36b31..59aced74 100644
--- a/docs/source/model.rst
+++ b/docs/source/model.rst
@@ -1 +1 @@
-.. autoclass:: chebai.experiments.Experiment
\ No newline at end of file
+.. autoclass:: chebai.experiments.Experiment
diff --git a/setup.cfg b/setup.cfg
index f28e0e9a..034dc5b8 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -4,4 +4,4 @@ from_first = True
 line_length = 79
 known_first_party = chem
 default_section = THIRDPARTY
-skip = .tox,.eggs,ci/bootstrap.py,ci/templates,build,dist
\ No newline at end of file
+skip = .tox,.eggs,ci/bootstrap.py,ci/templates,build,dist
diff --git a/tests/testCustomBalancedAccuracyMetric.py b/tests/testCustomBalancedAccuracyMetric.py
index b420fd83..d257114e 100644
--- a/tests/testCustomBalancedAccuracyMetric.py
+++ b/tests/testCustomBalancedAccuracyMetric.py
@@ -1,8 +1,10 @@
+import os
+import random
 import unittest
+
 import torch
-import os
+
 from chebai.callbacks.epoch_metrics import BalancedAccuracy
-import random
 
 
 class TestCustomBalancedAccuracyMetric(unittest.TestCase):
diff --git a/tests/testCustomMacroF1Metric.py b/tests/testCustomMacroF1Metric.py
index 6e208fa4..685f2901 100644
--- a/tests/testCustomMacroF1Metric.py
+++ b/tests/testCustomMacroF1Metric.py
@@ -1,9 +1,11 @@
+import os
+import random
 import unittest
+
 import torch
-import os
-from chebai.callbacks.epoch_metrics import MacroF1
 from torchmetrics.classification import MultilabelF1Score
-import random
+
+from chebai.callbacks.epoch_metrics import MacroF1
 
 
 class TestCustomMacroF1Metric(unittest.TestCase):
diff --git a/tests/testPubChemData.py b/tests/testPubChemData.py
index 40f91e2d..00dc8579 100644
--- a/tests/testPubChemData.py
+++ b/tests/testPubChemData.py
@@ -1,6 +1,8 @@
-import unittest
 import os
+import unittest
+
 import torch
+
 from chebai.preprocessing.datasets.pubchem import PubChem
diff --git a/tests/testTox21MolNetData.py b/tests/testTox21MolNetData.py
index 484ed533..91e34e3d 100644
--- a/tests/testTox21MolNetData.py
+++ b/tests/testTox21MolNetData.py
@@ -1,6 +1,8 @@
-import unittest
 import os
+import unittest
+
 import torch
+
 from chebai.preprocessing.datasets.tox21 import Tox21MolNetChem

From ec6254d37effb87cee4407c5b50f4af8420a09e5 Mon Sep 17 00:00:00 2001
From: aditya0by0
Date: Thu, 13 Jun 2024 20:22:04 +0200
Subject: [PATCH 11/26] Update .gitignore

---
 .gitignore | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/.gitignore b/.gitignore
index bf0b7890..733bbf9e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -161,3 +161,8 @@ cython_debug/
 #.idea/
 # configs/ # commented as new configs can be added as a part of a feature
+/.idea
+/data
+/logs
+/results_buffer
+electra_pretrained.ckpt

From c1b6b0db52c33a37ac47bb7dcc2d4a7d1ab140ec Mon Sep 17 00:00:00 2001
From: aditya0by0
Date: Thu, 13 Jun 2024 20:24:38 +0200
Subject: [PATCH 12/26] remove commented out cells - eval notebook

---
 tutorials/eval_model_basic.ipynb | 49 --------------------------------
 1 file changed, 49 deletions(-)

diff --git a/tutorials/eval_model_basic.ipynb b/tutorials/eval_model_basic.ipynb
index b92d78f1..bc54464b 100644
--- a/tutorials/eval_model_basic.ipynb
+++ b/tutorials/eval_model_basic.ipynb
@@ -1,31 +1,5 @@
 {
  "cells": [
"code", - "execution_count": 1, - "id": "726ada05-9a23-46bc-a04a-c951ccd29807", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Current working directory: C:\\Users\\HP\\Desktop\\github-aditya0by0\\python-chebai\n" - ] - } - ], - "source": [ - "# ---------------- For testing only : comment afterwards\n", - "# import os\n", - "\n", - "# # Set the root directory\n", - "# root_directory = r\"C:\\Users\\HP\\Desktop\\github-aditya0by0\\python-chebai\"\n", - "# os.chdir(root_directory)\n", - "\n", - "# # Verify the current working directory\n", - "# print(\"Current working directory:\", os.getcwd())" - ] - }, { "cell_type": "code", "execution_count": 2, @@ -124,29 +98,6 @@ "model_class = Electra" ] }, - { - "cell_type": "code", - "execution_count": 4, - "id": "42642a53-511d-4cbc-a799-56641c89aebe", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "C:\\Users\\HP\\Desktop\\github-aditya0by0\\python-chebai\\logs\\chebi50_bce_unweighted\\version_27\\checkpoints\\per_epoch=99_val_loss=0.1377_val_macro-f1=0.0200_val_micro-f1=0.2947.ckpt\n" - ] - } - ], - "source": [ - "# --------------- For testing only : comment afterwards\n", - "# data_module.data_limit = 100\n", - "# main_directory = r\"C:\\Users\\HP\\Desktop\\github-aditya0by0\\python-chebai\"\n", - "# checkpoint_name = r\"logs\\chebi50_bce_unweighted\\version_27\\checkpoints\\per_epoch=99_val_loss=0.1377_val_macro-f1=0.0200_val_micro-f1=0.2947\"\n", - "# checkpoint_path = os.path.join(main_directory, f\"{checkpoint_name}.ckpt\")\n", - "# print(checkpoint_path)" - ] - }, { "cell_type": "code", "execution_count": 5, From 667b07922eabdc6bc94c9e4128e38c66f58c9957 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Thu, 13 Jun 2024 23:48:19 +0200 Subject: [PATCH 13/26] add filename parameter to load_processed_data --- chebai/preprocessing/datasets/chebi.py | 33 ++++++++++++++++---------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/chebai/preprocessing/datasets/chebi.py b/chebai/preprocessing/datasets/chebi.py index 85572024..97f2b5ee 100644 --- a/chebai/preprocessing/datasets/chebi.py +++ b/chebai/preprocessing/datasets/chebi.py @@ -576,9 +576,6 @@ def prepare_data(self, *args, **kwargs): df, filename=self.raw_file_names_dict["data_chebi_train"] ) - def setup(self, **kwargs): - super().setup(**kwargs) - def _get_dynamic_splits(self): """Generate data splits during run-time and saves in class variables""" @@ -640,34 +637,44 @@ def dynamic_split_dfs(self): "test": self.dynamic_df_test, } - def load_processed_data(self, kind: str = None) -> List: + def load_processed_data(self, kind: str = None, filename: str = None) -> List: """ Load processed data from a file. Args: kind (str, optional): The kind of dataset to load such as "train", "val" or "test". Defaults to None. + filename (str, optional): The name of the file to load the dataset from. Defaults to None. Returns: List: The loaded processed data. Raises: - ValueError: If kind is None. + ValueError: If both kind and filename are None. + FileNotFoundError: If the specified file does not exist. 
""" - if kind is None: - raise ValueError("kind is required to load the correct dataset") - # if both kind and filename are given, use filename - if kind is not None: + if kind is None and filename is None: + raise ValueError( + "Either kind or filename is required to load the correct dataset, both are None" + ) + + # If both kind and filename are given, use filename + if kind is not None and filename is None: try: - # processed_file_names_dict is only implemented for _ChEBIDataExtractor if self.use_inner_cross_validation and kind != "test": filename = self.processed_file_names_dict[ f"fold_{self.fold_index}_{kind}" ] else: data_df = self.dynamic_split_dfs[kind] - except NotImplementedError: - filename = f"{kind}" - return data_df.to_dict(orient="records") + return data_df.to_dict(orient="records") + except KeyError: + kind = f"{kind}" + + # If filename is provided + try: + return torch.load(os.path.join(self.processed_dir, filename)) + except FileNotFoundError: + raise FileNotFoundError(f"File {filename} doesn't exist") class JCIExtendedBase(_ChEBIDataExtractor): From 8c9dfe15f52458e733af2bff9b0e758d8fc61e3c Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Tue, 18 Jun 2024 21:54:52 +0200 Subject: [PATCH 14/26] Updated chebi.py for train_version restructure --- chebai/preprocessing/datasets/chebi.py | 182 +++++++++++++++++-------- 1 file changed, 122 insertions(+), 60 deletions(-) diff --git a/chebai/preprocessing/datasets/chebi.py b/chebai/preprocessing/datasets/chebi.py index 97f2b5ee..58002059 100644 --- a/chebai/preprocessing/datasets/chebi.py +++ b/chebai/preprocessing/datasets/chebi.py @@ -127,6 +127,10 @@ class _ChEBIDataExtractor(XYBaseDataModule, ABC): Attributes: single_class (int): The ID of the single class to predict. chebi_version_train (int): The version of ChEBI to use for training and validation. + dynamic_data_split_seed (int): The seed for random data splitting, default is 42. + dynamic_df_train (pd.DataFrame): DataFrame to store the training data split. + dynamic_df_test (pd.DataFrame): DataFrame to store the test data split. + dynamic_df_val (pd.DataFrame): DataFrame to store the validation data split. """ def __init__( @@ -144,6 +148,16 @@ def __init__( self.dynamic_df_test = None self.dynamic_df_val = None + if self.chebi_version_train is not None: + # Instantiate another same class with "chebi_version" as "chebi_version_train", if train_version is given + # This is to get the data from respective directory related to "chebi_version_train" + _init_kwargs = kwargs + _init_kwargs["chebi_version"] = self.chebi_version_train + self._chebi_version_train_obj = self.__class__( + single_class=self.single_class, + **_init_kwargs, + ) + def extract_class_hierarchy(self, chebi_path): """ Extracts the class hierarchy from the ChEBI ontology. 
From 8c9dfe15f52458e733af2bff9b0e758d8fc61e3c Mon Sep 17 00:00:00 2001
From: aditya0by0
Date: Tue, 18 Jun 2024 21:54:52 +0200
Subject: Updated chebi.py for train_version restructure

---
 chebai/preprocessing/datasets/chebi.py | 182 +++++++++++++++++--------
 1 file changed, 122 insertions(+), 60 deletions(-)

diff --git a/chebai/preprocessing/datasets/chebi.py b/chebai/preprocessing/datasets/chebi.py
index 97f2b5ee..58002059 100644
--- a/chebai/preprocessing/datasets/chebi.py
+++ b/chebai/preprocessing/datasets/chebi.py
@@ -127,6 +127,10 @@ class _ChEBIDataExtractor(XYBaseDataModule, ABC):
     Attributes:
         single_class (int): The ID of the single class to predict.
         chebi_version_train (int): The version of ChEBI to use for training and validation.
+        dynamic_data_split_seed (int): The seed for random data splitting, default is 42.
+        dynamic_df_train (pd.DataFrame): DataFrame to store the training data split.
+        dynamic_df_test (pd.DataFrame): DataFrame to store the test data split.
+        dynamic_df_val (pd.DataFrame): DataFrame to store the validation data split.
     """

     def __init__(
@@ -144,6 +148,16 @@ def __init__(
         self.dynamic_df_test = None
         self.dynamic_df_val = None

+        if self.chebi_version_train is not None:
+            # Instantiate another instance of the same class with "chebi_version" set to "chebi_version_train", if train_version is given
+            # This is to get the data from the respective directory related to "chebi_version_train"
+            _init_kwargs = kwargs
+            _init_kwargs["chebi_version"] = self.chebi_version_train
+            self._chebi_version_train_obj = self.__class__(
+                single_class=self.single_class,
+                **_init_kwargs,
+            )
+
     def extract_class_hierarchy(self, chebi_path):
         """
         Extracts the class hierarchy from the ChEBI ontology.
@@ -238,7 +252,7 @@ def _setup_pruned_test_set(
         """Create a test set with the same leaf nodes, but use only classes that appear in the training set"""
         # TODO: find a more efficient way to do this
         filename_old = "classes.txt"
-        filename_new = f"classes_v{self.chebi_version_train}.txt"
+        # filename_new = f"classes_v{self.chebi_version_train}.txt"
         # dataset = torch.load(os.path.join(self.processed_dir, "test.pt"))

         # Load original classes (from the current ChEBI version - chebi_version)
@@ -246,7 +260,12 @@
             orig_classes = file.readlines()

         # Load new classes (from the training ChEBI version - chebi_version_train)
-        with open(os.path.join(self.processed_dir_main, filename_new), "r") as file:
+        with open(
+            os.path.join(
+                self._chebi_version_train_obj.processed_dir_main, filename_old
+            ),
+            "r",
+        ) as file:
             new_classes = file.readlines()

         # Create a mapping which give index of a class from chebi_version, if the corresponding
@@ -277,42 +296,74 @@
     def setup_processed(self):
         print("Transform data")
         os.makedirs(self.processed_dir, exist_ok=True)
-        for k in self.processed_file_names_dict.keys():
-            # processed_name = (
-            #     "test.pt" if k == "test" else self.processed_file_names_dict[k]
-            # )
-            processed_name = self.processed_file_names_dict[k]
-            if k == "data_chebi_train" and self.chebi_version_train is None:
-                # To skip the encoding of data for "chebi_version_train", if it's not given
-                continue
-
-            if not os.path.isfile(os.path.join(self.processed_dir, processed_name)):
-                print(
-                    "Missing encoded data, transform processed data into encoded data",
-                    k,
-                )
-                torch.save(
-                    self._load_data_from_file(
-                        os.path.join(
-                            self.processed_dir_main, self.raw_file_names_dict[k]
-                        )
-                    ),
-                    os.path.join(self.processed_dir, processed_name),
-                )
         # -------- Commented the code for Data Handling Restructure for Issue No.10
         # -------- https://github.com/ChEB-AI/python-chebai/issues/10
+        # for k in self.processed_file_names_dict.keys():
+        #     processed_name = (
+        #         "test.pt" if k == "test" else self.processed_file_names_dict[k]
+        #     )
+        #     if not os.path.isfile(os.path.join(self.processed_dir, processed_name)):
+        #         print("transform", k)
+        #         torch.save(
+        #             self._load_data_from_file(
+        #                 os.path.join(self.raw_dir, self.raw_file_names_dict[k])
+        #             ),
+        #             os.path.join(self.processed_dir, processed_name),
+        #         )
+        # # create second test set with classes used in train
+        # if self.chebi_version_train is not None and not os.path.isfile(
+        #     os.path.join(self.processed_dir, self.processed_file_names_dict["test"])
+        # ):
+        #     print("transform test (select classes)")
+        #     self._setup_pruned_test_set()
+        #
+        # processed_name = self.processed_file_names_dict[k]
+        # if not os.path.isfile(os.path.join(self.processed_dir, processed_name)):
+        #     print(
+        #         "Missing encoded data, transform processed data into encoded data",
+        #         k,
+        #     )
+        #     torch.save(
+        #         self._load_data_from_file(
+        #             os.path.join(
+        #                 self.processed_dir_main, self.raw_file_names_dict[k]
+        #             )
+        #         ),
+        #         os.path.join(self.processed_dir, processed_name),
+        #     )

+        # Transform the processed data into encoded data
+        processed_name = self.processed_file_names_dict["data"]
+        if not os.path.isfile(os.path.join(self.processed_dir, processed_name)):
+            print(
+                f"Missing encoded data related to version {self.chebi_version}, transform processed data into encoded data:",
+                processed_name,
+            )
+            torch.save(
+                self._load_data_from_file(
+                    os.path.join(
+                        self.processed_dir_main,
+                        self.raw_file_names_dict["data"],
+                    )
+                ),
+                os.path.join(self.processed_dir, processed_name),
+            )
+
+        # Transform the data related to "chebi_version_train" to encoded data, if it doesn't exist
+        if self.chebi_version_train is not None and not os.path.isfile(
+            os.path.join(
+                self._chebi_version_train_obj.processed_dir,
+                self._chebi_version_train_obj.raw_file_names_dict["data"],
+            )
+        ):
+            print(
+                f"Missing encoded data related to train version: {self.chebi_version_train}"
+            )
+            print("Call the setup method related to it")
+            self._chebi_version_train_obj.setup()

     def get_test_split(self, df: pd.DataFrame, seed: int = None):
-        print("Get test data split")
+        print("\nGet test data split")

         # df_list = df.values.tolist()
         # df_list = [row[1] for row in df_list]
@@ -441,7 +492,6 @@ def processed_file_names_dict(self) -> dict:
         # else:
         #     res[set] = f"{set}{train_v_str}.pt"
         res["data"] = "data.pt"
-        res["data_chebi_train"] = f"data{train_v_str}.pt"
         return res

     @property
@@ -464,7 +514,6 @@ def raw_file_names_dict(self) -> dict:
         # else:
         #     res[set] = f"{set}{train_v_str}.pkl"
         res["data"] = "data.pkl"
-        res["data_chebi_train"] = f"data{train_v_str}.pkl"
         return res

     @property
@@ -560,29 +609,33 @@ def prepare_data(self, *args, **kwargs):
             df = self.graph_to_raw_dataset(g, self.raw_file_names_dict["data"])
             self.save_processed(df, filename=self.raw_file_names_dict["data"])

-        # Data from chebi_version_train
-        if self.chebi_version_train is not None and not os.path.isfile(
-            os.path.join(
-                self.processed_dir_main,
-                self.raw_file_names_dict["data_chebi_train"],
-            )
-        ):
-            chebi_path = self._load_chebi(self.chebi_version_train)
-            g = self.extract_class_hierarchy(chebi_path)
-            df = self.graph_to_raw_dataset(
-                g, self.raw_file_names_dict["data_chebi_train"]
-            )
-            self.save_processed(
-                df, filename=self.raw_file_names_dict["data_chebi_train"]
-            )
+        if self.chebi_version_train is not None:
+            if not os.path.isfile(
+                os.path.join(
+                    self._chebi_version_train_obj.processed_dir_main,
+                    self._chebi_version_train_obj.raw_file_names_dict["data"],
+                )
+            ):
+                print(
+                    f"Missing processed data related to train version: {self.chebi_version_train}"
+                )
+                print("Call the prepare_data method related to it")
+                # Generate the "chebi_version_train" data if it doesn't exist
+                self._chebi_version_train_obj.prepare_data(*args, **kwargs)

     def _get_dynamic_splits(self):
         """Generate data splits during run-time and saves in class variables"""

         # Load encoded data derived from "chebi_version"
-        data_chebi_version = torch.load(
-            os.path.join(self.processed_dir, self.processed_file_names_dict["data"])
-        )
+        try:
+            filename = self.processed_file_names_dict["data"]
+            data_chebi_version = torch.load(os.path.join(self.processed_dir, filename))
+        except FileNotFoundError:
+            raise FileNotFoundError(
+                f"File {filename} doesn't exist. "
" + f"Please call 'prepare_data' and/or 'setup' methods to generate the dataset files" + ) + df_chebi_version = pd.DataFrame(data_chebi_version) train_df_chebi_ver, df_test_chebi_ver = self.get_test_split( df_chebi_version, seed=self.dynamic_data_split_seed @@ -590,12 +643,21 @@ def _get_dynamic_splits(self): if self.chebi_version_train is not None: # Load encoded data derived from "chebi_version_train" - data_chebi_train_version = torch.load( - os.path.join( - self.processed_dir, - self.processed_file_names_dict["data_chebi_train"], + try: + filename_train = ( + self._chebi_version_train_obj.processed_file_names_dict["data"] ) - ) + data_chebi_train_version = torch.load( + os.path.join( + self._chebi_version_train_obj.processed_dir, filename_train + ) + ) + except FileNotFoundError: + raise FileNotFoundError( + f"File {filename_train} doesn't exists related to chebi_version_train {self.chebi_version_train}." + f"Please call 'prepare_data' and/or 'setup' methods to generate the dataset files" + ) + df_chebi_train_version = pd.DataFrame(data_chebi_train_version) # Get train/val split of data based on "chebi_version_train", but # using test set from "chebi_version" @@ -744,12 +806,12 @@ def select_classes(self, g, split_name, *args, **kwargs): ) ) filename = "classes.txt" - if ( - self.chebi_version_train - is not None - # and self.raw_file_names_dict["test"] != split_name - ): - filename = f"classes_v{self.chebi_version_train}.txt" + # if ( + # self.chebi_version_train + # is not None + # # and self.raw_file_names_dict["test"] != split_name + # ): + # filename = f"classes_v{self.chebi_version_train}.txt" with open(os.path.join(self.processed_dir_main, filename), "wt") as fout: fout.writelines(str(node) + "\n" for node in nodes) return nodes From cd03023d7b36ff2a18fb384f0f1e24145742c42d Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Wed, 19 Jun 2024 20:29:30 +0200 Subject: [PATCH 15/26] minor changes in data split code - removed list comprehension from data split logic - used dataframe operations instead as they are faster - remove looping for msss.split as no need for it, used `next` instead --- chebai/preprocessing/datasets/chebi.py | 73 +++++++++++++++----------- 1 file changed, 41 insertions(+), 32 deletions(-) diff --git a/chebai/preprocessing/datasets/chebi.py b/chebai/preprocessing/datasets/chebi.py index 58002059..2a3a16e0 100644 --- a/chebai/preprocessing/datasets/chebi.py +++ b/chebai/preprocessing/datasets/chebi.py @@ -22,12 +22,10 @@ import pandas as pd import requests import torch -import yaml from iterstrat.ml_stratifiers import ( MultilabelStratifiedKFold, MultilabelStratifiedShuffleSplit, ) -from torch.utils.data import DataLoader from chebai.preprocessing import reader as dr from chebai.preprocessing.datasets.base import XYBaseDataModule @@ -363,10 +361,32 @@ def setup_processed(self): self._chebi_version_train_obj.setup() def get_test_split(self, df: pd.DataFrame, seed: int = None): + """ + Split the input DataFrame into training and testing sets based on multilabel stratified sampling. + + This method uses MultilabelStratifiedShuffleSplit to split the data such that the distribution of labels + in the training and testing sets is approximately the same. The split is based on the "labels" column + in the DataFrame. + + Parameters: + ---------- + df : pd.DataFrame + The input DataFrame containing the data to be split. It must contain a column named "labels" + with the multilabel data. + + seed : int, optional + The random seed to be used for reproducibility. 
From cd03023d7b36ff2a18fb384f0f1e24145742c42d Mon Sep 17 00:00:00 2001
From: aditya0by0
Date: Wed, 19 Jun 2024 20:29:30 +0200
Subject: minor changes in data split code

- removed list comprehension from data split logic
- used dataframe operations instead as they are faster
- remove looping for msss.split as no need for it, used `next` instead

---
 chebai/preprocessing/datasets/chebi.py | 73 +++++++++++++++-----------
 1 file changed, 41 insertions(+), 32 deletions(-)

diff --git a/chebai/preprocessing/datasets/chebi.py b/chebai/preprocessing/datasets/chebi.py
index 58002059..2a3a16e0 100644
--- a/chebai/preprocessing/datasets/chebi.py
+++ b/chebai/preprocessing/datasets/chebi.py
@@ -22,12 +22,10 @@
 import pandas as pd
 import requests
 import torch
-import yaml
 from iterstrat.ml_stratifiers import (
     MultilabelStratifiedKFold,
     MultilabelStratifiedShuffleSplit,
 )
-from torch.utils.data import DataLoader

 from chebai.preprocessing import reader as dr
 from chebai.preprocessing.datasets.base import XYBaseDataModule
@@ -363,10 +361,32 @@ def setup_processed(self):
             self._chebi_version_train_obj.setup()

     def get_test_split(self, df: pd.DataFrame, seed: int = None):
+        """
+        Split the input DataFrame into training and testing sets based on multilabel stratified sampling.
+
+        This method uses MultilabelStratifiedShuffleSplit to split the data such that the distribution of labels
+        in the training and testing sets is approximately the same. The split is based on the "labels" column
+        in the DataFrame.
+
+        Parameters:
+        ----------
+        df : pd.DataFrame
+            The input DataFrame containing the data to be split. It must contain a column named "labels"
+            with the multilabel data.
+
+        seed : int, optional
+            The random seed to be used for reproducibility. Default is None.
+
+        Returns:
+        -------
+        df_train : pd.DataFrame
+            The training set split from the input DataFrame.
+
+        df_test : pd.DataFrame
+            The testing set split from the input DataFrame.
+        """
         print("\nGet test data split")

-        # df_list = df.values.tolist()
-        # df_list = [row[1] for row in df_list]
         labels_list = df["labels"].tolist()

         test_size = 1 - self.train_split - (1 - self.train_split) ** 2
         msss = MultilabelStratifiedShuffleSplit(
             n_splits=1, test_size=test_size, random_state=seed
         )
-        train_split = []
-        test_split = []
-        for train_split, test_split in msss.split(
-            labels_list,
-            labels_list,
-        ):
-            train_split = train_split
-            test_split = test_split
-            break
-        df_train = df.iloc[train_split]
-        df_test = df.iloc[test_split]
+        train_indices, test_indices = next(msss.split(labels_list, labels_list))
+
+        df_train = df.iloc[train_indices]
+        df_test = df.iloc[test_indices]
         return df_train, df_test

     def get_train_val_splits_given_test(
@@ -392,7 +405,7 @@
     ):
         """
         Split the dataset into train and validation sets, given a test set.
-        Use test set (e.g., loaded from another chebi version or generated in get_test_split), avoid overlap
+        Use test set (e.g., loaded from another chebi version or generated in get_test_split), to avoid overlap

         Args:
             df (pd.DataFrame): The original dataset.
@@ -404,12 +417,11 @@
         """
         print(f"Split dataset into train / val with given test set")

-        df_trainval = df
         test_ids = test_df["ident"].tolist()
-        mask = [trainval_id not in test_ids for trainval_id in df_trainval["ident"]]
-        df_trainval = df_trainval[mask]
-        # df_trainval_list = df_trainval.values.tolist()
-        # df_trainval_list = [row[3:] for row in df_trainval_list]
+        # ---- list comprehension degrades performance, dataframe operations are faster
+        # mask = [trainval_id not in test_ids for trainval_id in df_trainval["ident"]]
+        # df_trainval = df_trainval[mask]
+        df_trainval = df[~df["ident"].isin(test_ids)]
         labels_list_trainval = df_trainval["labels"].tolist()

         if self.use_inner_cross_validation:
@@ -437,16 +449,13 @@
         msss = MultilabelStratifiedShuffleSplit(
             n_splits=1, test_size=test_size, random_state=seed
         )
-        train_split = []
-        validation_split = []
-        for train_split, validation_split in msss.split(
-            labels_list_trainval, labels_list_trainval
-        ):
-            train_split = train_split
-            validation_split = validation_split
-        df_validation = df_trainval.iloc[validation_split]
-        df_train = df_trainval.iloc[train_split]
+
+        train_indices, validation_indices = next(
+            msss.split(labels_list_trainval, labels_list_trainval)
+        )
+
+        df_validation = df_trainval.iloc[validation_indices]
+        df_train = df_trainval.iloc[train_indices]
         return df_train, df_validation

     @property
@@ -632,7 +641,7 @@ def _get_dynamic_splits(self):
             data_chebi_version = torch.load(os.path.join(self.processed_dir, filename))
         except FileNotFoundError:
             raise FileNotFoundError(
-                f"File {filename} doesn't exist. "
+                f"File data.pt doesn't exist. "
                 f"Please call 'prepare_data' and/or 'setup' methods to generate the dataset files"
             )

@@ -654,7 +663,7 @@
             except FileNotFoundError:
                 raise FileNotFoundError(
-                    f"File {filename_train} doesn't exist related to chebi_version_train {self.chebi_version_train}."
+                    f"File data.pt doesn't exist related to chebi_version_train {self.chebi_version_train}."
                     f"Please call 'prepare_data' and/or 'setup' methods to generate the dataset files"
                 )

             df_chebi_train_version = pd.DataFrame(data_chebi_train_version)
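The next(...) pattern introduced in this commit works because MultilabelStratifiedShuffleSplit with n_splits=1 yields exactly one (train, test) index pair, so the old loop-and-break was redundant. A small self-contained illustration, with a random label matrix standing in for the real ChEBI labels:

    import numpy as np
    import pandas as pd
    from iterstrat.ml_stratifiers import MultilabelStratifiedShuffleSplit

    rng = np.random.default_rng(42)
    labels = rng.integers(0, 2, size=(100, 5))  # 100 samples, 5 binary labels

    msss = MultilabelStratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
    train_idx, test_idx = next(msss.split(labels, labels))  # single pair, no loop

    # Vectorized exclusion of test identifiers, as in the isin() change above
    df = pd.DataFrame({"ident": range(100)})
    test_ids = df.iloc[test_idx]["ident"].tolist()
    df_trainval = df[~df["ident"].isin(test_ids)]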
f"Please call 'prepare_data' and/or 'setup' methods to generate the dataset files" ) From f747257f6d7d581770a2e7ef23c7d92615c9cbaa Mon Sep 17 00:00:00 2001 From: sfluegel Date: Thu, 27 Jun 2024 11:03:54 +0200 Subject: [PATCH 16/26] fix: test for consistency across runs did validate the same run twice --- tests/testChebiDynamicDataSplits.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/testChebiDynamicDataSplits.py b/tests/testChebiDynamicDataSplits.py index 7efaf430..d928c394 100644 --- a/tests/testChebiDynamicDataSplits.py +++ b/tests/testChebiDynamicDataSplits.py @@ -25,6 +25,7 @@ def testDynamicDataSplitsConsistency(self): # Dynamic Data Splits in Run 1 train_hash_1, val_hash_1, test_hash_1 = self._get_hashed_splits() + self.chebi_50_v231.dynamic_df_train = None # Dynamic Data Splits in Run 2 train_hash_2, val_hash_2, test_hash_2 = self._get_hashed_splits() From a87dd35a567b5231c5f356239eec6a22a3bf2d10 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Mon, 1 Jul 2024 17:34:44 +0200 Subject: [PATCH 17/26] migration script for chebi data for new data restructure --- chebai/preprocessing/migration/__init__.py | 0 .../migration/chebi_data_migration.py | 183 ++++++++++++++++++ 2 files changed, 183 insertions(+) create mode 100644 chebai/preprocessing/migration/__init__.py create mode 100644 chebai/preprocessing/migration/chebi_data_migration.py diff --git a/chebai/preprocessing/migration/__init__.py b/chebai/preprocessing/migration/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/chebai/preprocessing/migration/chebi_data_migration.py b/chebai/preprocessing/migration/chebi_data_migration.py new file mode 100644 index 00000000..6ef3b156 --- /dev/null +++ b/chebai/preprocessing/migration/chebi_data_migration.py @@ -0,0 +1,183 @@ +import argparse +import os +import shutil +from typing import Dict, List, Tuple, Type + +import pandas as pd +import torch + +from chebai.preprocessing.datasets.chebi import _ChEBIDataExtractor + + +class ChebiDataMigration: + __MODULE_PATH: str = "chebai.preprocessing.datasets.chebi" + __DATA_ROOT_DIR: str = "data" + + def __init__(self, chebi_version, class_name: str): + self._chebi_version: int = chebi_version + # Chebi class instance according to new data structure + self._chebi_cls: Type[_ChEBIDataExtractor] = self._dynamic_import_chebi_cls( + class_name, chebi_version + ) + self._class_path: str = class_name + + def _get_old_dir_structure(self): + base_dir = os.path.join( + self.__DATA_ROOT_DIR, + self._chebi_cls._name, + f"chebi_v{self._chebi_cls.chebi_version}", + ) + + @classmethod + def _dynamic_import_chebi_cls(cls, class_name: str, chebi_version: int): + class_name = class_name.strip() + module = __import__(cls.__MODULE_PATH, fromlist=[class_name]) + _class = getattr(module, class_name) + return _class({"chebi_version": chebi_version}) + + def migrate(self): + os.makedirs(self._chebi_cls.base_dir, exist_ok=True) + self._migrate_old_processed_data() + + def _migrate_old_raw_data(self): + self._copy_file(self._old_raw_dir, self._chebi_cls.raw_dir, "chebi.obo") + self._copy_file( + self._old_raw_dir, self._chebi_cls.processed_dir_main, "classes.txt" + ) + old_splits_file_names = { + "train": "train.pkl", + "validation": "validation.pkl", + "test": "test.pkl", + } + data_df, split_ass_df = self._combine_splits( + self._old_raw_dir, old_splits_file_names + ) + data_df.to_pickle(os.path.join(self._chebi_cls.processed_dir_main, "data.pkl")) + split_ass_df.to_csv( + os.path.join(self._chebi_cls.processed_dir_main, "splits.csv") + ) + + 
+    def _migrate_old_processed_data(self):
+        old_splits_file_names = {
+            "train": "train.pt",
+            "validation": "validation.pt",
+            "test": "test.pt",
+        }
+
+        data_df = self._combine_pt_splits(
+            self._old_processed_dir, old_splits_file_names
+        )
+        torch.save(data_df, self._chebi_cls.processed_dir)
+
+    def _combine_pt_splits(
+        self, old_dir: str, old_splits_file_names: Dict[str, str]
+    ) -> pd.DataFrame:
+        self._check_if_old_splits_exists(old_dir, old_splits_file_names)
+
+        df_list: List[pd.DataFrame] = []
+        for split, file_name in old_splits_file_names.items():
+            file_path = os.path.join(old_dir, file_name)
+            file_df = pd.DataFrame(torch.load(file_path))
+            df_list.append(file_df)
+
+        return pd.concat(df_list, ignore_index=True)
+
+    def _combine_pkl_splits(
+        self, old_dir: str, old_splits_file_names: Dict[str, str]
+    ) -> Tuple[pd.DataFrame, pd.DataFrame]:
+        self._check_if_old_splits_exists(old_dir, old_splits_file_names)
+
+        df_list: List[pd.DataFrame] = []
+        split_assignment_list: List[pd.DataFrame] = []
+
+        for split, file_name in old_splits_file_names.items():
+            file_path = os.path.join(old_dir, file_name)
+            file_df = pd.DataFrame(self._chebi_cls._load_data_from_file(file_path))
+            file_df["split"] = split  # Assign the split label to the DataFrame
+            df_list.append(file_df)
+
+            # Create split assignment for the current DataFrame
+            split_assignment = pd.DataFrame({"id": file_df["id"], "split": split})
+            split_assignment_list.append(split_assignment)
+
+        # Concatenate all dataframes and split assignments
+        combined_df = pd.concat(df_list, ignore_index=True)
+        combined_split_assignment = pd.concat(split_assignment_list, ignore_index=True)
+
+        return combined_df, combined_split_assignment
+
+    @staticmethod
+    def _check_if_old_splits_exists(old_dir, old_splits_file_names):
+        if any(
+            not os.path.isfile(os.path.join(old_dir, file))
+            for file in old_splits_file_names.values()
+        ):
+            raise FileNotFoundError(
+                f"One of the splits {old_splits_file_names.values()} doesn't exist "
+                f"in old data-folder structure: {old_dir}"
+            )
+
+    @staticmethod
+    def _copy_file(old_file_dir, new_file_dir, file_name):
+        os.makedirs(new_file_dir, exist_ok=True)
+        new_file_path = os.path.join(new_file_dir, file_name)
+        if os.path.isfile(new_file_path):
+            print(f"File {new_file_path} already exists in new data-folder structure")
+            return
+
+        old_file_path = os.path.join(old_file_dir, file_name)
+        if not os.path.isfile(old_file_path):
+            raise FileNotFoundError(
+                f"File {old_file_path} doesn't exist in old data-folder structure"
+            )
+
+        shutil.copy2(os.path.abspath(old_file_path), os.path.abspath(new_file_path))
+        print(f"Copied from {old_file_path} to {new_file_path}")
+
+    @property
+    def _old_base_dir(self):
+        return os.path.join(
+            "data", self._chebi_cls._name, f"chebi_v{self._chebi_cls.chebi_version}"
+        )
+
+    @property
+    def _old_processed_dir(self):
+        res = os.path.join(
+            self._old_base_dir,
+            "processed",
+            *self._chebi_cls.identifier,
+        )
+        if self._chebi_cls.single_class is None:
+            return res
+        else:
+            return os.path.join(res, f"single_{self._chebi_cls.single_class}")
+
+    @property
+    def _old_raw_dir(self):
+        """name of dir where the raw data is stored"""
+        return os.path.join(self._old_base_dir, "raw")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Migrate ChEBI dataset to new structure and handle splits."
+ ) + parser.add_argument( + "old_directory", type=str, help="Path to the old directory structure" + ) + parser.add_argument( + "new_directory", type=str, help="Path to the new directory structure" + ) + parser.add_argument( + "--split_file_path", + type=str, + help="Path to the CSV file with split configuration", + default=None, + ) + parser.add_argument("chebi_version", type=int, help="Data Version related to chebi") + args = parser.parse_args() + + # main(args.old_directory, args.new_directory, args.split_file_path) + +# python migration_script.py path/to/old_directory path/to/new_directory --split_file_path path/to/split_configuration.csv +# python migration_script.py path/to/old_directory path/to/new_directory From d8e68cc9cb8fff0e181ddaab213f0b0ff37137fb Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Mon, 1 Jul 2024 21:42:30 +0200 Subject: [PATCH 18/26] argparser + fixes --- .../migration/chebi_data_migration.py | 92 ++++++++++++------- 1 file changed, 60 insertions(+), 32 deletions(-) diff --git a/chebai/preprocessing/migration/chebi_data_migration.py b/chebai/preprocessing/migration/chebi_data_migration.py index 6ef3b156..dbed9169 100644 --- a/chebai/preprocessing/migration/chebi_data_migration.py +++ b/chebai/preprocessing/migration/chebi_data_migration.py @@ -13,51 +13,70 @@ class ChebiDataMigration: __MODULE_PATH: str = "chebai.preprocessing.datasets.chebi" __DATA_ROOT_DIR: str = "data" - def __init__(self, chebi_version, class_name: str): - self._chebi_version: int = chebi_version + def __init__(self, class_name: str, chebi_version: int, single_class: int = None): # Chebi class instance according to new data structure self._chebi_cls: Type[_ChEBIDataExtractor] = self._dynamic_import_chebi_cls( - class_name, chebi_version - ) - self._class_path: str = class_name - - def _get_old_dir_structure(self): - base_dir = os.path.join( - self.__DATA_ROOT_DIR, - self._chebi_cls._name, - f"chebi_v{self._chebi_cls.chebi_version}", + class_name, chebi_version, single_class ) + self._chebi_version: int = chebi_version + self._single_class: int = single_class + self._class_name: str = class_name @classmethod - def _dynamic_import_chebi_cls(cls, class_name: str, chebi_version: int): + def _dynamic_import_chebi_cls( + cls, class_name: str, chebi_version: int, single_class: int + ) -> Type[_ChEBIDataExtractor]: class_name = class_name.strip() module = __import__(cls.__MODULE_PATH, fromlist=[class_name]) _class = getattr(module, class_name) - return _class({"chebi_version": chebi_version}) + return _class(**{"chebi_version": chebi_version, "single_class": single_class}) def migrate(self): os.makedirs(self._chebi_cls.base_dir, exist_ok=True) + print("Migration started..................") + self._migrate_old_raw_data() self._migrate_old_processed_data() + print("Migration completed..................") def _migrate_old_raw_data(self): + print("-" * 50) + print("Migrating old raw Data.....................") + self._copy_file(self._old_raw_dir, self._chebi_cls.raw_dir, "chebi.obo") self._copy_file( self._old_raw_dir, self._chebi_cls.processed_dir_main, "classes.txt" ) + old_splits_file_names = { "train": "train.pkl", "validation": "validation.pkl", "test": "test.pkl", } - data_df, split_ass_df = self._combine_splits( + data_file_path = os.path.join(self._chebi_cls.processed_dir_main, "data.pkl") + if os.path.isfile(data_file_path): + print(f"File {data_file_path} already exists in new data-folder structure") + return + + data_df, split_ass_df = self._combine_pkl_splits( self._old_raw_dir, 
old_splits_file_names ) - data_df.to_pickle(os.path.join(self._chebi_cls.processed_dir_main, "data.pkl")) - split_ass_df.to_csv( - os.path.join(self._chebi_cls.processed_dir_main, "splits.csv") - ) + + data_df.to_pickle(data_file_path) + print(f"File {data_file_path} saved to new data-folder structure") + + split_file = os.path.join(self._chebi_cls.processed_dir_main, "splits.csv") + split_ass_df.to_csv(split_file) + print(f"File {split_file} saved to new data-folder structure") def _migrate_old_processed_data(self): + print("-" * 50) + print("Migrating old processed data.....................") + + data_file_path = os.path.join(self._chebi_cls.processed_dir, "data.pt") + if os.path.isfile(data_file_path): + print(f"File {data_file_path} already exists in new data-folder structure") + return + old_splits_file_names = { "train": "train.pt", "validation": "validation.pt", @@ -67,13 +86,16 @@ def _migrate_old_processed_data(self): data_df = self._combine_pt_splits( self._old_processed_dir, old_splits_file_names ) - torch.save(data_df, self._chebi_cls.processed_dir) + + torch.save(data_df, data_file_path) + print(f"File {data_file_path} saved to new data-folder structure") def _combine_pt_splits( self, old_dir: str, old_splits_file_names: Dict[str, str] ) -> pd.DataFrame: self._check_if_old_splits_exists(old_dir, old_splits_file_names) + print("Combinig `.pt` splits...") df_list: List[pd.DataFrame] = [] for split, file_name in old_splits_file_names.items(): file_path = os.path.join(old_dir, file_name) @@ -90,14 +112,15 @@ def _combine_pkl_splits( df_list: List[pd.DataFrame] = [] split_assignment_list: List[pd.DataFrame] = [] + print("Combining `.pkl` splits...") for split, file_name in old_splits_file_names.items(): file_path = os.path.join(old_dir, file_name) - file_df = pd.DataFrame(self._chebi_cls._load_data_from_file(file_path)) + file_df = pd.DataFrame(self._chebi_cls._load_data_from_file(path=file_path)) file_df["split"] = split # Assign the split label to the DataFrame df_list.append(file_df) # Create split assignment for the current DataFrame - split_assignment = pd.DataFrame({"id": file_df["id"], "split": split}) + split_assignment = pd.DataFrame({"id": file_df["ident"], "split": split}) split_assignment_list.append(split_assignment) # Concatenate all dataframes and split assignments @@ -137,7 +160,9 @@ def _copy_file(old_file_dir, new_file_dir, file_name): @property def _old_base_dir(self): return os.path.join( - "data", self._chebi_cls._name, f"chebi_v{self._chebi_cls.chebi_version}" + self.__DATA_ROOT_DIR, + self._chebi_cls._name, + f"chebi_v{self._chebi_cls.chebi_version}", ) @property @@ -163,21 +188,24 @@ def _old_raw_dir(self): description="Migrate ChEBI dataset to new structure and handle splits." 
) parser.add_argument( - "old_directory", type=str, help="Path to the old directory structure" + "--chebi_class", + type=str, + required=True, + help="Chebi class name from the `chebai/preprocessing/datasets/chebi.py`", ) parser.add_argument( - "new_directory", type=str, help="Path to the new directory structure" + "--chebi_version", type=int, required=True, help="Chebi data version" ) parser.add_argument( - "--split_file_path", - type=str, - help="Path to the CSV file with split configuration", + "--single_class", + type=int, + help="The ID of the single class to predict", default=None, ) - parser.add_argument("chebi_version", type=int, help="Data Version related to chebi") args = parser.parse_args() - # main(args.old_directory, args.new_directory, args.split_file_path) - -# python migration_script.py path/to/old_directory path/to/new_directory --split_file_path path/to/split_configuration.csv -# python migration_script.py path/to/old_directory path/to/new_directory + ChebiDataMigration( + class_name=args.chebi_class, + chebi_version=args.chebi_version, + single_class=args.single_class, + ).migrate() From ae61d10ab313493c4c2bbaa86594791d8d86c2bb Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Mon, 1 Jul 2024 21:53:53 +0200 Subject: [PATCH 19/26] transform data.pkl to data.pt instead of combining .pt split files --- chebai/preprocessing/migration/chebi_data_migration.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/chebai/preprocessing/migration/chebi_data_migration.py b/chebai/preprocessing/migration/chebi_data_migration.py index dbed9169..148c449a 100644 --- a/chebai/preprocessing/migration/chebi_data_migration.py +++ b/chebai/preprocessing/migration/chebi_data_migration.py @@ -35,7 +35,12 @@ def migrate(self): os.makedirs(self._chebi_cls.base_dir, exist_ok=True) print("Migration started..................") self._migrate_old_raw_data() - self._migrate_old_processed_data() + + # Either we can combine `.pt` split files to form `data.pt` file + # self._migrate_old_processed_data() + # OR + # we can transform `data.pkl` to `data.pt` file (this seems efficient along with less code) + self._chebi_cls.setup_processed() print("Migration completed..................") def _migrate_old_raw_data(self): From 9c2554342711820027c23573db6e0f54af20e38d Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Wed, 3 Jul 2024 12:54:46 +0200 Subject: [PATCH 20/26] migration - raw data error fix + id col error --- .../migration/chebi_data_migration.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/chebai/preprocessing/migration/chebi_data_migration.py b/chebai/preprocessing/migration/chebi_data_migration.py index 148c449a..443cb55d 100644 --- a/chebai/preprocessing/migration/chebi_data_migration.py +++ b/chebai/preprocessing/migration/chebi_data_migration.py @@ -33,7 +33,7 @@ def _dynamic_import_chebi_cls( def migrate(self): os.makedirs(self._chebi_cls.base_dir, exist_ok=True) - print("Migration started..................") + print("Migration started.....") self._migrate_old_raw_data() # Either we can combine `.pt` split files to form `data.pt` file @@ -41,11 +41,11 @@ def migrate(self): # OR # we can transform `data.pkl` to `data.pt` file (this seems efficient along with less code) self._chebi_cls.setup_processed() - print("Migration completed..................") + print("Migration completed.....") def _migrate_old_raw_data(self): print("-" * 50) - print("Migrating old raw Data.....................") + print("Migrating old raw Data....") 
self._copy_file(self._old_raw_dir, self._chebi_cls.raw_dir, "chebi.obo") self._copy_file( @@ -66,16 +66,17 @@ def _migrate_old_raw_data(self): self._old_raw_dir, old_splits_file_names ) - data_df.to_pickle(data_file_path) + # data_df.to_pickle(data_file_path) + self._chebi_cls.save_processed(data_df, "data.pkl") print(f"File {data_file_path} saved to new data-folder structure") split_file = os.path.join(self._chebi_cls.processed_dir_main, "splits.csv") - split_ass_df.to_csv(split_file) + split_ass_df.to_csv(split_file) # overwrites the files with same name print(f"File {split_file} saved to new data-folder structure") def _migrate_old_processed_data(self): print("-" * 50) - print("Migrating old processed data.....................") + print("Migrating old processed data.....") data_file_path = os.path.join(self._chebi_cls.processed_dir, "data.pt") if os.path.isfile(data_file_path): @@ -120,12 +121,11 @@ def _combine_pkl_splits( print("Combining `.pkl` splits...") for split, file_name in old_splits_file_names.items(): file_path = os.path.join(old_dir, file_name) - file_df = pd.DataFrame(self._chebi_cls._load_data_from_file(path=file_path)) - file_df["split"] = split # Assign the split label to the DataFrame + file_df = pd.read_pickle(file_path) df_list.append(file_df) # Create split assignment for the current DataFrame - split_assignment = pd.DataFrame({"id": file_df["ident"], "split": split}) + split_assignment = pd.DataFrame({"id": file_df["id"], "split": split}) split_assignment_list.append(split_assignment) # Concatenate all dataframes and split assignments From 0c2fca1a854ef8d0b4e1ef3238d39cd083c36e1a Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Wed, 3 Jul 2024 12:58:57 +0200 Subject: [PATCH 21/26] pd.to_pickle instead of pickle.dump for code consistency --- chebai/preprocessing/datasets/chebi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chebai/preprocessing/datasets/chebi.py b/chebai/preprocessing/datasets/chebi.py index 2a3a16e0..f609ea61 100644 --- a/chebai/preprocessing/datasets/chebi.py +++ b/chebai/preprocessing/datasets/chebi.py @@ -216,7 +216,7 @@ def save_raw(self, data: pd.DataFrame, filename: str): pd.to_pickle(data, open(os.path.join(self.raw_dir, filename), "wb")) def save_processed(self, data: pd.DataFrame, filename: str): - pickle.dump(data, open(os.path.join(self.processed_dir_main, filename), "wb")) + pd.to_pickle(data, open(os.path.join(self.processed_dir_main, filename), "wb")) def _load_dict(self, input_file_path): """ From 1c4acea8d47e5898db0680bbaa329a0f71629af0 Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Wed, 3 Jul 2024 13:06:18 +0200 Subject: [PATCH 22/26] migration : added docstring + type hints --- .../migration/chebi_data_migration.py | 126 ++++++++++++++++-- 1 file changed, 112 insertions(+), 14 deletions(-) diff --git a/chebai/preprocessing/migration/chebi_data_migration.py b/chebai/preprocessing/migration/chebi_data_migration.py index 443cb55d..529d12a5 100644 --- a/chebai/preprocessing/migration/chebi_data_migration.py +++ b/chebai/preprocessing/migration/chebi_data_migration.py @@ -10,11 +10,30 @@ class ChebiDataMigration: + """ + A class to handle migration of ChEBI dataset to a new structure. + + Attributes: + __MODULE_PATH (str): The path to the module containing ChEBI classes. + __DATA_ROOT_DIR (str): The root directory for data. + _chebi_cls (_ChEBIDataExtractor): The ChEBI class instance. + _chebi_version (int): The version of the ChEBI dataset. + _single_class (int, optional): The ID of a single class to predict. 
+ _class_name (str): The name of the ChEBI class. + """ + __MODULE_PATH: str = "chebai.preprocessing.datasets.chebi" __DATA_ROOT_DIR: str = "data" def __init__(self, class_name: str, chebi_version: int, single_class: int = None): - # Chebi class instance according to new data structure + """ + Initialize the ChebiDataMigration class. + + Args: + class_name (str): The name of the ChEBI class. + chebi_version (int): The version of the ChEBI dataset. + single_class (int, optional): The ID of the single class to predict. + """ self._chebi_cls: Type[_ChEBIDataExtractor] = self._dynamic_import_chebi_cls( class_name, chebi_version, single_class ) @@ -26,12 +45,26 @@ def __init__(self, class_name: str, chebi_version: int, single_class: int = None def _dynamic_import_chebi_cls( cls, class_name: str, chebi_version: int, single_class: int ) -> Type[_ChEBIDataExtractor]: + """ + Dynamically import the ChEBI class. + + Args: + class_name (str): The name of the ChEBI class. + chebi_version (int): The version of the ChEBI dataset. + single_class (int): The ID of the single class to predict. + + Returns: + _ChEBIDataExtractor: An instance of the dynamically imported class. + """ class_name = class_name.strip() module = __import__(cls.__MODULE_PATH, fromlist=[class_name]) _class = getattr(module, class_name) return _class(**{"chebi_version": chebi_version, "single_class": single_class}) - def migrate(self): + def migrate(self) -> None: + """ + Start the migration process for the ChEBI dataset. + """ os.makedirs(self._chebi_cls.base_dir, exist_ok=True) print("Migration started.....") self._migrate_old_raw_data() @@ -43,7 +76,10 @@ def migrate(self): self._chebi_cls.setup_processed() print("Migration completed.....") - def _migrate_old_raw_data(self): + def _migrate_old_raw_data(self) -> None: + """ + Migrate old raw data files to the new data folder structure. + """ print("-" * 50) print("Migrating old raw Data....") @@ -66,7 +102,6 @@ def _migrate_old_raw_data(self): self._old_raw_dir, old_splits_file_names ) - # data_df.to_pickle(data_file_path) self._chebi_cls.save_processed(data_df, "data.pkl") print(f"File {data_file_path} saved to new data-folder structure") @@ -74,7 +109,10 @@ def _migrate_old_raw_data(self): split_ass_df.to_csv(split_file) # overwrites the files with same name print(f"File {split_file} saved to new data-folder structure") - def _migrate_old_processed_data(self): + def _migrate_old_processed_data(self) -> None: + """ + Migrate old processed data files to the new data folder structure. + """ print("-" * 50) print("Migrating old processed data.....") @@ -99,9 +137,19 @@ def _migrate_old_processed_data(self): def _combine_pt_splits( self, old_dir: str, old_splits_file_names: Dict[str, str] ) -> pd.DataFrame: + """ + Combine old `.pt` split files into a single DataFrame. + + Args: + old_dir (str): The directory containing the old split files. + old_splits_file_names (Dict[str, str]): A dictionary of split names and file names. + + Returns: + pd.DataFrame: The combined DataFrame. 
+ """ self._check_if_old_splits_exists(old_dir, old_splits_file_names) - print("Combinig `.pt` splits...") + print("Combining `.pt` splits...") df_list: List[pd.DataFrame] = [] for split, file_name in old_splits_file_names.items(): file_path = os.path.join(old_dir, file_name) @@ -113,6 +161,16 @@ def _combine_pt_splits( def _combine_pkl_splits( self, old_dir: str, old_splits_file_names: Dict[str, str] ) -> Tuple[pd.DataFrame, pd.DataFrame]: + """ + Combine old `.pkl` split files into a single DataFrame and create split assignments. + + Args: + old_dir (str): The directory containing the old split files. + old_splits_file_names (Dict[str, str]): A dictionary of split names and file names. + + Returns: + Tuple[pd.DataFrame, pd.DataFrame]: The combined DataFrame and split assignments DataFrame. + """ self._check_if_old_splits_exists(old_dir, old_splits_file_names) df_list: List[pd.DataFrame] = [] @@ -135,18 +193,41 @@ def _combine_pkl_splits( return combined_df, combined_split_assignment @staticmethod - def _check_if_old_splits_exists(old_dir, old_splits_file_names): + def _check_if_old_splits_exists( + old_dir: str, old_splits_file_names: Dict[str, str] + ) -> None: + """ + Check if the old split files exist in the specified directory. + + Args: + old_dir (str): The directory containing the old split files. + old_splits_file_names (Dict[str, str]): A dictionary of split names and file names. + + Raises: + FileNotFoundError: If any of the split files do not exist. + """ if any( not os.path.isfile(os.path.join(old_dir, file)) for file in old_splits_file_names.values() ): raise FileNotFoundError( - f"One of the split {old_splits_file_names.values()} doesn't exists " + f"One of the split {old_splits_file_names.values()} doesn't exist " f"in old data-folder structure: {old_dir}" ) @staticmethod - def _copy_file(old_file_dir, new_file_dir, file_name): + def _copy_file(old_file_dir: str, new_file_dir: str, file_name: str) -> None: + """ + Copy a file from the old directory to the new directory. + + Args: + old_file_dir (str): The directory containing the old file. + new_file_dir (str): The directory to copy the file to. + file_name (str): The name of the file to copy. + + Raises: + FileNotFoundError: If the file does not exist in the old directory. + """ os.makedirs(new_file_dir, exist_ok=True) new_file_path = os.path.join(new_file_dir, file_name) if os.path.isfile(new_file_path): @@ -156,14 +237,20 @@ def _copy_file(old_file_dir, new_file_dir, file_name): old_file_path = os.path.join(old_file_dir, file_name) if not os.path.isfile(old_file_path): raise FileNotFoundError( - f"File {old_file_path} doesn't exists in old data-folder structure" + f"File {old_file_path} doesn't exist in old data-folder structure" ) shutil.copy2(os.path.abspath(old_file_path), os.path.abspath(new_file_path)) print(f"Copied from {old_file_path} to {new_file_path}") @property - def _old_base_dir(self): + def _old_base_dir(self) -> str: + """ + Get the base directory for the old data structure. + + Returns: + str: The base directory for the old data. + """ return os.path.join( self.__DATA_ROOT_DIR, self._chebi_cls._name, @@ -171,7 +258,13 @@ def _old_base_dir(self): ) @property - def _old_processed_dir(self): + def _old_processed_dir(self) -> str: + """ + Get the processed directory for the old data structure. + + Returns: + str: The processed directory for the old data. 
+ """ res = os.path.join( self._old_base_dir, "processed", @@ -183,8 +276,13 @@ def _old_processed_dir(self): return os.path.join(res, f"single_{self._chebi_cls.single_class}") @property - def _old_raw_dir(self): - """name of dir where the raw data is stored""" + def _old_raw_dir(self) -> str: + """ + Get the raw directory for the old data structure. + + Returns: + str: The raw directory for the old data. + """ return os.path.join(self._old_base_dir, "raw") From 9992a15162c4474e4e9f3bd6ea847e0be44f69cb Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Wed, 3 Jul 2024 18:15:57 +0200 Subject: [PATCH 23/26] logic to generate splits csv + use csv if provided --- chebai/preprocessing/datasets/chebi.py | 85 +++++++++++++++++++++++++- 1 file changed, 83 insertions(+), 2 deletions(-) diff --git a/chebai/preprocessing/datasets/chebi.py b/chebai/preprocessing/datasets/chebi.py index f609ea61..29bb75b6 100644 --- a/chebai/preprocessing/datasets/chebi.py +++ b/chebai/preprocessing/datasets/chebi.py @@ -156,6 +156,51 @@ def __init__( **_init_kwargs, ) + self.splits_file_path = self._validate_splits_file_path( + kwargs.get("splits_file_path", None) + ) + + @staticmethod + def _validate_splits_file_path(splits_file_path=None): + """ + Validates the provided splits file path. + + Args: + splits_file_path (str or None): Path to the splits CSV file. + + Returns: + str or None: Validated splits file path if checks pass, None if splits_file_path is None. + + Raises: + FileNotFoundError: If the splits file does not exist. + ValueError: If the splits file is empty or missing required columns ('id' and/or 'split'), or not a CSV file. + """ + if splits_file_path is None: + return None + + if not os.path.isfile(splits_file_path): + raise FileNotFoundError(f"File {splits_file_path} does not exist") + + file_size = os.path.getsize(splits_file_path) + if file_size == 0: + raise ValueError(f"File {splits_file_path} is empty") + + # Check if the file has a CSV extension + if not splits_file_path.lower().endswith(".csv"): + raise ValueError(f"File {splits_file_path} is not a CSV file") + + # Read the CSV file into a DataFrame + splits_df = pd.read_csv(splits_file_path) + + # Check if 'id' and 'split' columns are in the DataFrame + required_columns = {"id", "split"} + if not required_columns.issubset(splits_df.columns): + raise ValueError( + f"CSV file {splits_file_path} is missing required columns ('id' and/or 'split')." + ) + + return splits_file_path + def extract_class_hierarchy(self, chebi_path): """ Extracts the class hierarchy from the ChEBI ontology. 
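
The splits file validated above is a plain CSV with an `id` column holding ChEBI IDs and a `split` column holding one of `train`, `validation` or `test`. A minimal sketch of producing a compatible file with pandas; the IDs and the file name are made up for illustration:

    import pandas as pd

    # Hypothetical split assignment: each ChEBI id maps to exactly one split.
    splits = pd.DataFrame(
        {
            "id": [12345, 67890, 24601],
            "split": ["train", "validation", "test"],
        }
    )
    # The validator only requires that the 'id' and 'split' columns are present,
    # so a file with an extra index column would also pass.
    splits.to_csv("splits.csv", index=False)
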
@@ -632,7 +677,7 @@ def prepare_data(self, *args, **kwargs): # Generate the "chebi_version_train" data if it doesn't exist self._chebi_version_train_obj.prepare_data(*args, **kwargs) - def _get_dynamic_splits(self): + def _generate_dynamic_splits(self): """Generate data splits during run-time and saves in class variables""" # Load encoded data derived from "chebi_version" @@ -687,10 +732,43 @@ def _get_dynamic_splits(self): ) df_test = df_test_chebi_ver + # Generate splits.csv file to store ids of each corresponding split + split_assignment_list: List[pd.DataFrame] = [ + pd.DataFrame({"id": df_train["ident"], "split": "train"}), + pd.DataFrame({"id": df_val["ident"], "split": "validation"}), + pd.DataFrame({"id": df_test["ident"], "split": "test"}), + ] + combined_split_assignment = pd.concat(split_assignment_list, ignore_index=True) + combined_split_assignment.to_csv( + os.path.join(self.processed_dir_main, "splits.csv") + ) + + # Store the splits in class variables self.dynamic_df_train = df_train self.dynamic_df_val = df_val self.dynamic_df_test = df_test + def _retreive_splits_from_csv(self): + splits_df = pd.read_csv(self.splits_file_path) + + filename = self.processed_file_names_dict["data"] + data_chebi_version = torch.load(os.path.join(self.processed_dir, filename)) + df_chebi_version = pd.DataFrame(data_chebi_version) + + train_ids = splits_df[splits_df["split"] == "train"]["id"] + validation_ids = splits_df[splits_df["split"] == "validation"]["id"] + test_ids = splits_df[splits_df["split"] == "test"]["id"] + + self.dynamic_df_train = df_chebi_version[ + df_chebi_version["ident"].isin(train_ids) + ] + self.dynamic_df_val = df_chebi_version[ + df_chebi_version["ident"].isin(validation_ids) + ] + self.dynamic_df_test = df_chebi_version[ + df_chebi_version["ident"].isin(test_ids) + ] + @property def dynamic_split_dfs(self): if any( @@ -701,7 +779,10 @@ def dynamic_split_dfs(self): self.dynamic_df_train, ] ): - self._get_dynamic_splits() + if self.splits_file_path is None: + self._generate_dynamic_splits() + else: + self._retreive_splits_from_csv() return { "train": self.dynamic_df_train, "validation": self.dynamic_df_val, From 07340cb1819e596fb83ef9c700ffe84bf060d0ac Mon Sep 17 00:00:00 2001 From: aditya0by0 Date: Wed, 3 Jul 2024 20:02:28 +0200 Subject: [PATCH 24/26] read only first row to validate presence of relevant columns in csv --- chebai/preprocessing/datasets/chebi.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/chebai/preprocessing/datasets/chebi.py b/chebai/preprocessing/datasets/chebi.py index 29bb75b6..d8a4cdd4 100644 --- a/chebai/preprocessing/datasets/chebi.py +++ b/chebai/preprocessing/datasets/chebi.py @@ -155,7 +155,7 @@ def __init__( single_class=self.single_class, **_init_kwargs, ) - + # Path of csv file which contains a list of chebi ids & their assignment to a dataset (either train, validation or test). 
self.splits_file_path = self._validate_splits_file_path( kwargs.get("splits_file_path", None) ) @@ -189,8 +189,8 @@ def _validate_splits_file_path(splits_file_path=None): if not splits_file_path.lower().endswith(".csv"): raise ValueError(f"File {splits_file_path} is not a CSV file") - # Read the CSV file into a DataFrame - splits_df = pd.read_csv(splits_file_path) + # Read the first row of CSV file into a DataFrame + splits_df = pd.read_csv(splits_file_path, nrows=1) # Check if 'id' and 'split' columns are in the DataFrame required_columns = {"id", "split"} @@ -604,7 +604,7 @@ def prepare_data(self, *args, **kwargs): Prepares the data for the Chebi dataset. This method checks for the presence of raw data in the specified directory. - If the raw data is missing, it fetches the ontology and creates a test test set. + If the raw data is missing, it fetches the ontology and creates a test set. If the test set already exists, it loads it from the file. Then, it creates the train/validation split based on the test set. @@ -780,8 +780,10 @@ def dynamic_split_dfs(self): ] ): if self.splits_file_path is None: + # Generate splits based on given seed, create csv file to records the splits self._generate_dynamic_splits() else: + # If user has provided splits file path, use it to get the splits from the data self._retreive_splits_from_csv() return { "train": self.dynamic_df_train, From bc19a215b289705bc5e1a644bba2c046ba346be4 Mon Sep 17 00:00:00 2001 From: sfluegel Date: Fri, 5 Jul 2024 12:20:46 +0200 Subject: [PATCH 25/26] add jsonargparse cli to migration, gentle file-not-found handling --- chebai/preprocessing/datasets/chebi.py | 15 +- .../migration/chebi_data_migration.py | 181 ++++++++++-------- 2 files changed, 106 insertions(+), 90 deletions(-) diff --git a/chebai/preprocessing/datasets/chebi.py b/chebai/preprocessing/datasets/chebi.py index d8a4cdd4..2a1c9fdb 100644 --- a/chebai/preprocessing/datasets/chebi.py +++ b/chebai/preprocessing/datasets/chebi.py @@ -514,9 +514,7 @@ def processed_dir_main(self): @property def processed_dir(self): res = os.path.join( - self.base_dir, - self._name, - "processed", + self.processed_dir_main, *self.identifier, ) if self.single_class is None: @@ -940,15 +938,20 @@ class ChEBIOver100SELFIES(ChEBIOverXSELFIES, ChEBIOver100): class ChEBIOverXPartial(ChEBIOverX): - """Dataset that doesn't use the full ChEBI, but extracts are part of ChEBI""" + """Dataset that doesn't use the full ChEBI, but extracts a part of ChEBI (subclasses of a given top class)""" def __init__(self, top_class_id: int, **kwargs): self.top_class_id = top_class_id super().__init__(**kwargs) @property - def base_dir(self): - return os.path.join(super().base_dir, f"partial_{self.top_class_id}") + def processed_dir_main(self): + return os.path.join( + self.base_dir, + self._name, + f"partial_{self.top_class_id}", + "processed", + ) def extract_class_hierarchy(self, chebi_path): with open(chebi_path, encoding="utf-8") as chebi: diff --git a/chebai/preprocessing/migration/chebi_data_migration.py b/chebai/preprocessing/migration/chebi_data_migration.py index 529d12a5..6ea2d7e2 100644 --- a/chebai/preprocessing/migration/chebi_data_migration.py +++ b/chebai/preprocessing/migration/chebi_data_migration.py @@ -1,12 +1,13 @@ import argparse import os import shutil -from typing import Dict, List, Tuple, Type +from typing import Dict, List, Optional, Tuple, Type import pandas as pd import torch +from jsonargparse import CLI -from chebai.preprocessing.datasets.chebi import _ChEBIDataExtractor +from 
chebai.preprocessing.datasets.chebi import ChEBIOverXPartial, _ChEBIDataExtractor class ChebiDataMigration: @@ -17,34 +18,25 @@ class ChebiDataMigration: __MODULE_PATH (str): The path to the module containing ChEBI classes. __DATA_ROOT_DIR (str): The root directory for data. _chebi_cls (_ChEBIDataExtractor): The ChEBI class instance. - _chebi_version (int): The version of the ChEBI dataset. - _single_class (int, optional): The ID of a single class to predict. - _class_name (str): The name of the ChEBI class. """ __MODULE_PATH: str = "chebai.preprocessing.datasets.chebi" __DATA_ROOT_DIR: str = "data" - def __init__(self, class_name: str, chebi_version: int, single_class: int = None): - """ - Initialize the ChebiDataMigration class. + def __init__(self, datamodule: _ChEBIDataExtractor): + self._chebi_cls = datamodule - Args: - class_name (str): The name of the ChEBI class. - chebi_version (int): The version of the ChEBI dataset. - single_class (int, optional): The ID of the single class to predict. - """ - self._chebi_cls: Type[_ChEBIDataExtractor] = self._dynamic_import_chebi_cls( + @classmethod + def from_args(cls, class_name: str, chebi_version: int, single_class: int = None): + chebi_cls: _ChEBIDataExtractor = ChebiDataMigration._dynamic_import_chebi_cls( class_name, chebi_version, single_class ) - self._chebi_version: int = chebi_version - self._single_class: int = single_class - self._class_name: str = class_name + return cls(chebi_cls) @classmethod def _dynamic_import_chebi_cls( cls, class_name: str, chebi_version: int, single_class: int - ) -> Type[_ChEBIDataExtractor]: + ) -> _ChEBIDataExtractor: """ Dynamically import the ChEBI class. @@ -67,47 +59,55 @@ def migrate(self) -> None: """ os.makedirs(self._chebi_cls.base_dir, exist_ok=True) print("Migration started.....") - self._migrate_old_raw_data() + old_raw_data_exists = self._migrate_old_raw_data() # Either we can combine `.pt` split files to form `data.pt` file # self._migrate_old_processed_data() # OR # we can transform `data.pkl` to `data.pt` file (this seems efficient along with less code) - self._chebi_cls.setup_processed() + if old_raw_data_exists: + self._chebi_cls.setup_processed() + else: + self._migrate_old_processed_data() print("Migration completed.....") - def _migrate_old_raw_data(self) -> None: + def _migrate_old_raw_data(self) -> bool: """ Migrate old raw data files to the new data folder structure. 
""" print("-" * 50) - print("Migrating old raw Data....") + print("Migrating old raw data....") self._copy_file(self._old_raw_dir, self._chebi_cls.raw_dir, "chebi.obo") self._copy_file( self._old_raw_dir, self._chebi_cls.processed_dir_main, "classes.txt" ) - old_splits_file_names = { + old_splits_file_names_raw = { "train": "train.pkl", "validation": "validation.pkl", "test": "test.pkl", } + data_file_path = os.path.join(self._chebi_cls.processed_dir_main, "data.pkl") if os.path.isfile(data_file_path): print(f"File {data_file_path} already exists in new data-folder structure") - return + return True - data_df, split_ass_df = self._combine_pkl_splits( - self._old_raw_dir, old_splits_file_names + data_df_split_ass_df = self._combine_pkl_splits( + self._old_raw_dir, old_splits_file_names_raw ) - - self._chebi_cls.save_processed(data_df, "data.pkl") - print(f"File {data_file_path} saved to new data-folder structure") - - split_file = os.path.join(self._chebi_cls.processed_dir_main, "splits.csv") - split_ass_df.to_csv(split_file) # overwrites the files with same name - print(f"File {split_file} saved to new data-folder structure") + if data_df_split_ass_df is not None: + data_df = data_df_split_ass_df[0] + split_ass_df = data_df_split_ass_df[1] + self._chebi_cls.save_processed(data_df, "data.pkl") + print(f"File {data_file_path} saved to new data-folder structure") + + split_file = os.path.join(self._chebi_cls.processed_dir_main, "splits.csv") + split_ass_df.to_csv(split_file) # overwrites the files with same name + print(f"File {split_file} saved to new data-folder structure") + return True + return False def _migrate_old_processed_data(self) -> None: """ @@ -130,13 +130,13 @@ def _migrate_old_processed_data(self) -> None: data_df = self._combine_pt_splits( self._old_processed_dir, old_splits_file_names ) - - torch.save(data_df, data_file_path) - print(f"File {data_file_path} saved to new data-folder structure") + if data_df is not None: + torch.save(data_df, data_file_path) + print(f"File {data_file_path} saved to new data-folder structure") def _combine_pt_splits( self, old_dir: str, old_splits_file_names: Dict[str, str] - ) -> pd.DataFrame: + ) -> Optional[pd.DataFrame]: """ Combine old `.pt` split files into a single DataFrame. @@ -147,7 +147,11 @@ def _combine_pt_splits( Returns: pd.DataFrame: The combined DataFrame. """ - self._check_if_old_splits_exists(old_dir, old_splits_file_names) + if not self._check_if_old_splits_exists(old_dir, old_splits_file_names): + print( + f"Missing at least one of [{', '.join(old_splits_file_names.values())}] in {old_dir}" + ) + return None print("Combining `.pt` splits...") df_list: List[pd.DataFrame] = [] @@ -160,7 +164,7 @@ def _combine_pt_splits( def _combine_pkl_splits( self, old_dir: str, old_splits_file_names: Dict[str, str] - ) -> Tuple[pd.DataFrame, pd.DataFrame]: + ) -> Optional[Tuple[pd.DataFrame, pd.DataFrame]]: """ Combine old `.pkl` split files into a single DataFrame and create split assignments. @@ -171,7 +175,11 @@ def _combine_pkl_splits( Returns: Tuple[pd.DataFrame, pd.DataFrame]: The combined DataFrame and split assignments DataFrame. 
""" - self._check_if_old_splits_exists(old_dir, old_splits_file_names) + if not self._check_if_old_splits_exists(old_dir, old_splits_file_names): + print( + f"Missing at least one of [{', '.join(old_splits_file_names.values())}] in {old_dir}" + ) + return None df_list: List[pd.DataFrame] = [] split_assignment_list: List[pd.DataFrame] = [] @@ -195,7 +203,7 @@ def _combine_pkl_splits( @staticmethod def _check_if_old_splits_exists( old_dir: str, old_splits_file_names: Dict[str, str] - ) -> None: + ) -> bool: """ Check if the old split files exist in the specified directory. @@ -203,17 +211,11 @@ def _check_if_old_splits_exists( old_dir (str): The directory containing the old split files. old_splits_file_names (Dict[str, str]): A dictionary of split names and file names. - Raises: - FileNotFoundError: If any of the split files do not exist. """ - if any( - not os.path.isfile(os.path.join(old_dir, file)) + return all( + os.path.isfile(os.path.join(old_dir, file)) for file in old_splits_file_names.values() - ): - raise FileNotFoundError( - f"One of the split {old_splits_file_names.values()} doesn't exist " - f"in old data-folder structure: {old_dir}" - ) + ) @staticmethod def _copy_file(old_file_dir: str, new_file_dir: str, file_name: str) -> None: @@ -230,18 +232,19 @@ def _copy_file(old_file_dir: str, new_file_dir: str, file_name: str) -> None: """ os.makedirs(new_file_dir, exist_ok=True) new_file_path = os.path.join(new_file_dir, file_name) - if os.path.isfile(new_file_path): - print(f"File {new_file_path} already exists in new data-folder structure") - return - old_file_path = os.path.join(old_file_dir, file_name) - if not os.path.isfile(old_file_path): - raise FileNotFoundError( - f"File {old_file_path} doesn't exist in old data-folder structure" + + if os.path.isfile(new_file_path): + print( + f"Skipping {old_file_path} (file already exists at new location {new_file_path})" ) + return - shutil.copy2(os.path.abspath(old_file_path), os.path.abspath(new_file_path)) - print(f"Copied from {old_file_path} to {new_file_path}") + if os.path.isfile(old_file_path): + shutil.copy2(os.path.abspath(old_file_path), os.path.abspath(new_file_path)) + print(f"Copied {old_file_path} to {new_file_path}") + else: + print(f"Skipping expected file {old_file_path} (not found)") @property def _old_base_dir(self) -> str: @@ -251,6 +254,13 @@ def _old_base_dir(self) -> str: Returns: str: The base directory for the old data. """ + if isinstance(self._chebi_cls, ChEBIOverXPartial): + return os.path.join( + self.__DATA_ROOT_DIR, + self._chebi_cls._name, + f"chebi_v{self._chebi_cls.chebi_version}", + f"partial_{self._chebi_cls.top_class_id}", + ) return os.path.join( self.__DATA_ROOT_DIR, self._chebi_cls._name, @@ -286,29 +296,32 @@ def _old_raw_dir(self) -> str: return os.path.join(self._old_base_dir, "raw") +class Main: + + def migrate( + self, + datamodule: Optional[_ChEBIDataExtractor] = None, + class_name: Optional[str] = None, + chebi_version: Optional[int] = None, + single_class: Optional[int] = None, + ): + """ + Migrate ChEBI dataset to new structure and handle splits. + + Args: + datamodule (Optional[_ChEBIDataExtractor]): The datamodule instance. If not provided, class_name and + chebi_version are required. + class_name (Optional[str]): The name of the ChEBI class. + chebi_version (Optional[int]): The version of the ChEBI dataset. + single_class (Optional[int]): The ID of the single class to predict. 
+ """ + if datamodule is not None: + ChebiDataMigration(datamodule).migrate() + else: + ChebiDataMigration.from_args( + class_name, chebi_version, single_class + ).migrate() + + if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Migrate ChEBI dataset to new structure and handle splits." - ) - parser.add_argument( - "--chebi_class", - type=str, - required=True, - help="Chebi class name from the `chebai/preprocessing/datasets/chebi.py`", - ) - parser.add_argument( - "--chebi_version", type=int, required=True, help="Chebi data version" - ) - parser.add_argument( - "--single_class", - type=int, - help="The ID of the single class to predict", - default=None, - ) - args = parser.parse_args() - - ChebiDataMigration( - class_name=args.chebi_class, - chebi_version=args.chebi_version, - single_class=args.single_class, - ).migrate() + CLI(Main) From 8b0b5053f8043c59677f24c88fa13065719083ba Mon Sep 17 00:00:00 2001 From: sfluegel Date: Fri, 5 Jul 2024 13:43:55 +0200 Subject: [PATCH 26/26] add documentation for users --- chebai/preprocessing/datasets/chebi.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/chebai/preprocessing/datasets/chebi.py b/chebai/preprocessing/datasets/chebi.py index 2a1c9fdb..93d7be65 100644 --- a/chebai/preprocessing/datasets/chebi.py +++ b/chebai/preprocessing/datasets/chebi.py @@ -120,6 +120,8 @@ class _ChEBIDataExtractor(XYBaseDataModule, ABC): chebi_version will be used for training, validation and test. Defaults to None. single_class (int, optional): The ID of the single class to predict. If not set, all available labels will be predicted. Defaults to None. + dynamic_data_split_seed (int, optional): The seed for random data splitting. Defaults to 42. + splits_file_path (str, optional): Path to the splits CSV file. Defaults to None. **kwargs: Additional keyword arguments (passed to XYBaseDataModule). Attributes: @@ -677,7 +679,7 @@ def prepare_data(self, *args, **kwargs): def _generate_dynamic_splits(self): """Generate data splits during run-time and saves in class variables""" - + print("Generate dynamic splits...") # Load encoded data derived from "chebi_version" try: filename = self.processed_file_names_dict["data"] @@ -746,7 +748,8 @@ def _generate_dynamic_splits(self): self.dynamic_df_val = df_val self.dynamic_df_test = df_test - def _retreive_splits_from_csv(self): + def _retrieve_splits_from_csv(self): + print(f"Loading splits from {self.splits_file_path}...") splits_df = pd.read_csv(self.splits_file_path) filename = self.processed_file_names_dict["data"] @@ -782,7 +785,7 @@ def dynamic_split_dfs(self): self._generate_dynamic_splits() else: # If user has provided splits file path, use it to get the splits from the data - self._retreive_splits_from_csv() + self._retrieve_splits_from_csv() return { "train": self.dynamic_df_train, "validation": self.dynamic_df_val,
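
Taken together, the migration entry point and the `splits_file_path` option give a round trip from the old folder layout to reproducible splits. A sketch of how this could be driven, assuming `ChEBIOver100` with ChEBI version 231 as an example (any class from `chebai/preprocessing/datasets/chebi.py` should work analogously; the splits.csv path below is illustrative, since the actual location depends on `processed_dir_main`):

    from chebai.preprocessing.datasets.chebi import ChEBIOver100
    from chebai.preprocessing.migration.chebi_data_migration import ChebiDataMigration

    # Move an existing v231 dataset from the old folder layout into the new one;
    # when the old raw splits are present, this writes data.pkl, splits.csv and
    # the encoded data.pt.
    ChebiDataMigration.from_args(class_name="ChEBIOver100", chebi_version=231).migrate()

    # Later runs can pin the recorded split assignments instead of drawing a new
    # random split (the path below is an example, not a fixed location).
    data_module = ChEBIOver100(
        chebi_version=231,
        splits_file_path="data/chebi_v231/ChEBI100/processed/splits.csv",
    )

With the jsonargparse CLI added in PATCH 25, the same migration should also be reachable from the shell as a subcommand, along the lines of `python -m chebai.preprocessing.migration.chebi_data_migration migrate --class_name=ChEBIOver100 --chebi_version=231`.
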