Merged

Changes from all commits (29 commits)
89ffbd3  Data handling restructure (aditya0by0, May 27, 2024)
f07f312  Merge branch 'dev' into feature/testing_framework (aditya0by0, May 27, 2024)
bd6382b  Update chebi tests for dynamic splits (aditya0by0, Jun 5, 2024)
d8abee2  Dynamic split for chebi_version_train + changes (aditya0by0, Jun 8, 2024)
91aa484  Update dynamic split tests (aditya0by0, Jun 8, 2024)
22f882c  Update chebi + dynamic test (aditya0by0, Jun 10, 2024)
dde4196  Update setup.py (aditya0by0, Jun 11, 2024)
aecb7e6  Update Evaluation notebook + rel. code (aditya0by0, Jun 12, 2024)
98342af  set split variables when required instead of during setup (Jun 13, 2024)
89cbdb6  remove unnecessary class instantiation (Jun 13, 2024)
8b22601  Merge branch 'refs/heads/dev' into feature/testing_framework (Jun 13, 2024)
b2439f8  add isort to pre-commit, reformat with isort (Jun 13, 2024)
ec6254d  Update .gitignore (aditya0by0, Jun 13, 2024)
c1b6b0d  remove commented out cells - eval notebook (aditya0by0, Jun 13, 2024)
667b079  add filename parameter to load_processed_data (aditya0by0, Jun 13, 2024)
8c9dfe1  Updated chebi.py for train_version restructure (aditya0by0, Jun 18, 2024)
cd03023  minor changes in data split code (aditya0by0, Jun 19, 2024)
0584345  Merge branch 'dev' into feature/testing_framework (aditya0by0, Jun 23, 2024)
f747257  fix: test for consistency across runs did validate the same run twice (Jun 27, 2024)
a87dd35  migration script for chebi data for new data restructure (aditya0by0, Jul 1, 2024)
d8e68cc  argparser + fixes (aditya0by0, Jul 1, 2024)
ae61d10  transform data.pkl to data.pt instead of combining .pt split files (aditya0by0, Jul 1, 2024)
9c25543  migration - raw data error fix + id col error (aditya0by0, Jul 3, 2024)
0c2fca1  pd.to_pickle instead of pickle.dump for code consistency (aditya0by0, Jul 3, 2024)
1c4acea  migration : added docstring + type hints (aditya0by0, Jul 3, 2024)
9992a15  logic to generate splits csv + use csv if provided (aditya0by0, Jul 3, 2024)
07340cb  read only first row to validate presence of relevant columns in csv (aditya0by0, Jul 3, 2024; see the sketch after this list)
bc19a21  add jsonargparse cli to migration, gentle file-not-found handling (Jul 5, 2024)
8b0b505  add documentation for users (Jul 5, 2024)
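Commits 9992a15 and 07340cb above generate a splits CSV and validate it cheaply by reading only its first row. As a rough illustration of that pandas idiom (not the PR's actual code; the real column names and error handling live in the PR's data modules, and `id`/`split` here are placeholder names):

```python
import pandas as pd

# Placeholder column names; the actual required columns are defined in the PR's code.
REQUIRED_COLUMNS = {"id", "split"}


def validate_splits_csv(path: str) -> None:
    """Fail fast if the splits CSV lacks required columns, without loading the full file."""
    first_row = pd.read_csv(path, nrows=1)  # nrows=1: parse the header plus one data row only
    missing = REQUIRED_COLUMNS - set(first_row.columns)
    if missing:
        raise ValueError(f"Splits CSV '{path}' is missing columns: {sorted(missing)}")
```

Reading a single row keeps the check O(1) in file size, which matters for large precomputed split tables.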
Files changed
2 changes: 1 addition & 1 deletion .github/workflows/black.yml
@@ -7,4 +7,4 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v2
-      - uses: psf/black@stable
\ No newline at end of file
+      - uses: psf/black@stable
5 changes: 5 additions & 0 deletions .gitignore
@@ -161,3 +161,8 @@ cython_debug/
 #.idea/
 
 # configs/ # commented as new configs can be added as a part of a feature
+/.idea
+/data
+/logs
+/results_buffer
+electra_pretrained.ckpt
26 changes: 21 additions & 5 deletions .pre-commit-config.yaml
@@ -1,9 +1,25 @@
 repos:
-#- repo: https://github.com/PyCQA/isort
-#  rev: "5.12.0"
-#  hooks:
-#  - id: isort
 - repo: https://github.com/psf/black
   rev: "24.2.0"
   hooks:
-  - id: black
\ No newline at end of file
+  - id: black
+  - id: black-jupyter # for formatting jupyter-notebook
+
+- repo: https://github.com/pycqa/isort
+  rev: 5.13.2
+  hooks:
+  - id: isort
+    name: isort (python)
+    args: ["--profile=black"]
+
+- repo: https://github.com/asottile/seed-isort-config
+  rev: v2.2.0
+  hooks:
+  - id: seed-isort-config
+
+- repo: https://github.com/pre-commit/pre-commit-hooks
+  rev: v4.6.0
+  hooks:
+  - id: check-yaml
+  - id: end-of-file-fixer
+  - id: trailing-whitespace
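Most of the Python hunks in the rest of this PR are mechanical consequences of the isort hook added here. As the diffs below show, `isort --profile=black` rewrites each module's import block into three alphabetized groups separated by blank lines (standard library, third-party, first-party), with plain `import x` statements ahead of `from x import y` within each group. A minimal sketch of the resulting layout, reusing the imports from the `chebai/preprocessing/collate.py` hunk below (the `os` line is added purely to illustrate the standard-library group):

```python
# Layout enforced by the isort hook with --profile=black:

import os  # 1) standard library, alphabetized

import torch  # 2) third-party: plain imports first ...
from torch.nn.utils.rnn import pad_sequence  # ... then from-imports

from chebai.preprocessing.structures import XYData  # 3) first-party (this repo)
```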
12 changes: 6 additions & 6 deletions README.md
@@ -1,6 +1,6 @@
 # ChEBai
 
-ChEBai is a deep learning library designed for the integration of deep learning methods with chemical ontologies, particularly ChEBI. 
+ChEBai is a deep learning library designed for the integration of deep learning methods with chemical ontologies, particularly ChEBI.
 The library emphasizes the incorporation of the semantic qualities of the ontology into the learning process.
 
 ## Installation
@@ -21,7 +21,7 @@ pip install .
 
 ## Usage
 
-The training and inference is abstracted using the Pytorch Lightning modules. 
+The training and inference is abstracted using the Pytorch Lightning modules.
 Here are some CLI commands for the standard functionalities of pretraining, ontology extension, fine-tuning for toxicity and prediction.
 For further details, see the [wiki](https://github.com/ChEB-AI/python-chebai/wiki).
 If you face any problems, please open a new [issue](https://github.com/ChEB-AI/python-chebai/issues/new).
@@ -55,18 +55,18 @@ The `classes_path` is the path to the dataset's `raw/classes.txt` file that cont
 
 ## Evaluation
 
-An example for evaluating a model trained on the ontology extension task is given in `tutorials/eval_model_basic.ipynb`. 
+An example for evaluating a model trained on the ontology extension task is given in `tutorials/eval_model_basic.ipynb`.
 It takes in the finetuned model as input for performing the evaluation.
 
 ## Cross-validation
-You can do inner k-fold cross-validation, i.e., train models on k train-validation splits that all use the same test 
+You can do inner k-fold cross-validation, i.e., train models on k train-validation splits that all use the same test
 set. For that, you need to specify the total_number of folds as
 ```
 --data.init_args.inner_k_folds=K
 ```
 and the fold to be used in the current optimisation run as
-``` 
+```
 --data.init_args.fold_index=I
 ```
-To train K models, you need to do K such calls, each with a different `fold_index`. On the first call with a given 
+To train K models, you need to do K such calls, each with a different `fold_index`. On the first call with a given
 `inner_k_folds`, all folds will be created and stored in the data directory
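For context, a single fold's training call might look like the sketch below. This is not taken from the PR: it assumes the `python -m chebai fit` entry point and the config-file conventions used elsewhere in this README, and `K=5` is an arbitrary choice.

```
python -m chebai fit --model=configs/model/electra.yml --data=configs/data/chebi50.yml \
  --data.init_args.inner_k_folds=5 --data.init_args.fold_index=0
```

Repeating the call with `fold_index` 0 through 4 trains the five models; the folds themselves are created and stored once, on the first call.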
2 changes: 1 addition & 1 deletion chebai/callbacks.py
@@ -1,8 +1,8 @@
 import json
 import os
 
-from lightning.pytorch.callbacks import BasePredictionWriter
 import torch
+from lightning.pytorch.callbacks import BasePredictionWriter
 
 
 class ChebaiPredictionWriter(BasePredictionWriter):
5 changes: 3 additions & 2 deletions chebai/callbacks/prediction_callback.py
@@ -1,8 +1,9 @@
-from lightning.pytorch.callbacks import BasePredictionWriter
-import torch
 import os
 import pickle
+
+import torch
+from lightning.pytorch.callbacks import BasePredictionWriter
 
 
 class PredictionWriter(BasePredictionWriter):
     def __init__(self, output_dir, write_interval):
6 changes: 3 additions & 3 deletions chebai/loggers/custom.py
@@ -1,11 +1,11 @@
-from datetime import datetime
-from typing import Literal, Optional, Union, List
 import os
+from datetime import datetime
+from typing import List, Literal, Optional, Union
 
+import wandb
 from lightning.fabric.utilities.types import _PATH
 from lightning.pytorch.callbacks import ModelCheckpoint
 from lightning.pytorch.loggers import WandbLogger
-import wandb
 
 
 class CustomLogger(WandbLogger):
8 changes: 5 additions & 3 deletions chebai/loss/bce_weighted.py
@@ -1,9 +1,11 @@
+import os
+import pickle
+
+import pandas as pd
 import torch
+
 from chebai.preprocessing.datasets.base import XYBaseDataModule
 from chebai.preprocessing.datasets.pubchem import LabeledUnlabeledMixed
-import pandas as pd
-import os
-import pickle
 
 
 class BCEWeighted(torch.nn.BCEWithLogitsLoss):
7 changes: 4 additions & 3 deletions chebai/loss/semantic.py
@@ -1,14 +1,15 @@
 import csv
+import math
 import os
 import pickle
 
-import math
 import torch
+
 from typing import Literal, Union
 
-from chebai.preprocessing.datasets.chebi import _ChEBIDataExtractor, ChEBIOver100
-from chebai.preprocessing.datasets.pubchem import LabeledUnlabeledMixed
 from chebai.loss.bce_weighted import BCEWeighted
+from chebai.preprocessing.datasets.chebi import ChEBIOver100, _ChEBIDataExtractor
+from chebai.preprocessing.datasets.pubchem import LabeledUnlabeledMixed
 
 
 class ImplicationLoss(torch.nn.Module):
4 changes: 2 additions & 2 deletions chebai/models/base.py
@@ -1,9 +1,9 @@
-from typing import Optional
 import logging
 import typing
+from typing import Optional
 
-from lightning.pytorch.core.module import LightningModule
 import torch
+from lightning.pytorch.core.module import LightningModule
 
 from chebai.preprocessing.structures import XYData
 
4 changes: 2 additions & 2 deletions chebai/models/chemberta.py
@@ -1,7 +1,8 @@
-from tempfile import TemporaryDirectory
 import logging
 import random
+from tempfile import TemporaryDirectory
 
+import torch
 from torch import nn
 from torch.nn.functional import one_hot
 from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
@@ -11,7 +12,6 @@
     RobertaModel,
     RobertaTokenizer,
 )
-import torch
 
 from chebai.models.base import ChebaiBaseNet
 
4 changes: 2 additions & 2 deletions chebai/models/chemyk.py
@@ -3,11 +3,11 @@
 import pickle
 import sys
 
+import networkx as nx
+import torch
 from torch import nn
 from torch.nn import functional as F
 from torch.nn.functional import pad
-import networkx as nx
-import torch
 
 from chebai.models.base import ChebaiBaseNet
 
4 changes: 2 additions & 2 deletions chebai/models/electra.py
@@ -1,7 +1,8 @@
+import logging
 from math import pi
 from tempfile import TemporaryDirectory
-import logging
 
+import torch
 from torch import nn
 from torch.nn.utils.rnn import pad_sequence
 from transformers import (
@@ -10,7 +11,6 @@
     ElectraForPreTraining,
     ElectraModel,
 )
-import torch
 
 from chebai.loss.pretraining import ElectraPreLoss  # noqa
 from chebai.models.base import ChebaiBaseNet
4 changes: 2 additions & 2 deletions chebai/models/lnn_model.py
@@ -1,8 +1,8 @@
-from lnn import Implies, Model, Not, Predicate, Variable, World
-from owlready2 import get_ontology
 import fastobo
 import pyhornedowl
 import tqdm
+from lnn import Implies, Model, Not, Predicate, Variable, World
+from owlready2 import get_ontology
 
 
 def get_name(iri: str):
2 changes: 1 addition & 1 deletion chebai/models/recursive.py
@@ -1,9 +1,9 @@
 import logging
 
-from torch import exp, nn, tensor
 import networkx as nx
 import torch
 import torch.nn.functional as F
+from torch import exp, nn, tensor
 
 from chebai.models.base import ChebaiBaseNet
 
2 changes: 1 addition & 1 deletion chebai/preprocessing/bin/BPE_SWJ/vocab.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion chebai/preprocessing/collate.py
@@ -1,5 +1,5 @@
-from torch.nn.utils.rnn import pad_sequence
 import torch
+from torch.nn.utils.rnn import pad_sequence
 
 from chebai.preprocessing.structures import XYData
 
6 changes: 3 additions & 3 deletions chebai/preprocessing/collect_all.py
@@ -2,16 +2,16 @@
 import os
 import sys
 
+import pytorch_lightning as pl
+import torch
+import torch.nn.functional as F
 from pytorch_lightning import loggers as pl_loggers
 from pytorch_lightning.callbacks import ModelCheckpoint
 from pytorch_lightning.metrics import F1
 from sklearn.metrics import f1_score
 from torch import nn
 from torch_geometric import nn as tgnn
 from torch_geometric.data import DataLoader
-import pytorch_lightning as pl
-import torch
-import torch.nn.functional as F
 
 from data import ClassificationData, JCIClassificationData
 
8 changes: 4 additions & 4 deletions chebai/preprocessing/datasets/base.py
@@ -1,14 +1,14 @@
-from typing import List, Union
 import os
 import random
 import typing
+from typing import List, Union
 
-from lightning.pytorch.core.datamodule import LightningDataModule
-from lightning_utilities.core.rank_zero import rank_zero_info
-from torch.utils.data import DataLoader
 import lightning as pl
 import torch
 import tqdm
+from lightning.pytorch.core.datamodule import LightningDataModule
+from lightning_utilities.core.rank_zero import rank_zero_info
+from torch.utils.data import DataLoader
 
 from chebai.preprocessing import reader as dr