Commit

fix: read the docs build error
Kohulan committed Jan 17, 2024
1 parent af8a779 commit 24ce5e1
Showing 14 changed files with 127 additions and 125 deletions.
2 changes: 1 addition & 1 deletion .readthedocs.yml
@@ -9,7 +9,7 @@ build:
os: ubuntu-22.04
tools:
python: "3.11"

# Build documentation in the docs/ directory with Sphinx
sphinx:
configuration: docs/conf.py
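
Since the commit fixes a Read the Docs build error and the hunk above points the build at docs/conf.py, a minimal Sphinx configuration of the kind such a build expects is sketched below. The project metadata, extensions, and theme are illustrative assumptions, not the repository's actual conf.py.

# docs/conf.py -- hypothetical minimal Sphinx configuration, for illustration only
project = "DECIMER"
author = "Placeholder Author"  # assumption, not taken from the repository

extensions = [
    "sphinx.ext.autodoc",   # pull API documentation from docstrings
    "sphinx.ext.napoleon",  # parse Google-style "Args:" sections like the ones in the diffs below
]

html_theme = "sphinx_rtd_theme"  # common Read the Docs theme; an assumption here
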
26 changes: 15 additions & 11 deletions Benchmark/distort_images.py
@@ -9,14 +9,15 @@


def distort_image(input_image_path: str, output_image_path: str) -> None:
"""
This function takes the path of an input image and the desired output image path (both str).
It rotates the input image randomly (-5° or +5°) and applies shearing (in x or y-direction with an
angle of -9° or 9°) as described by Clevert et al (Img2Mol (preprint)).
I was a bit confused about the "shearing factor" of +/- 0.1 described by Clevert et al. Normally the transformation
is done with a shearing angle. I assume here that the corresponding angle is 0.1*90° = 9° as 90° is the extreme case
where the image turns into a horizontal or vertical line.
This seems reasonable as it results in a mild distortion as described by the authors.
"""This function takes the path of an input image and the desired output
image path (both str). It rotates the input image randomly (-5° or +5°) and
applies shearing (in x or y-direction with an angle of -9° or 9°) as
described by Clevert et al (Img2Mol (preprint)). I was a bit confused about
the "shearing factor" of +/- 0.1 described by Clevert et al. Normally the
transformation is done with a shearing angle. I assume here that the
corresponding angle is 0.1*90° = 9° as 90° is the extreme case where the
image turns into a horizontal or vertical line. This seems reasonable as it
results in a mild distortion as described by the authors.
Args:
input_image_path (str): Path of input image
@@ -43,9 +44,12 @@ def distort_image(input_image_path: str, output_image_path: str) -> None:


def main():
"""This script takes an input directory with images of chemical structure depictions, applies a random
rotation (-5° or +5°) as well as shearing (shearing factor drawn from [-0.1, 0.1]) to every input image. These
distortions are supposed to imitate the image modifications described by Clevert et al (Img2Mol (preprint))
"""This script takes an input directory with images of chemical structure
depictions, applies a random rotation (-5° or +5°) as well as shearing
(shearing factor drawn from [-0.1, 0.1]) to every input image.
These distortions are supposed to imitate the image modifications
described by Clevert et al (Img2Mol (preprint))
"""
input_dir = os.path.abspath(sys.argv[1])
output_dir = os.path.abspath(sys.argv[2])
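
For readers following the shear-factor discussion in the docstrings above, here is a minimal sketch of the described distortion using Pillow, assuming the 0.1 factor maps to a 9° shear angle as the docstring suggests; the actual script's transform may differ in detail, and the helper name is hypothetical.

import math
import random

from PIL import Image


def distort_sketch(input_image_path: str, output_image_path: str) -> None:
    # Illustration only: random +/-5 degree rotation followed by a +/-9 degree x-shear.
    image = Image.open(input_image_path)
    image = image.rotate(random.choice([-5, 5]), expand=True, fillcolor="white")
    shear = math.tan(math.radians(random.choice([-9, 9])))
    # Affine coefficients (a, b, c, d, e, f) map output (x, y) to input (a*x + b*y + c, d*x + e*y + f).
    image = image.transform(
        image.size, Image.AFFINE, (1, shear, 0, 0, 1, 0), fillcolor="white"
    )
    image.save(output_image_path)
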
5 changes: 2 additions & 3 deletions Benchmark/evaluate_benchmarks.py
@@ -8,9 +8,8 @@
def compare_molecules_inchi_match(
input_file_path: str, reference_directory: str
) -> None:
"""
This function checks whether the molecules in the DECIMER results match a set of reference
mol-files using Standard InChI.
"""This function checks if the molecules in the DECIMER results to a set of
reference mol-files using Standard InChI.
Args:
input_file (str): Path of file that contains image names and SMILES as created by run_decimer_save_results.py
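
A hedged sketch of the Standard InChI comparison that the docstring describes, using RDKit; the real script's file handling, reporting, and naming conventions are not reproduced here, and the helper name is hypothetical.

from rdkit import Chem


def inchi_match(predicted_smiles: str, reference_molfile_path: str) -> bool:
    # Illustration: compare a predicted SMILES against a reference mol-file via Standard InChI.
    predicted = Chem.MolFromSmiles(predicted_smiles)
    reference = Chem.MolFromMolFile(reference_molfile_path)
    if predicted is None or reference is None:
        return False
    return Chem.MolToInchi(predicted) == Chem.MolToInchi(reference)
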
7 changes: 3 additions & 4 deletions Benchmark/run_decimer_save_results.py
@@ -5,10 +5,9 @@


def main():
"""
This script runs Decimer on every image in a given directory (first argument) and saves the
results in a text file with a given ID (second argument).
"""
"""This script runs Decimer on every image in a given directory (first
argument) and saves the results in a text file with a given ID (second
argument)."""
im_path = sys.argv[1]
save_ID = sys.argv[2]

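
The loop inside main() presumably resembles the sketch below, built on the package-level predict_SMILES helper; the output file name and tab-separated format are assumptions for illustration.

import os

from DECIMER import predict_SMILES


def run_on_directory(image_directory: str, save_id: str) -> None:
    # Illustration: predict a SMILES string for every image and store "name<TAB>SMILES" lines.
    with open(f"results_{save_id}.txt", "w") as results:
        for image_name in sorted(os.listdir(image_directory)):
            smiles = predict_SMILES(os.path.join(image_directory, image_name))
            results.write(f"{image_name}\t{smiles}\n")
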
30 changes: 14 additions & 16 deletions DECIMER/DECIMER_EfficinetNetV2_Transfomer_Trainer.py
@@ -76,8 +76,8 @@


def decode_image(image_data):
"""Preprocess the input image for Efficient-Net and
returned the preprocessed image
"""Preprocess the input image for Efficient-Net and returned the
preprocessed image.
Args:
image_data (int array): Decoded image in 2D array
@@ -98,8 +98,8 @@ def decode_image(image_data):


def read_tfrecord(example):
"""Read a tf record file and decodes the image and text data
back into original form.
"""Read a tf record file and decodes the image and text data back into
original form.
Args:
example (tf.record): single entry from tf record file
@@ -124,7 +124,7 @@ def read_tfrecord(example):


def numericalSort(value):
"""Sorts the filenames numerically
"""Sorts the filenames numerically.
Args:
value (int): numerical value of the file name
@@ -138,7 +138,7 @@ def numericalSort(value):


def get_dataset(batch_size=BATCH_SIZE, buffered_size=BUFFER_SIZE, path=""):
"""Creates a batch of data rom a given dataset
"""Creates a batch of data rom a given dataset.
Args:
batch_size (int, optional): number of datapoints per batch. Defaults to BATCH_SIZE.
@@ -207,8 +207,8 @@ def get_dataset(batch_size=BATCH_SIZE, buffered_size=BUFFER_SIZE, path=""):


def prepare_for_training(lr_config, encoder_config, transformer_config, verbose=0):
"""Preparte the model for training. initiate the learning rate, loss object, metrics
and optimizer
"""Preparte the model for training. initiate the learning rate, loss
object, metrics and optimizer.
Args:
lr_config (int): values for learning rate configuration
@@ -239,13 +239,11 @@ def loss_fn(real, pred):
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
name="train_accuracy", dtype=tf.float32
)
"""
validation_loss = tf.keras.metrics.Mean(
name="validation_loss", dtype=tf.float32
)
validation_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
name="validation_accuracy", dtype=tf.float32
)
"""validation_loss = tf.keras.metrics.Mean(
name="validation_loss", dtype=tf.float32 ) validation_accuracy =
tf.keras.metrics.SparseCategoricalAccuracy(
name="validation_accuracy", dtype=tf.float32 )
"""
# Declare the learning rate schedule (try this as actual lr schedule and list...)
lr_scheduler = config.CustomSchedule(
@@ -327,7 +325,7 @@ def loss_fn(real, pred):

# Main training step function
def train_step(image_batch, selfies_batch):
"""Main training step function
"""Main training step function.
Args:
image_batch (float array): Input image batch
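
For context on the loss_fn named in the hunk headers above, transformer trainers of this kind typically use a padding-masked sparse categorical cross-entropy; the sketch below shows that common pattern and is not copied from the repository.

import tensorflow as tf

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction="none"
)


def masked_loss(real, pred):
    # Illustration: ignore padding tokens (id 0) when averaging the per-token loss.
    mask = tf.math.logical_not(tf.math.equal(real, 0))
    loss_ = loss_object(real, pred)
    mask = tf.cast(mask, dtype=loss_.dtype)
    loss_ *= mask
    return tf.reduce_sum(loss_) / tf.reduce_sum(mask)
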
25 changes: 10 additions & 15 deletions DECIMER/Predictor_usingCheckpoints.py
@@ -130,9 +130,8 @@ def __init__(self, encoder, tokenizer, transformer, max_length):
self.max_length = max_length

def __call__(self, Decoded_image):
"""
Run the DECIMER predictor model when called.
Usage of predict_SMILES or predict_SMILES_with_confidence is recommended instead
"""Run the DECIMER predictor model when called. Usage of predict_SMILES
or predict_SMILES_with_confidence is recommended instead.
Args:
Decoded_image (_type_): output of config.decode_image
@@ -180,9 +179,8 @@ def __call__(self, Decoded_image):


def detokenize_output(predicted_array: tf.Tensor) -> str:
"""
This function takes the predicted array of tokens and returns the predicted SMILES
string.
"""This function takes the predicted array of tokens and returns the
predicted SMILES string.
Args:
predicted_array (tf.Tensor): Transformer Decoder output array (predicted tokens)
@@ -203,11 +201,10 @@ def detokenize_output_add_confidence(
predicted_array: tf.Tensor,
confidence_array: tf.Tensor,
) -> List[Tuple[str, float]]:
"""
This function takes the predicted array of tokens as well as the confidence values
returned by the Transformer Decoder and returns a list of tuples
that contain each token of the predicted SMILES string and the confidence
value.
"""This function takes the predicted array of tokens as well as the
confidence values returned by the Transformer Decoder and returns a list of
tuples that contain each token of the predicted SMILES string and the
confidence value.
Args:
predicted_array (tf.Tensor): Transformer Decoder output array (predicted tokens)
@@ -234,8 +231,7 @@ def detokenize_output_add_confidence(


def predict_SMILES_with_confidence(image_path: str) -> List[Tuple[str, float]]:
"""
This function takes an image path (str) and returns a list of tuples
"""This function takes an image path (str) and returns a list of tuples
that contain each token of the predicted SMILES string and the confidence
level from the last layer of the Transformer decoder.
@@ -255,8 +251,7 @@ def predict_SMILES_with_confidence(image_path: str) -> List[Tuple[str, float]]:


def predict_SMILES(image_path: str) -> str:
"""
This function takes an image path (str) and returns the SMILES
"""This function takes an image path (str) and returns the SMILES
representation of the depicted molecule (str).
Args:
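
As a usage illustration for the two helpers documented above, the snippet below rebuilds the SMILES string and averages the per-token confidences; the import path and the way tokens are joined are assumptions, not taken from the repository.

from DECIMER import predict_SMILES_with_confidence


def smiles_with_mean_confidence(image_path: str):
    # Illustration: average the confidence values returned for each predicted token.
    token_confidence_pairs = predict_SMILES_with_confidence(image_path)
    smiles = "".join(token for token, _ in token_confidence_pairs)
    mean_confidence = sum(conf for _, conf in token_confidence_pairs) / len(token_confidence_pairs)
    return smiles, mean_confidence
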
41 changes: 23 additions & 18 deletions DECIMER/Repack_model.py
@@ -87,9 +87,8 @@


def detokenize_output(predicted_array: tf.Tensor) -> str:
"""
This function takes the predicted array of tokens and returns the predicted SMILES
string.
"""This function takes the predicted array of tokens and returns the
predicted SMILES string.
Args:
predicted_array (tf.Tensor): Transformer Decoder output array (predicted tokens)
@@ -110,11 +109,10 @@ def detokenize_output_add_confidence(
predicted_array: tf.Tensor,
confidence_array: tf.Tensor,
) -> List[Tuple[str, float]]:
"""
This function takes the predicted array of tokens as well as the confidence values
returned by the Transformer Decoder and returns a list of tuples
that contain each token of the predicted SMILES string and the confidence
value.
"""This function takes the predicted array of tokens as well as the
confidence values returned by the Transformer Decoder and returns a list of
tuples that contain each token of the predicted SMILES string and the
confidence value.
Args:
predicted_array (tf.Tensor): Transformer Decoder output array (predicted tokens)
@@ -137,14 +135,18 @@ def detokenize_output_add_confidence(


class DECIMER_Predictor(tf.Module):
"""This is a class which takes care of inference. It loads the saved checkpoint and the necessary
tokenizers. The inference begins with the start token (<start>) and ends when the end token (<end>)
is met. This class can only work with tf.Tensor objects. The strings should be transformed into np.arrays
before feeding them into this class.
"""This is a class which takes care of inference.
It loads the saved checkpoint and the necessary tokenizers. The
inference begins with the start token (<start>) and ends when the
end token (<end>) is met. This class can only work with tf.Tensor
objects. The strings should be transformed into np.arrays before
feeding them into this class.
"""

def __init__(self, encoder, tokenizer, transformer, max_length):
"""Load the tokenizers, the maximum input and output length and the model.
"""Load the tokenizers, the maximum input and output length and the
model.
Args:
encoder (tf.keras.model): The encoder model
@@ -158,8 +160,8 @@ def __init__(self, encoder, tokenizer, transformer, max_length):
self.max_length = max_length

def __call__(self, Decoded_image):
"""This fuction takes in the Decoded image as input and
makes the predicted list of tokens and return the tokens as tf.Tensor array.
"""This fuction takes in the Decoded image as input and makes the
predicted list of tokens and return the tokens as tf.Tensor array.
Before feeding the input array we must define start and the end tokens.
Args:
@@ -211,7 +213,9 @@ def __call__(self, Decoded_image):


class ExportDECIMERPredictor(tf.Module):
"""This class wraps the inference class into a module into tf.Module sub-class, with a tf.function on the __call__ method.
"""This class wraps the inference class into a module into tf.Module sub-
class, with a tf.function on the __call__ method.
So we could export the model as a tf.saved_model.
"""

@@ -222,8 +226,9 @@ def __init__(self, DECIMER):
@tf.function
def __call__(self, Decoded_Image):
"""This fucntion calls the __call__function from the translator class.
In the tf.function only the output sentence is returned.
Thanks to the non-strict execution in tf.function any unnecessary values are never computed.
In the tf.function only the output sentence is returned. Thanks to the
non-strict execution in tf.function any unnecessary values are never
computed.
Args:
sentence (tf.Tensor[tf.int32]): Input array in tf.EagerTensor format.
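
The export pattern the ExportDECIMERPredictor docstring describes, a tf.Module wrapper whose tf.function __call__ is saved as a SavedModel, generally looks like the sketch below; the input signature, class name, and directory name are placeholders rather than the repository's actual values.

import tensorflow as tf


class ExportSketch(tf.Module):
    # Illustration of wrapping an inference callable so it can be exported with tf.saved_model.save.
    def __init__(self, predictor):
        super().__init__()
        self.predictor = predictor

    @tf.function(input_signature=[tf.TensorSpec(shape=[1, 512, 512, 3], dtype=tf.float32)])
    def __call__(self, decoded_image):
        return self.predictor(decoded_image)


# exported = ExportSketch(decimer_predictor)        # hypothetical predictor instance
# tf.saved_model.save(exported, "saved_model_dir")  # writes a reloadable SavedModel
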
5 changes: 1 addition & 4 deletions DECIMER/__init__.py
@@ -1,7 +1,5 @@
# -*- coding: utf-8 -*-
"""
DECIMER V2.4.0 Python Package.
============================
"""DECIMER V2.4.0 Python Package. ============================
This repository contains DECIMER-V2 (Deep lEarning for Chemical ImagE Recognition), a project
launched to address the OCSR problem with the latest computational intelligence methods
@@ -19,7 +17,6 @@
For comments, bug reports or feature ideas,
please raise an issue on the GitHub repository.
"""

__version__ = "2.4.0"
