update docs

AI4Bharat · Oct 19, 2021 · 975b088 · 975b088
1 parent 8960900
commit 975b088
Show file tree

Hide file tree

Showing 29 changed files with 281 additions and 93 deletions.
diff --git a/docs/api.rst b/docs/api.rst
@@ -22,6 +22,12 @@ Decoders
 .. automodule:: openhands.models.decoder
     :members:
 
+SSL-Models
+^^^^^^^^^^
+
+.. automodule:: openhands.models.ssl
+    :members:
+
 Datasets
 --------
 

diff --git a/docs/instructions/inference.rst b/docs/instructions/inference.rst
@@ -15,7 +15,7 @@ Computing accuacy using test set
 .. code:: python
 
     import omegaconf
-    from openhands.core.inference import InferenceModel
+    from openhands.apis.inference import InferenceModel
 
     cfg = omegaconf.OmegaConf.load("path/to/config.yaml")
     model = InferenceModel(cfg=cfg)

diff --git a/docs/instructions/self_supervised.rst b/docs/instructions/self_supervised.rst
@@ -43,7 +43,7 @@ Finally, run the following snippet to perform the pretraining:
 .. code:: python
 
     import omegaconf
-    from openhands.core.dpc import PretrainingModelDPC
+    from openhands.apis.dpc import PretrainingModelDPC
 
     cfg = omegaconf.OmegaConf.load("path/to/config.yaml")
     trainer = PretrainingModelDPC(cfg=cfg)

diff --git a/docs/instructions/training.rst b/docs/instructions/training.rst
@@ -13,7 +13,7 @@ After you have a config ready, run the following python snippet:
 .. code:: python
 
     import omegaconf
-    from openhands.core.classification_model import ClassificationModel
+    from openhands.apis.classification_model import ClassificationModel
     from openhands.core.exp_utils import get_trainer
 
     cfg = omegaconf.OmegaConf.load("path/to/config.yaml")

diff --git a/docs/requirements.txt b/docs/requirements.txt
@@ -22,3 +22,4 @@ scikit_learn==1.0
 sphinx
 myst-parser
 sphinx_rtd_theme
+sphinx_copybutton
diff --git a/openhands/apis/dpc.py b/openhands/apis/dpc.py
@@ -33,7 +33,7 @@ def calc_topk_accuracy(output, target, topk=(1,)):
 
 def process_output(mask):
     """take mask as input, compute the target for contrastive loss"""
-    (B, NP, B2, NS) = mask.size()  # [B, P, SQ, B, N, SQ]
+    (B, NP, B2, NS) = mask.size()  # [B, P, B, N,]
     target = (mask == 1).float()
     target.requires_grad = False
     return target, (B, B2, NS, NP)

diff --git a/openhands/datasets/isolated/autsl.py b/openhands/datasets/isolated/autsl.py
@@ -5,9 +5,9 @@
 
 class AUTSLDataset(BaseIsolatedDataset):
     """
-    Turkish Isolated Sign language dataset from the paper
+    Turkish Isolated Sign language dataset from the paper:
     
-    > [AUTSL: A Large Scale Multi-modal Turkish Sign Language Dataset and Baseline Methods](https://arxiv.org/abs/2008.00932)<br>
+    `AUTSL: A Large Scale Multi-modal Turkish Sign Language Dataset and Baseline Methods <https://arxiv.org/abs/2008.00932>`_
     """
     def read_glosses(self):
         class_mappings_df = pd.read_csv(self.class_mappings_file_path)

diff --git a/openhands/datasets/isolated/csl.py b/openhands/datasets/isolated/csl.py
@@ -5,9 +5,9 @@
 
 class CSLDataset(BaseIsolatedDataset):
     """
-    Chinese Isolated Sign language dataset from the paper
+    Chinese Isolated Sign language dataset from the paper:
     
-    > [Attention-Based 3D-CNNs for Large-Vocabulary Sign Language Recognition](https://ieeexplore.ieee.org/document/8466903)<br>
+    `Attention-Based 3D-CNNs for Large-Vocabulary Sign Language Recognition <https://ieeexplore.ieee.org/document/8466903>`_
     """
     def read_glosses(self):
         self.glosses = []

diff --git a/openhands/datasets/isolated/devisign.py b/openhands/datasets/isolated/devisign.py
@@ -7,9 +7,9 @@
 
 class DeviSignDataset(BaseIsolatedDataset):
     """
-    Chinese Isolated Sign language dataset from the paper
+    Chinese Isolated Sign language dataset from the paper:
     
-    > [The devisign large vocabulary of chinese sign language database and baseline evaluations]<br>
+    `The devisign large vocabulary of chinese sign language database and baseline evaluations`
     """
     def read_glosses(self):
         self.glosses = []

diff --git a/openhands/datasets/isolated/gsl.py b/openhands/datasets/isolated/gsl.py
@@ -5,9 +5,9 @@
 
 class GSLDataset(BaseIsolatedDataset):
     """
-    Greek Isolated Sign language dataset from the paper
+    Greek Isolated Sign language dataset from the paper:
     
-    > [A Comprehensive Study on Deep Learning-based Methods for Sign Language Recognition](https://ieeexplore.ieee.org/document/8466903)<br>
+    `A Comprehensive Study on Deep Learning-based Methods for Sign Language Recognition <https://arxiv.org/abs/2007.12530>`_
     """
     def read_glosses(self):
         self.glosses = [

diff --git a/openhands/datasets/isolated/include.py b/openhands/datasets/isolated/include.py
@@ -5,9 +5,9 @@
 
 class INCLUDEDataset(BaseIsolatedDataset):
     """
-    Indian Isolated Sign language dataset from the paper
+    Indian Isolated Sign language dataset from the paper:
     
-    > [INCLUDE: A Large Scale Dataset for Indian Sign Language Recognition](https://dl.acm.org/doi/10.1145/3394171.3413528)<br>
+    `INCLUDE: A Large Scale Dataset for Indian Sign Language Recognition <https://dl.acm.org/doi/10.1145/3394171.3413528>`_
     """
     def read_glosses(self):
         # TODO: Separate the classes into a separate file?

diff --git a/openhands/datasets/isolated/lsa64.py b/openhands/datasets/isolated/lsa64.py
@@ -6,9 +6,9 @@
 
 class LSA64Dataset(BaseIsolatedDataset):
     """
-    Argentinian Isolated Sign language dataset from the paper
+    Argentinian Isolated Sign language dataset from the paper:
     
-    > [LSA64: An Argentinian Sign Language Dataset](http://sedici.unlp.edu.ar/bitstream/handle/10915/56764/Documento_completo.pdf-PDFA.pdf)<br>
+    `LSA64: An Argentinian Sign Language Dataset <http://sedici.unlp.edu.ar/bitstream/handle/10915/56764/Documento_completo.pdf-PDFA.pdf>`_
     """
     def read_glosses(self):
         df = pd.read_csv(self.class_mappings_file_path, delimiter="|", header=None)

diff --git a/openhands/datasets/isolated/ms_asl.py b/openhands/datasets/isolated/ms_asl.py
@@ -5,9 +5,9 @@
 
 class MSASLDataset(BaseIsolatedDataset):
     """
-    American Isolated Sign language dataset from the paper
+    American Isolated Sign language dataset from the paper:
     
-    > [MS-ASL: A Large-Scale Data Set and Benchmark for Understanding American Sign Language](https://arxiv.org/abs/1812.01053)<br>
+    `MS-ASL: A Large-Scale Data Set and Benchmark for Understanding American Sign Language <https://arxiv.org/abs/1812.01053>`_
     """
     def read_glosses(self):
         # TODO: Separate the classes into a separate file?

diff --git a/openhands/datasets/isolated/wlasl.py b/openhands/datasets/isolated/wlasl.py
@@ -5,9 +5,9 @@
 
 class WLASLDataset(BaseIsolatedDataset):
     """
-    American Isolated Sign language dataset from the paper
+    American Isolated Sign language dataset from the paper:
     
-    > [Word-level Deep Sign Language Recognition from Video: A New Large-scale Dataset and Methods Comparison](https://arxiv.org/abs/1910.11006)<br>
+    `Word-level Deep Sign Language Recognition from Video: A New Large-scale Dataset and Methods Comparison <https://arxiv.org/abs/1910.11006>`_
     """
     def read_glosses(self):
         with open(self.split_file, "r") as f:

diff --git a/openhands/datasets/pose_transforms.py b/openhands/datasets/pose_transforms.py
@@ -95,6 +95,7 @@ def __call__(self, data:dict):
 class PoseSelect:
     """
     Select the given index keypoints from all keypoints. 
+    
     Args:
         preset (str | None, optional): can be used to specify existing presets - `mediapipe_holistic_minimal_27` or `mediapipe_holistic_top_body_59`
         If None, then the `pose_indexes` argument indexes will be used to select. Default: ``None``
@@ -135,7 +136,7 @@ def __call__(self, data:dict):
 # Adopted from: https://github.com/AmitMY/pose-format/
 class ShearTransform:
     """
-    Applies [2D shear transform](https://en.wikipedia.org/wiki/Shear_matrix)
+    Applies `2D shear <https://en.wikipedia.org/wiki/Shear_matrix>`_ transformation
     
     Args:
         shear_std (float): std to use for shear transformation. Default: 0.2
@@ -168,7 +169,7 @@ def __call__(self, data:dict):
 
 class RotatationTransform:
     """
-    Applies [2D rotation transformation](https://en.wikipedia.org/wiki/Rotation_matrix).
+    Applies `2D rotation <https://en.wikipedia.org/wiki/Rotation_matrix>`_ transformation.
     
     Args:
         rotation_std (float): std to use for rotation transformation. Default: 0.2
@@ -205,7 +206,8 @@ def __call__(self, data):
 
 class ScaleTransform:
     """
-    Applies [Scaling](https://en.wikipedia.org/wiki/Scaling_(geometry)) transformation
+    Applies `Scaling <https://en.wikipedia.org/wiki/Scaling_(geometry)>`_ transformation
+
     Args:
         scale_std (float): std to use for Scaling transformation. Default: 0.2
     """
@@ -452,10 +454,10 @@ def __call__(self, data):
 class TemporalSample:
     """
     Randomly choose Uniform and Temporal subsample
-    If subsample_mode==2, randomly sub-sampling or uniform-sampling is done
-    If subsample_mode==0, only uniform-sampling (for test sets)
-    If subsample_mode==1, only sub-sampling (to reproduce results of some papers that use only subsampling)
-    
+        - If subsample_mode==2, randomly sub-sampling or uniform-sampling is done
+        - If subsample_mode==0, only uniform-sampling (for test sets)
+        - If subsample_mode==1, only sub-sampling (to reproduce results of some papers that use only subsampling)
+        
     Args:
         num_frames (int): Number of frames to subsample.
         subsample_mode (int): Mode to choose.

diff --git a/openhands/datasets/ssl/dpc_dataset.py b/openhands/datasets/ssl/dpc_dataset.py
@@ -11,6 +11,18 @@
 from ...core.data import create_pose_transforms
 
 class WindowedDatasetHDF5(torch.utils.data.DataLoader):
+    """
+    Windowed dataset loader from HDF5 for SL-DPC model.
+
+    Args:
+        root_dir (str): Directory which contains the data.
+        file_format (str): File type. Default: ``h5``.
+        transforms (obj | None): Compose object with transforms or None. Default: ``None``.
+        seq_len (int): Sequence length for each window. Default: 10. 
+        num_seq (int): Total number of windows. Default: 7.
+        downsample (int): Number of frames to skip per timestep when sampling. Default: 3.
+        num_channels (int): Number of input channels. Default: 2.
+    """
     def __init__(
         self,
         root_dir,
@@ -113,13 +125,26 @@ def get_weights_for_balanced_sampling(self):
 
 
 class WindowedDatasetPickle(torch.utils.data.DataLoader):
+    """
+    Windowed dataset loader from pickle files for SL-DPC model.
+    This module is for loading finetuning datasets.
+
+    Args:
+        root_dir (str): Directory which contains the data.
+        file_format (str): File type. Default: ``pkl``.
+        transforms (obj | None): Compose object with transforms or None. Default: ``None``.
+        seq_len (int): Sequence length for each window. Default: 10. 
+        num_seq (int): Total number of windows. Default: 10.
+        downsample (int): Number of frames to skip per timestep when sampling. Default: 1.
+        num_channels (int): Number of input channels. Default: 2.
+    """
     def __init__(
         self,
         root_dir,
         file_format='pkl',
         transforms=None,
         seq_len=10,
-        num_seq=6,
+        num_seq=10,
         downsample=1,
         num_channels=2,
     ):

diff --git a/...ands/models/encoder/transformer_layers.py → ...hands/models/common/transformer_layers.py b/...ands/models/encoder/transformer_layers.py → ...hands/models/common/transformer_layers.py
diff --git a/openhands/models/decoder/bert_hf.py b/openhands/models/decoder/bert_hf.py
@@ -35,6 +35,15 @@ def forward(self, x):
 
 
 class BERT(nn.Module):
+    """
+    BERT decoder module. 
+
+    Args:
+        n_features (int): Number of features in the input.
+        num_class (int): Number of classes for classification.
+        config (dict): Configuration set for BERT layer.
+    
+    """
     def __init__(self, n_features, num_class, config):
         """
         pooling_type -> ["max","avg","att","cls"]
@@ -71,7 +80,11 @@ def __init__(self, n_features, num_class, config):
 
     def forward(self, x):
         """
-        x.shape: (batch_size, T, n_features)
+        Args:
+            x (torch.Tensor): Input tensor of shape: (batch_size, T, n_features)
+        
+        returns:
+            torch.Tensor: logits for classification.
         """
         x = self.l1(x)
         if self.cls_token:

diff --git a/openhands/models/decoder/fc.py b/openhands/models/decoder/fc.py
@@ -3,7 +3,15 @@
 
 
 class FC(nn.Module):
-    def __init__(self, n_features, num_class, dropout_ratio=0.2, batch_norm=False, **kwargs):
+    """
+    Fully connected layer head
+    Args:
+        n_features (int): Number of features in the input.
+        num_class (int): Number of classes for classification.
+        dropout_ratio (float): Dropout ratio to use. Default: 0.2.
+        batch_norm (bool): Whether to use batch norm or not. Default: ``False``.
+    """
+    def __init__(self, n_features, num_class, dropout_ratio=0.2, batch_norm=False):
         super().__init__()
         self.dropout = nn.Dropout(p=dropout_ratio)
         self.bn = batch_norm
@@ -15,9 +23,14 @@ def __init__(self, n_features, num_class, dropout_ratio=0.2, batch_norm=False, *
         nn.init.normal_(self.classifier.weight, 0, math.sqrt(2.0 / num_class))
 
     def forward(self, x):
-        '''
-        x.shape: (batch_size, n_features)
-        '''
+        """
+        Args:
+            x (torch.Tensor): Input tensor of shape: (batch_size, n_features)
+        
+        returns:
+            torch.Tensor: logits for classification.
+        """
+
         x = self.dropout(x)
         if self.bn:
             x = self.bn(x)

diff --git a/openhands/models/decoder/rnn.py b/openhands/models/decoder/rnn.py
@@ -5,6 +5,19 @@
 
 
 class RNNClassifier(nn.Module):
+    """
+    RNN head for classification.
+    
+    Args:
+        n_features (int): Number of features in the input.
+        num_class (int): Number of classes for classification.
+        rnn_type (str): GRU or LSTM. Default: ``GRU``.
+        hidden_size (int): Hidden dim to use for RNN. Default: 512.
+        num_layers (int): Number of layers of RNN to use. Default: 1.
+        bidirectional (bool): Whether to use bidirectional RNN or not. Default: ``True``.
+        use_attention (bool): Whether to use attention for pooling or not. Default: ``False``.
+
+    """
     def __init__(
         self,
         n_features,
@@ -32,7 +45,11 @@ def __init__(
 
     def forward(self, x):
         """
-        x.shape: (batch_size, T, n_features)
+        Args:
+            x (torch.Tensor): Input tensor of shape: (batch_size, T, n_features)
+        
+        returns:
+            torch.Tensor: logits for classification.
         """
         self.rnn.flatten_parameters()
 

diff --git a/openhands/models/encoder/__init__.py b/openhands/models/encoder/__init__.py
@@ -1,5 +1,9 @@
 from .graph.pose_flattener import PoseFlattener
 from .graph.decoupled_gcn import DecoupledGCN
 from .graph.st_gcn import STGCN
+from .graph.sgn import SGN
 
-__all__ = ["PoseFlattener", "DecoupledGCN", "STGCN"]
+from .cnn2d import CNN2D
+from .cnn3d import CNN3D
+
+__all__ = ["PoseFlattener", "DecoupledGCN", "STGCN", "SGN", "CNN2D", "CNN3D"]
diff --git a/openhands/models/encoder/cnn2d.py b/openhands/models/encoder/cnn2d.py
@@ -20,6 +20,9 @@ def __init__(self, in_channels=3, backbone="resnet18", pretrained=True):
         self.backbone.fc = nn.Identity()
 
     def forward(self, x):
+        """
+        forward step
+        """
         b, c, t, h, w = x.shape
         cnn_embeds = []
         for i in range(t):

diff --git a/openhands/models/encoder/cnn3d.py b/openhands/models/encoder/cnn3d.py
@@ -52,6 +52,9 @@ def __init__(self, in_channels, backbone, pretrained=True, **kwargs):
         self.n_out_features = 400  # list(self.backbone.modules())[-2].out_features
 
     def forward(self, x):
+        """
+        forward step
+        """
         x = self.backbone(x)
         return x.transpose(0, 1) # Batch-first