Skip to content

Commit

Permalink
update docs
Browse files Browse the repository at this point in the history
  • Loading branch information
Prem-kumar27 committed Oct 19, 2021
1 parent 8960900 commit 975b088
Show file tree
Hide file tree
Showing 29 changed files with 281 additions and 93 deletions.
6 changes: 6 additions & 0 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@ Decoders
.. automodule:: openhands.models.decoder
:members:

SSL-Models
^^^^^^^^^^

.. automodule:: openhands.models.ssl
:members:

Datasets
--------

Expand Down
2 changes: 1 addition & 1 deletion docs/instructions/inference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ Computing accuracy using test set
.. code:: python
import omegaconf
from openhands.core.inference import InferenceModel
from openhands.apis.inference import InferenceModel
cfg = omegaconf.OmegaConf.load("path/to/config.yaml")
model = InferenceModel(cfg=cfg)
Expand Down
2 changes: 1 addition & 1 deletion docs/instructions/self_supervised.rst
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ Finally, run the following snippet to perform the pretraining:
.. code:: python
import omegaconf
from openhands.core.dpc import PretrainingModelDPC
from openhands.apis.dpc import PretrainingModelDPC
cfg = omegaconf.OmegaConf.load("path/to/config.yaml")
trainer = PretrainingModelDPC(cfg=cfg)
Expand Down
2 changes: 1 addition & 1 deletion docs/instructions/training.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ After you have a config ready, run the following python snippet:
.. code:: python
import omegaconf
from openhands.core.classification_model import ClassificationModel
from openhands.apis.classification_model import ClassificationModel
from openhands.core.exp_utils import get_trainer
cfg = omegaconf.OmegaConf.load("path/to/config.yaml")
Expand Down
1 change: 1 addition & 0 deletions docs/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,4 @@ scikit_learn==1.0
sphinx
myst-parser
sphinx_rtd_theme
sphinx_copybutton
2 changes: 1 addition & 1 deletion openhands/apis/dpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def calc_topk_accuracy(output, target, topk=(1,)):

def process_output(mask):
"""take mask as input, compute the target for contrastive loss"""
(B, NP, B2, NS) = mask.size() # [B, P, SQ, B, N, SQ]
(B, NP, B2, NS) = mask.size() # [B, P, B, N,]
target = (mask == 1).float()
target.requires_grad = False
return target, (B, B2, NS, NP)
Expand Down
4 changes: 2 additions & 2 deletions openhands/datasets/isolated/autsl.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@

class AUTSLDataset(BaseIsolatedDataset):
"""
Turkish Isolated Sign language dataset from the paper
Turkish Isolated Sign language dataset from the paper:
> [AUTSL: A Large Scale Multi-modal Turkish Sign Language Dataset and Baseline Methods](https://arxiv.org/abs/2008.00932)<br>
`AUTSL: A Large Scale Multi-modal Turkish Sign Language Dataset and Baseline Methods <https://arxiv.org/abs/2008.00932>`_
"""
def read_glosses(self):
class_mappings_df = pd.read_csv(self.class_mappings_file_path)
Expand Down
4 changes: 2 additions & 2 deletions openhands/datasets/isolated/csl.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@

class CSLDataset(BaseIsolatedDataset):
"""
Chinese Isolated Sign language dataset from the paper
Chinese Isolated Sign language dataset from the paper:
> [Attention-Based 3D-CNNs for Large-Vocabulary Sign Language Recognition](https://ieeexplore.ieee.org/document/8466903)<br>
`Attention-Based 3D-CNNs for Large-Vocabulary Sign Language Recognition <https://ieeexplore.ieee.org/document/8466903>`_
"""
def read_glosses(self):
self.glosses = []
Expand Down
4 changes: 2 additions & 2 deletions openhands/datasets/isolated/devisign.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@

class DeviSignDataset(BaseIsolatedDataset):
"""
Chinese Isolated Sign language dataset from the paper
Chinese Isolated Sign language dataset from the paper:
> [The devisign large vocabulary of chinese sign language database and baseline evaluations]<br>
`The devisign large vocabulary of chinese sign language database and baseline evaluations`
"""
def read_glosses(self):
self.glosses = []
Expand Down
4 changes: 2 additions & 2 deletions openhands/datasets/isolated/gsl.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@

class GSLDataset(BaseIsolatedDataset):
"""
Greek Isolated Sign language dataset from the paper
Greek Isolated Sign language dataset from the paper:
> [A Comprehensive Study on Deep Learning-based Methods for Sign Language Recognition](https://ieeexplore.ieee.org/document/8466903)<br>
`A Comprehensive Study on Deep Learning-based Methods for Sign Language Recognition <https://arxiv.org/abs/2007.12530>`_
"""
def read_glosses(self):
self.glosses = [
Expand Down
4 changes: 2 additions & 2 deletions openhands/datasets/isolated/include.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@

class INCLUDEDataset(BaseIsolatedDataset):
"""
Indian Isolated Sign language dataset from the paper
Indian Isolated Sign language dataset from the paper:
> [INCLUDE: A Large Scale Dataset for Indian Sign Language Recognition](https://dl.acm.org/doi/10.1145/3394171.3413528)<br>
`INCLUDE: A Large Scale Dataset for Indian Sign Language Recognition <https://dl.acm.org/doi/10.1145/3394171.3413528>`_
"""
def read_glosses(self):
# TODO: Separate the classes into a separate file?
Expand Down
4 changes: 2 additions & 2 deletions openhands/datasets/isolated/lsa64.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@

class LSA64Dataset(BaseIsolatedDataset):
"""
Argentinian Isolated Sign language dataset from the paper
Argentinian Isolated Sign language dataset from the paper:
> [LSA64: An Argentinian Sign Language Dataset](http://sedici.unlp.edu.ar/bitstream/handle/10915/56764/Documento_completo.pdf-PDFA.pdf)<br>
`LSA64: An Argentinian Sign Language Dataset <http://sedici.unlp.edu.ar/bitstream/handle/10915/56764/Documento_completo.pdf-PDFA.pdf>`_
"""
def read_glosses(self):
df = pd.read_csv(self.class_mappings_file_path, delimiter="|", header=None)
Expand Down
4 changes: 2 additions & 2 deletions openhands/datasets/isolated/ms_asl.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@

class MSASLDataset(BaseIsolatedDataset):
"""
American Isolated Sign language dataset from the paper
American Isolated Sign language dataset from the paper:
> [MS-ASL: A Large-Scale Data Set and Benchmark for Understanding American Sign Language](https://arxiv.org/abs/1812.01053)<br>
`MS-ASL: A Large-Scale Data Set and Benchmark for Understanding American Sign Language <https://arxiv.org/abs/1812.01053>`_
"""
def read_glosses(self):
# TODO: Separate the classes into a separate file?
Expand Down
4 changes: 2 additions & 2 deletions openhands/datasets/isolated/wlasl.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@

class WLASLDataset(BaseIsolatedDataset):
"""
American Isolated Sign language dataset from the paper
American Isolated Sign language dataset from the paper:
> [Word-level Deep Sign Language Recognition from Video: A New Large-scale Dataset and Methods Comparison](https://arxiv.org/abs/1910.11006)<br>
`Word-level Deep Sign Language Recognition from Video: A New Large-scale Dataset and Methods Comparison <https://arxiv.org/abs/1910.11006>`_
"""
def read_glosses(self):
with open(self.split_file, "r") as f:
Expand Down
16 changes: 9 additions & 7 deletions openhands/datasets/pose_transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ def __call__(self, data:dict):
class PoseSelect:
"""
Select the given index keypoints from all keypoints.
Args:
preset (str | None, optional): can be used to specify existing presets - `mediapipe_holistic_minimal_27` or `mediapipe_holistic_top_body_59`
If None, then the `pose_indexes` argument indexes will be used to select. Default: ``None``
Expand Down Expand Up @@ -135,7 +136,7 @@ def __call__(self, data:dict):
# Adopted from: https://github.com/AmitMY/pose-format/
class ShearTransform:
"""
Applies [2D shear transform](https://en.wikipedia.org/wiki/Shear_matrix)
Applies `2D shear <https://en.wikipedia.org/wiki/Shear_matrix>`_ transformation
Args:
shear_std (float): std to use for shear transformation. Default: 0.2
Expand Down Expand Up @@ -168,7 +169,7 @@ def __call__(self, data:dict):

class RotatationTransform:
"""
Applies [2D rotation transformation](https://en.wikipedia.org/wiki/Rotation_matrix).
Applies `2D rotation <https://en.wikipedia.org/wiki/Rotation_matrix>`_ transformation.
Args:
rotation_std (float): std to use for rotation transformation. Default: 0.2
Expand Down Expand Up @@ -205,7 +206,8 @@ def __call__(self, data):

class ScaleTransform:
"""
Applies [Scaling](https://en.wikipedia.org/wiki/Scaling_(geometry)) transformation
Applies `Scaling <https://en.wikipedia.org/wiki/Scaling_(geometry)>`_ transformation
Args:
scale_std (float): std to use for Scaling transformation. Default: 0.2
"""
Expand Down Expand Up @@ -452,10 +454,10 @@ def __call__(self, data):
class TemporalSample:
"""
Randomly choose Uniform and Temporal subsample
If subsample_mode==2, randomly sub-sampling or uniform-sampling is done
If subsample_mode==0, only uniform-sampling (for test sets)
If subsample_mode==1, only sub-sampling (to reproduce results of some papers that use only subsampling)
- If subsample_mode==2, randomly sub-sampling or uniform-sampling is done
- If subsample_mode==0, only uniform-sampling (for test sets)
- If subsample_mode==1, only sub-sampling (to reproduce results of some papers that use only subsampling)
Args:
num_frames (int): Number of frames to subsample.
subsample_mode (int): Mode to choose.
Expand Down
27 changes: 26 additions & 1 deletion openhands/datasets/ssl/dpc_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,18 @@
from ...core.data import create_pose_transforms

class WindowedDatasetHDF5(torch.utils.data.DataLoader):
"""
Windowed dataset loader from HDF5 for SL-DPC model.
Args:
root_dir (str): Directory which contains the data.
file_format (str): File type. Default: ``h5``.
transforms (obj | None): Compose object with transforms or None. Default: ``None``.
seq_len (int): Sequence length for each window. Default: 10.
num_seq (int): Total number of windows. Default: 7.
downsample (int): Number of frames to skip per timestep when sampling. Default: 3.
num_channels (int): Number of input channels. Default: 2.
"""
def __init__(
self,
root_dir,
Expand Down Expand Up @@ -113,13 +125,26 @@ def get_weights_for_balanced_sampling(self):


class WindowedDatasetPickle(torch.utils.data.DataLoader):
"""
Windowed dataset loader from pickle files for SL-DPC model.
This module is for loading finetuning datasets.
Args:
root_dir (str): Directory which contains the data.
file_format (str): File type. Default: ``pkl``.
transforms (obj | None): Compose object with transforms or None. Default: ``None``.
seq_len (int): Sequence length for each window. Default: 10.
num_seq (int): Total number of windows. Default: 10.
downsample (int): Number of frames to skip per timestep when sampling. Default: 1.
num_channels (int): Number of input channels. Default: 2.
"""
def __init__(
self,
root_dir,
file_format='pkl',
transforms=None,
seq_len=10,
num_seq=6,
num_seq=10,
downsample=1,
num_channels=2,
):
Expand Down
15 changes: 14 additions & 1 deletion openhands/models/decoder/bert_hf.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,15 @@ def forward(self, x):


class BERT(nn.Module):
"""
BERT decoder module.
Args:
n_features (int): Number of features in the input.
num_class (int): Number of classes for classification.
config (dict): Configuration set for BERT layer.
"""
def __init__(self, n_features, num_class, config):
"""
pooling_type -> ["max","avg","att","cls"]
Expand Down Expand Up @@ -71,7 +80,11 @@ def __init__(self, n_features, num_class, config):

def forward(self, x):
"""
x.shape: (batch_size, T, n_features)
Args:
x (torch.Tensor): Input tensor of shape: (batch_size, T, n_features)
Returns:
torch.Tensor: logits for classification.
"""
x = self.l1(x)
if self.cls_token:
Expand Down
21 changes: 17 additions & 4 deletions openhands/models/decoder/fc.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,15 @@


class FC(nn.Module):
def __init__(self, n_features, num_class, dropout_ratio=0.2, batch_norm=False, **kwargs):
"""
Fully connected layer head
Args:
n_features (int): Number of features in the input.
num_class (int): Number of classes for classification.
dropout_ratio (float): Dropout ratio to use. Default: 0.2.
batch_norm (bool): Whether to use batch norm or not. Default: ``False``.
"""
def __init__(self, n_features, num_class, dropout_ratio=0.2, batch_norm=False):
super().__init__()
self.dropout = nn.Dropout(p=dropout_ratio)
self.bn = batch_norm
Expand All @@ -15,9 +23,14 @@ def __init__(self, n_features, num_class, dropout_ratio=0.2, batch_norm=False, *
nn.init.normal_(self.classifier.weight, 0, math.sqrt(2.0 / num_class))

def forward(self, x):
'''
x.shape: (batch_size, n_features)
'''
"""
Args:
x (torch.Tensor): Input tensor of shape: (batch_size, n_features)
Returns:
torch.Tensor: logits for classification.
"""

x = self.dropout(x)
if self.bn:
x = self.bn(x)
Expand Down
19 changes: 18 additions & 1 deletion openhands/models/decoder/rnn.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,19 @@


class RNNClassifier(nn.Module):
"""
RNN head for classification.
Args:
n_features (int): Number of features in the input.
num_class (int): Number of classes for classification.
rnn_type (str): GRU or LSTM. Default: ``GRU``.
hidden_size (int): Hidden dim to use for RNN. Default: 512.
num_layers (int): Number of layers of RNN to use. Default: 1.
bidirectional (bool): Whether to use bidirectional RNN or not. Default: ``True``.
use_attention (bool): Whether to use attention for pooling or not. Default: ``False``.
"""
def __init__(
self,
n_features,
Expand Down Expand Up @@ -32,7 +45,11 @@ def __init__(

def forward(self, x):
"""
x.shape: (batch_size, T, n_features)
Args:
x (torch.Tensor): Input tensor of shape: (batch_size, T, n_features)
Returns:
torch.Tensor: logits for classification.
"""
self.rnn.flatten_parameters()

Expand Down
6 changes: 5 additions & 1 deletion openhands/models/encoder/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
from .graph.pose_flattener import PoseFlattener
from .graph.decoupled_gcn import DecoupledGCN
from .graph.st_gcn import STGCN
from .graph.sgn import SGN

__all__ = ["PoseFlattener", "DecoupledGCN", "STGCN"]
from .cnn2d import CNN2D
from .cnn3d import CNN3D

__all__ = ["PoseFlattener", "DecoupledGCN", "STGCN", "SGN", "CNN2D", "CNN3D"]
3 changes: 3 additions & 0 deletions openhands/models/encoder/cnn2d.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ def __init__(self, in_channels=3, backbone="resnet18", pretrained=True):
self.backbone.fc = nn.Identity()

def forward(self, x):
"""
forward step
"""
b, c, t, h, w = x.shape
cnn_embeds = []
for i in range(t):
Expand Down
3 changes: 3 additions & 0 deletions openhands/models/encoder/cnn3d.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ def __init__(self, in_channels, backbone, pretrained=True, **kwargs):
self.n_out_features = 400 # list(self.backbone.modules())[-2].out_features

def forward(self, x):
"""
forward step
"""
x = self.backbone(x)
return x.transpose(0, 1) # Batch-first

Expand Down

0 comments on commit 975b088

Please sign in to comment.