# ENG to ASL

### Setting Up

In [2]:
# Shallow-clone the SignBank+ repository and, inside its nmt/ folder, the
# signwriting-translation repository whose scripts_new/ helpers are run by later cells.
!git clone --depth 1 https://github.com/sign-language-processing/signbank-plus
!cd signbank-plus/signbank_plus/nmt/ && git clone --depth 1 https://github.com/J22Melody/signwriting-translation
# Install the SignBank+ Python requirements into the kernel's environment.
%pip install -r signbank-plus/requirements.txt
# Install the sentencepiece system package
# (NOTE(review): presumably for the spm_* command-line tools used later — confirm).
!apt update && apt install -y sentencepiece

# Disabled: optional from-source build of NVIDIA apex with its C++/CUDA extensions.
# !pip install pip --upgrade
# !git clone https://github.com/NVIDIA/apex
# !cd apex && pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" ./

Cloning into 'signbank-plus'...
remote: Enumerating objects: 89, done.[K
remote: Counting objects: 100% (89/89), done.[K
remote: Compressing objects: 100% (79/79), done.[K
remote: Total 89 (delta 9), reused 63 (delta 8), pack-reused 0[K
Unpacking objects: 100% (89/89), 36.82 MiB | 6.90 MiB/s, done.
Cloning into 'signwriting-translation'...
remote: Enumerating objects: 1171, done.[K
remote: Counting objects: 100% (1171/1171), done.[K
remote: Compressing objects: 100% (992/992), done.[K
remote: Total 1171 (delta 244), reused 968 (delta 151), pack-reused 0[K
Receiving objects: 100% (1171/1171), 165.17 MiB | 6.61 MiB/s, done.
Resolving deltas: 100% (244/244), done.
Updating files: 100% (1191/1191), done.
Collecting signwriting@ git+https://github.com/sign-language-processing/signwriting (from -r signbank-plus/requirements.txt (line 1))
  Cloning https://github.com/sign-language-processing/signwriting to /tmp/pip-install-fjgb4lqp/signwriting_9fd4ec2008f2493ca06c1dbb597dc57a
  Runnin

### Installing Requirements

In [3]:
!echo "numpy" > req.txt
!echo "git+https://github.com/sign-language-processing/datasets" >> req.txt
!echo "tensorboard" >> req.txt
!echo "tensorflow" >> req.txt
!echo "torch" >> req.txt
!echo "torchtext" >> req.txt
!echo "sacrebleu" >> req.txt
!echo "setuptools" >> req.txt
!echo "sockeye>=3.0.13" >> req.txt
!echo "mxnet" >> req.txt
!echo "webvtt-py" >> req.txt

!pip install -r req.txt

Collecting git+https://github.com/sign-language-processing/datasets (from -r req.txt (line 2))
  Cloning https://github.com/sign-language-processing/datasets to /tmp/pip-req-build-duk4lpkj
  Running command git clone --filter=blob:none --quiet https://github.com/sign-language-processing/datasets /tmp/pip-req-build-duk4lpkj
  Resolved https://github.com/sign-language-processing/datasets to commit 3aa515c0da9f3c5f43db5a8cc407a7abbe083db0
  Preparing metadata (setup.py) ... [?25ldone
Collecting sacrebleu (from -r req.txt (line 7))
  Downloading sacrebleu-2.4.0-py3-none-any.whl.metadata (57 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.4/57.4 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
Collecting sockeye>=3.0.13 (from -r req.txt (line 9))
  Downloading sockeye-3.1.34-py3-none-any.whl (218 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m218.1/218.1 kB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting mxnet (from -r req.txt (line 10))


### Importing

In [4]:
import csv
import gzip
import itertools
import random
from collections import defaultdict
from pathlib import Path

from tqdm import tqdm
import importlib
# The module path contains a hyphen ("signbank-plus"), which is not a valid identifier
# for an `import` statement, so the module is loaded by name through importlib.
# NOTE(review): this presumably relies on the directory holding the clone being on sys.path — confirm.
module_name = 'signbank-plus.signbank_plus.load_data'
load_data_fol = importlib.import_module(module_name)
# Bind the two loader functions used by the data pre-processing cell.
load_data = load_data_fol.load_data
load_file = load_data_fol.load_file
from signwriting.tokenizer import SignWritingTokenizer

from shutil import copy2
import subprocess
from os.path import exists, join, isfile
from os import listdir
import re

### Data Preprocessing

In [5]:
# Every flag seen while writing the parallel files: save_parallel_csv adds to it and
# the driver code prints it once all splits are written.
ALL_FLAGS = set()

def get_source_target(data, field="annotated_texts"):
    """Yield one SignWriting/text pair for every non-empty text of every instance.

    Instances are visited in a fixed pseudo-random order (seed 42). The shuffle is
    applied to a shallow copy, so the caller's sequence keeps its order, any iterable
    is accepted, and calling the function again on the same input yields the same
    sequence of pairs.

    Args:
        data: iterable of instance dicts. An instance that has `field` must also have
            "sign_writing", "spoken_language" and "sign_language"; "puddle_id" and
            "example_id" are optional.
        field: key holding the list of texts to pair with the instance's SignWriting.

    Yields:
        dict with keys "puddle_id", "example_id" (None when absent), "flags"
        ([spoken language, sign language]), "source" (stripped SignWriting) and
        "target" (stripped text). Pairs where either side is blank are skipped.
    """
    shuffled = list(data)  # copy: never reorder the caller's list
    random.Random(42).shuffle(shuffled)  # Shuffle data consistently
    for instance in shuffled:
        if field in instance:
            for text in instance[field]:
                if len(text.strip()) > 0 and len(instance["sign_writing"].strip()) > 0:
                    yield {
                        "puddle_id": instance["puddle_id"] if "puddle_id" in instance else None,
                        "example_id": instance["example_id"] if "example_id" in instance else None,
                        "flags": [instance["spoken_language"], instance["sign_language"]],
                        "source": instance["sign_writing"].strip(),
                        "target": text.strip(),
                    }


def get_source_target_no_test(data, field="annotated_texts"):
    """Yield the pairs of `get_source_target(data, field)` that are not benchmark instances.

    An instance is dropped when its (puddle_id, example_id) pair also occurs in the
    data returned by `load_data("benchmark")`.
    """
    held_out = set()
    for benchmark_instance in load_data("benchmark"):
        held_out.add((benchmark_instance['puddle_id'], benchmark_instance['example_id']))

    yield from (
        pair
        for pair in get_source_target(data, field)
        if (pair['puddle_id'], pair['example_id']) not in held_out
    )


# Model 1: Original data
def get_original_data():
    """Yield the non-benchmark pairs of the "raw" data, read from its "texts" field."""
    yield from get_source_target_no_test(load_data("raw"), field="texts")


# Model 2: Cleaned data
def get_cleaned_data():
    """Yield the non-benchmark pairs of the raw, cleaned and bible data ("annotated_texts" field)."""
    cleaned_sources = ("raw", "gpt-3.5-cleaned", "manually-cleaned", "bible")
    yield from get_source_target_no_test(load_data(*cleaned_sources), field="annotated_texts")


# Model 3: Expanded data
def get_expanded_data():
    """Yield the non-benchmark pairs of the cleaned sources plus "gpt-3.5-expanded" ("annotated_texts" field)."""
    expanded_sources = ("raw", "gpt-3.5-cleaned", "gpt-3.5-expanded", "manually-cleaned", "bible")
    yield from get_source_target_no_test(load_data(*expanded_sources), field="annotated_texts")


def get_expanded_data_en():
    """Yield the non-benchmark pairs of the "gpt-3.5-expanded.en" data ("annotated_texts" field)."""
    yield from get_source_target_no_test(load_data("gpt-3.5-expanded.en"), field="annotated_texts")


def test_set():
    """Yield the benchmark pairs, one per entry of each instance's "gold_texts"."""
    benchmark = load_file("benchmark", array_fields=["gold_texts"])
    for pair in get_source_target(benchmark, field="gold_texts"):
        yield pair


def save_parallel_csv(path: Path, data: iter, split="train", extra_flags=()):
    """Write one split of parallel data into `path` in several line-aligned formats.

    Files written (all UTF-8), one line/row per kept instance:
        {split}.source            flags + raw SignWriting
        {split}.source.tokenized  flags + space-joined SignWriting tokens
        {split}.target            spoken-language text
        {split}.csv               "source"/"target" columns (same content as above)
        {split}.spoken.gz         extra flags + flags + text
        {split}.signed.gz         extra flags + flags + detokenized SignWriting

    Instances whose target is not 1..511 characters or whose source is not 1..1023
    characters long are skipped. Every flag written is recorded in ALL_FLAGS.

    All six files are opened in a single `with` statement so that each of them,
    including the two gzip streams, is flushed and closed even if writing fails.

    Args:
        path: existing output directory.
        data: iterable of dicts with "source", "target" and "flags" keys.
        split: file-name prefix of the split.
        extra_flags: additional flag strings prepended (verbatim) in the gzip files.
    """
    path = Path(path)
    extra_flags = list(extra_flags or ())
    ALL_FLAGS.update(extra_flags)

    tokenizer = SignWritingTokenizer()

    with open(path / f"{split}.source", "w", encoding="utf-8") as f_source, \
            open(path / f"{split}.source.tokenized", "w", encoding="utf-8") as f_source_tokenized, \
            open(path / f"{split}.target", "w", encoding="utf-8") as f_target, \
            open(path / f"{split}.csv", "w", encoding="utf-8", newline="") as f_csv, \
            gzip.open(path / f"{split}.spoken.gz", "wt", encoding="utf-8") as f_spoken_gzip, \
            gzip.open(path / f"{split}.signed.gz", "wt", encoding="utf-8") as f_signed_gzip:
        # newline="" lets the csv module control line endings itself, as its documentation requires.
        writer = csv.DictWriter(f_csv, fieldnames=["source", "target"])
        writer.writeheader()

        for instance in tqdm(data):
            if not (0 < len(instance["target"]) < 512 and 0 < len(instance["source"]) < 1024):
                continue

            # Flags are written as "$<flag>" tokens in front of the source.
            flag_tokens = [f"${flag}" for flag in instance["flags"]]
            ALL_FLAGS.update(flag_tokens)
            flags = " ".join(flag_tokens)

            source = flags + " " + instance["source"]
            writer.writerow({
                "source": source,
                "target": instance["target"],
            })
            f_source.write(source + "\n")
            f_target.write(instance["target"] + "\n")

            tokens_source = list(tokenizer.text_to_tokens(instance["source"]))
            tokenized_source = " ".join(tokens_source)
            f_source_tokenized.write(flags + " " + tokenized_source + "\n")

            # With no extra flags this keeps the leading space the files have always had.
            gzip_flags = " ".join(extra_flags) + " " + flags
            # We detokenize the SignWriting, which removes "A" prefixes, and box placement
            detokenized_source = tokenizer.tokens_to_text(tokens_source)
            f_spoken_gzip.write(gzip_flags + " " + instance["target"] + "\n")
            f_signed_gzip.write(gzip_flags + " " + detokenized_source + "\n")


def save_splits(path: Path, data: iter, extra_flags: list = None, dev_num=3000):
    """Write a "dev" split with the first `dev_num` instances and a "train" split with the rest.

    Args:
        path: output directory; created if missing.
        data: iterable of instances accepted by save_parallel_csv.
        extra_flags: extra flag strings forwarded to save_parallel_csv (default: none).
        dev_num: number of leading instances taken for the dev split; 0 writes no dev split.
    """
    extra_flags = [] if extra_flags is None else extra_flags
    path.mkdir(parents=True, exist_ok=True)
    # Use one shared iterator so that, even when `data` is a list, the instances
    # consumed for dev are not written to train a second time.
    data = iter(data)
    if dev_num > 0:
        save_parallel_csv(path, itertools.islice(data, dev_num), split="dev", extra_flags=extra_flags)
    save_parallel_csv(path, data, split="train", extra_flags=extra_flags)


def save_test(path: Path, data: iter):
    """Write the test set and regroup it into unique sources with multiple references.

    First writes every pair as the "all" split via save_parallel_csv, then reads the
    written files back and produces:
        test.source.unique            one raw source line per distinct tokenized source
        test.source.unique.tokenized  the matching tokenized source lines
        test.target.{i}               the i-th reference of each unique source, or an
                                      empty line when that source has fewer references

    All files are written as UTF-8. With an empty test set the two unique-source files
    are created empty and no reference file is written.

    Args:
        path: output directory; created if missing.
        data: iterable of instances accepted by save_parallel_csv.
    """
    path.mkdir(parents=True, exist_ok=True)
    save_parallel_csv(path, data, split="all")

    # Read source file and target file
    with open(f"{path}/all.source", 'r', encoding='utf-8') as f:
        source_lines = [line.strip() for line in f]
    with open(f"{path}/all.source.tokenized", 'r', encoding='utf-8') as f:
        source_lines_tokenized = [line.strip() for line in f]
    with open(f"{path}/all.target", 'r', encoding='utf-8') as f:
        target_lines = [line.strip() for line in f]

    # Tokenized source -> raw source (the last raw form wins when several map to one key).
    source_map = dict(zip(source_lines_tokenized, source_lines))

    # Tokenized source -> all its references, in file order.
    source_target_map = defaultdict(list)
    for source, target in zip(source_lines_tokenized, target_lines):
        source_target_map[source].append(target)

    max_references = max((len(references) for references in source_target_map.values()), default=0)
    print(f"Max test references: {max_references}")

    with open(f"{path}/test.source.unique", 'w', encoding='utf-8') as f1, \
            open(f"{path}/test.source.unique.tokenized", 'w', encoding='utf-8') as f2:
        for source in source_target_map:
            f1.write(source_map[source])
            f1.write("\n")
            f2.write(source)
            f2.write("\n")

    for i in range(max_references):
        with open(f"{path}/test.target.{i}", 'w', encoding='utf-8') as f:
            for references in source_target_map.values():
                if len(references) > i:
                    f.write(references[i])
                # Always end the line so every reference file stays aligned with the sources.
                f.write("\n")


# Write every dataset variant under signbank-plus/data/parallel.
parallel_path = Path("signbank-plus/data/parallel")

# Benchmark test set, regrouped into unique sources and reference files.
save_test(parallel_path / "test", test_set())

# The three training variants, each with the default dev split.
save_splits(parallel_path / "original", get_original_data())
save_splits(parallel_path / "cleaned", get_cleaned_data())
save_splits(
    parallel_path / "expanded",
    itertools.chain(get_expanded_data(), get_expanded_data_en()),
)

# Additional datasets, written as train only (dev_num=0 skips the dev split).
save_splits(
    parallel_path / "more",
    itertools.chain(
        get_source_target(load_data("sign2mint"), field="texts"),
        get_source_target(load_data("signsuisse"), field="texts"),
        get_source_target(load_data("fingerspelling"), field="texts"),
    ),
    dev_num=0,
)

# Comma-separated list of every flag that was written.
print("\n" + ",".join(ALL_FLAGS))

900it [00:00, 6148.85it/s]


Max test references: 8


3000it [00:03, 770.32it/s] 
545163it [01:22, 6614.39it/s]
3000it [00:07, 426.59it/s]
374090it [00:59, 6306.28it/s]
3000it [00:09, 322.00it/s]
1048212it [02:28, 7036.79it/s]
135279it [00:36, 3755.66it/s]


$rsl,$mdl,$sq,$gss,$icl,$mt,$tr,$ncs,$sls,$jsl,$bfi,$ps,$cs,$sqk,$fi,$eth,$th,$bg,$fil,$haf,$asf,$,$ugy,$jos,$tss,$sfb,$sfs,$eo,$he,$asq,$nl,$fr,$ru,$vn,$is,$ysl,$nsi,$aed,$en,$slf,$sv,$dsl,$tsm,$swl,$pl,$pys,$ur,$svk,$no,$bzs,$kvk,$ins,$csc,$psr,$prl,$sl,$psc,$da,$uk,$zh-CN,$sdl,$esl,$ssp,$am,$ar,$gr,$rms,$bvl,$de,$gsg,$lws,$bqn,$fcs,$hi,$mw,$hu,$fse,$ise,$csn,$ssr,$ko,$sgg,$tse,$sw,$isr,$tsq,$dse,$ca,$ne,$ukl,$isg,$vsl,$zh,$nzs,$pt,$ms,$ase,$nsp,$ht,$zh-TW,$es,$ja,$sk,$psp,$csg,$it,$afg,$xki,$esn,$csl,$pso,$cse,$hds,$hsh,$nsl,$pks,$xml,$fsl,$mfs,$gn,$vgt,$ro





### Data Preparation

In [6]:
# Run the repository's prepare_data.sh once per dataset variant. The first argument
# names the variant and the second the output directory (the script logs e.g.
# "Prepare the original SignBank+ dataset in ./data_new_original ...").
!cd signbank-plus/signbank_plus/nmt/signwriting-translation/ && sh ./scripts_new/prepare_data.sh original data_new_original
!cd signbank-plus/signbank_plus/nmt/signwriting-translation/ && sh ./scripts_new/prepare_data.sh cleaned data_new_cleaned
!cd signbank-plus/signbank_plus/nmt/signwriting-translation/ && sh ./scripts_new/prepare_data.sh expanded data_new_expanded
# NOTE(review): the third argument presumably points at an earlier preparation whose
# artifacts (e.g. the SentencePiece model) are reused — confirm in prepare_data.sh.
!cd signbank-plus/signbank_plus/nmt/signwriting-translation/ && sh ./scripts_new/prepare_data.sh cleaned data_new_expanded_cleaned data_new_expanded

Prepare the original SignBank+ dataset in ./data_new_original ...
sentencepiece_trainer.cc(49) LOG(INFO) Starts training with : 
TrainerSpec {
  input: data_new_original/train.spoken
  input_format: 
  model_prefix: data_new_original/spm
  model_type: BPE
  vocab_size: 3000
  self_test_sample_size: 0
  character_coverage: 0.9995
  input_sentence_size: 0
  shuffle_input_sentence: 1
  seed_sentencepiece_size: 1000000
  shrinking_factor: 0.75
  max_sentence_length: 4192
  num_threads: 16
  num_sub_iterations: 2
  max_sentencepiece_length: 16
  split_by_unicode_script: 1
  split_by_number: 1
  split_by_whitespace: 1
  treat_whitespace_as_suffix: 0
  hard_vocab_limit: 1
  use_all_vocab: 0
  unk_id: 0
  bos_id: 1
  eos_id: 2
  pad_id: -1
  unk_piece: <unk>
  bos_piece: <s>
  eos_piece: </s>
  pad_piece: <pad>
  unk_surface:  ⁇ 
}
NormalizerSpec {
  name: nmt_nfkc
  add_dummy_prefix: 1
  remove_extra_whitespaces: 1
  escape_whitespaces: 1
  normalization_rule_tsv: 
}

trainer_interface.cc(267

### Filtering

In [7]:
# Run find_lines.sh on the unique test sources with the literal pattern "$en" (the
# backslash keeps the shell from expanding it) and store its output in en_ids.txt.
# NOTE(review): presumably the line numbers of the English examples — confirm in find_lines.sh.
!cd signbank-plus/signbank_plus/nmt/signwriting-translation/ && bash ./scripts_new/find_lines.sh ../../../data/parallel/test/test.source.unique "\$en" > ../../../data/parallel/test/en_ids.txt
# Then run the repository's filter_test_set.sh (takes no arguments here).
!cd signbank-plus/signbank_plus/nmt/signwriting-translation/ && bash ./scripts_new/filter_test_set.sh

### Sockeye Preparing

In [8]:
# Python re-creation of the shell invocations below: builds and runs a
# `python -m sockeye.prepare_data` command for one prepared data directory.
# sh ./scripts_new/sockeye_prepare_factor.sh \
# data_new_cleaned data_sockeye_new_cleaned
# sh ./scripts_new/sockeye_prepare_factor.sh \
# data_new_expanded_cleaned data_sockeye_new_expanded_cleaned oldm

# Input directory, output directory, and (optional) directory whose vocabularies are reused.
# All three are relative to the signwriting-translation checkout the command is run from.
data_dir="data_new_cleaned"
data_dir_output="data_sockeye_new_cleaned"
data_dir_pretrained=""

# When a pretrained directory is given, pass its target vocabulary, source vocabulary
# and the seven source-factor vocabularies instead of building new ones.
if data_dir_pretrained:
    optional_prepare_data_args = f"--target-vocab {data_dir_pretrained}/vocab.trg.0.json \
    --source-vocab {data_dir_pretrained}/vocab.src.0.json \
    --source-factor-vocabs {data_dir_pretrained}/vocab.src.1.json \
    {data_dir_pretrained}/vocab.src.2.json {data_dir_pretrained}/vocab.src.3.json \
    {data_dir_pretrained}/vocab.src.4.json {data_dir_pretrained}/vocab.src.5.json \
    {data_dir_pretrained}/vocab.src.6.json {data_dir_pretrained}/vocab.src.7.json"
else:
    optional_prepare_data_args = ""

# The trailing backslashes join the lines, so `command` is a single-line string.
# Seven source-factor files accompany the main source file train.sign.
command = f"""sockeye.prepare_data \
--target {data_dir}/train.spm.spoken \
--source {data_dir}/train.sign \
--source-factors {data_dir}/train.feat_x {data_dir}/train.feat_y {data_dir}/train.feat_x_rel \
{data_dir}/train.feat_y_rel {data_dir}/train.sign+ {data_dir}/train.feat_col {data_dir}/train.feat_row \
--output {data_dir_output} \
--max-seq-len 200 \
--seed 42 \
{optional_prepare_data_args}"""

# Show the exact command, then run it as a Python module from the checkout directory.
print(command, end="\n\n")
!cd signbank-plus/signbank_plus/nmt/signwriting-translation/ \
&& python -m {command}

sockeye.prepare_data --target data_new_cleaned/train.spm.spoken --source data_new_cleaned/train.sign --source-factors data_new_cleaned/train.feat_x data_new_cleaned/train.feat_y data_new_cleaned/train.feat_x_rel data_new_cleaned/train.feat_y_rel data_new_cleaned/train.sign+ data_new_cleaned/train.feat_col data_new_cleaned/train.feat_row --output data_sockeye_new_cleaned --max-seq-len 200 --seed 42 

[INFO:sockeye.utils] Sockeye: 3.1.34, commit 4c30942ddb523533bccb4d2cbb3e894e45b1db93, path /opt/conda/lib/python3.10/site-packages/sockeye/__init__.py
[INFO:sockeye.utils] PyTorch: 1.13.1+cu117 (/opt/conda/lib/python3.10/site-packages/torch/__init__.py)
[INFO:sockeye.utils] Command: /opt/conda/lib/python3.10/site-packages/sockeye/prepare_data.py --target data_new_cleaned/train.spm.spoken --source data_new_cleaned/train.sign --source-factors data_new_cleaned/train.feat_x data_new_cleaned/train.feat_y data_new_cleaned/train.feat_x_rel data_new_cleaned/train.feat_y_rel data_new_cleaned/train.

### Training

In [11]:
# Python re-creation of the shell invocation below: builds and runs a
# `python -m sockeye.train` command on the data prepared by the previous section.
# sh ./scripts_new/sockeye_train_factor.sh \
# data_new_cleaned data_sockeye_new_cleaned cleaned oldm

# Directory with the dev files, directory written by sockeye.prepare_data, name of the
# model to train (output goes to models_new/<model_name>), and name of an existing
# model under models_new/ (leave empty to pass no --params argument).
data_dir="data_new_cleaned"
data_dir_prepared="data_sockeye_new_cleaned"
model_name="cleaned"
model_pretrained_name="oldm"

# With a pretrained model name, its params.best file is passed through --params.
if model_pretrained_name:
    optional_training_args = f"--params models_new/{model_pretrained_name}/params.best"
else:
    optional_training_args = ""

# The trailing backslashes join the lines, so `command` is a single-line string.
# The seven dev factor files mirror the seven --source-factors-num-embed values.
# Note: --overwrite-output is set, so an existing models_new/<model_name> is replaced.
command = f"""sockeye.train \
--prepared-data {data_dir_prepared} \
-vt {data_dir}/dev.spm.spoken \
-vs {data_dir}/dev.sign \
-vsf {data_dir}/dev.feat_x {data_dir}/dev.feat_y {data_dir}/dev.feat_x_rel {data_dir}/dev.feat_y_rel \
    {data_dir}/dev.sign+ {data_dir}/dev.feat_col {data_dir}/dev.feat_row \
--output models_new/{model_name} \
--overwrite-output \
--weight-tying-type trg_softmax \
--label-smoothing 0.2 \
--optimized-metric bleu \
--checkpoint-interval 4000 \
--update-interval 2 \
--max-num-epochs 300 \
--max-num-checkpoint-not-improved 10 \
--embed-dropout 0.5 \
--transformer-dropout-attention 0.5 \
--initial-learning-rate 0.0001 \
--learning-rate-reduce-factor 0.7 \
--learning-rate-reduce-num-not-improved 5 \
--decode-and-evaluate 500 \
--keep-last-params -1 \
--device-id 0 \
--seed 42 \
--source-factors-num-embed 16 16 16 16 16 16 16 \
--source-factors-combine concat \
--batch-size 2048 \
{optional_training_args}"""

# Show the exact command, then run it as a Python module from the checkout directory.
print(command, end="\n\n")
!cd signbank-plus/signbank_plus/nmt/signwriting-translation/ \
&& python -m {command}

sockeye.train --prepared-data data_sockeye_new_cleaned -vt data_new_cleaned/dev.spm.spoken -vs data_new_cleaned/dev.sign -vsf data_new_cleaned/dev.feat_x data_new_cleaned/dev.feat_y data_new_cleaned/dev.feat_x_rel data_new_cleaned/dev.feat_y_rel     data_new_cleaned/dev.sign+ data_new_cleaned/dev.feat_col data_new_cleaned/dev.feat_row --output models_new/cleaned --overwrite-output --weight-tying-type trg_softmax --label-smoothing 0.2 --optimized-metric bleu --checkpoint-interval 4000 --update-interval 2 --max-num-epochs 300 --max-num-checkpoint-not-improved 10 --embed-dropout 0.5 --transformer-dropout-attention 0.5 --initial-learning-rate 0.0001 --learning-rate-reduce-factor 0.7 --learning-rate-reduce-num-not-improved 5 --decode-and-evaluate 500 --keep-last-params -1 --device-id 0 --seed 42 --source-factors-num-embed 16 16 16 16 16 16 16 --source-factors-combine concat --batch-size 2048 --params models_new/oldm/params.best

[INFO:sockeye.utils] Sockeye: 3.1.34, commit 4c30942ddb523533b

### Evaluation

In [None]:
# !cd signbank-plus/signbank_plus/nmt/signwriting-translation/ && \
# sh ./scripts_new/sockeye_translate_factor.sh data_new_cleaned oldm

data_dir="data_new_cleaned"
model_name="oldm"
test_source_dir='../../../data/parallel/test'
test_out=f"models_new/{model_name}/test.hyps"

command = f"sockeye.translate \
--models models_new/{model_name} \
--input {data_dir}/test.sign \
--input-factors {data_dir}/test.feat_x {data_dir}/test.feat_y {data_dir}/test.feat_x_rel $data_dir/test.feat_y_rel \
    $data_dir/test.sign+ $data_dir/test.feat_col $data_dir/test.feat_row \
--output $test_out.spm.spoken \
--max-input-length 99999 \
--beam-size 5 \
--device-id 0 \
--brevity-penalty-type constant \
--seed 42"

!python -m {command}

cat $test_out.spm.spoken | spm_decode --model=$data_dir/spm.model > $test_out.spoken
sacrebleu $(find $test_source_dir -type f -name "test.target*") -i $test_out.spoken -m bleu chrf --width 2 > $test_out.spoken.eval

# eval on en subset
sh ./scripts_new/filter_lines.sh ../../../data/parallel/test/en_ids.txt $test_out.spoken > $test_out.en
sacrebleu $(find $test_source_dir -type f -name "test.en*") -i $test_out.en -m bleu chrf --width 2 > $test_out.en.eval

#### For Saving

In [12]:
!rm tmodel.zip
!mkdir -p tmodels
!cp -r signbank-plus/signbank_plus/nmt/signwriting-translation/models_new/cleaned/* tmodels/
!zip -9 -r tmodel.zip tmodels/
!rm -rf tmodels/

  adding: tmodels/ (stored 0%)
  adding: tmodels/config (deflated 74%)
  adding: tmodels/decode.output.0.00004 (deflated 59%)
  adding: tmodels/decode.output.0.00002 (deflated 60%)
  adding: tmodels/version (stored 0%)
  adding: tmodels/params.00004 (deflated 7%)
  adding: tmodels/params.00002 (deflated 7%)
  adding: tmodels/vocab.src.1.json (deflated 72%)
  adding: tmodels/log (deflated 82%)
  adding: tmodels/decode.target.0 (deflated 54%)
  adding: tmodels/decode.source.4 (deflated 83%)
  adding: tmodels/tensorboard/ (stored 0%)
  adding: tmodels/tensorboard/events.out.tfevents.1707835123.60c51629c039.864.0 (deflated 62%)
  adding: tmodels/training_state/ (stored 0%)
  adding: tmodels/training_state/bucket.pkl (deflated 8%)
  adding: tmodels/training_state/lr_scheduler_last.pkl (deflated 52%)
  adding: tmodels/training_state/params (deflated 7%)
  adding: tmodels/training_state/training.pkl (deflated 50%)
  adding: tmodels/training_state/rng.pkl (deflated 19%)
  adding: tmodels/train

In [10]:
!rm -r signbank-plus/signbank_plus/nmt/signwriting-translation/models_new/oldm/
!mkdir -p signbank-plus/signbank_plus/nmt/signwriting-translation/models_new/oldm/
!unzip tmodel.zip
!mv tmodels/* signbank-plus/signbank_plus/nmt/signwriting-translation/models_new/oldm/
!rm -r tmodels/

Archive:  tmodel.zip
   creating: tmodels/
  inflating: tmodels/args.yaml       
   creating: tmodels/tensorboard/
  inflating: tmodels/tensorboard/events.out.tfevents.1707808487.f0f2bb064a3c.838.0  
  inflating: tmodels/vocab.src.7.json  
  inflating: tmodels/vocab.src.4.json  
  inflating: tmodels/log             
  inflating: tmodels/vocab.src.3.json  
  inflating: tmodels/decode.source.6  
  inflating: tmodels/decode.source.2  
  inflating: tmodels/optimizer_best.pkl  
 extracting: tmodels/version         
  inflating: tmodels/vocab.trg.0.json  
  inflating: tmodels/vocab.src.5.json  
  inflating: tmodels/decode.source.3  
  inflating: tmodels/params.best     
  inflating: tmodels/decode.source.1  
  inflating: tmodels/vocab.src.1.json  
  inflating: tmodels/decode.source.5  
  inflating: tmodels/params.00000    
  inflating: tmodels/decode.output.0.00001  
  inflating: tmodels/decode.source.0  
  inflating: tmodels/config          
  inflating: tmodels/decode.output.0.00003  
  in

In [13]:
# !rm -rf signbank-plus/

In [None]:
# !rm tmodel.zip