In [2]:
!pip install -q jiwer==3.1.0
!pip install -q evaluate
!pip install -qU accelerate
!pip install -Uq torch
!pip install -q transformers[torch]
!pip install -q soundfile
!git clone https://github.com/SunbirdAI/salt.git
!pip install -qr salt/requirements.txt
!pip install -q peft
!pip install -q torchaudio torchvision

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
torchaudio 2.7.0+cu128 requires torch==2.7.0, but you have torch 2.7.1 which is incompatible.
torchvision 0.22.0+cu128 requires torch==2.7.0, but you have torch 2.7.1 which is incompatible.[0m[31m
[0m[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
torchaudio 2.7.0+cu128 requires torch==2.7.0, but you have torch 2.6.0 which is incompatible.
torchvision 0.22.0+cu128 requires torch==2.7.0, but you have torch 2.6.0 which is incompatible.[0m[31m
[0mCloning into 'salt'...
remote: Enumerating objects: 1324, done.[K
remote: Counting objects: 100% (232/232), done.[K
remote: Compressing objects: 100% (136/136), done.[K
remote: Total 1324 (delta 138), reused 100 (delta 96), pack-re

In [1]:
import os
from getpass import getpass
import yaml
import torch
import numpy as np
import datasets
import evaluate
import salt.dataset
import salt.metrics
import salt.constants
from salt.utils import DataCollatorCTCWithPadding as DataCollator
import transformers
from transformers import (
    AutoModelForCTC,
    AutoProcessor,
    TrainingArguments,
    Trainer
)
from transformers.models.wav2vec2.modeling_wav2vec2 import WAV2VEC2_ADAPTER_SAFE_FILE
from safetensors.torch import save_file as safe_save_file
import mlflow
import mlflow.pytorch
import huggingface_hub

In [2]:
# ==== Experiment flags ==== 
use_wandb = False
use_mlflow = True

# ==== MLflow setup ====
os.environ['MLFLOW_TRACKING_USERNAME'] = getpass('MLFLOW_TRACKING_USERNAME: ')
os.environ['MLFLOW_TRACKING_PASSWORD'] = getpass('MLFLOW_TRACKING_PASSWORD: ')
mlflow.set_tracking_uri('https://mlflow.sunbird.ai')

MLFLOW_TRACKING_USERNAME:  ········
MLFLOW_TRACKING_PASSWORD:  ········


In [5]:
yaml_config = f"""
pretrained_model: facebook/mms-1b-all
pretrained_adapter: kin
mlflow_experiment_name: kinyarwanda-asr
mlflow_run_name: sunbird-mms-kin-1b
adapter_save_id: kin

training_args:
  output_dir: stt-mms-kin
  per_device_train_batch_size: 24
  gradient_accumulation_steps: 2
  eval_strategy: steps
  max_steps: 60000
  gradient_checkpointing: True
  gradient_checkpointing_kwargs:
    use_reentrant: True
  fp16: True
  save_steps: 100
  eval_steps: 100
  logging_steps: 100
  learning_rate: 0.0003
  warmup_steps: 100
  save_total_limit: 2
  load_best_model_at_end: True
  metric_for_best_model: loss
  greater_is_better: False
  weight_decay: 0.01
  report_to: []

Wav2Vec2ForCTC_args:
  attention_dropout: 0.0
  hidden_dropout: 0.0
  feat_proj_dropout: 0.0
  layerdrop: 0.0
  ctc_loss_reduction: mean
  ignore_mismatched_sizes: True

train:
  huggingface_load:
    - path: jq/kinyarwanda-speech-hackathon
      split: train
    - path: jq/kinyarwanda-speech-hackathon
      split: dev_test[1000:]
      trust_remote_code: True
  source:
    type: speech
    language: [kin]
    preprocessing:
      - set_sample_rate:
          rate: 8000
          p: 0.1
      - set_sample_rate:
          rate: 16000
      - normalize_audio
      - augment_audio_speed:
          p: 0.2
          low: 0.95
          high: 1.15
      - augment_audio_noise:
          max_relative_amplitude: 0.5
          noise_audio_repo:
            path: Sunbird/urban-noise
            name: small
            split: train
  target:
    type: text
    language: [kin]
    preprocessing:
      - lower_case
      - clean_and_remove_punctuation:
          allowed_punctuation: "'"
  shuffle: True

validation:
  huggingface_load:
    - path: jq/kinyarwanda-speech-hackathon
      split: dev_test[:100]
  source:
    type: speech
    language: [kin]
    preprocessing:
      - set_sample_rate:
          rate: 16000
  target:
    type: text
    language: [kin]
    preprocessing:
      - lower_case
      - clean_and_remove_punctuation:
          allowed_punctuation: "'"
"""

In [6]:
config = yaml.safe_load(yaml_config)

# ==== Data Loading ==== 
train_ds = salt.dataset.create(config['train'], verbose=True)
valid_ds = salt.dataset.create(config['validation'], verbose=True)
salt.utils.show_dataset(train_ds, audio_features=['source'], N=5)

Resolving data files:   0%|          | 0/75 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/75 [00:00<?, ?it/s]

Downloading data:   0%|          | 0/75 [00:00<?, ?files/s]

train-00025-of-00075.parquet:   0%|          | 0.00/499M [00:00<?, ?B/s]

train-00026-of-00075.parquet:   0%|          | 0.00/498M [00:00<?, ?B/s]

train-00027-of-00075.parquet:   0%|          | 0.00/499M [00:00<?, ?B/s]

train-00028-of-00075.parquet:   0%|          | 0.00/502M [00:00<?, ?B/s]

train-00029-of-00075.parquet:   0%|          | 0.00/499M [00:00<?, ?B/s]

train-00030-of-00075.parquet:   0%|          | 0.00/499M [00:00<?, ?B/s]

train-00031-of-00075.parquet:   0%|          | 0.00/496M [00:00<?, ?B/s]

train-00032-of-00075.parquet:   0%|          | 0.00/499M [00:00<?, ?B/s]

train-00033-of-00075.parquet:   0%|          | 0.00/495M [00:00<?, ?B/s]

train-00034-of-00075.parquet:   0%|          | 0.00/501M [00:00<?, ?B/s]

train-00035-of-00075.parquet:   0%|          | 0.00/497M [00:00<?, ?B/s]

train-00036-of-00075.parquet:   0%|          | 0.00/497M [00:00<?, ?B/s]

train-00037-of-00075.parquet:   0%|          | 0.00/502M [00:00<?, ?B/s]

train-00038-of-00075.parquet:   0%|          | 0.00/500M [00:00<?, ?B/s]

train-00039-of-00075.parquet:   0%|          | 0.00/498M [00:00<?, ?B/s]

train-00040-of-00075.parquet:   0%|          | 0.00/497M [00:00<?, ?B/s]

train-00041-of-00075.parquet:   0%|          | 0.00/500M [00:00<?, ?B/s]

train-00042-of-00075.parquet:   0%|          | 0.00/498M [00:00<?, ?B/s]

train-00043-of-00075.parquet:   0%|          | 0.00/497M [00:00<?, ?B/s]

train-00044-of-00075.parquet:   0%|          | 0.00/496M [00:00<?, ?B/s]

train-00045-of-00075.parquet:   0%|          | 0.00/498M [00:00<?, ?B/s]

train-00046-of-00075.parquet:   0%|          | 0.00/498M [00:00<?, ?B/s]

train-00047-of-00075.parquet:   0%|          | 0.00/497M [00:00<?, ?B/s]

train-00048-of-00075.parquet:   0%|          | 0.00/498M [00:00<?, ?B/s]

train-00049-of-00075.parquet:   0%|          | 0.00/497M [00:00<?, ?B/s]

train-00050-of-00075.parquet:   0%|          | 0.00/500M [00:00<?, ?B/s]

train-00051-of-00075.parquet:   0%|          | 0.00/496M [00:00<?, ?B/s]

train-00052-of-00075.parquet:   0%|          | 0.00/494M [00:00<?, ?B/s]

train-00053-of-00075.parquet:   0%|          | 0.00/497M [00:00<?, ?B/s]

train-00054-of-00075.parquet:   0%|          | 0.00/494M [00:00<?, ?B/s]

train-00055-of-00075.parquet:   0%|          | 0.00/499M [00:00<?, ?B/s]

train-00056-of-00075.parquet:   0%|          | 0.00/498M [00:00<?, ?B/s]

train-00057-of-00075.parquet:   0%|          | 0.00/500M [00:00<?, ?B/s]

train-00058-of-00075.parquet:   0%|          | 0.00/497M [00:00<?, ?B/s]

train-00059-of-00075.parquet:   0%|          | 0.00/497M [00:00<?, ?B/s]

train-00060-of-00075.parquet:   0%|          | 0.00/504M [00:00<?, ?B/s]

train-00061-of-00075.parquet:   0%|          | 0.00/517M [00:00<?, ?B/s]

train-00062-of-00075.parquet:   0%|          | 0.00/513M [00:00<?, ?B/s]

train-00063-of-00075.parquet:   0%|          | 0.00/514M [00:00<?, ?B/s]

train-00064-of-00075.parquet:   0%|          | 0.00/515M [00:00<?, ?B/s]

train-00065-of-00075.parquet:   0%|          | 0.00/513M [00:00<?, ?B/s]

train-00066-of-00075.parquet:   0%|          | 0.00/514M [00:00<?, ?B/s]

train-00067-of-00075.parquet:   0%|          | 0.00/515M [00:00<?, ?B/s]

train-00068-of-00075.parquet:   0%|          | 0.00/513M [00:00<?, ?B/s]

train-00069-of-00075.parquet:   0%|          | 0.00/515M [00:00<?, ?B/s]

train-00070-of-00075.parquet:   0%|          | 0.00/500M [00:00<?, ?B/s]

train-00071-of-00075.parquet:   0%|          | 0.00/493M [00:00<?, ?B/s]

train-00072-of-00075.parquet:   0%|          | 0.00/492M [00:00<?, ?B/s]

train-00073-of-00075.parquet:   0%|          | 0.00/492M [00:00<?, ?B/s]

train-00074-of-00075.parquet:   0%|          | 0.00/491M [00:00<?, ?B/s]

test-00000-of-00003.parquet:   0%|          | 0.00/428M [00:00<?, ?B/s]

test-00001-of-00003.parquet:   0%|          | 0.00/443M [00:00<?, ?B/s]

test-00002-of-00003.parquet:   0%|          | 0.00/454M [00:00<?, ?B/s]

dev_test-00000-of-00003.parquet:   0%|          | 0.00/430M [00:00<?, ?B/s]

dev_test-00001-of-00003.parquet:   0%|          | 0.00/439M [00:00<?, ?B/s]

dev_test-00002-of-00003.parquet:   0%|          | 0.00/452M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/261657 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/9265 [00:00<?, ? examples/s]

Generating dev_test split:   0%|          | 0/9263 [00:00<?, ? examples/s]

Loading dataset shards:   0%|          | 0/74 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/75 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/75 [00:00<?, ?it/s]

jq/kinyarwanda-speech-hackathon: 261657 rows
jq/kinyarwanda-speech-hackathon: 8263 rows
Total rows: 269920


README.md:   0%|          | 0.00/2.03k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/48.5M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/1000 [00:00<?, ? examples/s]

Unnamed: 0,source,target,source.language,target.language
0,Your browser does not support the audio element.,ubutabera ndetse n'uburenganzira bw'umuntu ni ikintu gituma iterambere ry'igihugu ryiyongera ubu ngubu hashyizweho n'ikintu aho umuntu akoze icyaha avanwa aho yakoreye icyaha imbere y'abaturage agahabwa ubutabera ndetse n'uburenganzira bw'umuntu bukabungabungwa aho usanga ari ibintu byiza mu bituma iterambere ry'igihugu rizamuka,kin,kin
1,Your browser does not support the audio element.,aba ngaba rero nk'uko mubibona murabona ko ari abaturage bateranye bisa n'aho bari mu nama bari kugenda babwirwa bimwe mu bintu byinshi bimwe mu byakuka igihugu nk'uko mubibona rero murabona ko ahantu hadakoye hakoreshejwe amabara y'ibendera ry'u rwanda rero biragaragara ko ari inama y'abaturage bahurijwe hamwe kugira ngo babwirwe bimwe mu byakubaka igihugu,kin,kin
2,Your browser does not support the audio element.,ikimera gikorwamo umuti uzwi nk'umuti gakondo ukoreshwa havurwa indwara zitandukanye yaba ari indwara z'uruhu indwara zo mu nda ndetse n'izindi ndwara kugira ngo uzirwaye abashe kugira ubuzima bwiza yitabweho kandi akomeze kumererwa neza,kin,kin
3,Your browser does not support the audio element.,amafunguro meza cyane rwose abereye ijisho ateguye ku masahani asa neza imbere hari wino iri mu icupa ryiza r'umutuku aha hantu biragaragara ko ari hoteri nziza itanga serivisi,kin,kin
4,Your browser does not support the audio element.,itsinda ry'abantu bifotoje bahagaze bambaye imyenda irimo amabara y'ubururu umutuku cyatsi n'ayandi inyuma ya bo hari inzu iteye irangi ry'icyatsi muri koridoro ndetse n'ubururu inyuma ku gikuta,kin,kin


In [7]:
# ==== Processor / Tokenizer ==== 
processor = AutoProcessor.from_pretrained(config['pretrained_model'])
feature_extractor = processor.feature_extractor
tokenizer = processor.tokenizer


# ==== Data Collator ==== 
data_collator = DataCollator(processor=processor, padding=True)

preprocessor_config.json:   0%|          | 0.00/254 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/397 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.34M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/96.0 [00:00<?, ?B/s]

In [8]:
# ==== Dataset Preparation ==== 
def prepare_dataset(batch):
    # Extract input features
    batch['input_values'] = processor(batch['source'], sampling_rate=16000).input_values
    # Tokenize labels
    batch['labels'] = processor.tokenizer(batch['target']).input_ids
    return batch

train_data = train_ds.map(prepare_dataset, batched=True, remove_columns=['source','target'])
val_data = valid_ds.map(prepare_dataset, batched=True, remove_columns=['source','target'])


In [9]:
# ==== Metrics ==== 
compute_metrics = salt.metrics.multilingual_eval_fn(
    valid_ds,
    [evaluate.load('wer'), evaluate.load('cer')],
    tokenizer,
    log_first_N_predictions=2,
    speech_processor=processor
)

# ==== Model Setup ==== 
model = AutoModelForCTC.from_pretrained(
    config['pretrained_model'],
    pad_token_id=tokenizer.pad_token_id,
    vocab_size=len(tokenizer),
    **config['Wav2Vec2ForCTC_args']
)


Downloading builder script:   0%|          | 0.00/5.13k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/6.61k [00:00<?, ?B/s]

Resolving data files:   0%|          | 0/75 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/75 [00:00<?, ?it/s]

jq/kinyarwanda-speech-hackathon: 100 rows
Total rows: 100


config.json:   0%|          | 0.00/2.04k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/3.86G [00:00<?, ?B/s]

In [10]:
# Load or init adapter
if config.get('pretrained_adapter'):
    model.load_adapter(config['pretrained_adapter'])
else:
    model.init_adapter_layers()

# Freeze base and train only adapter weights
model.freeze_base_model()
adapters = model._get_adapters()
for p in adapters.values():
    p.requires_grad = True


Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


(…)484fa50b63903c17c08ff761bba0d7062f1d80cd:   0%|          | 0.00/9.36M [00:00<?, ?B/s]

In [11]:
# ==== Training Arguments ==== 
report_to = []
if use_wandb: report_to.append('wandb')
if use_mlflow: report_to.append('mlflow')
config['training_args']['report_to'] = report_to

tf_args = transformers.TrainingArguments(**config['training_args'])


In [12]:
# ==== Trainer ==== 
trainer = Trainer(
    model=model,
    args=tf_args,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    train_dataset=train_data,
    eval_dataset=val_data,
    tokenizer=feature_extractor,
    callbacks=[salt.utils.MlflowExtendedLoggingCallback()] if use_mlflow else None
)

# ==== MLflow Experiment ==== 
if use_mlflow:
    if not mlflow.get_experiment_by_name(config['mlflow_experiment_name']):
        mlflow.create_experiment(config['mlflow_experiment_name'])
    mlflow.set_experiment(config['mlflow_experiment_name'])


  trainer = Trainer(


In [None]:
# ==== Run Training ==== 
with mlflow.start_run(run_name=config['mlflow_run_name'], log_system_metrics=use_mlflow) as run:
    mlflow.log_params(config)
    trainer.train()
    metrics = trainer.evaluate()
    trainer.save_model()

    # Save adapter weights
    adapter_file = WAV2VEC2_ADAPTER_SAFE_FILE.format(config['adapter_save_id'])
    adapter_path = os.path.join(tf_args.output_dir, adapter_file)
    safe_save_file(model._get_adapters(), adapter_path, metadata={"format": "pt"})

    # Log artifacts
    mlflow.log_artifact(os.path.join(tf_args.output_dir, 'preprocessor_config.json'), artifact_path='model_artifacts')
    mlflow.log_artifact(os.path.join(tf_args.output_dir, 'training_args.bin'), artifact_path='model_artifacts')
    mlflow.log_artifact(adapter_path, artifact_path='model_artifacts')


2025/06/24 00:10:45 INFO mlflow.system_metrics.system_metrics_monitor: Skip logging GPU metrics. Set logger level to DEBUG for more details.
2025/06/24 00:10:45 INFO mlflow.system_metrics.system_metrics_monitor: Started monitoring system metrics.


Resolving data files:   0%|          | 0/75 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/75 [00:00<?, ?it/s]

Loading dataset shards:   0%|          | 0/74 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/75 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/75 [00:00<?, ?it/s]

jq/kinyarwanda-speech-hackathon: 261657 rows
jq/kinyarwanda-speech-hackathon: 8263 rows
Total rows: 269920


Step,Training Loss,Validation Loss,Wer Kin,Wer Mean,Cer Kin,Cer Mean
100,4.7124,2.865982,1.0,1.0,0.999,0.999
200,1.3773,0.233008,0.288,0.288,0.061,0.061
300,0.5212,0.215353,0.269,0.269,0.057,0.057
400,0.5084,0.205482,0.262,0.262,0.056,0.056
500,0.4863,0.199843,0.263,0.263,0.055,0.055
600,0.4852,0.203514,0.261,0.261,0.054,0.054
700,0.5055,0.199119,0.261,0.261,0.054,0.054
800,0.4885,0.194146,0.257,0.257,0.053,0.053
900,0.4749,0.194607,0.253,0.253,0.052,0.052


Resolving data files:   0%|          | 0/75 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/75 [00:00<?, ?it/s]

jq/kinyarwanda-speech-hackathon: 100 rows
Total rows: 100
First N predictions in eval set:
Prediction (kin to kin): "", True label: "umugore wambaye umupira w'akazi mpuzankano iri mu ibara ry'umuhondo handitseho amagambo yandikishije ibara ry'ubururu afite igikoresho cy'itumanaho gikoreshwa mu guhamagara no kwandika ubutumwa bugufi"
Prediction (kin to kin): "", True label: "uburyo emutiyene yatangije wishyura amafaranga kuri terefone ukoresheje cyangwa mudasobwa batagize amafaranga na make bagukata wohereza"


Resolving data files:   0%|          | 0/75 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/75 [00:00<?, ?it/s]

jq/kinyarwanda-speech-hackathon: 100 rows
Total rows: 100
First N predictions in eval set:
Prediction (kin to kin): "umugore wambaye umupira wakazi muzankano irimwibara ry'umuhondo handitse amagambo yandikishije ibara ry'ubururu afite igikoresho cy'ituma naho gikoreshwa mu guhamagara no kwandika ubutumwa bugufi", True label: "umugore wambaye umupira w'akazi mpuzankano iri mu ibara ry'umuhondo handitseho amagambo yandikishije ibara ry'ubururu afite igikoresho cy'itumanaho gikoreshwa mu guhamagara no kwandika ubutumwa bugufi"
Prediction (kin to kin): "uburyo em tieni yatangije wishyura amafaranga kuri terefone ukoresheje cyangwa mudasobwa batagize amafaranga n'amake bagukata wohereza", True label: "uburyo emutiyene yatangije wishyura amafaranga kuri terefone ukoresheje cyangwa mudasobwa batagize amafaranga na make bagukata wohereza"


Resolving data files:   0%|          | 0/75 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/75 [00:00<?, ?it/s]

jq/kinyarwanda-speech-hackathon: 100 rows
Total rows: 100
First N predictions in eval set:
Prediction (kin to kin): "umugore wambaye umupira w'akazi muzankano irimw'ibara ry'umuhondo handitse amagambo yandikishije ibara ry'ubururu afite igikoresho cyituma naho gikoreshwa mu guhamagara no kwandika ubutumwa bugufi", True label: "umugore wambaye umupira w'akazi mpuzankano iri mu ibara ry'umuhondo handitseho amagambo yandikishije ibara ry'ubururu afite igikoresho cy'itumanaho gikoreshwa mu guhamagara no kwandika ubutumwa bugufi"
Prediction (kin to kin): "uburyo emutiyeni yatangije wishyura amafaranga kuri terefone ukoresheje cyangwa mudasobwa batagize amafaranga n'amake bagukata wohereza", True label: "uburyo emutiyene yatangije wishyura amafaranga kuri terefone ukoresheje cyangwa mudasobwa batagize amafaranga na make bagukata wohereza"


Resolving data files:   0%|          | 0/75 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/75 [00:00<?, ?it/s]

jq/kinyarwanda-speech-hackathon: 100 rows
Total rows: 100
First N predictions in eval set:
Prediction (kin to kin): "umugore wambaye umupira w'akazi muzankano irimw ibara ry'umuhondo handitse amagambo yandikishije ibara ry'ubururu afite igikoresho cy'ituma naho gikoreshwa mu guhamagara no kwandika ubutumwa bugufi", True label: "umugore wambaye umupira w'akazi mpuzankano iri mu ibara ry'umuhondo handitseho amagambo yandikishije ibara ry'ubururu afite igikoresho cy'itumanaho gikoreshwa mu guhamagara no kwandika ubutumwa bugufi"
Prediction (kin to kin): "uburyo emutiyeni yatangije wishyura amafaranga kuri terefone ukoresheje cyangwa mudasobwa batagize amafaranga n'amake bagukata wohereza", True label: "uburyo emutiyene yatangije wishyura amafaranga kuri terefone ukoresheje cyangwa mudasobwa batagize amafaranga na make bagukata wohereza"


Resolving data files:   0%|          | 0/75 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/75 [00:00<?, ?it/s]

jq/kinyarwanda-speech-hackathon: 100 rows
Total rows: 100
First N predictions in eval set:
Prediction (kin to kin): "umugore wambaye umupira wakazi mu zankano irimw ibara ry'umuhondo handitse amagambo yandikishije ibara ry'ubururu afite igikoresho cy'ituma naho gikoreshwa mu guhamagara no kwandika ubutumwa bugufi", True label: "umugore wambaye umupira w'akazi mpuzankano iri mu ibara ry'umuhondo handitseho amagambo yandikishije ibara ry'ubururu afite igikoresho cy'itumanaho gikoreshwa mu guhamagara no kwandika ubutumwa bugufi"
Prediction (kin to kin): "uburyo emutieni yatangije wishyura amafaranga kuri terefone ukoresheje cyangwa mudasobwa batagize amafaranga n'amake bagukata wohereza", True label: "uburyo emutiyene yatangije wishyura amafaranga kuri terefone ukoresheje cyangwa mudasobwa batagize amafaranga na make bagukata wohereza"


Resolving data files:   0%|          | 0/75 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/75 [00:00<?, ?it/s]

jq/kinyarwanda-speech-hackathon: 100 rows
Total rows: 100
First N predictions in eval set:
Prediction (kin to kin): "umugore wambaye umupira w'akazi muzankano irimu ibara ry'umuhondo handitse amagambo yandikishije ibara ry'ubururu afite igikoresho cy'ituma naho gikoreshwa mu guhamagara no kwandika ubutumwa bugufi", True label: "umugore wambaye umupira w'akazi mpuzankano iri mu ibara ry'umuhondo handitseho amagambo yandikishije ibara ry'ubururu afite igikoresho cy'itumanaho gikoreshwa mu guhamagara no kwandika ubutumwa bugufi"
Prediction (kin to kin): "uburyo emutiyen yatangije wishyura amafaranga kuri terefone ukoresheje cyangwa mudasobwa batagize amafaranga n'amake bagukata wohereza", True label: "uburyo emutiyene yatangije wishyura amafaranga kuri terefone ukoresheje cyangwa mudasobwa batagize amafaranga na make bagukata wohereza"


Resolving data files:   0%|          | 0/75 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/75 [00:00<?, ?it/s]

jq/kinyarwanda-speech-hackathon: 100 rows
Total rows: 100
First N predictions in eval set:
Prediction (kin to kin): "umugore wambaye umupira wakazi muzankano irimu ibara ry'umuhondo handitse amagambo yandikishije ibara ry'ubururu afite igikoresho cy'ituma naho gikoreshwa mu guhamagara no kwandika ubutumwa bugufi", True label: "umugore wambaye umupira w'akazi mpuzankano iri mu ibara ry'umuhondo handitseho amagambo yandikishije ibara ry'ubururu afite igikoresho cy'itumanaho gikoreshwa mu guhamagara no kwandika ubutumwa bugufi"
Prediction (kin to kin): "uburyo emutieni yatangije wishyura amafaranga kuri terefone ukoresheje cyangwa mudasobwa batagize amafaranga n'amake bagukata wohereza", True label: "uburyo emutiyene yatangije wishyura amafaranga kuri terefone ukoresheje cyangwa mudasobwa batagize amafaranga na make bagukata wohereza"


Resolving data files:   0%|          | 0/75 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/75 [00:00<?, ?it/s]

jq/kinyarwanda-speech-hackathon: 100 rows
Total rows: 100
First N predictions in eval set:
Prediction (kin to kin): "umugore wambaye umupira w'akazi muzankano irimu ibara ry'umuhondo handitse amagambo yandikishije ibara ry'ubururu afite igikoresho cy'ituma naho gikoreshwa mu guhamagara no kwandika ubutumwa bugufi", True label: "umugore wambaye umupira w'akazi mpuzankano iri mu ibara ry'umuhondo handitseho amagambo yandikishije ibara ry'ubururu afite igikoresho cy'itumanaho gikoreshwa mu guhamagara no kwandika ubutumwa bugufi"
Prediction (kin to kin): "uburyo emutiyeni yatangije wishyura amafaranga kuri terefone ukoresheje cyangwa mudasobwa batagize amafaranga n'amake bagukata wohereza", True label: "uburyo emutiyene yatangije wishyura amafaranga kuri terefone ukoresheje cyangwa mudasobwa batagize amafaranga na make bagukata wohereza"


Resolving data files:   0%|          | 0/75 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/75 [00:00<?, ?it/s]

jq/kinyarwanda-speech-hackathon: 100 rows
Total rows: 100
First N predictions in eval set:
Prediction (kin to kin): "umugore wambaye umupira wakazi muzankano irimu ibara ry'umuhondo handitse amagambo yandikishije ibara ry'ubururu afite igikoresho cy'itumanaho gikoreshwa mu guhamagara no kwandika ubutumwa bugufi", True label: "umugore wambaye umupira w'akazi mpuzankano iri mu ibara ry'umuhondo handitseho amagambo yandikishije ibara ry'ubururu afite igikoresho cy'itumanaho gikoreshwa mu guhamagara no kwandika ubutumwa bugufi"
Prediction (kin to kin): "uburyo emutiyeni yatangije wishyura amafaranga kuri terefone ukoresheje cyangwa mudasobwa batagize amafaranga n'amake bagukata wohereza", True label: "uburyo emutiyene yatangije wishyura amafaranga kuri terefone ukoresheje cyangwa mudasobwa batagize amafaranga na make bagukata wohereza"


In [None]:
# ==== Push to Hub ==== 
processor.push_to_hub(tf_args.output_dir, private=True)
model.push_to_hub(tf_args.output_dir, private=True)
