wav2vec

In [1]:
import json
import torchaudio
from speechbrain.utils.data_utils import get_all_files
from pathlib import Path

audiofiles = get_all_files('../../data/speech/', match_and=['.wav'])
print(len(audiofiles))

def split_data(audiofiles):
    
    train_valid_files, test_files = [], []
  
    for audiofile in audiofiles:

        if 'FR01' in audiofile:
            if 'H03' in audiofile:
                test_files.append(audiofile)
            elif 'F03' in audiofile:
                test_files.append(audiofile)
            else:
                train_valid_files.append(audiofile)

        elif 'FR02' in audiofile:
            if 'H02' in audiofile:
                test_files.append(audiofile)
            elif 'F02' in audiofile:
                test_files.append(audiofile)
            else:
                train_valid_files.append(audiofile)

        elif 'GB' in audiofile:
            if 'H02' in audiofile:
                test_files.append(audiofile)
            elif 'F02' in audiofile:
                test_files.append(audiofile) 
            else:
                train_valid_files.append(audiofile)
  
    return train_valid_files, test_files

# Splitting data
train_valid_files, test_files = split_data(audiofiles)

torchvision is not available - cannot save figures


334


In [2]:
def create_json(json_file, audiolist):
    json_dict = {}
    for audiofile in audiolist:
      # Getting info
      audioinfo = torchaudio.info(audiofile)

      # Compute the duration in seconds.
      # This is the number of samples divided by the sampling frequency
      duration =  round(audioinfo.num_frames/audioinfo.sample_rate, 2)

      # Get digit Label by manipulating the audio path
      level =  audiofile.split('/')[-2]

      # Get a unique utterance id
      uttid =  audiofile.split('.wav')[0].split('/')[-1]
      
      # Get gender
      if uttid[0] == 'F':
          gender = "female"
      else:
          gender = "male"

      # Create entry for this utterance
      json_dict[uttid] = {
              "path": audiofile,
              "length": duration,
              "level": level,
              "gender": gender
      }

      # Writing the dictionary to the json file
    with open(json_file, mode="w") as json_f:
      json.dump(json_dict, json_f, indent=2)

In [3]:
create_json('train_valid.json', train_valid_files)
create_json('test.json', test_files)   

print(len(train_valid_files))
print(len(test_files))

304
30


In [50]:
%%file hparams_wav2vec.yaml

# Seed needs to be set at top of yaml, before objects with parameters are made
seed: 1993
__set_seed: !apply:torch.manual_seed [!ref <seed>]


# URL for the wav2vec2 model, you can change to benchmark diffrenet models
# Important: we use wav2vec2 base and not the fine-tuned one with ASR task
# This allow you to have ~4% improvment
sslmodel_hub: facebook/wav2vec2-base
sslmodel_folder: ./results/Level_Gender_Classification/wav2vec/1986/save

# Path where data manifest files are stored
train_valid_annotation: train_valid.json
test_annotation: test.json
data_augmentation: False


# Training parameters
number_of_epochs: 5
batch_size: 16
lr: 0.0001
lr_sslmodel: 0.00001
sample_rate: 16000

#freeze all wav2vec2
freeze_sslmodel: False
# freeze_sslmodel: True
#set to true to freeze the CONV part of the wav2vec2 model
# We see an improvement of 2% with freezing CNNs
freeze_sslmodel_conv: True

# Model parameters
encoder_dim: 768

n_classes: 3 # In this case, we have 3 levels
n_gender_classes: 2 # male and female

dataloader_options:
    batch_size: !ref <batch_size>
    shuffle: True
    num_workers: 2  # 2 on linux but 0 works on windows
    drop_last: False

# Wav2vec2 encoder
sslmodel: !new:speechbrain.lobes.models.huggingface_wav2vec.HuggingFaceWav2Vec2
    source: !ref <sslmodel_hub>
    output_norm: True
    freeze: !ref <freeze_sslmodel>
    freeze_feature_extractor: !ref <freeze_sslmodel_conv>
    save_path: !ref <sslmodel_folder>

avg_pool: !new:speechbrain.nnet.pooling.StatisticsPooling
    return_std: False

output_mlp: !new:speechbrain.nnet.linear.Linear
    input_size: !ref <encoder_dim>
    n_neurons: !ref <n_classes>
    bias: False

gender_output_mlp: !new:speechbrain.nnet.linear.Linear
    input_size: !ref <encoder_dim>
    n_neurons: !ref <n_gender_classes>
    bias: False

epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
    limit: !ref <number_of_epochs>

modules:
    sslmodel: !ref <sslmodel>
    output_mlp: !ref <output_mlp>
    gender_output_mlp: !ref <gender_output_mlp>

model: !new:torch.nn.ModuleList
    - [!ref <output_mlp>]

log_softmax: !new:speechbrain.nnet.activations.Softmax
    apply_log: True

compute_cost: !name:speechbrain.nnet.losses.nll_loss

error_stats: !name:speechbrain.utils.metric_stats.MetricStats
    metric: !name:speechbrain.nnet.losses.classification_error
        reduction: batch
            
gender_error_stats: !name:speechbrain.utils.metric_stats.MetricStats
    metric: !name:speechbrain.nnet.losses.classification_error
        reduction: batch
       
opt_class: !name:torch.optim.Adam
    lr: !ref <lr>

sslmodel_opt_class: !name:torch.optim.Adam
    lr: !ref <lr_sslmodel>

lr_annealing: !new:speechbrain.nnet.schedulers.NewBobScheduler
    initial_value: !ref <lr>
    improvement_threshold: 0.0025
    annealing_factor: 0.9
    patient: 0

lr_annealing_sslmodel: !new:speechbrain.nnet.schedulers.NewBobScheduler
    initial_value: !ref <lr_sslmodel>
    improvement_threshold: 0.0025
    annealing_factor: 0.9

recoverables:
    model: !ref <model>
    sslmodel: !ref <sslmodel>
    lr_annealing_output: !ref <lr_annealing>
    lr_annealing_sslmodel: !ref <lr_annealing_sslmodel>
    counter: !ref <epoch_counter>

Overwriting hparams_wav2vec.yaml


In [62]:
%%file train.py

"Recipe for training a level_gender classification system."
import os
import sys
import torch
import torchaudio
import speechbrain as sb
from hyperpyyaml import load_hyperpyyaml
from sklearn.model_selection import StratifiedKFold
import pandas as pd
from sklearn.model_selection import GroupKFold
import json
import random

# Brain class for speech enhancement training
class DigitBrain(sb.Brain):
    """Class that manages the training loop. See speechbrain.core.Brain."""

    def compute_forward(self, batch, stage):
        """Runs all the computations that transforms the input into the
        output probabilities over the N classes.

        Arguments
        ---------
        batch : PaddedBatch
            This batch object contains all the relevant tensors for computation.
        stage : sb.Stage
            One of sb.Stage.TRAIN, sb.Stage.VALID, or sb.Stage.TEST.
        Returns
        -------
        predictions : Tensor
            Tensor that contains the posterior probabilities over the N classes.
        """
        batch = batch.to(self.device)
        wavs, lens = batch.sig
        
        outputs = self.modules.sslmodel(wavs, lens)

        # last dim will be used for AdaptativeAVG pool
        outputs = self.hparams.avg_pool(outputs, lens)
        outputs = outputs.view(outputs.shape[0], -1)

        level_logits = self.modules.output_mlp(outputs)
        gender_logits = self.modules.gender_output_mlp(outputs)

        level_predictions = self.hparams.log_softmax(level_logits)
        gender_predictions = self.hparams.log_softmax(gender_logits)
        
        return level_predictions, gender_predictions


    def compute_objectives(self, predictions, batch, stage):
        """Computes the loss given the predicted and targeted outputs.

        Arguments
        ---------
        predictions : tensor
            The output tensor from `compute_forward`.
        batch : PaddedBatch
            This batch object contains all the relevant tensors for computation.
        stage : sb.Stage
            One of sb.Stage.TRAIN, sb.Stage.VALID, or sb.Stage.TEST.
        Returns
        -------
        loss : torch.Tensor
            A one-element tensor used for backpropagating the gradient.
        """

        levelId, _ = batch.level_encoded
        genderId, _ = batch.gender_encoded

        levelId = levelId.squeeze(1)
        genderId = genderId.squeeze(1)
        
        level_predictions, gender_predictions = predictions
        
        level_loss = self.hparams.compute_cost(level_predictions, levelId)
        gender_loss = self.hparams.compute_cost(gender_predictions, genderId)

        if stage != sb.Stage.TRAIN:
            self.error_metrics.append(batch.id, level_predictions, levelId)
            self.gender_error_metrics.append(batch.id, gender_predictions, genderId)
        
        loss = gender_loss + (3*level_loss)
        
        return loss
    

    def fit_batch(self, batch):
        """Trains the parameters given a single batch in input"""

        predictions = self.compute_forward(batch, sb.Stage.TRAIN)
        loss = self.compute_objectives(predictions, batch, sb.Stage.TRAIN)
        loss.backward()
        if self.check_gradients(loss):
            self.sslmodel_optimizer.step()
            self.optimizer.step()

        self.sslmodel_optimizer.zero_grad()
        self.optimizer.zero_grad()

        return loss.detach()

    def on_stage_start(self, stage, epoch=None):
        """Gets called at the beginning of each epoch.
        Arguments
        ---------
        stage : sb.Stage
            One of sb.Stage.TRAIN, sb.Stage.VALID, or sb.Stage.TEST.
        epoch : int
            The currently-starting epoch. This is passed
            `None` during the test stage.
        """

        # Set up statistics trackers for this stage
        self.loss_metric = sb.utils.metric_stats.MetricStats(
            metric=sb.nnet.losses.nll_loss
        )
        self.gender_loss_metric = sb.utils.metric_stats.MetricStats(
            metric=sb.nnet.losses.nll_loss
        )

        # Set up evaluation-only statistics trackers
        if stage != sb.Stage.TRAIN:
            self.error_metrics = self.hparams.error_stats()
            self.gender_error_metrics = self.hparams.gender_error_stats()


    def on_stage_end(self, stage, stage_loss, epoch=None):
        """Gets called at the end of an epoch.
        Arguments
        ---------
        stage : sb.Stage
            One of sb.Stage.TRAIN, sb.Stage.VALID, sb.Stage.TEST
        stage_loss : float
            The average loss for all of the data processed in this stage.
        epoch : int
            The currently-starting epoch. This is passed
            `None` during the test stage.
        """

        # Store the train loss until the validation stage.
        if stage == sb.Stage.TRAIN:
            self.train_loss = stage_loss

        # Summarize the statistics from the stage for record-keeping.
        else:
            stats = {
                "loss": stage_loss,
                "error_rate": self.error_metrics.summarize("average"),
                "gender_error": self.gender_error_metrics.summarize("average"),
            }

        # At the end of validation...
        if stage == sb.Stage.VALID:

            old_lr, new_lr = self.hparams.lr_annealing(stats["error_rate"])
            sb.nnet.schedulers.update_learning_rate(self.optimizer, new_lr)

            (
                old_lr_sslmodel,
                new_lr_sslmodel,
            ) = self.hparams.lr_annealing_sslmodel(stats["error_rate"])
            sb.nnet.schedulers.update_learning_rate(
                self.sslmodel_optimizer, new_lr_sslmodel
            )

            # The train_logger writes a summary to stdout and to the logfile.
            self.hparams.train_logger.log_stats(
                {"Epoch": epoch, "lr": old_lr, "sslmodel_lr": old_lr_sslmodel},
                train_stats={"loss": self.train_loss},
                valid_stats=stats,
            )

            # Save the current checkpoint and delete previous checkpoints,
            self.checkpointer.save_and_keep_only(
                meta=stats, min_keys=["error_rate"]
            )

        # We also write statistics about test data to stdout and to logfile.
        if stage == sb.Stage.TEST:
            self.hparams.train_logger.log_stats(
                {"Epoch loaded": self.hparams.epoch_counter.current},
                test_stats=stats,
            )


    def init_optimizers(self):
        "Initializes the sslmodel optimizer and model optimizer"
        self.sslmodel_optimizer = self.hparams.sslmodel_opt_class(
            self.modules.sslmodel.parameters()
        )
        self.optimizer = self.hparams.opt_class(self.hparams.model.parameters())

        if self.checkpointer is not None:
            self.checkpointer.add_recoverable(
                "sslmodel_opt", self.sslmodel_optimizer
            )
            self.checkpointer.add_recoverable("optimizer", self.optimizer)

    def zero_grad(self, set_to_none=False):
        self.sslmodel_optimizer.zero_grad(set_to_none)
        self.optimizer.zero_grad(set_to_none)

def dataio_prep(hparams, phase):
    """This function prepares the datasets to be used in the brain class.
    It also defines the data processing pipeline through user-defined functions.
    We expect `prepare_mini_librispeech` to have been called before this,
    so that the `train.json`, `valid.json`,  and `valid.json` manifest files
    are available.
    Arguments
    ---------
    hparams : dict
        This dictionary is loaded from the `train.yaml` file, and it includes
        all the hyperparameters needed for dataset construction and loading.
    Returns
    -------
    datasets : dict
        Contains two keys, "train" and "valid" that correspond
        to the appropriate DynamicItemDataset object.
    """

    # Initialization of the label encoder. The label encoder assigns to each
    # of the observed label a unique index (e.g, 'FR01': 0, 'FR01': 1, ..)
    label_encoder = sb.dataio.encoder.CategoricalEncoder()
    gender_encoder = sb.dataio.encoder.CategoricalEncoder()

    # Define audio pipeline
    @sb.utils.data_pipeline.takes("path")
    @sb.utils.data_pipeline.provides("sig")
    def audio_pipeline_subset(path):
        """Load the signal, and pass it and its length to the corruption class.
        This is done on the CPU in the `collate_fn`."""
        sig, fs = torchaudio.load(path)

        # Resampling
        sig = sig[-1]
        sig = torchaudio.functional.resample(sig, fs, hparams["sample_rate"])
                
        # Get the 8 second of the sig
        maxNumber = sig.shape[-1] - (8 * 16000)
        number = random.randint(0, maxNumber)
        sig = sig[number:number + (8*16000)]
        
        return sig
        
    @sb.utils.data_pipeline.takes("path")
    @sb.utils.data_pipeline.provides("sig")
    def audio_pipeline(path):
        """Load the signal, and pass it and its length to the corruption class.
        This is done on the CPU in the `collate_fn`."""
        sig, fs = torchaudio.load(path)

        # Resampling
        sig = sig[-1]
        sig = torchaudio.functional.resample(sig, fs, hparams["sample_rate"])
        
        return sig

    # Define label pipeline:
    @sb.utils.data_pipeline.takes("level")
    @sb.utils.data_pipeline.provides("level", "level_encoded")
    def label_pipeline(level):
        """Defines the pipeline to process the digit labels.
        Note that we have to assign a different integer to each class
        through the label encoder.
        """
        yield level
        level_encoded = label_encoder.encode_label_torch(level)
        yield level_encoded
        
    @sb.utils.data_pipeline.takes("gender")
    @sb.utils.data_pipeline.provides("gender", "gender_encoded")
    def gender_pipeline(gender):
        """Defines the pipeline to process the digit labels.
        Note that we have to assign a different integer to each class
        through the label encoder.
        """
        yield gender
        gender_encoded = gender_encoder.encode_label_torch(gender)
        yield gender_encoded
        

    # Define datasets. We also connect the dataset with the data processing
    # functions defined above.
    datasets = {}
    if phase == "train":
        data_info = {
            "train": hparams["train_annotation"],
            "valid": hparams["valid_annotation"],
        }
        hparams["dataloader_options"]["shuffle"] = True
        for dataset in data_info:
            datasets[dataset] = sb.dataio.dataset.DynamicItemDataset.from_json(
                json_path=data_info[dataset],
                dynamic_items=[audio_pipeline_subset, label_pipeline, gender_pipeline],
                output_keys=["id", "sig", "level_encoded", "gender_encoded"],
            )
            
        # Load or compute the label encoder (with multi-GPU DDP support)
        # Please, take a look into the lab_enc_file to see the label to index
        # mapping.
        lab_enc_file = os.path.join(hparams["save_folder"], "label_encoder.txt")
        label_encoder.load_or_create(
            path=lab_enc_file,
            from_didatasets=[datasets["train"]],
            output_key="level",
        )

        lab_enc_file = os.path.join(hparams["save_folder"], "gender_encoder.txt")
        gender_encoder.load_or_create(
            path=lab_enc_file,
            from_didatasets=[datasets["train"]],
            output_key="gender",
        )
    else:
        data_info = {
            "test": hparams["test_annotation"],
            "train_valid": hparams["train_valid_annotation"],
        }
        hparams["dataloader_options"]["shuffle"] = True
        for dataset in data_info:
            datasets[dataset] = sb.dataio.dataset.DynamicItemDataset.from_json(
                json_path=data_info[dataset],
                dynamic_items=[audio_pipeline_subset, label_pipeline, gender_pipeline],
                output_keys=["id", "sig", "level_encoded", "gender_encoded"],
            )        
        # Load or compute the label encoder (with multi-GPU DDP support)
        # Please, take a look into the lab_enc_file to see the label to index
        # mapping.
        lab_enc_file = os.path.join(hparams["save_folder"], "label_encoder.txt")
        label_encoder.load_or_create(
            path=lab_enc_file,
            from_didatasets=[datasets["train_valid"]],
            output_key="level",
        )

        lab_enc_file = os.path.join(hparams["save_folder"], "gender_encoder.txt")
        gender_encoder.load_or_create(
            path=lab_enc_file,
            from_didatasets=[datasets["train_valid"]],
            output_key="gender",
        )

    return datasets


# Recipe begins!
if __name__ == "__main__":
      
    # Reading command line arguments.
    hparams_file, run_opts, overrides = sb.parse_arguments(sys.argv[1:])
        
    # Reading the train-valid data drom json
    id_list = []
    with open('train_valid.json', 'r') as f:
        json_data = json.load(f)

        # Access the data
        for key in json_data:
            if 'H' in key:
                if 'FR1' in key:
                    item = [key, 'FR1', 'Male']
                elif 'FR2' in key:
                    item = [key, 'FR2', 'Male']
                else:
                    item = [key, 'GB', 'Male']
            else:
                if 'FR1' in key:
                    item = [key, 'FR1', 'Female']
                elif 'FR2' in key:
                    item = [key, 'FR2', 'Female']
                else:
                    item = [key, 'GB', 'Female']    
            id_list.append(item)
    
    # Create the pandas DataFrame
    df = pd.DataFrame(id_list, columns=['ID', 'level', 'gender'])
    df['level_gender'] = df['level'].astype(str) + '_' + df['gender'].astype(str)
  
    # Create StratifiedKFold on level_gender
    skf = StratifiedKFold(n_splits=10)
    target = df.loc[:,'level_gender']
    
    fold_no = 1
    min_test_stats = 100
    for train_index, valid_index in skf.split(df, target):

        # Load hyperparameters file with command-line overrides.
        with open(hparams_file) as fin:
            hparams = load_hyperpyyaml(fin,  overrides)
        
        # Creating output_folder/save_folder/train_log/checkpointer/train_logger for each fold
        hparams['output_folder'] = './results/Level_Gender_Classification/Xvector/1986/Fold' + str(fold_no)
        hparams['save_folder'] = hparams['output_folder'] + "/save"
        hparams['train_log'] = hparams['output_folder'] + "/train_log.txt"
        hparams['checkpointer'] = sb.utils.checkpoints.Checkpointer(checkpoints_dir = hparams['save_folder'], recoverables = hparams['recoverables'])
        hparams['train_logger'] = sb.utils.train_logger.FileTrainLogger(save_file = hparams['train_log'])
        hparams['train_annotation'] = hparams['output_folder'] + '/train.json'
        hparams['valid_annotation'] = hparams['output_folder'] + '/valid.json'
        hparams['sslmodel_folder'] = hparams['save_folder'] + '/ssl_checkpoint'

        train = df.loc[train_index,:]
        valid = df.loc[valid_index,:]

        # Create train keys
        train_keys = []
        train_ids = df['ID'].iloc[train_index]
        for train_id in train_ids:
            train_keys.append(train_id)
        
        # Create valid keys
        valid_keys = []
        val_ids = df['ID'].iloc[valid_index]
        for val_id in val_ids:
            valid_keys.append(val_id)
        
        # Filter the data based on a condition
        train_filtered_data = {key: json_data[key] for key in json_data if key in train_keys}
        valid_filtered_data = {key: json_data[key] for key in json_data if key in valid_keys}

        # Convert the filtered data to JSON format
        train_json_output = json.dumps(train_filtered_data)
        train_dict = json.loads(train_json_output)
                
        # Convert the filtered data to JSON format
        valid_json_output = json.dumps(valid_filtered_data)
        valid_dict = json.loads(valid_json_output)
        
        # Create json for validation set
        with open(hparams['output_folder'] + '/train.json', 'w') as f:
            json.dump(train_dict, f, indent=2)   
            
        # Create json for validation set
        with open(hparams['output_folder'] + '/valid.json', 'w') as f:
            json.dump(valid_dict, f, indent=2)

        # Start print
        print('--------------------------------')
        print(f'FOLD {fold_no}')
        print('--------------------------------')

        datasets = dataio_prep(hparams, "train")
        
        # Create experiment directory
        sb.create_experiment_directory(
            experiment_directory=hparams["output_folder"],
            hyperparams_to_save=hparams_file,
            overrides=overrides,
        )

        hparams["sslmodel"] = hparams["sslmodel"].to(device=run_opts["device"])
        # freeze the feature extractor part when unfreezing
        if not hparams["freeze_sslmodel"] and hparams["freeze_sslmodel_conv"]:
            hparams["sslmodel"].model.feature_extractor._freeze_parameters()

        # Initialize the Brain object to prepare for mask training.
        digit_brain = DigitBrain(
            modules=hparams["modules"],
            opt_class=hparams["opt_class"],
            hparams=hparams,
            run_opts=run_opts,
            checkpointer=hparams["checkpointer"],
        )

        # The `fit()` method iterates the training loop, calling the methods
        # necessary to update the parameters of the model. Since all objects
        # with changing state are managed by the Checkpointer, training can be
        # stopped at any point, and will be resumed on next call.
        digit_brain.fit(
            epoch_counter=digit_brain.hparams.epoch_counter,
            train_set=datasets["train"],
            valid_set=datasets["valid"],
            train_loader_kwargs=hparams["dataloader_options"],
            valid_loader_kwargs=hparams["dataloader_options"],
        )

        #Load the best checkpoint for evaluation
        test_stats = digit_brain.evaluate(
            test_set=datasets["valid"],
            min_key="error",
            test_loader_kwargs=hparams["dataloader_options"],
        )
        
        # Getting the best fold base on test_stats
        if test_stats < min_test_stats: 
            min_test_stats = test_stats
            best_model = digit_brain
            best_fold = fold_no
        
        fold_no += 1
        
    # Train the model with train+valid data and evaluate it by test data 
    # Load hyperparameters file with command-line overrides.
    with open(hparams_file) as fin:
        hparams = load_hyperpyyaml(fin,  overrides)
    
    # Creating output_folder/save_folder/train_log/checkpointer/train_logger for each fold
    hparams['output_folder'] = './results/Level_Gender_Classification/wav2vec/1986/FinalFold'
    hparams['save_folder'] = hparams['output_folder'] + "/save"
    hparams['train_log'] = hparams['output_folder'] + "/train_log.txt"
    hparams['checkpointer'] = sb.utils.checkpoints.Checkpointer(checkpoints_dir = hparams['save_folder'], recoverables = hparams['recoverables'])
    hparams['train_logger'] = sb.utils.train_logger.FileTrainLogger(save_file = hparams['train_log'])

    # Start print
    print('--------------------------------')
    print(f'Final Result')
    print('--------------------------------')

    datasets = dataio_prep(hparams, "test")
    
    # Create experiment directory
    sb.create_experiment_directory(
        experiment_directory=hparams["output_folder"],
        hyperparams_to_save=hparams_file,
        overrides=overrides,
    )


    hparams["sslmodel"] = hparams["sslmodel"].to(device=run_opts["device"])
    # freeze the feature extractor part when unfreezing
    if not hparams["freeze_sslmodel"] and hparams["freeze_sslmodel_conv"]:
        hparams["sslmodel"].model.feature_extractor._freeze_parameters()

    # Initialize the Brain object to prepare for mask training.
    digit_brain = DigitBrain(
        modules=hparams["modules"],
        opt_class=hparams["opt_class"],
        hparams=hparams,
        run_opts=run_opts,
        checkpointer=hparams["checkpointer"],
    )

    # The `fit()` method iterates the training loop, calling the methods
    # necessary to update the parameters of the model. Since all objects
    # with changing state are managed by the Checkpointer, training can be
    # stopped at any point, and will be resumed on next call.
    digit_brain.fit(
        epoch_counter=digit_brain.hparams.epoch_counter,
        train_set=datasets["train_valid"],
        train_loader_kwargs=hparams["dataloader_options"],
    )

    #Load the best checkpoint for evaluation
    test_stats = digit_brain.evaluate(
        test_set=datasets["test"],
        min_key="error",
        test_loader_kwargs=hparams["dataloader_options"],
    )

Overwriting train.py


In [63]:
!rm -rf ./results/Level_Gender_Classification/wav2vec/1986

!python train.py hparams_wav2vec.yaml --device='cuda:0' --data_augmentation=True

torchvision is not available - cannot save figures
Downloading (…)rocessor_config.json: 100%|█████| 159/159 [00:00<00:00, 13.7kB/s]
Downloading (…)lve/main/config.json: 100%|██| 1.84k/1.84k [00:00<00:00, 161kB/s]
Downloading pytorch_model.bin: 100%|█████████| 380M/380M [00:04<00:00, 92.2MB/s]
Some weights of the model checkpoint at facebook/wav2vec2-base were not used when initializing Wav2Vec2Model: ['project_q.weight', 'project_q.bias', 'project_hid.weight', 'quantizer.codevectors', 'quantizer.weight_proj.weight', 'quantizer.weight_proj.bias', 'project_hid.bias']
- This IS expected if you are initializing Wav2Vec2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing Wav2Vec2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertF

speechbrain.core - Beginning experiment!
speechbrain.core - Experiment folder: ./results/Level_Gender_Classification/Xvector/1986/Fold4
speechbrain.core - 90.2M trainable parameters in DigitBrain
speechbrain.utils.checkpoints - Loading a checkpoint from results/Level_Gender_Classification/Xvector/1986/Fold4/save/CKPT+2023-04-15+23-51-19+00
speechbrain.utils.checkpoints - Would load a checkpoint here, but none found yet.
100%|█████████████████████████████████████████████| 2/2 [00:01<00:00,  1.69it/s]
speechbrain.utils.train_logger - Epoch loaded: 5 - test loss: 9.68e-02, test error_rate: 0.00e+00, test gender_error: 0.00e+00
Some weights of the model checkpoint at facebook/wav2vec2-base were not used when initializing Wav2Vec2Model: ['project_q.weight', 'project_q.bias', 'project_hid.weight', 'quantizer.codevectors', 'quantizer.weight_proj.weight', 'quantizer.weight_proj.bias', 'project_hid.bias']
- This IS expected if you are initializing Wav2Vec2Model from the checkpoint of a model tr

--------------------------------
FOLD 8
--------------------------------
speechbrain.dataio.encoder - Load called, but CategoricalEncoder is not empty. Loaded data will overwrite everything. This is normal if there is e.g. an unk label defined at init.
speechbrain.dataio.encoder - Load called, but CategoricalEncoder is not empty. Loaded data will overwrite everything. This is normal if there is e.g. an unk label defined at init.
speechbrain.core - Beginning experiment!
speechbrain.core - Experiment folder: ./results/Level_Gender_Classification/Xvector/1986/Fold8
speechbrain.core - 90.2M trainable parameters in DigitBrain
speechbrain.utils.checkpoints - Loading a checkpoint from results/Level_Gender_Classification/Xvector/1986/Fold8/save/CKPT+2023-04-15+23-57-03+00
speechbrain.utils.checkpoints - Would load a checkpoint here, but none found yet.
100%|█████████████████████████████████████████████| 2/2 [00:01<00:00,  1.20it/s]
speechbrain.utils.train_logger - Epoch loaded: 5 - test loss: 

100%|█████████████████████████| 19/19 [00:14<00:00,  1.35it/s, train_loss=0.731]
speechbrain.utils.epoch_loop - Going into epoch 5
100%|█████████████████████████| 19/19 [00:14<00:00,  1.35it/s, train_loss=0.954]
speechbrain.utils.checkpoints - Would load a checkpoint here, but none found yet.
100%|█████████████████████████████████████████████| 2/2 [00:01<00:00,  1.20it/s]
speechbrain.utils.train_logger - Epoch loaded: 5 - test loss: 2.05, test error_rate: 2.00e-01, test gender_error: 0.00e+00


In [None]:
loss = gender_loss + level_loss

In [None]:
torchvision is not available - cannot save figures
/home/as03720/anaconda3/envs/workspace/lib/python3.9/site-packages/transformers/configuration_utils.py:379: UserWarning: Passing `gradient_checkpointing` to a config initialization is deprecated and will be removed in v5 Transformers. Using `model.gradient_checkpointing_enable()` instead, or if you are using the `Trainer` API, pass `gradient_checkpointing=True` in your `TrainingArguments`.
  warnings.warn(
Some weights of the model checkpoint at facebook/wav2vec2-base were not used when initializing Wav2Vec2Model: ['project_q.weight', 'quantizer.weight_proj.bias', 'project_hid.bias', 'quantizer.weight_proj.weight', 'project_hid.weight', 'project_q.bias', 'quantizer.codevectors']
- This IS expected if you are initializing Wav2Vec2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing Wav2Vec2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
speechbrain.lobes.models.huggingface_wav2vec - wav2vec 2.0 feature extractor is frozen.
--------------------------------
FOLD 1
--------------------------------
speechbrain.core - Beginning experiment!
speechbrain.core - Experiment folder: ./results/Level_Gender_Classification/Xvector/1986/Fold1
speechbrain.core - 90.2M trainable parameters in DigitBrain
speechbrain.utils.checkpoints - Loading a checkpoint from results/Level_Gender_Classification/Xvector/1986/Fold1/save/CKPT+2023-04-15+23-47-13+00
speechbrain.utils.checkpoints - Would load a checkpoint here, but none found yet.
100%|█████████████████████████████████████████████| 2/2 [00:03<00:00,  1.60s/it]
speechbrain.utils.train_logger - Epoch loaded: 5 - test loss: 9.80e-02, test error_rate: 0.00e+00, test gender_error: 0.00e+00
/home/as03720/anaconda3/envs/workspace/lib/python3.9/site-packages/transformers/configuration_utils.py:379: UserWarning: Passing `gradient_checkpointing` to a config initialization is deprecated and will be removed in v5 Transformers. Using `model.gradient_checkpointing_enable()` instead, or if you are using the `Trainer` API, pass `gradient_checkpointing=True` in your `TrainingArguments`.
  warnings.warn(
Some weights of the model checkpoint at facebook/wav2vec2-base were not used when initializing Wav2Vec2Model: ['project_q.weight', 'quantizer.weight_proj.bias', 'project_hid.bias', 'quantizer.weight_proj.weight', 'project_hid.weight', 'project_q.bias', 'quantizer.codevectors']
- This IS expected if you are initializing Wav2Vec2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing Wav2Vec2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
speechbrain.lobes.models.huggingface_wav2vec - speechbrain.lobes.models.huggingface_wav2vec - wav2vec 2.0 feature extractor is frozen.
--------------------------------
FOLD 2
--------------------------------
speechbrain.dataio.encoder - Load called, but CategoricalEncoder is not empty. Loaded data will overwrite everything. This is normal if there is e.g. an unk label defined at init.
speechbrain.dataio.encoder - Load called, but CategoricalEncoder is not empty. Loaded data will overwrite everything. This is normal if there is e.g. an unk label defined at init.
speechbrain.core - Beginning experiment!
speechbrain.core - Experiment folder: ./results/Level_Gender_Classification/Xvector/1986/Fold2
speechbrain.core - 90.2M trainable parameters in DigitBrain
speechbrain.utils.checkpoints - Loading a checkpoint from results/Level_Gender_Classification/Xvector/1986/Fold2/save/CKPT+2023-04-15+23-48-35+00
speechbrain.utils.checkpoints - Would load a checkpoint here, but none found yet.
100%|█████████████████████████████████████████████| 2/2 [00:01<00:00,  1.30it/s]
speechbrain.utils.train_logger - Epoch loaded: 5 - test loss: 5.19e-01, test error_rate: 1.29e-01, test gender_error: 0.00e+00
/home/as03720/anaconda3/envs/workspace/lib/python3.9/site-packages/transformers/configuration_utils.py:379: UserWarning: Passing `gradient_checkpointing` to a config initialization is deprecated and will be removed in v5 Transformers. Using `model.gradient_checkpointing_enable()` instead, or if you are using the `Trainer` API, pass `gradient_checkpointing=True` in your `TrainingArguments`.
  warnings.warn(
Some weights of the model checkpoint at facebook/wav2vec2-base were not used when initializing Wav2Vec2Model: ['project_q.weight', 'quantizer.weight_proj.bias', 'project_hid.bias', 'quantizer.weight_proj.weight', 'project_hid.weight', 'project_q.bias', 'quantizer.codevectors']
- This IS expected if you are initializing Wav2Vec2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing Wav2Vec2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
speechbrain.lobes.models.huggingface_wav2vec - speechbrain.lobes.models.huggingface_wav2vec - wav2vec 2.0 feature extractor is frozen.
--------------------------------
FOLD 3
--------------------------------
speechbrain.dataio.encoder - Load called, but CategoricalEncoder is not empty. Loaded data will overwrite everything. This is normal if there is e.g. an unk label defined at init.
speechbrain.dataio.encoder - Load called, but CategoricalEncoder is not empty. Loaded data will overwrite everything. This is normal if there is e.g. an unk label defined at init.
speechbrain.core - Beginning experiment!
speechbrain.core - Experiment folder: ./results/Level_Gender_Classification/Xvector/1986/Fold3
speechbrain.core - 90.2M trainable parameters in DigitBrain
speechbrain.utils.checkpoints - Loading a checkpoint from results/Level_Gender_Classification/Xvector/1986/Fold3/save/CKPT+2023-04-15+23-49-58+00
speechbrain.utils.checkpoints - Would load a checkpoint here, but none found yet.
100%|█████████████████████████████████████████████| 2/2 [00:01<00:00,  1.29it/s]
speechbrain.utils.train_logger - Epoch loaded: 5 - test loss: 2.92e-01, test error_rate: 9.68e-02, test gender_error: 0.00e+00
/home/as03720/anaconda3/envs/workspace/lib/python3.9/site-packages/transformers/configuration_utils.py:379: UserWarning: Passing `gradient_checkpointing` to a config initialization is deprecated and will be removed in v5 Transformers. Using `model.gradient_checkpointing_enable()` instead, or if you are using the `Trainer` API, pass `gradient_checkpointing=True` in your `TrainingArguments`.
  warnings.warn(
Some weights of the model checkpoint at facebook/wav2vec2-base were not used when initializing Wav2Vec2Model: ['project_q.weight', 'quantizer.weight_proj.bias', 'project_hid.bias', 'quantizer.weight_proj.weight', 'project_hid.weight', 'project_q.bias', 'quantizer.codevectors']
- This IS expected if you are initializing Wav2Vec2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing Wav2Vec2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
speechbrain.lobes.models.huggingface_wav2vec - speechbrain.lobes.models.huggingface_wav2vec - wav2vec 2.0 feature extractor is frozen.
--------------------------------
FOLD 4
--------------------------------
speechbrain.dataio.encoder - Load called, but CategoricalEncoder is not empty. Loaded data will overwrite everything. This is normal if there is e.g. an unk label defined at init.
speechbrain.dataio.encoder - Load called, but CategoricalEncoder is not empty. Loaded data will overwrite everything. This is normal if there is e.g. an unk label defined at init.
speechbrain.core - Beginning experiment!
speechbrain.core - Experiment folder: ./results/Level_Gender_Classification/Xvector/1986/Fold4
speechbrain.core - 90.2M trainable parameters in DigitBrain
speechbrain.utils.checkpoints - Loading a checkpoint from results/Level_Gender_Classification/Xvector/1986/Fold4/save/CKPT+2023-04-15+23-51-19+00
speechbrain.utils.checkpoints - Would load a checkpoint here, but none found yet.
100%|█████████████████████████████████████████████| 2/2 [00:01<00:00,  1.71it/s]
speechbrain.utils.train_logger - Epoch loaded: 5 - test loss: 5.28e-02, test error_rate: 0.00e+00, test gender_error: 0.00e+00
/home/as03720/anaconda3/envs/workspace/lib/python3.9/site-packages/transformers/configuration_utils.py:379: UserWarning: Passing `gradient_checkpointing` to a config initialization is deprecated and will be removed in v5 Transformers. Using `model.gradient_checkpointing_enable()` instead, or if you are using the `Trainer` API, pass `gradient_checkpointing=True` in your `TrainingArguments`.
  warnings.warn(
Some weights of the model checkpoint at facebook/wav2vec2-base were not used when initializing Wav2Vec2Model: ['project_q.weight', 'quantizer.weight_proj.bias', 'project_hid.bias', 'quantizer.weight_proj.weight', 'project_hid.weight', 'project_q.bias', 'quantizer.codevectors']
- This IS expected if you are initializing Wav2Vec2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing Wav2Vec2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
speechbrain.lobes.models.huggingface_wav2vec - speechbrain.lobes.models.huggingface_wav2vec - wav2vec 2.0 feature extractor is frozen.
--------------------------------
FOLD 5
--------------------------------
speechbrain.dataio.encoder - Load called, but CategoricalEncoder is not empty. Loaded data will overwrite everything. This is normal if there is e.g. an unk label defined at init.
speechbrain.dataio.encoder - Load called, but CategoricalEncoder is not empty. Loaded data will overwrite everything. This is normal if there is e.g. an unk label defined at init.
speechbrain.core - Beginning experiment!
speechbrain.core - Experiment folder: ./results/Level_Gender_Classification/Xvector/1986/Fold5
speechbrain.core - 90.2M trainable parameters in DigitBrain
speechbrain.utils.checkpoints - Loading a checkpoint from results/Level_Gender_Classification/Xvector/1986/Fold5/save/CKPT+2023-04-15+23-52-48+00
speechbrain.utils.checkpoints - Would load a checkpoint here, but none found yet.
100%|█████████████████████████████████████████████| 2/2 [00:01<00:00,  1.20it/s]
speechbrain.utils.train_logger - Epoch loaded: 5 - test loss: 4.74e-01, test error_rate: 1.67e-01, test gender_error: 0.00e+00
/home/as03720/anaconda3/envs/workspace/lib/python3.9/site-packages/transformers/configuration_utils.py:379: UserWarning: Passing `gradient_checkpointing` to a config initialization is deprecated and will be removed in v5 Transformers. Using `model.gradient_checkpointing_enable()` instead, or if you are using the `Trainer` API, pass `gradient_checkpointing=True` in your `TrainingArguments`.
  warnings.warn(
Some weights of the model checkpoint at facebook/wav2vec2-base were not used when initializing Wav2Vec2Model: ['project_q.weight', 'quantizer.weight_proj.bias', 'project_hid.bias', 'quantizer.weight_proj.weight', 'project_hid.weight', 'project_q.bias', 'quantizer.codevectors']
- This IS expected if you are initializing Wav2Vec2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing Wav2Vec2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
speechbrain.lobes.models.huggingface_wav2vec - speechbrain.lobes.models.huggingface_wav2vec - wav2vec 2.0 feature extractor is frozen.
--------------------------------
FOLD 6
--------------------------------
speechbrain.dataio.encoder - Load called, but CategoricalEncoder is not empty. Loaded data will overwrite everything. This is normal if there is e.g. an unk label defined at init.
speechbrain.dataio.encoder - Load called, but CategoricalEncoder is not empty. Loaded data will overwrite everything. This is normal if there is e.g. an unk label defined at init.
speechbrain.core - Beginning experiment!
speechbrain.core - Experiment folder: ./results/Level_Gender_Classification/Xvector/1986/Fold6
speechbrain.core - 90.2M trainable parameters in DigitBrain
speechbrain.utils.checkpoints - Loading a checkpoint from results/Level_Gender_Classification/Xvector/1986/Fold6/save/CKPT+2023-04-15+23-54-15+00
speechbrain.utils.checkpoints - Would load a checkpoint here, but none found yet.
100%|█████████████████████████████████████████████| 2/2 [00:01<00:00,  1.30it/s]
speechbrain.utils.train_logger - Epoch loaded: 5 - test loss: 1.83e-01, test error_rate: 3.33e-02, test gender_error: 0.00e+00
/home/as03720/anaconda3/envs/workspace/lib/python3.9/site-packages/transformers/configuration_utils.py:379: UserWarning: Passing `gradient_checkpointing` to a config initialization is deprecated and will be removed in v5 Transformers. Using `model.gradient_checkpointing_enable()` instead, or if you are using the `Trainer` API, pass `gradient_checkpointing=True` in your `TrainingArguments`.
  warnings.warn(
Some weights of the model checkpoint at facebook/wav2vec2-base were not used when initializing Wav2Vec2Model: ['project_q.weight', 'quantizer.weight_proj.bias', 'project_hid.bias', 'quantizer.weight_proj.weight', 'project_hid.weight', 'project_q.bias', 'quantizer.codevectors']
- This IS expected if you are initializing Wav2Vec2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing Wav2Vec2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
speechbrain.lobes.models.huggingface_wav2vec - speechbrain.lobes.models.huggingface_wav2vec - wav2vec 2.0 feature extractor is frozen.
--------------------------------
FOLD 7
--------------------------------
speechbrain.dataio.encoder - Load called, but CategoricalEncoder is not empty. Loaded data will overwrite everything. This is normal if there is e.g. an unk label defined at init.
speechbrain.dataio.encoder - Load called, but CategoricalEncoder is not empty. Loaded data will overwrite everything. This is normal if there is e.g. an unk label defined at init.
speechbrain.core - Beginning experiment!
speechbrain.core - Experiment folder: ./results/Level_Gender_Classification/Xvector/1986/Fold7
speechbrain.core - 90.2M trainable parameters in DigitBrain
speechbrain.utils.checkpoints - Loading a checkpoint from results/Level_Gender_Classification/Xvector/1986/Fold7/save/CKPT+2023-04-15+23-55-37+00
speechbrain.utils.checkpoints - Would load a checkpoint here, but none found yet.
100%|█████████████████████████████████████████████| 2/2 [00:01<00:00,  1.67it/s]
speechbrain.utils.train_logger - Epoch loaded: 5 - test loss: 7.07e-02, test error_rate: 0.00e+00, test gender_error: 0.00e+00
/home/as03720/anaconda3/envs/workspace/lib/python3.9/site-packages/transformers/configuration_utils.py:379: UserWarning: Passing `gradient_checkpointing` to a config initialization is deprecated and will be removed in v5 Transformers. Using `model.gradient_checkpointing_enable()` instead, or if you are using the `Trainer` API, pass `gradient_checkpointing=True` in your `TrainingArguments`.
  warnings.warn(
Some weights of the model checkpoint at facebook/wav2vec2-base were not used when initializing Wav2Vec2Model: ['project_q.weight', 'quantizer.weight_proj.bias', 'project_hid.bias', 'quantizer.weight_proj.weight', 'project_hid.weight', 'project_q.bias', 'quantizer.codevectors']
- This IS expected if you are initializing Wav2Vec2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing Wav2Vec2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
speechbrain.lobes.models.huggingface_wav2vec - speechbrain.lobes.models.huggingface_wav2vec - wav2vec 2.0 feature extractor is frozen.
--------------------------------
FOLD 8
--------------------------------
speechbrain.dataio.encoder - Load called, but CategoricalEncoder is not empty. Loaded data will overwrite everything. This is normal if there is e.g. an unk label defined at init.
speechbrain.dataio.encoder - Load called, but CategoricalEncoder is not empty. Loaded data will overwrite everything. This is normal if there is e.g. an unk label defined at init.
speechbrain.core - Beginning experiment!
speechbrain.core - Experiment folder: ./results/Level_Gender_Classification/Xvector/1986/Fold8
speechbrain.core - 90.2M trainable parameters in DigitBrain
speechbrain.utils.checkpoints - Loading a checkpoint from results/Level_Gender_Classification/Xvector/1986/Fold8/save/CKPT+2023-04-15+23-57-03+00
speechbrain.utils.checkpoints - Would load a checkpoint here, but none found yet.
100%|█████████████████████████████████████████████| 2/2 [00:01<00:00,  1.20it/s]
speechbrain.utils.train_logger - Epoch loaded: 5 - test loss: 1.60e-01, test error_rate: 6.67e-02, test gender_error: 0.00e+00
/home/as03720/anaconda3/envs/workspace/lib/python3.9/site-packages/transformers/configuration_utils.py:379: UserWarning: Passing `gradient_checkpointing` to a config initialization is deprecated and will be removed in v5 Transformers. Using `model.gradient_checkpointing_enable()` instead, or if you are using the `Trainer` API, pass `gradient_checkpointing=True` in your `TrainingArguments`.
  warnings.warn(
Some weights of the model checkpoint at facebook/wav2vec2-base were not used when initializing Wav2Vec2Model: ['project_q.weight', 'quantizer.weight_proj.bias', 'project_hid.bias', 'quantizer.weight_proj.weight', 'project_hid.weight', 'project_q.bias', 'quantizer.codevectors']
- This IS expected if you are initializing Wav2Vec2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing Wav2Vec2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
speechbrain.lobes.models.huggingface_wav2vec - speechbrain.lobes.models.huggingface_wav2vec - wav2vec 2.0 feature extractor is frozen.
--------------------------------
FOLD 9
--------------------------------
speechbrain.dataio.encoder - Load called, but CategoricalEncoder is not empty. Loaded data will overwrite everything. This is normal if there is e.g. an unk label defined at init.
speechbrain.dataio.encoder - Load called, but CategoricalEncoder is not empty. Loaded data will overwrite everything. This is normal if there is e.g. an unk label defined at init.
speechbrain.core - Beginning experiment!
speechbrain.core - Experiment folder: ./results/Level_Gender_Classification/Xvector/1986/Fold9
speechbrain.core - 90.2M trainable parameters in DigitBrain
speechbrain.utils.checkpoints - Loading a checkpoint from results/Level_Gender_Classification/Xvector/1986/Fold9/save/CKPT+2023-04-15+23-58-29+00
speechbrain.utils.checkpoints - Would load a checkpoint here, but none found yet.
100%|█████████████████████████████████████████████| 2/2 [00:01<00:00,  1.71it/s]
speechbrain.utils.train_logger - Epoch loaded: 5 - test loss: 1.14e-01, test error_rate: 3.33e-02, test gender_error: 0.00e+00
/home/as03720/anaconda3/envs/workspace/lib/python3.9/site-packages/transformers/configuration_utils.py:379: UserWarning: Passing `gradient_checkpointing` to a config initialization is deprecated and will be removed in v5 Transformers. Using `model.gradient_checkpointing_enable()` instead, or if you are using the `Trainer` API, pass `gradient_checkpointing=True` in your `TrainingArguments`.
  warnings.warn(
Some weights of the model checkpoint at facebook/wav2vec2-base were not used when initializing Wav2Vec2Model: ['project_q.weight', 'quantizer.weight_proj.bias', 'project_hid.bias', 'quantizer.weight_proj.weight', 'project_hid.weight', 'project_q.bias', 'quantizer.codevectors']
- This IS expected if you are initializing Wav2Vec2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing Wav2Vec2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
speechbrain.lobes.models.huggingface_wav2vec - speechbrain.lobes.models.huggingface_wav2vec - wav2vec 2.0 feature extractor is frozen.
--------------------------------
FOLD 10
--------------------------------
speechbrain.dataio.encoder - Load called, but CategoricalEncoder is not empty. Loaded data will overwrite everything. This is normal if there is e.g. an unk label defined at init.
speechbrain.dataio.encoder - Load called, but CategoricalEncoder is not empty. Loaded data will overwrite everything. This is normal if there is e.g. an unk label defined at init.
speechbrain.core - Beginning experiment!
speechbrain.core - Experiment folder: ./results/Level_Gender_Classification/Xvector/1986/Fold10
speechbrain.core - 90.2M trainable parameters in DigitBrain
speechbrain.utils.checkpoints - Loading a checkpoint from results/Level_Gender_Classification/Xvector/1986/Fold10/save/CKPT+2023-04-15+23-59-53+00
speechbrain.utils.checkpoints - Would load a checkpoint here, but none found yet.
100%|█████████████████████████████████████████████| 2/2 [00:01<00:00,  1.41it/s]
speechbrain.utils.train_logger - Epoch loaded: 5 - test loss: 2.54e-01, test error_rate: 1.00e-01, test gender_error: 0.00e+00
/home/as03720/anaconda3/envs/workspace/lib/python3.9/site-packages/transformers/configuration_utils.py:379: UserWarning: Passing `gradient_checkpointing` to a config initialization is deprecated and will be removed in v5 Transformers. Using `model.gradient_checkpointing_enable()` instead, or if you are using the `Trainer` API, pass `gradient_checkpointing=True` in your `TrainingArguments`.
  warnings.warn(
Some weights of the model checkpoint at facebook/wav2vec2-base were not used when initializing Wav2Vec2Model: ['project_q.weight', 'quantizer.weight_proj.bias', 'project_hid.bias', 'quantizer.weight_proj.weight', 'project_hid.weight', 'project_q.bias', 'quantizer.codevectors']
- This IS expected if you are initializing Wav2Vec2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing Wav2Vec2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
speechbrain.lobes.models.huggingface_wav2vec - speechbrain.lobes.models.huggingface_wav2vec - wav2vec 2.0 feature extractor is frozen.
--------------------------------
Final Result
--------------------------------
speechbrain.dataio.encoder - Load called, but CategoricalEncoder is not empty. Loaded data will overwrite everything. This is normal if there is e.g. an unk label defined at init.
speechbrain.dataio.encoder - Load called, but CategoricalEncoder is not empty. Loaded data will overwrite everything. This is normal if there is e.g. an unk label defined at init.
speechbrain.core - Beginning experiment!
speechbrain.core - Experiment folder: ./results/Level_Gender_Classification/wav2vec/1986/FinalFold
speechbrain.core - 90.2M trainable parameters in DigitBrain
speechbrain.utils.checkpoints - Would load a checkpoint here, but none found yet.
speechbrain.utils.epoch_loop - Going into epoch 1
100%|██████████████████████████| 19/19 [00:14<00:00,  1.34it/s, train_loss=1.72]
speechbrain.utils.epoch_loop - Going into epoch 2
100%|███████████████████████████| 19/19 [00:14<00:00,  1.35it/s, train_loss=1.3]
speechbrain.utils.epoch_loop - Going into epoch 3
100%|█████████████████████████| 19/19 [00:14<00:00,  1.34it/s, train_loss=0.587]
speechbrain.utils.epoch_loop - Going into epoch 4
100%|█████████████████████████| 19/19 [00:14<00:00,  1.35it/s, train_loss=0.214]
speechbrain.utils.epoch_loop - Going into epoch 5
100%|█████████████████████████| 19/19 [00:14<00:00,  1.34it/s, train_loss=0.193]
speechbrain.utils.checkpoints - Would load a checkpoint here, but none found yet.
100%|█████████████████████████████████████████████| 2/2 [00:01<00:00,  1.23it/s]
speechbrain.utils.train_logger - Epoch loaded: 5 - test loss: 8.40e-01, test error_rate: 2.67e-01, test gender_error: 0.00e+00

In [None]:
import matplotlib.pyplot as plt

def get_losses(log_file):
    """This function takes in input a path of a log-file and outputs the train and
    valid losses in lists of float numbers"""

    # Your code here. Aim for 9-10 lines
    train_losses = []
    valid_losses = []
    with open(log_file) as f:
        lines = f.readlines()
        for l in lines:
            words = l.split()
            for i, word in enumerate(words):
                if word == "train":
                    train_losses.append(float(words[i + 2]))
                    valid_losses.append(float(words[i + 6].split(',')[0]))

    return train_losses, valid_losses

log_file = 'results/Level_Gender_Classification/wav2vec/1986/Fold1/train_log.txt'
train_losses, valid_losses = get_losses(log_file)

plt.plot(train_losses, label='train')
plt.plot(valid_losses, label='valid')
plt.ylabel('Loss')
plt.xlabel('# Epochs')
plt.legend()
plt.show()

FileNotFoundError: ignored