## Toronto Emotional Speech Set (TESS)

Dataset Source: https://www.kaggle.com/datasets/ejlok1/toronto-emotional-speech-set-tess

#### Import Necessary Libraries

In [1]:
import os, sys, random, glob
os.environ['TOKENIZERS_PARALLELISM']='false'

import numpy as np
import pandas as pd

import datasets
from datasets import load_dataset, Audio, DatasetDict
from datasets import Audio, Features, ClassLabel

import torch

import transformers
from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
from transformers import TrainingArguments, Trainer

import evaluate

from IPython.display import display

!git lfs install

Git LFS initialized.


#### Display Library Versions

In [2]:
n = 18

print("Python:".rjust(n), sys.version[0:6])
print("NumPy:".rjust(n), np.__version__)
print("Pandas:".rjust(n), pd.__version__)
print("Datasets:".rjust(n), datasets.__version__)
print("Torch:".rjust(n), torch.__version__)
print("Transformers:".rjust(n), transformers.__version__)
print("Evaluate:".rjust(n), evaluate.__version__)

           Python: 3.9.12
            NumPy: 1.23.5
           Pandas: 2.0.0
         Datasets: 2.11.0
            Torch: 2.0.0
     Transformers: 4.27.4
         Evaluate: 0.4.0


#### Create Dictionaries to Convert Labels Between Strings & Integers

In [3]:
labels = ["angry", 
          "disgust", 
          "fear", 
          "happy", 
          "neutral", 
          "pleasant_surprise", 
          "sad"
          ]

NUM_OF_LABELS = len(labels)

label2id, id2label = dict(), dict()

for i, label in enumerate(labels):
    label2id[label] = str(i)
    id2label[str(i)] = label

print(labels)
print(NUM_OF_LABELS)
print(label2id)
print(id2label)

['angry', 'disgust', 'fear', 'happy', 'neutral', 'pleasant_surprise', 'sad']
7
{'angry': '0', 'disgust': '1', 'fear': '2', 'happy': '3', 'neutral': '4', 'pleasant_surprise': '5', 'sad': '6'}
{'0': 'angry', '1': 'disgust', '2': 'fear', '3': 'happy', '4': 'neutral', '5': 'pleasant_surprise', '6': 'sad'}


#### Ingest Dataset

In [4]:
dataset = load_dataset("audiofolder",
                       data_dir="/Users/briandunn/Desktop/current projects/Toronto Emotional Speech Set (TESS)/data",
                       drop_labels=False)

dataset

Resolving data files:   0%|          | 0/2800 [00:00<?, ?it/s]

Found cached dataset audiofolder (/Users/briandunn/.cache/huggingface/datasets/audiofolder/default-e077a6efa0d71b8f/0.0.0/6cbdd16f8688354c63b4e2a36e1585d05de285023ee6443ffd71c4182055c0fc)


  0%|          | 0/1 [00:00<?, ?it/s]

DatasetDict({
    train: Dataset({
        features: ['audio', 'label'],
        num_rows: 2800
    })
})

#### Display Example Sample

In [5]:
dataset['train'][1228]

{'audio': {'path': '/Users/briandunn/Desktop/current projects/Toronto Emotional Speech Set (TESS)/data/happy/OAF_death_happy.wav',
  'array': array([ 0.        , -0.00012207, -0.00134277, ...,  0.00036621,
          0.00067139,  0.00073242]),
  'sampling_rate': 24414},
 'label': 3}

#### Cast Audio Feature to Data Type of Audio

In [6]:
audio_data = dataset.cast_column("audio", Audio(sampling_rate=24_414))

#### Split Dataset into Training & Testing Datasets

In [7]:
audio_data = audio_data.shuffle(seed=42)

audio_data_split = audio_data['train'].train_test_split(test_size=0.20)

ds = DatasetDict({
    'train' : audio_data_split['train'],
    'eval' : audio_data_split['test']
})

Loading cached shuffled indices for dataset at /Users/briandunn/.cache/huggingface/datasets/audiofolder/default-e077a6efa0d71b8f/0.0.0/6cbdd16f8688354c63b4e2a36e1585d05de285023ee6443ffd71c4182055c0fc/cache-f4bb2429baea6bea.arrow


#### Some Information About Training & Validation Datasets

In [8]:
print("Training Dataset")
print("Training Dataset Info: ", ds['train'])
print("First Sample in Training Dataset", ds['train'][0])
print("Last Sample in Training Dataset", ds['train'][-1])
print("Unique Values in Label/Class: ", sorted(ds['train'].unique("label")))

print("\n\nEvaluation Dataset")
print("Evaluation Dataset Info: ", ds['eval'])
print("First Sample in Evaluation Dataset", ds['eval'][0])
print("Last Sample in Evaluation Dataset", ds['eval'][-1])
print("Unique Values in Label/Class: ", sorted(ds['eval'].unique("label")))

Training Dataset
Training Dataset Info:  Dataset({
    features: ['audio', 'label'],
    num_rows: 2240
})
First Sample in Training Dataset {'audio': {'path': '/Users/briandunn/Desktop/current projects/Toronto Emotional Speech Set (TESS)/data/neutral/OAF_soup_neutral.wav', 'array': array([ 0.00000000e+00,  0.00000000e+00, -9.15527344e-05, ...,
        2.13623047e-04,  1.52587891e-04,  1.22070312e-04]), 'sampling_rate': 24414}, 'label': 4}
Last Sample in Training Dataset {'audio': {'path': '/Users/briandunn/Desktop/current projects/Toronto Emotional Speech Set (TESS)/data/fear/OAF_date_fear.wav', 'array': array([ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
       -1.52587891e-04, -9.15527344e-05, -1.52587891e-04]), 'sampling_rate': 24414}, 'label': 2}


Flattening the indices:   0%|          | 0/2240 [00:00<?, ? examples/s]

Unique Values in Label/Class:  [0, 1, 2, 3, 4, 5, 6]


Evaluation Dataset
Evaluation Dataset Info:  Dataset({
    features: ['audio', 'label'],
    num_rows: 560
})
First Sample in Evaluation Dataset {'audio': {'path': '/Users/briandunn/Desktop/current projects/Toronto Emotional Speech Set (TESS)/data/happy/YAF_nice_happy.wav', 'array': array([ 7.93457031e-04, -1.22070312e-04, -3.23486328e-03, ...,
       -5.49316406e-04, -4.88281250e-04,  6.10351562e-05]), 'sampling_rate': 24414}, 'label': 3}
Last Sample in Evaluation Dataset {'audio': {'path': '/Users/briandunn/Desktop/current projects/Toronto Emotional Speech Set (TESS)/data/fear/OAF_kick_fear.wav', 'array': array([ 0.00000000e+00, -1.22070312e-04, -9.15527344e-05, ...,
        9.15527344e-05,  9.15527344e-05,  6.10351562e-05]), 'sampling_rate': 24414}, 'label': 2}


Flattening the indices:   0%|          | 0/560 [00:00<?, ? examples/s]

Unique Values in Label/Class:  [0, 1, 2, 3, 4, 5, 6]


#### Display Some Examples with Ability to Listen to Them

In [9]:
for _ in range(5):
    from IPython.display import Audio, display
    rand_idx = random.randint(0, len(ds["train"])-1)
    example = ds["train"][rand_idx]
    audio = example["audio"]
    
    print(f'Label: {id2label[str(example["label"])]}')
    print(f'Shape: {audio["array"].shape}, sampling rate: {audio["sampling_rate"]}')
    display(Audio(audio["array"], rate=audio["sampling_rate"]))
    print()

Label: happy
Shape: (47006,), sampling rate: 24414



Label: happy
Shape: (50606,), sampling rate: 24414



Label: fear
Shape: (35487,), sampling rate: 24414



Label: happy
Shape: (50129,), sampling rate: 24414



Label: fear
Shape: (44890,), sampling rate: 24414





#### Basic Values/Constants

In [10]:
MODEL_CKPT = "facebook/wav2vec2-base"
MODEL_NAME = MODEL_CKPT.split("/")[-1] + "-Toronto_emotional_speech_set"

NUM_OF_EPOCHS = 15
LEARNING_RATE = 3e-5

BATCH_SIZE = 32
STRATEGY = "epoch"

DEVICE = torch.device("mps")

#### Set Sample Rate

In [11]:
sampling_rate = ds["train"].features["audio"].sampling_rate
sampling_rate

24414

#### Instantiate Instance of Feature Extractor

In [12]:
feature_extractor = AutoFeatureExtractor.from_pretrained(MODEL_CKPT)



#### Define function to Preprocess Data

In [13]:
def preprocess_function(examples):
    '''
    This function prepares the dataset for the transformer
    by applying the feature extractor to it (among other 
    processes).
    '''
    max_duration = 4.0 # seconds (or shape/sampling_rate)
    audio_arrays = [x["array"] for x in examples["audio"]]
    inputs = feature_extractor(audio_arrays, 
                               sampling_rate=feature_extractor.sampling_rate, 
                               max_length=int(feature_extractor.sampling_rate * max_duration),
                               truncation=True)
    return inputs

encoded_audio = ds.map(preprocess_function, remove_columns="audio", batched=True)

Map:   0%|          | 0/2240 [00:00<?, ? examples/s]

  tensor = as_tensor(value)


Map:   0%|          | 0/560 [00:00<?, ? examples/s]

#### Define Metrics Evaluation Function 

In [14]:
def compute_metrics(p):
    '''
    This function calculates & returns the following metrics:
    - accuracy
    - f1 score
    - recall
    - precision
    '''
    import evaluate
    
    accuracy_metric = evaluate.load("accuracy")
    
    accuracy = accuracy_metric.compute(predictions=np.argmax(p.predictions, 
                                                             axis=1), 
                                       references=p.label_ids)['accuracy']
    
    ### ------------------- F1 scores -------------------
    
    f1_score_metric = evaluate.load("f1")
   
    weighted_f1_score = f1_score_metric.compute(predictions=np.argmax(p.predictions, 
                                                                      axis=1), 
                                                references=p.label_ids, 
                                                average='weighted')["f1"]
    
    micro_f1_score = f1_score_metric.compute(predictions=np.argmax(p.predictions, 
                                                                   axis=1), 
                                             references=p.label_ids, 
                                             average='micro')['f1']
    
    macro_f1_score = f1_score_metric.compute(predictions=np.argmax(p.predictions, 
                                                                   axis=1), 
                                             references=p.label_ids, 
                                             average='macro')["f1"]
    
    ### ------------------- recall -------------------
    
    recall_metric = evaluate.load("recall")
    
    weighted_recall = recall_metric.compute(predictions=np.argmax(p.predictions, 
                                                                  axis=1), 
                                            references=p.label_ids, 
                                            average='weighted')["recall"]
    
    micro_recall = recall_metric.compute(predictions=np.argmax(p.predictions, 
                                                               axis=1), 
                                         references=p.label_ids, 
                                         average='micro')["recall"]
    
    macro_recall = recall_metric.compute(predictions=np.argmax(p.predictions, 
                                                               axis=1), 
                                         references=p.label_ids, 
                                         average='macro')["recall"]
    
    ### ------------------- precision -------------------
    
    precision_metric = evaluate.load("precision")
    
    weighted_precision = precision_metric.compute(predictions=np.argmax(p.predictions, 
                                                                        axis=1), 
                                                  references=p.label_ids, 
                                                  average='weighted')["precision"]
    
    micro_precision = precision_metric.compute(predictions=np.argmax(p.predictions, 
                                                                     axis=1), 
                                               references=p.label_ids, 
                                               average='micro')["precision"]
    
    macro_precision = precision_metric.compute(predictions=np.argmax(p.predictions, 
                                                                     axis=1), 
                                               references=p.label_ids, 
                                               average='macro')["precision"]
    
    return {"accuracy" : accuracy, 
            "Weighted F1" : weighted_f1_score,
            "Micro F1" : micro_f1_score,
            "Macro F1" : macro_f1_score,
            "Weighted Recall" : weighted_recall,
            "Micro Recall" : micro_recall,
            "Macro Recall" : macro_recall,
            "Weighted Precision" : weighted_precision,
            "Micro Precision" : micro_precision,
            "Macro Precision" : macro_precision
            }

#### Instantiate Model

In [15]:
model = AutoModelForAudioClassification.from_pretrained(MODEL_CKPT, 
                                                        num_labels=NUM_OF_LABELS, 
                                                        label2id=label2id,
                                                        id2label= id2label
                                                        ).to(DEVICE)

Some weights of the model checkpoint at facebook/wav2vec2-base were not used when initializing Wav2Vec2ForSequenceClassification: ['quantizer.weight_proj.bias', 'project_q.weight', 'project_hid.weight', 'project_q.bias', 'quantizer.weight_proj.weight', 'project_hid.bias', 'quantizer.codevectors']
- This IS expected if you are initializing Wav2Vec2ForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing Wav2Vec2ForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of Wav2Vec2ForSequenceClassification were not initialized from the model checkpoint at facebook/wav2vec2-base and are newly initialized: ['projector.weight', 'classifier.bias', 'projector.

#### Define Training Arguments

In [16]:
args = TrainingArguments(
    output_dir=MODEL_NAME,
    evaluation_strategy=STRATEGY,
    num_train_epochs=NUM_OF_EPOCHS,
    save_strategy=STRATEGY,
    logging_strategy="steps",
    learning_rate=LEARNING_RATE,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    warmup_ratio=0.10,
    gradient_accumulation_steps=4,
    logging_first_step=True,
    report_to="tensorboard",
    hub_private_repo=True,
    push_to_hub=True
)

#### Define Trainer

In [17]:
trainer = Trainer(
    model = model,
    args = args,
    train_dataset = encoded_audio["train"],
    eval_dataset = encoded_audio["eval"],
    tokenizer = feature_extractor,
    compute_metrics = compute_metrics,
)

Cloning https://huggingface.co/DunnBC22/wav2vec2-base-Toronto_emotional_speech_set into local empty directory.


#### Train Model

In [18]:
trainer.train()



  0%|          | 0/255 [00:00<?, ?it/s]

{'loss': 1.9517, 'learning_rate': 1.153846153846154e-06, 'epoch': 0.06}


  0%|          | 0/18 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 1.9431936740875244, 'eval_accuracy': 0.24107142857142858, 'eval_Weighted F1': 0.13383467839934637, 'eval_Micro F1': 0.24107142857142858, 'eval_Macro F1': 0.1201244073234975, 'eval_Weighted Recall': 0.24107142857142858, 'eval_Micro Recall': 0.24107142857142858, 'eval_Macro Recall': 0.2168186182460851, 'eval_Weighted Precision': 0.11608532811999539, 'eval_Micro Precision': 0.24107142857142858, 'eval_Macro Precision': 0.10490231034670308, 'eval_runtime': 223.5347, 'eval_samples_per_second': 2.505, 'eval_steps_per_second': 0.081, 'epoch': 0.97}


  0%|          | 0/18 [00:00<?, ?it/s]

{'eval_loss': 1.9036115407943726, 'eval_accuracy': 0.3375, 'eval_Weighted F1': 0.3037364257776405, 'eval_Micro F1': 0.3375, 'eval_Macro F1': 0.30815430780926484, 'eval_Weighted Recall': 0.3375, 'eval_Micro Recall': 0.3375, 'eval_Macro Recall': 0.3533042497686724, 'eval_Weighted Precision': 0.5363771848303185, 'eval_Micro Precision': 0.3375, 'eval_Macro Precision': 0.5378759187995031, 'eval_runtime': 223.5282, 'eval_samples_per_second': 2.505, 'eval_steps_per_second': 0.081, 'epoch': 2.0}


  0%|          | 0/18 [00:00<?, ?it/s]

{'eval_loss': 1.6629307270050049, 'eval_accuracy': 0.4517857142857143, 'eval_Weighted F1': 0.40203571704391644, 'eval_Micro F1': 0.4517857142857143, 'eval_Macro F1': 0.39363688263295377, 'eval_Weighted Recall': 0.4517857142857143, 'eval_Micro Recall': 0.4517857142857143, 'eval_Macro Recall': 0.4502916871122686, 'eval_Weighted Precision': 0.6751368308982086, 'eval_Micro Precision': 0.4517857142857143, 'eval_Macro Precision': 0.6555189180145532, 'eval_runtime': 223.5952, 'eval_samples_per_second': 2.505, 'eval_steps_per_second': 0.081, 'epoch': 2.97}


  0%|          | 0/18 [00:00<?, ?it/s]

{'eval_loss': 1.202552080154419, 'eval_accuracy': 0.7357142857142858, 'eval_Weighted F1': 0.7121418070236116, 'eval_Micro F1': 0.7357142857142858, 'eval_Macro F1': 0.6988776592730226, 'eval_Weighted Recall': 0.7357142857142858, 'eval_Micro Recall': 0.7357142857142858, 'eval_Macro Recall': 0.7239981005005428, 'eval_Weighted Precision': 0.7903049080362985, 'eval_Micro Precision': 0.7357142857142858, 'eval_Macro Precision': 0.7848214912023492, 'eval_runtime': 223.1375, 'eval_samples_per_second': 2.51, 'eval_steps_per_second': 0.081, 'epoch': 4.0}


  0%|          | 0/18 [00:00<?, ?it/s]

{'eval_loss': 0.8457635641098022, 'eval_accuracy': 0.8839285714285714, 'eval_Weighted F1': 0.8796262520983313, 'eval_Micro F1': 0.8839285714285714, 'eval_Macro F1': 0.8766531328296392, 'eval_Weighted Recall': 0.8839285714285714, 'eval_Micro Recall': 0.8839285714285714, 'eval_Macro Recall': 0.8845344513486326, 'eval_Weighted Precision': 0.8873692416697911, 'eval_Micro Precision': 0.8839285714285714, 'eval_Macro Precision': 0.8806824692416629, 'eval_runtime': 223.3363, 'eval_samples_per_second': 2.507, 'eval_steps_per_second': 0.081, 'epoch': 4.97}


  0%|          | 0/18 [00:00<?, ?it/s]

{'eval_loss': 0.6493406295776367, 'eval_accuracy': 0.8946428571428572, 'eval_Weighted F1': 0.8939199453580743, 'eval_Micro F1': 0.8946428571428572, 'eval_Macro F1': 0.8913743171654662, 'eval_Weighted Recall': 0.8946428571428572, 'eval_Micro Recall': 0.8946428571428572, 'eval_Macro Recall': 0.8936944737333521, 'eval_Weighted Precision': 0.9049154297457814, 'eval_Micro Precision': 0.8946428571428572, 'eval_Macro Precision': 0.901445447282318, 'eval_runtime': 223.1686, 'eval_samples_per_second': 2.509, 'eval_steps_per_second': 0.081, 'epoch': 6.0}


  0%|          | 0/18 [00:00<?, ?it/s]

{'eval_loss': 0.5148770809173584, 'eval_accuracy': 0.9089285714285714, 'eval_Weighted F1': 0.9045966657788822, 'eval_Micro F1': 0.9089285714285714, 'eval_Macro F1': 0.8989019266845292, 'eval_Weighted Recall': 0.9089285714285714, 'eval_Micro Recall': 0.9089285714285714, 'eval_Macro Recall': 0.8956901498254266, 'eval_Weighted Precision': 0.9275079034356466, 'eval_Micro Precision': 0.9089285714285714, 'eval_Macro Precision': 0.9327120198663463, 'eval_runtime': 222.7441, 'eval_samples_per_second': 2.514, 'eval_steps_per_second': 0.081, 'epoch': 6.97}


  0%|          | 0/18 [00:00<?, ?it/s]

{'eval_loss': 0.3814040720462799, 'eval_accuracy': 0.9535714285714286, 'eval_Weighted F1': 0.9531306769738188, 'eval_Micro F1': 0.9535714285714286, 'eval_Macro F1': 0.9500988975800718, 'eval_Weighted Recall': 0.9535714285714286, 'eval_Micro Recall': 0.9535714285714286, 'eval_Macro Recall': 0.9473681591751487, 'eval_Weighted Precision': 0.9577324959704161, 'eval_Micro Precision': 0.9535714285714286, 'eval_Macro Precision': 0.9583315755140823, 'eval_runtime': 223.9041, 'eval_samples_per_second': 2.501, 'eval_steps_per_second': 0.08, 'epoch': 8.0}


  0%|          | 0/18 [00:00<?, ?it/s]

{'eval_loss': 0.5626518130302429, 'eval_accuracy': 0.85, 'eval_Weighted F1': 0.8459137912291639, 'eval_Micro F1': 0.85, 'eval_Macro F1': 0.8401731332350794, 'eval_Weighted Recall': 0.85, 'eval_Micro Recall': 0.85, 'eval_Macro Recall': 0.8378098199763276, 'eval_Weighted Precision': 0.9099704717051657, 'eval_Micro Precision': 0.85, 'eval_Macro Precision': 0.9160312476639006, 'eval_runtime': 223.0459, 'eval_samples_per_second': 2.511, 'eval_steps_per_second': 0.081, 'epoch': 8.97}


  0%|          | 0/18 [00:00<?, ?it/s]

{'eval_loss': 0.4701941907405853, 'eval_accuracy': 0.8910714285714286, 'eval_Weighted F1': 0.8861303435469198, 'eval_Micro F1': 0.8910714285714286, 'eval_Macro F1': 0.885351987663403, 'eval_Weighted Recall': 0.8910714285714286, 'eval_Micro Recall': 0.8910714285714286, 'eval_Macro Recall': 0.893798777898879, 'eval_Weighted Precision': 0.9021189240049888, 'eval_Micro Precision': 0.8910714285714286, 'eval_Macro Precision': 0.8967391329677932, 'eval_runtime': 222.7619, 'eval_samples_per_second': 2.514, 'eval_steps_per_second': 0.081, 'epoch': 10.0}


  0%|          | 0/18 [00:00<?, ?it/s]

{'eval_loss': 0.3362300992012024, 'eval_accuracy': 0.9392857142857143, 'eval_Weighted F1': 0.9375802786212682, 'eval_Micro F1': 0.9392857142857143, 'eval_Macro F1': 0.936086349805871, 'eval_Weighted Recall': 0.9392857142857143, 'eval_Micro Recall': 0.9392857142857143, 'eval_Macro Recall': 0.9399294111124242, 'eval_Weighted Precision': 0.9402330149657375, 'eval_Micro Precision': 0.9392857142857143, 'eval_Macro Precision': 0.9364768012701683, 'eval_runtime': 223.5216, 'eval_samples_per_second': 2.505, 'eval_steps_per_second': 0.081, 'epoch': 10.97}


  0%|          | 0/18 [00:00<?, ?it/s]

{'eval_loss': 0.3808061182498932, 'eval_accuracy': 0.9178571428571428, 'eval_Weighted F1': 0.9181258550576402, 'eval_Micro F1': 0.9178571428571429, 'eval_Macro F1': 0.9175971860106519, 'eval_Weighted Recall': 0.9178571428571428, 'eval_Micro Recall': 0.9178571428571428, 'eval_Macro Recall': 0.9180011100912139, 'eval_Weighted Precision': 0.925056670601057, 'eval_Micro Precision': 0.9178571428571428, 'eval_Macro Precision': 0.9234973575993705, 'eval_runtime': 223.1884, 'eval_samples_per_second': 2.509, 'eval_steps_per_second': 0.081, 'epoch': 12.0}


  0%|          | 0/18 [00:00<?, ?it/s]

{'eval_loss': 0.45458880066871643, 'eval_accuracy': 0.9035714285714286, 'eval_Weighted F1': 0.9044516748740495, 'eval_Micro F1': 0.9035714285714286, 'eval_Macro F1': 0.9023841316195239, 'eval_Weighted Recall': 0.9035714285714286, 'eval_Micro Recall': 0.9035714285714286, 'eval_Macro Recall': 0.898786279174778, 'eval_Weighted Precision': 0.9151270659892686, 'eval_Micro Precision': 0.9035714285714286, 'eval_Macro Precision': 0.9157404239541742, 'eval_runtime': 222.9035, 'eval_samples_per_second': 2.512, 'eval_steps_per_second': 0.081, 'epoch': 12.97}


  0%|          | 0/18 [00:00<?, ?it/s]

{'eval_loss': 0.5065118074417114, 'eval_accuracy': 0.8785714285714286, 'eval_Weighted F1': 0.8825674735858728, 'eval_Micro F1': 0.8785714285714286, 'eval_Macro F1': 0.8813364141979209, 'eval_Weighted Recall': 0.8785714285714286, 'eval_Micro Recall': 0.8785714285714286, 'eval_Macro Recall': 0.8742209258451302, 'eval_Weighted Precision': 0.9039507709900181, 'eval_Micro Precision': 0.8785714285714286, 'eval_Macro Precision': 0.9054521800876599, 'eval_runtime': 222.7538, 'eval_samples_per_second': 2.514, 'eval_steps_per_second': 0.081, 'epoch': 14.0}


  0%|          | 0/18 [00:00<?, ?it/s]

{'eval_loss': 0.4925352931022644, 'eval_accuracy': 0.8803571428571428, 'eval_Weighted F1': 0.8836750712816211, 'eval_Micro F1': 0.8803571428571428, 'eval_Macro F1': 0.8822042847118763, 'eval_Weighted Recall': 0.8803571428571428, 'eval_Micro Recall': 0.8803571428571428, 'eval_Macro Recall': 0.8756541197938825, 'eval_Weighted Precision': 0.9044332600616519, 'eval_Micro Precision': 0.8803571428571428, 'eval_Macro Precision': 0.9058857791079385, 'eval_runtime': 223.7256, 'eval_samples_per_second': 2.503, 'eval_steps_per_second': 0.08, 'epoch': 14.57}
{'train_runtime': 53605.2357, 'train_samples_per_second': 0.627, 'train_steps_per_second': 0.005, 'train_loss': 0.8421506914437986, 'epoch': 14.57}


TrainOutput(global_step=255, training_loss=0.8421506914437986, metrics={'train_runtime': 53605.2357, 'train_samples_per_second': 0.627, 'train_steps_per_second': 0.005, 'train_loss': 0.8421506914437986, 'epoch': 14.57})

#### Evaluate Model

In [19]:
trainer.evaluate()

  0%|          | 0/18 [00:00<?, ?it/s]

{'eval_loss': 0.4925352931022644,
 'eval_accuracy': 0.8803571428571428,
 'eval_Weighted F1': 0.8836750712816211,
 'eval_Micro F1': 0.8803571428571428,
 'eval_Macro F1': 0.8822042847118763,
 'eval_Weighted Recall': 0.8803571428571428,
 'eval_Micro Recall': 0.8803571428571428,
 'eval_Macro Recall': 0.8756541197938825,
 'eval_Weighted Precision': 0.9044332600616519,
 'eval_Micro Precision': 0.8803571428571428,
 'eval_Macro Precision': 0.9058857791079385,
 'eval_runtime': 221.5845,
 'eval_samples_per_second': 2.527,
 'eval_steps_per_second': 0.081,
 'epoch': 14.57}

#### Push Model to Hub (My Profile!!!)

In [20]:
trainer.push_to_hub()

Several commits (2) will be pushed upstream.
The progress bars may be unreliable.


Upload file pytorch_model.bin:   0%|          | 1.00/361M [00:00<?, ?B/s]

Upload file runs/May01_22-26-27_Brians-Mac-mini/events.out.tfevents.1682997993.Brians-Mac-mini.9802.0:   0%|  …

Upload file runs/May01_22-26-27_Brians-Mac-mini/events.out.tfevents.1683051820.Brians-Mac-mini.local.9802.2:  …

To https://huggingface.co/DunnBC22/wav2vec2-base-Toronto_emotional_speech_set
   054ddca..3cc36d1  main -> main

To https://huggingface.co/DunnBC22/wav2vec2-base-Toronto_emotional_speech_set
   3cc36d1..c7a3986  main -> main



'https://huggingface.co/DunnBC22/wav2vec2-base-Toronto_emotional_speech_set/commit/3cc36d1bff9ee699193ff430ca8b71fe3aab0001'

### Notes & Other Takeaways From This Project
****
- I should have (and usually do) add the parameters to return the best results (based on Weighted Evaluation F1-Score).
- Results:
    - Accuracy: 0.8803571428571428
    - Weighted F1: 0.8836750712816211
    - Weighted Recall: 0.8803571428571428
    - Weighted Precision: 0.9044332600616519
****