In [1]:
import numpy as np
import pandas as pd 
from sklearn.model_selection import train_test_split
import random 
import torch
import os 
from torch.utils.data import Dataset, DataLoader

In [None]:
paths = []
labels = []
audio_files = ["Audio_Speech_Actors_01-24", "Audio_Song_Actors_01-24"]
drive_base_path = "/content/drive/MyDrive/MARS"

for main_file in audio_files:
    main_file_path = os.path.join(drive_base_path, main_file)
    for actor in os.listdir(main_file_path):
        actor_path = os.path.join(main_file_path, actor)
        for audio in os.listdir(actor_path):
            if audio.endswith(".wav"):
                path = os.path.join(actor_path, audio)
                paths.append(path)
                emotion = int(audio.split("-")[2])
                labels.append(emotion)


In [3]:
print("Length of paths: ",len(paths))
print("Length of labels: ",len(labels))
print("Maximum label: ",max(labels))
print("Minimum label: ",min(labels))
print("Data type of paths: ",type(paths[0]))
print("Data type of lables: ",type(labels[0]))


Length of paths:  2452
Length of labels:  2452
Maximum label:  8
Minimum label:  1
Data type of paths:  <class 'str'>
Data type of lables:  <class 'int'>


In [4]:
data = list(zip(paths, labels))
seed = random.Random(42)
seed.shuffle(data)
data[:5]

[('Audio_Song_Actors_01-24\\Actor_24\\03-02-03-02-02-01-24.wav', 3),
 ('Audio_Song_Actors_01-24\\Actor_02\\03-02-03-02-02-02-02.wav', 3),
 ('Audio_Speech_Actors_01-24\\Actor_14\\03-01-03-01-02-01-14.wav', 3),
 ('Audio_Speech_Actors_01-24\\Actor_17\\03-01-07-01-02-01-17.wav', 7),
 ('Audio_Song_Actors_01-24\\Actor_06\\03-02-06-01-02-01-06.wav', 6)]

In [5]:
df_combined = pd.DataFrame(data, columns=["path","label"])
df_combined.head()


Unnamed: 0,path,label
0,Audio_Song_Actors_01-24\Actor_24\03-02-03-02-0...,3
1,Audio_Song_Actors_01-24\Actor_02\03-02-03-02-0...,3
2,Audio_Speech_Actors_01-24\Actor_14\03-01-03-01...,3
3,Audio_Speech_Actors_01-24\Actor_17\03-01-07-01...,7
4,Audio_Song_Actors_01-24\Actor_06\03-02-06-01-0...,6


In [6]:
df_combined.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2452 entries, 0 to 2451
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   path    2452 non-null   object
 1   label   2452 non-null   int64 
dtypes: int64(1), object(1)
memory usage: 38.4+ KB


In [7]:
df_combined.describe()

Unnamed: 0,label
count,2452.0
mean,4.318108
std,2.020284
min,1.0
25%,3.0
50%,4.0
75%,6.0
max,8.0


In [8]:
df_combined.loc[0]

path     Audio_Song_Actors_01-24\Actor_24\03-02-03-02-0...
label                                                    3
Name: 0, dtype: object

In [9]:
from pathlib import Path
df_combined["path"] = df_combined["path"].str.replace("\\", "/", regex=False)


In [10]:
df_combined.head()

Unnamed: 0,path,label
0,Audio_Song_Actors_01-24/Actor_24/03-02-03-02-0...,3
1,Audio_Song_Actors_01-24/Actor_02/03-02-03-02-0...,3
2,Audio_Speech_Actors_01-24/Actor_14/03-01-03-01...,3
3,Audio_Speech_Actors_01-24/Actor_17/03-01-07-01...,7
4,Audio_Song_Actors_01-24/Actor_06/03-02-06-01-0...,6


In [11]:
df_combined["label"] = df_combined["label"].apply(lambda x: x-1)
print("minimum label value: ", df_combined["label"].min())
print("maximum label value: ", df_combined["label"].max())

minimum label value:  0
maximum label value:  7


In [12]:
training_data, validation_data = train_test_split(df_combined, test_size=0.2, random_state=32)

In [13]:
training_data.head()

Unnamed: 0,path,label
2138,Audio_Speech_Actors_01-24/Actor_08/03-01-08-01...,7
623,Audio_Speech_Actors_01-24/Actor_05/03-01-01-01...,0
1942,Audio_Song_Actors_01-24/Actor_15/03-02-03-02-0...,2
1331,Audio_Speech_Actors_01-24/Actor_05/03-01-08-02...,7
401,Audio_Speech_Actors_01-24/Actor_20/03-01-02-02...,1


In [14]:
validation_data.head()

Unnamed: 0,path,label
1794,Audio_Speech_Actors_01-24/Actor_16/03-01-08-02...,7
1833,Audio_Speech_Actors_01-24/Actor_20/03-01-08-02...,7
1488,Audio_Song_Actors_01-24/Actor_17/03-02-05-02-0...,4
2267,Audio_Song_Actors_01-24/Actor_24/03-02-04-01-0...,3
290,Audio_Speech_Actors_01-24/Actor_13/03-01-04-02...,3


In [15]:
print("Length of Training Data: ", len(training_data))
print("Length of Validation Data: ", len(validation_data))

Length of Training Data:  1961
Length of Validation Data:  491


In [16]:
import soundfile as sf 
import torchaudio
import librosa
waveform, sr = sf.read(training_data.loc[0,"path"])
print((waveform.shape),sr)
waveform, sr = librosa.load(training_data.loc[0,"path"], sr=16000)
print(waveform.shape, sr)

(237037,) 48000
(79013,) 16000


In [17]:
max_duration = 0
longest_file = ""
for path in df_combined['path']:
    # full_path = os.path.join(base_dir, path)
    if os.path.isfile(path):
        waveform, sample_rate = librosa.load(path, sr=16000)
        duration = waveform.shape[0] / sample_rate
        if duration > max_duration:
            max_duration = duration
            longest_file = path
    else:
        print(f"one file path is not available {path}")
        break

print(f"Max duration: {max_duration:.8f} seconds")
print(f"sample rate: {sample_rate:.2f}")
print(f"Longest file: {longest_file}")

Max duration: 6.37306250 seconds
sample rate: 16000.00
Longest file: Audio_Song_Actors_01-24/Actor_22/03-02-02-02-02-01-22.wav


In [18]:
waveform, sr = librosa.load("Audio_Song_Actors_01-24/Actor_22/03-02-02-02-02-01-22.wav", sr =16000)
print(waveform.shape, sr)

(101969,) 16000


In [19]:
from collections import Counter
sample_rates = []
for full_path in df_combined['path']:
    # full_path = os.path.join(base_dir, path)
    if os.path.isfile(full_path):
        _, sr = librosa.load(full_path, sr=16000)
        sample_rates.append(sr)

# Count frequency of each sample rate
rate_counts = Counter(sample_rates)
print(rate_counts)

Counter({16000: 2452})


In [20]:
class AudioDataset(Dataset):
    def __init__(self, data, processor, max_length= int(6.37306250*16000)):
        self.data = data
        self.processor = processor
        self.max_length = max_length
    def __len__(self):
        return len(self.data)
    def __getitem__(self, index):
        audio_path = self.data.iloc[index]["path"]
        label = self.data.iloc[index]["label"]

        audio, sr = librosa.load(audio_path)
        audio = audio.squeeze()

        if len(audio) > self.max_length :
            audio = audio[:self.max_length]
            # print(f"found a audio file greater than max length : {audio_path} with an audio length {len(audio)}")
        else:
            audio = np.pad(audio, (0,int(self.max_length-len(audio))), "constant")
            
        inputs = self.processor(audio, sampling_rate=16000, return_tensors='pt', padding=True, truncate=True, max_length=self.max_length)
        input_values = inputs.input_values.squeeze()

        return {'input_values': input_values, 'labels': torch.tensor(label, dtype=torch.long)}

    

In [21]:
import torch
print(torch.__version__)


2.5.1+cu121


In [22]:
from transformers import Wav2Vec2Model, Wav2Vec2Processor, Wav2Vec2ForSequenceClassification, Wav2Vec2Config, Trainer, TrainingArguments
model = Wav2Vec2ForSequenceClassification.from_pretrained("facebook/wav2vec2-large", num_labels=8)
processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base")

  from .autonotebook import tqdm as notebook_tqdm
Some weights of Wav2Vec2ForSequenceClassification were not initialized from the model checkpoint at facebook/wav2vec2-large and are newly initialized: ['classifier.bias', 'classifier.weight', 'projector.bias', 'projector.weight', 'wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original0', 'wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original1']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [23]:
train_dataset = AudioDataset(training_data, processor)
validation_dataset = AudioDataset(validation_data, processor)

In [24]:
train_dataloader = DataLoader(train_dataset, batch_size=8, shuffle=True)
validation_dataloader = DataLoader(validation_dataset, batch_size=8, shuffle=False)

In [25]:
from transformers.utils import logging
logging.set_verbosity_info()

In [26]:
from transformers import __version__
print(__version__)


4.39.3


In [None]:
from transformers import TrainingArguments
training_args = TrainingArguments(
    output_dir="./wav2vec2-large",
    overwrite_output_dir=True,
    eval_strategy="epoch",  # Changed from evaluation_strategy
    save_strategy="epoch",
    logging_strategy="epoch",
    metric_for_best_model="eval_accuracy",
    save_total_limit=10,
    logging_dir="./wav2vec2-large/logs",
    greater_is_better=True,
    gradient_accumulation_steps=2,
    learning_rate=2e-5,
    weight_decay=0.01,
    num_train_epochs=19,
)

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [28]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

def get_metrics(pred):
    labels = pred.label_ids
    preds = np.argmax(pred.predictions, axis=1)
    accuracy = accuracy_score(labels, preds)
    precision, recall, fscore, _ = precision_recall_fscore_support(labels, preds, average="weighted")
    return {
        'accuracy':accuracy,
        'precision':precision,
        'recall':recall,
        'fscore':fscore
    }

In [29]:
import os
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

In [30]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=validation_dataset,
    compute_metrics=get_metrics
)


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


In [31]:
trainer.train()

***** Running training *****
  Num examples = 1,961
  Num Epochs = 19
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 2
  Total optimization steps = 2,337
  Number of trainable parameters = 315,693,448
  5%|▌         | 123/2337 [06:21<1:44:20,  2.83s/it]***** Running Evaluation *****
  Num examples = 491
  Batch size = 8


{'loss': 1.9064, 'grad_norm': 11.772726058959961, 'learning_rate': 1.894736842105263e-05, 'epoch': 1.0}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
                                                    
  5%|▌         | 123/2337 [06:57<1:44:20,  2.83s/it]

{'eval_loss': 1.6625241041183472, 'eval_accuracy': 0.3727087576374745, 'eval_precision': 0.3084582023706751, 'eval_recall': 0.3727087576374745, 'eval_fscore': 0.2881233079036416, 'eval_runtime': 35.7512, 'eval_samples_per_second': 13.734, 'eval_steps_per_second': 1.734, 'epoch': 1.0}


Saving model checkpoint to ./wav2vec2-large\checkpoint-123
Configuration saved in ./wav2vec2-large\checkpoint-123\config.json
Model weights saved in ./wav2vec2-large\checkpoint-123\model.safetensors
Deleting older checkpoint [wav2vec2-large\checkpoint-246] due to args.save_total_limit
 11%|█         | 246/2337 [13:46<1:37:07,  2.79s/it] ***** Running Evaluation *****
  Num examples = 491
  Batch size = 8


{'loss': 1.5752, 'grad_norm': 35.67006301879883, 'learning_rate': 1.7894736842105264e-05, 'epoch': 2.0}


                                                    
 11%|█         | 246/2337 [14:22<1:37:07,  2.79s/it]Saving model checkpoint to ./wav2vec2-large\checkpoint-246
Configuration saved in ./wav2vec2-large\checkpoint-246\config.json


{'eval_loss': 1.4160149097442627, 'eval_accuracy': 0.4419551934826884, 'eval_precision': 0.48137021794493967, 'eval_recall': 0.4419551934826884, 'eval_fscore': 0.3841170165094852, 'eval_runtime': 35.8858, 'eval_samples_per_second': 13.682, 'eval_steps_per_second': 1.728, 'epoch': 2.0}


Model weights saved in ./wav2vec2-large\checkpoint-246\model.safetensors
Deleting older checkpoint [wav2vec2-large\checkpoint-123] due to args.save_total_limit
 16%|█▌        | 369/2337 [20:43<1:29:09,  2.72s/it]***** Running Evaluation *****
  Num examples = 491
  Batch size = 8


{'loss': 1.2409, 'grad_norm': 4.1494903564453125, 'learning_rate': 1.6842105263157896e-05, 'epoch': 3.0}


                                                    
 16%|█▌        | 369/2337 [21:17<1:29:09,  2.72s/it]Saving model checkpoint to ./wav2vec2-large\checkpoint-369
Configuration saved in ./wav2vec2-large\checkpoint-369\config.json


{'eval_loss': 1.1935038566589355, 'eval_accuracy': 0.5926680244399185, 'eval_precision': 0.6305708950838942, 'eval_recall': 0.5926680244399185, 'eval_fscore': 0.5622118872112042, 'eval_runtime': 34.3578, 'eval_samples_per_second': 14.291, 'eval_steps_per_second': 1.805, 'epoch': 3.0}


Model weights saved in ./wav2vec2-large\checkpoint-369\model.safetensors
Deleting older checkpoint [wav2vec2-large\checkpoint-246] due to args.save_total_limit
 21%|██        | 492/2337 [27:42<1:24:25,  2.75s/it]***** Running Evaluation *****
  Num examples = 491
  Batch size = 8


{'loss': 0.8977, 'grad_norm': 6.837130069732666, 'learning_rate': 1.578947368421053e-05, 'epoch': 4.0}


                                                    
 21%|██        | 492/2337 [28:17<1:24:25,  2.75s/it]Saving model checkpoint to ./wav2vec2-large\checkpoint-492
Configuration saved in ./wav2vec2-large\checkpoint-492\config.json


{'eval_loss': 0.9394488334655762, 'eval_accuracy': 0.6924643584521385, 'eval_precision': 0.7182692366817568, 'eval_recall': 0.6924643584521385, 'eval_fscore': 0.6748167575183395, 'eval_runtime': 34.3736, 'eval_samples_per_second': 14.284, 'eval_steps_per_second': 1.804, 'epoch': 4.0}


Model weights saved in ./wav2vec2-large\checkpoint-492\model.safetensors
Deleting older checkpoint [wav2vec2-large\checkpoint-369] due to args.save_total_limit
 26%|██▋       | 615/2337 [34:56<1:18:54,  2.75s/it] ***** Running Evaluation *****
  Num examples = 491
  Batch size = 8


{'loss': 0.7507, 'grad_norm': 11.67033863067627, 'learning_rate': 1.4736842105263159e-05, 'epoch': 5.0}


                                                    
 26%|██▋       | 615/2337 [35:31<1:18:54,  2.75s/it]Saving model checkpoint to ./wav2vec2-large\checkpoint-615
Configuration saved in ./wav2vec2-large\checkpoint-615\config.json


{'eval_loss': 0.8867451548576355, 'eval_accuracy': 0.7067209775967414, 'eval_precision': 0.7295698366343304, 'eval_recall': 0.7067209775967414, 'eval_fscore': 0.6941280559043705, 'eval_runtime': 34.5509, 'eval_samples_per_second': 14.211, 'eval_steps_per_second': 1.794, 'epoch': 5.0}


Model weights saved in ./wav2vec2-large\checkpoint-615\model.safetensors
Deleting older checkpoint [wav2vec2-large\checkpoint-492] due to args.save_total_limit
 32%|███▏      | 738/2337 [42:22<1:12:01,  2.70s/it] ***** Running Evaluation *****
  Num examples = 491
  Batch size = 8


{'loss': 0.5938, 'grad_norm': 74.11929321289062, 'learning_rate': 1.3684210526315791e-05, 'epoch': 6.0}


                                                    
 32%|███▏      | 738/2337 [42:58<1:12:01,  2.70s/it]Saving model checkpoint to ./wav2vec2-large\checkpoint-738
Configuration saved in ./wav2vec2-large\checkpoint-738\config.json


{'eval_loss': 0.7368270754814148, 'eval_accuracy': 0.7718940936863544, 'eval_precision': 0.7879950134429433, 'eval_recall': 0.7718940936863544, 'eval_fscore': 0.7694035363482191, 'eval_runtime': 35.837, 'eval_samples_per_second': 13.701, 'eval_steps_per_second': 1.73, 'epoch': 6.0}


Model weights saved in ./wav2vec2-large\checkpoint-738\model.safetensors
 37%|███▋      | 861/2337 [56:43<3:57:39,  9.66s/it]***** Running Evaluation *****
  Num examples = 491
  Batch size = 8


{'loss': 0.4914, 'grad_norm': 13.611947059631348, 'learning_rate': 1.263157894736842e-05, 'epoch': 7.0}


                                                    
 37%|███▋      | 861/2337 [58:38<3:57:39,  9.66s/it]Saving model checkpoint to ./wav2vec2-large\checkpoint-861
Configuration saved in ./wav2vec2-large\checkpoint-861\config.json


{'eval_loss': 0.715597927570343, 'eval_accuracy': 0.7922606924643585, 'eval_precision': 0.8097401585556814, 'eval_recall': 0.7922606924643585, 'eval_fscore': 0.790948071056942, 'eval_runtime': 115.4483, 'eval_samples_per_second': 4.253, 'eval_steps_per_second': 0.537, 'epoch': 7.0}


Model weights saved in ./wav2vec2-large\checkpoint-861\model.safetensors
 42%|████▏     | 984/2337 [1:20:53<3:33:55,  9.49s/it]***** Running Evaluation *****
  Num examples = 491
  Batch size = 8


{'loss': 0.4096, 'grad_norm': 77.42636108398438, 'learning_rate': 1.1578947368421053e-05, 'epoch': 8.0}


                                                      
 42%|████▏     | 984/2337 [1:22:49<3:33:55,  9.49s/it]Saving model checkpoint to ./wav2vec2-large\checkpoint-984
Configuration saved in ./wav2vec2-large\checkpoint-984\config.json


{'eval_loss': 0.9186778664588928, 'eval_accuracy': 0.7617107942973523, 'eval_precision': 0.797199935668136, 'eval_recall': 0.7617107942973523, 'eval_fscore': 0.7558129068728657, 'eval_runtime': 116.2041, 'eval_samples_per_second': 4.225, 'eval_steps_per_second': 0.534, 'epoch': 8.0}


Model weights saved in ./wav2vec2-large\checkpoint-984\model.safetensors
 47%|████▋     | 1107/2337 [1:45:10<3:14:55,  9.51s/it]***** Running Evaluation *****
  Num examples = 491
  Batch size = 8


{'loss': 0.3456, 'grad_norm': 31.8961124420166, 'learning_rate': 1.0526315789473684e-05, 'epoch': 9.0}


                                                       
 47%|████▋     | 1107/2337 [1:47:06<3:14:55,  9.51s/it]Saving model checkpoint to ./wav2vec2-large\checkpoint-1107
Configuration saved in ./wav2vec2-large\checkpoint-1107\config.json


{'eval_loss': 0.8136625289916992, 'eval_accuracy': 0.769857433808554, 'eval_precision': 0.8199336937348892, 'eval_recall': 0.769857433808554, 'eval_fscore': 0.7683072065498567, 'eval_runtime': 115.8418, 'eval_samples_per_second': 4.239, 'eval_steps_per_second': 0.535, 'epoch': 9.0}


Model weights saved in ./wav2vec2-large\checkpoint-1107\model.safetensors
 53%|█████▎    | 1230/2337 [2:09:35<2:56:35,  9.57s/it] ***** Running Evaluation *****
  Num examples = 491
  Batch size = 8


{'loss': 0.2876, 'grad_norm': 5.187353134155273, 'learning_rate': 9.473684210526315e-06, 'epoch': 10.0}


                                                       
 53%|█████▎    | 1230/2337 [2:11:31<2:56:35,  9.57s/it]Saving model checkpoint to ./wav2vec2-large\checkpoint-1230
Configuration saved in ./wav2vec2-large\checkpoint-1230\config.json


{'eval_loss': 0.7763690948486328, 'eval_accuracy': 0.8167006109979633, 'eval_precision': 0.8330097202245799, 'eval_recall': 0.8167006109979633, 'eval_fscore': 0.8137089104170204, 'eval_runtime': 116.5439, 'eval_samples_per_second': 4.213, 'eval_steps_per_second': 0.532, 'epoch': 10.0}


Model weights saved in ./wav2vec2-large\checkpoint-1230\model.safetensors
Deleting older checkpoint [wav2vec2-large\checkpoint-615] due to args.save_total_limit
 58%|█████▊    | 1353/2337 [2:27:31<43:09,  2.63s/it]   ***** Running Evaluation *****
  Num examples = 491
  Batch size = 8


{'loss': 0.1913, 'grad_norm': 10.44920825958252, 'learning_rate': 8.421052631578948e-06, 'epoch': 11.0}


                                                     
 58%|█████▊    | 1353/2337 [2:28:05<43:09,  2.63s/it]Saving model checkpoint to ./wav2vec2-large\checkpoint-1353
Configuration saved in ./wav2vec2-large\checkpoint-1353\config.json


{'eval_loss': 0.8816369771957397, 'eval_accuracy': 0.814663951120163, 'eval_precision': 0.8434726944324458, 'eval_recall': 0.814663951120163, 'eval_fscore': 0.8153558914137363, 'eval_runtime': 34.0534, 'eval_samples_per_second': 14.419, 'eval_steps_per_second': 1.821, 'epoch': 11.0}


Model weights saved in ./wav2vec2-large\checkpoint-1353\model.safetensors
 63%|██████▎   | 1476/2337 [2:34:39<39:06,  2.73s/it]  ***** Running Evaluation *****
  Num examples = 491
  Batch size = 8


{'loss': 0.1607, 'grad_norm': 112.14482879638672, 'learning_rate': 7.368421052631579e-06, 'epoch': 12.0}


                                                     
 63%|██████▎   | 1476/2337 [2:35:13<39:06,  2.73s/it]Saving model checkpoint to ./wav2vec2-large\checkpoint-1476
Configuration saved in ./wav2vec2-large\checkpoint-1476\config.json


{'eval_loss': 0.8583009839057922, 'eval_accuracy': 0.8329938900203666, 'eval_precision': 0.856434827834732, 'eval_recall': 0.8329938900203666, 'eval_fscore': 0.8337764201791169, 'eval_runtime': 34.4322, 'eval_samples_per_second': 14.26, 'eval_steps_per_second': 1.801, 'epoch': 12.0}


Model weights saved in ./wav2vec2-large\checkpoint-1476\model.safetensors
 68%|██████▊   | 1599/2337 [2:41:34<33:04,  2.69s/it]  ***** Running Evaluation *****
  Num examples = 491
  Batch size = 8


{'loss': 0.1356, 'grad_norm': 31.450481414794922, 'learning_rate': 6.31578947368421e-06, 'epoch': 13.0}


                                                     
 68%|██████▊   | 1599/2337 [2:42:09<33:04,  2.69s/it]Saving model checkpoint to ./wav2vec2-large\checkpoint-1599
Configuration saved in ./wav2vec2-large\checkpoint-1599\config.json


{'eval_loss': 0.7978490591049194, 'eval_accuracy': 0.845213849287169, 'eval_precision': 0.8552578781502548, 'eval_recall': 0.845213849287169, 'eval_fscore': 0.842188796117147, 'eval_runtime': 34.6136, 'eval_samples_per_second': 14.185, 'eval_steps_per_second': 1.791, 'epoch': 13.0}


Model weights saved in ./wav2vec2-large\checkpoint-1599\model.safetensors
Deleting older checkpoint [wav2vec2-large\checkpoint-738] due to args.save_total_limit
 74%|███████▎  | 1722/2337 [2:48:42<27:46,  2.71s/it]  ***** Running Evaluation *****
  Num examples = 491
  Batch size = 8


{'loss': 0.1092, 'grad_norm': 38.83863067626953, 'learning_rate': 5.263157894736842e-06, 'epoch': 14.0}


                                                     
 74%|███████▎  | 1722/2337 [2:49:17<27:46,  2.71s/it]Saving model checkpoint to ./wav2vec2-large\checkpoint-1722
Configuration saved in ./wav2vec2-large\checkpoint-1722\config.json


{'eval_loss': 0.7833456993103027, 'eval_accuracy': 0.8553971486761711, 'eval_precision': 0.8704182072304655, 'eval_recall': 0.8553971486761711, 'eval_fscore': 0.8562505351000519, 'eval_runtime': 34.4751, 'eval_samples_per_second': 14.242, 'eval_steps_per_second': 1.798, 'epoch': 14.0}


Model weights saved in ./wav2vec2-large\checkpoint-1722\model.safetensors
Deleting older checkpoint [wav2vec2-large\checkpoint-861] due to args.save_total_limit
 79%|███████▉  | 1845/2337 [2:55:40<22:40,  2.77s/it]  ***** Running Evaluation *****
  Num examples = 491
  Batch size = 8


{'loss': 0.0776, 'grad_norm': 0.08204229921102524, 'learning_rate': 4.210526315789474e-06, 'epoch': 15.0}


                                                     
 79%|███████▉  | 1845/2337 [2:56:14<22:40,  2.77s/it]Saving model checkpoint to ./wav2vec2-large\checkpoint-1845
Configuration saved in ./wav2vec2-large\checkpoint-1845\config.json


{'eval_loss': 0.8844490647315979, 'eval_accuracy': 0.8492871690427699, 'eval_precision': 0.8617661720537513, 'eval_recall': 0.8492871690427699, 'eval_fscore': 0.8496078550168151, 'eval_runtime': 34.4589, 'eval_samples_per_second': 14.249, 'eval_steps_per_second': 1.799, 'epoch': 15.0}


Model weights saved in ./wav2vec2-large\checkpoint-1845\model.safetensors
Deleting older checkpoint [wav2vec2-large\checkpoint-984] due to args.save_total_limit
 84%|████████▍ | 1968/2337 [3:02:53<16:31,  2.69s/it]  ***** Running Evaluation *****
  Num examples = 491
  Batch size = 8


{'loss': 0.0572, 'grad_norm': 6.022532939910889, 'learning_rate': 3.157894736842105e-06, 'epoch': 16.0}


                                                     
 84%|████████▍ | 1968/2337 [3:03:27<16:31,  2.69s/it]Saving model checkpoint to ./wav2vec2-large\checkpoint-1968
Configuration saved in ./wav2vec2-large\checkpoint-1968\config.json


{'eval_loss': 0.7746022939682007, 'eval_accuracy': 0.8615071283095723, 'eval_precision': 0.8708490430421506, 'eval_recall': 0.8615071283095723, 'eval_fscore': 0.860070991088992, 'eval_runtime': 34.4619, 'eval_samples_per_second': 14.248, 'eval_steps_per_second': 1.799, 'epoch': 16.0}


Model weights saved in ./wav2vec2-large\checkpoint-1968\model.safetensors
Deleting older checkpoint [wav2vec2-large\checkpoint-1107] due to args.save_total_limit
 89%|████████▉ | 2091/2337 [3:09:48<11:08,  2.72s/it]  ***** Running Evaluation *****
  Num examples = 491
  Batch size = 8


{'loss': 0.059, 'grad_norm': 0.11070801317691803, 'learning_rate': 2.105263157894737e-06, 'epoch': 17.0}


                                                     
 89%|████████▉ | 2091/2337 [3:10:22<11:08,  2.72s/it]Saving model checkpoint to ./wav2vec2-large\checkpoint-2091
Configuration saved in ./wav2vec2-large\checkpoint-2091\config.json


{'eval_loss': 0.8993044495582581, 'eval_accuracy': 0.8513238289205702, 'eval_precision': 0.860713740709382, 'eval_recall': 0.8513238289205702, 'eval_fscore': 0.8492581510565718, 'eval_runtime': 34.4322, 'eval_samples_per_second': 14.26, 'eval_steps_per_second': 1.801, 'epoch': 17.0}


Model weights saved in ./wav2vec2-large\checkpoint-2091\model.safetensors
 95%|█████████▍| 2214/2337 [3:17:02<05:33,  2.71s/it]  ***** Running Evaluation *****
  Num examples = 491
  Batch size = 8


{'loss': 0.0321, 'grad_norm': 0.09706176817417145, 'learning_rate': 1.0526315789473685e-06, 'epoch': 18.0}


                                                     
 95%|█████████▍| 2214/2337 [3:17:36<05:33,  2.71s/it]Saving model checkpoint to ./wav2vec2-large\checkpoint-2214
Configuration saved in ./wav2vec2-large\checkpoint-2214\config.json


{'eval_loss': 0.7754173278808594, 'eval_accuracy': 0.8757637474541752, 'eval_precision': 0.8845444727995149, 'eval_recall': 0.8757637474541752, 'eval_fscore': 0.8753336214574016, 'eval_runtime': 34.4781, 'eval_samples_per_second': 14.241, 'eval_steps_per_second': 1.798, 'epoch': 18.0}


Model weights saved in ./wav2vec2-large\checkpoint-2214\model.safetensors
100%|██████████| 2337/2337 [3:24:08<00:00,  2.67s/it]***** Running Evaluation *****
  Num examples = 491
  Batch size = 8


{'loss': 0.039, 'grad_norm': 1.7784452438354492, 'learning_rate': 0.0, 'epoch': 19.0}


                                                     
100%|██████████| 2337/2337 [3:24:43<00:00,  2.67s/it]Saving model checkpoint to ./wav2vec2-large\checkpoint-2337
Configuration saved in ./wav2vec2-large\checkpoint-2337\config.json


{'eval_loss': 0.7227576375007629, 'eval_accuracy': 0.8839103869653768, 'eval_precision': 0.8871974205525632, 'eval_recall': 0.8839103869653768, 'eval_fscore': 0.881463162315191, 'eval_runtime': 34.477, 'eval_samples_per_second': 14.241, 'eval_steps_per_second': 1.798, 'epoch': 19.0}


Model weights saved in ./wav2vec2-large\checkpoint-2337\model.safetensors


Training completed. Do not forget to share your model on huggingface.co/models =)


100%|██████████| 2337/2337 [3:24:55<00:00,  5.26s/it]

{'train_runtime': 12295.9256, 'train_samples_per_second': 3.03, 'train_steps_per_second': 0.19, 'train_loss': 0.4926736000821689, 'epoch': 19.0}





TrainOutput(global_step=2337, training_loss=0.4926736000821689, metrics={'train_runtime': 12295.9256, 'train_samples_per_second': 3.03, 'train_steps_per_second': 0.19, 'train_loss': 0.4926736000821689, 'epoch': 19.0})

In [None]:
trainer.train(resume_from_checkpoint="Give the path")

means that you are instructing the Hugging Face Trainer to resume training from a previously saved checkpoint located at the specified path ("Give the path"). This allows you to continue training from where you left off, rather than starting over from scratch.