<a href="https://colab.research.google.com/github/HamdanXI/nlp_adventure/blob/main/803/w2v2_uclass_clipped_10_seconds_fb_labeled_v2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install transformers datasets torch
!pip install transformers[torch]

In [3]:
from datasets import load_dataset
from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2Config, Wav2Vec2Processor, TrainingArguments, Trainer
import torch
from sklearn.metrics import accuracy_score

# Load the dataset
dataset = load_dataset("HamdanXI/fb_labeled_v5")

# Filter out invalid entries
def is_audio_valid(batch):
    return batch["audio"] is not None and batch["Output"] is not None

valid_dataset = dataset.filter(is_audio_valid)

# Map labels to zero-indexed
label_list = sorted(set(valid_dataset['train']['Output']))
label_dict = {label: idx for idx, label in enumerate(label_list)}

def map_labels(batch):
    batch["Output"] = label_dict[batch["Output"]]
    return batch

valid_dataset = valid_dataset.map(map_labels)

# Load configuration and create model
model_path = "HamdanXI/w2v2_uclass_clipped_10_seconds"

NUM_CLASSES = len(label_dict)

config = Wav2Vec2Config.from_pretrained(model_path, num_labels=NUM_CLASSES, add_adapter=False)
model = Wav2Vec2ForSequenceClassification(config)
processor = Wav2Vec2Processor.from_pretrained(model_path)

# Prepare the dataset
def prepare_dataset(batch):
    audio = batch["audio"]
    inputs = processor(audio["array"], sampling_rate=audio["sampling_rate"], return_tensors="pt", padding=True)
    input_values = inputs.input_values.squeeze()  # Adjust as needed
    labels = torch.tensor(batch["Output"], dtype=torch.long)
    return {"input_values": input_values, "labels": labels}

processed_dataset = valid_dataset.map(prepare_dataset, remove_columns=valid_dataset.column_names['train'])

# Split the dataset
train_test_split = processed_dataset['train'].train_test_split(test_size=0.1)

# Set training arguments
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="steps",
    learning_rate=1e-4,
    per_device_train_batch_size=16,
    gradient_accumulation_steps=2,
    num_train_epochs=3,
    save_steps=500,
    eval_steps=500,
    logging_steps=500,
    load_best_model_at_end=True,
)

# Compute metrics function
def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    acc = accuracy_score(labels, preds)
    return {"accuracy": acc}

import torch
from torch.nn.utils.rnn import pad_sequence

class DataCollatorForWav2Vec2:
    def __init__(self, processor):
        self.processor = processor

    def __call__(self, features):
        # Ensure each input value is a tensor, then pad
        input_values = [torch.tensor(feature['input_values'], dtype=torch.float) for feature in features]
        labels = [feature['labels'] for feature in features]

        # Pad input values
        input_values_padded = pad_sequence(input_values, batch_first=True, padding_value=0.0)  # Update padding value if needed

        # Convert labels to tensors
        labels = torch.tensor(labels, dtype=torch.long)

        return {
            'input_values': input_values_padded,
            'labels': labels
        }

# Initialize the custom data collator with the processor
data_collator = DataCollatorForWav2Vec2(processor=processor)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_test_split['train'],
    eval_dataset=train_test_split['test'],
    data_collator=data_collator,
    compute_metrics=compute_metrics
)

# Train the model
trainer.train()

You are using a model of type wav2vec2-bert to instantiate a model of type wav2vec2. This is not supported for all configurations of models and can yield errors.


preprocessor_config.json:   0%|          | 0.00/277 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.10k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/398 [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/30.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/96.0 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Map:   0%|          | 0/9469 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy
500,1.6178,1.603536,0.515312


TrainOutput(global_step=798, training_loss=1.5984600325276082, metrics={'train_runtime': 1332.378, 'train_samples_per_second': 19.188, 'train_steps_per_second': 0.599, 'total_flos': 2.324520748695168e+18, 'train_loss': 1.5984600325276082, 'epoch': 2.9943714821763603})

In [4]:
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [5]:
trainer.push_to_hub("HamdanXI/w2v2_uclass_clipped_10_seconds_fb_labeled")

model.safetensors:   0%|          | 0.00/1.26G [00:00<?, ?B/s]

Upload 3 LFS files:   0%|          | 0/3 [00:00<?, ?it/s]

events.out.tfevents.1714677223.66f4a5d44daa.5906.0:   0%|          | 0.00/7.67k [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/4.98k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/HamdanXI/results/commit/1ab3064f00b789bafca3a1e6c69dfcb171cdd0aa', commit_message='HamdanXI/w2v2_uclass_clipped_10_seconds_fb_labeled', commit_description='', oid='1ab3064f00b789bafca3a1e6c69dfcb171cdd0aa', pr_url=None, pr_revision=None, pr_num=None)

In [6]:
# F1 Score
from sklearn.metrics import accuracy_score, f1_score

def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    acc = accuracy_score(labels, preds)
    f1_scores = f1_score(labels, preds, average=None)  # Returns an array of F1 scores for each class
    return {"accuracy": acc, **{f"f1_score_class_{i}": score for i, score in enumerate(f1_scores)}}

from transformers import TrainingArguments, Trainer

# Setup training arguments
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=1e-4,
    per_device_train_batch_size=16,
    num_train_epochs=10,
    logging_dir='./logs',
    logging_steps=10,
)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_test_split['train'],
    eval_dataset=train_test_split['test'],
    data_collator=data_collator,
    compute_metrics=compute_metrics
)

# Train the model
trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy,F1 Score Class 0,F1 Score Class 1,F1 Score Class 2,F1 Score Class 3,F1 Score Class 4,F1 Score Class 5,F1 Score Class 6,F1 Score Class 7,F1 Score Class 8
1,1.506,1.637802,0.515312,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.680139,0.0
2,1.4754,1.608094,0.515312,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.680139,0.0
3,1.5661,1.608551,0.515312,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.680139,0.0
4,1.5656,1.601161,0.515312,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.680139,0.0
5,1.6768,1.628127,0.515312,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.680139,0.0
6,1.6289,1.60112,0.515312,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.680139,0.0
7,1.4727,1.60154,0.515312,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.680139,0.0
8,1.5386,1.605441,0.515312,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.680139,0.0
9,1.5436,1.601962,0.515312,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.680139,0.0
10,1.4974,1.600648,0.515312,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.680139,0.0


TrainOutput(global_step=5330, training_loss=1.5665093151758134, metrics={'train_runtime': 4866.4589, 'train_samples_per_second': 17.512, 'train_steps_per_second': 1.095, 'total_flos': 7.761152570279037e+18, 'train_loss': 1.5665093151758134, 'epoch': 10.0})

In [7]:
results = trainer.evaluate()
print(results)

{'eval_loss': 1.6006478071212769, 'eval_accuracy': 0.515311510031679, 'eval_f1_score_class_0': 0.0, 'eval_f1_score_class_1': 0.0, 'eval_f1_score_class_2': 0.0, 'eval_f1_score_class_3': 0.0, 'eval_f1_score_class_4': 0.0, 'eval_f1_score_class_5': 0.0, 'eval_f1_score_class_6': 0.0, 'eval_f1_score_class_7': 0.6801393728222996, 'eval_f1_score_class_8': 0.0, 'eval_runtime': 29.9851, 'eval_samples_per_second': 31.582, 'eval_steps_per_second': 3.969, 'epoch': 10.0}


In [8]:
trainer.push_to_hub("HamdanXI/w2v2_uclass_clipped_10_seconds_fb_labeled_v2")

model.safetensors:   0%|          | 0.00/1.26G [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/HamdanXI/results/commit/ec3c8460d31eaa966573ee17d22818bfe3c82846', commit_message='HamdanXI/w2v2_uclass_clipped_10_seconds_fb_labeled_v2', commit_description='', oid='ec3c8460d31eaa966573ee17d22818bfe3c82846', pr_url=None, pr_revision=None, pr_num=None)