<a href="https://colab.research.google.com/github/Ameer-Hesham/Algorithm-/blob/main/Baby_cry_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install datasets



In [None]:
# Download the `Nooon/Donate_a_cry` dataset from the Hugging Face Hub

from datasets import load_dataset

dataset = load_dataset(path="Nooon/Donate_a_cry")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Resolving data files:   0%|          | 0/457 [00:00<?, ?it/s]

In [None]:
# Hold out 10% of the examples as a test set.
#
# The split is stratified on the `label` column so the train and test subsets
# keep the same class proportions as the full dataset. With a purely random
# split, a hold-out this small can end up with few or no examples of the
# rarer classes, which makes the reported accuracy uninformative.
# `seed` keeps the split reproducible.
#
# NOTE(review): this cell rebinds `dataset`; running it a second time would
# split the already-reduced "train" subset again. Reload the dataset first.

dataset = dataset["train"].train_test_split(
    test_size=0.1,
    shuffle=True,
    seed=42,
    stratify_by_column="label",
)
dataset

DatasetDict({
    train: Dataset({
        features: ['audio', 'label'],
        num_rows: 411
    })
    test: Dataset({
        features: ['audio', 'label'],
        num_rows: 46
    })
})

In [None]:
# Build a helper that maps an integer label id to its readable class name,
# then try it on the first training example

train_split = dataset["train"]
label2class = train_split.features["label"].int2str
label2class(train_split[0]["label"])

'hungry'

In [None]:
# Load the feature extractor that belongs to the pretrained checkpoint.
# It is configured to normalize the waveform and to return an attention mask.

from transformers import AutoFeatureExtractor

model_id = "ntu-spml/distilhubert"
feature_extractor = AutoFeatureExtractor.from_pretrained(
    model_id,
    return_attention_mask=True,
    do_normalize=True,
)

In [None]:
# Read the sampling rate the feature extractor is configured with;
# the audio is resampled to this rate before being fed to the model

sampling_rate = feature_extractor.sampling_rate
sampling_rate

16000

In [None]:
# Resample the audio column so its sampling rate matches the feature extractor's

from datasets import Audio

target_audio = Audio(sampling_rate=sampling_rate)
dataset = dataset.cast_column("audio", target_audio)

In [None]:
# Inspect the mean and variance of one raw training waveform

import numpy as np

sample = dataset["train"][0]["audio"]
waveform = sample["array"]

print(f"Mean: {np.mean(waveform):.3}, Variance: {np.var(waveform):.3}")

Mean: -0.000294, Variance: 0.0164


In [None]:
# Run the feature extractor on the same sample and check how it rescales the input

inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"])
input_values = inputs["input_values"]

print(f"inputs keys: {list(inputs.keys())}")
print(f"Mean: {np.mean(input_values):.3}, Variance: {np.var(input_values):.3}")

inputs keys: ['input_values', 'attention_mask']
Mean: -5.38e-09, Variance: 1.0


In [None]:
# Preprocessing applied to each batch of examples before training

max_duration = 15.0  # seconds; longer clips are truncated to this length


def preprocess_function(examples):
    """Turn a batch of decoded audio clips into model inputs.

    Args:
        examples: Batch whose "audio" column holds items with an "array" entry.

    Returns:
        The feature extractor output for the batch (attention mask included),
        with every clip truncated to at most ``max_duration`` seconds.
    """
    target_rate = feature_extractor.sampling_rate
    # Convert the duration limit from seconds to a number of samples.
    max_samples = int(target_rate * max_duration)
    waveforms = [clip["array"] for clip in examples["audio"]]
    return feature_extractor(
        waveforms,
        sampling_rate=target_rate,
        max_length=max_samples,
        truncation=True,
        return_attention_mask=True,
    )

In [None]:
# Apply the preprocessing function above to every split, 100 examples at a time;
# the raw "audio" column is dropped from the result

baby_encoded = dataset.map(
    function=preprocess_function,
    batched=True,
    batch_size=100,
    num_proc=1,
    remove_columns=["audio"],
)
baby_encoded

DatasetDict({
    train: Dataset({
        features: ['label', 'input_values', 'attention_mask'],
        num_rows: 411
    })
    test: Dataset({
        features: ['label', 'input_values', 'attention_mask'],
        num_rows: 46
    })
})

In [None]:
# Build the id <-> class-name lookup tables handed to the model config
# (ids are stored as strings)

label_names = baby_encoded["train"].features["label"].names
id2label = {str(idx): label2class(idx) for idx in range(len(label_names))}
label2id = {name: idx for idx, name in id2label.items()}

id2label["2"]

'discomfort'

In [None]:
# Load the pretrained checkpoint with an audio-classification head sized
# for our label set

from transformers import AutoModelForAudioClassification

num_labels = len(id2label)

model = AutoModelForAudioClassification.from_pretrained(
    model_id,
    id2label=id2label,
    label2id=label2id,
    num_labels=num_labels,
)

Some weights of HubertForSequenceClassification were not initialized from the model checkpoint at ntu-spml/distilhubert and are newly initialized: ['classifier.bias', 'classifier.weight', 'encoder.pos_conv_embed.conv.parametrizations.weight.original0', 'encoder.pos_conv_embed.conv.parametrizations.weight.original1', 'projector.bias', 'projector.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
pip install accelerate -U



In [None]:
# Training configuration

from transformers import TrainingArguments

# Checkpoint name without its organisation prefix.
model_name = model_id.rpartition("/")[2]
batch_size = 8
gradient_accumulation_steps = 1
num_train_epochs = 10

training_args = TrainingArguments(
    output_dir=f"{model_name}-finetuned-baby_cry",
    # Optimisation schedule
    learning_rate=5e-5,
    warmup_ratio=0.1,
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    fp16=False,
    # Evaluate and checkpoint once per epoch, then restore the checkpoint
    # with the best accuracy when training finishes
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    # Logging / publishing
    logging_steps=5,
    push_to_hub=False,
)

In [None]:
pip install evaluate

Collecting evaluate
  Downloading evaluate-0.4.1-py3-none-any.whl (84 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/84.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━[0m [32m81.9/84.1 kB[0m [31m2.6 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
Collecting responses<0.19 (from evaluate)
  Downloading responses-0.18.0-py3-none-any.whl (38 kB)
Installing collected packages: responses, evaluate
Successfully installed evaluate-0.4.1 responses-0.18.0


In [None]:
# Load the accuracy metric used to score the model during evaluation

import evaluate
import numpy as np

metric = evaluate.load("accuracy")


def compute_metrics(eval_pred):
    """Score a set of predictions with the accuracy metric.

    Args:
        eval_pred: Object exposing ``predictions`` (per-class scores, one row
            per example) and ``label_ids`` (the reference labels).

    Returns:
        Whatever ``metric.compute`` returns for the predicted class ids.
    """
    scores = eval_pred.predictions
    # The predicted class is the index of the highest score in each row.
    predicted_ids = np.argmax(scores, axis=1)
    return metric.compute(references=eval_pred.label_ids, predictions=predicted_ids)

Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]

In [None]:
# Wire the model, data, and metric into a Trainer and run fine-tuning

from transformers import Trainer

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=baby_encoded["train"],
    eval_dataset=baby_encoded["test"],
    compute_metrics=compute_metrics,
    # The feature extractor is passed in the tokenizer slot
    tokenizer=feature_extractor,
)

trainer.train()

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Epoch,Training Loss,Validation Loss,Accuracy
1,0.7814,0.396397,0.934783
2,0.7018,0.357562,0.934783
3,0.6262,0.355071,0.934783
4,0.4743,0.348876,0.934783
5,0.6709,0.398259,0.934783
6,0.5627,0.343001,0.934783
7,0.6891,0.354928,0.934783
8,0.7408,0.350508,0.934783
9,0.4493,0.316052,0.934783
10,0.2918,0.321393,0.934783


TrainOutput(global_step=520, training_loss=0.6703545726262606, metrics={'train_runtime': 752.1032, 'train_samples_per_second': 5.465, 'train_steps_per_second': 0.691, 'total_flos': 6.560136809236032e+16, 'train_loss': 0.6703545726262606, 'epoch': 10.0})

In [None]:
# Metadata passed along when the model is pushed to the Hub.
# NOTE(review): `model_name` here ends in "-finetuned-Donate_a_cry", while the
# training output directory uses "-finetuned-baby_cry"; confirm this is intended.
kwargs = dict(
    dataset_tags="Nooon/Donate_a_cry",
    dataset="Donate_a_cry",
    model_name=f"{model_name}-finetuned-Donate_a_cry",
    finetuned_from=model_id,
    tasks="audio-classification",
)

In [None]:
# Authenticate with the Hugging Face Hub (renders an interactive login widget)
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
# Upload the trained model to the Hub, passing the metadata collected in `kwargs`
trainer.push_to_hub(**kwargs)

model.safetensors:   0%|          | 0.00/94.8M [00:00<?, ?B/s]

Upload 3 LFS files:   0%|          | 0/3 [00:00<?, ?it/s]

events.out.tfevents.1712006613.efd531068521.6614.0:   0%|          | 0.00/31.0k [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/4.92k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/AmeerHesham/distilhubert-finetuned-baby_cry/commit/9f8992352d60ba75492b46a9ec60997c4beaec9d', commit_message='End of training', commit_description='', oid='9f8992352d60ba75492b46a9ec60997c4beaec9d', pr_url=None, pr_revision=None, pr_num=None)

In [None]:
# Load the fine-tuned model from the Hub into an audio-classification pipeline.
# NOTE(review): this rebinds `model_id`, which earlier held the base checkpoint
# name; re-running earlier cells after this one would start from the
# fine-tuned checkpoint instead.
from transformers import pipeline
model_id="AmeerHesham/distilhubert-finetuned-baby_cry"
pipe = pipeline("audio-classification", model=model_id)