### Installation des dépendances

In [None]:
%pip install transformers==4.48.2 peft==0.14.0 datasets==3.2.0 datasets_sql nbformat librosa soundfile  # ipython==8.32.0

In [None]:
import os
import shutil
import functools
import itertools
import time

from dataclasses import dataclass

from pprint import pprint

from transformers import (
    AutoProcessor,
    AutoModelForTextToWaveform,
    AutoFeatureExtractor,
    Seq2SeqTrainer,
    Seq2SeqTrainingArguments,
)

from datasets import (
    load_dataset, 
    load_from_disk,
    Dataset,
    Audio as HFAudio,
)

from peft import LoraConfig, get_peft_model

from datasets_sql import query

from IPython.display import Audio, display

import numpy as np

import pandas as pd

import torch

import librosa

### Empty GPU

In [None]:
# Release cached GPU memory that PyTorch holds but is not currently using,
# so the cells below start with as much free device memory as possible.
torch.cuda.empty_cache()

### Fonctions utilitaires additionnelles

In [None]:
def text_to_music(
    processor,
    model,
    prompts,
    max_new_tokens,
    sampling_rate,
    do_sample,
    guidance_scale,
):
    """Convert text prompts into generated music clips and display them.

    Args:
        processor: Callable invoked with ``text=prompts``, ``padding=True`` and
            ``return_tensors="pt"``; its result is moved to ``model.device``.
        model: Model exposing ``device`` and ``generate``.
        prompts: Text prompts (list of strings), one clip is produced per prompt.
        max_new_tokens: Forwarded to ``model.generate``.
        sampling_rate: Playback rate given to each ``Audio`` widget.
        do_sample: Forwarded to ``model.generate``.
        guidance_scale: Forwarded to ``model.generate``.

    Returns:
        List of ``IPython.display.Audio`` objects, one per generated clip.
        Each clip is also displayed in the notebook as a side effect.
    """
    # Free cached GPU memory before generation.
    torch.cuda.empty_cache()
    inputs = processor(
        text=prompts,
        padding=True,
        return_tensors="pt",
    ).to(model.device)
    audio_values = model.generate(
        **inputs,
        do_sample=do_sample,
        guidance_scale=guidance_scale,
        max_new_tokens=max_new_tokens,
    )
    # Cast to float32 before leaving torch: NumPy has no bfloat16 dtype, so
    # `.numpy()` raises on bf16 output, and float16 data would otherwise be
    # handed to the audio widget as-is. For float32 models this is a no-op.
    waveforms = audio_values.detach().float().cpu().numpy()
    musics = [
        Audio(waveform.squeeze(), rate=sampling_rate)
        for waveform in waveforms
    ]
    for music in musics:
        display(music)
    return musics

In [None]:
def chunk_audio_file(audio_file, chunk_duration, chunk_overlap, sampling_rate=None):
    """Split an audio file into fixed-length, optionally overlapping chunks.

    Args:
        audio_file: Audio source handed to ``librosa.load``.
        chunk_duration: Length of each chunk, in seconds.
        chunk_overlap: Duration shared by two consecutive chunks, in seconds.
            May be fractional; must be smaller than ``chunk_duration``.
        sampling_rate: Forwarded to ``librosa.load`` as ``sr`` (per librosa,
            ``None`` keeps the file's native sampling rate).

    Returns:
        List of dicts with keys ``"array"`` (the chunk samples) and
        ``"sampling_rate"``. Chunks start every
        ``chunk_duration - chunk_overlap`` seconds; the chunks near the end of
        the file may be shorter than ``chunk_duration``.

    Raises:
        ValueError: If the hop between chunk starts is not at least one sample
            (``chunk_overlap`` >= ``chunk_duration``, or a zero-length chunk).
    """
    y, sr = librosa.load(audio_file, sr=sampling_rate)
    chunk_samples = int(chunk_duration * sr)
    # Convert to whole samples: range() rejects a float step, which is what a
    # fractional-second overlap used to produce.
    overlap_samples = int(chunk_overlap * sr)
    hop_samples = chunk_samples - overlap_samples
    if hop_samples <= 0:
        # A zero step makes range() fail with an unrelated message and a
        # negative step silently yields no chunk at all; fail explicitly.
        raise ValueError(
            "chunk_overlap must be smaller than chunk_duration "
            f"(got chunk_duration={chunk_duration}, chunk_overlap={chunk_overlap})"
        )
    return [
        {
            "array": y[start:start + chunk_samples],
            "sampling_rate": sr,
        }
        for start in range(0, len(y), hop_samples)
    ]

In [None]:
def prompt_engineering(sport_session_name: str, music_style: str) -> str:
    """Build the text prompt describing music for a given sport session.

    Args:
        sport_session_name: Name of the sport session.
        music_style: Musical style to request.

    Returns:
        The prompt ``"<music_style> music for <sport_session_name> sport session"``.
    """
    prompt = f"{music_style} music for {sport_session_name} sport session"
    return prompt

## Classes utilitaires additionnelles

In [None]:
@dataclass
class DataCollatorMusicGenWithPadding:
    """
    Collator that pads a list of features into a single training batch.

    Labels and text token ids are padded independently: labels with
    ``pad_sequence`` and a fill value of ``-100``, token ids with the
    processor's tokenizer.

    Args:
        processor (:class:`~transformers.AutoProcessor`)
            Processor whose ``tokenizer.pad`` pads the ``input_ids``.
    """

    # Only `processor.tokenizer.pad` is used by this collator.
    processor: AutoProcessor

    def __call__(self, features):
        """Collate ``features`` into a dict of padded tensors.

        Args:
            features: Sequence of mappings, each providing ``"labels"`` and
                ``"input_ids"``.

        Returns:
            Dict holding ``"labels"`` plus every entry returned by
            ``tokenizer.pad`` for the ``input_ids``.
        """
        # Labels: swap the first two axes of each example so the axis that is
        # padded comes first (presumably (num_codebooks, seq_len) ->
        # (seq_len, num_codebooks) -- TODO confirm against the dataset).
        label_tensors = []
        for feature in features:
            label_tensors.append(torch.tensor(feature["labels"]).transpose(0, 1))

        # Result is batch-first, i.e. (bsz, seq_len, num_codebooks) per the
        # original note; -100 is presumably the value ignored by the loss.
        padded_labels = torch.nn.utils.rnn.pad_sequence(
            label_tensors,
            batch_first=True,
            padding_value=-100,
        )

        # Token ids: let the tokenizer apply its own padding rules.
        token_features = []
        for feature in features:
            token_features.append({"input_ids": feature["input_ids"]})
        padded_inputs = self.processor.tokenizer.pad(
            token_features, return_tensors="pt"
        )

        return {"labels": padded_labels, **padded_inputs}

In [None]:
class MusicgenTrainer(Seq2SeqTrainer):
    """``Seq2SeqTrainer`` variant whose length padding handles 3-D tensors."""

    def _pad_tensors_to_max_len(self, tensor, max_length):
        """Pad or truncate ``tensor`` along axis 1 to exactly ``max_length``.

        Args:
            tensor: 3-D tensor; axes 0 and 2 are kept, axis 1 is resized
                (presumably (batch, seq_len, num_codebooks) -- TODO confirm).
            max_length: Target size of axis 1.

        Returns:
            Tensor of shape ``(tensor.shape[0], max_length, tensor.shape[2])``
            with the dtype and device of ``tensor``, filled with the pad token
            id wherever ``tensor`` has no value.

        Raises:
            ValueError: If no pad token id can be found on the tokenizer
                (pad, then EOS) or in the model configuration.
        """
        # `Trainer.tokenizer` is deprecated in favour of `processing_class` on
        # recent transformers releases and logs a warning on every access, so
        # only fall back to it when `processing_class` does not exist.
        if hasattr(self, "processing_class"):
            text_processor = self.processing_class
        else:
            text_processor = getattr(self, "tokenizer", None)

        pad_token_id = None
        if text_processor is not None and hasattr(text_processor, "pad_token_id"):
            # If PAD token is not defined at least EOS token has to be defined
            pad_token_id = text_processor.pad_token_id
            if pad_token_id is None:
                pad_token_id = getattr(text_processor, "eos_token_id", None)
        if pad_token_id is None:
            # Covers both "no usable tokenizer" and "tokenizer defines neither
            # PAD nor EOS"; the latter used to end in `None * tensor`.
            pad_token_id = self.model.config.pad_token_id
        if pad_token_id is None:
            raise ValueError(
                "Pad_token_id must be set in the configuration of the model, in order to pad tensors"
            )

        padded_tensor = torch.full(
            (tensor.shape[0], max_length, tensor.shape[2]),
            pad_token_id,
            dtype=tensor.dtype,
            device=tensor.device,
        )
        # Copy at most `max_length` steps: longer inputs are truncated.
        length = min(max_length, tensor.shape[1])
        padded_tensor[:, :length] = tensor[:, :length]
        return padded_tensor