# TEAM GMM PROJECT
*Lukas Elenbergas, **1712238***

*Team Name: **Solo Mission***

*Task: **Music Generation***

*Dataset: **[MusicCaps](https://www.kaggle.com/datasets/googleai/musiccaps)***

In [None]:
# ---------------------------------------------------
# IMPORTS
# ---------------------------------------------------
import os
import torch
import gradio as gr
from Helpers import train
from Helpers import download_musiccaps_modified
from datasets import load_dataset
from audiocraft.models import MusicGen
from audiocraft.data.audio import audio_write
# ---------------------------------------------------
# ENVIRONMENT VARIABLES
# ---------------------------------------------------
# Artists for which music samples are meant to be generated.
BANDS_TO_GENERATE = [
    'Queens of the Stone Age',
    'King Gizzard and the Lizard Wizard',
    'The Beatles',
    'Metallica',
    'Radiohead',
    'Ghost',
    'Lil Nas X',
    'ACDC',
    'Doja Cat',
    '100 gecs',
]
# ---------------------------------------------------

# MusicCaps - Dataset Creation

MusicCaps dataset implementation based on: https://github.com/nateraw/download-musiccaps-dataset

In [1]:
# ---------------------------------------------------
# MUSICCAPS DATASET INIT
# ---------------------------------------------------
# Call the project-local MusicCaps download helper and keep its return value
# as the working dataset for the cells below.
# NOTE(review): the first argument is presumably the directory the audio
# clips are stored in, and num_proc the number of worker processes - confirm
# against Helpers/download_musiccaps_modified.
music_caps = download_musiccaps_modified.main('C:\\Projects\\GMM\\Music', num_proc=16)
# ---------------------------------------------------

In [2]:
# ---------------------------------------------------
# USED TO CHECK WHICH VIDEOS DO NOT EXIST ANYMORE
# ---------------------------------------------------
# Flip to True to list the YouTube ids whose .wav file is missing on disk.
required = False

if required:
    ds = load_dataset('google/MusicCaps', split='train')

    # Keep every id for which no '<ytid>.wav' exists in the music directory.
    non_existent_yt = [
        sample['ytid']
        for sample in ds
        if not os.path.exists(f'C:\\Projects\\GMM\\Music\\{sample["ytid"]}.wav')
    ]

    print(non_existent_yt)
# ---------------------------------------------------

How a dataset sample looks:

{

        'ytid': '-0Gj8-vB1q4',
        'start_s': 30, 
        'end_s': 40, 
        'audioset_positive_labels': '/m/0140xf,/m/02cjck,/m/04rlf', 
        'aspect_list': '[
            'low quality', 
            'sustained strings melody', 
            'soft female vocal', 
            'mellow piano melody', 
            'sad', 
            'soulful', 
            'ballad'
            ]', 
        'caption': 'The low quality recording features a ballad song that contains sustained strings, mellow piano melody and soft female vocal singing over it. It sounds sad and soulful, like something you would hear at Sunday services.', 
        'author_id': 4, 
        'is_balanced_subset': False, 
        'is_audioset_eval': True, 
        'audio': 
        {
            'path': 'C:\\Projects\\GMM\\MusicData\\-0Gj8-vB1q4.wav', 
            'array': array([-0.00193254,  0.00109734,  0.00309335, ..., -0.01940443,-0.02377406,  0.]),
            'sampling_rate': 44100
        }, 
        'download_status': True
        
}

In [3]:
# ---------------------------------------------------
# SAMPLE EXPLORATION VIA GRADIO
# ---------------------------------------------------
# Flip to True to browse the dataset interactively in a Gradio app.
explore = False


def get_example(idx):
    """Return the audio file path and the caption of dataset sample `idx`.

    The sample is read from the notebook-level `music_caps` dataset.
    """
    sample = music_caps[idx]
    audio_path = sample['audio']['path']
    caption = sample['caption']
    return audio_path, caption


if explore:
    # One slider position per dataset sample; the outputs follow the slider live.
    index_slider = gr.Slider(0, len(music_caps) - 1, value=0, step=1)
    demo = gr.Interface(
        get_example,
        inputs=index_slider,
        outputs=['audio', 'textarea'],
        live=True
    )
    demo.launch()
# ---------------------------------------------------

# MusicGen - Training and Initialization

Using the AudioCraft library for the MusicGen model: https://github.com/facebookresearch/audiocraft


In [4]:
# ---------------------------------------------------
# TRAINING INIT
# ---------------------------------------------------
# Flip to True to run training with the settings below.
# The flag deliberately is NOT called `train`: that name is bound to the
# module imported with `from Helpers import train`, and rebinding it to a bool
# made `train.train(...)` fail with AttributeError and hid the module from
# every later cell.
run_training = False

if run_training:
    # NOTE(review): presumably this fine-tunes the 'small' MusicGen model on
    # `music_caps` and produces the checkpoint loaded in the next cell -
    # confirm against Helpers/train.
    train.train(
        init_dataset=music_caps,
        model_id='small',
        lr=1e-3,
        epochs=15,
        use_wandb=False,
        grad_acc=1,
        batch_size=4,
    )
# ---------------------------------------------------

In [5]:
# ---------------------------------------------------
# MODEL INIT AND LOAD
# ---------------------------------------------------
# Build the pretrained 'small' MusicGen model on the CPU.
model = MusicGen.get_pretrained('small', device='cpu')

# Replace the language-model weights with the locally stored checkpoint.
# map_location='cpu' remaps tensors that were saved from a GPU, so the load
# also works on a machine without CUDA (the model itself lives on the CPU).
# NOTE(review): torch.load unpickles the file - only load trusted checkpoints.
model.lm.load_state_dict(
    torch.load('Models/lm_final_init.pt', map_location='cpu')
)

# Generate 10-second clips.
model.set_generation_params(duration=10)
# ---------------------------------------------------

# OpenAI GPT-3.5 - Band Description Generation

In [None]:
# ---------------------------------------------------
# BAND DESCRIPTION GENERATION - TODO: NOT IMPLEMENTED YET
# ---------------------------------------------------

# ---------------------------------------------------

# Sample Generation

In [7]:
# ---------------------------------------------------
# SAMPLE GENERATION
# ---------------------------------------------------
band = 'Queens of the Stone Age'
description = 'Hard-hitting, riff-driven rock with a dose of desert swagger.'

# MusicGen.generate takes a LIST of text prompts and iterates over it; a bare
# string would be split into one prompt per character, so wrap it in a list.
wav = model.generate([description])

# The result is a batch of clips; take the single generated clip so that
# audio_write receives one [channels, samples] tensor.
wav = wav[0]

# os.path.join keeps the output path valid on non-Windows systems as well.
# audio_write receives the path without a file extension.
path = os.path.join('Generated', band)
audio_write(path, wav, model.sample_rate, strategy='clip')
# ---------------------------------------------------