In [None]:
%%capture
! pip install -U datasets[audio]
! pip install yt-dlp

# For the interactive interface we'll need gradio
! pip install gradio
!pip install farm-haystack[faiss]
!pip install faiss-cpu
!pip install farm-haystack[inference]
!pip install -U sentence-transformers


# restart after install

In [None]:
# Log in to the Hugging Face Hub; the CLI prompts interactively for an access token.
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) n
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [None]:
# Load the dataset
from datasets import load_dataset

MUSICCAPS_REPO = 'google/MusicCaps'  # Hub id of the dataset
ds = load_dataset(path=MUSICCAPS_REPO, split='train')
ds

Dataset({
    features: ['ytid', 'start_s', 'end_s', 'audioset_positive_labels', 'aspect_list', 'caption', 'author_id', 'is_balanced_subset', 'is_audioset_eval'],
    num_rows: 5521
})

In [None]:
# download corresponding music pieces
import subprocess
import os
from pathlib import Path

def download_clip(
    video_identifier,
    output_filename,
    start_time,
    end_time,
    tmp_dir='/tmp/musiccaps',
    num_attempts=5,
    url_base='https://www.youtube.com/watch?v='
):
    """Download one audio excerpt of a video as a WAV file using yt-dlp.

    Args:
        video_identifier: Video id; appended to ``url_base`` to form the URL.
        output_filename: Path handed to yt-dlp's ``-o`` option. Its existence
            after the run decides the returned status.
        start_time: Start of the excerpt, forwarded to ``--download-sections``.
        end_time: End of the excerpt, forwarded to ``--download-sections``.
        tmp_dir: Unused; kept only so existing callers keep working.
        num_attempts: Maximum number of yt-dlp invocations. At least one
            attempt is always made, so values <= 0 can no longer loop forever.
        url_base: URL prefix the video id is appended to.

    Returns:
        A ``(status, log)`` tuple. ``status`` is True only when
        ``output_filename`` exists after a successful yt-dlp run. ``log`` is
        ``'Downloaded'`` on success, the captured yt-dlp output when every
        attempt failed, or a short explanation for the other failure cases.
    """
    # An argument list (no shell) keeps ids/paths containing spaces, quotes or
    # glob characters from being reinterpreted by a shell.
    command = [
        'yt-dlp', '--quiet', '--no-warnings',
        '-x', '--audio-format', 'wav',
        '-f', 'bestaudio',
        '-o', str(output_filename),
        '--download-sections', f'*{start_time}-{end_time}',
        f'{url_base}{video_identifier}',
    ]

    last_output = b''
    for _ in range(max(1, num_attempts)):
        try:
            subprocess.check_output(command, stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as err:
            # yt-dlp ran but failed; remember its output and retry.
            last_output = err.output
        except OSError as err:
            # yt-dlp could not be started at all (e.g. not installed);
            # retrying cannot help.
            return False, str(err)
        else:
            break
    else:
        # Every attempt failed.
        return False, last_output

    # Check if the audio was successfully saved.
    if os.path.exists(output_filename):
        return True, 'Downloaded'
    return False, f'yt-dlp exited successfully but {output_filename} was not created'

def process(example):
    """Attach a local WAV path and a download flag to one dataset example.

    The target path is ``data_dir / "<ytid>.wav"``. When that file is not on
    disk yet, ``download_clip`` is called with the example's ``ytid``,
    ``start_s`` and ``end_s``; otherwise the download is skipped.

    Returns the same example with two extra keys: ``audio`` (the path as a
    string) and ``download_status`` (True if the file already existed, else
    the status reported by ``download_clip``).
    """
    wav_path = str(data_dir / f"{example['ytid']}.wav")

    if os.path.exists(wav_path):
        downloaded = True
    else:
        # The log returned next to the status is not used here.
        downloaded, _log = download_clip(
            example['ytid'],
            wav_path,
            example['start_s'],
            example['end_s'],
        )

    example['audio'] = wav_path
    example['download_status'] = downloaded
    return example

In [None]:
# Download the audio clip belonging to each selected example

from datasets import Audio

# Settings
samples_to_load = 100            # How many samples to load
cores = 4                        # How many processes to use for the loading
sampling_rate = 44100            # Sampling rate for the audio, keep in 44100
writer_batch_size = 1000         # How many examples to keep in memory per worker. Reduce if OOM.
data_dir = Path("./music_data")  # Where to save the data

# Make sure the output directory exists before any worker writes into it
data_dir.mkdir(parents=True, exist_ok=True)

# Keep only the first `samples_to_load` examples
ds = ds.select(range(samples_to_load))

# `process` adds the 'audio' path and 'download_status' columns
ds = ds.map(
    process,
    num_proc=cores,
    writer_batch_size=writer_batch_size,
    keep_in_memory=False,
)
# Declare the path column as an audio feature at the configured sampling rate
ds = ds.cast_column('audio', Audio(sampling_rate=sampling_rate))

  self.pid = os.fork()


Map (num_proc=4):   0%|          | 0/100 [00:00<?, ? examples/s]

In [None]:
# Convert the dataset to a pandas DataFrame, keeping only the three columns
# used for retrieval under their new names; `music_name` is stored as text.
df = (
    ds.to_pandas()
    .rename(columns={'ytid': 'music_id', 'author_id': 'music_name', 'caption': 'music_description'})
    .loc[:, ['music_id', 'music_name', 'music_description']]
    .astype({'music_name': str})
)

In [None]:
#Step 1: Prepare the DataFrame
#The DataFrame `df` built above already has the columns "music_id", "music_name", and "music_description". The commented-out block below is a small hand-written example with the same columns.
import pandas as pd

# data = {
#     "music_id": [1, 2, 3, 4, 5],
#     "music_name": ["Song 1", "Song 2", "Song 3", "Song 4", "Song 5"],
#     "music_description": [
#         "A beautiful ballad with heartfelt lyrics.",
#         "An upbeat pop song that will make you dance.",
#         "A classic rock anthem with powerful guitar riffs.",
#         "A soulful R&B track with smooth vocals.",
#         "An electronic dance track with pulsing beats."
#     ]
# }

# df = pd.DataFrame(data)
# print(df)

In [None]:
# Clear documents left over from a previous run. `document_store` is only
# created by a later cell, so the call is guarded: on a fresh kernel
# (Restart & Run All) this cell is a no-op instead of raising NameError.
# `delete_documents()` replaces the deprecated `delete_all_documents()`.
if "document_store" in globals():
    document_store.delete_documents()

                1. delete_all_documents() method is deprecated, please use delete_documents method
                For more details, please refer to the issue: https://github.com/deepset-ai/haystack/issues/1045
                


In [None]:
# Initialize the FAISSDocumentStore
from haystack.document_stores import FAISSDocumentStore

document_store = FAISSDocumentStore(
    faiss_index_factory_str="Flat",  # Change this to your configuration needs
    sql_url="sqlite:///faiss_inde.db",  # Specify your SQL URL here
    return_embedding=True
)

# Start from an empty store. `delete_all_documents()` is deprecated and only
# prints a warning asking for `delete_documents()`, so call that directly.
document_store.delete_documents()

                1. delete_all_documents() method is deprecated, please use delete_documents method
                For more details, please refer to the issue: https://github.com/deepset-ai/haystack/issues/1045
                


In [None]:
# Step 3: Convert DataFrame to Documents
# Each row becomes one Haystack Document: the description is the searchable
# content, while the name and id travel along as metadata.
from haystack import Document

documents = [
    Document(content=description, meta={"name": name, "id": music_id})
    for description, name, music_id in zip(
        df["music_description"], df["music_name"], df["music_id"]
    )
]

In [None]:
from sentence_transformers import SentenceTransformer

In [None]:
# Step 4: Initialize the Retriever
# A DensePassageRetriever encodes queries and documents with two separate
# encoder checkpoints and searches the document store created earlier.
from haystack.nodes import DensePassageRetriever

QUERY_ENCODER = "facebook/dpr-question_encoder-single-nq-base"
PASSAGE_ENCODER = "facebook/dpr-ctx_encoder-single-nq-base"

retriever = DensePassageRetriever(
    document_store=document_store,
    query_embedding_model=QUERY_ENCODER,
    passage_embedding_model=PASSAGE_ENCODER,
    use_gpu=False,
    embed_title=True,
)

In [None]:
# How many documents does the store hold right now?
stored_count = document_store.get_document_count()
print(f"Document count in store: {stored_count}")

# Report for every document whether it carries any content
for i, doc in enumerate(documents):
    suffix = "content is ready." if doc.content else "is missing content."
    print(f"Document at index {i} {suffix}")

# Smoke-test the encoder on the first five documents
try:
    embeddings = retriever.embed_documents(documents[:5])
except Exception as e:
    print(f"Error during embedding generation: {e}")
else:
    print("Embeddings generated successfully.")


Document count in store: 0
Document at index 0 content is ready.
Document at index 1 content is ready.
Document at index 2 content is ready.
Document at index 3 content is ready.
Document at index 4 content is ready.
Document at index 5 content is ready.
Document at index 6 content is ready.
Document at index 7 content is ready.
Document at index 8 content is ready.
Document at index 9 content is ready.
Document at index 10 content is ready.
Document at index 11 content is ready.
Document at index 12 content is ready.
Document at index 13 content is ready.
Document at index 14 content is ready.
Document at index 15 content is ready.
Document at index 16 content is ready.
Document at index 17 content is ready.
Document at index 18 content is ready.
Document at index 19 content is ready.
Document at index 20 content is ready.
Document at index 21 content is ready.
Document at index 22 content is ready.
Document at index 23 content is ready.
Document at index 24 content is ready.
Document


Create embeddings:   0%|          | 0/16 [00:00<?, ? Docs/s][A
Create embeddings: 100%|██████████| 16/16 [00:04<00:00,  3.84 Docs/s][A
                                                                     [A

Embeddings generated successfully.


In [None]:
# Dump the id and content of every document, separated by a rule
for doc in documents:
    print(
        f"Document ID: {doc.id}",
        f"Content: {doc.content}",
        "----",
        sep="\n",
    )

Document ID: a8bae4d333608d4bf4866bb01fcea128
Content: The low quality recording features a ballad song that contains sustained strings, mellow piano melody and soft female vocal singing over it. It sounds sad and soulful, like something you would hear at Sunday services.
----
Document ID: 2159a9974a3223d8bd692223fda23239
Content: This song features an electric guitar as the main instrument. The guitar plays a descending run in the beginning then plays an arpeggiated chord followed by a double stop hammer on to a higher note and a descending slide followed by a descending chord run. The percussion plays a simple beat using rim shots. The percussion plays in common time. The bass plays only one note on the first count of each bar. The piano plays backing chords. There are no voices in this song. The mood of this song is relaxing. This song can be played in a coffee shop.
----
Document ID: 3fdc26c52cc04c01f47851294297ff
Content: a male voice is singing a melody with changing tempos while

In [None]:
# Persist the documents in the document store
print("writing documents to the document store...")
document_store.write_documents(documents)
written = len(documents)
print(f"{written} documents written to the document store.")

# Compute embeddings for the stored documents with the retriever.
# A failure is reported but does not stop the notebook.
print("preparing to update embeddings...")
try:
    document_store.update_embeddings(retriever)
except Exception as e:
    print(f"Error during embeddings update: {e}")
else:
    print("Embeddings updated.")

writing documents to the document store...


Writing Documents: 10000it [00:00, 326745.71it/s]       


5 documents written to the document store.
preparing to update embeddings...


Updating Embedding:   0%|          | 0/5 [00:00<?, ? docs/s]ERROR:haystack.modeling.data_handler.processor:There were 5 errors during preprocessing at positions: {0, 1, 2, 3, 4}
Updating Embedding:   0%|          | 0/5 [00:00<?, ? docs/s]

Error during embeddings update: object of type 'NoneType' has no len()





In [None]:
# Step 5: Write Documents to DocumentStore
# Empty the store, index the documents again, then let the retriever
# compute their embeddings.
document_store.delete_documents()
document_store.write_documents(documents=documents)
document_store.update_embeddings(retriever=retriever)

Writing Documents: 10000it [00:00, 423171.23it/s]       
Updating Embedding:   0%|          | 0/5 [00:00<?, ? docs/s]
Create embeddings:   0%|          | 0/16 [00:00<?, ? Docs/s][A
Create embeddings: 100%|██████████| 16/16 [00:03<00:00,  4.93 Docs/s][A
Documents Processed: 10000 docs [00:03, 3040.20 docs/s]


In [None]:
# Step 6: Retrieve Relevant Documents
# Free-text description of the wanted music; it is passed to the retriever
# below to find the closest music description.
query = "romantic country music, joy and love"

def retrieve_music_document(query):
    """Return the single most relevant document for ``query``.

    Uses the module-level ``retriever``.

    Args:
        query: Text passed to ``retriever.retrieve``.

    Returns:
        The first document of the retriever's result list.

    Raises:
        IndexError: If the retriever returns no documents. The exception type
            is the one the bare ``[0]`` lookup used to raise, but the message
            now names the query.
    """
    # Only the best hit is used, so ask the retriever for exactly one result.
    retrieved_docs = retriever.retrieve(query, top_k=1)
    if not retrieved_docs:
        raise IndexError(
            f"No document retrieved for query {query!r}; "
            "is the document store empty or missing embeddings?"
        )
    return retrieved_docs[0]  # Return the most relevant document
# Look up the best match for `query` and show its printed representation.
retrieved_doc = retrieve_music_document(query)
print(retrieved_doc)

<Document: id=a8bae4d333608d4bf4866bb01fcea128, content='The low quality recording features a ballad song that contains sustained strings, mellow piano melod...'>


## Music Generation

In [None]:
!pip install --upgrade --quiet pip
!pip install --upgrade --quiet transformers datasets[audio]

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m138.0/138.0 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.0/9.0 MB[0m [31m29.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.6/3.6 MB[0m [31m57.9 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
farm-haystack 1.25.5 requires transformers==4.39.3, but you have transformers 4.40.1 which is incompatible.[0m[31m
[0m

In [None]:
import os
from transformers import AutoProcessor, MusicgenForConditionalGeneration
from datasets import load_dataset

# Processor and model are loaded from the same MusicGen checkpoint
MUSICGEN_CHECKPOINT = "facebook/musicgen-small"

processor = AutoProcessor.from_pretrained(MUSICGEN_CHECKPOINT)
model = MusicgenForConditionalGeneration.from_pretrained(MUSICGEN_CHECKPOINT)


preprocessor_config.json:   0%|          | 0.00/275 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.37k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/7.87k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.36G [00:00<?, ?B/s]

  self.register_buffer("padding_total", torch.tensor(kernel_size - stride, dtype=torch.int64), persistent=False)


generation_config.json:   0%|          | 0.00/224 [00:00<?, ?B/s]

In [None]:
# Folder containing the downloaded music files. It is derived from `data_dir`
# (the directory the download step wrote to) rather than hard-coded as
# "/content/music_data", so both always refer to the same place.
music_folder = os.path.abspath(str(data_dir))
query = "sad, slow, piano"
retrieved_doc = retrieve_music_document(query)
retrieved_doc

<Document: {'content': 'a male voice is singing a melody with changing tempos while snipping his fingers rhythmically. The recording sounds like it has been recorded in an empty room. This song may be playing, practicing snipping and singing along.', 'content_type': 'text', 'score': 0.6557043554420529, 'meta': {'name': '6', 'id': '-0vPFx-wRRI', 'vector_id': '2'}, 'id_hash_keys': ['content'], 'embedding': '<embedding of shape (768,)>', 'id': '3fdc26c52cc04c01f47851294297ff'}>

In [None]:
!pip install librosa

[0m

In [None]:
import librosa

# The retrieved document stores the clip id in its metadata under "id"
music_id = retrieved_doc.meta["id"]

# The clip is expected at <music_folder>/<music_id>.wav
music_file = os.path.join(music_folder, "{}.wav".format(music_id))

# sr=None keeps the file's original sampling rate; read at most 30 seconds
audio, sampling_rate = librosa.load(
    music_file,
    sr=None,
    duration=30,
)


In [None]:
import torch

TARGET_SAMPLING_RATE = 32000  # rate declared to the processor below
MAX_PROMPT_SAMPLES = 96000    # 96000 samples at 32000 Hz = 3 seconds of prompt

# Resample from the rate the file was loaded at to the rate declared below.
# Keeping every k-th sample (the previous approach) does not produce 32 kHz
# audio unless the source rate happens to be k * 32000, and its integer stride
# becomes 0 for clips shorter than MAX_PROMPT_SAMPLES.
resampled_audio = librosa.resample(
    audio,
    orig_sr=sampling_rate,
    target_sr=TARGET_SAMPLING_RATE,
)

# Keep at most the first MAX_PROMPT_SAMPLES samples as the audio prompt
music_tensor = torch.tensor(resampled_audio[:MAX_PROMPT_SAMPLES])

# Prepare the inputs for music generation
inputs = processor(
    audio=music_tensor,
    sampling_rate=TARGET_SAMPLING_RATE,
    text=[query],
    padding=True,
    return_tensors="pt",
)

In [None]:
# Show which file is used as the audio prompt.
print(music_file)

/content/music_data/-0vPFx-wRRI.wav


In [None]:
# Run generation on the prepared inputs, producing at most 32 new tokens
audio_values = model.generate(
    **inputs,
    max_new_tokens=32,
)

In [None]:
# Play the generated audio inline in the notebook.
# The player is imported under an alias so it does not shadow `datasets.Audio`,
# which the download cell uses in `cast_column`; with the bare name, re-running
# that cell after this one would pick up the wrong class.
from IPython.display import Audio as AudioPlayer

sampling_rate = model.config.audio_encoder.sampling_rate
# `.cpu()` matches the save step and is a no-op when the tensor is already on the CPU.
AudioPlayer(audio_values[0].cpu().numpy(), rate=sampling_rate)


In [None]:
# To save the audio as a WAV file
import scipy
# Import the submodule explicitly: `import scipy` on its own does not
# guarantee that `scipy.io.wavfile` has been loaded.
import scipy.io.wavfile

scipy.io.wavfile.write("musicgen_out.wav", rate=sampling_rate, data=audio_values[0, 0].cpu().numpy())