# Encode audio with a text encoder!

In [None]:
# Install necessary libraries
!pip install transformers datasets pandas sentencepiece

[33mDEPRECATION: pytorch-lightning 1.7.7 has a non-standard dependency specifier torch>=1.9.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063[0m[33m
[0m

In [4]:
import os
import pickle

import pandas as pd
import torch
from tqdm.auto import tqdm
from transformers import T5Tokenizer, T5Model, T5EncoderModel

  from .autonotebook import tqdm as notebook_tqdm


In [5]:

# Pretrained T5 checkpoint: the tokenizer and the encoder-only model share one name
model_name = "t5-small"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5EncoderModel.from_pretrained(model_name)

# Read the sleep dataset CSV into a DataFrame (default integer row index)
csv_file_path = "../cache/spotify_sleep_dataset/csv/Sleep_FullDataset_withDuplicates.csv"
df = pd.read_csv(filepath_or_buffer=csv_file_path)

# Function to encode text using T5 model
def encode_text(text, max_length=512):
    """Embed text with the T5 encoder and mean-pool over its tokens.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        text: A single string, or a list of strings to embed as one batch.
        max_length: Maximum number of tokens kept per input; longer inputs
            are truncated to this many tokens.

    Returns:
        A NumPy array holding one pooled vector per input. A single string
        yields shape (1, hidden_size).
    """
    inputs = tokenizer(
        text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        # Without an explicit bound, truncation=True only logs a warning and
        # does nothing when the tokenizer has no predefined maximum length.
        max_length=max_length,
    )

    # Inference only: no_grad avoids building an autograd graph that would be
    # thrown away immediately.
    with torch.no_grad():
        hidden = model(**inputs).last_hidden_state

        # Average over real tokens only. For a single string the mask is all
        # ones, so this equals a plain mean; for a padded batch it keeps the
        # padding positions from diluting the shorter inputs.
        mask = inputs["attention_mask"].unsqueeze(-1).to(hidden.dtype)
        token_counts = mask.sum(dim=1).clamp(min=1)
        pooled = (hidden * mask).sum(dim=1) / token_counts

    return pooled.numpy()


You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [7]:
import numpy as np

# Eight example genre descriptions to embed
text1, text2, text3, text4, text5, text6, text7, text8 = (
    'chillhop lo-fi beats',
    'dance pop pop post-teen pop',
    'lullaby',
    'cinematic post-rock compositional ambient',
    'british soundtrack epicore scorecore soundtrack video game music',
    'french soundtrack hollywood',
    'piano cover',
    'contemporary country',
)

# Embed each description in order, one encode_text call per string
(
    encoding1, encoding2, encoding3, encoding4,
    encoding5, encoding6, encoding7, encoding8,
) = (
    encode_text(description)
    for description in (text1, text2, text3, text4, text5, text6, text7, text8)
)

# Stack the eight embeddings along a new leading axis and fix the shape to (8, 1, 512)
encodings = np.stack(
    [
        encoding1, encoding2, encoding3, encoding4,
        encoding5, encoding6, encoding7, encoding8,
    ],
    axis=0,
).reshape(8, 1, 512)

# Persist the stacked array with pickle
with open('encodings.pkl', 'wb') as f:
    pickle.dump(encodings, f)

print("Encodings saved to 'encodings.pkl'")

Encodings saved to 'encodings.pkl'


In [10]:
# Directory containing the audio files
audio_dir = "../cache/spotify_sleep_dataset/waveform"

# Keep only the .wav files so the progress bar counts exactly the files that
# get encoded; sorting makes the processing order independent of the filesystem.
wav_files = sorted(name for name in os.listdir(audio_dir) if name.endswith(".wav"))

# Process each audio file and encode the corresponding genre text.
# The result maps audio file name -> encode_text output for that track's genres.
encodings = {}
for audio_file in tqdm(wav_files):
    # The file stem (name without ".wav") is parsed as the integer row label in df.
    file_id = int(os.path.splitext(audio_file)[0])
    # Direct label lookup on df's default integer index. This avoids building a
    # full-length boolean mask (df.index == file_id) for every file.
    # Raises KeyError if no row carries this label.
    genre_text = df.at[file_id, 'Genres']
    encodings[audio_file] = encode_text(genre_text)

# Save the encodings to a pickle file
output_file = "../cache/spotify_sleep_dataset/genre_encodings.p"
with open(output_file, "wb") as f:
    pickle.dump(encodings, f)

print(f"Encodings saved to {output_file}")


  0%|          | 0/11485 [00:00<?, ?it/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


100%|██████████| 11485/11485 [14:24<00:00, 13.28it/s]


Encodings saved to ../cache/spotify_sleep_dataset/genre_encodings.p


In [5]:
# Read the dataset CSV into a separate frame for inspection
ssd_df = pd.read_csv(filepath_or_buffer=csv_file_path)

In [6]:
# Bare last expression: the notebook renders the frame as the cell's output
ssd_df

Unnamed: 0.1,Unnamed: 0,TrackName,TrackID,SampleURL,ReleaseYear,Genres,danceability,energy,loudness,speechiness,...,Popularity,pNum,playlistID,label,userCat,demoCat,length,playlistTitle,nFoll,nTracks
0,1,Universo,48mOMNLnlbok3W6anP7sTS,https://p.scdn.co/mp3-preview/c767ce200a853fb3...,2020-06-08,[unknown],0.0697,0.0936,-22.559,0.0459,...,63.0,1,37i9dQZF1DWYcDQ1hSjOpY,,spotify,Adult,13h11,Deep Sleep,1168876,221
1,2,The Journey,77Fs2NajDBQaOOZkYDsFLE,https://p.scdn.co/mp3-preview/2d8351f57b42cb23...,2020-04-10,[unknown],0.1650,0.0375,-30.391,0.0446,...,65.0,1,37i9dQZF1DWYcDQ1hSjOpY,,spotify,Adult,13h11,Deep Sleep,1168876,221
2,3,always,5Nusgvqw46McIdfuqrvM4c,https://p.scdn.co/mp3-preview/cef392818732a423...,2020-03-20,[unknown],0.1960,0.0409,-27.052,0.0422,...,63.0,1,37i9dQZF1DWYcDQ1hSjOpY,,spotify,Adult,13h11,Deep Sleep,1168876,221
3,4,Stellaristique Vision,0MufjWQZiuRDaL2pLgQ2f7,https://p.scdn.co/mp3-preview/8ef68bf48b440528...,2020-06-29,[unknown],0.2200,0.1620,-22.440,0.0332,...,62.0,1,37i9dQZF1DWYcDQ1hSjOpY,,spotify,Adult,13h11,Deep Sleep,1168876,221
4,5,Virga I (ii),5dxWZlVi3k8omltMsPvqpI,https://p.scdn.co/mp3-preview/b9b2e97fbb3c2ab4...,2020-02-21,"['ambient', 'compositional ambient', 'drift', ...",0.1700,0.1700,-23.001,0.0425,...,63.0,1,37i9dQZF1DWYcDQ1hSjOpY,,spotify,Adult,13h11,Deep Sleep,1168876,221
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
225922,4,At Last,2j3oM8tdb0evXlP0w1xu7Q,https://p.scdn.co/mp3-preview/5f347648d0e320e4...,2012-03-01,"['hangpan', 'new tribe']",0.6590,0.1510,-17.563,0.0552,...,29.0,989,72YImO6zl8TCGjPeFQgdqC,Instrumental,user,Adult,0h44,Handpan Sleep,306,9
225923,5,From the View,0NciXIZ4pwyThvnm0fPZD7,https://p.scdn.co/mp3-preview/3daa7c636221ee57...,2012-03-01,"['hangpan', 'new tribe']",0.5180,0.2300,-16.884,0.0779,...,31.0,989,72YImO6zl8TCGjPeFQgdqC,Instrumental,user,Adult,0h44,Handpan Sleep,306,9
225924,6,Swing,4z2i0TktkIk8WNNC8YuDAC,https://p.scdn.co/mp3-preview/57be5770399588ce...,2013-10-22,"['hangpan', 'new tribe']",0.4490,0.2290,-12.172,0.0636,...,35.0,989,72YImO6zl8TCGjPeFQgdqC,Instrumental,user,Adult,0h44,Handpan Sleep,306,9
225925,7,Once Again (2011),2FCl1hxO2u70TLtNykj9yQ,https://p.scdn.co/mp3-preview/164f0fe0384d9ecf...,2011-07-20,"['hangpan', 'new tribe']",0.7140,0.2790,-18.883,0.0588,...,31.0,989,72YImO6zl8TCGjPeFQgdqC,Instrumental,user,Adult,0h44,Handpan Sleep,306,9
