In [None]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

In [None]:
# Visualize mel spectrograms for 2 genres: Jazz and Metal

# Render all figure text with LaTeX, using a serif font
plt.rc('text', usetex=True)
plt.rc('font', family='serif')

# Load the precomputed mel spectrograms
jazz = np.load('mel_spectrogram/Jazz/004_004704.npy')
# NOTE: despite its name, `rock` holds a clip from the Metal folder; the
# variable name is kept so anything else that refers to it keeps working.
rock = np.load('mel_spectrogram/Metal/001_001017.npy')

# Share a single colour scale (global min/max over both arrays) so the two
# panels are directly comparable
vmin = min(np.min(jazz), np.min(rock))
vmax = max(np.max(jazz), np.max(rock))

fig, axs = plt.subplots(2, 1, figsize=(12, 8))

# Draw both panels with the same code path. Each panel derives its tick
# positions from its OWN array (previously the Metal panel reused the Jazz
# array's height for its y ticks).
for ax, (title, spec) in zip(axs, [('Jazz', jazz), ('Metal', rock)]):
    sns.heatmap(spec, ax=ax, vmin=vmin, vmax=vmax, cmap='viridis')
    ax.set_title(title, fontdict={'fontsize': 14})
    ax.set_xlabel('Time', fontdict={'fontsize': 12})
    ax.set_ylabel('Mel bands', fontdict={'fontsize': 12})

    # 5 evenly spaced ticks along the frame axis, labelled 0..30
    # (presumably seconds for 30-second clips -- TODO confirm clip length)
    time_ticks = np.linspace(0, spec.shape[1], num=5)
    ax.set_xticks(time_ticks)
    ax.set_xticklabels([f"{int(tick)}" for tick in np.linspace(0, 30, num=5)])

    # One tick every 12 mel bands, labelled with the band index
    freq_ticks = np.arange(0, spec.shape[0] + 1, step=12)
    ax.set_yticks(freq_ticks)
    ax.set_yticklabels([str(int(tick)) for tick in freq_ticks])

    # Heatmaps put row 0 at the top; flip so band 0 is at the bottom
    ax.invert_yaxis()

plt.tight_layout()
plt.savefig('melspec.png', dpi=300)
# plt.savefig('melspec.svg', dpi=300)
plt.savefig('melspec.eps', dpi=300)
plt.show()


In [None]:
import os
# Display the kernel's current working directory; the relative paths used in
# this notebook (e.g. 'data/...') are resolved against it.
os.getcwd()

In [None]:
# (intentionally empty cell -- a lone `|` here was a SyntaxError under Run All)

In [None]:
# Paths of the two MP3 clips (named for jazz and metal) that are converted
# to mel spectrograms
jazz_file, metal_file = "data/001058.mp3", "data/006716.mp3"


In [None]:
import librosa
import librosa.display
# MP3 -> dB-scaled mel spectrogram, via librosa
def convert_to_mel_spectrogram(file_path):
    """Load an audio file and return its mel spectrogram in decibels.

    Parameters
    ----------
    file_path
        Path of the audio file; passed straight to ``librosa.load`` with
        ``sr=None``.

    Returns
    -------
    tuple
        ``(mel_spectrogram_db, sr)``: the mel spectrogram (``n_fft=2048``,
        ``hop_length=1024``) converted with ``librosa.power_to_db`` using the
        array's own maximum as reference, and the sampling rate reported by
        ``librosa.load``.
    """
    samples, rate = librosa.load(file_path, sr=None)
    power = librosa.feature.melspectrogram(y=samples, sr=rate, n_fft=2048, hop_length=1024)
    return librosa.power_to_db(power, ref=np.max), rate

# dB mel spectrogram and sampling rate for each of the two clips
jazz_mel, sr_jazz = convert_to_mel_spectrogram(file_path=jazz_file)
metal_mel, sr_metal = convert_to_mel_spectrogram(file_path=metal_file)

In [None]:
# Load the jazz clip (no `sr` argument is given to librosa.load) and plot its
# raw waveform, sample by sample
y, sr = librosa.load(jazz_file)
plt.plot(y)
plt.gca().set(title='Signal', xlabel='Time (samples)', ylabel='Amplitude')
plt.show()

In [None]:
n_fft = 2048
# Magnitude STFT of only the first n_fft samples; the hop (n_fft + 1) is
# longer than that excerpt -- presumably to look at a single window's
# spectrum (TODO confirm)
first_window = y[:n_fft]
ft = np.abs(librosa.stft(first_window, hop_length=n_fft + 1))
plt.plot(ft)
plt.gca().set(title='Spectrum', xlabel='Frequency Bin', ylabel='Amplitude')
plt.show()

In [None]:
# STFT magnitude of the whole signal (hop of 512 samples), converted to dB
# relative to its maximum, shown on a log-frequency axis
stft_magnitude = np.abs(librosa.stft(y, hop_length=512))
spec = librosa.amplitude_to_db(stft_magnitude, ref=np.max)
librosa.display.specshow(spec, sr=sr, x_axis='time', y_axis='log')
plt.colorbar(format='%+2.0f dB')
plt.title('Spectrogram')
plt.show()

In [None]:
# Mel power spectrogram of the signal loaded above (hop of 1024 samples)
mel_spect = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=2048, hop_length=1024)
# Convert the MEL spectrogram to dB. (This used to convert `spec`, the STFT
# spectrogram that is already in dB, so the mel result was silently discarded.)
mel_spect = librosa.power_to_db(mel_spect, ref=np.max)
# Tell specshow the sampling rate and hop used above; otherwise it falls back
# to its own default hop and the time axis is mis-scaled.
# NOTE(review): fmax=8000 only affects the axis labelling here, while the mel
# spectrogram above was computed without an fmax argument -- confirm which
# upper frequency is intended.
librosa.display.specshow(mel_spect, sr=sr, hop_length=1024, y_axis='mel', fmax=8000, x_axis='time')
plt.title('Mel Spectrogram')
plt.colorbar(format='%+2.0f dB')
plt.show()

In [None]:
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Load the MP3 file (no `sr` argument is given to librosa.load)
signal, sr = librosa.load("data/006716.mp3")

# Mel filter bank with 10 bands for a 2048-point FFT. Built from the sampling
# rate that was actually loaded rather than a hard-coded 22050, so the two
# cannot drift apart.
filter_banks = librosa.filters.mel(n_fft=2048, sr=sr, n_mels=10)

In [None]:
# Dimensions of the filter-bank matrix
np.shape(filter_banks)

In [None]:
# Draw the mel filter bank as an image with a linear frequency x axis
plt.figure(figsize=(25, 10))
filter_bank_mesh = librosa.display.specshow(filter_banks, sr=sr, x_axis='linear')
plt.colorbar(filter_bank_mesh, format='%+2.f')
plt.show()

In [None]:
# 96-band mel power spectrogram of `signal` (2048-point FFT, hop of 512 samples)
mel_params = dict(sr=sr, n_fft=2048, hop_length=512, n_mels=96)
mel_spectrogram = librosa.feature.melspectrogram(y=signal, **mel_params)
mel_spectrogram.shape

In [None]:
# Keep only the central TARGET_FRAMES frames so the array has a fixed width,
# i.e. shape (n_mels, TARGET_FRAMES).
# NOTE(review): an earlier comment called this a "30-second segment"; assuming
# a hop of 512 samples at 22050 Hz, 1024 frames is about 23.8 s -- confirm the
# intended duration.
TARGET_FRAMES = 1024

num_frames = mel_spectrogram.shape[1]
if num_frames < TARGET_FRAMES:
    raise ValueError(f"The spectrogram is shorter than the required {TARGET_FRAMES} frames.")

# Centre the window: the surplus frames are split evenly between both ends
start_frame = (num_frames - TARGET_FRAMES) // 2
end_frame = start_frame + TARGET_FRAMES
mel_spectrogram = mel_spectrogram[:, start_frame:end_frame]

mel_spectrogram.shape

In [None]:
# Convert the mel power spectrogram to a decibel scale (librosa defaults)
log_mel_spectrogram = librosa.power_to_db(S=mel_spectrogram)

In [None]:
# Load a spectrogram that was stored on disk earlier, for comparison.
# It is kept under its own name: previously this cell overwrote
# `log_mel_spectrogram`, so the later np.save(...) of that variable wrote the
# data of this unrelated file instead of the spectrogram computed above.
# To look at the stored file, pass `npy_log_mel_spectrogram` to specshow.
npy_mel_spectrogram = np.load('data/012_012567.npy')
# NOTE(review): if the stored array is already in dB, this second conversion
# is wrong -- confirm what the .npy files contain.
npy_log_mel_spectrogram = librosa.power_to_db(npy_mel_spectrogram)


In [None]:
# Plot the dB mel spectrogram on a mel-frequency axis.
plt.figure(figsize=(25, 10))
# The colormap is passed to specshow directly; plt.set_cmap would also change
# the default colormap for every later figure in the kernel.
librosa.display.specshow(log_mel_spectrogram, sr=sr, x_axis='time', y_axis='mel', cmap='viridis')
plt.colorbar(format='%+2.f')
plt.show()

In [None]:
# Persist the current `log_mel_spectrogram` array as a .npy file
np.save(file='data/006716.npy', arr=log_mel_spectrogram)

In [None]:
import pandas as pd

In [None]:
# Read the track metadata: the first two CSV lines form a two-level column
# header, and the line right after them (index 2) is skipped
tracks_csv = 'data/tracks.csv'
fma = pd.read_csv(tracks_csv, header=[0, 1], skiprows=[2])

In [None]:
# Peek at the first five rows
fma.head(n=5)

In [None]:
# Give the unnamed id column a proper two-level name: the auto-generated
# labels "Unnamed: 0_level_0" / "Unnamed: 0_level_1" become ('track', 'track_id')
fma = (
    fma
    .rename(columns={"Unnamed: 0_level_1": "track_id"}, level=1)
    .rename(columns={"Unnamed: 0_level_0": "track"}, level=0)
)


In [None]:
# Genre lookup table; the code below reads its 'genre_id' and 'title' columns
genres = pd.read_csv(filepath_or_buffer='data/genres.csv')

import ast  # used for safe parsing of the id lists stored as text


def _titles_for_ids(raw_ids, genre_table):
    """Return the comma-joined titles of the genres listed in ``raw_ids``.

    ``raw_ids`` is the textual form of a Python list of genre ids (e.g.
    ``"[21, 12]"``). It is parsed with ``ast.literal_eval`` rather than
    ``eval`` so that text coming from the CSV can never execute code;
    anything that is not a plain literal raises ``ValueError``/``SyntaxError``.
    Titles come out in the row order of ``genre_table``, not in the order of
    the ids.
    """
    genre_ids = ast.literal_eval(raw_ids)
    selected = genre_table['genre_id'].isin(genre_ids)
    return ', '.join(genre_table.loc[selected, 'title'])


def get_genres(row, genre_table=None):
    """Map the id list in ``row[('track', 'genres')]`` to genre titles.

    Parameters
    ----------
    row
        A row of the tracks frame (as passed by ``DataFrame.apply(axis=1)``).
    genre_table
        Frame with ``genre_id`` and ``title`` columns. Defaults to the
        notebook-level ``genres`` frame.

    Returns
    -------
    str
        Matching titles joined with ``', '`` (empty string if none match).
    """
    if genre_table is None:
        genre_table = genres
    return _titles_for_ids(row[('track', 'genres')], genre_table)


def get_genres_all(row, genre_table=None):
    """Same as ``get_genres`` but for ``row[('track', 'genres_all')]``."""
    if genre_table is None:
        genre_table = genres
    return _titles_for_ids(row[('track', 'genres_all')], genre_table)

# Replace the id lists in both genre columns by comma-joined genre titles,
# row by row. After this the columns hold titles, so the cell is not meant
# to be run a second time on the same frame.
for genre_column, to_titles in (
    (('track', 'genres'), get_genres),
    (('track', 'genres_all'), get_genres_all),
):
    fma[genre_column] = fma.apply(to_titles, axis=1)

In [None]:
# How often each genre title occurs in the `genres` column
genre_title_lists = fma[('track', 'genres')].str.split(', ')
genre_title_lists.explode().value_counts()

In [None]:
# How often each genre title occurs in the `genres_all` column
all_genre_title_lists = fma[('track', 'genres_all')].str.split(', ')
all_genre_title_lists.explode().value_counts()

In [None]:
# Number of tracks per top-level genre
fma.loc[:, ('track', 'genre_top')].value_counts()

In [None]:
# Create the ('track', 'one_genre') column, initially None for every row;
# the cells below fill it in step by step.
fma[('track', 'one_genre')] = None

In [None]:
# Tracks whose genre_top is exactly 'Country' or 'Jazz' take that value as one_genre
for top_genre in ('Country', 'Jazz'):
    is_top_genre = fma[('track', 'genre_top')].eq(top_genre)
    fma.loc[is_top_genre, ('track', 'one_genre')] = top_genre

In [None]:
# Start fma_one_genre from the rows that already have a one_genre label
has_one_genre = fma[('track', 'one_genre')].notna()
fma_one_genre = fma[has_one_genre]

In [None]:
# Keep in fma only the tracks whose id is not yet in fma_one_genre
collected_ids = fma_one_genre[('track', 'track_id')]
fma = fma[~fma[('track', 'track_id')].isin(collected_ids)]

In [None]:
# Tracks still waiting for a label
fma['track']['track_id'].count()

In [None]:
# Tracks collected so far
fma_one_genre['track']['track_id'].count()

In [None]:
# Inspect the tracks without a genre_top: which genre titles do they carry?
missing_top = fma[('track', 'genre_top')].isna()
no_genre_top = fma[missing_top]

# Frequency of each genre title among those tracks
no_genre_top[('track', 'genres')].str.split(', ').explode().value_counts()

In [None]:
# Any track whose `genres` string contains 'Metal' gets one_genre = 'Metal'
mentions_metal = fma[('track', 'genres')].str.contains('Metal')
fma.loc[mentions_metal, ('track', 'one_genre')] = 'Metal'

In [None]:
# Number of rows in fma that currently carry a one_genre label
fma[('track', 'one_genre')].notna().sum()

In [None]:
# Append the freshly labelled rows to fma_one_genre (this cell does not
# remove them from fma)
freshly_labelled = fma[fma[('track', 'one_genre')].notna()]
fma_one_genre = pd.concat([fma_one_genre, freshly_labelled])

In [None]:
# Drop from fma every track that is already in fma_one_genre
already_collected = fma[('track', 'track_id')].isin(fma_one_genre[('track', 'track_id')])
fma = fma[~already_collected]

In [None]:
# Tracks still waiting for a label
fma['track']['track_id'].count()

In [None]:
# Tracks collected so far
fma_one_genre['track']['track_id'].count()

In [None]:
# Sanity check: how many track ids occur more than once in fma_one_genre
collected_track_ids = fma_one_genre['track']['track_id']
collected_track_ids.duplicated().sum()

In [None]:
# Label distribution of the collected tracks
fma_one_genre['track']['one_genre'].value_counts()

In [None]:
# Any remaining track whose `genres` string contains 'Country' gets one_genre = 'Country'
mentions_country = fma[('track', 'genres')].str.contains('Country')
fma.loc[mentions_country, ('track', 'one_genre')] = 'Country'
fma['track']['one_genre'].value_counts()

In [None]:
# Move step: append the labelled rows to fma_one_genre, then keep in fma only
# the tracks that are not in fma_one_genre
labelled_rows = fma[fma[('track', 'one_genre')].notna()]
fma_one_genre = pd.concat([fma_one_genre, labelled_rows])
collected_ids = fma_one_genre[('track', 'track_id')]
fma = fma[~fma[('track', 'track_id')].isin(collected_ids)]

In [None]:
# Label distribution of the collected tracks
fma_one_genre['track']['one_genre'].value_counts()

In [None]:
# Any remaining track whose `genres` string contains 'Jazz' gets one_genre = 'Jazz'.
# NOTE(review): this is a substring match, not an exact match on the title
# 'Jazz' -- confirm that titles merely containing 'Jazz' should be included.
mentions_jazz = fma[('track', 'genres')].str.contains('Jazz')
fma.loc[mentions_jazz, ('track', 'one_genre')] = 'Jazz'

fma['track']['one_genre'].value_counts()


In [None]:
# Move step: append the labelled rows to fma_one_genre, then keep in fma only
# the tracks that are not in fma_one_genre
labelled_rows = fma[fma[('track', 'one_genre')].notna()]
fma_one_genre = pd.concat([fma_one_genre, labelled_rows])
collected_ids = fma_one_genre[('track', 'track_id')]
fma = fma[~fma[('track', 'track_id')].isin(collected_ids)]

In [None]:
# Label distribution of the collected tracks
fma_one_genre['track']['one_genre'].value_counts()

In [None]:
# Any remaining track whose `genres` string contains 'Indie' gets one_genre = 'Indie'
mentions_indie = fma[('track', 'genres')].str.contains('Indie')
fma.loc[mentions_indie, ('track', 'one_genre')] = 'Indie'
fma['track']['one_genre'].value_counts()

In [None]:
# Move step: append the labelled rows to fma_one_genre, then keep in fma only
# the tracks that are not in fma_one_genre
labelled_rows = fma[fma[('track', 'one_genre')].notna()]
fma_one_genre = pd.concat([fma_one_genre, labelled_rows])
collected_ids = fma_one_genre[('track', 'track_id')]
fma = fma[~fma[('track', 'track_id')].isin(collected_ids)]


In [None]:
# Label distribution of the collected tracks
fma_one_genre['track']['one_genre'].value_counts()

In [None]:
# Any remaining track whose `genres` string contains 'Rap' gets one_genre = 'Rap'
mentions_rap = fma[('track', 'genres')].str.contains('Rap')
fma.loc[mentions_rap, ('track', 'one_genre')] = 'Rap'
fma['track']['one_genre'].value_counts()

In [None]:
# Move step: append the labelled rows to fma_one_genre, then keep in fma only
# the tracks that are not in fma_one_genre
labelled_rows = fma[fma[('track', 'one_genre')].notna()]
fma_one_genre = pd.concat([fma_one_genre, labelled_rows])
collected_ids = fma_one_genre[('track', 'track_id')]
fma = fma[~fma[('track', 'track_id')].isin(collected_ids)]

In [None]:
# Same rule as for `genres`, but matched against the `genres_all` column:
# a track whose genres_all string contains 'Rap' gets one_genre = 'Rap'
mentions_rap_anywhere = fma[('track', 'genres_all')].str.contains('Rap')
fma.loc[mentions_rap_anywhere, ('track', 'one_genre')] = 'Rap'
fma['track']['one_genre'].value_counts()

In [None]:
# Tracks whose genre_top is exactly 'Rock' get one_genre = 'Rock'
is_rock = fma[('track', 'genre_top')].eq('Rock')
fma.loc[is_rock, ('track', 'one_genre')] = 'Rock'
fma['track']['one_genre'].value_counts()

In [None]:
# Move step: append the labelled rows to fma_one_genre, then keep in fma only
# the tracks that are not in fma_one_genre
labelled_rows = fma[fma[('track', 'one_genre')].notna()]
fma_one_genre = pd.concat([fma_one_genre, labelled_rows])
collected_ids = fma_one_genre[('track', 'track_id')]
fma = fma[~fma[('track', 'track_id')].isin(collected_ids)]

In [None]:
# Label distribution of the collected tracks
fma_one_genre['track']['one_genre'].value_counts()

In [None]:
# Tracks whose genre_top is exactly 'Pop' get one_genre = 'Pop'
is_pop = fma[('track', 'genre_top')].eq('Pop')
fma.loc[is_pop, ('track', 'one_genre')] = 'Pop'
fma['track']['one_genre'].value_counts()


In [None]:
# Move step: append the labelled rows to fma_one_genre, then keep in fma only
# the tracks that are not in fma_one_genre
labelled_rows = fma[fma[('track', 'one_genre')].notna()]
fma_one_genre = pd.concat([fma_one_genre, labelled_rows])
collected_ids = fma_one_genre[('track', 'track_id')]
fma = fma[~fma[('track', 'track_id')].isin(collected_ids)]


In [None]:
# Label distribution of the collected tracks
fma_one_genre['track']['one_genre'].value_counts()

In [None]:
# Tracks whose genre_top is exactly 'Hip-Hop' get one_genre = 'Hip-Hop'
is_hiphop = fma[('track', 'genre_top')].eq('Hip-Hop')
fma.loc[is_hiphop, ('track', 'one_genre')] = 'Hip-Hop'
fma['track']['one_genre'].value_counts()

In [None]:
# Move step: append the labelled rows to fma_one_genre, then keep in fma only
# the tracks that are not in fma_one_genre
labelled_rows = fma[fma[('track', 'one_genre')].notna()]
fma_one_genre = pd.concat([fma_one_genre, labelled_rows])
collected_ids = fma_one_genre[('track', 'track_id')]
fma = fma[~fma[('track', 'track_id')].isin(collected_ids)]

In [None]:
# Label distribution of the collected tracks
fma_one_genre['track']['one_genre'].value_counts()

In [None]:
# Sanity check: how many track ids occur more than once in fma_one_genre
collected_track_ids = fma_one_genre['track']['track_id']
collected_track_ids.duplicated().sum()

In [None]:
# Count rows that repeat an earlier (artist name, track title) pair
is_repeat = fma_one_genre.duplicated(subset=[('artist', 'name'), ('track', 'title')])
duplicates = is_repeat.sum()

In [None]:
# Which labels do the repeated (artist name, track title) rows carry?
repeated_rows = fma_one_genre[
    fma_one_genre.duplicated(subset=[('artist', 'name'), ('track', 'title')])
]
repeated_rows[('track', 'one_genre')].value_counts()

In [None]:
# Keep only the first row for every (artist name, track title) pair
dedup_key = [('artist', 'name'), ('track', 'title')]
fma_one_genre = fma_one_genre.drop_duplicates(subset=dedup_key)

In [None]:
# Label distribution after de-duplication
fma_one_genre['track']['one_genre'].value_counts()

In [None]:
# Write the labelled tracks to CSV, without the row index
fma_one_genre.to_csv(path_or_buf='data/fma_one_genre.csv', index=False)

In [None]:
# Build a balanced dataset: at most MAX_PER_GENRE randomly chosen tracks per
# one_genre label. min(len(group), ...) keeps smaller groups whole, which
# avoids "ValueError: Cannot take a larger sample than population when
# 'replace=False'".
# A fixed random_state makes the draw reproducible across kernel restarts;
# without it every run produced (and saved) a different subset.
MAX_PER_GENRE = 2000
SAMPLE_SEED = 42
fma_one_genre_balanced = fma_one_genre.groupby([('track', 'one_genre')]).apply(
    lambda group: group.sample(n=min(len(group), MAX_PER_GENRE), random_state=SAMPLE_SEED)
)

In [None]:
# Label distribution of the balanced sample
fma_one_genre_balanced['track']['one_genre'].value_counts()

In [None]:
# Write the balanced sample to CSV; index=False leaves the row index out of
# the file
fma_one_genre_balanced.to_csv(path_or_buf='data/fma_one_genre_balanced.csv', index=False)