In [14]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import librosa.display
import sklearn as skl
import IPython.display as ipd

# Directory where mp3 files are stored
AUDIO_DIR = 'data/fma_small'

# Load metadata and features
tracks = pd.read_csv('data/fma_metadata/tracks.csv', index_col=0, header=[0, 1])
genres = pd.read_csv('data/fma_metadata/genres.csv')
features = pd.read_csv('data/fma_metadata/features.csv', index_col=0, header=[0, 1])
echonest = pd.read_csv('data/fma_metadata/echonest.csv', index_col=0, header=[0, 1])

# Check if indexes of features and tracks match
np.testing.assert_array_equal(features.index, tracks.index)

# Check that all echonest indexes are in tracks
assert echonest.index.isin(tracks.index).all()

# Print shapes of DataFrames
print(f'Tracks: {tracks.shape}, Genres: {genres.shape}, Features: {features.shape}, Echonest: {echonest.shape}')

# Displaying first few rows of track, album, and artist information
print(tracks['track'].head())
print(tracks['album'].head())
print(tracks['artist'].head())

# Display first few rows of each echonest section
sections = ['metadata', 'audio_features', 'social_features', 'ranks']
for section in sections:
    print(echonest['echonest', section].head())

# Visualize temporal features
small = tracks['set', 'subset'] <= 'small'
genre1 = tracks['track', 'genre_top'] == 'Instrumental'
genre2 = tracks['track', 'genre_top'] == 'Hip-Hop'

X = features.loc[small & (genre1 | genre2), 'mfcc']
X = skl.decomposition.PCA(n_components=2).fit_transform(X)

y = tracks.loc[small & (genre1 | genre2), ('track', 'genre_top')]
y = skl.preprocessing.LabelEncoder().fit_transform(y)

# Scatter plot for PCA results
plt.figure(figsize=(8, 6))
plt.scatter(X[:, 0], X[:, 1], c=y, cmap='coolwarm', alpha=0.5)
plt.title('PCA of MFCCs')
plt.xlabel('PCA Component 1')
plt.ylabel('PCA Component 2')
plt.colorbar()
plt.show()

# Load audio file
filename = f'{AUDIO_DIR}/000/000002.mp3'
print(f'File: {filename}')

x, sr = librosa.load(filename, sr=None, mono=True)
print(f'Duration: {x.shape[-1] / sr:.2f}s, {x.size} samples')

# Play the audio file
start, end = 7, 17
ipd.Audio(data=x[start*sr:end*sr], rate=sr)

# Plot waveform
plt.figure(figsize=(10, 4))
librosa.display.waveshow(x, sr=sr, alpha=0.5)
plt.vlines([start, end], -1, 1, color='r')
plt.title('Waveform')
plt.xlabel('Time (s)')
plt.ylabel('Amplitude')
plt.show()

# Plot a small segment of the waveform
start = len(x) // 2
plt.figure(figsize=(10, 4))
plt.plot(x[start:start+2000])
plt.ylim((-1, 1))
plt.title('Waveform Segment')
plt.xlabel('Sample Number')
plt.ylabel('Amplitude')
plt.show()

# Load example audio file
x, sr = librosa.load(librosa.example('trumpet'))

# Compute STFT
stft = np.abs(librosa.stft(x, n_fft=2048, hop_length=512))

# Compute Mel spectrogram
mel = librosa.feature.melspectrogram(sr=sr, S=stft**2)

# Convert to log scale (dB)
log_mel = librosa.amplitude_to_db(mel, ref=np.max)

# librosa to plot
mfcc = librosa.feature.mfcc(S=librosa.power_to_db(mel), n_mfcc=20)
mfcc = skl.preprocessing.StandardScaler().fit_transform(mfcc)

plt.figure(figsize=(10, 4))
librosa.display.specshow(mfcc, sr=sr, x_axis='time')
plt.title('MFCC')
plt.colorbar(format='%+2.0f dB')
plt.tight_layout()
plt.show()

# Prepare data for training
train = tracks['set', 'split'] == 'training'
val = tracks['set', 'split'] == 'validation'
test = tracks['set', 'split'] == 'test'

y_train = tracks.loc[small & train, ('track', 'genre_top')]
y_test = tracks.loc[small & test, ('track', 'genre_top')]
X_train = features.loc[small & train, 'mfcc']
X_test = features.loc[small & test, 'mfcc']

print(f'{y_train.size} training examples, {y_test.size} testing examples')
print(f'{X_train.shape[1]} features, {np.unique(y_train).size} classes')

# Shuffle training samples
X_train, y_train = skl.utils.shuffle(X_train, y_train, random_state=42)

# Standardize features
scaler = skl.preprocessing.StandardScaler(copy=False)
scaler.fit_transform(X_train)
scaler.transform(X_test)

# Train SVM classifier
clf = skl.svm.SVC()
clf.fit(X_train, y_train)
score = clf.score(X_test, y_test)
print(f'Accuracy: {score:.2%}')


  features = pd.read_csv('data/fma_metadata/features.csv', index_col=0, header=[0, 1])
  echonest = pd.read_csv('data/fma_metadata/echonest.csv', index_col=0, header=[0, 1])


AssertionError: 
Arrays are not equal

(shapes (106576,), (106574,) mismatch)
 x: array(['number', 'track_id', '2', ..., 155318, 155319, 155320],
      dtype=object)
 y: array([     2,      3,      5, ..., 155318, 155319, 155320])