In [2]:
from dotenv import load_dotenv
load_dotenv()
import os
import librosa
import numpy as np
import sys
sys.path.append('../scripts')

from chroma_features import extract_chroma_features
from tonnetz_features import extract_tonnetz_features
from mfcc_features import extract_mfcc_features
from zero_crossing_rate import extract_zero_crossing_rate
from temporal_features import extract_temporal_features
from pitch_features import extract_pitch_features

In [3]:
pwd

'/Users/moki/University/Not University/MusicAI/notebooks'

In [4]:
# Replace the path below with the path to a single MP3 file from your collection
file_path = '/Users/moki/University/Not University/MusicAI/data/tracks/Gummihammer.mp3'

In [8]:

audio_data, sr = librosa.load(file_path, mono=False)
chroma = extract_chroma_features(audio_data, sr)
tonnetz = extract_tonnetz_features(audio_data, sr)
mfcc = extract_mfcc_features(audio_data, sr)
zcr = extract_zero_crossing_rate(audio_data, sr)
onset_env, tempo = extract_temporal_features(audio_data, sr)
pitch_values = extract_pitch_features(audio_data, sr)
rms = librosa.feature.rms(audio_data, sr)

features = {
    'chroma': chroma,
    'tonnetz': tonnetz,
    'mfcc': mfcc,
    'zcr': zcr,
    'onset_strength_envelope': onset_env,
    'tempo': tempo,
    'pitch_values': pitch_values,
    'local loudness': rms
}

# Display the shapes of the extracted features
for feature_name, feature_value in features.items():
    if hasattr(feature_value, 'shape'):
        print(f"{feature_name}: {feature_value.shape}")
    else:
        print(f"{feature_name}: {feature_value}")



   1.5487800e-05  0.0000000e+00]
 [-1.4944916e-15  4.5096833e-16  1.5466174e-15 ... -1.5469835e-05
  -1.7700393e-05  0.0000000e+00]], S=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  rms = librosa.feature.rms(audio_data, sr)


chroma: (2, 12, 17547)
tonnetz: (2, 6, 17547)
mfcc: (2, 13, 17547)
zcr: (2, 1, 17547)
onset_strength_envelope: (2, 17547)
tempo: (2, 1)
pitch_values: (array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32), array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32), array([[1.7673626 , 0.3567086 , 0.20399925, ..., 0.7544432 , 0.70340943,
        0.6402794 ],
       [1.8592454 , 0.24477418, 0.15293294, ..., 0.73186266, 0.69198304,
        0.70216984]], dtype=float32))
local loudness: (2, 1, 17547)


In [9]:

total_size = sys.getsizeof(features)
for feature_name, feature_value in features.items():
    total_size += sys.getsizeof(feature_value)

print(f"Total size of feature data for one file: {total_size} bytes")

Total size of feature data for one file: 2107056 bytes


In [10]:

# Display the sizes of the extracted features
for feature_name, feature_value in features.items():
    if hasattr(feature_value, 'shape'):
        feature_size = sys.getsizeof(feature_value)
        print(f"{feature_name}: {feature_value.shape} (Size: {feature_size} bytes)")
    else:
        print(f"{feature_name}: {feature_value}")

# Compute the total size
total_size = sys.getsizeof(features)
for feature_name, feature_value in features.items():
    total_size += sys.getsizeof(feature_value)

print(f"\nTotal size of feature data for one file: {total_size} bytes")

chroma: (2, 12, 17547) (Size: 1684656 bytes)
tonnetz: (2, 6, 17547) (Size: 144 bytes)
mfcc: (2, 13, 17547) (Size: 144 bytes)
zcr: (2, 1, 17547) (Size: 280896 bytes)
onset_strength_envelope: (2, 17547) (Size: 128 bytes)
tempo: (2, 1) (Size: 144 bytes)
pitch_values: (array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32), array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32), array([[1.7673626 , 0.3567086 , 0.20399925, ..., 0.7544432 , 0.70340943,
        0.6402794 ],
       [1.8592454 , 0.24477418, 0.15293294, ..., 0.73186266, 0.69198304,
        0.70216984]], dtype=float32))
local loudness: (2, 1, 17547) (Size: 140520 bytes)

Total size of feature data for one file: 2107056 bytes
