In [None]:
import librosa
import numpy as np
import pandas as pd

# Feature table: later cells read the 'feature_bytes', 'feature_shape' and
# 'label' columns from this frame.
df = pd.read_parquet(path='audio_dataset_bytes.parquet')

# .item() unwraps the single Python object stored in the .npy file
# (presumably a dict of dataset metadata -- TODO confirm).
# NOTE(review): allow_pickle=True unpickles the file contents, which can run
# arbitrary code; only load metadata.npy from a trusted source.
metadata = np.load(
    file='metadata.npy',
    allow_pickle=True,
).item()

# Convert each row's raw bytes back into its 2D feature matrix
def bytes_to_2d(row):
    """Rebuild a feature matrix from one row's serialized float32 payload.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row or dict)
        Must provide ``row['feature_bytes']`` (a buffer of float32 values)
        and ``row['feature_shape']`` (the target shape for the array).

    Returns
    -------
    numpy.ndarray
        float32 array reshaped to ``row['feature_shape']``.

    Raises
    ------
    ValueError
        If the number of decoded values does not fit the requested shape.

    Notes
    -----
    ``np.frombuffer`` creates a view over the buffer rather than a copy; when
    the buffer is an immutable ``bytes`` object the result is read-only.
    """
    # Interpret the raw buffer as a flat vector of float32 values.
    flat_values = np.frombuffer(row['feature_bytes'], dtype=np.float32)
    # Fold the flat vector into the shape recorded alongside the bytes.
    target_shape = row['feature_shape']
    return flat_values.reshape(target_shape)

# Decode every row (axis='columns' hands the function one row at a time) and
# store the resulting matrices in a new 'feature' column.
df['feature'] = df.apply(bytes_to_2d, axis='columns')

# Sanity check on the first decoded sample.
print("Example:")
print(f"Shape: {df['feature'].iloc[0].shape}")
print(f"Data type: {df['feature'].iloc[0].dtype}")  # float32, as decoded by bytes_to_2d


In [None]:

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Build the CNN input tensor: stack the per-sample matrices along a new
# leading axis, then append a trailing channel axis of size 1.
X = np.expand_dims(np.stack(df['feature'].to_numpy()), axis=-1)

# Encode the string labels as integer class ids.
le = LabelEncoder()
y = le.fit_transform(df['label'])

# Hold out 20% of the samples for testing; the split is stratified on y and
# uses a fixed seed so it is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print(f"X_train shape: {X_train.shape}")
print(f"y_train shape: {y_train.shape}")