In [1]:
import json
import numpy as np

DATA_PATH = "data_gtzan_mfcc.json"  # adjust this path to where the MFCC JSON file lives

def load_json_mfcc(path):
    """Load MFCC features, labels and optional class names from a JSON file.

    The file must contain the keys ``"mfcc"`` and ``"labels"``; ``"mapping"``
    is optional.

    Args:
        path: Path to the JSON file.

    Returns:
        Tuple ``(X, y, mapping)`` where ``X`` is a float32 array built from
        ``"mfcc"``, ``y`` is an int64 array built from ``"labels"`` and
        ``mapping`` is whatever is stored under ``"mapping"`` (``None`` if the
        key is absent).

    Raises:
        KeyError: If ``"mfcc"`` or ``"labels"`` is missing.
        ValueError: If features and labels disagree on the number of samples.
    """
    # JSON is UTF-8 by specification; do not rely on the platform default encoding.
    with open(path, "r", encoding="utf-8") as fp:
        data = json.load(fp)
    X = np.array(data["mfcc"], dtype=np.float32)   # expected (N, T, n_mfcc)
    y = np.array(data["labels"], dtype=np.int64)   # expected (N,)
    # Fail early on a truncated/corrupted file instead of deep inside the
    # train/test split or model.fit.
    if X.shape[:1] != y.shape[:1]:
        raise ValueError(
            f"mfcc and labels disagree on sample count: {X.shape[:1]} vs {y.shape[:1]}"
        )
    mapping = data.get("mapping", None)            # list of class names, or None
    return X, y, mapping

# Load the whole dataset once; class_names is the file's optional "mapping"
# entry and is None when the file does not provide one.
X, y, class_names = load_json_mfcc(DATA_PATH)
print(X.shape, y.shape)


(9990, 130, 13) (9990,)


In [2]:
from sklearn.model_selection import train_test_split

# Hold out 25% of all samples as the test set, stratified by label.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42, stratify=y
)

# Split 20% of the remaining 75% off as validation (15% of the whole set),
# leaving 60% for training. X_train / y_train are rebound to the smaller
# training portion here.
# NOTE(review): if several samples are segments cut from the same recording,
# a random split can put segments of one recording in both train and test and
# inflate the scores — confirm how the JSON was generated.
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42, stratify=y_train
)

print(X_train.shape, X_val.shape, X_test.shape)


(5993, 130, 13) (1499, 130, 13) (2498, 130, 13)


In [3]:
def flatten_mfcc(X):
    """Collapse every axis after the first into one feature axis.

    (N, T, 13) -> (N, T*13); more generally (N, d1, ..., dk) -> (N, d1*...*dk).

    Args:
        X: Array-like whose first axis indexes samples.

    Returns:
        2-D array of shape ``(N, prod(remaining dims))``.
    """
    X = np.asarray(X)
    n_samples = X.shape[0]
    # Compute the width explicitly: reshape(N, -1) cannot infer -1 when N == 0
    # and would raise on an empty batch.
    n_features = int(np.prod(X.shape[1:]))
    return X.reshape(n_samples, n_features)

# Flatten each split separately so the dense network receives one feature
# vector per sample.
X_train_f = flatten_mfcc(X_train)
X_val_f   = flatten_mfcc(X_val)
X_test_f  = flatten_mfcc(X_test)

print(X_train_f.shape)  # (N, 1690) for T=130


(5993, 1690)


In [4]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only and reuse its statistics for
# validation and test, so neither of those splits influences the fit.
# The inputs are re-derived from the unscaled splits instead of overwriting
# X_*_f with a function of themselves. Re-running this cell therefore yields
# the same arrays, and the scaler is always fitted on raw features rather than
# on data that was already standardised.
scaler = StandardScaler()
X_train_f = scaler.fit_transform(flatten_mfcc(X_train))
X_val_f   = scaler.transform(flatten_mfcc(X_val))
X_test_f  = scaler.transform(flatten_mfcc(X_test))


In [5]:
import tensorflow as tf
from tensorflow import keras

def build_ffnn(input_dim, num_classes=10, hidden_units=(512, 256), dropout_rate=0.4):
    """Build an uncompiled feed-forward classifier for flat feature vectors.

    Each hidden stage is a ReLU ``Dense`` layer followed by ``Dropout``; the
    output layer is a softmax over ``num_classes``. With the default arguments
    the architecture is 512 -> 256 -> ``num_classes`` with dropout 0.4.

    Args:
        input_dim: Length of one input feature vector.
        num_classes: Number of output classes.
        hidden_units: Width of each hidden Dense layer, in order.
        dropout_rate: Dropout rate applied after every hidden layer.

    Returns:
        A ``keras.Sequential`` model; the caller is responsible for compiling it.
    """
    layers = [keras.layers.Input(shape=(input_dim,))]
    for units in hidden_units:
        layers.append(keras.layers.Dense(units, activation="relu"))
        layers.append(keras.layers.Dropout(dropout_rate))
    layers.append(keras.layers.Dense(num_classes, activation="softmax"))
    return keras.Sequential(layers)

# Seed Python, NumPy and the backend before any weights are created so that
# weight initialisation, dropout masks and batch shuffling repeat across runs.
# NOTE(review): some GPU kernels may still be non-deterministic — confirm if
# bit-exact reproducibility is required.
keras.utils.set_random_seed(42)

# Size the output layer from the labels instead of hard-coding 10; the sparse
# categorical cross-entropy loss expects integer class ids in [0, n_classes).
n_classes = int(y.max()) + 1

model_ffnn = build_ffnn(input_dim=X_train_f.shape[1], num_classes=n_classes)

model_ffnn.compile(
    optimizer=keras.optimizers.Adam(1e-3),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)

# Stop once validation loss has not improved for 7 epochs and roll the model
# back to the weights of the best epoch.
early = keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=7,
    restore_best_weights=True
)

history = model_ffnn.fit(
    X_train_f, y_train,
    validation_data=(X_val_f, y_val),
    epochs=30,
    batch_size=32,
    callbacks=[early],
    verbose=1
)


2026-01-15 15:15:17.916253: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2026-01-15 15:15:17.933582: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2026-01-15 15:15:17.938886: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2026-01-15 15:15:17.952274: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2026-01-15 15:15:19.810451: I tensorflow/core/common_

Epoch 1/30


I0000 00:00:1768490121.332652  107895 service.cc:146] XLA service 0x7f1c780083b0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1768490121.332714  107895 service.cc:154]   StreamExecutor device (0): NVIDIA A16, Compute Capability 8.6
2026-01-15 15:15:21.379447: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2026-01-15 15:15:21.506006: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:531] Loaded cuDNN version 8907
2026-01-15 15:15:21.666703: W external/local_xla/xla/service/gpu/nvptx_compiler.cc:762] The NVIDIA driver's CUDA version is 12.2 which is older than the ptxas CUDA version (12.3.107). Because the driver is older than the ptxas version, XLA is disabling parallel compilation, which may slow down compilation. You should update your NVIDIA driver or use the NVIDIA-provided CUDA forward compatibility packages.


[1m 65/188[0m [32m━━━━━━[0m[37m━━━━━━━━━━━━━━[0m [1m0s[0m 2ms/step - accuracy: 0.2795 - loss: 2.6724

I0000 00:00:1768490123.650762  107895 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 48ms/step - accuracy: 0.3260 - loss: 2.3121 - val_accuracy: 0.5190 - val_loss: 1.4013
Epoch 2/30
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.4515 - loss: 1.5680 - val_accuracy: 0.5390 - val_loss: 1.3230
Epoch 3/30
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5094 - loss: 1.3819 - val_accuracy: 0.5791 - val_loss: 1.2480
Epoch 4/30
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5409 - loss: 1.3054 - val_accuracy: 0.5777 - val_loss: 1.1791
Epoch 5/30
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5655 - loss: 1.2173 - val_accuracy: 0.6031 - val_loss: 1.1552
Epoch 6/30
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5759 - loss: 1.1824 - val_accuracy: 0.5771 - val_loss: 1.1664
Epoch 7/30
[1m188/188[0m [32m━━━━━

In [6]:
# Write the trained model to ffnn_mfcc.keras in the current working directory.
model_ffnn.save("ffnn_mfcc.keras")

In [7]:
import joblib
# Persist the fitted scaler alongside the model: the network was trained on
# scaled features, so new inputs must be transformed with this same scaler.
joblib.dump(scaler, "ffnn_scaler.joblib")

['ffnn_scaler.joblib']

In [8]:
# Score the model on the test split, which was used neither for fitting nor
# for early stopping. return_dict=True returns the metrics keyed by name.
test_res = model_ffnn.evaluate(X_test_f, y_test, verbose=0, return_dict=True)
print(test_res)




{'accuracy': 0.6088871359825134, 'loss': 1.1060829162597656}


In [9]:
# Print the layer-by-layer architecture of the trained network.
model_ffnn.summary()