In [1]:
import os
import tensorflow as tf
import numpy as np
from tensorflow.keras import layers, models
from tensorflow.keras.utils import to_categorical

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:

# Percorso al dataset scaricato
DATASET_PATH = "ML_TACTIGON/customTSkin/data/audiodati"

# Comandi selezionati per la classificazione
COMMANDS = ["yes", "no", "up", "down"]

# Funzione per il caricamento dei dati
def preprocess_dataset(dataset_path, commands):
    labels = {name: i for i, name in enumerate(commands)}
    data, targets = [], []

    for label, command in labels.items():
        command_path = os.path.join(dataset_path, command)
        if os.path.exists(command_path):
            for file in os.listdir(command_path):
                filepath = os.path.join(command_path, file)
                # Carica file audio
                audio, _ = tf.audio.decode_wav(tf.io.read_file(filepath))
                audio = tf.squeeze(audio, axis=-1)  # Rimuove dimensioni non necessarie
                audio = tf.image.resize_with_pad(audio, target_height=16000, target_width=1)  # Normalizza lunghezza
                data.append(audio.numpy())
                targets.append(labels[label])
    return np.array(data), to_categorical(targets, len(commands))


In [4]:

# Carica e pre-processa i dati
data, labels = preprocess_dataset(DATASET_PATH, COMMANDS)


TypeError: join() argument must be str or bytes, not 'int'

In [None]:

# Divisione del dataset in training e test
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42)

# Estrazione degli spettrogrammi
def audio_to_spectrogram(data):
    spectrograms = []
    for audio in data:
        spectrogram = tf.signal.stft(audio, frame_length=255, frame_step=128)
        spectrogram = tf.abs(spectrogram)
        spectrograms.append(tf.image.resize(spectrogram, [128, 128]).numpy())
    return np.array(spectrograms)

X_train = audio_to_spectrogram(X_train)
X_test = audio_to_spectrogram(X_test)

# Costruzione del modello
model = models.Sequential([
    layers.Input(shape=(128, 128, 1)),
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(len(COMMANDS), activation='softmax')
])

# Compilazione del modello
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Allenamento del modello
model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test))

# Valutazione
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.2f}")
