In [None]:
import gdown
import numpy as np

# Download the precomputed YAMNet embeddings for ESC-50 from Google Drive
gdown.download("https://drive.google.com/uc?id=1ebHt0zq0m4jWxCox9vSajxZfKLPRFTSm", "yamnet_embeddings_esc50.npz", quiet=False)

# Load the arrays stored in the .npz archive
yamnet_data = np.load("yamnet_embeddings_esc50.npz")
yamnet_embeddings = yamnet_data['embeddings']  # Shape: (2000, 1024)
yamnet_labels = yamnet_data['labels']          # Shape: (2000,)
yamnet_folds = yamnet_data['folds']            # Shape: (2000,)
# 'categories' is optional: use None instead of raising KeyError when the
# archive does not contain it
yamnet_categories = yamnet_data['categories'] if 'categories' in yamnet_data.files else None

# Quick sanity check of the loaded array sizes
print(f"YAMNet - Embeddings shape: {yamnet_embeddings.shape}")
print(f"YAMNet - Labels shape: {yamnet_labels.shape}")
print(f"YAMNet - Folds shape: {yamnet_folds.shape}")


Downloading...
From: https://drive.google.com/uc?id=1ebHt0zq0m4jWxCox9vSajxZfKLPRFTSm
To: /content/yamnet_embeddings_esc50.npz
100%|██████████| 8.35M/8.35M [00:00<00:00, 17.7MB/s]

YAMNet - Embeddings shape: (2000, 1024)
YAMNet - Labels shape: (2000,)
YAMNet - Folds shape: (2000,)





In [None]:
import tensorflow_hub as hub
import tensorflow as tf

# Fetch the pretrained YAMNet model from TensorFlow Hub using its published handle
YAMNET_HUB_HANDLE = 'https://tfhub.dev/google/yamnet/1'
yamnet_model = hub.load(YAMNET_HUB_HANDLE)

print("YAMNet model loaded successfully.")


YAMNet model loaded successfully.


In [None]:
def build_fcnn(input_dim, num_classes):
    """Build and compile a fully connected classifier for fixed-size embeddings.

    The network is four ReLU hidden blocks (512, 256, 128 and 64 units)
    followed by a softmax output layer. The first two dense layers carry an
    L2 kernel penalty of 1e-4, the first three blocks use batch
    normalisation, and every block ends with dropout.

    Args:
        input_dim: Length of each input feature vector.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A compiled ``tf.keras.Sequential`` model (AdamW optimizer, sparse
        categorical cross-entropy loss, accuracy metric).
    """
    keras = tf.keras

    # One row per hidden block: (units, L2 factor or None, batch norm?, dropout rate)
    hidden_blocks = (
        (512, 1e-4, True, 0.3),
        (256, 1e-4, True, 0.2),
        (128, None, True, 0.2),
        (64, None, False, 0.1),
    )

    stack = [keras.layers.Input(shape=(input_dim,))]
    for units, l2_factor, use_batch_norm, dropout_rate in hidden_blocks:
        # A fresh regularizer instance per layer; None leaves the kernel unpenalised
        penalty = keras.regularizers.l2(l2_factor) if l2_factor is not None else None
        stack.append(keras.layers.Dense(units, activation='relu', kernel_regularizer=penalty))
        if use_batch_norm:
            stack.append(keras.layers.BatchNormalization())
        stack.append(keras.layers.Dropout(dropout_rate))

    # Output layer: one probability per class
    stack.append(keras.layers.Dense(num_classes, activation='softmax'))

    network = keras.Sequential(stack)
    network.compile(
        optimizer=keras.optimizers.AdamW(learning_rate=1e-4, weight_decay=1e-5),
        # Sparse variant of the loss because labels are integer class ids, not one-hot vectors
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network


In [None]:
from sklearn.metrics import f1_score

# Seed Python, NumPy and TensorFlow so that weight initialisation, batch
# shuffling and dropout are repeatable from one run of this cell to the next
# (GPU kernels may still introduce small numerical differences)
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Per-fold metrics, filled in by the cross-validation loop below
accuracy_scores = []
f1_scores = []

# Embedding dimensionality and number of distinct classes
input_dim = yamnet_embeddings.shape[1]
num_classes = len(np.unique(yamnet_labels))

# Cross-validation over the predefined folds (numbered 1..5)
for fold in range(1, 6):
    print(f"Training on fold {fold}...")

    # Start from a freshly initialised model for every fold
    model = build_fcnn(input_dim, num_classes)

    # The current fold is held out; all remaining folds form the training set
    train_indices = yamnet_folds != fold
    test_indices = yamnet_folds == fold

    X_train, X_test = yamnet_embeddings[train_indices], yamnet_embeddings[test_indices]
    y_train, y_test = yamnet_labels[train_indices], yamnet_labels[test_indices]

    # Train the model; the held-out fold is passed as validation data, so
    # per-epoch validation metrics are logged on it
    model.fit(
        X_train, y_train,
        epochs=50,
        batch_size=32,
        validation_data=(X_test, y_test),
        verbose=2
    )

    # Evaluate on the held-out fold
    loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
    accuracy_scores.append(accuracy)

    # F1-score from the arg-max class predictions
    y_pred = model.predict(X_test).argmax(axis=1)
    f1 = f1_score(y_test, y_pred, average='weighted')  # support-weighted average over classes
    f1_scores.append(f1)

    print(f"Fold {fold} - Test Accuracy: {accuracy:.4f}")
    print(f"Fold {fold} - Test F1-Score: {f1:.4f}")

    # Persist the trained model of this fold
    model.save(f"yamnet_fcnn_fold_{fold}_embeddings.keras")
    print(f"Model for fold {fold} saved.")

# Mean and standard deviation of the metrics across folds
accuracy_mean = np.mean(accuracy_scores)
accuracy_std = np.std(accuracy_scores)
f1_mean = np.mean(f1_scores)
f1_std = np.std(f1_scores)

# Report the aggregated results
print("\nYAMNet FCNN Evaluation Results:")
print(f"Mean Accuracy: {accuracy_mean:.4f} ± {accuracy_std:.4f}")
print(f"Mean F1-Score: {f1_mean:.4f} ± {f1_std:.4f}")


Training on fold 1...
Epoch 1/50
50/50 - 9s - 170ms/step - accuracy: 0.0319 - loss: 4.2102 - val_accuracy: 0.0675 - val_loss: 3.9463
Epoch 2/50
50/50 - 0s - 7ms/step - accuracy: 0.1050 - loss: 3.7354 - val_accuracy: 0.1875 - val_loss: 3.8035
Epoch 3/50
50/50 - 1s - 10ms/step - accuracy: 0.1637 - loss: 3.4869 - val_accuracy: 0.2700 - val_loss: 3.6121
Epoch 4/50
50/50 - 0s - 5ms/step - accuracy: 0.2612 - loss: 3.1715 - val_accuracy: 0.3850 - val_loss: 3.3647
Epoch 5/50
50/50 - 0s - 5ms/step - accuracy: 0.3187 - loss: 2.9949 - val_accuracy: 0.4600 - val_loss: 3.0857
Epoch 6/50
50/50 - 0s - 4ms/step - accuracy: 0.3725 - loss: 2.8041 - val_accuracy: 0.5075 - val_loss: 2.8092
Epoch 7/50
50/50 - 0s - 7ms/step - accuracy: 0.4256 - loss: 2.6172 - val_accuracy: 0.5450 - val_loss: 2.5621
Epoch 8/50
50/50 - 0s - 6ms/step - accuracy: 0.4619 - loss: 2.4801 - val_accuracy: 0.5750 - val_loss: 2.3317
Epoch 9/50
50/50 - 0s - 6ms/step - accuracy: 0.4975 - loss: 2.3200 - val_accuracy: 0.5875 - val_loss: 2

In [None]:
import gdown
import numpy as np

# Download the precomputed AST embeddings for ESC-50 from Google Drive
gdown.download("https://drive.google.com/uc?id=1HAghy-Oqqg2G03KYlnwdO6z32gZEZVL3", "ast_embeddings_esc50.npz", quiet=False)

# Load the arrays stored in the .npz archive
ast_data = np.load("ast_embeddings_esc50.npz")
ast_embeddings = ast_data['embeddings']  # Shape: (2000, D)
ast_labels = ast_data['labels']          # Shape: (2000,)
ast_folds = ast_data['folds']            # Shape: (2000,)
# 'categories' is optional: use None instead of raising KeyError when the
# archive does not contain it
ast_categories = ast_data['categories'] if 'categories' in ast_data.files else None

# Quick sanity check of the loaded array sizes
print(f"AST - Embeddings shape: {ast_embeddings.shape}")
print(f"AST - Labels shape: {ast_labels.shape}")
print(f"AST - Folds shape: {ast_folds.shape}")


Downloading...
From: https://drive.google.com/uc?id=1HAghy-Oqqg2G03KYlnwdO6z32gZEZVL3
To: /content/ast_embeddings_esc50.npz
100%|██████████| 6.31M/6.31M [00:00<00:00, 54.7MB/s]

AST - Embeddings shape: (2000, 768)
AST - Labels shape: (2000,)
AST - Folds shape: (2000,)





In [None]:
from sklearn.metrics import f1_score
import tensorflow_hub as hub
import tensorflow as tf


# Seed Python, NumPy and TensorFlow so that weight initialisation, batch
# shuffling and dropout are repeatable from one run of this cell to the next
# (GPU kernels may still introduce small numerical differences)
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Per-fold metrics, filled in by the cross-validation loop below
accuracy_scores = []
f1_scores = []

# Embedding dimensionality and number of distinct classes
input_dim = ast_embeddings.shape[1]
num_classes = len(np.unique(ast_labels))

# Cross-validation over the predefined folds (numbered 1..5)
for fold in range(1, 6):
    print(f"Training on fold {fold}...")

    # Start from a freshly initialised model for every fold
    model = build_fcnn(input_dim, num_classes)

    # The current fold is held out; all remaining folds form the training set
    train_indices = ast_folds != fold
    test_indices = ast_folds == fold

    X_train, X_test = ast_embeddings[train_indices], ast_embeddings[test_indices]
    y_train, y_test = ast_labels[train_indices], ast_labels[test_indices]

    # Train the model; the held-out fold is passed as validation data, so
    # per-epoch validation metrics are logged on it
    model.fit(
        X_train, y_train,
        epochs=50,
        batch_size=32,
        validation_data=(X_test, y_test),
        verbose=2
    )

    # Evaluate on the held-out fold
    loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
    accuracy_scores.append(accuracy)

    # F1-score from the arg-max class predictions
    y_pred = model.predict(X_test).argmax(axis=1)
    f1 = f1_score(y_test, y_pred, average='weighted')  # support-weighted average over classes
    f1_scores.append(f1)

    print(f"Fold {fold} - Test Accuracy: {accuracy:.4f}")
    print(f"Fold {fold} - Test F1-Score: {f1:.4f}")

    # Persist the trained model of this fold
    model.save(f"ast_fcnn_fold_{fold}_embeddings.keras")
    print(f"Model for fold {fold} saved.")

# Mean and standard deviation of the metrics across folds
accuracy_mean = np.mean(accuracy_scores)
accuracy_std = np.std(accuracy_scores)
f1_mean = np.mean(f1_scores)
f1_std = np.std(f1_scores)

# Report the aggregated results
print("\nAST FCNN Evaluation Results:")
print(f"Mean Accuracy: {accuracy_mean:.4f} ± {accuracy_std:.4f}")
print(f"Mean F1-Score: {f1_mean:.4f} ± {f1_std:.4f}")


Training on fold 1...
Epoch 1/50
50/50 - 10s - 197ms/step - accuracy: 0.0362 - loss: 4.1804 - val_accuracy: 0.0525 - val_loss: 3.9048
Epoch 2/50
50/50 - 0s - 3ms/step - accuracy: 0.0913 - loss: 3.6811 - val_accuracy: 0.2050 - val_loss: 3.6459
Epoch 3/50
50/50 - 0s - 6ms/step - accuracy: 0.1963 - loss: 3.2571 - val_accuracy: 0.4025 - val_loss: 3.2820
Epoch 4/50
50/50 - 0s - 3ms/step - accuracy: 0.3231 - loss: 2.9343 - val_accuracy: 0.5700 - val_loss: 2.8873
Epoch 5/50
50/50 - 0s - 6ms/step - accuracy: 0.4187 - loss: 2.6391 - val_accuracy: 0.6825 - val_loss: 2.5137
Epoch 6/50
50/50 - 0s - 3ms/step - accuracy: 0.5069 - loss: 2.3604 - val_accuracy: 0.7625 - val_loss: 2.1141
Epoch 7/50
50/50 - 0s - 3ms/step - accuracy: 0.6000 - loss: 2.1323 - val_accuracy: 0.8275 - val_loss: 1.7905
Epoch 8/50
50/50 - 0s - 6ms/step - accuracy: 0.6612 - loss: 1.9337 - val_accuracy: 0.8425 - val_loss: 1.5510
Epoch 9/50
50/50 - 0s - 6ms/step - accuracy: 0.7013 - loss: 1.7523 - val_accuracy: 0.8900 - val_loss: 1

In [None]:
# Scaricare i modelli per i 5 fold
!gdown 11hhTBJZpf-BRFjWhuFfrO5mJCIfpKXr6 -O ast_fcnn_folds.zip

# Estrarre i file
!unzip ast_fcnn_folds.zip


Downloading...
From (original): https://drive.google.com/uc?id=11hhTBJZpf-BRFjWhuFfrO5mJCIfpKXr6
From (redirected): https://drive.google.com/uc?id=11hhTBJZpf-BRFjWhuFfrO5mJCIfpKXr6&confirm=t&uuid=50b91475-dcef-430c-b21a-ecb3fa0b5fd0
To: /content/ast_fcnn_folds.zip
100% 31.9M/31.9M [00:00<00:00, 32.2MB/s]
Archive:  ast_fcnn_folds.zip
  inflating: ast_fcnn_fold_1_embeddings.keras  
  inflating: ast_fcnn_fold_2_embeddings.keras  
  inflating: ast_fcnn_fold_3_embeddings.keras  
  inflating: ast_fcnn_fold_4_embeddings.keras  
  inflating: ast_fcnn_fold_5_embeddings.keras  


In [None]:
# Scaricare il CSV in formato raw dal repository GitHub
!wget https://raw.githubusercontent.com/karolpiczak/ESC-50/master/meta/esc50.csv -O esc50.csv
import pandas as pd

metadata = pd.read_csv("esc50.csv")



--2024-12-28 09:54:47--  https://raw.githubusercontent.com/karolpiczak/ESC-50/master/meta/esc50.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 93742 (92K) [text/plain]
Saving to: ‘esc50.csv’


2024-12-28 09:54:48 (5.57 MB/s) - ‘esc50.csv’ saved [93742/93742]



In [None]:
# Lookup table from numeric target id to category name: {target: category}
target_to_category = {
    target: category
    for target, category in zip(metadata['target'], metadata['category'])
}


In [None]:
import numpy as np
from sklearn.metrics import classification_report
from tensorflow.keras.models import load_model

# One saved AST classifier per predefined fold, named as in the extracted archive
model_paths = [f"ast_fcnn_fold_{fold}_embeddings.keras" for fold in range(1, 6)]

# Accumulators for the out-of-fold ground truth and predictions
y_true = []
y_pred = []

# Predefined folds (ast_folds holds the fold id of each sample)
for fold, model_path in enumerate(model_paths, start=1):
    print(f"Evaluating fold {fold}...")

    # Load the model belonging to the current fold
    # (presumably trained with this fold held out -- verify against the archive)
    model = load_model(model_path)

    # Only the samples of the current fold are needed for evaluation
    test_indices = ast_folds == fold
    X_test = ast_embeddings[test_indices]
    y_test = ast_labels[test_indices]

    # Store the true labels of this fold
    y_true.extend(y_test)

    # Predict the class of every sample in this fold (arg-max over class probabilities)
    fold_predictions = model.predict(X_test).argmax(axis=1)
    y_pred.extend(fold_predictions)

    print(f"Fold {fold} completed.")

# Convert the accumulated lists to arrays
y_true = np.array(y_true)
y_pred = np.array(y_pred)

# Translate numeric labels into category names via the lookup dictionary
y_true_categories = [target_to_category[label] for label in y_true]
y_pred_categories = [target_to_category[label] for label in y_pred]

# Build the classification report keyed by category name
print("Classification Report:")
print(classification_report(y_true_categories, y_pred_categories))


Evaluating fold 1...
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 40ms/step
Fold 1 completed.
Evaluating fold 2...
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step
Fold 2 completed.
Evaluating fold 3...
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
Fold 3 completed.
Evaluating fold 4...
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
Fold 4 completed.
Evaluating fold 5...
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step
Fold 5 completed.
Classification Report:
                  precision    recall  f1-score   support

        airplane       0.82      0.82      0.82        40
       breathing       1.00      0.93      0.96        40
  brushing_teeth       0.98      1.00      0.99        40
     can_opening       0.95      0.93      0.94        40
        car_horn       0.95      0.97      0.96        40
             cat       1.00      0.97      0.99        40
     