In [17]:
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import librosa
import sklearn
import tensorflow as tf
from tensorflow.keras.callbacks import CSVLogger, EarlyStopping, ModelCheckpoint
from tensorflow.keras import regularizers
import pandas as pd
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
plt.style.use("seaborn-v0_8")

from tensorflow import keras
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D


In [3]:
# Define the paths to the audio data folder and the annotations file
audio_data_folder = Path("warblrb10k_public_wav\wav")
annotations_file = Path("warblrb10k_public_metadata_2018.csv")

# Load the annotations into a pandas DataFrame
annotations_df = pd.read_csv(annotations_file)

# Display the first few rows of the annotations DataFrame
print(annotations_df)

                       itemid   datasetid  hasbird
0     759808e5-f824-401e-9058  warblrb10k        1
1     1d94fc4a-1c63-4da0-9cac  warblrb10k        1
2     bb0099ce-3073-4613-8557  warblrb10k        1
3     c4c67e81-9aa8-4af4-8eb7  warblrb10k        1
4     ab322d4b-da69-4b06-a065  warblrb10k        0
...                       ...         ...      ...
7995  ca7b3342-17b0-444f-ba2a  warblrb10k        1
7996  43071f95-d31e-447b-8786  warblrb10k        1
7997  0d4d2fea-743d-46aa-a17f  warblrb10k        1
7998  0d34160d-55db-4c70-93fa  warblrb10k        1
7999  01539aa0-f482-4a71-a944  warblrb10k        1

[8000 rows x 3 columns]


In [4]:
from IPython.display import Audio, display

# Setup filepaths per tre file .wav
file0 = audio_data_folder / f"{annotations_df['itemid'][3]}.wav"
file1 = audio_data_folder / f"{annotations_df['itemid'][4]}.wav"
file2 = audio_data_folder / f"{annotations_df['itemid'][5]}.wav"

# Mostra il player audio (il rate viene letto direttamente dal file, quindi non è nemmeno necessario specificarlo)
print(f"File 0 - hasbird: {annotations_df['hasbird'][3]}")
display(Audio(filename=str(file0)))

print(f"File 1 - hasbird: {annotations_df['hasbird'][4]}")
display(Audio(filename=str(file1)))

print(f"File 2 - hasbird: {annotations_df['hasbird'][5]}")
display(Audio(filename=str(file2)))

File 0 - hasbird: 1


File 1 - hasbird: 0


File 2 - hasbird: 0


In [None]:
labels = annotations_df['hasbird'].values
Xdata = []
for i in range(len(annotations_df)):
    file_path = audio_data_folder / f"{annotations_df['itemid'][i]}.wav"
    y, sr = librosa.load(file_path, sr=44100)
    new_sr = 11000
    y = librosa.resample(y=y, orig_sr=sr, target_sr=new_sr)  # Resample to 22.05kHz
    ## Extract features (e.g., MFCCs) from the audio signal
    #mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    ## Compute the mean of the MFCCs across time frames
    #mfccs_mean = np.mean(mfccs.T, axis=0)
    # scegli un target_len (es. 66000 sample)
    target_len = 10*new_sr
    if len(y) >= target_len:
        y_fixed = y[:target_len]
    else:
        y_fixed = np.pad(y, (0, target_len - len(y)), mode='constant')
    Xdata.append(y_fixed)
    #Xdata.append(y) 
    #Xdata.append(mfccs_mean)  # Append the mean MFCCs to the list



NameError: name 'Xdata_f' is not defined

In [8]:
Xdata_f = np.array(Xdata)
Xdata_f.shape


(8000, 110000)

In [None]:

scaler = StandardScaler()
Xdata_s = scaler.fit_transform(Xdata_f)



(6400, 110000) (6400,) (1600, 110000) (1600,)


In [26]:
X_train, X_test, y_train, y_test = train_test_split(Xdata_s, labels, test_size=0.2, random_state=42)
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

(6400, 110000) (6400,) (1600, 110000) (1600,)


In [32]:
# Reshape the training and testing data to include a height and channel dimension
X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1], 1)

# CNN model definition
model = keras.Sequential([
    Conv2D(32, (1, 3), activation='relu', input_shape=(1, X_train.shape[2], 1), kernel_regularizer=regularizers.l1_l2(l1=0.001, l2=0.001)),
    MaxPooling2D((1, 3), strides=3),
    Conv2D(32, (1, 3), activation='relu', kernel_regularizer=regularizers.l1_l2(l1=0.01, l2=0.01)),
    MaxPooling2D((1, 2), strides=2),
    Conv2D(32, (1, 2), activation='relu', kernel_regularizer=regularizers.l1_l2(l1=0.01, l2=0.01)),
    Flatten(),
    Dense(16, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

#compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

#fit the model
history = model.fit(X_train, y_train, epochs=10, verbose=1, validation_data=(X_test, y_test))

plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training and Validation Loss')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(history.history['accuracy'], label='Training Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Training and Validation Accuracy')
plt.legend()

plt.show()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m151s[0m 749ms/step - accuracy: 0.7209 - loss: 5.4839 - val_accuracy: 0.7663 - val_loss: 2.3371
Epoch 2/10
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m148s[0m 740ms/step - accuracy: 0.7578 - loss: 1.9568 - val_accuracy: 0.7881 - val_loss: 1.1566
Epoch 3/10
[1m 40/200[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m1:52[0m 702ms/step - accuracy: 0.7744 - loss: 1.1520

KeyboardInterrupt: 

In [30]:
# expanding X_train_s and X_test_s to fit conv2d
#X_train_s = X_train.reshape(X_train.shape[0], X_train.shape[1], X_train.shape[2], 1)
#X_test_s = X_test.reshape(X_test.shape[0], X_test.shape[1], X_test.shape[2], 1)

# expanding X_train_s and X_test_s to fit conv2d
#X_train_s = X_train.reshape(X_train.shape[0], X_train.shape[1], X_train.shape[2], 1)
#X_test_s = X_test.reshape(X_test.shape[0], X_test.shape[1], X_test.shape[2], 1)

# Reshape X_train and X_test for scaling
X_train_reshaped = X_train.reshape(X_train.shape[0], -1)
X_test_reshaped = X_test.reshape(X_test.shape[0], -1)

# Scale the reshaped data
X_train_s = scaler.fit_transform(X_train_reshaped)
X_test_s = scaler.transform(X_test_reshaped)

# Reshape back to original shape
X_train_s = X_train_s.reshape(X_train.shape)
X_test_s = X_test_s.reshape(X_test.shape)

# CNN model definition
model = keras.Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(X_train.shape[1], 1), kernel_regularizer=regularizers.l1_l2(l1=0.001, l2=0.001)),
    MaxPooling2D((3, 3), strides=3),
    Conv2D(32, (3, 3), activation='relu', kernel_regularizer=regularizers.l1_l2(l1=0.01, l2=0.01)),
    MaxPooling2D((2, 2), strides=2),
    Conv2D(32, (2, 2), activation='relu', kernel_regularizer=regularizers.l1_l2(l1=0.01, l2=0.01)),
    Flatten(),
    Dense(16, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

# Reshape the training and testing data to include a channel dimension
X_train = np.expand_dims(X_train, axis=-1)
X_test = np.expand_dims(X_test, axis=-1)

#compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

#fit the model
history = model.fit(X_train, y_train, epochs=10, verbose=1, validation_data=(X_test, y_test))

plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training and Validation Loss')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(history.history['accuracy'], label='Training Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Training and Validation Accuracy')
plt.legend()

plt.show()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


ValueError: Input 0 of layer "conv2d_15" is incompatible with the layer: expected min_ndim=4, found ndim=3. Full shape received: (None, 110000, 1)