# In [None]:
import librosa
import os
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from keras.layers import Input, Dense, Conv1D, MaxPooling1D, UpSampling1D, BatchNormalization, Flatten, Reshape
from keras.models import Model
from keras.optimizers import Adam
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
import pandas as pd
import warnings

# Turn off all warnings
warnings.filterwarnings('ignore')

def load_and_extract_features(directory_path, sr=22050):
    """
    Load audio files from a directory, extract features, and return them as a numpy array.

    Every file in ``directory_path`` whose name ends with ``.wav`` is loaded
    with librosa and five frame-level feature matrices are computed (MFCC,
    chroma, mel spectrogram, spectral contrast, tonnetz). The matrices are
    stacked so that each row describes one analysis frame, and the source
    file name is appended as the last column of every row.

    Parameters:
    - directory_path: str, path to the directory containing audio files.
    - sr: int, sample rate for audio files. Default is 22050.

    Returns:
    - features_data: numpy array with one row per analysis frame. The last
      column is the source file name and the preceding columns are the
      feature values. Because a string column is stacked next to the
      numbers, numpy stores the whole array with a string dtype, so callers
      must convert the feature columns to float before numeric use. When the
      directory contains no ``.wav`` file, an empty float array of shape
      (0, 174) is returned.
    """
    # Width of the result for the "no files" case: with librosa's default
    # settings the five extractors yield 20 + 12 + 128 + 7 + 6 = 173 values
    # per frame, plus one column for the file name.
    n_columns = 174

    per_file_rows = []

    # Sorted so the row order does not depend on the file system's listing order.
    for filename in sorted(os.listdir(directory_path)):
        if not filename.endswith('.wav'):
            continue
        file_path = os.path.join(directory_path, filename)

        # Keep the returned rate in its own name: rebinding ``sr`` would make
        # a caller-supplied ``sr=None`` (native rate) stick to the first
        # file's rate for every following file.
        y, file_sr = librosa.load(file_path, sr=sr)

        # Extract features (each result is laid out as features x frames)
        mfcc = librosa.feature.mfcc(y=y, sr=file_sr)
        chroma = librosa.feature.chroma_stft(y=y, sr=file_sr)
        mel = librosa.feature.melspectrogram(y=y, sr=file_sr)
        contrast = librosa.feature.spectral_contrast(y=y, sr=file_sr)
        tonnetz = librosa.feature.tonnetz(y=y, sr=file_sr)

        # Stack along the feature axis, then transpose to frames x features
        features = np.concatenate([mfcc, chroma, mel, contrast, tonnetz], axis=0).T

        # Tag every frame with the file it came from
        name_column = np.full((features.shape[0], 1), filename)
        per_file_rows.append(np.hstack((features, name_column)))

    if not per_file_rows:
        return np.zeros((0, n_columns))

    # A single stack at the end avoids re-copying the accumulated rows for
    # every file. No row is dropped afterwards: there is no placeholder row
    # to remove, so deleting row 0 would discard a real frame.
    return np.vstack(per_file_rows)

def build_conv_autoencoder(input_shape, output_activation='linear'):
    """
    Build and compile a 1-D convolutional autoencoder together with its encoder.

    The encoder is two Conv1D -> MaxPooling1D -> BatchNormalization stages
    (32 then 16 filters, kernel size 3). The decoder mirrors it with two
    Conv1D -> UpSampling1D -> BatchNormalization stages (16 then 32 filters).
    Its output is flattened and mapped by a Dense layer back to exactly
    ``input_shape``. The Dense layer is what makes the output size match the
    input even when pooling followed by upsampling does not restore the
    original length.

    Parameters:
    - input_shape: tuple, shape of one sample, e.g. (n_features, 1).
    - output_activation: str, activation of the final reconstruction layer.
      Defaults to 'linear' so the network can reproduce negative values such
      as standardized (zero-mean) features. Pass 'relu' to get a
      non-negative reconstruction.

    Returns:
    - autoencoder: Model mapping an input to its reconstruction, compiled
      with Adam (learning rate 0.001) and mean squared error loss.
    - encoder: Model mapping an input to the encoded representation. It
      shares its layers with ``autoencoder`` and is not compiled separately.
    """
    input_layer = Input(shape=input_shape)

    # Encoder
    x = Conv1D(32, 3, activation="relu", padding="same")(input_layer)
    x = MaxPooling1D(2, padding="same")(x)
    x = BatchNormalization()(x)
    x = Conv1D(16, 3, activation="relu", padding="same")(x)
    x = MaxPooling1D(2, padding="same")(x)
    encoded = BatchNormalization()(x)

    # Decoder
    x = Conv1D(16, 3, activation="relu", padding="same")(encoded)
    x = UpSampling1D(2)(x)
    x = BatchNormalization()(x)
    x = Conv1D(32, 3, activation="relu", padding="same")(x)
    x = UpSampling1D(2)(x)
    x = BatchNormalization()(x)

    # Project back to the exact input size. A plain int is passed because
    # np.prod returns a NumPy integer.
    x = Flatten()(x)
    x = Dense(int(np.prod(input_shape)), activation=output_activation)(x)
    decoded = Reshape(input_shape)(x)

    autoencoder = Model(input_layer, decoded)
    encoder = Model(input_layer, encoded)

    autoencoder.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')
    return autoencoder, encoder


# ---------------------------------------------------------------------------
# Stage 1: learn a latent representation of the labelled recordings and
# cluster it. `scaler`, `encoder` and `kmeans` fitted here define the feature
# space in which cluster ids are meaningful.
# ---------------------------------------------------------------------------
audio_file_path = "scikit-maad/data"

label_features = load_and_extract_features(audio_file_path)

# The last column carries the source file name of every frame. Strip the
# extension literally: without regex=False, older pandas treats '.' as
# "any character".
label_names = label_features[:, -1]
label_names = np.array(pd.Series(label_names).str.replace('.wav', '', regex=False))

# The loader returns a string-typed array (numbers stacked next to file
# names), so convert the feature columns back to float explicitly.
label_features = label_features[:, :-1].astype(float)

scaler = StandardScaler()
features_scaled = scaler.fit_transform(label_features)

# Conv1D expects (samples, steps, channels); use a single channel.
features_scaled_reshaped = features_scaled.reshape(-1, features_scaled.shape[1], 1)

input_shape = (features_scaled.shape[1], 1)
autoencoder, encoder = build_conv_autoencoder(input_shape)

autoencoder.fit(features_scaled_reshaped,
                features_scaled_reshaped,
                epochs=50, batch_size=128)

# Encode every frame and flatten the (steps, filters) output to one vector.
compressed_features = encoder.predict(features_scaled_reshaped)
label_compressed_features_flattened = compressed_features.reshape(compressed_features.shape[0], -1)

kmeans = KMeans(n_clusters=5, random_state=42).fit(label_compressed_features_flattened)

# NOTE(review): newer scikit-learn releases rename `n_iter` to `max_iter`;
# confirm which spelling the installed version expects.
tsne = TSNE(n_components=2, perplexity=30, n_iter=300)
tsne_results = tsne.fit_transform(label_compressed_features_flattened)

plt.figure(figsize=(15, 5))
plt.scatter(tsne_results[:, 0], tsne_results[:, 1], c=kmeans.labels_, cmap='viridis')
plt.title("t-SNE Visualization of Clustered Features")
plt.xlabel("t-SNE Feature 1")
plt.ylabel("t-SNE Feature 2")
plt.colorbar()
plt.show()


cluster_labels = kmeans.labels_
# Add the cluster labels and the recording names as additional columns
X_label = np.hstack((label_compressed_features_flattened, cluster_labels.reshape(-1, 1)))
X_label = np.hstack((X_label, label_names.reshape(-1, 1)))


label_data = pd.DataFrame(X_label)
# Keep only the two appended columns. Selecting them from the end avoids
# hard-coding the width of the latent vector.
label_data = label_data.iloc[:, -2:]
label_data.columns = ['Category', 'Label_Names']

# One row per recording, holding the set of cluster ids seen in its frames.
label_data_groupby = label_data.groupby('Label_Names')['Category'].unique().reset_index()


# Merge the first and last recordings into a single entry: relabel row 0 and
# drop the final row.
# NOTE(review): this presumes the first/last groups are cold_forest_daylight
# and tropical_forest_morning and that they share the same cluster set;
# confirm against the actual contents of the data directory.
label_data_groupby.loc[0, 'Label_Names'] = 'cold_forest_daylight/tropical_forest_morning'
label_data_groupby = label_data_groupby.iloc[:-1, :]

# sphinx_gallery_thumbnail_path = './_images/sphx_glr_plot_unsupervised_sound_classification_004.png'
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from maad import sound, features, rois
from maad.util import power2dB, plot2d, format_features, overlay_rois

# Recordings to visualise; `adio_name` holds the figure title for each entry,
# in the same order.
audio_files = [
    'scikit-maad/data/cold_forest_daylight.wav',
    'scikit-maad/data/cold_forest_night.wav',
    'scikit-maad/data/rock_savanna.wav',
    'scikit-maad/data/spinetail.wav',
    'scikit-maad/data/tropical_forest_morning.wav',
]
adio_name = [
    'cold_forest_daylight',
    'cold_forest_night',
    'rock_savanna',
    'spinetail',
    'tropical_forest_morning',
]

# Filter settings and the dB range used for display
fcut = 100
forder = 3
ftype = 'highpass'
db_max = 70

# Draw one spectrogram figure per recording
for wav_path, title in zip(audio_files, adio_name):
    s, fs = sound.load(wav_path)

    # Filter the signal with the settings above before the analysis
    s_filt = sound.select_bandwidth(s, fs, fcut=fcut, forder=forder, ftype=ftype)

    Sxx, tn, fn, ext = sound.spectrogram(s_filt, fs, nperseg=1024, noverlap=512)

    # Power -> dB, then shift by db_max
    Sxx_db = power2dB(Sxx, db_range=db_max) + db_max

    plt.figure(figsize=(10, 5))
    plt.imshow(Sxx_db, extent=ext, aspect='auto', origin='lower', cmap='inferno')
    plt.colorbar(format='%+2.0f dB')
    plt.title(title)
    plt.xlabel('Time [s]')
    plt.ylabel('Frequency [Hz]')
    plt.tight_layout()
    plt.show()


# ---------------------------------------------------------------------------
# Stage 2: assign the recordings in the "indices" folder to the clusters
# learned from the labelled recordings.
# ---------------------------------------------------------------------------
audio_file_path = "scikit-maad/data/indices"

# Named `index_features` rather than `features` so the `maad.features` module
# imported earlier in the file is not shadowed.
index_features = load_and_extract_features(audio_file_path)
filenames = index_features[:, -1]
# The loader returns a string-typed array; convert the numeric part explicitly.
index_features = index_features[:, :-1].astype(float)

# Reuse the scaler and encoder that were fitted on the labelled recordings.
# `kmeans` was trained on that encoder's output, so its centroids only make
# sense in that latent space. Fitting a fresh scaler and autoencoder here
# would produce an unrelated embedding and arbitrary cluster assignments.
features_scaled = scaler.transform(index_features)

# Conv1D expects (samples, steps, channels); use a single channel.
features_scaled_reshaped = features_scaled.reshape(-1, features_scaled.shape[1], 1)

compressed_features = encoder.predict(features_scaled_reshaped)
compressed_features_flattened = compressed_features.reshape(compressed_features.shape[0], -1)

predicted_clusters = kmeans.predict(compressed_features_flattened)

X = compressed_features_flattened
# Add the predicted cluster and the file name as additional columns
X_extended = np.hstack((X, predicted_clusters.reshape(-1, 1)))
X_extended = np.hstack((X_extended, filenames.reshape(-1, 1)))

data = pd.DataFrame(X_extended)

# Keep only the two appended columns. Selecting them from the end avoids
# hard-coding the width of the latent vector.
data = data.iloc[:, -2:]
data.columns = ['Category', 'File_Names']

# One row per file, holding the set of cluster ids seen in its frames.
data_groupby = data.groupby('File_Names')['Category'].unique().reset_index()

# Human-readable name for each cluster id.
# NOTE(review): presumably assigned by inspecting one particular clustering
# run; ids may change when the autoencoder is retrained. Confirm before
# relying on these names.
sound_dict = {'0.0': 'Cicada/Frog', '1.0': 'Birds', '2.0': 'Cicada/Frog', '3.0': 'BackGround', '4.0': 'Spinetail'}


def _cluster_key(value):
    """Return a cluster id in the canonical '0.0' text form, whatever its type."""
    return str(float(value))


def _describe_clusters(cluster_ids):
    """Join the distinct sound names of the given cluster ids with ' / '."""
    names = (sound_dict[_cluster_key(c)] for c in cluster_ids)
    # dict.fromkeys drops repeated names while keeping first-seen order.
    return ' / '.join(dict.fromkeys(names))


def _category_signature(cluster_ids):
    """Return the sorted cluster ids as one comma-separated string (merge key)."""
    return ','.join(_cluster_key(c) for c in sorted(cluster_ids, key=float))


data_groupby['Features'] = data_groupby['Category'].apply(_describe_clusters)
data_groupby['Category'] = data_groupby['Category'].apply(_category_signature)

# Build the lookup on a copy so `label_data_groupby` keeps its array-valued
# column and this cell can be re-run.
label_lookup = label_data_groupby.assign(
    Category=label_data_groupby['Category'].apply(_category_signature)
)

# Attach the labelled recording(s) whose cluster set matches each file.
merged_df = pd.merge(data_groupby, label_lookup, on='Category', how='left')

merged_df