In [19]:
import librosa
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import multiprocessing as mp
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
import tensorflow as tf



In [20]:
df_meta = pd.read_csv("./birdclef-2024/train_metadata.csv")

In [21]:
df_meta

Unnamed: 0,primary_label,secondary_labels,type,latitude,longitude,scientific_name,common_name,author,license,rating,url,filename
0,asbfly,[],['call'],39.2297,118.1987,Muscicapa dauurica,Asian Brown Flycatcher,Matt Slaymaker,Creative Commons Attribution-NonCommercial-Sha...,5.0,https://www.xeno-canto.org/134896,asbfly/XC134896.ogg
1,asbfly,[],['song'],51.4030,104.6401,Muscicapa dauurica,Asian Brown Flycatcher,Magnus Hellström,Creative Commons Attribution-NonCommercial-Sha...,2.5,https://www.xeno-canto.org/164848,asbfly/XC164848.ogg
2,asbfly,[],['song'],36.3319,127.3555,Muscicapa dauurica,Asian Brown Flycatcher,Stuart Fisher,Creative Commons Attribution-NonCommercial-Sha...,2.5,https://www.xeno-canto.org/175797,asbfly/XC175797.ogg
3,asbfly,[],['call'],21.1697,70.6005,Muscicapa dauurica,Asian Brown Flycatcher,vir joshi,Creative Commons Attribution-NonCommercial-Sha...,4.0,https://www.xeno-canto.org/207738,asbfly/XC207738.ogg
4,asbfly,[],['call'],15.5442,73.7733,Muscicapa dauurica,Asian Brown Flycatcher,Albert Lastukhin & Sergei Karpeev,Creative Commons Attribution-NonCommercial-Sha...,4.0,https://www.xeno-canto.org/209218,asbfly/XC209218.ogg
...,...,...,...,...,...,...,...,...,...,...,...,...
24454,zitcis1,[],[''],43.5925,4.5434,Cisticola juncidis,Zitting Cisticola,Chèvremont Fabian,Creative Commons Attribution-NonCommercial-Sha...,5.0,https://xeno-canto.org/845747,zitcis1/XC845747.ogg
24455,zitcis1,[],[''],43.5925,4.5434,Cisticola juncidis,Zitting Cisticola,Chèvremont Fabian,Creative Commons Attribution-NonCommercial-Sha...,4.0,https://xeno-canto.org/845817,zitcis1/XC845817.ogg
24456,zitcis1,[],[''],51.1207,4.5607,Cisticola juncidis,Zitting Cisticola,Wim Jacobs,Creative Commons Attribution-NonCommercial-Sha...,4.0,https://xeno-canto.org/856176,zitcis1/XC856176.ogg
24457,zitcis1,[],[''],41.5607,-8.4236,Cisticola juncidis,Zitting Cisticola,Jorge Leitão,Creative Commons Attribution-NonCommercial-Sha...,4.5,https://xeno-canto.org/856723,zitcis1/XC856723.ogg


In [22]:
df_train = df_meta[["primary_label", "filename"]]

In [23]:
def create_model():
    model = tf.keras.models.Sequential([
        tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(1)
    ])

    model.compile(optimizer='adam',
                  loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                  metrics=['accuracy'])
    return model

    

In [24]:
def extract_audio_features_with_path(args):
    ogg_file_path, max_length = args
    return extract_audio_features(ogg_file_path, max_length)

def extract_audio_features(ogg_file_path, max_length=22050*5):
    y, sr = librosa.load(ogg_file_path, sr=None)
    
    # Ensure the audio is of fixed length
    if len(y) < max_length:
        y = np.pad(y, (0, max_length - len(y)), 'constant')
    else:
        y = y[:max_length]
    
    # Extract features
    features = {}

    # Mel spectrogram
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=8000)
    S_dB = librosa.power_to_db(S, ref=np.max)
    features['mel_spectrogram'] = S_dB

    # MFCC
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    features['mfcc'] = mfcc

    # Chroma feature
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    features['chroma'] = chroma

    # Spectral contrast
    spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    features['spectral_contrast'] = spectral_contrast

    # Tonnetz
    tonnetz = librosa.feature.tonnetz(y=librosa.effects.harmonic(y), sr=sr)
    features['tonnetz'] = tonnetz

    # Spectral centroid
    spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
    features['spectral_centroid'] = spectral_centroid

    # Spectral bandwidth
    spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    features['spectral_bandwidth'] = spectral_bandwidth

    # Spectral rolloff
    spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
    features['spectral_rolloff'] = spectral_rolloff

    # Zero crossing rate
    zero_crossing_rate = librosa.feature.zero_crossing_rate(y)
    features['zero_crossing_rate'] = zero_crossing_rate

    # RMS
    rms = librosa.feature.rms(y=y)
    features['rms'] = rms

    return features


In [25]:

# Example usage
audio_features = extract_audio_features('./birdclef-2024/train_audio/asbfly/XC49755.ogg')

# Convert features to a dictionary of DataFrames for better visualization
features_df = {key: pd.DataFrame(value) for key, value in audio_features.items()}

# Display the extracted features
for feature_name, df in features_df.items():
    print(f"\nFeature: {feature_name}")
    display(df.head())  # Using display() from IPython.display for better visualization in Jupyter


Feature: mel_spectrogram


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,206,207,208,209,210,211,212,213,214,215
0,-80.0,-80.0,-80.0,-80.0,-71.131088,-64.629059,-64.798874,-65.517334,-64.943108,-66.595901,...,-68.542099,-62.939037,-61.753532,-64.483032,-63.943806,-68.828239,-75.564331,-80.0,-69.745407,-58.308243
1,-80.0,-80.0,-80.0,-80.0,-78.152908,-77.86232,-80.0,-80.0,-80.0,-80.0,...,-66.897888,-64.690094,-63.297081,-61.861931,-62.233372,-65.241409,-66.423172,-74.840057,-68.64563,-57.374527
2,-80.0,-80.0,-80.0,-80.0,-80.0,-80.0,-80.0,-80.0,-80.0,-80.0,...,-61.738167,-63.307991,-63.542484,-68.342438,-66.26738,-65.16584,-68.677383,-73.79744,-69.288124,-58.136021
3,-80.0,-80.0,-80.0,-80.0,-78.352684,-80.0,-80.0,-80.0,-80.0,-79.861069,...,-64.366806,-71.845596,-70.448761,-73.537865,-71.473488,-71.168457,-72.105133,-69.305962,-67.361351,-57.3549
4,-80.0,-80.0,-80.0,-80.0,-80.0,-80.0,-80.0,-80.0,-80.0,-80.0,...,-73.917877,-80.0,-76.070702,-72.207718,-73.334587,-75.337097,-80.0,-70.599457,-68.497894,-58.325111



Feature: mfcc


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,206,207,208,209,210,211,212,213,214,215
0,-695.373413,-695.373413,-695.35437,-680.230347,-643.505615,-619.225769,-591.797607,-570.970276,-563.205383,-558.302429,...,-306.195435,-323.671509,-334.408752,-339.167542,-338.049377,-331.991241,-328.520203,-321.373688,-314.66333,-300.228363
1,0.0,0.0,0.000993,-1.203629,-5.26755,-6.745503,-6.516576,-5.824796,-6.088685,-6.661308,...,-41.371971,-18.021278,-13.957634,-14.097694,-14.342091,-13.824339,-19.953579,-27.28154,-24.62007,-22.640427
2,0.0,0.0,-0.026895,-17.487478,-48.59449,-65.481277,-84.124863,-95.908478,-98.228439,-101.852119,...,-174.74382,-175.672653,-179.184845,-179.671799,-181.474518,-182.583755,-186.912918,-188.352631,-176.718719,-133.417725
3,0.0,0.0,-0.002972,2.805939,12.572544,14.2419,11.571678,11.357302,12.28172,13.987471,...,65.48056,54.011566,45.415138,47.022881,52.940201,50.364639,52.326851,65.00193,67.655037,71.527222
4,0.0,0.0,0.026674,8.966082,11.528899,8.19268,3.498781,-2.270909,-9.443661,-8.712378,...,-91.412788,-79.573349,-68.684563,-62.239914,-48.78833,-53.536591,-59.000977,-60.851517,-54.280037,-25.470657



Feature: chroma


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,206,207,208,209,210,211,212,213,214,215
0,0.708294,0.640073,0.707259,0.544579,0.313167,0.284966,0.254466,0.247745,0.324839,0.466124,...,0.114438,0.319022,0.370808,0.189577,0.042638,0.022667,0.024571,0.025156,0.057726,0.065114
1,0.595519,0.761191,0.617285,0.474224,0.340369,0.308571,0.460019,0.379475,0.303072,0.324227,...,0.102003,0.304018,0.35448,0.219087,0.042779,0.023521,0.017317,0.014781,0.018537,0.01597
2,0.703772,0.855086,1.0,0.511553,0.417635,0.389911,0.582625,0.537955,0.582327,0.628322,...,0.233477,0.346289,0.495336,0.2912,0.052698,0.023576,0.022504,0.014497,0.009743,0.012317
3,0.729621,0.740412,0.929025,0.534931,0.51665,0.398323,0.351357,0.400521,0.466656,0.598876,...,0.588485,0.842825,0.87366,0.598802,0.114322,0.032365,0.0447,0.024819,0.015485,0.013818
4,0.96663,0.828872,0.708723,0.531631,0.500283,0.516987,0.690588,0.737945,0.609317,0.782331,...,0.807133,0.98946,0.916617,0.499404,0.099249,0.059439,0.052223,0.020962,0.019204,0.018052



Feature: spectral_contrast


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,206,207,208,209,210,211,212,213,214,215
0,8.354789,11.055998,18.828671,13.490233,10.935961,17.737949,25.388243,23.35961,17.671988,16.888793,...,11.537279,17.708985,20.112713,13.965326,9.895205,7.356818,12.943917,12.273817,1.991143,0.512271
1,5.655446,9.4606,13.589362,8.253297,6.35506,13.855686,5.488953,9.860992,11.352152,20.052817,...,10.357358,7.098369,13.165182,9.920686,10.515408,9.414201,11.883696,5.564685,6.395933,1.331911
2,7.175263,17.008956,10.132587,9.868055,10.725293,9.491114,13.383151,10.775952,15.808019,12.061584,...,10.227369,11.273113,17.308726,12.270389,16.32803,13.583443,14.426049,16.306095,14.946764,10.478193
3,13.181875,18.487175,7.492489,13.558935,11.697553,16.721773,12.770144,18.760145,14.912206,12.216163,...,10.045216,14.306968,15.011783,14.692562,13.983963,16.310986,12.78404,19.126865,17.82947,11.86527
4,11.863661,11.87551,12.481406,14.705046,15.351458,15.150294,13.763278,11.776819,11.068492,17.253255,...,18.722957,19.85183,14.893244,17.568429,17.895546,21.664872,21.846788,22.29771,16.560607,15.399929



Feature: tonnetz


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,206,207,208,209,210,211,212,213,214,215
0,0.053549,0.046576,0.069057,-0.033938,0.008213,0.029023,0.030646,-0.002728,0.043914,0.068861,...,0.000652,-0.044043,-0.096815,-0.092799,-0.093779,-0.024391,0.022639,0.041813,-0.040458,0.034045
1,0.006453,-0.01578,0.014258,0.041921,0.020128,0.015061,0.021392,-0.037868,-0.028053,0.050517,...,-0.01172,-0.085404,0.003189,-0.08578,0.012057,-0.011803,-0.030609,0.036463,-0.000963,-0.023262
2,0.05975,0.059457,0.07442,0.06872,0.029005,-0.084037,0.049583,-0.014404,0.035721,0.032192,...,0.006173,0.045641,0.028443,-0.021291,-0.084611,0.07851,0.001111,-0.084613,0.032052,-0.095086
3,0.063891,0.044799,0.006855,0.047675,0.045305,0.079124,0.070752,0.129191,0.09102,0.04713,...,-0.062414,-0.093077,0.102964,0.102695,-0.054047,0.059749,-0.066663,0.001458,0.000603,-0.094087
4,-0.00368,-0.011251,0.000642,-0.045971,0.009795,-0.042154,0.01835,0.020736,0.005233,-0.031041,...,-0.034227,-0.024232,-0.041018,-0.120868,-0.063487,0.012695,-0.023469,-0.047408,-0.042899,-0.044499



Feature: spectral_centroid


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,206,207,208,209,210,211,212,213,214,215
0,7677.594528,6883.725503,5391.344916,4644.819401,4256.435294,4117.689976,4004.929147,3984.25724,4061.659935,4006.887955,...,5067.518441,4680.967684,4378.585645,4230.923209,3971.862282,3890.484954,3928.581835,4022.806053,4128.249826,4278.453378



Feature: spectral_bandwidth


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,206,207,208,209,210,211,212,213,214,215
0,4317.705763,4241.620245,3764.478469,3102.941045,2587.063385,2375.034496,2266.970605,2138.18024,2141.049603,2112.659337,...,1282.843748,1538.289498,1647.334559,1606.466902,1534.741207,1504.182463,1461.898525,1388.454018,1453.933675,1711.389611



Feature: spectral_rolloff


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,206,207,208,209,210,211,212,213,214,215
0,13000.0,12156.25,10078.125,7750.0,6421.875,6343.75,6312.5,6078.125,6203.125,6015.625,...,6109.375,6093.75,5968.75,5796.875,5703.125,5515.625,5375.0,5375.0,5609.375,6031.25



Feature: zero_crossing_rate


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,206,207,208,209,210,211,212,213,214,215
0,0.223633,0.297852,0.361328,0.295898,0.250488,0.234863,0.235352,0.245117,0.246582,0.242676,...,0.323242,0.3125,0.288086,0.258301,0.239258,0.218262,0.212891,0.219727,0.186035,0.133789



Feature: rms


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,206,207,208,209,210,211,212,213,214,215
0,1.3e-05,3.1e-05,7.3e-05,0.000177,0.00027,0.000456,0.000616,0.000727,0.000869,0.000953,...,0.090907,0.066837,0.030106,0.029994,0.035177,0.04654,0.061427,0.067807,0.066481,0.057839


In [26]:
def aggregate_features(features):
    aggregated_features = {}
    for key, value in features.items():
        aggregated_features[key] = {
            'mean': np.mean(value, axis=1),
            'std': np.std(value, axis=1),
            'min': np.min(value, axis=1),
            'max': np.max(value, axis=1)
        }
    return aggregated_features

def format_features(aggregated_features):
    formatted_features = []
    for key in aggregated_features:
        for stat in aggregated_features[key]:
            formatted_features.extend(aggregated_features[key][stat])
    return np.array(formatted_features)

In [27]:
ogg_file_path = './birdclef-2024/train_audio/asbfly/XC49755.ogg'
max_length = 22050 * 5  # For example, 5 seconds at a sample rate of 22050 Hz

# Extract and aggregate features
features = extract_audio_features(ogg_file_path, max_length)
aggregated_features = aggregate_features(features)

# Format features for model input
formatted_features = format_features(aggregated_features)

print(formatted_features.shape)
print(formatted_features)


(684,)
[-6.78951492e+01 -7.15780563e+01 -7.08037796e+01 -7.23653183e+01
 -7.57353897e+01 -7.60923386e+01 -7.72794800e+01 -7.69549332e+01
 -7.69534836e+01 -7.48166885e+01 -7.59738770e+01 -7.50231247e+01
 -7.48816299e+01 -7.21270599e+01 -7.18847809e+01 -6.99784622e+01
 -6.92440186e+01 -6.64677048e+01 -6.55641174e+01 -6.29078369e+01
 -6.35348587e+01 -6.19041214e+01 -6.14968338e+01 -6.00429764e+01
 -5.98880310e+01 -5.87049522e+01 -5.84747009e+01 -5.72715302e+01
 -5.75240135e+01 -5.58471909e+01 -5.55498390e+01 -5.40977554e+01
 -5.38592224e+01 -5.26902885e+01 -5.27443390e+01 -5.14852295e+01
 -5.19623528e+01 -5.10422821e+01 -5.14217949e+01 -5.06474037e+01
 -5.04771423e+01 -4.96308174e+01 -5.02442932e+01 -4.85827980e+01
 -4.86090584e+01 -4.86892662e+01 -4.79537125e+01 -4.74637413e+01
 -4.74799538e+01 -4.70840149e+01 -4.66258774e+01 -4.66038132e+01
 -4.59247398e+01 -4.57369804e+01 -4.46376724e+01 -4.42834740e+01
 -4.41352196e+01 -4.43094559e+01 -4.40298653e+01 -4.37026901e+01
 -4.32755165e+01 -

In [28]:

def load_data(file_paths, labels, max_length=22050*5):
    X = []
    y = []

    for file_path, label in zip(file_paths, labels):
        features = extract_audio_features(file_path, max_length)
        aggregated_features = aggregate_features(features)
        formatted_features = format_features(aggregated_features)
        X.append(formatted_features)
        y.append(label)
        # print(len(X), len(X[-1]),y)
        if len(X) % 100 == 0:
            print(f"Processed {len(X)} files")
    return np.array(X), np.array(y)


In [29]:
import jupyterlab
print(jupyterlab.__version__)

4.2.1


In [30]:
# Assuming df_train is already loaded in your notebook
file_paths = df_train['filename'].apply(lambda x : "./birdclef-2024/train_audio/" + x ).tolist()
labels = df_train['primary_label'].astype('category').cat.codes.tolist()

# Load data
# X, y = load_data(file_paths, labels)
X, y = np.load('X.npy'), np.load('y.npy')# load_data(file_paths, labels)

In [31]:
# save the x and y

np.save('X.npy', X)
np.save('y.npy', y)

In [32]:
import numpy as np
from concurrent.futures import ThreadPoolExecutor, as_completed

def extract_and_format_features(file_path, max_length=22050*5):
    try:
        features = extract_audio_features(file_path, max_length)
        aggregated_features = aggregate_features(features)
        formatted_features = format_features(aggregated_features)
        return formatted_features
    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        return None

def process_batch(file_paths, labels, max_length):
    X_batch = []
    y_batch = []
    for file_path, label in zip(file_paths, labels):
        formatted_features = extract_and_format_features(file_path, max_length)
        if formatted_features is not None:
            X_batch.append(formatted_features)
            y_batch.append(label)
    return X_batch, y_batch

def load_data(file_paths, labels, max_length=22050*5, batch_size=1000):
    X = []
    y = []

    # Split the data into batches
    batches = [(file_paths[i:i + batch_size], labels[i:i + batch_size])
               for i in range(0, len(file_paths), batch_size)]

    # Process batches in parallel using ThreadPoolExecutor
    with ThreadPoolExecutor() as executor:
        futures = [executor.submit(process_batch, batch_files, batch_labels, max_length) 
                   for batch_files, batch_labels in batches]
        
        for future in futures:
            X_batch, y_batch = future.result()
            X.extend(X_batch)
            y.extend(y_batch)

    return np.array(X), np.array(y)

# Assuming df_train is already loaded in your notebook
file_paths = df_train['filename'].apply(lambda x: "./birdclef-2024/train_audio/" + x).tolist()
labels = df_train['primary_label'].astype('category').cat.codes.tolist()

# Load data
X, y = np.load('X.npy'), np.load('y.npy')# load_data(file_paths, labels)

print(f"Loaded {X.shape[0]} files.")
print(X.shape)
print(y.shape)


Loaded 24459 files.
(24459, 684)
(24459,)


In [33]:
len(df_train)

24459

In [34]:
# save x and y as csv

# np.savetxt('X.csv', X, delimiter=',')
# np.savetxt('y.csv', y, delimiter=',')
print(X.shape)
print(y.shape)


(24459, 684)
(24459,)


In [35]:
print(X[1])

[-1.58963785e+01 -1.62785816e+01 -1.60039845e+01 -1.61279907e+01
 -1.68902264e+01 -1.66303806e+01 -1.90093765e+01 -1.82996597e+01
 -1.97097206e+01 -1.88165798e+01 -1.93298340e+01 -1.86974010e+01
 -1.96464577e+01 -1.79825916e+01 -1.93120747e+01 -1.85301857e+01
 -1.84365807e+01 -1.69842072e+01 -1.81452885e+01 -1.72752266e+01
 -1.80543804e+01 -1.79443626e+01 -1.84281940e+01 -1.83274288e+01
 -1.93307648e+01 -1.77445602e+01 -1.90370407e+01 -1.87021999e+01
 -1.97975559e+01 -1.95816879e+01 -2.00255089e+01 -1.96887741e+01
 -2.06255875e+01 -2.05158596e+01 -2.18840008e+01 -2.18313236e+01
 -2.27161179e+01 -2.29569702e+01 -2.43032150e+01 -2.38553371e+01
 -2.37021084e+01 -2.30606480e+01 -2.36355991e+01 -2.39111691e+01
 -2.52166252e+01 -2.51034756e+01 -2.48021603e+01 -2.50561848e+01
 -2.54581928e+01 -2.63199348e+01 -2.61479130e+01 -2.68570290e+01
 -2.75378208e+01 -2.76385975e+01 -2.76007042e+01 -2.82462063e+01
 -2.88554058e+01 -2.94312649e+01 -2.99256134e+01 -2.95353432e+01
 -3.00814590e+01 -2.95706

In [36]:
# Standardize the features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Convert labels to categorical
y = to_categorical(y)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print(X.shape, y.shape)
# Define the neural network model
def create_model(input_shape):
    model = Sequential([
        Dense(256, input_shape=(input_shape,), activation='relu'),
        Dropout(0.5),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(64, activation='relu'),
        Dense(y_train.shape[1], activation='softmax')
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Create and train the model
model = create_model(X_train.shape[1])
# model.summary()

# Train the model
history = model.fit(X_train, y_train, epochs=2, batch_size=256, validation_data=(X_test, y_test))

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.4f}")

(24459, 684) (24459, 182)
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.0249 - loss: 5.0090 - val_accuracy: 0.1065 - val_loss: 4.3312
Epoch 2/2
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.0888 - loss: 4.2861 - val_accuracy: 0.1609 - val_loss: 3.8701
[1m153/153[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 922us/step - accuracy: 0.1547 - loss: 3.8771
Test Accuracy: 0.1609


In [37]:
# import numpy as np
# import pandas as pd
# import tensorflow as tf
# from tensorflow.keras.models import Model
# from tensorflow.keras.layers import Input, Dense, Dropout, Conv2D, MaxPooling2D, Flatten, concatenate
# from tensorflow.keras.optimizers import Adam
# from tensorflow.keras.utils import to_categorical
# from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import StandardScaler

# # Load X and y from CSV files
# X = pd.read_csv('X.csv', header=None).values
# y = pd.read_csv('y.csv', header=None).values

# print(f"Loaded {X.shape[0]} files.")
# print(X.shape)
# print(y.shape)

# # Standardize the features
# scaler = StandardScaler()
# X = scaler.fit_transform(X)

# # Convert labels to categorical
# y = to_categorical(y)

# # Split data into training and testing sets
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# print(X.shape, y.shape)

# # Define the CNN model with metadata input
# def build_cnn_model(input_shape=(128, 128, 3), num_meta_features=2, num_classes=10):
#     # Image input branch
#     img_input = Input(shape=input_shape, name='img_input')
#     x = Conv2D(32, (3, 3), activation='relu')(img_input)
#     x = MaxPooling2D((2, 2))(x)
#     x = Dropout(0.25)(x)
#     x = Conv2D(64, (3, 3), activation='relu')(x)
#     x = MaxPooling2D((2, 2))(x)
#     x = Dropout(0.25)(x)
#     x = Conv2D(128, (3, 3), activation='relu')(x)
#     x = MaxPooling2D((2, 2))(x)
#     x = Dropout(0.25)(x)
#     x = Flatten()(x)
    
#     # Metadata input branch
#     meta_input = Input(shape=(num_meta_features,), name='meta_input')
#     y = Dense(256, activation='relu')(meta_input)
#     y = Dropout(0.5)(y)
#     y = Dense(128, activation='relu')(y)
#     y = Dropout(0.5)(y)
#     y = Dense(64, activation='relu')(y)
    
#     # Concatenate the outputs of the image and metadata branches
#     combined = concatenate([x, y])
#     z = Dense(256, activation='relu')(combined)
#     z = Dense(num_classes, activation='softmax')(z)
    
#     model = Model(inputs=[img_input, meta_input], outputs=z)
#     model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])
    
#     return model



In [38]:
import os
import librosa
import numpy as np
import pandas as pd
import librosa
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, Input, concatenate, Conv2D, MaxPooling2D, Flatten
from tensorflow.keras.utils import Sequence

# def get_max_audio_length(file_paths):
#     max_length = 0
#     for file_path in file_paths:
#         y, sr = librosa.load(file_path, sr=None)
#         length = len(y)
#         if length > max_length:
#             max_length = length
#     return max_length

# # Assuming df_train is already loaded in your notebook
# file_paths = df_train['filename'].apply(lambda x: "./birdclef-2024/train_audio/" + x).tolist()

# max_length = get_max_audio_length(file_paths)
# print(f"The maximum length of the audio files is: {max_length} samples")
max_length = 190855200 # already calculated

In [39]:
class AudioDataGenerator(Sequence):
    def __init__(self, file_paths, labels, batch_size, max_length, shuffle=True):
        self.file_paths = file_paths
        self.labels = labels
        self.batch_size = batch_size
        self.max_length = max_length
        self.shuffle = shuffle
        self.indices = np.arange(len(self.file_paths))
        self.on_epoch_end()

    def __len__(self):
        return int(np.floor(len(self.file_paths) / self.batch_size))

    def __getitem__(self, index):
        batch_indices = self.indices[index * self.batch_size:(index + 1) * self.batch_size]
        batch_files = [self.file_paths[i] for i in batch_indices]
        batch_labels = [self.labels[i] for i in batch_indices]
        
        X_audio = np.array([self.extract_audio_features(f) for f in batch_files])
        y = np.array(batch_labels)
        
        return X_audio, y

    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.indices)

    def extract_audio_features(self, file_path):
        y, sr = librosa.load(file_path, sr=SAMPLE_RATE)
        if len(y) < self.max_length:
            y = np.pad(y, (0, self.max_length - len(y)), 'constant')
        else:
            y = y[:self.max_length]
        
        features = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=8000)
        features_db = librosa.power_to_db(features, ref=np.max)
        return features_db.flatten()

# Constants
SAMPLE_RATE = 32000
FMIN = 20
FMAX = 16000
IMG_SIZE = (128, 128, 3)
BATCH_SIZE = 4
EPOCHS = 3
NUM_CLASSES = 182  # Update this based on your actual number of classes

# Load max_length from your notebook
max_length = 190855200  # Replace with your actual value

# Load X and y from CSV files
X = np.loadtxt('X.csv', delimiter=',')
y = np.loadtxt('y.csv', delimiter=',')

# Standardize the features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Convert labels to categorical
y = to_categorical(y)

# Split data into training and testing sets
indices = np.arange(len(df_train))
train_indices, test_indices, y_train, y_test = train_test_split(indices, y, test_size=0.2, random_state=42)

# Usage
train_audio_gen = AudioDataGenerator([file_paths[i] for i in train_indices], y_train, batch_size=BATCH_SIZE, max_length=max_length)
val_audio_gen = AudioDataGenerator([file_paths[i] for i in test_indices], y_test, batch_size=BATCH_SIZE, max_length=max_length)


In [40]:
class ImageDataGenerator(Sequence):
    def __init__(self, file_paths, labels, batch_size, max_length, img_size, shuffle=True):
        self.file_paths = file_paths
        self.labels = labels
        self.batch_size = batch_size
        self.max_length = max_length
        self.img_size = img_size
        self.shuffle = shuffle
        self.indices = np.arange(len(self.file_paths))
        self.on_epoch_end()

    def __len__(self):
        return int(np.floor(len(self.file_paths) / self.batch_size))

    def __getitem__(self, index):
        batch_indices = self.indices[index * self.batch_size:(index + 1) * self.batch_size]
        batch_files = [self.file_paths[i] for i in batch_indices]
        batch_labels = [self.labels[i] for i in batch_indices]
        
        X_images = np.array([self.generate_image(f) for f in batch_files])
        y = np.array(batch_labels)
        
        return X_images, y

    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.indices)

    def generate_image(self, file_path):
        audio, sr = librosa.load(file_path, sr=SAMPLE_RATE)
        if len(audio) < self.max_length:
            audio = np.tile(audio, int(np.ceil(self.max_length / len(audio))))[:self.max_length]
        else:
            audio = audio[:self.max_length]

        spec = librosa.feature.melspectrogram(y=audio, sr=SAMPLE_RATE, n_mels=256, n_fft=2048, hop_length=512, fmax=FMAX, fmin=FMIN)
        spec_db = librosa.power_to_db(spec, ref=np.max)
        spec_img = plt.cm.viridis(spec_db)
        spec_img = (spec_img[:, :, :3] * 255).astype(np.uint8)
        spec_img = tf.image.resize(spec_img, self.img_size[:2]) / 255.0
        
        return spec_img

# Usage
train_image_gen = ImageDataGenerator([file_paths[i] for i in train_indices], y_train, batch_size=BATCH_SIZE, max_length=max_length, img_size=IMG_SIZE)
val_image_gen = ImageDataGenerator([file_paths[i] for i in test_indices], y_test, batch_size=BATCH_SIZE, max_length=max_length, img_size=IMG_SIZE)


In [41]:
class CombinedDataGenerator(Sequence):
    def __init__(self, audio_gen, image_gen):
        self.audio_gen = audio_gen
        self.image_gen = image_gen

    def __len__(self):
        return min(len(self.audio_gen), len(self.image_gen))

    def __getitem__(self, index):
        X_audio, y_audio = self.audio_gen[index]
        X_image, y_image = self.image_gen[index]
        return [X_audio, X_image], y_audio  # Ensure labels match

    def on_epoch_end(self):
        self.audio_gen.on_epoch_end()
        self.image_gen.on_epoch_end()

# Combined generators
train_combined_gen = CombinedDataGenerator(train_audio_gen, train_image_gen)
val_combined_gen = CombinedDataGenerator(val_audio_gen, val_image_gen)


In [42]:
# Define the neural network model for audio features
audio_input = Input(shape=(X.shape[1],))
x_audio = Dense(256, activation='relu')(audio_input)
x_audio = Dropout(0.5)(x_audio)
x_audio = Dense(128, activation='relu')(x_audio)
x_audio = Dropout(0.5)(x_audio)
x_audio = Dense(64, activation='relu')(x_audio)

# Define the neural network model for image features
image_input = Input(shape=IMG_SIZE)
x_image = Conv2D(32, (3, 3), activation='relu')(image_input)
x_image = MaxPooling2D((2, 2))(x_image)
x_image = Conv2D(64, (3, 3), activation='relu')(x_image)
x_image = MaxPooling2D((2, 2))(x_image)
x_image = Flatten()(x_image)
x_image = Dense(128, activation='relu')(x_image)
x_image = Dropout(0.5)(x_image)

# Concatenate audio and image features
combined = concatenate([x_audio, x_image])
output = Dense(NUM_CLASSES, activation='softmax')(combined)

# Create the model
multi_input_model = Model(inputs=[audio_input, image_input], outputs=output)
multi_input_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
history = multi_input_model.fit(train_combined_gen, epochs=EPOCHS, validation_data=val_combined_gen)

# Evaluate the model
loss, accuracy = multi_input_model.evaluate(val_combined_gen)
print(f"Test Accuracy: {accuracy:.4f}")

# Now use the model to predict on the test set
predictions = multi_input_model.predict(val_combined_gen)
print(predictions)


In [None]:

def load_audio(filepath):
    audio, sr = librosa.load(filepath, sr=SAMPLE_RATE)
    return audio, sr

def get_spectrogram_rgb(audio, max_length, img_size=(128, 128)):
    if len(audio) < max_length:
        audio = np.tile(audio, int(np.ceil(max_length / len(audio))))[:max_length]
    else:
        audio = audio[:max_length]
    
    spec = librosa.feature.melspectrogram(y=audio, sr=SAMPLE_RATE, n_mels=256, n_fft=2048, hop_length=512, fmax=FMAX, fmin=FMIN)
    spec_db = librosa.power_to_db(spec, ref=np.max)
    spec_img = plt.cm.viridis(spec_db)
    spec_img = (spec_img[:, :, :3] * 255).astype(np.uint8)  # Discard alpha channel and convert to 8-bit integer
    spec_img = tf.image.resize(spec_img, img_size[:2]) / 255.0
    return spec_img

class DataGenerator(Sequence):
    def __init__(self, file_paths, labels, indices, batch_size, max_length, img_size, shuffle=True):
        self.file_paths = file_paths
        self.labels = labels
        self.indices = indices
        self.batch_size = batch_size
        self.max_length = max_length
        self.img_size = img_size
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        return int(np.floor(len(self.indices) / self.batch_size))

    def __getitem__(self, index):
        batch_indices = self.indices[index * self.batch_size:(index + 1) * self.batch_size]
        X_audio = np.array([X[i] for i in batch_indices])
        y = np.array([self.labels[i] for i in batch_indices])
        X_images = self.generate_images(batch_indices)
        return [X_audio, X_images], y

    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.indices)

    def generate_images(self, batch_indices):
        images = []
        for i in batch_indices:
            audio, sr = load_audio(self.file_paths[i])
            img = get_spectrogram_rgb(audio, self.max_length, self.img_size)
            images.append(img)
        return np.array(images)

# Assuming df_train is already loaded in your notebook
file_paths = df_train['filename'].apply(lambda x: "./birdclef-2024/train_audio/" + x).tolist()

# Create DataGenerators for training and validation
train_generator = DataGenerator(file_paths, y, train_indices, batch_size=BATCH_SIZE, max_length=max_length, img_size=IMG_SIZE)
val_generator = DataGenerator(file_paths, y, test_indices, batch_size=BATCH_SIZE, max_length=max_length, img_size=IMG_SIZE, shuffle=False)

# Define the neural network model for audio features
audio_input = Input(shape=(X.shape[1],))
x_audio = Dense(256, activation='relu')(audio_input)
x_audio = Dropout(0.5)(x_audio)
x_audio = Dense(128, activation='relu')(x_audio)
x_audio = Dropout(0.5)(x_audio)
x_audio = Dense(64, activation='relu')(x_audio)

# Define the neural network model for image features
image_input = Input(shape=IMG_SIZE)
x_image = Conv2D(32, (3, 3), activation='relu')(image_input)
x_image = MaxPooling2D((2, 2))(x_image)
x_image = Conv2D(64, (3, 3), activation='relu')(x_image)
x_image = MaxPooling2D((2, 2))(x_image)
x_image = Flatten()(x_image)
x_image = Dense(128, activation='relu')(x_image)
x_image = Dropout(0.5)(x_image)

# Concatenate audio and image features
combined = concatenate([x_audio, x_image])
output = Dense(NUM_CLASSES, activation='softmax')(combined)

# Create the model
multi_input_model = Model(inputs=[audio_input, image_input], outputs=output)
multi_input_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
history = multi_input_model.fit(train_generator, epochs=EPOCHS, validation_data=val_generator)

# Evaluate the model
loss, accuracy = multi_input_model.evaluate(val_generator)
print(f"Test Accuracy: {accuracy:.4f}")

# Now use the model to predict on the test set
predictions = multi_input_model.predict(val_generator)
print(predictions)


In [None]:
# Define the neural network model for audio features
audio_input = Input(shape=(X_train.shape[1],))
x_audio = Dense(256, activation='relu')(audio_input)
x_audio = Dropout(0.5)(x_audio)
x_audio = Dense(128, activation='relu')(x_audio)
x_audio = Dropout(0.5)(x_audio)
x_audio = Dense(64, activation='relu')(x_audio)

# Define the neural network model for image features
image_input = Input(shape=IMG_SIZE)
x_image = Conv2D(32, (3, 3), activation='relu')(image_input)
x_image = MaxPooling2D((2, 2))(x_image)
x_image = Conv2D(64, (3, 3), activation='relu')(x_image)
x_image = MaxPooling2D((2, 2))(x_image)
x_image = Flatten()(x_image)
x_image = Dense(128, activation='relu')(x_image)
x_image = Dropout(0.5)(x_image)

# Concatenate audio and image features
combined = concatenate([x_audio, x_image])
output = Dense(NUM_CLASSES, activation='softmax')(combined)

# Create the model
multi_input_model = Model(inputs=[audio_input, image_input], outputs=output)
multi_input_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])



In [None]:
# Train the model
history = multi_input_model.fit([X_train, X_train_img], y_train, epochs=EPOCHS, batch_size=BATCH_SIZE, validation_data=([X_test, X_test_img], y_test))

# Evaluate the model
loss, accuracy = multi_input_model.evaluate([X_test, X_test_img], y_test)
print(f"Test Accuracy: {accuracy:.4f}")

# Now use the model to predict on the test set
predictions = multi_input_model.predict([X_test, X_test_img])
print(predictions)

# Hecatombe²

In [None]:
import os
import librosa
import matplotlib.pyplot as plt
import numpy as np

class CFG:
    seed = 42
    
    # Input image size and batch size
    img_size = [128, 128]  # Adjusted to only include height and width
    batch_size = 64
    
    # Audio duration, sample rate, and length
    duration = 15  # second
    sample_rate = 32000
    audio_len = duration * sample_rate
    
    # STFT parameters
    nfft = 2028
    window = 2048
    hop_length = audio_len // (img_size[1] - 1)
    fmin = 20
    fmax = 16000
    max_length = 190855200 # already calculated
    # Number of epochs, model name
    epochs = 3  # à changer quand on sera sûrs de nous
    preset = 'efficientnetv2_b2_imagenet'
    
    # Data augmentation parameters
    augment = True

    # Class Labels for BirdCLEF 24
    class_names = sorted(os.listdir('birdclef-2024/train_audio/'))
    num_classes = len(class_names)
    class_labels = list(range(num_classes))
    label2name = dict(zip(class_labels, class_names))
    name2label = {v: k for k, v in label2name.items()}

def load_audio(filepath):
    audio, sr = librosa.load(filepath, sr=CFG.sample_rate)
    return audio, sr

def get_spectrogram_rgb(audio):
    spec = librosa.feature.melspectrogram(y=audio, sr=CFG.sample_rate, n_mels=256, n_fft=2048, hop_length=512, fmax=CFG.fmax, fmin=CFG.fmin)
    spec_db = librosa.power_to_db(spec, ref=np.max)
    # Convert spectrogram to color image
    spec_img = plt.cm.viridis(spec_db)
    spec_img = (spec_img[:, :, :3] * 255).astype(np.uint8)  # Discard alpha channel and convert to 8-bit integer
    return spec_img

In [None]:

class DataGenerator(Sequence):
    def __init__(self, audio_data, labels, batch_size, max_length, img_size, shuffle=True):
        self.audio_data = audio_data
        self.labels = labels
        self.batch_size = batch_size
        self.max_length = max_length
        self.img_size = img_size
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        return int(np.floor(len(self.audio_data) / self.batch_size))

    def __getitem__(self, index):
        indices = self.indices[index * self.batch_size:(index + 1) * self.batch_size]
        audio_batch = [self.audio_data[i] for i in indices]
        X_audio = np.array(audio_batch)
        y_batch = [self.labels[i] for i in indices]
        y = np.array(y_batch)
        X_images = self.generate_images(indices)
        return [X_audio, X_images], y

    def on_epoch_end(self):
        self.indices = np.arange(len(self.audio_data))
        if self.shuffle:
            np.random.shuffle(self.indices)

    def generate_images(self, indices):
        images = []
        for i in indices:
            audio = self.audio_data[i]
            img = self.audio_to_image(audio)
            images.append(img)
        return np.array(images)

    def audio_to_image(self, audio):
        if len(audio) < self.max_length:
            audio = np.tile(audio, int(np.ceil(self.max_length / len(audio))))[:self.max_length]
        else:
            audio = audio[:self.max_length]

        spec_img = get_spectrogram_rgb(audio)
        img_array = tf.image.resize(spec_img, self.img_size[:2]) / 255.0  # Ensure img_size has only height and width
        return img_array

# Example Usage
batch_size = CFG.batch_size
img_size = (CFG.img_size[0], CFG.img_size[1], 3)  # Height and width with 3 channels
max_length = CFG.audio_len
n_classes = len(CFG.class_names)

# Assuming X and y are already computed and standardized
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

train_generator = DataGenerator(X_train, y_train, batch_size=batch_size, max_length=max_length, img_size=CFG.img_size)
val_generator = DataGenerator(X_test, y_test, batch_size=batch_size, max_length=max_length, img_size=CFG.img_size, shuffle=False)

def create_tf_dataset(generator):
    def gen():
        for inputs, outputs in generator:
            yield inputs, outputs

    audio_shape = tf.TensorSpec(shape=(None, X_train.shape[1]), dtype=tf.float32)
    image_shape = tf.TensorSpec(shape=(None, CFG.img_size[0], CFG.img_size[1], 3), dtype=tf.float32)
    label_shape = tf.TensorSpec(shape=(None, len(CFG.class_names)), dtype=tf.float32)

    output_signature = (
        ([audio_shape, image_shape], label_shape)
    )

    return tf.data.Dataset.from_generator(gen, output_signature=output_signature)

train_dataset = create_tf_dataset(train_generator)
val_dataset = create_tf_dataset(val_generator)


TypeError: `output_signature` must contain objects that are subclass of `tf.TypeSpec` but found <class 'list'> which is not.

In [None]:

# Define your multi-input model as before
def create_audio_model(input_shape):
    audio_input = Input(shape=(input_shape,))
    x = Dense(256, activation='relu')(audio_input)
    x = Dropout(0.5)(x)
    x = Dense(128, activation='relu')(x)
    x = Dropout(0.5)(x)
    x = Dense(64, activation='relu')(x)
    return audio_input, x

def create_image_model(input_shape):
    image_input = Input(shape=input_shape)
    y = Conv2D(32, (3, 3), activation='relu')(image_input)
    y = MaxPooling2D((2, 2))(y)
    y = Dropout(0.5)(y)
    y = Conv2D(64, (3, 3), activation='relu')(image_input)
    y = MaxPooling2D((2, 2))(y)
    y = Dropout(0.5)(y)
    y = Flatten()(y)
    y = Dense(128, activation='relu')(y)
    y = Dropout(0.5)(y)
    return image_input, y

def create_multi_input_model(audio_input_shape, image_input_shape, num_classes):
    audio_input, audio_output = create_audio_model(audio_input_shape)
    image_input, image_output = create_image_model(image_input_shape)

    combined = concatenate([audio_output, image_output])
    z = Dense(64, activation='relu')(combined)
    z = Dropout(0.5)(z)
    z = Dense(num_classes, activation='softmax')(z)

    model = Model(inputs=[audio_input, image_input], outputs=z)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

audio_input_shape = X_train.shape[1]
image_input_shape = (CFG.img_size[0], CFG.img_size[1], 3)
num_classes = len(CFG.class_names)

multi_input_model = create_multi_input_model(audio_input_shape, image_input_shape, num_classes)
multi_input_model.summary()

In [None]:

# Train the model
history = multi_input_model.fit(train_dataset, epochs=CFG.epochs, validation_data=val_dataset)

# Evaluate the model
loss, accuracy = multi_input_model.evaluate(val_dataset)
print(f"Test Accuracy: {accuracy:.4f}")

TypeError: `output_signature` must contain objects that are subclass of `tf.TypeSpec` but found <class 'list'> which is not.

In [None]:
import matplotlib.pyplot as plt
# Generate spectrograms for a sample of 50 files
spectrograms_list = df_train.sample(50)['filename'].apply(lambda x: get_spectrogram_rgb(load_audio(f'./birdclef-2024/train_audio/{x}')[0])).tolist()

# Convert the list to a 4D NumPy array
spectrograms = np.stack(spectrograms_list, axis=0)

# Verify the shape of the spectrograms array
print(f"Spectrograms shape: {spectrograms.shape}")

ValueError: all input arrays must have the same shape

In [None]:

# Number of metadata features
num_meta_features = X.shape[1]

# Number of classes
num_classes = y.shape[1]

# Build the model
model_2 = build_cnn_model(input_shape=(128, 128, 1), num_meta_features=num_meta_features, num_classes=num_classes)

# Train the model
history_2 = model_2.fit(
    {'img_input': spectrograms, 'meta_input': X},
    y,
    epochs=50,  # Reduce epochs to check early stopping
    batch_size=64,  # Reduce batch size to mitigate memory issues
    validation_split=0.2
)

# Evaluate the model
loss_2, accuracy_2 = model_2.evaluate({'img_input': spectrograms, 'meta_input': X_test}, y_test)
print(f"Test Accuracy: {accuracy_2:.4f}")


ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type numpy.ndarray).

In [None]:

# Visualize training history
import matplotlib.pyplot as plt

plt.figure(figsize=(12, 4))

plt.subplot(1, 2, 1)
plt.plot(history.history['accuracy'], label='Training Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.title('Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.show()