In [None]:
# Install necessary libraries
!pip install librosa wget


Collecting wget
  Downloading wget-3.2.zip (10 kB)
  Preparing metadata (setup.py) ... done
Building wheels for collected packages: wget
  Building wheel for wget (setup.py) ... done
  Created wheel for wget: filename=wget-3.2-py3-none-any.whl size=9657 sha256=83f9e5726c2e76b4fc3c1ef01497c327def4025bf3d067d5bb779a99744bc17a
  Stored in directory: /root/.cache/pip/wheels/8b/f1/7f/5c94f0a7a505ca1c81cd1d9208ae2064675d97582078e6c769
Successfully built wget
Installing collected packages: wget
Successfully installed wget-3.2


In [None]:

# Import required libraries
import os
import librosa
import numpy as np
import pandas as pd
import wget
import zipfile


In [None]:

# Download and extract the ESC-50 dataset
esc50_url = "https://github.com/karoldvl/ESC-50/archive/master.zip"
download_dir = "/content/esc50_dataset"

# Where the archive is saved, and where the audio clips live once it is unpacked
zip_file_path = os.path.join(download_dir, "esc50.zip")
esc50_path = os.path.join(download_dir, "ESC-50-master/audio/")

# Only fetch and unpack when the audio folder is missing, so re-running this
# cell (or the whole notebook) does not download the archive again.
# NOTE: if an earlier extraction was interrupted, delete download_dir to force
# a clean re-download.
if not os.path.isdir(esc50_path):
    # Create the directory if it doesn't exist
    os.makedirs(download_dir, exist_ok=True)

    # wget.download returns the name of the file it saved; use that for the
    # following steps rather than assuming it equals zip_file_path.
    downloaded_zip = wget.download(esc50_url, out=zip_file_path)

    try:
        # Extract the contents of the zip file
        with zipfile.ZipFile(downloaded_zip, 'r') as zip_ref:
            zip_ref.extractall(download_dir)
    finally:
        # Clean up: remove the zip file even when extraction fails
        os.remove(downloaded_zip)


In [None]:
# Function to extract features like MFCCs from audio files
def extract_features(file_path):
    """Return the time-averaged MFCC vector for one audio file.

    Args:
        file_path: Path to an audio file that ``librosa.load`` can read.

    Returns:
        1-D NumPy array with the mean of each MFCC coefficient taken across
        all frames (one value per coefficient).
    """
    # sr=None keeps the file's native sampling rate instead of resampling.
    audio_data, sample_rate = librosa.load(file_path, sr=None)
    # Pass the real sampling rate along: without it, mfcc falls back to its
    # default of 22050 Hz, which is wrong for audio loaded at another rate.
    features = librosa.feature.mfcc(y=audio_data, sr=sample_rate)
    # Collapse the frame axis so every clip yields a fixed-length vector.
    return np.mean(features, axis=1)

# Collect one record per clip and build the DataFrame in a single call.
# DataFrame.append was removed in pandas 2.0, and growing a frame row by row
# copies it on every iteration.
records = []

# Loop through each audio file in the ESC-50 dataset.
# sorted() makes the row order reproducible; os.listdir returns entries in
# arbitrary order.
for filename in sorted(os.listdir(esc50_path)):
    if filename.endswith(".wav"):
        file_path = os.path.join(esc50_path, filename)
        features = extract_features(file_path)
        # Every clip from this dataset gets the same 'non-bird' label.
        records.append({"feature": features, "label": 'non-bird'})

# Explicit columns keep the expected schema even when no .wav file was found.
df = pd.DataFrame(records, columns=["feature", "label"])

# Save the DataFrame to a CSV file
# NOTE: the "feature" column holds NumPy arrays, which to_csv writes out in
# their string form.
df.to_csv("/content/esc50_features.csv", index=False)

In [None]:
# Preview the top five rows of the feature table
df.head(n=5)


Unnamed: 0,feature,label
0,"[-305.2682, 146.93658, -14.617731, 6.6561866, ...",non-bird
1,"[-224.59676, 195.54973, -21.89352, 43.532825, ...",non-bird
2,"[-482.77548, 134.1601, 60.584145, 45.561996, 1...",non-bird
3,"[-382.64383, 153.57903, -69.277245, 32.24831, ...",non-bird
4,"[-297.14456, 197.8043, 32.94539, 37.395073, 27...",non-bird
