In [2]:
import os
import librosa
import numpy as np
import pandas as pd
import librosa.display
from sklearn.preprocessing import LabelEncoder

In [3]:
def extract_mfcc_features(folder, output_csv, n_mfcc=13, sampling_rate=22050):
    """Write one row of averaged MFCCs per WAV file found under ``folder`` to a CSV.

    ``folder`` is walked recursively. Every file whose name ends in ``.wav``
    (matched case-insensitively) is loaded with ``librosa.load`` at
    ``sampling_rate``, its MFCC matrix is computed with
    ``librosa.feature.mfcc`` and then averaged along axis 1, giving one value
    per coefficient. The name of the directory that directly contains the
    file is stored as that row's label.

    Parameters
    ----------
    folder : str or path-like
        Root directory to search for WAV files.
    output_csv : str, path-like or file-like
        Destination passed to ``DataFrame.to_csv``. When it is a path, its
        parent directory is created if it does not exist yet.
    n_mfcc : int, default 13
        Number of MFCC coefficients requested from librosa.
    sampling_rate : int, default 22050
        Sampling rate passed to ``librosa.load`` as ``sr``.

    Returns
    -------
    None
        The result is only written to ``output_csv``: the feature columns
        followed by a final ``label`` column, without the index.
    """
    # Create the destination directory before touching any audio, so a missing
    # folder cannot make the final to_csv fail after the slow extraction pass.
    if isinstance(output_csv, (str, os.PathLike)):
        parent_dir = os.path.dirname(os.fspath(output_csv))
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    features = []
    labels = []

    for root, dirs, files in os.walk(folder):
        # Sorting `dirs` in place fixes the order os.walk descends in; together
        # with sorted(files) this makes the CSV row order reproducible.
        dirs.sort()
        for file in sorted(files):
            # Case-insensitive so that e.g. "clip.WAV" is not silently skipped.
            if not file.lower().endswith('.wav'):
                continue
            file_path = os.path.join(root, file)
            # Label = name of the directory that directly contains the file.
            label = os.path.basename(os.path.dirname(file_path))
            audio, sr = librosa.load(file_path, sr=sampling_rate)
            mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)
            # Collapse axis 1 so each file contributes one value per coefficient.
            features.append(np.mean(mfccs, axis=1))
            labels.append(label)

    data = pd.DataFrame(features)
    data['label'] = labels
    data.to_csv(output_csv, index=False)

def shuffle_csv_and_save_in_place(input_csv, random_state=None):
    """Randomly reorder the rows of a CSV file and overwrite it with the result.

    Parameters
    ----------
    input_csv : str or path-like
        CSV file to read; the same path is overwritten with the shuffled rows.
    random_state : int, numpy random generator or None, default None
        Forwarded to ``DataFrame.sample``. Pass a fixed value to make the
        shuffle reproducible across runs; ``None`` keeps the previous
        behaviour of a different order on every call.

    Returns
    -------
    None
    """
    df = pd.read_csv(input_csv)
    # frac=1 samples every row exactly once, i.e. a full permutation; the index
    # is reset so the old row positions are discarded before saving.
    df_shuffled = df.sample(frac=1, random_state=random_state).reset_index(drop=True)
    df_shuffled.to_csv(input_csv, index=False)

def encode_labels_and_save_in_place(input_csv):
    """Add an 'Encoded_label' column to a CSV file and overwrite the file.

    The file is read, a new ``LabelEncoder`` is fitted on the 'label' column,
    and the codes it returns are stored in 'Encoded_label'. The 'label'
    column itself is kept. Nothing is returned.
    """
    frame = pd.read_csv(input_csv)
    codes = LabelEncoder().fit_transform(frame['label'])
    frame.assign(Encoded_label=codes).to_csv(input_csv, index=False)

def delete_label(input_csv):
    """Remove the 'label' column from a CSV file, overwriting the file.

    All other columns are written back unchanged and without the index.
    pandas raises ``KeyError`` if the file has no 'label' column.
    """
    pd.read_csv(input_csv).drop(columns=['label']).to_csv(input_csv, index=False)


# 1-sekundowe pliki

## 13 cech

In [6]:
# Extract 13 averaged MFCC features for every .wav file under the 1-second
# train folder and write them, with folder-name labels, to the CSV below.
folder = "Sounds_1_seconds/train"
output_csv = "Sounds_1_seconds/mfcc_1_sec/train_13_mfcc_features_with_labels_1sec.csv"
n_mfcc = 13

extract_mfcc_features(folder, output_csv, n_mfcc=n_mfcc, sampling_rate=22050)

In [8]:
# Shuffle the rows of the 1-second, 13-MFCC test CSV and add an
# 'Encoded_label' column; both helpers overwrite the file in place.
# NOTE(review): no visible cell creates this test CSV (the extraction cell for
# this section writes the train CSV). Presumably it came from an earlier run
# with the paths pointed at the test split — confirm before Restart & Run All.
input_csv = "Sounds_1_seconds/mfcc_1_sec/test_13_mfcc_features_with_labels_1sec.csv"

shuffle_csv_and_save_in_place(input_csv)
encode_labels_and_save_in_place(input_csv)

In [10]:
# Drop the text 'label' column from the 1-second, 13-MFCC train CSV.
# NOTE(review): no visible cell adds 'Encoded_label' to this train CSV (the
# shuffle/encode cell for this section targets the test file), so after this
# call the file may contain no label information — verify the intended paths.
input_csv = "Sounds_1_seconds/mfcc_1_sec/train_13_mfcc_features_with_labels_1sec.csv"
delete_label(input_csv)

## 30 cech

In [11]:
# Extract 30 averaged MFCC features for every .wav file under the 1-second
# train folder and write them, with folder-name labels, to the CSV below.
folder = "Sounds_1_seconds/train"
output_csv = "Sounds_1_seconds/mfcc_1_sec/train_30_mfcc_features_with_labels_1sec.csv"
n_mfcc = 30

extract_mfcc_features(folder, output_csv, n_mfcc=n_mfcc, sampling_rate=22050)

In [12]:
# Shuffle the rows of the 1-second, 30-MFCC train CSV and add an
# 'Encoded_label' column; both helpers overwrite the file in place.
input_csv = "Sounds_1_seconds/mfcc_1_sec/train_30_mfcc_features_with_labels_1sec.csv"
shuffle_csv_and_save_in_place(input_csv)
encode_labels_and_save_in_place(input_csv)

In [6]:
# Drop the text 'label' column from the 1-second, 30-MFCC train CSV. This must
# run after the encode cell, otherwise the file is left without any label.
input_csv = "Sounds_1_seconds/mfcc_1_sec/train_30_mfcc_features_with_labels_1sec.csv"
delete_label(input_csv)

## 50 cech

In [15]:
# Extract 50 averaged MFCC features for every .wav file under the 1-second
# test folder and write them, with folder-name labels, to the CSV below.
folder = "Sounds_1_seconds/test"
output_csv = "Sounds_1_seconds/mfcc_1_sec/test_50_mfcc_features_with_labels_1sec.csv"
n_mfcc = 50

extract_mfcc_features(folder, output_csv, n_mfcc=n_mfcc, sampling_rate=22050)

In [16]:
# Shuffle the rows of the 1-second, 50-MFCC test CSV and add an
# 'Encoded_label' column; both helpers overwrite the file in place.
input_csv = "Sounds_1_seconds/mfcc_1_sec/test_50_mfcc_features_with_labels_1sec.csv"
shuffle_csv_and_save_in_place(input_csv)
encode_labels_and_save_in_place(input_csv)

In [7]:
# Drop the text 'label' column from the 1-second, 50-MFCC train CSV.
# NOTE(review): the visible cells of this section only create and encode the
# test CSV; this train file is presumably left over from an earlier run with
# different paths — confirm it exists and already has 'Encoded_label'.
input_csv = "Sounds_1_seconds/mfcc_1_sec/train_50_mfcc_features_with_labels_1sec.csv"
delete_label(input_csv)

# 3-sekundowe pliki

In [None]:
# Extract 13 averaged MFCC features for every .wav file under the 3-second
# train folder and write them, with folder-name labels, to the CSV below.
folder = "Sounds_3_seconds/train"
output_csv = "Sounds_3_seconds/mfcc_3_sec/train_13_mfcc_features_with_labels_3sec.csv"
n_mfcc = 13

extract_mfcc_features(folder, output_csv, n_mfcc=n_mfcc, sampling_rate=22050)


In [24]:
# Shuffle the rows of the 3-second, 13-MFCC train CSV and add an
# 'Encoded_label' column; both helpers overwrite the file in place.
input_csv = "Sounds_3_seconds/mfcc_3_sec/train_13_mfcc_features_with_labels_3sec.csv"
shuffle_csv_and_save_in_place(input_csv)
encode_labels_and_save_in_place(input_csv)

In [5]:
# Drop the text 'label' column from the 3-second, 13-MFCC test CSV.
# NOTE(review): the visible cells of this section only create and encode the
# train CSV; this test file is presumably left over from an earlier run with
# different paths — confirm it exists and already has 'Encoded_label'.
input_csv = "Sounds_3_seconds/mfcc_3_sec/test_13_mfcc_features_with_labels_3sec.csv"
delete_label(input_csv)

# 5-sekundowe pliki

In [None]:
# Extract 13 averaged MFCC features for every .wav file under the 5-second
# test folder and write them, with folder-name labels, to the CSV below.
folder = "Sounds_5_seconds/test"
# A single string cannot be tuple-unpacked into two names: the previous
# `input_csv, output_csv = "..."` raised ValueError before anything ran.
# A chained assignment binds both names to the same path instead.
input_csv = output_csv = "Sounds_5_seconds/mfcc_5_sec/test_13_mfcc_features_with_labels_5sec.csv"
n_mfcc = 13

extract_mfcc_features(folder, output_csv, n_mfcc=n_mfcc, sampling_rate=22050)


In [26]:
# Shuffle the rows of the 5-second, 13-MFCC train CSV and add an
# 'Encoded_label' column; both helpers overwrite the file in place.
# NOTE(review): the visible extraction cell of this section writes the test
# CSV, not this train CSV — presumably the train file came from an earlier
# run with different paths; confirm before Restart & Run All.
input_csv = "Sounds_5_seconds/mfcc_5_sec/train_13_mfcc_features_with_labels_5sec.csv"
shuffle_csv_and_save_in_place(input_csv)
encode_labels_and_save_in_place(input_csv)

In [7]:
# Drop the text 'label' column from the 5-second, 13-MFCC test CSV.
# NOTE(review): no visible cell adds 'Encoded_label' to this test CSV (the
# shuffle/encode cell for this section targets the train file), so after this
# call the file may contain no label information — verify the intended paths.
input_csv = "Sounds_5_seconds/mfcc_5_sec/test_13_mfcc_features_with_labels_5sec.csv"
delete_label(input_csv)