In [1]:
import pandas as pd
import librosa
import numpy as np
from sklearn.model_selection import train_test_split
from tqdm import tqdm

import os 
# Move the working directory up one level so the relative paths used below
# ('csv/...', 'Audio folders/...', 'private/npy/...') resolve -- presumably
# this is the project root; verify against the repository layout.
# The guard makes the cell safe to re-run within one kernel session: without
# it, every extra execution would climb one more directory and break those paths.
if "NOTEBOOK_START_DIR" not in globals():
    NOTEBOOK_START_DIR = os.getcwd()  # remembered so the original location can be inspected
    os.chdir("../")

In [2]:
# Read the per-clip metadata table. The processing loop further down uses its
# 'name' column to locate each audio file and its remaining columns as labels.
csv_file_path = 'csv/broken_3s_audio_data.csv'
df = pd.read_csv(filepath_or_buffer=csv_file_path)

In [22]:
# Feature-extraction settings, gathered in one place so they are easy to tune.
SAMPLE_RATE = 22050                       # every clip is resampled to this rate on load
AUDIO_DIR = 'Audio folders/3sec_audio/'   # Adjust the path as needed
LANGUAGE_COLUMNS = ["english", "hindi", "punjabi", "bangoli"]
GENDER_COLUMNS = ['male', 'female']

audio_data = []        # spectrogram of every clip (paired with labels_noise)
audio_data2 = []       # spectrograms of clips with noise == 0 (paired with language/gender labels)
labels_language = []
labels_noise = []
labels_gender = []

# Step 2 and 3: load each audio file and convert it to a dB-scaled mel spectrogram.
for index, row in tqdm(df.iterrows(), total=len(df), desc="Processing audio files"):
    audio_file_path = AUDIO_DIR + row['name']
    audio, sample_rate = librosa.load(audio_file_path, sr=SAMPLE_RATE)

    # melspectrogram() returns a *power* spectrogram by default (power=2.0),
    # so the matching log conversion is power_to_db. amplitude_to_db would
    # square the values again, doubling every dB value and halving the
    # effective dynamic range before clipping.
    # NOTE: arrays saved by earlier runs used the amplitude conversion;
    # regenerate them (and retrain dependent models) after this change.
    mel_spectrogram = librosa.feature.melspectrogram(y=audio, sr=sample_rate)
    mel_spectrogram_db = librosa.power_to_db(mel_spectrogram, ref=np.max)

    # Add a trailing axis of size 1 (a single "channel").
    mel_spectrogram_db_with_channel = np.expand_dims(mel_spectrogram_db, axis=-1)

    # Every clip contributes to the noise data set. Labels are stored as small
    # float32 rows rather than pandas Series so the stacked label arrays are
    # numeric from the start instead of object-dtype.
    audio_data.append(mel_spectrogram_db_with_channel)
    labels_noise.append(row[["noise"]].to_numpy(dtype=np.float32))

    # Only clips whose 'noise' flag is 0 get language and gender labels.
    if row["noise"] == 0:
        audio_data2.append(mel_spectrogram_db_with_channel)
        labels_language.append(row[LANGUAGE_COLUMNS].to_numpy(dtype=np.float32))
        labels_gender.append(row[GENDER_COLUMNS].to_numpy(dtype=np.float32))

Processing audio files: 100%|██████████| 13388/13388 [02:23<00:00, 93.46it/s] 


In [23]:
# Step 4: Create NumPy arrays
# Step 4: turn the per-clip Python lists into NumPy arrays (one entry per clip
# along the first axis). The label lists are converted first, then the
# spectrogram lists; the conversions are independent of each other.
labels_noise = np.asarray(labels_noise)
labels_gender = np.asarray(labels_gender)
labels_language = np.asarray(labels_language)
audio_data2 = np.asarray(audio_data2)
audio_data = np.asarray(audio_data)

In [24]:
# Hold out 20% of the clips for testing; the fixed seed keeps the split reproducible.
#
# The language and gender labels both belong to the same clips (audio_data2),
# so they are split in a single call. train_test_split applies one set of
# shuffled indices to every array it is given, which yields exactly the
# partition two separate calls with the same seed would produce -- but the
# spectrogram split is materialised once instead of twice.
X_train_l, X_test_l, y_train_l, y_test_l, y_train_g, y_test_g = train_test_split(
    audio_data2, labels_language, labels_gender, test_size=0.2, random_state=42
)
# The gender task uses the same inputs; expose them under the names later cells expect.
X_train_g, X_test_g = X_train_l, X_test_l

# The noise task covers every clip, so it gets its own split.
X_train_n, X_test_n, y_train_n, y_test_n = train_test_split(
    audio_data, labels_noise, test_size=0.2, random_state=42
)

In [25]:
# Convert data types if needed
# Convert the language split to float32 where needed
# (np.asarray leaves an array untouched when it already has that dtype).
X_train_l, X_test_l, y_train_l, y_test_l = (
    np.asarray(split_part, dtype=np.float32)
    for split_part in (X_train_l, X_test_l, y_train_l, y_test_l)
)

In [26]:
# Convert the gender split to float32 where needed
# (np.asarray leaves an array untouched when it already has that dtype).
X_train_g, X_test_g, y_train_g, y_test_g = (
    np.asarray(split_part, dtype=np.float32)
    for split_part in (X_train_g, X_test_g, y_train_g, y_test_g)
)

In [27]:
# Convert the noise split to float32 where needed
# (np.asarray leaves an array untouched when it already has that dtype).
X_train_n, X_test_n, y_train_n, y_test_n = (
    np.asarray(split_part, dtype=np.float32)
    for split_part in (X_train_n, X_test_n, y_train_n, y_test_n)
)

In [28]:
# Save the array to a file
# Save the language train/test arrays to .npy files.
# np.save does not create missing folders, so make sure the output directory
# exists first; otherwise a missing folder would only surface as an error
# after the lengthy audio-processing cell has already run.
os.makedirs('private/npy', exist_ok=True)
np.save('private/npy/X_train_language.npy', X_train_l)
np.save('private/npy/X_test_language.npy', X_test_l)
np.save('private/npy/y_train_language.npy', y_train_l)
np.save('private/npy/y_test_language.npy', y_test_l)

In [29]:
# Save the array to a file
# Save the gender train/test arrays to .npy files.
# np.save does not create missing folders, so make sure the output directory
# exists first; otherwise a missing folder would only surface as an error
# after the lengthy audio-processing cell has already run.
os.makedirs('private/npy', exist_ok=True)
np.save('private/npy/X_train_gender.npy', X_train_g)
np.save('private/npy/X_test_gender.npy', X_test_g)
np.save('private/npy/y_train_gender.npy', y_train_g)
np.save('private/npy/y_test_gender.npy', y_test_g)

In [30]:
# Save the array to a file
# Save the noise train/test arrays to .npy files.
# np.save does not create missing folders, so make sure the output directory
# exists first; otherwise a missing folder would only surface as an error
# after the lengthy audio-processing cell has already run.
os.makedirs('private/npy', exist_ok=True)
np.save('private/npy/X_train_noise.npy', X_train_n)
np.save('private/npy/X_test_noise.npy', X_test_n)
np.save('private/npy/y_train_noise.npy', y_train_n)
np.save('private/npy/y_test_noise.npy', y_test_n)