<a href="https://colab.research.google.com/github/NRmethun/ML-DL-Notebook/blob/main/Audio_preprocessing_and_feature_extraction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Clipping**

In [None]:
import numpy as np
from scipy.io.wavfile import read, write

input_file = '88.wav'
output_file = 'clipped_audio.wav'
sr, audio = read(input_file)

# Clipping level expressed as a fraction of full scale (0.0 - 1.0).
threshold = 0.01

# scipy's `read` returns raw integer PCM samples for integer WAV files, so a
# fractional threshold applied directly would clip everything to ~0 and the
# integer cast would then write pure silence. Scale it to the sample range.
# NOTE(review): unsigned 8-bit WAVs (centred on 128) are not handled here.
if np.issubdtype(audio.dtype, np.integer):
    full_scale = np.iinfo(audio.dtype).max
else:
    full_scale = 1.0
clip_level = threshold * full_scale

# Apply clipping and write back using the input's own sample type.
clipped_audio = np.clip(audio, -clip_level, clip_level)
write(output_file, sr, clipped_audio.astype(audio.dtype))

**Time-stretching**

In [None]:
import librosa
from scipy.io.wavfile import write

# Time-stretch a clip with librosa and save the result as a WAV file.
audio_file = '86.wav'
output_file = 'time_stretched_audio1.wav'
# No `sr` argument is given, so librosa picks the sample rate; it is printed below.
audio, sr = librosa.load(audio_file)
print(sr)
# Time-stretch factor; per the author's note, values from .5 to 1.5 work fine.
stretch_factor = 1.5 # NOTE(review): this is passed as `rate`; librosa documents rate > 1 as a speed-up (shorter output), so the earlier "1.2 increases duration by 20%" note looks inverted - confirm
# Apply time stretching
time_stretched_audio = librosa.effects.time_stretch(audio, rate=stretch_factor)
write(output_file, sr, time_stretched_audio)



**Reverberation**

In [None]:
import numpy as np
import soundfile as sf
import librosa

# Add reverberation to a clip by convolving it with an impulse response (IR).
audio_file = '88.wav'
audio, sr = librosa.load(audio_file, sr=None, mono=True)
ir_file = 'reverb_impulse_response.wav'
ir, sr_ir = librosa.load(ir_file, sr=None, mono=True)

# Bring the audio to the impulse response's sample rate. The rates are passed
# by keyword because recent librosa releases no longer accept them
# positionally, and `sr` is updated so the file is written at the rate the
# samples actually have.
if sr != sr_ir:
    audio = librosa.resample(audio, orig_sr=sr, target_sr=sr_ir)
    sr = sr_ir

# Apply reverberation by convolving the audio signal with the impulse response
reverb_audio = np.convolve(audio, ir)

# Normalize the reverberant audio to prevent clipping; skip all-zero output
# to avoid dividing by zero.
peak = np.max(np.abs(reverb_audio))
if peak > 0:
    reverb_audio = reverb_audio / peak
output_file = 'reverberant_audio.wav'
sf.write(output_file, reverb_audio, sr)


**Random Noise Injection (uniform noise)**

In [None]:
import numpy as np
from scipy.io.wavfile import write
import librosa
# Write `num_variations` copies of a clip, each with fresh uniform noise added.
import os

audio_file = '88.wav'
output_dir = 'noisy_audio_variations/'
import soundfile as sf
audio, sr = librosa.load(audio_file)
num_variations = 30

# The output folder must exist before soundfile can write into it.
os.makedirs(output_dir, exist_ok=True)

for i in range(num_variations):
    # Define a random amplitude range for uniform noise
    # NOTE(review): the bounds are asymmetric around zero, so the noise has a
    # non-zero mean (a DC offset) - confirm this is intended.
    min_amplitude = np.random.uniform(-0.1, -0.05)
    max_amplitude = np.random.uniform(0.05, 0.2)

    # Generate uniform noise within the specified range
    noise = np.random.uniform(min_amplitude, max_amplitude, len(audio))

    # Add the noise to the audio
    noisy_audio = audio + noise

    # Write the noisy audio to a new file (numbered from 1)
    output_file = os.path.join(output_dir, f'noisy_audio_variation_{i+1}.wav')
    sf.write(output_file, noisy_audio, sr)


**Pitch Shifting**

In [None]:
import librosa
from scipy.io.wavfile import write

# Pitch-shift a clip with librosa and save the result as a WAV file.
audio_file = '86.wav'
output_file = 'pitch_shifted_audio.wav'
audio, sr = librosa.load(audio_file)

# Pitch shift amount in semitones; the author notes a usable range of -5 to 12.
pitch_shift = -3.5  # negative value: a shift of -3.5 semitones (the earlier "increase by 3" note did not match the value)

# Apply pitch shifting
pitch_shifted_audio = librosa.effects.pitch_shift(audio, sr=sr, n_steps=pitch_shift)
write(output_file, sr, pitch_shifted_audio)


**Pink Noise**

In [None]:
import pydsm
import librosa

# Load a clip and request a noise sequence with the same number of samples.
# NOTE(review): the cell ends after creating `noise`; it is never mixed into
# `audio` or written to disk here.
# NOTE(review): presumably `pydsm.pink` returns pink noise - confirm that the
# installed pydsm package actually exposes this function.
audio_file = '88.wav'
audio, sr = librosa.load(audio_file)
noise = pydsm.pink(len(audio))

**Inverse Polarity**

In [None]:
import random
import librosa
import numpy as np
import soundfile as sf

def invert_polarity(signal):
    """Return `signal` multiplied by -1 (every sample's sign flipped)."""
    flipped = signal * -1
    return flipped

if __name__ == "__main__":
    # Load one file from the noisy-variation folder, flip its polarity and
    # save the result. No `sr` is passed to `librosa.load`, so the output is
    # written at whatever rate librosa returns.
    signal, sr = librosa.load("noisy_audio_variations/noisy_audio_variation_29.wav")
    augmented_signal = invert_polarity(signal)
    sf.write("augmented_audio.wav", augmented_signal, sr)


**Frequency Masking**

In [None]:
import librosa
from scipy.io.wavfile import write

# Silence a band of frequencies in a clip and save the result as a WAV file.
audio_file = '88.wav'
output_file = 'augmented_audio.wav'

# Load the original audio file
audio, sr = librosa.load(audio_file)

# Define the frequency mask parameters
mask_start = 500  # Start frequency of the mask in Hz
mask_width = 100  # Width of the mask in frequency bins

# librosa has no `effects.freq_mask`, so the mask is applied by hand:
# transform to the STFT domain, zero the selected bins in every frame, and
# transform back.
n_fft = 2048
spectrum = librosa.stft(audio, n_fft=n_fft)

# Bin k of an n_fft-point STFT is centred on k * sr / n_fft Hz.
start_bin = int(round(mask_start * n_fft / sr))
spectrum[start_bin:start_bin + mask_width, :] = 0

# `length` keeps the output exactly as long as the input.
masked_audio = librosa.istft(spectrum, length=len(audio))
write(output_file, sr, masked_audio)


Some important scripts for organizing the data

**CSV file generation from audio data to feed in Model**



In [None]:
import os
import pandas as pd
# NOTE: `list_` and `dir_name` are only referenced by the commented-out code
# below; the active code in this cell does not use them.
list_ = []
dir_name = "D:\\data-preparation-phase-1\\generated-data"
# for folder in os.listdir(dir_name):
#     list_of_file_names = os.listdir(os.path.join(dir_name,folder))
#     list_ = list_ + list(zip([folder]*len(list_of_file_names), list_of_file_names))

### Disabled: earlier approach that dumped the collected pairs to word_list.csv
# df = pd.DataFrame(list_)
# df.to_csv("word_list.csv",index=False)
import wave
import os
import csv

# Path to the folder containing the audio dataset.
# Raw string: the previous literal relied on the invalid escape "\g", which
# newer Python versions warn about. The resulting path is unchanged.
dataset_folder = r'D:\final-try\generated_data_5'

# Path of the CSV file to generate
csv_file = 'audio_dataset.csv'

def get_audio_end_time(audio_path):
    """Return the duration of the WAV file at `audio_path` in seconds.

    The value is the file's frame count divided by its frame rate, both read
    through the standard-library `wave` module.
    """
    with wave.open(audio_path, 'rb') as wav_reader:
        samples_per_second = float(wav_reader.getframerate())
        return wav_reader.getnframes() / samples_per_second

# Build the dataset index: one CSV row per .wav file found in each label
# sub-folder of `dataset_folder`.
with open(csv_file, 'w', newline='') as file:
    writer = csv.writer(file)

    # Write the header row
    writer.writerow(['file_name', 'class', 'tmin', 'tmax'])

    # Each sub-folder name is used as the class label. Listings are sorted so
    # the CSV comes out in the same order on every run.
    for label in sorted(os.listdir(dataset_folder)):
        label_folder = os.path.join(dataset_folder, label)
        if not os.path.isdir(label_folder):
            continue
        for audio_file in sorted(os.listdir(label_folder)):
            # splitext strips only the final extension, so names that contain
            # dots (e.g. "a.b.wav") are no longer cut at the first dot.
            audio_name, extension = os.path.splitext(audio_file)
            if extension.lower() != '.wav':  # Adjust file extension if necessary
                continue
            audio_path = os.path.join(label_folder, audio_file)
            start_time = 0.0  # Adjust start time if necessary
            end_time = get_audio_end_time(audio_path)  # clip duration in seconds

            # Write the audio information to the CSV file
            writer.writerow([audio_name, label, start_time, end_time])

print('CSV file created successfully.')

# Example usage
# audio_path = '88.wav'
# end_time = get_audio_end_time(audio_path)
# print("End time:", end_time, "seconds")


**Audio Feature extraction**

In [None]:
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt

# Extract several spectral features from one clip and plot each of them in
# its own figure.

# Load audio file
audio_path = '88.wav'
y, sr = librosa.load(audio_path)

# Extract Mel spectrogram and convert it to dB relative to its maximum value
mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=sr)
mel_spectrogram_db = librosa.power_to_db(mel_spectrogram, ref=np.max)

# Extract spectral bandwidth
spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)

# Extract spectral centroid
spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)

# Extract chromagram
chromagram = librosa.feature.chroma_stft(y=y, sr=sr)

# Extract short-time Fourier transform (STFT); only the magnitude is kept
stft = np.abs(librosa.stft(y))

# Plotting the Mel spectrogram
plt.figure(figsize=(10, 4))
librosa.display.specshow(mel_spectrogram_db, sr=sr, x_axis='time', y_axis='mel')
plt.colorbar(format='%+2.0f dB')
plt.title('Mel Spectrogram')
plt.show()

# Plotting the spectral bandwidth (row 0 of the feature array, against frame index)
plt.figure()
plt.plot(spectral_bandwidth[0])
plt.title('Spectral Bandwidth')
plt.xlabel('Frame')
plt.ylabel('Bandwidth')
plt.show()

# Plotting the spectral centroid (row 0 of the feature array, against frame index)
plt.figure()
plt.plot(spectral_centroid[0])
plt.title('Spectral Centroid')
plt.xlabel('Frame')
plt.ylabel('Centroid')
plt.show()

# Plotting the chromagram
plt.figure(figsize=(10, 4))
librosa.display.specshow(chromagram, sr=sr, x_axis='time', y_axis='chroma')
plt.colorbar()
plt.title('Chromagram')
plt.show()

# Plotting the short-time Fourier transform (STFT) in dB on a log-frequency axis
plt.figure(figsize=(10, 4))
librosa.display.specshow(librosa.amplitude_to_db(stft, ref=np.max), sr=sr, x_axis='time', y_axis='log')
plt.colorbar(format='%+2.0f dB')
plt.title('Short-Time Fourier Transform (STFT)')
plt.show()


**Ordering Audio data**

In [None]:
import os
import shutil
import uuid
def create_folder_and_move_files(source_dir, destination_dir):
    """Move every file under `source_dir` into `destination_dir`.

    Each file lands in `destination_dir/<name of the folder it was found in>`;
    only the immediate parent folder name is kept, deeper nesting is
    flattened. The target folder is created when it does not exist yet.
    For every visited directory the path and the number of moved files are
    printed.

    Args:
        source_dir: Root directory that is walked recursively.
        destination_dir: Directory that receives the per-folder sub-directories.
    """
    for root, dirs, files in os.walk(source_dir):
        print(root)
        cnt = 0
        destination_folder_path = os.path.join(destination_dir, os.path.basename(root))
        if files:
            # shutil.move fails when the target folder is missing, so make
            # sure it exists before moving anything into it.
            os.makedirs(destination_folder_path, exist_ok=True)
        for file_name in files:
            source_file_path = os.path.join(root, file_name)
            destination_file_path = os.path.join(destination_folder_path, file_name)
            shutil.move(source_file_path, destination_file_path)
            cnt += 1
        print(cnt)


# Example usage
# source_directory = "D:\\demo-walk"
# destination_directory = "D:\\demo-move"
# Raw strings: the previous literals relied on the invalid escapes "\g" and
# "\o", which newer Python versions warn about. The paths are unchanged.
destination_directory = r"D:\data-preparation-phase-1\generated-data"
source_directory = r"D:\data-preparation-phase-1\orginal-data"

create_folder_and_move_files(source_directory, destination_directory)


**Order data and simulation**

In [None]:
### to beeeeeeeeeeeeeeeeeeeeeee
import librosa
import numpy as np
import soundfile as sf
import os
import uuid
import shutil
def pitch_scaling(audio, sr, pitch_shift):
    """Return `audio` pitch-shifted by `pitch_shift` steps using librosa.

    `sr` is forwarded as the sample rate and `pitch_shift` as `n_steps`.
    """
    return librosa.effects.pitch_shift(audio, sr=sr, n_steps=pitch_shift)

def time_stretching(audio, sr, stretch_factor):
    """Return `audio` time-stretched by librosa with `rate=stretch_factor`.

    `sr` is accepted so the signature matches the other augmenters; it is not
    used by this function.
    """
    return librosa.effects.time_stretch(audio, rate=stretch_factor)

def noise_injection(audio, sr, noise_parcentage_factor):
    """Return `audio` with scaled zero-mean Gaussian noise added.

    The noise is drawn with the same standard deviation as `audio` itself and
    then multiplied by `noise_parcentage_factor`, so the factor expresses the
    noise level relative to the signal level.

    Args:
        audio: NumPy array of samples; any shape is accepted.
        sr: Sample rate; unused, kept so all augmenters share one signature.
        noise_parcentage_factor: Multiplier applied to the noise before mixing.

    Returns:
        A new array with the same shape as `audio`.
    """
    # Drawing with `audio.shape` (not `len(audio)`) keeps multi-channel input
    # working; for 1-D input the drawn values are identical.
    noise = np.random.normal(0.0, audio.std(), audio.shape)
    return audio + noise * noise_parcentage_factor

def inverse_polarity(audio):
    """Return `audio` multiplied by -1 (every sample's sign flipped)."""
    flipped = audio * -1
    return flipped

def start_augmenation(command, source_file_path, destination_folder_path,
                      start_range,end_range, increment, basename):
    """Write augmented copies of one audio file for a sweep of parameter values.

    The file is loaded once; one output file named `<basename>_<uuid4>.wav`
    is written to `destination_folder_path` per parameter value.

    Args:
        command: 1 = pitch shift, 2 = time stretch, 3 = noise injection,
            4 = polarity inversion (the parameter value is ignored). Anything
            convertible with `int()` is accepted.
        source_file_path: Audio file to augment.
        destination_folder_path: Existing folder that receives the output.
        start_range: First parameter value.
        end_range: Last parameter value (inclusive).
        increment: Step between values. A step <= 0 yields a single output
            at `start_range`; previously it made the loop run forever.
        basename: Prefix for the generated file names.
    """
    command = int(command)
    if command not in (1, 2, 3, 4):
        # Previously execution fell through to sf.write with no signal
        # defined; bail out before touching any file instead.
        print("Don't understand your command ")
        return

    audio, sr = librosa.load(source_file_path)

    augmenters = {
        1: lambda value: pitch_scaling(audio, sr, value),
        2: lambda value: time_stretching(audio, sr, value),
        3: lambda value: noise_injection(audio, sr, value),
        4: lambda value: inverse_polarity(audio),
    }
    augment = augmenters[command]

    # Derive each value from an integer step index rather than repeatedly
    # adding a float: accumulated rounding error could otherwise drop or
    # duplicate the last value of the sweep.
    if start_range > end_range:
        values = []
    elif increment <= 0:
        values = [start_range]
    else:
        n_values = int((end_range - start_range) / increment + 1e-9) + 1
        values = [start_range + step * increment for step in range(n_values)]

    cnt = 0
    for value in values:
        audio_signal = augment(value)
        destination_file_path = os.path.join(destination_folder_path, f"{basename}_{str(uuid.uuid4())}.wav")
        sf.write(destination_file_path, audio_signal, sr)
        cnt += 1

    print("total generated audio  --- ", cnt)


if __name__ == "__main__":
    # Raw strings: the previous literals relied on the invalid escapes "\g"
    # and "\o", which newer Python versions warn about. Paths are unchanged.
    destination_dir = r"D:\final-try\generated_data_5"
    source_dir = r"D:\final-try\orginal"
    cmd = input("Enter command: ")

    # (start, end, increment) of the parameter sweep for each command.
    sweep_by_command = {
        # pitch scaling... range -5 to 12
        1: (-2, 9, .2),
        # time stretching... range .5 to 1.5
        # NOTE(review): the sweep starts at .2, below the range noted above - confirm.
        2: (.2, 1.2, .05),
        # noise injection
        3: (.04, .2, .05),
        # inverse polarity: exactly one output per file. The increment is 1
        # rather than 0 so a `while i <= end` style sweep terminates.
        4: (0, 0, 1),
    }
    try:
        sweep = sweep_by_command[int(cmd)]
    except (ValueError, KeyError):
        # Unknown or non-numeric command: previously this surfaced later as a
        # NameError on the undefined range variables.
        sweep = None

    if sweep is None:
        print("Don't understand your command ")
    else:
        start_range, end_range, increment = sweep
        for root, dirs, files in os.walk(source_dir):
            # Mirror the source folder layout in the destination.
            for sub_dir in dirs:
                folder_path = os.path.join(destination_dir, sub_dir)
                os.makedirs(folder_path, exist_ok=True)
            for file_name in files:
                source_file_path = os.path.join(root, file_name)
                destination_folder_path = os.path.join(destination_dir, os.path.basename(root))
                # Files directly inside `source_dir` map to a folder that the
                # loop above never creates, so ensure it exists.
                os.makedirs(destination_folder_path, exist_ok=True)
                start_augmenation(
                    cmd, source_file_path, destination_folder_path,
                    start_range, end_range, increment, os.path.basename(root)
                    )
            # destination_file_path = os.path.join(destination_folder_path, f"{str(uuid.uuid4())}.wav")
            # shutil.move(source_file_path, destination_file_path)



