In [1]:
# For Google Colab imports
!pip install librosa

In [3]:
import pandas as pd # To create/edit/manipulate a data frame
import numpy as np # To perform a wide variety of mathematical operations on arrays
from glob import glob # a function that's used to search for files that match a specific file pattern or name
import csv # To converts into a readable csv file
import os # For high file management

# For feature extraction of audio files
import librosa
import librosa.display
from librosa import feature

In [4]:
# Google Colab for attaching Google Drive data
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Audio Data Feature Extraction
> 1. ```Zero Crossing Rate:``` The rate of sign-changes of the signal during the duration of a particular frame.
> 2. ```Root Mean Square:```  The square root of the mean squared amplitude within each frame; a measure of the average loudness (energy) of the signal over that frame.
> 3. ```Mel Frequency Cepstral Coefficients:``` Form a cepstral representation where the frequency bands are not linear but distributed according to the mel-scale.
> 4. ```Chromagram:``` Represents the energy of the 12 pitch classes across an audio file in one place, so that we can see how the pitch content of the audio is distributed.
> 5. ```Melspectrogram:``` A spectrogram whose frequency axis is mapped onto the mel scale, a scale of pitches that listeners perceive to be equal in distance from one another.
> 6. ```Spectral Centroid:``` The center of gravity of the spectrum.
> 7. ```Spectral Bandwidth:``` The difference between the upper and lower frequencies in a continuous band of frequencies.
> 8. ```Spectral Rolloff:``` The frequency below which a fixed percentage of the magnitude distribution of the spectrum is concentrated (commonly 85–90%; librosa's default `roll_percent`, used in this notebook, is 0.85).
> 9. ```Spectral Entropy:``` Entropy of the normalized spectral energies for a set of sub-frames.
> 10. ```Spectral Flux:``` The squared difference between the normalized magnitudes of the spectra of the two successive frames.

***NOTE: We will be using Mel Frequency Cepstral Coefficients, Spectral Centroid, Spectral Bandwidth, Spectral Rolloff, and Spectral Flux.***

# Data Augmentation

In [5]:
# Noise Injection.
def inject_noise(data, sampling_rate = 0.035, threshold = 0.075, random = False):
    """Return a copy of ``data`` with scaled Gaussian noise added.

    Args:
        data: 1-D sample array; its first-axis length sets the noise length.
        sampling_rate: Noise rate (despite the name, not an audio sample rate)
            used when ``random`` is False.
        threshold: Upper bound of the randomly drawn noise rate when
            ``random`` is True.
        random: When True, replace ``sampling_rate`` with a uniform draw in
            ``[0, threshold)``.

    Returns:
        ``data`` plus Gaussian noise whose amplitude is
        ``rate * U(0, 1) * max(data)``.
    """
    # Draw order matters for seeded reproducibility: rate (optional), uniform, normal.
    noise_rate = np.random.random() * threshold if random else sampling_rate
    scale = noise_rate * np.random.uniform()
    scale = scale * np.amax(data)
    gaussian = np.random.normal(size = data.shape[0])
    return data + scale * gaussian

In [6]:
# Pitching.
def pitching(data, sampling_rate, pitch_factor = 0.7, random = False):
    """Pitch-shift ``data`` with ``librosa.effects.pitch_shift``.

    Args:
        data: Audio samples passed to librosa as ``y``.
        sampling_rate: Sample rate passed to librosa as ``sr``.
        pitch_factor: Number of steps to shift by (passed as ``n_steps``).
        random: When True, scale ``pitch_factor`` by a uniform draw in
            ``[0, 1)`` before shifting.

    Returns:
        Whatever ``librosa.effects.pitch_shift`` returns for these arguments.
    """
    n_steps = np.random.random() * pitch_factor if random else pitch_factor
    shifted = librosa.effects.pitch_shift(y = data, sr = sampling_rate, n_steps = n_steps)
    return shifted

In [7]:
# Stretching.
def stretching(data, r = 0.9):
    """Time-stretch ``data`` with ``librosa.effects.time_stretch``.

    Args:
        data: Audio samples passed to librosa as ``y``.
        r: Stretch factor passed to librosa as ``rate``.

    Returns:
        Whatever ``librosa.effects.time_stretch`` returns for these arguments.
    """
    stretched = librosa.effects.time_stretch(rate = r, y = data)
    return stretched

In [8]:
# Pipeline function that applies all the audio data augmentation functions we just built.
def pipeline(data, sampling_rate):
    """Apply the full augmentation chain: pitch shift, then noise, then stretch.

    Pitch shifting and noise injection both run with ``random = True``;
    stretching uses its default rate.

    Args:
        data: Audio samples to augment.
        sampling_rate: Sample rate forwarded to ``pitching``.

    Returns:
        The augmented samples returned by ``stretching``.
    """
    pitched = pitching(data, sampling_rate, random = True)
    noisy = inject_noise(pitched, random = True)
    return stretching(noisy)

# Feature Extractions

In [9]:
def zero_crossing_rate(data, frame_length, hop_length):
    """Return the framewise zero-crossing rate of ``data`` with singleton axes removed.

    Args:
        data: Audio samples passed to librosa as ``y``.
        frame_length: Frame size forwarded to librosa.
        hop_length: Hop size forwarded to librosa.

    Returns:
        ``np.squeeze`` of ``librosa.feature.zero_crossing_rate``'s output.
    """
    framing = dict(frame_length = frame_length, hop_length = hop_length)
    return np.squeeze(librosa.feature.zero_crossing_rate(y = data, **framing))

def root_mean_square(data, frame_length = 2048, hop_length = 512):
    """Return the framewise RMS of ``data`` with singleton axes removed.

    Args:
        data: Audio samples passed to librosa as ``y``.
        frame_length: Frame size forwarded to librosa.
        hop_length: Hop size forwarded to librosa.

    Returns:
        ``np.squeeze`` of ``librosa.feature.rms``'s output.
    """
    framing = dict(frame_length = frame_length, hop_length = hop_length)
    return np.squeeze(librosa.feature.rms(y = data, **framing))


def mel_frequency_cepstral_coefficients(data, sampling_rate, frame_length = 2048, hop_length = 512, flatten:bool = True):
    """Compute MFCCs for ``data`` using the requested framing.

    ``frame_length`` and ``hop_length`` are forwarded to librosa (as ``n_fft``
    and ``hop_length``) so that the MFCC frames line up with the other
    frame-based features computed with the same framing. Previously these
    arguments were accepted but ignored; the defaults (2048 / 512) match
    librosa's own defaults, so default calls behave as before.

    Args:
        data: Audio samples passed to librosa as ``y``.
        sampling_rate: Sample rate passed to librosa as ``sr``.
        frame_length: FFT window size, forwarded as ``n_fft``.
        hop_length: Hop size between frames.
        flatten: When True (default) return the transposed coefficient matrix
            raveled to 1-D; otherwise return ``np.squeeze`` of the transposed
            matrix.

    Returns:
        A numpy array of MFCC values, 1-D when ``flatten`` is True.
    """
    mfcc = librosa.feature.mfcc(y = data, sr = sampling_rate, n_fft = frame_length, hop_length = hop_length)
    # Transpose before flattening so that values of the same frame stay adjacent.
    return np.ravel(mfcc.T) if flatten else np.squeeze(mfcc.T)


def chroma_stft(data, sampling_rate, frame_length = 2048, hop_length = 512, flatten: bool = True):
    """Compute a chromagram from the magnitude STFT of ``data``.

    ``frame_length`` and ``hop_length`` are now forwarded to ``librosa.stft``
    (as ``n_fft`` and ``hop_length``); previously they were accepted but
    ignored. The defaults (2048 / 512) match librosa's own defaults, so
    default calls behave as before.

    Args:
        data: Audio samples.
        sampling_rate: Sample rate passed to librosa as ``sr``.
        frame_length: FFT window size used for the STFT.
        hop_length: Hop size between STFT frames.
        flatten: When True (default) return the transposed chromagram raveled
            to 1-D; otherwise return ``np.squeeze`` of the transposed matrix.

    Returns:
        A numpy array of chroma values, 1-D when ``flatten`` is True.
    """
    magnitude = np.abs(librosa.stft(data, n_fft = frame_length, hop_length = hop_length))
    # The spectrogram is precomputed, so the framing is already baked into ``S``.
    chroma = librosa.feature.chroma_stft(sr = sampling_rate, S = magnitude)
    return np.ravel(chroma.T) if flatten else np.squeeze(chroma.T)


def melspectrogram(data, sampling_rate, frame_length = 2048, hop_length = 512, flatten: bool = True):
    """Compute a mel spectrogram for ``data`` using the requested framing.

    ``frame_length`` and ``hop_length`` are now forwarded to librosa (as
    ``n_fft`` and ``hop_length``); previously they were accepted but ignored.
    The defaults (2048 / 512) match librosa's own defaults, so default calls
    behave as before.

    Args:
        data: Audio samples passed to librosa as ``y``.
        sampling_rate: Sample rate passed to librosa as ``sr``.
        frame_length: FFT window size, forwarded as ``n_fft``.
        hop_length: Hop size between frames.
        flatten: When True (default) return the transposed spectrogram raveled
            to 1-D; otherwise return ``np.squeeze`` of the transposed matrix.

    Returns:
        A numpy array of mel-spectrogram values, 1-D when ``flatten`` is True.
    """
    melspect = librosa.feature.melspectrogram(y = data, sr = sampling_rate, n_fft = frame_length, hop_length = hop_length)
    return np.ravel(melspect.T) if flatten else np.squeeze(melspect.T)


def spectral_centroid(data, sampling_rate, frame_length = 2048, hop_length = 512):
    """Return the framewise spectral centroid of ``data`` with singleton axes removed.

    Args:
        data: Audio samples passed to librosa as ``y``.
        sampling_rate: Sample rate passed to librosa as ``sr``.
        frame_length: FFT window size, forwarded as ``n_fft``.
        hop_length: Hop size between frames.

    Returns:
        ``np.squeeze`` of ``librosa.feature.spectral_centroid``'s output.
    """
    per_frame = librosa.feature.spectral_centroid(
        y = data,
        sr = sampling_rate,
        n_fft = frame_length,
        hop_length = hop_length,
    )
    return np.squeeze(per_frame)


def spectral_bandwidth(data, sampling_rate, frame_length = 2048, hop_length = 512):
    """Return the framewise spectral bandwidth of ``data`` with singleton axes removed.

    Args:
        data: Audio samples passed to librosa as ``y``.
        sampling_rate: Sample rate passed to librosa as ``sr``.
        frame_length: FFT window size, forwarded as ``n_fft``.
        hop_length: Hop size between frames.

    Returns:
        ``np.squeeze`` of ``librosa.feature.spectral_bandwidth``'s output.
    """
    per_frame = librosa.feature.spectral_bandwidth(
        y = data,
        sr = sampling_rate,
        n_fft = frame_length,
        hop_length = hop_length,
    )
    return np.squeeze(per_frame)


def spectral_rolloff(data, sampling_rate, frame_length = 2048, hop_length = 512):
    """Return the framewise spectral rolloff of ``data`` with singleton axes removed.

    No rolloff percentage is passed, so librosa's default applies.

    Args:
        data: Audio samples passed to librosa as ``y``.
        sampling_rate: Sample rate passed to librosa as ``sr``.
        frame_length: FFT window size, forwarded as ``n_fft``.
        hop_length: Hop size between frames.

    Returns:
        ``np.squeeze`` of ``librosa.feature.spectral_rolloff``'s output.
    """
    per_frame = librosa.feature.spectral_rolloff(
        y = data,
        sr = sampling_rate,
        n_fft = frame_length,
        hop_length = hop_length,
    )
    return np.squeeze(per_frame)


def spectral_flux(data, sampling_rate):
    """Return librosa's onset-strength envelope of ``data`` as the spectral-flux feature.

    Args:
        data: Audio samples passed to librosa as ``y``.
        sampling_rate: Sample rate passed to librosa as ``sr``.

    Returns:
        ``np.squeeze`` of ``librosa.onset.onset_strength``'s output.
    """
    envelope = librosa.onset.onset_strength(sr = sampling_rate, y = data)
    return np.squeeze(envelope)

# Feature Extraction Pipeline

In [10]:
def feature_extraction(data, sampling_rate, frame_length = 2048, hop_length = 512):
    """Concatenate the selected audio features of ``data`` into one 1-D vector.

    The vector holds, in order: MFCCs, spectral centroid, spectral bandwidth,
    spectral rolloff and spectral flux. Zero-crossing rate and RMS are
    deliberately left out.

    Args:
        data: Audio samples.
        sampling_rate: Sample rate of ``data``.
        frame_length: Frame size forwarded to the frame-based extractors.
        hop_length: Hop size forwarded to the frame-based extractors.

    Returns:
        The horizontally stacked feature vector.
    """
    framed_extractors = (
        mel_frequency_cepstral_coefficients,
        spectral_centroid,
        spectral_bandwidth,
        spectral_rolloff,
    )
    # Seed with an empty float array, exactly as before, so the stacked dtype is unchanged.
    parts = [np.array([])]
    for extract in framed_extractors:
        parts.append(extract(data, sampling_rate, frame_length, hop_length))
    parts.append(spectral_flux(data, sampling_rate))
    return np.hstack(parts)


In [11]:
# The `duration` and `offset` defaults act as placeholders: there is no audio at the start of each file, and each file normally ends before the three-second mark.
def get_features(file_path, duration = 2.5, offset = 0.6):
    """Load one audio file and return its feature vector.

    Args:
        file_path: Path of the audio file, passed to ``librosa.load``.
        duration: Seconds of audio to load.
        offset: Seconds to skip from the start of the file before loading.

    Returns:
        A numpy array holding the output of ``feature_extraction`` for the
        loaded, un-augmented audio.
    """
    waveform, sr = librosa.load(path = file_path, duration = duration, offset = offset)

    # Only the un-augmented clip is used. Augmented variants (noise injection,
    # pitching, stretching and their combinations via `pipeline`) are disabled;
    # when enabled they were stacked as extra rows resized to (1, 19921).
    return np.array(feature_extraction(waveform, sr))


# File path of Audio folder

```FAKE```

0. full_audio_features_list = whole_folder_extract('/content/drive/MyDrive/data/audio/fake/Cleaned Spoof-Audio/')

1. full_audio_features_list = whole_folder_extract('/content/drive/MyDrive/data/audio/fake/Fake People/')

2. full_audio_features_list = whole_folder_extract('/content/drive/MyDrive/data/audio/fake/Generated_Audio_FAKE/ljspeech_hifiGAN/')

3. full_audio_features_list = whole_folder_extract('/content/drive/MyDrive/data/audio/fake/Generated_Audio_FAKE/ljspeech_melgan/')

4. full_audio_features_list = whole_folder_extract('/content/drive/MyDrive/data/audio/fake/Generated_Audio_FAKE/ljspeech_melgan_large/')

5. full_audio_features_list = whole_folder_extract('/content/drive/MyDrive/data/audio/fake/Generated_Audio_FAKE/ljspeech_multi_band_melgan/')

6. full_audio_features_list = whole_folder_extract('/content/drive/MyDrive/data/audio/fake/Generated_Audio_FAKE/ljspeech_parallel_wavegan/')

7. full_audio_features_list = whole_folder_extract('/content/drive/MyDrive/data/audio/fake/Generated_Audio_FAKE/ljspeech_waveglow/')


```REAL```

0. full_audio_features_list = whole_folder_extract('/content/drive/MyDrive/data/audio/real/Real_People/')

1. full_audio_features_list = whole_folder_extract('/content/drive/MyDrive/data/audio/real/LJSpeech-1.1/wavs/')

2. full_audio_features_list = whole_folder_extract('/content/drive/MyDrive/data/audio/real/Cleaned_Bonafide-Audio/')

3. full_audio_features_list = whole_folder_extract('/content/drive/MyDrive/data/audio/real/CREMA-D_REAL/')

4. full_audio_features_list = whole_folder_extract('/content/drive/MyDrive/data/audio/real/common_voice_wav_audio/common_voice_10_valid_clips_wav/')

5. full_audio_features_list = whole_folder_extract('/content/drive/MyDrive/data/audio/real/common_voice_wav_audio/common_voice_corpus_6-1_valid_clips_wav/')

6. full_audio_features_list = whole_folder_extract('/content/drive/MyDrive/data/audio/real/common_voice_wav_audio/common_voice_12_valid_clips_wav/')

7. full_audio_features_list = whole_folder_extract('/content/drive/MyDrive/data/audio/real/common_voice_wav_audio/common_voice_13_valid_clips_wav/')

8. full_audio_features_list = whole_folder_extract('/content/drive/MyDrive/data/audio/real/common_voice_wav_audio/common_voice_14_valid_clips_wav/')

9. full_audio_features_list = whole_folder_extract('/content/drive/MyDrive/data/audio/real/common_voice_wav_audio/common_voice_15_valid_clips_wav/')

10. full_audio_features_list = whole_folder_extract('/content/drive/MyDrive/data/audio/real/common_voice_wav_audio/common_voice_3_valid_clips_wav/')

# Feature Extraction of data

In [None]:
def whole_folder_extract(audio_folder):
    """Extract a flat feature vector for every ``.wav`` file in a folder.

    Files are processed in sorted filename order. ``os.listdir`` returns
    entries in arbitrary order, so without sorting the row order of the
    resulting table could differ between runs or machines.

    Args:
        audio_folder: Folder containing the audio files. A relative path is
            resolved against the current working directory; an absolute path
            is used as is.

    Returns:
        A list with one flattened 1-D numpy array per ``.wav`` file, in
        sorted filename order. Empty when the folder has no ``.wav`` files.
    """
    # Make a path to the audio folder.
    audio_path = os.path.join(os.getcwd(), audio_folder)
    full_audio_feature_list = []
    for filename in sorted(os.listdir(audio_path)):
        if not filename.endswith('.wav'):
            continue
        full_path = os.path.join(audio_path, filename)
        audio_features = get_features(full_path)
        # Flatten to 1-D so every file contributes exactly one row.
        full_audio_feature_list.append(np.array(audio_features).flatten())
    return full_audio_feature_list


# Extract features for every .wav clip in the selected folder (one list entry per file).
full_audio_features_list = whole_folder_extract('/content/drive/MyDrive/data/audio/real/common_voice_wav_audio/common_voice_corpus_6-1_valid_clips_wav/')
# Build the feature table: one row per clip, one column per feature value.
audio_features_df = pd.DataFrame(full_audio_features_list)
# Last expression of the cell: displays the table.
audio_features_df

  return pitch_tuning(


# Checking Dataframe

In [None]:
def inspect_dataframe(input_df):
    """Print the total number of null cells in ``input_df``.

    Only the null count is reported; duplicate counts, ``describe()`` output
    and the column listing are intentionally not printed.

    Args:
        input_df: DataFrame to inspect.

    Returns:
        None. The report is written to standard output.
    """
    total_nulls = input_df.isnull().sum().sum()
    print('The Null Values:\n', total_nulls)
    print('\n')

In [None]:
# Report how many cells of the extracted feature table are null.
inspect_dataframe(audio_features_df)

# Extracting dataframe as csv

In [None]:
# Write the feature table to Drive; index=False leaves the row index out of the CSV.
audio_features_df.to_csv('/content/drive/MyDrive/data/features/new_features/real/REAL_common_voice_corpus_6-1_feature_extraction_list.csv',  index=False)

# File Path for csv

```REAL```

0. audio_features_df.to_csv('/content/drive/MyDrive/data/features/new_features/real/REAL_voice_recognition_feature_extraction_list.csv',  index=False)

1. audio_features_df.to_csv('/content/drive/MyDrive/data/features/new_features/real/REAL_ljSpeech-1.1_feature_extraction_list.csv',  index=False)

2. audio_features_df.to_csv('/content/drive/MyDrive/data/features/new_features/real/REAL_bonafide_audio_feature_extraction_list.csv',  index=False)

3. audio_features_df.to_csv('/content/drive/MyDrive/data/features/new_features/real/REAL_crema-d_feature_extraction_list.csv',  index=False)

4. audio_features_df.to_csv('/content/drive/MyDrive/data/features/new_features/real/REAL_common_voice_10_feature_extraction_list.csv',  index=False)

5. audio_features_df.to_csv('/content/drive/MyDrive/data/features/new_features/real/REAL_common_voice_corpus_6-1_feature_extraction_list.csv',  index=False)

6. audio_features_df.to_csv('/content/drive/MyDrive/data/features/new_features/real/REAL_common_voice_12_feature_extraction_list.csv',  index=False)

7. audio_features_df.to_csv('/content/drive/MyDrive/data/features/new_features/real/REAL_common_voice_13_feature_extraction_list.csv',  index=False)

8. audio_features_df.to_csv('/content/drive/MyDrive/data/features/new_features/real/REAL_common_voice_14_feature_extraction_list.csv',  index=False)

9. audio_features_df.to_csv('/content/drive/MyDrive/data/features/new_features/real/REAL_common_voice_15_feature_extraction_list.csv',  index=False)

10. audio_features_df.to_csv('/content/drive/MyDrive/data/features/new_features/real/REAL_common_voice_3_feature_extraction_list.csv',  index=False)

```Fake```

0. audio_features_df.to_csv('/content/drive/MyDrive/data/features/new_features/fake/FAKE_spoof_audio_features_extraction_list.csv',  index=False)


1. audio_features_df.to_csv('/content/drive/MyDrive/data/features/new_features/fake/FAKE_voice_recognition_features_extraction_list.csv',  index=False)


2. audio_features_df.to_csv('/content/drive/MyDrive/data/features/new_features/fake/FAKE_ljspeech_hifiGAN_features_extraction_list.csv',  index=False)

3. audio_features_df.to_csv('/content/drive/MyDrive/data/features/new_features/fake/FAKE_ljspeech_melgan_features_extraction_list.csv',  index=False)

4. audio_features_df.to_csv('/content/drive/MyDrive/data/features/new_features/fake/FAKE_ljspeech_melgan_large_features_extraction_list.csv',  index=False)

5. audio_features_df.to_csv('/content/drive/MyDrive/data/features/new_features/fake/FAKE_ljspeech_multi_band_melgan_features_extraction_list.csv',  index=False)

6. audio_features_df.to_csv('/content/drive/MyDrive/data/features/new_features/fake/FAKE_ljspeech_parallel_wavegan_features_extraction_list.csv',  index=False)

7. audio_features_df.to_csv('/content/drive/MyDrive/data/features/new_features/fake/FAKE_ljspeech_waveglow_features_extraction_list.csv',  index=False)