In [1]:
# to import .ipynb files
import import_ipynb
import adding_augmentation
import os # for interacting with the operating system
import numpy as np # for linear algebra operations
import pandas as pd # for data processing and reading/writing CSV files
import matplotlib.pyplot as plt # for plotting data
import seaborn as sns # for creating beautiful plots
import librosa # a library for analyzing audio and music
import librosa.display # for displaying audio data
from IPython.display import Audio # for playing audio files

# Importing necessary modules from scikit-learn library
# StandardScaler is used for feature scaling
# OneHotEncoder is used for one-hot encoding categorical variables
# confusion_matrix and classification_report are used for evaluating the performance of a classifier
# train_test_split is used for splitting the data into training and testing sets

from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

from keras.callbacks import ReduceLROnPlateau
from keras.models import Sequential
from keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, Dropout, BatchNormalization, AveragePooling1D
from keras.utils import np_utils, to_categorical
from keras.callbacks import ModelCheckpoint

from tensorflow.keras.layers import Input, Conv1D, MaxPooling1D, AveragePooling1D, Flatten, Dense, Dropout
from tensorflow.keras.models import Model

importing Jupyter notebook from adding_augmentation.ipynb


  "class": algorithms.Blowfish,


In [2]:
# Function to extract the features from audio data
def extract_features(data, sample_rate=22050, n_mfcc=58):
    """Summarise an audio signal as one fixed-length MFCC vector.

    Parameters
    ----------
    data : np.ndarray
        Audio time series, passed to librosa as ``y``.
    sample_rate : int, optional
        Sampling rate of ``data`` in Hz. Defaults to 22050, the value this
        function previously hard-coded.
    n_mfcc : int, optional
        Number of MFCC coefficients to compute. This sets the length of the
        returned vector; it does not affect the analysis frame duration.

    Returns
    -------
    np.ndarray
        1-D array of length ``n_mfcc``: each coefficient averaged over all
        time frames of the signal.
    """
    # librosa returns one row per coefficient and one column per time frame.
    mfccs = librosa.feature.mfcc(y=data, sr=sample_rate, n_mfcc=n_mfcc)

    # Transpose to (frames, coefficients) and average over the frame axis so
    # clips of different length all map to a vector of the same size.
    return np.mean(mfccs.T, axis=0)

In [3]:
# Function to get features for a given audio file
def get_features(path):
    """Build the feature matrix for one audio file and its augmented copies.

    The clip is loaded once, then features are extracted from the untouched
    signal and from six augmented versions of it. The returned 2-D array has
    one row per variant, in this order: original, added noise, time stretch,
    time shift, pitch shift, increased speed, decreased speed.
    """
    # Read at most 3 s starting 0.5 s into the file (presumably to skip
    # leading/trailing silence -- confirm against the dataset).
    signal, rate = librosa.load(path, duration=3, offset=0.5, res_type='kaiser_fast')

    # One callable per variant; the tuple order fixes the output row order.
    variants = (
        lambda wave: wave,  # no augmentation
        adding_augmentation.add_noise,
        adding_augmentation.stretch,
        adding_augmentation.shift,
        lambda wave: adding_augmentation.change_pitch(wave, rate),
        adding_augmentation.increase_speed,
        adding_augmentation.decrease_speed,
    )

    # Augment and extract one variant at a time, always from the loaded signal.
    rows = [extract_features(transform(signal)) for transform in variants]

    # Stack the per-variant feature vectors vertically, one row each.
    return np.vstack(rows)

In [4]:
def _collect_features(frame):
    """Return (features, labels) lists for every file listed in ``frame``.

    ``frame`` must expose ``path`` and ``labels`` as parallel iterables.
    get_features() yields one feature row per augmentation, so each row is
    appended separately and the file's emotion label is repeated for it.
    """
    collected_X, collected_Y = [], []
    for path, emotion in zip(frame.path, frame.labels):
        for element in get_features(path):
            collected_X.append(element)
            collected_Y.append(emotion)
    return collected_X, collected_Y


def process1(females, males):
    """Extract augmented features and labels for the female and male datasets.

    Parameters
    ----------
    females, males : objects with ``path`` and ``labels`` attributes
        Parallel iterables of audio file paths and emotion labels
        (e.g. DataFrames with those two columns).

    Returns
    -------
    tuple
        ``(female_X, female_Y, male_X, male_Y)``: lists of feature rows and
        the matching emotion label for each row.
    """
    # Both datasets go through the same extraction; females are processed first.
    female_X, female_Y = _collect_features(females)
    male_X, male_Y = _collect_features(males)

    # Report how many feature rows and labels were collected per dataset
    print('Female features: ',len(female_X),' labels: ',len(female_Y))
    print('Male features: ',len(male_X),' labels: ',len(male_Y))
    return female_X, female_Y, male_X, male_Y

In [5]:
def setup_dataframe(gender, features, labels, output_dir=None):
    """Assemble features and labels into a DataFrame and save it as CSV.

    Parameters
    ----------
    gender : str
        Name used in the CSV file name (``{gender}_features.csv``) and in
        the printed heading.
    features : sequence of feature rows
        One row per sample; becomes the numeric columns of the frame.
    labels : sequence
        One label per feature row; stored in the ``labels`` column.
    output_dir : str, optional
        Directory to write the CSV into; created if it does not exist.
        When omitted, the original hard-coded location
        ``D:\\final_voice_model_3\\features`` is used unchanged.

    Returns
    -------
    pd.DataFrame
        The assembled frame (features plus ``labels`` column).
    """
    # Create a dataframe from the extracted features
    data_frame = pd.DataFrame(features)

    # Add the labels column to the dataframe
    data_frame['labels'] = labels

    # Resolve the destination; the default keeps the historical path verbatim.
    if output_dir is None:
        csv_path = f'D:\\final_voice_model_3\\features\\{gender}_features.csv'
    else:
        os.makedirs(output_dir, exist_ok=True)
        csv_path = os.path.join(output_dir, f'{gender}_features.csv')
    data_frame.to_csv(csv_path, index=False)

    # Show a shuffled sample. A bare expression inside a function is
    # discarded, so it has to be printed explicitly.
    print(f'{gender} dataframe')
    print(data_frame.sample(frac=1).head())

    # Return the created dataframe
    return data_frame

In [6]:
def process2(female_X, female_Y, male_X, male_Y):
    """Build and save the per-gender feature dataframes.

    Delegates to setup_dataframe() for the female data first, then the male
    data, and returns the two resulting frames as ``(female, male)``.
    """
    # Emit an empty line before setup_dataframe() starts printing
    print()

    # Insertion order of the mapping keeps 'female' ahead of 'male'
    per_gender = {
        'female': (female_X, female_Y),
        'male': (male_X, male_Y),
    }
    frames = [
        setup_dataframe(name, feats, labs)
        for name, (feats, labs) in per_gender.items()
    ]
    return frames[0], frames[1]