In [1]:
import os # for interacting with the operating system
import numpy as np # for linear algebra operations
import pandas as pd # for data processing and reading/writing CSV files
import matplotlib.pyplot as plt # for plotting data
import seaborn as sns # for creating beautiful plots
import librosa # a library for analyzing audio and music
import librosa.display # for displaying audio data
from IPython.display import Audio # for playing audio files

# Importing necessary modules from scikit-learn library
# StandardScaler is used for feature scaling
# OneHotEncoder is used for one-hot encoding categorical variables
# confusion_matrix and classification_report are used for evaluating the performance of a classifier
# train_test_split is used for splitting the data into training and testing sets

from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

from keras.callbacks import ReduceLROnPlateau
from keras.models import Sequential
from keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, Dropout, BatchNormalization, AveragePooling1D
from keras.utils import np_utils, to_categorical
from keras.callbacks import ModelCheckpoint

from tensorflow.keras.layers import Input, Conv1D, MaxPooling1D, AveragePooling1D, Flatten, Dense, Dropout
from tensorflow.keras.models import Model

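# NOTE(review): stray fragment below is not valid Python and does not belong to this imports cell; commented out.
#   "class": algorithms.Blowfish,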


In [2]:
def prepare_data(female_X, female_Y, male_X, male_Y,Females_Features, Males_Features, encoder):
    """Split both feature frames into feature matrices and one-hot targets.

    Parameters
    ----------
    female_X, female_Y, male_X, male_Y
        Ignored. They are kept only so existing call sites keep working; all
        four values are recomputed from the two feature frames.
    Females_Features, Males_Features : pandas.DataFrame
        Frames that contain a ``'labels'`` column. Every column except the
        last one is used as a feature.
        NOTE(review): this assumes ``'labels'`` is the last column — confirm
        against the code that builds these frames.
    encoder
        Object exposing ``fit`` and ``transform`` (e.g. a scikit-learn
        ``OneHotEncoder``). It is fitted in place on the labels of BOTH frames.

    Returns
    -------
    tuple
        ``(female_X, female_Y, male_X, male_Y)`` where the ``*_X`` values are
        the feature arrays and the ``*_Y`` values are dense one-hot arrays that
        share the same column layout.
    """
    # Features: everything but the last column of each frame.
    female_X = Females_Features.iloc[:, :-1].values
    male_X = Males_Features.iloc[:, :-1].values

    # Labels as single-column arrays, the shape the encoder is called with.
    female_labels = np.asarray(Females_Features['labels'].values).reshape(-1, 1)
    male_labels = np.asarray(Males_Features['labels'].values).reshape(-1, 1)

    # Fit ONCE on the union of labels. Fitting separately per gender gives the
    # two target matrices different widths / column meanings whenever one
    # gender lacks a class, and leaves the encoder fitted on male labels only.
    encoder.fit(np.concatenate((female_labels, male_labels), axis=0))

    def _dense(encoded):
        # Sparse results expose .toarray(); dense results are passed through.
        return encoded.toarray() if hasattr(encoded, "toarray") else np.asarray(encoded)

    female_Y = _dense(encoder.transform(female_labels))
    male_Y = _dense(encoder.transform(male_labels))

    return female_X, female_Y, male_X, male_Y

In [3]:
def split_data(female_X, female_Y, male_X, male_Y, test_size=0.20, random_state=0):
    """Merge the female and male sets, split into train/test and scale features.

    Parameters
    ----------
    female_X, male_X : array-like
        Feature arrays; stacked along the first axis (female rows first).
    female_Y, male_Y : array-like
        Target arrays; stacked the same way as the features.
    test_size : float, optional
        Passed to ``train_test_split``. Defaults to 0.20, the value that was
        previously hard-coded.
    random_state : int, optional
        Seed passed to ``train_test_split``. Defaults to 0, the value that was
        previously hard-coded.

    Returns
    -------
    tuple
        ``(x_train, x_test, y_train, y_test)`` with the feature arrays scaled
        by a ``StandardScaler`` fitted on the training split only.
    """
    # Gender-agnostic dataset: female samples followed by male samples.
    no_gender_X = np.concatenate((female_X, male_X))
    no_gender_Y = np.concatenate((female_Y, male_Y))

    # Shuffled hold-out split; the seed keeps it reproducible across runs.
    x_train, x_test, y_train, y_test = train_test_split(
        no_gender_X,
        no_gender_Y,
        random_state=random_state,
        test_size=test_size,
        shuffle=True,
    )

    # Report the shapes of both splits.
    print('shape of the training sets: x_train: ',x_train.shape,' y_train: ', y_train.shape,'\nshape of the testing sets: x_test: ', x_test.shape,' y_test: ', y_test.shape)

    # Fit the scaler on the training split only, then reuse it for the test
    # split so test-set statistics are not used during fitting.
    # NOTE(review): the fitted scaler is local and not returned, so callers
    # cannot apply the same scaling to new data later.
    scaler = StandardScaler()
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.transform(x_test)

    return x_train, x_test, y_train, y_test

In [4]:
def arrange_data(x_train, x_test, y_train, y_test):
    """Append a trailing axis (axis=2) to both feature arrays.

    The targets are passed through untouched. The shapes of all four arrays
    are printed after the reshape.

    Returns
    -------
    tuple
        ``(x_train, x_test, y_train, y_test)`` where the two feature arrays
        have one extra axis inserted at position 2.
    """
    # Same expansion for both splits: insert a new axis at index 2.
    x_train, x_test = (
        np.expand_dims(features, axis=2) for features in (x_train, x_test)
    )

    # Report the resulting shapes of the training and testing sets.
    print('shape of the training sets: x_train: ',x_train.shape,' y_train: ', y_train.shape,'\nshape of the testing sets: x_test: ', x_test.shape,' y_test: ', y_test.shape)

    return x_train, x_test, y_train, y_test