# ELEC0138 Security and Privacy Assignment
## Group 11 Demo

The following notebook contains the demo code for the assignmet project. 

## Step 0a) Install dependencies
Run the following code block in order to install the required dependencies for the project.

In [4]:
# for transformation
!pip install praat-parselmouth

# for encryption and decryption
!pip install pycryptodome

# for gender detection
!pip install scikit-learn
!pip install tensorflow
!pip install pandas
!pip install tqdm

# for speech to text function
!pip install SpeechRecognition
!pip install ffprobe
!pip install ffmpeg
!pip install pydub



Collecting tqdm
  Using cached tqdm-4.65.0-py3-none-any.whl (77 kB)
Installing collected packages: tqdm
Successfully installed tqdm-4.65.0


In [5]:
import os
import time
import hashlib
import csv
import speech_recognition as sr

import parselmouth
from parselmouth.praat import call

from pathlib import Path
from Crypto.PublicKey import RSA
from Crypto.Util.Padding import pad
from Crypto.Cipher import AES, PKCS1_OAEP
from Crypto.Random import get_random_bytes
from pydub import AudioSegment


import pickle
from sklearn.linear_model import LinearRegression

import pandas as pd
import numpy as np
import tqdm
import glob
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard, EarlyStopping
from sklearn.model_selection import train_test_split

import librosa
import librosa.display
import IPython.display as ipd
import matplotlib.pyplot as plt

## Step 0b) Define constants

In [6]:
RAW_AUDIO_DIR = './demo/raw'
TRANSFORMED_AUDIO_DIR = './demo/transformed'
ENCRYPTED_AUDIO_DIR = './demo/transformed_encrypted'
DECRYPTED_AUDO_DIR = './demo/transformed_decrypted'
key_dir = './keys'

## Step 1) Transform the audio files
The transformation code uses the ParselMouth library which uses Praat functions in order to modify and transform the audio files. The raw audio files are read from the source directory and stores them in the output directory. 

In [7]:
def copy_folders(src_folder, dest_folder):
    for item in os.listdir(src_folder):
        if os.path.isdir(os.path.join(src_folder, item)):
            os.makedirs(os.path.join(dest_folder, item), exist_ok=True)
            copy_folders(os.path.join(src_folder, item), os.path.join(dest_folder, item)) 

def __transform_audio(snd):
    """Transforms the provided audio file

    Args:
        snd (parselmouth.Sound): sound to be transformed

    Returns:
        parselmouth.Sound: the transformed sound
    """
    pitch_shift = 60
    formant_shift_factor = 1.5 
    
    pitch = snd.to_pitch()
    medain_pitch = call(pitch, "Get quantile", 0, 0, 0.5, "Hertz")
    new_pitch = medain_pitch + pitch_shift
    transformed_snd = call(snd, "Change gender", 100, 500, formant_shift_factor, new_pitch, 1, 1)

    return transformed_snd


def transform(data_dir, output_dir):
    """Transforms all the audio files from the raw dataset
    """
    print('=> Transforming raw audio files...')
    
    # Check if the output_dir exists
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
        
    copy_folders(data_dir, output_dir)
    
    raw_files = set()
    for dir_, _, files in os.walk(data_dir):
        for file_name in files:
            if file_name.endswith('.flac'):
                rel_dir = os.path.relpath(dir_, data_dir)
                rel_file = os.path.join(rel_dir, file_name)
                raw_files.add(rel_file)
                
    
    

    # Check if any raw files need to be processed
    if len(raw_files) == 0:
        print('\tUnable to find any raw files. Skipping this step.')
    else:
        print(f'\tTransforming {len(raw_files)} audio files...')

        # Transform the all the audio files
        for idx, file_sub_path in enumerate(raw_files):

            raw_file_path = os.path.join(data_dir, file_sub_path).replace('\\', '/')
            file_name = os.path.basename(file_sub_path)
            subdirs = os.path.dirname(file_sub_path)
            output_file_name = os.path.join(output_dir, subdirs, f'transformed_{file_name}').replace("\\","/")

            snd = parselmouth.Sound(raw_file_path)
            transformed_snd = __transform_audio(snd)
            print(output_file_name)
            transformed_snd.save(output_file_name, parselmouth.SoundFileFormat.FLAC)

        print('\tSuccessfully transformed audio files.')

In [8]:
transform(RAW_AUDIO_DIR, TRANSFORMED_AUDIO_DIR)

=> Transforming raw audio files...
	Transforming 10 audio files...
./demo/transformed/male/transformed_1272-128104-0000.flac
./demo/transformed/male/transformed_251-118436-0000.flac
./demo/transformed/male/transformed_422-122949-0000.flac
./demo/transformed/male/transformed_652-129742-0000.flac
./demo/transformed/male/transformed_777-126732-0000.flac
./demo/transformed/female/transformed_1462-170138-0000.flac
./demo/transformed/female/transformed_84-121123-0000.flac
./demo/transformed/female/transformed_1919-142785-0000.flac
./demo/transformed/female/transformed_1988-24833-0000.flac
./demo/transformed/female/transformed_1673-143396-0000.flac
	Successfully transformed audio files.


## Step 2) Encrypt and decrypt the audio files

### Step 2.1) Generate AES and RSA keys and save to local files

In [9]:
AES_key = get_random_bytes(32) #32 bytes (256) or 16 bytes (128)
RSA_key = RSA.generate(2048)

In [10]:
# Save key
if not os.path.exists(key_dir):
    os.makedirs(key_dir)

with open(os.path.join(key_dir, 'AES_key.txt'), 'wb') as f:
    f.write(AES_key)

with open(os.path.join(key_dir, 'public_key.txt'), 'wb') as f:
    f.write(RSA_key.publickey().export_key())

with open(os.path.join(key_dir, 'private_key.txt'), 'wb') as f:
    f.write(RSA_key.export_key())

### Step 2.2) Encrypt the audio files and the AES key
The audio files are encrypted with AES, and the AES key is encrypted with the public key of RSA

In [11]:
def copy_folders(src_folder, dest_folder):
    for item in os.listdir(src_folder):
        if os.path.isdir(os.path.join(src_folder, item)):
            # Copy folders
            os.makedirs(os.path.join(dest_folder, item), exist_ok =  True)
            # Copy subfolders
            copy_folders(os.path.join(src_folder, item), os.path.join(dest_folder, item))

In [12]:
def encrypt_file(key_dir, data_dir):
    if not os.path.exists(key_dir):
        print('Cannot find key')
        
    else:
        with open(os.path.join(key_dir, 'AES_key.txt'), 'rb') as f:
            key = f.read()
        with open(os.path.join(key_dir, 'public_key.txt'), 'rb') as f:
            public_key = RSA.import_key(f.read())
    
        i = 1
        BLOCK_SIZE = 128
        prev_dir_path = ''
        chunksize = 1000 * 1024
        cipher = PKCS1_OAEP.new(public_key)

        filepaths = list(Path(data_dir).glob(r'**/*.flac'))
        print('Detected ' + str(len(filepaths)) + ' audio files in ' + data_dir)

        outfile_dir = data_dir + '_encrypted'
        if not os.path.exists(outfile_dir):
            os.makedirs(outfile_dir)
        print('Encrypted audio file is saved in', os.path.abspath(outfile_dir))

        copy_folders(data_dir, outfile_dir)
        
        start_time = time.time()

        for filename in filepaths:

            output_file = os.path.join(outfile_dir, os.path.splitext(str(filename))[0] + '_AES.enc')
            output_file = output_file.replace(data_dir[2:].replace('/', '\\') + '\\', '')

            dir_path = os.path.dirname(output_file)
            if dir_path != prev_dir_path:
                print('\nFor files in', os.path.abspath(dir_path))
                prev_dir_path = dir_path

            iv = get_random_bytes(AES.block_size)
            encryptor = AES.new(key, AES.MODE_CBC, iv)
            filesize = os.path.getsize(str(filename))

            with open(str(filename), 'rb') as f:
                chunk = f.read(chunksize)

                with open(output_file, 'wb') as f:
                    f.write(filesize.to_bytes(8, 'big'))
                    f.write(iv)
                    chunk = pad(chunk, AES.block_size)
                    f.write(encryptor.encrypt(chunk))
                    end_time_in = time.time()
                    print(str(i) + ' ' + os.path.basename(str(filename)) + ' encrypted as ' 
                          + os.path.basename(output_file) + ' %.2f' % (end_time_in - start_time) + 's')
                    i += 1
        
        # Encrypt AES key
        with open(os.path.join(key_dir, 'AES_key.txt'), 'rb') as f:
            plaintext = f.read()
        ciphertext = b''
        for i in range(0, len(plaintext), BLOCK_SIZE):
            block = plaintext[i : i + BLOCK_SIZE]
            ciphertext += cipher.encrypt(block)
        with open(os.path.join(key_dir, 'AES_key.enc'), 'wb') as f:
            f.write(ciphertext)
        print('\nAES key is encrypted as AES_key.enc and saved in', os.path.abspath(os.path.join(key_dir, 'AES_key.enc')))
        
        end_time = time.time()
        
        print('\nTime for encrypting ' + str(len(filepaths)) + ' files is %.2f' % (end_time - start_time) + 's')

In [13]:
encrypt_file(key_dir, TRANSFORMED_AUDIO_DIR)

Detected 10 audio files in ./demo/transformed
Encrypted audio file is saved in C:\Users\Ys1ong\Desktop\ELEC0138_22-23_Assignment\demo\transformed_encrypted

For files in C:\Users\Ys1ong\Desktop\ELEC0138_22-23_Assignment\demo\transformed_encrypted\female
1 transformed_1462-170138-0000.flac encrypted as transformed_1462-170138-0000_AES.enc 0.01s
2 transformed_1673-143396-0000.flac encrypted as transformed_1673-143396-0000_AES.enc 0.01s
3 transformed_1919-142785-0000.flac encrypted as transformed_1919-142785-0000_AES.enc 0.01s
4 transformed_1988-24833-0000.flac encrypted as transformed_1988-24833-0000_AES.enc 0.02s
5 transformed_84-121123-0000.flac encrypted as transformed_84-121123-0000_AES.enc 0.02s

For files in C:\Users\Ys1ong\Desktop\ELEC0138_22-23_Assignment\demo\transformed_encrypted\male
6 transformed_1272-128104-0000.flac encrypted as transformed_1272-128104-0000_AES.enc 0.02s
7 transformed_251-118436-0000.flac encrypted as transformed_251-118436-0000_AES.enc 0.02s
8 transformed_

### Step 2.3) Decrypt the AES key and the audio files
The AES key is decrypted with the private key of RSA and the audio files are decrypted bwith the AES key

In [14]:
def decrypt_file(key_dir, data_dir):
    if not os.path.exists(key_dir):
        print('Cannot find key')
        
    else:
        with open(os.path.join(key_dir, 'private_key.txt'), 'rb') as f:
            private_key = RSA.import_key(f.read())
    
        BLOCK_SIZE = 128
        prev_dir_path = ''
        chunksize = 1000 * 1024
        cipher = PKCS1_OAEP.new(private_key)
        
        with open(os.path.join(key_dir, 'AES_key.enc'), 'rb') as f:
            ciphertext = f.read()
        plaintext = b''
        for i in range(0, len(ciphertext), private_key.size_in_bytes()):
            block = ciphertext[i : i + private_key.size_in_bytes()]
            plaintext += cipher.decrypt(block)
        with open(os.path.join(key_dir, 'AES_key_dec.txt'), 'wb') as f:
            f.write(plaintext)
        print('Decrypted AES key AES_key_dec.txt is saved in', os.path.abspath(os.path.join(key_dir, 'AES_key_dec.txt')))
        with open(os.path.join(key_dir, 'AES_key_dec.txt'), 'rb') as f:
            key = f.read()

        filepaths = list(Path(data_dir).glob(r'**/*.enc'))
        print('\nDetected ' + str(len(filepaths)) + ' encrypted audio files in ' + os.path.abspath(data_dir))

        outfile_dir = data_dir.replace('encrypted', 'decrypted')
        if not os.path.exists(outfile_dir):
            os.makedirs(outfile_dir)
        print('Decrypted audio file is saved in', os.path.abspath(outfile_dir))

        copy_folders(data_dir, outfile_dir)
        
        start_time = time.time()

        i = 1
        
        for filename in filepaths:

            output_file = os.path.join(outfile_dir, os.path.splitext(str(filename))[0] + '.flac')
            output_file = output_file.replace(data_dir[2:].replace('/', '\\') + '\\', '')

            dir_path = os.path.dirname(output_file)
            if dir_path != prev_dir_path:
                print('\nFor files in', os.path.abspath(dir_path))
                prev_dir_path = dir_path

            with open(str(filename), 'rb') as f:
                filesize = int.from_bytes(f.read(8), 'big')
                iv = f.read(AES.block_size)
                decryptor = AES.new(key, AES.MODE_CBC, iv)
                chunk = f.read(chunksize)

                with open(output_file, 'wb') as f:
                    f.write(decryptor.decrypt(chunk))
                    f.truncate(filesize)
                    end_time_in = time.time()
                    print(str(i) + ' ' + os.path.basename(str(filename)) + ' decrypted as ' 
                          + os.path.basename(output_file) + ' %.2f' % (end_time_in - start_time) + 's')
                    i += 1
                
        end_time = time.time()
        
        print('\nTime for decrypting ' + str(len(filepaths)) + ' files is %.2f' % (end_time - start_time) + 's')

In [15]:
decrypt_file(key_dir, TRANSFORMED_AUDIO_DIR + '_encrypted')

Decrypted AES key AES_key_dec.txt is saved in C:\Users\Ys1ong\Desktop\ELEC0138_22-23_Assignment\keys\AES_key_dec.txt

Detected 10 encrypted audio files in C:\Users\Ys1ong\Desktop\ELEC0138_22-23_Assignment\demo\transformed_encrypted
Decrypted audio file is saved in C:\Users\Ys1ong\Desktop\ELEC0138_22-23_Assignment\demo\transformed_decrypted

For files in C:\Users\Ys1ong\Desktop\ELEC0138_22-23_Assignment\demo\transformed_decrypted\female
1 transformed_1462-170138-0000_AES.enc decrypted as transformed_1462-170138-0000_AES.flac 0.00s
2 transformed_1673-143396-0000_AES.enc decrypted as transformed_1673-143396-0000_AES.flac 0.01s
3 transformed_1919-142785-0000_AES.enc decrypted as transformed_1919-142785-0000_AES.flac 0.01s
4 transformed_1988-24833-0000_AES.enc decrypted as transformed_1988-24833-0000_AES.flac 0.02s
5 transformed_84-121123-0000_AES.enc decrypted as transformed_84-121123-0000_AES.flac 0.02s

For files in C:\Users\Ys1ong\Desktop\ELEC0138_22-23_Assignment\demo\transformed_decry

### Step 2.4) Compare the decrypted audio files with the original audio files

In [16]:
def hash_file(data_dir):
    hash_dict = {}
    
    filepaths = list(Path(data_dir).glob(r'**/*.flac'))
    print('Detected ' + str(len(filepaths)) + ' audio files in ' + os.path.abspath(data_dir))
    
    for filename in filepaths:
            
        with open(filename, 'rb') as f:
            md5obj = hashlib.md5()
            md5obj.update(f.read())
            hash_value = md5obj.hexdigest()
            #print(os.path.basename(str(filename)) + ' hash value is ' + hash_value)
            
            if hash_value in hash_dict:
                hash_dict[hash_value].append(filename)
            else:
                hash_dict[hash_value] = [filename]
    
    return hash_dict

def compare_hash(data_dir1, data_dir2):
    i = 0
    filepaths1 = list(Path(data_dir1).glob(r'**/*.flac'))
    filepaths2 = list(Path(data_dir2).glob(r'**/*.flac'))
    
    hash_dict1 = hash_file(data_dir1)
    hash_dict2 = hash_file(data_dir2)
    
    for hash_value in hash_dict1:
        if hash_value in hash_dict2:
            i += 1
            print('\nFiles with hash value ' + hash_value + ' found in both directories:')
            print([os.path.abspath(str(f)) for f in hash_dict1[hash_value]][0])
            print([os.path.abspath(str(f)) for f in hash_dict2[hash_value]][0])
            
    print('\n{} ({:.2%}) files are the same after decrypted compared to original files'.format(i, i/len(filepaths1)))

In [17]:
compare_hash(TRANSFORMED_AUDIO_DIR, TRANSFORMED_AUDIO_DIR + '_decrypted')

Detected 10 audio files in C:\Users\Ys1ong\Desktop\ELEC0138_22-23_Assignment\demo\transformed
Detected 10 audio files in C:\Users\Ys1ong\Desktop\ELEC0138_22-23_Assignment\demo\transformed_decrypted

Files with hash value e5107f777fee78b223b16dd7ba7dfc58 found in both directories:
C:\Users\Ys1ong\Desktop\ELEC0138_22-23_Assignment\demo\transformed\female\transformed_1462-170138-0000.flac
C:\Users\Ys1ong\Desktop\ELEC0138_22-23_Assignment\demo\transformed_decrypted\female\transformed_1462-170138-0000_AES.flac

Files with hash value 420068ebe9e6e8cdd6a5bc7bb468ee7f found in both directories:
C:\Users\Ys1ong\Desktop\ELEC0138_22-23_Assignment\demo\transformed\female\transformed_1673-143396-0000.flac
C:\Users\Ys1ong\Desktop\ELEC0138_22-23_Assignment\demo\transformed_decrypted\female\transformed_1673-143396-0000_AES.flac

Files with hash value dc1c273af7596427b75a2c57070f6421 found in both directories:
C:\Users\Ys1ong\Desktop\ELEC0138_22-23_Assignment\demo\transformed\female\transformed_1919-14

## Step 3) Gender detection model

In [18]:
with open('./models/model_saved.pkl', 'rb') as f:
    loaded_model = pickle.load(f)

Keras model archive loading:
File Name                                             Modified             Size
config.json                                    2023-04-15 13:02:08         3264
metadata.json                                  2023-04-15 13:02:08           64
variables.h5                                   2023-04-15 13:02:08      1927928
Keras weights file (<HDF5 file "variables.h5" (mode r)>) loading:
...layers\dense
......vars
.........0
.........1
...layers\dense_1
......vars
.........0
.........1
...layers\dense_2
......vars
.........0
.........1
...layers\dense_3
......vars
.........0
.........1
...layers\dense_4
......vars
.........0
.........1
...layers\dropout
......vars
...layers\dropout_1
......vars
...layers\dropout_2
......vars
...layers\dropout_3
......vars
...metrics\mean
......vars
.........0
.........1
...metrics\mean_metric_wrapper
......vars
.........0
.........1
...optimizer
......vars
.........0
.........1
.........10
.........11
.........12
.........13
...

In [19]:
def extract_feature(file_name, **kwargs):
    """
    Extract feature from audio file `file_name`
        Features supported:
            - MFCC (mfcc)
            - Chroma (chroma)
            - MEL Spectrogram Frequency (mel)
            - Contrast (contrast)
            - Tonnetz (tonnetz)
        e.g:
        `features = extract_feature(path, mel=True, mfcc=True)`
    """
    mfcc = kwargs.get("mfcc")
    chroma = kwargs.get("chroma")
    mel = kwargs.get("mel")
    contrast = kwargs.get("contrast")
    tonnetz = kwargs.get("tonnetz")
    X, sample_rate = librosa.core.load(file_name)
    if chroma or contrast:
        stft = np.abs(librosa.stft(X))
    result = np.array([])
    if mfcc:
        mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
        result = np.hstack((result, mfccs))
    if chroma:
        chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)
        result = np.hstack((result, chroma))
    if mel:
        mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T,axis=0)
        result = np.hstack((result, mel))
    if contrast:
        contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T,axis=0)
        result = np.hstack((result, contrast))
    if tonnetz:
        tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T,axis=0)
        result = np.hstack((result, tonnetz))
    return result

In [23]:
folder_path_male = './demo/raw/male'

flac_files_male = glob.glob(os.path.join(folder_path_male, '*.flac'))

# initial the male file name list
name_male_file =[]
for file_male in flac_files_male:
    name_male_file.append('./demo/raw/male/' + os.path.basename(file_male))

print(name_male_file)

###############################################################################################

folder_path_female = './demo/raw/female'

flac_files_female = glob.glob(os.path.join(folder_path_female, '*.flac'))

# initial the male file name list
name_female_file =[]
for file_female in flac_files_female:
    name_female_file.append('./demo/raw/female/' + os.path.basename(file_female))

print(name_female_file)

#############################################################################################
folder_path_female_2 = './demo/transformed_decrypted/Female'

flac_files_female_2 = glob.glob(os.path.join(folder_path_female_2, '*.flac'))

# initial the male file name list
name_female_file_2 =[]
for file_female_2 in flac_files_female_2:
    name_female_file_2.append('./demo/transformed_decrypted/Female/' + os.path.basename(file_female_2))

print(name_female_file_2)

#############################################################################################
folder_path_male_2 = './demo/transformed_decrypted/Male'

flac_files_male_2 = glob.glob(os.path.join(folder_path_male_2, '*.flac'))

# initial the male file name list
name_male_file_2 =[]
for file_male_2 in flac_files_male_2:
    name_male_file_2.append('./demo/transformed_decrypted/Male/'+os.path.basename(file_male_2))

print(name_male_file_2)

['./demo/raw/male/1272-128104-0000.flac', './demo/raw/male/251-118436-0000.flac', './demo/raw/male/422-122949-0000.flac', './demo/raw/male/652-129742-0000.flac', './demo/raw/male/777-126732-0000.flac']
['./demo/raw/female/1462-170138-0000.flac', './demo/raw/female/1673-143396-0000.flac', './demo/raw/female/1919-142785-0000.flac', './demo/raw/female/1988-24833-0000.flac', './demo/raw/female/84-121123-0000.flac']
['./demo/transformed_decrypted/Female/transformed_1462-170138-0000_AES.flac', './demo/transformed_decrypted/Female/transformed_1673-143396-0000_AES.flac', './demo/transformed_decrypted/Female/transformed_1919-142785-0000_AES.flac', './demo/transformed_decrypted/Female/transformed_1988-24833-0000_AES.flac', './demo/transformed_decrypted/Female/transformed_84-121123-0000_AES.flac']
['./demo/transformed_decrypted/Male/transformed_1272-128104-0000_AES.flac', './demo/transformed_decrypted/Male/transformed_251-118436-0000_AES.flac', './demo/transformed_decrypted/Male/transformed_422-1

In [24]:
h_male =[]
male_prob =[]
female_prob =[]
gender_male=[]
for i in range(len(name_male_file)):
    h_male.append(extract_feature(name_male_file[i], mel=True))

    # predict the gender!
    male_prob.append(loaded_model.predict(h_male[i].reshape(1,-1))[0][0])

    female_prob.append(1 - male_prob[i])
    gender_male.append("male" if male_prob[i] > female_prob[i] else "female")

print(gender_male)

count_of_male = gender_male.count('male')
print("Number of 'male' in the list:", count_of_male)

['male', 'male', 'male', 'male', 'male']
Number of 'male' in the list: 5


In [25]:
h_female =[]
male_prob =[]
female_prob =[]
gender_female=[]
for i in range(len(name_female_file)):
    h_female.append(extract_feature(name_female_file[i], mel=True))

    # predict the gender!
    male_prob.append(loaded_model.predict(h_female[i].reshape(1,-1))[0][0])

    female_prob.append(1 - male_prob[i])
    gender_female.append("male" if male_prob[i] > female_prob[i] else "female")

print(gender_female)

count_of_female = gender_female.count('female')
print("Number of 'female' in the list:", count_of_female)

['female', 'female', 'female', 'female', 'female']
Number of 'female' in the list: 5


In [26]:
h_male_2 =[]
male_prob =[]
female_prob =[]
gender_male_2=[]
for i in range(len(name_male_file_2)):
    h_male_2.append(extract_feature(name_male_file_2[i], mel=True))

    # predict the gender!
    male_prob.append(loaded_model.predict(h_male_2[i].reshape(1,-1))[0][0])

    female_prob.append(1 - male_prob[i])
    gender_male_2.append("male" if male_prob[i] > female_prob[i] else "female")

print(gender_male_2)

count_of_male = gender_male_2.count('male')
print("Number of 'male' in the list:", count_of_male)

['female', 'female', 'female', 'male', 'female']
Number of 'male' in the list: 1


In [27]:
h_female_2 =[]
male_prob =[]
female_prob =[]
gender_female_2=[]
for i in range(len(name_female_file_2)):
    h_female_2.append(extract_feature(name_female_file_2[i], mel=True))

    # predict the gender!
    male_prob.append(loaded_model.predict(h_female_2[i].reshape(1,-1))[0][0])

    female_prob.append(1 - male_prob[i])
    gender_female_2.append("male" if male_prob[i] > female_prob[i] else "female")

print(gender_female_2)

count_of_female = gender_female_2.count('female')
print("Number of 'female' in the list:", count_of_female)

['female', 'female', 'female', 'female', 'female']
Number of 'female' in the list: 5


## Step 4) Evaluate the Speech-to-text function of audio

In [31]:
def google_to_text(filename):
    '''
    call Google speech-to-text api
        filename: path of .flac file
        return: dict of prediction, {'sentence ID' : ['word1', 'word2', 'word3', ...]}
    
    '''
    flac_audio = AudioSegment.from_file(filename, format="flac")
    flac_audio.export("audio.wav", format="wav")

    r = sr.Recognizer()

    # read out the text output from google API
    with sr.AudioFile("audio.wav") as source:
        audio = r.record(source)
        text = r.recognize_google(audio, language="en-US")
#         print(text)
        text_list = text.strip().split(' ')

    return text_list

def read_text_gt(filename):
    '''
    load ground truth in txt file to dict format
        filename: path of txt file
        return: dict of ground truth, {'sentence ID' : ['word1', 'word2', 'word3', ...]}
    
    '''
    with open(filename, 'r') as file:
        lines = file.readlines()
    result_dict = {}
    for line in lines:
        items = line.strip().split(' ')

        key = items[0]
        values = items[1:]
        lowercase_values = [s.lower() for s in values]
        result_dict[key] = lowercase_values

#     print(result_dict)
    return result_dict


def count_matching_words(sentence1, sentence2):
    '''
    calcuate the accuary of result
        sentence 1: list of word
        sentence 2: list of word

        return: number of words matched, number of words missed in sentence1

    '''
    words1 = set(sentence1)
    words2 = set(sentence2)

    matching_words = words1.intersection(words2)

    non_matching_words = words1.difference(words2)
    num_non_matching_words = len(non_matching_words)

    return len(matching_words), num_non_matching_words


def create_csv(path, acc_list, result_list):
    '''
    save the records of training
    Args:
        path: save path of csv file
        acc_list: acc indicator
        result_list: speech to text result

    Returns: nothing
    '''
    # save predict labels of test dataset
    with open(path, 'w', encoding='utf-8', newline='') as f:
        csv_write = csv.writer(f)
        csv_write.writerow([str(acc_list[0]), str(acc_list[1]), str(acc_list[2])])
        csv_write.writerows(i for i in result_list)

In [37]:
current_path = os.path.join(os.getcwd(), "demo")
raw_audio_path = os.path.join(current_path, 'raw')
processed_audio_path = os.path.join(current_path, 'transformed_decrypted')
text_gt_path = os.path.join(current_path, 'text.txt')

print('text ground truth file is :',text_gt_path)
text_gt = read_text_gt(text_gt_path)

# speech to text for raw audios 

TP_whole_raw=0
FN_whole_raw=0
result_list_raw = []
for root, dirs, files in os.walk(raw_audio_path):
    for file in files:
        if file.endswith('.flac'):
            audio_file = os.path.basename(file)
            index = os.path.splitext(audio_file)[0]
            print('---recognizing ',os.path.join(root, audio_file))
            try:
                recognition = google_to_text(os.path.join(root, audio_file))
                TP, FN = count_matching_words(text_gt[index],recognition)
                result_list_raw.append(recognition)
            except:
                TP, FN = 0, 0
                result_list_raw.append([])
            TP_whole_raw += TP
            FN_whole_raw += FN
            

# speech to text for processed audios
print("\n")
TP_whole_pro=0
FN_whole_pro=0
result_list_pro = []
for root, dirs, files in os.walk(processed_audio_path):
    for file in files:
        if file.endswith('.flac'):
            audio_file = os.path.basename(file)
            index = os.path.splitext(audio_file)[0].split('_')[1]
            print('---recognizing ',os.path.join(root, audio_file))
            try:
                recognition = google_to_text(os.path.join(root, audio_file))
                TP, FN = count_matching_words(text_gt[index],recognition)
                result_list_pro.append(recognition)
            except:
                TP, FN = 0, 0
                result_list_pro.append([])
            TP_whole_pro += TP
            FN_whole_pro += FN

            
# output result 

accuracy_raw = TP_whole_raw / (TP_whole_raw+FN_whole_raw)
print('The accuaracy of test dataset in raw audio is: %.4f' %accuracy_raw)

acc_list_raw = [accuracy_raw, TP_whole_raw, FN_whole_raw]
create_csv(os.path.join(current_path, 'raw_audio_result.csv'), acc_list_raw, result_list_raw)

accuracy_pro = TP_whole_pro / (TP_whole_pro+FN_whole_pro)
print('The accuaracy of test dataset in processed audio is: %.4f' %accuracy_pro)

acc_list_pro = [accuracy_pro, TP_whole_pro, FN_whole_pro]
create_csv(os.path.join(current_path, 'processed_audio_result.csv'), acc_list_pro, result_list_pro)

text ground truth file is : C:\Users\Ys1ong\Desktop\ELEC0138_22-23_Assignment\demo\text.txt
---recognizing  C:\Users\Ys1ong\Desktop\ELEC0138_22-23_Assignment\demo\raw\female\1462-170138-0000.flac
---recognizing  C:\Users\Ys1ong\Desktop\ELEC0138_22-23_Assignment\demo\raw\female\1673-143396-0000.flac
---recognizing  C:\Users\Ys1ong\Desktop\ELEC0138_22-23_Assignment\demo\raw\female\1919-142785-0000.flac
---recognizing  C:\Users\Ys1ong\Desktop\ELEC0138_22-23_Assignment\demo\raw\female\1988-24833-0000.flac
---recognizing  C:\Users\Ys1ong\Desktop\ELEC0138_22-23_Assignment\demo\raw\female\84-121123-0000.flac
---recognizing  C:\Users\Ys1ong\Desktop\ELEC0138_22-23_Assignment\demo\raw\male\1272-128104-0000.flac
---recognizing  C:\Users\Ys1ong\Desktop\ELEC0138_22-23_Assignment\demo\raw\male\251-118436-0000.flac
---recognizing  C:\Users\Ys1ong\Desktop\ELEC0138_22-23_Assignment\demo\raw\male\422-122949-0000.flac
---recognizing  C:\Users\Ys1ong\Desktop\ELEC0138_22-23_Assignment\demo\raw\male\652-129

### ! tips about runtime error !
If you meet the "RuntimeWarning: Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work" 

or "RuntimeWarning: Couldn't find ffprobe or avconv - defaulting to ffmpeg, but may not work"

Please download FFMPEG https://github.com/BtbN/FFmpeg-Builds/releases

Unzip it and open //bin folder 

move all executable (ffmpeg.exe) and .dll in //bin folder to this project directory