# 0. Install required libraries

In [None]:
%pip install ipykernal
%pip install librosa
%pip install numpy
%pip install noisereduce
%pip install sklearn
%pip install pickle

In [1]:
import pickle
import librosa
import numpy as np
import noisereduce as nr
import sklearn.preprocessing

from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


# 1. Preprocess data

In [2]:
# Force every feature matrix to exactly avg_frame columns so all inputs share one shape
def zero_padding(mfcc, avg_frame=40):
    """Return ``mfcc`` with exactly ``avg_frame`` columns.

    Args:
        mfcc: 2-D array; only its second axis (columns) is adjusted.
        avg_frame: Target number of columns.

    Returns:
        ``mfcc`` right-padded with zero columns when it has fewer than
        ``avg_frame`` columns, otherwise its first ``avg_frame`` columns.
    """
    n_rows, n_cols = mfcc.shape[0], mfcc.shape[1]

    # Long enough already: keep only the leading columns.
    if n_cols >= avg_frame:
        return mfcc[:, :avg_frame]

    # Too short: append zero-filled columns on the right.
    filler = np.zeros((n_rows, avg_frame - n_cols))
    return np.concatenate((mfcc, filler), axis=1)

# raw-to-mfcc
def raw_to_mfcc(raw_file_path, sr=16000, n_frames=37):
    """Convert a headerless 16-bit PCM file into a flat, scaled MFCC vector.

    Pipeline: read samples -> noise reduction -> MFCC -> pad/truncate to
    ``n_frames`` columns -> per-row min-max scaling -> flatten.

    Args:
        raw_file_path: Path to the raw audio file. All of its bytes are
            interpreted as native-endian ``int16`` samples.
        sr: Sample rate in Hz, passed to both the noise reduction and the
            MFCC extraction so the two always agree. Defaults to 16000.
        n_frames: Number of MFCC frames (columns) kept after padding or
            truncation. Defaults to 37.

    Returns:
        1-D array of length ``20 * n_frames`` (20 MFCC coefficients per frame).

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the file size is not a multiple of 2 bytes.
    """
    # Read raw file
    with open(raw_file_path, 'rb') as file:
        # Raw-to-wav: the int16 samples are cast to float32 without rescaling.
        audio_data = np.frombuffer(file.read(), dtype=np.int16)
        audio_data = audio_data.astype(np.float32)

    # Reduce noise
    clean_data = nr.reduce_noise(y=audio_data, sr=sr)

    # Wav-to-mfcc
    # NOTE(review): fmax=400 limits the mel filter bank to 0-400 Hz; presumably
    # this must match the settings used when the classifier was trained - confirm.
    mfcc = librosa.feature.mfcc(y=clean_data, sr=sr, n_mfcc=20, n_mels=100, fmin=0, fmax=400)

    # Zero padding and scaling
    # Padding happens before scaling, so padded zeros take part in each row's min/max.
    mfcc = zero_padding(mfcc, avg_frame=n_frames)
    mfcc = sklearn.preprocessing.minmax_scale(mfcc, axis=1)

    # Flatten data for model input
    mfcc = mfcc.flatten()

    return mfcc

In [3]:
# X_test holds one flattened MFCC vector per utterance; X_test_name holds the
# matching file path at the same index.
X_test = []
X_test_name = []

# Each non-blank line of the control file is a path stem under ./raw16k/test/.
with open('fmcc_test.ctl', 'r') as ctl_file:
    for line in tqdm(ctl_file):
        stem = line.rstrip()
        if not stem:
            # A blank line (e.g. a trailing newline at the end of the file)
            # would otherwise resolve to './raw16k/test/.raw' and fail to open.
            continue

        file_path = './raw16k/test/' + stem + '.raw'

        X_test_name.append(file_path)
        X_test.append(raw_to_mfcc(file_path))

1000it [00:39, 25.54it/s]


# 2. Load trained model

In [4]:
# Restore the trained classifier that was serialized to disk with pickle.
# NOTE: unpickling can execute arbitrary code, so only load a file you trust.
with open('voting_classifier.pkl', 'rb') as model_file:
    voting_classifier = pickle.load(model_file)

# 3. Predict & Create result file

In [5]:
# Predict the classes for test data.
# X_test is the list of flattened MFCC vectors built above; the result is
# expected to hold one label per entry, in the same order as X_test_name.
predict = voting_classifier.predict(X_test)

In [6]:
# Create result file
# One line per test utterance: "<file path> <label>".
# The classifier label 'M' is written as 'male'; any other label as 'feml'.
# The `with` block closes the file even if a write fails part-way through.
with open('./과탑_test_results.txt', 'w') as result_file:
    # X_test_name and predict are index-aligned: every path was appended
    # together with the feature vector the prediction was made from.
    for name, label in zip(X_test_name, predict):
        res = 'male' if label == 'M' else 'feml'
        result_file.write(name + ' ' + res + '\n')