In [1]:
import numpy as np

def get_test_file():
  file_name = open(f'202301ml_fmcc/fmcc_test_ref.txt', 'r')
  train_data_list = []
  label_list = []

  for i in file_name.readlines():
    file, label = i.strip('\n').split(' ')
    
    f = open(f'202301ml_fmcc/raw16k/test/' + file + '.raw', 'rb')
    file = np.fromfile(f, dtype='int16', sep="")
    
    train_data_list.append(file.astype(np.float32))
    label_list.append(0 if label[0] == 'm' else 1)
    
  return train_data_list, label_list

def get_train_file():
  file_name = open(f'202301ml_fmcc/fmcc_train.ctl', 'r')
  train_data_list = []
  label_list = []
  for i in file_name.readlines():
    i = i.strip('\n')
    f = open(f'202301ml_fmcc/raw16k/train/' + i + '.raw', 'rb')
    file = np.fromfile(f, dtype='int16', sep="")
    
    train_data_list.append(file.astype(np.float32))
    label_list.append(0 if i[0] == 'M' else 1)
    
  return train_data_list, label_list

In [2]:
import numpy as np
import pandas as pd

import os
import librosa

import scipy
from scipy.stats import skew
from tqdm import tqdm, tqdm_pandas

tqdm.pandas()

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

from sklearn.svm import SVC

In [3]:
# Generate mfcc features with mean and standard deviation
SAMPLE_RATE = 16000

def get_mfcc(data): 
    try:
        ft1 = librosa.feature.mfcc(y = data, sr = SAMPLE_RATE, n_mfcc=30)
        ft2 = librosa.feature.zero_crossing_rate(y = data)[0]
        ft3 = librosa.feature.spectral_rolloff(y = data)[0]
        ft4 = librosa.feature.spectral_centroid(y = data)[0]
        ft5 = librosa.feature.spectral_contrast(y = data)[0]
        ft6 = librosa.feature.spectral_bandwidth(y = data)[0]
        ft1_trunc = np.hstack((np.mean(ft1, axis=1), np.std(ft1, axis=1), skew(ft1, axis = 1), np.max(ft1, axis = 1), np.median(ft1, axis = 1), np.min(ft1, axis = 1)))
        ft2_trunc = np.hstack((np.mean(ft2), np.std(ft2), skew(ft2), np.max(ft2), np.median(ft2), np.min(ft2)))
        ft3_trunc = np.hstack((np.mean(ft3), np.std(ft3), skew(ft3), np.max(ft3), np.median(ft3), np.min(ft3)))
        ft4_trunc = np.hstack((np.mean(ft4), np.std(ft4), skew(ft4), np.max(ft4), np.median(ft4), np.min(ft4)))
        ft5_trunc = np.hstack((np.mean(ft5), np.std(ft5), skew(ft5), np.max(ft5), np.median(ft5), np.min(ft5)))
        ft6_trunc = np.hstack((np.mean(ft6), np.std(ft6), skew(ft6), np.max(ft6), np.median(ft6), np.max(ft6)))
        return pd.Series(np.hstack((ft1_trunc, ft2_trunc, ft3_trunc, ft4_trunc, ft5_trunc, ft6_trunc)))
    except:
        print('bad file')
        return pd.Series([0]*210)
    
def preprocess_data(data_list):
    feature_list = []
    for i in tqdm(range(len(data_list))):
        feature_list.append(get_mfcc(data_list[i]))
    return pd.DataFrame(feature_list)



In [14]:
from IPython.display import Audio

train_data, train_label = get_train_file()
test_data, test_label = get_test_file()

print(train_label[0])
print(test_label[0])

Audio(train_data[0], rate=SAMPLE_RATE)
Audio(test_data[0], rate=SAMPLE_RATE)

1
1


In [5]:
train_data_feature = preprocess_data(train_data)
train_data_feature.head()

test_data_feature = preprocess_data(test_data)
test_data_feature.head()

100%|██████████| 10000/10000 [02:18<00:00, 72.21it/s]
100%|██████████| 900/900 [00:11<00:00, 78.82it/s]


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,200,201,202,203,204,205,206,207,208,209
0,947.736389,63.669979,9.527609,14.521003,-2.21034,-18.987074,-7.916063,-8.910859,-20.378723,-8.452337,...,0.257467,32.457389,18.168564,5.856216,2892.978733,275.069969,0.966884,3565.669554,2803.650297,3565.669554
1,931.816406,44.733875,-9.455639,38.198257,-25.297447,-15.91183,-13.061944,-20.436848,-13.840734,-23.72007,...,-0.442572,22.289,12.137554,0.589478,2463.540236,326.411226,-0.589338,2925.718294,2542.493227,2925.718294
2,949.070251,26.101494,6.126511,-1.783883,-0.261157,-2.436235,-11.881001,-3.921774,-1.720328,-2.421887,...,-0.127741,24.928365,12.920896,2.034783,2827.601696,352.574562,0.763145,3654.854151,2743.043874,3654.854151
3,984.155212,52.6325,-28.557673,7.95704,-11.422466,-26.764269,-25.868858,-13.510789,-12.90753,-10.771862,...,-0.475334,24.056097,14.890788,0.971824,2565.834423,263.06848,0.75524,3260.525708,2486.466715,3260.525708
4,979.354309,65.947868,7.187599,11.461431,-1.998217,4.184265,-17.131212,-4.429257,-19.198687,1.491132,...,0.796828,33.890462,18.075664,10.290308,2714.565283,621.565915,-0.401433,3667.903284,2691.281724,3667.903284


In [6]:
train_data_feature.shape
test_data_feature.shape


(900, 210)

In [7]:
train_data_label = np.array(train_label)
test_data_label = np.array(test_label)


In [8]:
# Apply scaling for PCA
scaler = StandardScaler()
train_x_scaled = scaler.fit_transform(train_data_feature)
test_x_scaled = scaler.fit_transform(test_data_feature)


In [9]:
train_pca = PCA(n_components=65).fit(train_x_scaled)
train_x_pca = train_pca.transform(train_x_scaled)

test_pca = PCA(n_components=65).fit(test_x_scaled)
test_x_pca = test_pca.transform(test_x_scaled)

print(sum(train_pca.explained_variance_ratio_)) 
print(sum(test_pca.explained_variance_ratio_)) 


0.8807046660022634
0.8939380930838275


In [10]:
# Fit an SVM model
X_train, X_val, y_train, y_val = train_test_split(train_x_pca, train_data_label, test_size = 0.2, random_state = 42, shuffle = True)

clf = SVC(kernel = 'rbf', probability=True)

clf.fit(X_train, y_train)

print(accuracy_score(clf.predict(X_val), y_val))

0.9045


In [11]:
print(accuracy_score(clf.predict(test_x_pca), test_data_label))


print(train_x_pca.shape)
print(train_data_label.shape)

print(test_x_pca.shape)
print(test_data_label.shape)

0.47555555555555556
(10000, 65)
(10000,)
(900, 65)
(900,)
