In [1]:
import numpy as np

def get_test_file():
  file_name = open(f'202301ml_fmcc/fmcc_test_ref.txt', 'r')
  train_data_list = []
  label_list = []

  for i in file_name.readlines():
    file, label = i.strip('\n').split(' ')
    
    f = open(f'202301ml_fmcc/raw16k/test/' + file + '.raw', 'rb')
    file = np.fromfile(f, dtype='int16', sep="")
    
    train_data_list.append(file.astype(np.float32))
    label_list.append(0 if label[0] == 'm' else 1)
    
  return train_data_list, label_list

def get_train_file():
  file_name = open(f'202301ml_fmcc/fmcc_train.ctl', 'r')
  train_data_list = []
  label_list = []
  for i in file_name.readlines():
    i = i.strip('\n')
    f = open(f'202301ml_fmcc/raw16k/train/' + i + '.raw', 'rb')
    file = np.fromfile(f, dtype='int16', sep="")
    
    train_data_list.append(file.astype(np.float32))
    label_list.append(0 if i[0] == 'M' else 1)
    
  return train_data_list, label_list

In [2]:
import numpy as np
import pandas as pd

import os
import librosa

import scipy
from scipy.stats import skew
from tqdm import tqdm, tqdm_pandas

tqdm.pandas()

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

from sklearn.svm import SVC

In [3]:
# Generate mfcc features with mean and standard deviation
SAMPLE_RATE = 16000

def get_mfcc(data): 
    try:
        ft1 = librosa.feature.mfcc(y = data, sr = SAMPLE_RATE, n_mfcc=30)
        ft2 = librosa.feature.zero_crossing_rate(y = data)[0]
        ft3 = librosa.feature.spectral_rolloff(y = data)[0]
        ft4 = librosa.feature.spectral_centroid(y = data)[0]
        ft5 = librosa.feature.spectral_contrast(y = data)[0]
        ft6 = librosa.feature.spectral_bandwidth(y = data)[0]
        ft1_trunc = np.hstack((np.mean(ft1, axis=1), np.std(ft1, axis=1), skew(ft1, axis = 1), np.max(ft1, axis = 1), np.median(ft1, axis = 1), np.min(ft1, axis = 1)))
        ft2_trunc = np.hstack((np.mean(ft2), np.std(ft2), skew(ft2), np.max(ft2), np.median(ft2), np.min(ft2)))
        ft3_trunc = np.hstack((np.mean(ft3), np.std(ft3), skew(ft3), np.max(ft3), np.median(ft3), np.min(ft3)))
        ft4_trunc = np.hstack((np.mean(ft4), np.std(ft4), skew(ft4), np.max(ft4), np.median(ft4), np.min(ft4)))
        ft5_trunc = np.hstack((np.mean(ft5), np.std(ft5), skew(ft5), np.max(ft5), np.median(ft5), np.min(ft5)))
        ft6_trunc = np.hstack((np.mean(ft6), np.std(ft6), skew(ft6), np.max(ft6), np.median(ft6), np.max(ft6)))
        return pd.Series(np.hstack((ft1_trunc, ft2_trunc, ft3_trunc, ft4_trunc, ft5_trunc, ft6_trunc)))
    except:
        print('bad file')
        return pd.Series([0]*210)
    
def preprocess_data(data_list):
    feature_list = []
    for i in tqdm(range(len(data_list))):
        feature_list.append(get_mfcc(data_list[i]))
    return pd.DataFrame(feature_list)



In [4]:
import os

folder_name = "data"  # 생성할 폴더 이름을 지정하세요

# 현재 작업 디렉토리에 폴더를 생성합니다
try:
    os.mkdir(folder_name)
    print("폴더가 성공적으로 생성되었습니다.")
except FileExistsError:
    print("이미 동일한 이름의 폴더가 존재합니다.")
except Exception as e:
    print("폴더 생성 중 오류가 발생했습니다:", str(e))


폴더가 성공적으로 생성되었습니다.


In [5]:
try:
  train_data_feature = pd.read_csv(f'{folder_name}/train_data_feature.csv')
  test_data_feature = pd.read_csv(f'{folder_name}/test_data_feature.csv')
  
  train_label = pd.read_csv(f'{folder_name}/train_data_label.csv')
  test_label = pd.read_csv(f'{folder_name}/test_data_label.csv')
  
  train_label = train_label.values.ravel()
  test_label = test_label.values.ravel()
except:
  train_data, train_label = get_train_file()
  test_data, test_label = get_test_file()

  train_data_feature = preprocess_data(train_data)
  test_data_feature = preprocess_data(test_data)
  
  train_data_feature.to_csv(f'{folder_name}/train_data_feature.csv', index=False)
  test_data_feature.to_csv(f'{folder_name}/test_data_feature.csv', index=False)
  
  pd.DataFrame(train_label).to_csv(f'{folder_name}/train_data_label.csv', index=False)
  pd.DataFrame(test_label).to_csv(f'{folder_name}/test_data_label.csv', index=False)


100%|██████████| 10000/10000 [01:41<00:00, 98.15it/s] 
100%|██████████| 900/900 [00:08<00:00, 101.15it/s]


In [6]:
from IPython.display import Audio

print(train_label[0])

Audio(train_data[130], rate=SAMPLE_RATE)
Audio(test_data[0], rate=SAMPLE_RATE)

1


In [7]:
train_data_feature.shape
test_data_feature.shape


(900, 210)

In [8]:
train_data_label = np.array(train_label)
test_data_label = np.array(test_label)


In [9]:
# Apply scaling for PCA
scaler = StandardScaler()
train_x_scaled = scaler.fit_transform(train_data_feature)
test_x_scaled = scaler.fit_transform(test_data_feature)


In [10]:
train_pca = PCA(n_components=65).fit(train_x_scaled)
train_x_pca = train_pca.transform(train_x_scaled)

test_pca = PCA(n_components=65).fit(test_x_scaled)
test_x_pca = test_pca.transform(test_x_scaled)

print(sum(train_pca.explained_variance_ratio_)) 
print(sum(test_pca.explained_variance_ratio_)) 


0.880806709351117
0.8940401737540838


In [11]:
# Fit an SVM model
X_train, X_val, y_train, y_val = train_test_split(train_x_pca, train_data_label, test_size = 0.2, random_state = 42, shuffle = True)

clf = SVC(kernel = 'rbf', probability=True)

clf.fit(X_train, y_train)

print(accuracy_score(clf.predict(X_val), y_val))

0.903


In [12]:
print(accuracy_score(clf.predict(test_x_pca), test_data_label))

0.4777777777777778
