<a href="https://colab.research.google.com/github/JellyJoa/DataAnalysis/blob/master/Dacon/sound_classify/baseline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import librosa
import librosa.display as dsp
from IPython.display import Audio
import pandas as pd
import numpy as np
import os
from tqdm import tqdm

In [2]:
import random

def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)

seed_everything(929)

In [6]:
train = pd.read_csv('/content/drive/MyDrive/DL/sound_classify/train.csv')
train.head()

Unnamed: 0,file_name,label
0,001.wav,9
1,002.wav,0
2,004.wav,1
3,005.wav,8
4,006.wav,0


In [7]:
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   file_name  400 non-null    object
 1   label      400 non-null    int64 
dtypes: int64(1), object(1)
memory usage: 6.4+ KB


In [8]:
%cd /content/drive/MyDrive/DL/sound_classify

!unzip -qq "/content/drive/MyDrive/DL/sound_classify/train.zip"
!unzip -qq "/content/drive/MyDrive/DL/sound_classify/test.zip"

/content/drive/MyDrive/DL/sound_classify


In [9]:
data, sample_rate = librosa.load('/content/drive/MyDrive/DL/sound_classify/train/001.wav', sr = 16000)
print('sample_rate:', sample_rate, ', audio shape:', data.shape)
print('length:', data.shape[0]/float(sample_rate), 'secs')

sample_rate: 16000 , audio shape: (10192,)
length: 0.637 secs


In [10]:
def train_dataset():
    folder = "/content/drive/MyDrive/DL/sound_classify/train/"
    dataset = []
    for file in tqdm(os.listdir(folder),colour='green'):
        if 'wav' in file:
            abs_file_path = os.path.join(folder,file)
            data, sr = librosa.load(abs_file_path, sr = 16000)
            class_label = int(train[train.file_name == file].label)
            dataset.append([data,class_label])
    
    print("Dataset 생성 완료")
    return pd.DataFrame(dataset,columns=['data','label'])

In [11]:
train_wav = train_dataset()

100%|[32m██████████[0m| 400/400 [00:12<00:00, 32.19it/s]

Dataset 생성 완료





In [12]:
train_wav.head()

Unnamed: 0,data,label
0,"[-0.003955867, -0.006708248, -0.005994901, -0....",0
1,"[0.00010048209, 0.0001519721, 0.00021017983, 0...",8
2,"[-1.3521178e-05, -2.205988e-07, 7.4610657e-06,...",6
3,"[4.0670046e-05, 4.2517717e-05, 1.606622e-05, 4...",6
4,"[-0.00018729804, -0.00031482545, -0.0002810128...",9


In [13]:
def extract_features(file):
    audio, sample_rate = librosa.load(file, sr = 16000)
    extracted_features = librosa.feature.mfcc(y=audio,
                                              sr=sample_rate,
                                              n_mfcc=40)

    extracted_features = np.mean(extracted_features.T,axis=0)
    return extracted_features

In [14]:
extract_features('/content/drive/MyDrive/DL/sound_classify/train/001.wav') 

array([-5.4157184e+02,  1.0199717e+02, -1.0018574e+01,  4.5054619e+01,
        7.3112831e+00,  1.0971639e+01, -1.2032939e+01, -5.8687963e+00,
       -1.8881397e+00,  4.2930884e+00, -8.1847525e+00, -2.3072267e+00,
       -9.1721897e+00,  1.4182716e+01, -1.2839543e+01, -3.1000307e+00,
       -3.0502689e+00, -2.1911802e+00, -6.2639456e+00, -5.1691580e+00,
       -1.3974123e+01,  3.3810470e+00, -6.9977813e+00,  3.7736315e-01,
       -4.4287405e+00,  1.0799457e+00, -1.3639281e+00,  4.2418456e+00,
        2.3687005e+00,  2.8972096e+00,  2.6670651e+00,  1.8590584e+00,
       -3.8219376e+00, -1.6171500e-01, -1.4186366e+00, -4.1422081e+00,
       -5.3374414e+00, -9.9907333e-01, -3.3392251e+00, -4.2987290e-01],
      dtype=float32)

In [15]:
def preprocess_train_dataset(data):
    mfccs = []
    for i in data:
        extracted_features = librosa.feature.mfcc(y=i,
                                              sr=16000,
                                              n_mfcc=40)
        extracted_features = np.mean(extracted_features.T,axis=0)
        mfccs.append(extracted_features)
            
    return mfccs

mfccs = preprocess_train_dataset(train_wav.data)
mfccs = np.array(mfccs)
mfccs

array([[-6.10061340e+02,  1.03518166e+02,  5.43828583e+01, ...,
         5.19934893e+00,  1.87639844e+00,  2.27816582e+00],
       [-5.51899719e+02,  8.80418625e+01, -2.50763297e+00, ...,
        -2.37301603e-01, -6.42527521e-01,  8.25356603e-01],
       [-6.25631409e+02,  1.72414207e+01,  2.64528923e+01, ...,
         5.46131086e+00,  2.48812735e-01,  2.64248586e+00],
       ...,
       [-6.24895752e+02,  1.54084351e+02,  1.28841314e+01, ...,
         2.64882326e+00, -9.22804952e-01,  4.14383769e-01],
       [-5.00923828e+02,  1.05222313e+02, -3.07546139e+01, ...,
         2.79688597e+00,  2.02590728e+00,  9.05824947e+00],
       [-6.55807007e+02,  1.09803085e+02,  1.75768242e+01, ...,
        -1.86223531e+00, -7.46039534e+00,  1.73112810e+00]], dtype=float32)

In [16]:
from sklearn.model_selection import train_test_split

train_X, test_X, train_y, test_y = train_test_split(mfccs, train_wav.label, test_size=0.4)

In [17]:
print('학습시킬 train 셋 : ', train_X.shape, train_y.shape)
print('검증할 val 셋 : ', test_X.shape, test_y.shape)

학습시킬 train 셋 :  (240, 40) (240,)
검증할 val 셋 :  (160, 40) (160,)


In [18]:
from sklearn.ensemble import RandomForestClassifier

# 모델 선언
model = RandomForestClassifier()

# 모델 학습
model.fit(train_X, train_y)

RandomForestClassifier()

In [19]:
# 먼저 점수를 메기는 방법인 평가 지표(Metric)를 정의합니다.
import numpy as np

def ACCURACY(true, pred):   
    score = np.mean(true==pred)
    return score

In [20]:
# 모델의 예측과 실제 정답값을 비교합니다.
prediction = model.predict(test_X)

score = ACCURACY(test_y, prediction)

print(f"모델의 정확도는 {score*100:.2f}% 입니다")

모델의 정확도는 63.12% 입니다


In [21]:
test = pd.read_csv('/content/drive/MyDrive/DL/sound_classify/test.csv')
test.head()

Unnamed: 0,file_name
0,003.wav
1,008.wav
2,010.wav
3,015.wav
4,024.wav


In [22]:
def test_dataset():
    folder = "/content/drive/MyDrive/DL/sound_classify/test/"
    dataset = []
    for file in tqdm(os.listdir(folder),colour='green'):
        if 'wav' in file:
            abs_file_path = os.path.join(folder,file)
            data, sr = librosa.load(abs_file_path, sr = 16000)
            dataset.append([data, file])
    
    print("Dataset 생성 완료")
    return pd.DataFrame(dataset,columns=['data', 'file_name'])

In [23]:
test_wav = test_dataset()

100%|[32m██████████[0m| 200/200 [00:06<00:00, 31.99it/s]

Dataset 생성 완료





In [24]:
mfccs_2 = preprocess_train_dataset(test_wav.data)
mfccs_2 = np.array(mfccs_2)

In [25]:
# 모델 선언
model = RandomForestClassifier()

# 모델 학습
model.fit(mfccs, train_wav.label)

# 모델 예측
prediction = model.predict(mfccs_2)

test_wav['label'] = prediction  

test_wav.head()

Unnamed: 0,data,file_name,label
0,"[8.87213e-05, 0.00013668207, 0.000102160935, 8...",404.wav,2
1,"[-0.00060603267, -0.0009821821, -0.00084093853...",453.wav,6
2,"[0.00020098813, 0.00029454648, 0.0002404883, 0...",349.wav,1
3,"[0.00013012115, 0.00020055204, 0.00018348989, ...",305.wav,8
4,"[-5.8358874e-06, -2.2567398e-05, -5.7295223e-0...",565.wav,7


In [26]:
testset = test_wav[['file_name', 'label']]

pred_df = testset.copy()
pred_df = pred_df.sort_values(by=[pred_df.columns[0]], ascending=[True]).reset_index(drop=True)
pred_df.head()

Unnamed: 0,file_name,label
0,003.wav,0
1,008.wav,1
2,010.wav,3
3,015.wav,3
4,024.wav,2


In [28]:
submission = pd.read_csv('/content/drive/MyDrive/DL/sound_classify/submission.csv')
submission['label'] = pred_df['label']
submission.head()

Unnamed: 0,file_name,label
0,003.wav,0
1,008.wav,1
2,010.wav,3
3,015.wav,3
4,024.wav,2


In [30]:
submission.to_csv('/content/drive/MyDrive/DL/sound_classify/saved/submit.csv', index=False)