## Import

In [12]:
import random
import pandas as pd
import numpy as np
import os
from tqdm.auto import tqdm
import librosa

from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MinMaxScaler 

import warnings
warnings.filterwarnings(action='ignore') 

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Hyperparameter Setting

In [26]:
CFG = {
    'SR':20000,
    'N_MFCC':40, # Melspectrogram 벡터를 추출할 개수
    'SEED':42
}

## Fixed Random-Seed

In [5]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)

seed_everything(CFG['SEED']) # Seed 고정

## Data Pre-Processing

In [27]:
train_df = pd.read_csv('/content/drive/MyDrive/speech_emotion/data/train.csv')
test_df = pd.read_csv('/content/drive/MyDrive/speech_emotion/data/test.csv')

train_df, valid_df = train_test_split(train_df, test_size=0.2, random_state=CFG['SEED'])

In [36]:
def get_mfcc_feature(df):
    features = []
    for path in tqdm(df['path']):
        # librosa패키지를 사용하여 wav 파일 load
        y, sr = librosa.load('/content/drive/MyDrive/speech_emotion/data/'+path[2:], sr=CFG['SR'])
        y = librosa.util.fix_length(y, size=40000)
        # librosa패키지를 사용하여 mfcc 추출
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=CFG['N_MFCC'])
        features.append(mfcc)
        
    features = np.array(features, np.float32)
    return features

In [37]:
train_x = get_mfcc_feature(train_df)
valid_x = get_mfcc_feature(valid_df)
test_x = get_mfcc_feature(test_df)

  0%|          | 0/4000 [00:00<?, ?it/s]

  0%|          | 0/1001 [00:00<?, ?it/s]

  0%|          | 0/1881 [00:00<?, ?it/s]

In [38]:
print(train_x.shape)
print(valid_x.shape)
print(test_x.shape)

(4000, 40, 79)
(1001, 40, 79)
(1881, 40, 79)


In [39]:
train_x = train_x.reshape((4000, -1))
valid_x = valid_x.reshape((1001, -1))
test_x = test_x.reshape((1881, -1))

In [40]:
scaler = MinMaxScaler()
scaler.fit(train_x)

valid_scaler = MinMaxScaler()
scaler.fit(valid_x)

test_scaler = MinMaxScaler()
scaler.fit(test_x)

In [41]:
train_y = np.array(train_df['label'])
valid_y = np.array(valid_df['label'])

## Classification Model Fit

In [42]:
SVM = svm.SVC(kernel = 'linear')
SVM.fit(train_x, train_y)

In [43]:
valid_pred = SVM.predict(valid_x)
print('Accuracy: %.2f' % accuracy_score(valid_y, valid_pred))

Accuracy: 0.38


## Inference

In [14]:
preds = model.predict(test_x)

NameError: ignored

## Submission

In [None]:
submission = pd.read_csv('./sample_submission.csv')
submission['label'] = preds
submission.to_csv('./baseline_submission.csv', index=False)

FileNotFoundError: [Errno 2] No such file or directory: './sample_submission.csv'