In [4]:
import numpy as np
import pandas as pd
import os
import librosa
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier

In [5]:
from sklearn.metrics import accuracy_score
from scipy.stats import skew
SAMPLE_RATE = 16000

In [6]:
from tqdm import tqdm

import scipy

In [43]:
librosa.show_versions()

INSTALLED VERSIONS
------------------
python: 3.10.13 (main, Sep 11 2023, 13:44:35) [GCC 11.2.0]

librosa: 0.10.1

audioread: 3.0.1
numpy: 1.26.0
scipy: 1.11.3
sklearn: 1.3.2
joblib: 1.3.2
decorator: 5.1.1
numba: 0.58.1
soundfile: 0.9.0
pooch: v1.8.0
soxr: 0.3.7
typing_extensions: installed, no version number available
lazy_loader: installed, no version number available
msgpack: 1.0.7

numpydoc: None
sphinx: None
sphinx_rtd_theme: None
matplotlib: 3.8.1
sphinx_multiversion: None
sphinx_gallery: None
mir_eval: None
ipython: None
sphinxcontrib.rsvgconverter: None
pytest: None
pytest_mpl: None
pytest_cov: None
samplerate: None
resampy: None
presets: None
packaging: 23.1


In [7]:
df = pd.read_csv("/scratch/network/mk8574/audio_sentiment_challenge/data/train.csv")

In [50]:
def get_mfcc(name):
    path = "/scratch/network/mk8574/audio_sentiment_challenge/data"
    b, _ = librosa.core.load(path + name[1:], sr = SAMPLE_RATE)

   
    assert _ == SAMPLE_RATE
 
    try:
      
        ft1 = librosa.feature.mfcc(y=b, sr = SAMPLE_RATE, n_mfcc=20)

        ft2 = librosa.feature.zero_crossing_rate(y=b)[0]
        ft3 = librosa.feature.spectral_rolloff(y=b)[0]
        ft4 = librosa.feature.spectral_centroid(y=b)[0]
        
        ft1_trunc = np.hstack((np.mean(ft1, axis=1), np.std(ft1, axis=1), skew(ft1, axis = 1), np.max(ft1, axis = 1), np.min(ft1, axis = 1)))
        ft2_trunc = np.hstack((np.mean(ft2), np.std(ft2), skew(ft2), np.max(ft2), np.min(ft2)))
        ft3_trunc = np.hstack((np.mean(ft3), np.std(ft3), skew(ft3), np.max(ft3), np.min(ft3)))
        ft4_trunc = np.hstack((np.mean(ft4), np.std(ft4), skew(ft4), np.max(ft4), np.min(ft4)))
        
        return pd.Series(np.hstack((ft1_trunc, ft2_trunc, ft3_trunc, ft4_trunc)))
    except:
        print('bad file')
        return pd.Series([0]*115)

In [51]:
get_mfcc(df["path"][0])

0      -414.755768
1       110.100639
2        46.699074
3        23.939814
4        14.766221
          ...     
110    1757.048961
111     548.601215
112       4.651649
113    4937.061334
114    1417.299571
Length: 115, dtype: float64

In [52]:
train = pd.read_csv("/scratch/network/mk8574/audio_sentiment_challenge/data/train.csv")
test = pd.read_csv("/scratch/network/mk8574/audio_sentiment_challenge/data/test.csv")
train_data = pd.DataFrame()
train_data['fname'] = train['path']
test_data = pd.DataFrame()
test_data['fname'] = test['path']

train_data = train_data['fname'].apply(get_mfcc)
print('done loading train mfcc')
test_data = test_data['fname'].apply(get_mfcc)
print('done loading test mfcc')

train_data['fname'] = train['path']
test_data['fname'] = test['path']

  ft1_trunc = np.hstack((np.mean(ft1, axis=1), np.std(ft1, axis=1), skew(ft1, axis = 1), np.max(ft1, axis = 1), np.min(ft1, axis = 1)))


done loading train mfcc
done loading test mfcc


In [53]:
train_data['label'] = train['label']
test_data['label'] = np.zeros((len(test)))

In [54]:
train_data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,107,108,109,110,111,112,113,114,fname,label
0,-414.755768,110.100639,46.699074,23.939814,14.766221,4.820827,1.445079,-0.926153,2.892507,4.465991,...,2.716211,9700.708008,2433.251953,1757.048961,548.601215,4.651649,4937.061334,1417.299571,./train/TRAIN_0000.wav,1
1,-399.769531,83.0513,55.473316,31.782587,22.040754,0.985082,3.712758,-2.35932,-0.026562,0.393657,...,0.739956,10012.939453,2583.984375,2684.697354,1668.877531,1.220943,6634.829822,1377.601065,./train/TRAIN_0001.wav,2
2,-341.145081,97.399071,38.274349,19.811539,0.731027,0.838703,-1.911201,-10.645285,-1.595292,-2.305276,...,0.810742,9851.44043,1313.525391,2378.619653,1515.726845,1.998935,6632.407966,1255.889075,./train/TRAIN_0002.wav,4
3,-376.963715,118.96167,34.490349,24.178419,-1.065605,-1.613391,2.888372,-7.865878,-2.260844,-5.609925,...,1.045671,8074.951172,1012.060547,1557.806416,510.66955,2.322108,4122.733019,1010.040479,./train/TRAIN_0003.wav,5
4,-352.863251,117.553345,29.948687,31.094316,5.406392,-5.591999,-4.8094,-11.500416,-6.894948,2.946273,...,0.052639,7644.287109,1248.925781,1673.094057,446.903984,1.873157,3786.299328,975.654634,./train/TRAIN_0004.wav,4


In [55]:
test_data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,107,108,109,110,111,112,113,114,fname,label
0,-335.757324,125.215431,22.145767,14.351713,-1.04525,0.56786,-0.666743,-8.11483,-7.13674,-0.890951,...,0.181159,6610.693359,1378.125,1579.666105,304.585754,0.392215,2542.704594,1027.372377,./test/TEST_0000.wav,0.0
1,-295.973083,92.839691,24.976181,22.831312,-10.27867,7.804742,-7.779604,-11.83502,-9.290084,-2.127427,...,0.543844,9130.078125,1152.026367,1954.635065,504.731338,1.57895,4243.908179,1038.011646,./test/TEST_0001.wav,0.0
2,-444.395996,109.385201,55.236778,26.48605,12.487712,9.069915,2.790433,-0.485669,-2.200089,-3.334618,...,1.030299,10034.472656,872.094727,1692.604231,893.836349,3.20618,6174.399779,971.531068,./test/TEST_0002.wav,0.0
3,-384.600281,112.463974,47.454556,17.33646,13.556947,4.677102,-5.572524,-1.35483,-2.386852,0.361085,...,1.068405,10002.172852,872.094727,1878.747055,1117.230823,3.416088,7019.040575,888.37603,./test/TEST_0003.wav,0.0
4,-273.304077,97.817047,12.370096,24.59148,3.248469,-9.987857,-6.24626,-13.113686,-1.204894,0.282557,...,0.390368,8031.884766,1862.62207,2104.891983,518.462117,1.418307,4013.059597,1384.532166,./test/TEST_0004.wav,0.0


In [None]:
def extract_features(files):
    path = "/scratch/network/mk8574/audio_sentiment_challenge/data"
    features = {}
    cnt = 0
    for f in tqdm(files):
        features[f] = {}
        fs, data = scipy.io.wavfile.read(path + f[1:])
        abs_data = np.abs(data)
        diff_data = np.diff(data)
        def calc_part_features(data, n=2, prefix=''):
            f_i = 1
            for i in range(0, len(data), len(data)//n):
                features[f]['{}mean_{}_{}'.format(prefix, f_i, n)] = np.mean(data[i:i + len(data)//n])
                features[f]['{}std_{}_{}'.format(prefix, f_i, n)] = np.std(data[i:i + len(data)//n])
                features[f]['{}min_{}_{}'.format(prefix, f_i, n)] = np.min(data[i:i + len(data)//n])
                features[f]['{}max_{}_{}'.format(prefix, f_i, n)] = np.max(data[i:i + len(data)//n])
        features[f]['len'] = len(data)
        if features[f]['len'] > 0:
            n = 1
            calc_part_features(data, n=n)
            calc_part_features(abs_data, n=n, prefix='abs_')
            calc_part_features(diff_data, n=n, prefix='diff_')

            n = 2
            calc_part_features(data, n=n)
            calc_part_features(abs_data, n=n, prefix='abs_')
            calc_part_features(diff_data, n=n, prefix='diff_')

            n = 3
            calc_part_features(data, n=n)
            calc_part_features(abs_data, n=n, prefix='abs_')
            calc_part_features(diff_data, n=n, prefix='diff_')
        cnt += 1
    features = pd.DataFrame(features).T.reset_index()
    features.rename(columns={'index': 'fname'}, inplace=True)
    return features


train_files = train.fname.values
train_features = extract_features(train_files)


test_files = ss.fname.values
test_features = extract_features(test_files)

In [None]:
train_data = train_data.merge(train_features, on='fname', how='left')
test_data = test_data.merge(test_features, on='fname', how='left')
train_data.head()

In [16]:
x = df.drop(columns = ["id","path","label"])
y = df["label"]
len(y)

5001

In [17]:
X_train , X_test , y_train, y_test = train_test_split(x,y , test_size=0.2, random_state=2021)

In [18]:
xgb = XGBClassifier(n_estimators=1000, learning_rate=0.05) #1000개의 가지? epoch? , 0.05 학습률
xgb.fit(X_train, y_train) #학습

y_preds = xgb.predict(X_test) #검증

print('Accuracy: %.2f' % accuracy_score(y_test,y_preds))

XGBoostError: [00:55:17] /workspace/src/data/iterative_dmatrix.cc:202: Check failed: n_features >= 1 (0 vs. 1) : Data must has at least 1 column.
Stack trace:
  [bt] (0) /home/mk8574/.conda/envs/mk8574_3.10/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x3effba) [0x7f3d46b14fba]
  [bt] (1) /home/mk8574/.conda/envs/mk8574_3.10/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x3f59b7) [0x7f3d46b1a9b7]
  [bt] (2) /home/mk8574/.conda/envs/mk8574_3.10/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x3f8858) [0x7f3d46b1d858]
  [bt] (3) /home/mk8574/.conda/envs/mk8574_3.10/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x3a2a07) [0x7f3d46ac7a07]
  [bt] (4) /home/mk8574/.conda/envs/mk8574_3.10/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(XGQuantileDMatrixCreateFromCallback+0x2b0) [0x7f3d4688ac40]
  [bt] (5) /home/mk8574/.conda/envs/mk8574_3.10/lib/python3.10/lib-dynload/../../libffi.so.8(+0xa052) [0x7f3dc374c052]
  [bt] (6) /home/mk8574/.conda/envs/mk8574_3.10/lib/python3.10/lib-dynload/../../libffi.so.8(+0x8925) [0x7f3dc374a925]
  [bt] (7) /home/mk8574/.conda/envs/mk8574_3.10/lib/python3.10/lib-dynload/../../libffi.so.8(ffi_call+0xde) [0x7f3dc374b06e]
  [bt] (8) /home/mk8574/.conda/envs/mk8574_3.10/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0x91e7) [0x7f3dc375c1e7]



In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns

cm = confusion_matrix(y_test,y_preds)

plt.figure(figsize=(16,9))
sns.heatmap(
    cm,
    annot=True,
    xticklabels=["blues","classical","country","disco","hiphop","jazz","metal","pop","reggae","rock"],
    yticklabels=["blues","classical","country","disco","hiphop","jazz","metal","pop","reggae","rock"]
)
plt.show()