In [1]:
from pathlib import Path
from typing import List, Tuple, Dict

# Wav Features and Visualization
import librosa
import IPython.display as ipd
# Basics
import numpy as np
import pandas as pd
# Visualization
import seaborn as sns
import matplotlib.pyplot as plt
plt.style.use('ggplot')
# Machine Learning
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

!pip install catboost
import catboost as cb
from catboost import CatBoostClassifier

!pip install lightgbm
from lightgbm import LGBMClassifier

from sklearn.ensemble import RandomForestClassifier

from sklearn.model_selection import GridSearchCV



# View Data

In [2]:
def load_wav(path: str, sample_rate: int) -> np.ndarray:
    """Load an audio file with librosa and return only its samples.

    Args:
        path: Location of the audio file; handed to ``librosa.load`` unchanged.
        sample_rate: Value passed to ``librosa.load`` as its ``sr`` argument.

    Returns:
        The first element of the pair returned by ``librosa.load`` (the
        waveform); the second element is discarded.
    """
    loaded = librosa.load(path, sr=sample_rate)
    samples = loaded[0]
    return samples


def extract_features(
        file_path: str, 
        sample_rate: int=22050,
        hop_length: int=180,
        n_fft: int=1024,
        n_frames_per_example: int=1
    ) -> List[np.ndarray]:
    """Turn one audio file into a list of flattened log-mel-spectrogram chunks.

    The file is loaded with ``load_wav``, converted to a mel spectrogram,
    log-scaled, and cut along the time axis into consecutive, non-overlapping
    groups of ``n_frames_per_example`` frames. Trailing frames that do not
    fill a whole group are dropped.

    Args:
        file_path: Audio file to read (``str`` or ``pathlib.Path``; it is
            passed straight through to ``load_wav``).
        sample_rate: Rate used for loading and for building the mel filters.
        hop_length: Hop between successive spectrogram frames, in samples.
        n_fft: FFT window size.
        n_frames_per_example: Number of consecutive frames packed into one
            example. Must be at least 1.

    Returns:
        A list with one array per example, each of shape
        ``(1, n_mel_bands * n_frames_per_example)``. The list is empty when
        the file has fewer than ``n_frames_per_example`` frames.

    Raises:
        ValueError: If ``n_frames_per_example`` is smaller than 1.
    """
    if n_frames_per_example < 1:
        raise ValueError(
            f'n_frames_per_example must be >= 1, got {n_frames_per_example}'
        )

    waveform = load_wav(file_path, sample_rate=sample_rate)

    # The signal is passed by keyword because recent librosa releases reject
    # a positional waveform. `sr` is forwarded so the mel filter bank is built
    # for the rate the audio was actually loaded at.
    spectrogram = librosa.feature.melspectrogram(
        y=waveform, sr=sample_rate, n_fft=n_fft, hop_length=hop_length
    )

    # NOTE(review): melspectrogram already returns a power spectrogram by
    # default, so squaring again only rescales the log values. Kept as-is so
    # the extracted features stay the same. The 1e-20 offset avoids log(0).
    spectrogram = np.log(1e-20 + np.abs(spectrogram ** 2))

    n_examples = spectrogram.shape[1] // n_frames_per_example

    # Slice the time axis into blocks and flatten each block to a single row.
    return [
        spectrogram[:, start:start + n_frames_per_example].reshape(1, -1)
        for start in range(
            0, n_examples * n_frames_per_example, n_frames_per_example
        )
    ]

In [3]:
# path = './train/classical/1688451846481878412.wav'
# # 1161154212832220511.wav
# # 1429195257218202828.wav
# n_frames_per_example = 15
# axis = False

# ipd.display(ipd.Audio(path))

# features = extract_features(path, n_frames_per_example=n_frames_per_example)

# features = [
#     feature.reshape(-1, n_frames_per_example)
#     for feature in features
# ]

# plt.figure(figsize=(20,5))
# plt.title('Spectrogram')
# plt.imshow(np.hstack(features), cmap='plasma')
# if axis:
#     plt.xlabel('seconds')
#     plt.ylabel('mel')
# else:
#     plt.axis('off')
# plt.savefig('spectrogram.png', dpi=200)
# plt.show()

# plt.figure(figsize=(20,5))
# for i, feature in enumerate(features):
#     plt.subplot(1, len(features) + 1, i + 1)
#     plt.imshow(feature, cmap='plasma')
#     plt.axis('off')
# plt.show()

# Prepare Train Dataset

In [4]:
def load_folder_data(
        path: Path, 
        train: bool, 
        params: Dict[str, int]
    ) -> Tuple[
        List[List[np.ndarray]], 
        List[List[str]], 
        List[Path]
    ]:
    """Extract features for every ``*.wav`` file found under ``path``.

    Files are discovered recursively with ``Path.rglob`` and processed in the
    order it yields them, so the three returned lists are aligned by position.

    Args:
        path: Root directory to search.
        train: When true, a label list is produced for each file; the label
            is the name of the directory that directly contains the file.
        params: Keyword arguments forwarded to ``extract_features``.

    Returns:
        A tuple ``(features, labels, file_names)``:

        * ``features`` - one list of example arrays per file;
        * ``labels`` - one list per file with the class name repeated once
          per example of that file, or an empty list when ``train`` is false;
        * ``file_names`` - the ``Path`` of every processed file.
    """
    features: List[List[np.ndarray]] = []
    labels: List[List[str]] = []
    file_names: List[Path] = list(path.rglob('*.wav'))

    for file_path in file_names:
        file_features = extract_features(file_path, **params)
        features.append(file_features)

        if train:
            # Every example cut from a file inherits the file's class.
            class_name = file_path.parent.name
            labels.append([class_name] * len(file_features))

    return features, labels, file_names

In [5]:
from google.colab import drive

# Expose Google Drive inside the Colab runtime under /content/gdrive.
drive.mount('/content/gdrive')

# Split settings: seed for train_test_split and the held-out share of files.
SEED = 42
test_size = 0.05

# Keyword arguments forwarded to extract_features for every audio file.
params = dict(
    sample_rate=22050,
    hop_length=170,
    n_fft=2048,
    n_frames_per_example=1,
)

# Dataset locations on the mounted drive.
train_path = Path('/content/gdrive/MyDrive/Colab Notebooks/dz8/train/train')
test_path = Path('/content/gdrive/MyDrive/Colab Notebooks/dz8/test/test')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [6]:
# Only sub-directories of the training folder are classes. Stray files are
# skipped, and the names are sorted so the name -> id mapping does not depend
# on the arbitrary order in which the filesystem lists entries.
class_names = sorted(
    folder.name for folder in train_path.iterdir() if folder.is_dir()
)

class_name2id = {
    class_name: class_id
    for class_id, class_name in enumerate(class_names)
}

features, labels, file_names = load_folder_data(train_path, train=True, params=params)

# Replace each per-example class name with its integer id, keeping the
# one-list-per-file nesting returned by load_folder_data.
labels = [
    [class_name2id[label] for label in label_list]
    for label_list in labels
]

In [7]:
# Split at file level so all examples cut from one recording stay on the
# same side of the split.
(
    features_train, features_test,
    labels_train, labels_test,
    files_train, files_test,
) = train_test_split(
    features,
    labels,
    file_names,
    test_size=test_size,
    random_state=SEED,
)

In [8]:
# Flatten the per-file lists into one row per example.
train_rows = []
for file_examples in features_train:
    train_rows.extend(file_examples)
X_train = np.vstack(train_rows)

# Flatten the per-file label lists in the same order as the rows above.
train_targets = []
for file_targets in labels_train:
    train_targets.extend(file_targets)
y_train = np.array(train_targets)

In [9]:
# Standardise the features, then fit a CatBoost classifier on them.
scaler_step = ('scaler', StandardScaler())
catboost_step = (
    'catboost',
    CatBoostClassifier(n_estimators=300, learning_rate=0.65, reg_lambda=100),
)

pipe = Pipeline([scaler_step, catboost_step]).fit(X_train, y_train)

0:	learn: 1.9079198	total: 48.8s	remaining: 4h 3m 16s
1:	learn: 1.7667608	total: 1m 36s	remaining: 3h 59m 45s
2:	learn: 1.7056526	total: 2m 24s	remaining: 3h 58m 3s
3:	learn: 1.6598540	total: 3m 12s	remaining: 3h 57m 1s
4:	learn: 1.6200312	total: 4m 1s	remaining: 3h 57m 33s
5:	learn: 1.5971023	total: 4m 49s	remaining: 3h 56m 10s
6:	learn: 1.5690537	total: 5m 37s	remaining: 3h 55m 12s
7:	learn: 1.5456663	total: 6m 25s	remaining: 3h 54m 13s
8:	learn: 1.5277832	total: 7m 12s	remaining: 3h 52m 56s
9:	learn: 1.5093425	total: 8m	remaining: 3h 52m 4s
10:	learn: 1.4907576	total: 8m 48s	remaining: 3h 51m 14s
11:	learn: 1.4710241	total: 9m 36s	remaining: 3h 50m 32s
12:	learn: 1.4536249	total: 10m 24s	remaining: 3h 49m 40s
13:	learn: 1.4380767	total: 11m 12s	remaining: 3h 48m 56s
14:	learn: 1.4247488	total: 12m	remaining: 3h 48m 8s
15:	learn: 1.4141702	total: 12m 48s	remaining: 3h 47m 19s
16:	learn: 1.4011400	total: 13m 36s	remaining: 3h 46m 34s
17:	learn: 1.3923293	total: 14m 24s	remaining: 3h 4

In [10]:
# Extract features for the unlabeled test files; the label slot comes back
# empty because train=False.
test_features, _, test_pathes = load_folder_data(
    path=test_path,
    train=False,
    params=params,
)

In [11]:
# One genre per test file: average the per-example class probabilities over
# the whole file and take the most likely class.
# predict_proba columns are ordered like pipe.classes_, so the winning column
# is translated through classes_ before it is used as an index into
# class_names. Indexing with the raw column number is only correct when the
# fitted classes happen to be exactly 0..n-1.
predictions = []
for test_feature in test_features:
    mean_proba = pipe.predict_proba(np.vstack(test_feature)).mean(axis=0)
    predicted_class_id = int(pipe.classes_[mean_proba.argmax()])
    predictions.append(class_names[predicted_class_id])

In [12]:
# Build the file-name column from test_pathes, the list the predictions were
# computed from, so every row pairs a file with its own prediction.
# Re-listing the directory could give a different order or extra non-wav
# entries.
(
    pd.DataFrame({
        'index': [test_wav_path.name for test_wav_path in test_pathes],
        'genre': predictions
    })
    .to_csv('/content/gdrive/MyDrive/Colab Notebooks/dz8/my_submition.csv', index=False)
)