# Voice Recognition

## I. Preprocessing
Transform audio files into spectrogram images

Initialize dataset path (should be executed before each experiment)

In [None]:
from pathlib import Path

from utils.audios import plot_spectrogram
from utils.files import increment_path

# Root folder holding every input and generated artifact used by this notebook.
ROOT = Path("data")
# Source audio, organised as one sub-directory per class.
DATA_PATH = ROOT / "audio"
# Output folder for the spectrogram images. increment_path is a project helper
# (presumably returns a non-clashing variant of the path -- confirm in utils.files).
# NOTE(review): the experiment cells read ROOT / "SpectrogramImages" directly, so
# they only see these images if increment_path returns that exact folder.
DATASET_PATH = increment_path(ROOT / "SpectrogramImages")

Plot spectrogram images

The audio directory must follow the structure below, and every audio file must be in `.wav` format:
```
data
└── audio
    ├── class1
    │   ├── file1.wav
    │   ├── file2.wav
    │   └── ...
    ├── class2
    │   ├── file1.wav
    │   ├── file2.wav
    │   └── ...
    └── ...
```
If you need to convert audio files to `.wav` format, use the `transform_audio` function in `utils.audios`.

Transform audio files to spectrogram images

In [None]:
# Render one spectrogram PNG per audio file, mirroring the class sub-directory
# layout of DATA_PATH under DATASET_PATH.
for directory in DATA_PATH.iterdir():
    if not directory.is_dir():
        continue  # entries next to the class folders that are not directories are skipped
    output_dir = DATASET_PATH / directory.name
    for file in directory.iterdir():
        if not file.is_file():
            continue
        # Create the class folder lazily (only once a file is actually found).
        # parents=True also creates DATASET_PATH itself when it does not exist
        # yet -- a bare mkdir() raises FileNotFoundError in that case -- and
        # exist_ok=True makes the call a no-op on subsequent files.
        output_dir.mkdir(parents=True, exist_ok=True)
        # The PNG keeps the audio file's stem as its name.
        plot_spectrogram(file, str(output_dir / f"{file.stem}.png"))

## II. Experiment
### Experiment 1 - LBP-Based Classification with Spectrogram Images

Load spectrogram images

In [None]:
from datasets.lbp import LocalBinaryPatternsImageClassificationDataset

# Create an empty LBP image dataset, then fill it from the spectrogram folder.
# NOTE: DATASET_PATH is rebound here to the fixed folder name, regardless of
# what the preprocessing cell assigned to it.
dataset_spectrogram = LocalBinaryPatternsImageClassificationDataset()
DATASET_PATH = ROOT.joinpath("SpectrogramImages")
dataset_spectrogram.load_images(str(DATASET_PATH))

# Show the dataset's printable form, followed by its overview.
print(dataset_spectrogram)
dataset_spectrogram.overview()

Export dataset to CSV

In [None]:
# Export the dataset under ROOT / "csv" with the train/test split enabled.
# The "Load dataset from CSV" cell reads train.csv and test.csv from this folder.
# NOTE(review): train_ratio=0.5 presumably assigns half of the samples to each part -- confirm in datasets.lbp.
dataset_spectrogram.export_csv(ROOT / "csv", train_test_split=True, train_ratio=0.5)

Save LBP images (for fun)

In [None]:
# Optional step: write the LBP images under ROOT / "lbp". No later cell in this notebook reads them back.
dataset_spectrogram.save_lbp_images(ROOT / "lbp")

Load dataset from CSV
* `LocalBinaryPatternsDataset` is a lightweight version of `LocalBinaryPatternsImageClassificationDataset` that loads data from CSV files.

In [None]:
from datasets.lbp import LocalBinaryPatternsDataset

# One empty container per part of the split.
train_dataset = LocalBinaryPatternsDataset()
test_dataset = LocalBinaryPatternsDataset()

# Fill each container from its CSV file (expected to have been written by the
# export cell above).
train_dataset.load_csv(ROOT.joinpath("csv", "train.csv"))
test_dataset.load_csv(ROOT.joinpath("csv", "test.csv"))

# Printable form of both datasets, one per line.
print(train_dataset, test_dataset, sep="\n")

Train and test the classifier

In [None]:
from models.lbp import LocalBinaryPatternsClassifierBackend
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

# Estimators to evaluate, keyed by a descriptive label. Both are given
# random_state=42.
estimators = {
    "SVM: Linear, C=1.0": SVC(
        kernel="linear", C=1.0, random_state=42, probability=True
    ),
    "RandomForest: n_estimators=1000": RandomForestClassifier(
        n_estimators=1000, random_state=42
    ),
}
# A scaler argument (scaler=StandardScaler()) is kept disabled, as in the
# original cell.
classifier = LocalBinaryPatternsClassifierBackend(estimators=estimators)

# Train on the training part, then test on the test part.
classifier.train(train_dataset)
classifier.test(test_dataset)

### Experiment 2 - LBP-Based Classification with Channel-Flattened Spectrogram Images

Load spectrogram images in channel-flatten mode

In [None]:
from datasets.lbp import LocalBinaryPatternsImageClassificationDataset

# Same source folder as Experiment 1; the images are loaded with
# channel_flatten=True into a separate dataset object.
dataset_spectrogram_cf = LocalBinaryPatternsImageClassificationDataset()
DATASET_PATH = ROOT.joinpath("SpectrogramImages")
dataset_spectrogram_cf.load_images(str(DATASET_PATH), channel_flatten=True)

# Show the dataset's printable form, followed by its overview.
print(dataset_spectrogram_cf)
dataset_spectrogram_cf.overview()

Export dataset to CSV

In [None]:
# Export the channel-flattened dataset under ROOT / "csv-channel-flatten" with the train/test split enabled.
# The next cell reads train.csv and test.csv from this folder.
dataset_spectrogram_cf.export_csv(ROOT / "csv-channel-flatten", train_test_split=True, train_ratio=0.5)

Load dataset from CSV

In [None]:
from datasets.lbp import LocalBinaryPatternsDataset

# One empty container per part of the split.
train_dataset = LocalBinaryPatternsDataset()
test_dataset = LocalBinaryPatternsDataset()

# Fill each container from the channel-flatten CSV files (expected to have been
# written by the export cell above).
train_dataset.load_csv(ROOT.joinpath("csv-channel-flatten", "train.csv"))
test_dataset.load_csv(ROOT.joinpath("csv-channel-flatten", "test.csv"))

# Printable form of both datasets, one per line.
print(train_dataset, test_dataset, sep="\n")

In [None]:
from models.lbp import LocalBinaryPatternsClassifierBackend
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

# Same estimator line-up as Experiment 1, keyed by a descriptive label. Both
# are given random_state=42.
estimators = {
    "SVM: Linear, C=1.0": SVC(
        kernel="linear", C=1.0, random_state=42, probability=True
    ),
    "RandomForest: n_estimators=1000": RandomForestClassifier(
        n_estimators=1000, random_state=42
    ),
}
# A scaler argument (scaler=StandardScaler()) is kept disabled, as in the
# original cell.
classifier = LocalBinaryPatternsClassifierBackend(estimators=estimators)

# Train on the channel-flatten training part, then test on the test part.
classifier.train(train_dataset)
classifier.test(test_dataset)

### Experiment 3 - LBP-Based Classification with Augmented Spectrogram Images

Load spectrogram images to `BaseImageClassificationDataset`

In [None]:
from datasets.base import BaseImageClassificationDataset

# Load the raw spectrogram images into the base dataset class; the next cell
# hands this object to ImageAugmentationDataset.
# NOTE: this rebinds dataset_spectrogram, replacing the object from Experiment 1.
dataset_spectrogram = BaseImageClassificationDataset()
DATASET_PATH = ROOT.joinpath("SpectrogramImages")
dataset_spectrogram.load_images(str(DATASET_PATH))

# Show the dataset's printable form, followed by its overview.
print(dataset_spectrogram)
dataset_spectrogram.overview()

Load to `ImageAugmentationDataset`
`ImageAugmentationDataset` is a subclass of `BaseImageClassificationDataset` that provides augmentation methods.
It currently supports `gaussian_noise` and `mix_patch`.

In [None]:
from datasets.augment import ImageAugmentationDataset

# Start from an empty augmentation dataset and initialise it from the base
# dataset loaded in the previous cell, then show its printable form.
aug_dataset = ImageAugmentationDataset()
aug_dataset.from_base_dataset(dataset_spectrogram)
print(aug_dataset)

Apply augmentation

In [None]:
# Run the augmentation with both supported techniques enabled. The return value
# is not captured, so aug_dataset itself is expected to hold the result.
# NOTE(review): aug_ratio=0.5 presumably sets how many augmented samples are
# produced relative to the original set -- confirm in datasets.augment.
aug_dataset.apply_augmentation(aug_ratio=0.5, gaussian_noise=True, mix_patch=True)

Overview augmented dataset

In [None]:
# Inspect the dataset again now that apply_augmentation has been called.
print(aug_dataset)
aug_dataset.overview()

Save augmented images

In [None]:
# Write the images under ROOT / "augmented"; the loading cells of Experiments 3 and 4 read from this folder.
aug_dataset.save_images(ROOT / "augmented")

Load augmented dataset

In [None]:
%matplotlib inline
from datasets.lbp import LocalBinaryPatternsImageClassificationDataset

# Load the augmented images saved by the previous cell into an LBP image
# dataset. This rebinds dataset_spectrogram once more.
dataset_spectrogram = LocalBinaryPatternsImageClassificationDataset()
DATASET_PATH = ROOT.joinpath("augmented")
dataset_spectrogram.load_images(str(DATASET_PATH))

# Show the dataset's printable form, followed by its overview.
print(dataset_spectrogram)
dataset_spectrogram.overview()

Export dataset to CSV

In [None]:
# Export the augmented dataset under ROOT / "csv-augmented" with the train/test split enabled.
# The next cell reads train.csv and test.csv from this folder.
dataset_spectrogram.export_csv(ROOT / "csv-augmented", train_test_split=True, train_ratio=0.5)

Load dataset from CSV

In [None]:
from datasets.lbp import LocalBinaryPatternsDataset

# One empty container per part of the split.
train_dataset = LocalBinaryPatternsDataset()
test_dataset = LocalBinaryPatternsDataset()

# Fill each container from the augmented CSV files (expected to have been
# written by the export cell above).
train_dataset.load_csv(ROOT.joinpath("csv-augmented", "train.csv"))
test_dataset.load_csv(ROOT.joinpath("csv-augmented", "test.csv"))

# Printable form of both datasets, one per line.
print(train_dataset, test_dataset, sep="\n")

Train and test the classifier

In [None]:
from models.lbp import LocalBinaryPatternsClassifierBackend
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

# Same estimator line-up as the earlier experiments, keyed by a descriptive
# label. Both are given random_state=42.
estimators = {
    "SVM: Linear, C=1.0": SVC(
        kernel="linear", C=1.0, random_state=42, probability=True
    ),
    "RandomForest: n_estimators=1000": RandomForestClassifier(
        n_estimators=1000, random_state=42
    ),
}
# A scaler argument (scaler=StandardScaler()) is kept disabled, as in the
# original cell.
classifier = LocalBinaryPatternsClassifierBackend(estimators=estimators)

# Train on the augmented training part, then test on the test part.
classifier.train(train_dataset)
classifier.test(test_dataset)

### Experiment 4 - LBP-Based Classification with Augmented Spectrogram Images (with channel-flatten)

Load augmented dataset in channel-flatten mode

In [None]:
%matplotlib inline
from datasets.lbp import LocalBinaryPatternsImageClassificationDataset

# Load the augmented images again, this time with channel_flatten=True.
dataset_spectrogram = LocalBinaryPatternsImageClassificationDataset()
DATASET_PATH = ROOT.joinpath("augmented")
dataset_spectrogram.load_images(str(DATASET_PATH), channel_flatten=True)

# Show the dataset's printable form, followed by its overview.
print(dataset_spectrogram)
dataset_spectrogram.overview()

Export dataset to CSV

In [None]:
# Export the channel-flattened augmented dataset under ROOT / "csv-augmented-channel-flatten" with the train/test split enabled.
# The next cell reads train.csv and test.csv from this folder.
dataset_spectrogram.export_csv(ROOT / "csv-augmented-channel-flatten", train_test_split=True, train_ratio=0.5)

Load dataset from CSV

In [None]:
from datasets.lbp import LocalBinaryPatternsDataset

# One empty container per part of the split.
train_dataset = LocalBinaryPatternsDataset()
test_dataset = LocalBinaryPatternsDataset()

# Fill each container from the augmented channel-flatten CSV files (expected to
# have been written by the export cell above).
train_dataset.load_csv(ROOT.joinpath("csv-augmented-channel-flatten", "train.csv"))
test_dataset.load_csv(ROOT.joinpath("csv-augmented-channel-flatten", "test.csv"))

# Printable form of both datasets, one per line.
print(train_dataset, test_dataset, sep="\n")

Train and test the classifier

In [None]:
from models.lbp import LocalBinaryPatternsClassifierBackend
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

# Same estimator line-up as the earlier experiments, keyed by a descriptive
# label. Both are given random_state=42.
estimators = {
    "SVM: Linear, C=1.0": SVC(
        kernel="linear", C=1.0, random_state=42, probability=True
    ),
    "RandomForest: n_estimators=1000": RandomForestClassifier(
        n_estimators=1000, random_state=42
    ),
}
# A scaler argument (scaler=StandardScaler()) is kept disabled, as in the
# original cell.
classifier = LocalBinaryPatternsClassifierBackend(estimators=estimators)

# Train on the augmented channel-flatten training part, then test on the test part.
classifier.train(train_dataset)
classifier.test(test_dataset)

### Experiment 5 - CNN-Based Classification with Spectrogram Images (ResNet18)

Load spectrogram images to `TorchImageClassificationDataset`

`TorchImageClassificationDataset` implements `torch.utils.data.Dataset` and is compatible with `DataLoader`.

In [None]:
from torchvision import transforms

from datasets.torch import TorchImageClassificationDataset

# Folder expected to contain "train", "val" and "test" sub-folders.
# NOTE(review): no cell in this notebook creates it -- confirm how data_split
# is produced.
DATA_DIR = ROOT.joinpath("data_split")

# Preprocessing applied to every image: resize to 224x224, convert to a tensor,
# then normalize each channel with the mean / std triples below (these look
# like the ImageNet statistics used with torchvision's pretrained models).
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)


def _load_split(split_name):
    """Load one sub-folder of DATA_DIR into a dataset and show its overview."""
    split = TorchImageClassificationDataset(transform=transform)
    split.load_images(root=str(DATA_DIR / split_name))
    split.overview()
    return split


# Splits are loaded in the same order as before: train, val, test.
train_dataset = _load_split("train")
val_dataset = _load_split("val")
test_dataset = _load_split("test")

In [None]:
# Printable form of the three splits, one per line.
print(train_dataset, val_dataset, test_dataset, sep="\n")

Train

In [None]:
from models.cnn import ConvolutionNeuralNetworkClassifierBackend

# CNN classifier configured with model_name="resnet18" and the hyper-parameters
# used for this experiment.
classifier = ConvolutionNeuralNetworkClassifierBackend(
    model_name="resnet18", epochs=10, learning_rate=0.001, decay=0.01
)

# The validation split is passed to train() alongside the training split.
classifier.train(train_dataset, val_dataset)

Test

In [None]:
# Run the classifier from the preceding cell on the test split loaded from DATA_DIR / "test".
classifier.test(test_dataset)

### Experiment 6 - CNN-Based Classification with Spectrogram Images (VGG16)

Train

In [None]:
# Imported here as well so this cell does not fail with a NameError when the
# Experiment 5 training cell (the only other place that imports it) was skipped.
from models.cnn import ConvolutionNeuralNetworkClassifierBackend

# Same hyper-parameters as Experiment 5; only model_name differs.
classifier = ConvolutionNeuralNetworkClassifierBackend(
    model_name="vgg16",
    decay=0.01,
    epochs=10,
    learning_rate=0.001,
)

# train_dataset and val_dataset come from the Experiment 5 loading cell, which
# must have been run first.
classifier.train(train_dataset, val_dataset)

Test

In [None]:
# Run the classifier from the preceding cell on the test split loaded from DATA_DIR / "test".
classifier.test(test_dataset)