# import dataset

In [None]:
import os
import librosa
import numpy as np
from collections import Counter
import noisereduce as nr
import matplotlib.pyplot as plt
import librosa.display
from scipy.stats import zscore
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import plot_confusion_matrix
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
import csv
import pandas as pd
from sklearn.manifold import TSNE
import IPython.display as ipd

In [None]:
# Map each class folder name ("a".."j") to its integer label (0..9).
letter_to_label = {letter: index for index, letter in enumerate("abcdefghij")}

# Sampling rate passed to librosa for every recording.
sr = 24000

dev_root = "./DSL1920_dataset_sept/development/"
eval_root = "./DSL1920_dataset_sept/evaluation/"

# Development set: one sub-folder per class, visited in sorted order so the
# position of every recording in X_dev is reproducible.
X_dev = []
y_dev = []

for label in sorted(os.listdir(dev_root)):
    class_dir = os.path.join(dev_root, label)
    for file in sorted(os.listdir(class_dir)):
        waveform, _ = librosa.load(os.path.join(class_dir, file), sr=sr)
        X_dev.append(waveform)
        y_dev.append(letter_to_label[label])

# Evaluation set: flat folder; the file stem is kept as the submission id,
# in the same order as the loaded waveforms.
ids = []
X_eval = []

for file in os.listdir(eval_root):
    ids.append(file.split(".")[0])
    waveform, _ = librosa.load(os.path.join(eval_root, file), sr=sr)
    X_eval.append(waveform)

In [None]:
# The recordings are not guaranteed to share one length (a later cell drops
# the short ones), so each set is stored as a 1-D object array holding one
# waveform per element. Filling the containers explicitly avoids NumPy's
# implicit ragged-array conversion, which raises ValueError on recent NumPy
# releases, and guarantees that later cells can reassign individual elements
# with arrays of a different length (padding, denoising).
_dev_signals = list(X_dev)
X_dev = np.empty(len(_dev_signals), dtype=object)
for _idx, _waveform in enumerate(_dev_signals):
    X_dev[_idx] = _waveform

_eval_signals = list(X_eval)
X_eval = np.empty(len(_eval_signals), dtype=object)
for _idx, _waveform in enumerate(_eval_signals):
    X_eval[_idx] = _waveform

y_dev = np.array(y_dev)

In [None]:
# Sanity check: container shapes and number of evaluation ids.
print(X_dev.shape, X_eval.shape, len(ids))

In [None]:
def print_spectrum(data, sr):
    """Plot the dB-scaled STFT magnitude of one waveform.

    Parameters
    ----------
    data : waveform passed to ``librosa.stft``.
    sr : sampling rate, used to label the time/frequency axes.

    The spectrogram is drawn on a new 20x12 figure with a logarithmic
    frequency axis and a colour bar, then shown.
    """
    magnitude = np.abs(librosa.stft(data))
    spectrogram_db = librosa.amplitude_to_db(magnitude)

    plt.figure(figsize=(20, 12))
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis="time", y_axis="log")
    plt.colorbar()
    plt.show()

In [None]:
# Histogram of the per-recording standard deviation on the development set.
vet = [np.std(clip) for clip in X_dev]

plt.hist(vet, bins=20)
plt.show()

In [None]:
# Histogram of the per-recording standard deviation on the evaluation set.
vet = [np.std(clip) for clip in X_eval]

plt.hist(vet, bins=20)
plt.show()

# balanced classes

In [None]:
# Number of development recordings per class label.
print(Counter(y_dev))

# remove signals shorter than 11900 samples (9 signals) and pick the recording used as noise reference

In [None]:
# Positions of the development recordings with fewer than 11900 samples.
x_not_12000 = [position for position, clip in enumerate(X_dev) if len(clip) < 11900]

# Recording used as the noise profile for denoising. The index refers to
# X_dev *before* the short recordings are removed.
NOISE1 = X_dev[4608]

print(x_not_12000)

In [None]:
# Drop the short recordings and their labels so X_dev and y_dev stay aligned.
# NOTE(review): no axis is given, so this assumes X_dev is a 1-D (object)
# array with one waveform per element -- confirm.
X_dev = np.delete(X_dev, x_not_12000)
y_dev = np.delete(y_dev, x_not_12000)

# noise reduction

In [None]:
# Spectrogram of the recording chosen as noise profile.
print_spectrum(NOISE1, sr=sr)

In [None]:
# Audio player for the noise profile.
ipd.Audio(NOISE1, rate=sr)

In [None]:
# Audio player for a sample recording before noise reduction.
ipd.Audio(X_dev[18], rate=sr)

In [None]:
# Spectrogram of the same sample recording before noise reduction.
print_spectrum(X_dev[18], sr=sr)

In [None]:

### Each signal is reflect-padded before denoising because of the distortion
### at the start and the end of the signal; the padding is cut off afterwards.

def _denoise_in_place(signals, noise, pad=2500):
    """Replace every element of `signals` with its noise-reduced version."""
    for position in range(len(signals)):
        padded = np.pad(signals[position], (pad, pad), mode="reflect")
        cleaned = nr.reduce_noise(audio_clip=padded, noise_clip=noise)
        signals[position] = cleaned[pad: len(cleaned) - pad]


_denoise_in_place(X_dev, NOISE1)
_denoise_in_place(X_eval, NOISE1)

In [None]:
# Audio player for the sample recording after noise reduction.
ipd.Audio(X_dev[18], rate=sr)

In [None]:
# Spectrogram of the sample recording after noise reduction.
print_spectrum(X_dev[18], sr=sr)

In [None]:
# Sanity check: signals and labels must have the same number of entries.
print(X_dev.shape, y_dev.shape)

# normalize signals

In [None]:
def normalize_data(X):
    """Standardise every signal in `X` independently with ``zscore``.

    Parameters
    ----------
    X : iterable of 1-D signals; they may have different lengths.

    Returns
    -------
    numpy.ndarray
        A regular 2-D array when all signals share one length (same result
        as ``np.array`` on the list), otherwise a 1-D object array with one
        normalised signal per element. The ragged case is built explicitly
        because ``np.array`` on sequences of unequal length raises
        ValueError on recent NumPy releases.

    Signals with zero variance come out of ``zscore`` as NaN; they are
    returned as-is and left to the caller to clean up.
    """
    normalized = [zscore(x) for x in X]

    if len({len(x) for x in normalized}) <= 1:
        return np.array(normalized)

    ragged = np.empty(len(normalized), dtype=object)
    for position, signal_z in enumerate(normalized):
        ragged[position] = signal_z
    return ragged

# Standardise every recording of both sets independently.
X_dev = normalize_data(X_dev)
X_eval = normalize_data(X_eval)

In [None]:
# Replace NaN samples with 0 (presumably produced by zscore on signals with
# zero variance). The replacement value has to be passed as `nan=`: the
# second positional parameter of np.nan_to_num is `copy`, not the fill value.
for i, clip in enumerate(X_eval):
    X_eval[i] = np.nan_to_num(clip, nan=0.0)

for i, clip in enumerate(X_dev):
    X_dev[i] = np.nan_to_num(clip, nan=0.0)

# print audio signals

In [None]:
# Waveform of one normalised recording.
plt.figure(figsize=(20, 5))
# Older librosa exposes `waveplot`; newer releases only provide `waveshow`.
# Prefer the original function when it exists so the plot is unchanged there.
_draw_wave = getattr(librosa.display, "waveplot", None) or librosa.display.waveshow
_draw_wave(X_dev[18], sr=sr)
plt.show()

In [None]:
def print_samples(data, sr, step=1004):
    """Overlay the waveforms of every `step`-th recording on one figure.

    Parameters
    ----------
    data : indexable collection of waveforms.
    sr : sampling rate used for the time axis.
    step : stride between plotted recordings (default 1004, the value that
        was previously hard-coded).
    """
    # Older librosa exposes `waveplot`; newer releases only provide
    # `waveshow`. Prefer the original function when it exists.
    draw_wave = getattr(librosa.display, "waveplot", None) or librosa.display.waveshow

    plt.figure(figsize=(20, 8))
    for idx in range(0, len(data), step):
        draw_wave(data[idx], sr=sr, alpha=0.5)
    plt.show()

In [None]:
# Overlay a subset of development waveforms.
print_samples(X_dev, sr)

# extract features

In [None]:
def extract_features(X, sr):
    """Turn each signal into a fixed-length vector of time-averaged features.

    Parameters
    ----------
    X : iterable of 1-D audio signals.
    sr : sampling rate of the signals.

    Returns
    -------
    numpy.ndarray
        One row per signal. Every feature is averaged over time and the
        values are laid out in this order: RMS, spectral bandwidth,
        spectral roll-off, zero-crossing rate, spectral centroid, one value
        per chroma bin, one per spectral-contrast band, 26 MFCCs, the first
        20 of 128 mel bands, and the 26 MFCC deltas.

    All librosa calls pass the signal as ``y=``: recent librosa releases
    accept it only as a keyword for ``melspectrogram`` and
    ``spectral_contrast``, and this keeps the calls consistent.
    """
    n_fft = int(sr * 0.03)  # analysis window: 30 ms worth of samples
    hop = int(sr * 0.01)    # hop: 10 ms worth of samples

    feat_X = []
    for x in X:
        chroma_stft = librosa.feature.chroma_stft(y=x, sr=sr, n_fft=n_fft, hop_length=hop)
        rms = librosa.feature.rms(y=x, frame_length=n_fft, hop_length=hop)
        spec_cent = librosa.feature.spectral_centroid(y=x, sr=sr, n_fft=n_fft, hop_length=hop)
        spec_bw = librosa.feature.spectral_bandwidth(y=x, sr=sr, n_fft=n_fft, hop_length=hop)
        rolloff = librosa.feature.spectral_rolloff(y=x, sr=sr, n_fft=n_fft, hop_length=hop)
        zcr = librosa.feature.zero_crossing_rate(y=x, frame_length=n_fft, hop_length=hop)
        mfcc = librosa.feature.mfcc(y=x, sr=sr, n_fft=n_fft, hop_length=hop, n_mfcc=26)
        mel = librosa.feature.melspectrogram(y=x, sr=sr, n_fft=n_fft, hop_length=hop, n_mels=128)
        contrast = librosa.feature.spectral_contrast(y=x, sr=sr, n_fft=n_fft, hop_length=hop)
        deltas = librosa.feature.delta(mfcc)

        # Single concatenation instead of repeated np.append; the order of
        # the groups defines the column layout of the feature matrix.
        feat_x = np.concatenate([
            [np.mean(rms), np.mean(spec_bw), np.mean(rolloff), np.mean(zcr), np.mean(spec_cent)],
            [np.mean(row) for row in chroma_stft],
            [np.mean(row) for row in contrast],
            [np.mean(row) for row in mfcc],
            [np.mean(row) for row in mel[:20]],  # only the 20 lowest mel bands
            [np.mean(row) for row in deltas],
        ])

        feat_X.append(feat_x)

    return np.array(feat_X)

In [None]:
# Feature matrices for both sets (one row per recording).
feat_X_dev = extract_features(X_dev, sr)
feat_X_eval = extract_features(X_eval, sr)
print(feat_X_dev.shape, feat_X_eval.shape)

In [None]:
# Fit the scaler on the development features only and reuse the same
# mean/variance for the evaluation features. Re-fitting on the evaluation
# set would scale the two sets differently, so a model trained on the
# development features would see shifted inputs at prediction time.
ss = StandardScaler()
X_dev_scaled = ss.fit_transform(feat_X_dev)
X_eval_scaled = ss.transform(feat_X_eval)

In [None]:
def build_classifier(X_train, y_train, X_test, y_test, clf_to_evaluate, scores, param_grid, n_folds=5):
    """Grid-search an estimator, report the results and return the search.

    Parameters
    ----------
    X_train, y_train : data used for the cross-validated grid search.
    X_test, y_test : held-out data used for the confusion matrix and the
        classification report.
    clf_to_evaluate : estimator whose hyper-parameters are searched.
    scores : value passed to ``GridSearchCV(scoring=...)``.
    param_grid : hyper-parameter grid passed to ``GridSearchCV``.
    n_folds : number of cross-validation folds (default 5).

    Returns
    -------
    The fitted ``GridSearchCV`` object.
    """
    # Use the `scores` argument; the body used to read a global named
    # `score`, so the parameter was silently ignored.
    clf = GridSearchCV(clf_to_evaluate, param_grid, cv=n_folds, scoring=scores, n_jobs=-1)
    clf.fit(X_train, y_train)

    print(f"Best parameters set on dev set: {clf.best_params_}")
    print()
    print("Grid scores on dev set:\n")
    means = clf.cv_results_["mean_test_score"]
    stds = clf.cv_results_["std_test_score"]
    for mean, std, params in zip(means, stds, clf.cv_results_["params"]):
        print(f"{mean:.5f} (+/- {std:.5f}) for {params}")
    print()

    print("Detailed classification report: ")
    print("\nScores on full evaluation set: ")
    y_true, y_pred = y_test, clf.predict(X_test)

    # Row-normalised confusion matrix on the held-out data.
    fig, ax = plt.subplots(figsize=(20, 20))
    disp = plot_confusion_matrix(
        clf,
        X_test,
        y_test,
        display_labels=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"],
        cmap="Blues",
        normalize="true",
        ax=ax,
    )
    disp.ax_.set_title("Confusion matrix")
    plt.show()
    print(classification_report(y_true, y_pred))

    return clf

## SVM

In [None]:
# Metric optimised by the grid search.
score = "f1_macro"

# Stratified 80/20 split of the scaled development features.
X_train, X_test, y_train, y_test = train_test_split(
    X_dev_scaled,
    y_dev,
    test_size=0.2,
    shuffle=True,
    stratify=y_dev,
)

# Only the RBF kernel is searched.
params_grid = dict(
    kernel=["rbf"],
    gamma=["auto", "scale"],
    C=[0.1, 0.5, 1, 1.5, 2, 5, 10, 50, 100],
)

clf_to_evaluate = SVC()

best_clf = build_classifier(
    X_train, y_train, X_test, y_test, clf_to_evaluate, score, params_grid
)

In [None]:
# Refit an SVM with the best hyper-parameters on the whole development set
# and predict the evaluation set.
clf = SVC(**best_clf.best_params_)
clf.fit(X_dev_scaled, y_dev)
y_pred = clf.predict(X_eval_scaled)

In [None]:
def print_file(filename, ids, y_pred):
    """Write predictions to `filename` as a CSV with header "Id,Predicted".

    Parameters
    ----------
    filename : path of the file to create (overwritten if it exists).
    ids : identifiers, in the same order as `y_pred`.
    y_pred : integer labels 0..9; each is written as its letter "a".."j".
    """
    # Inverse of the folder-name -> label mapping.
    label_to_letter = dict(enumerate("abcdefghij"))

    with open(filename, "w") as f:
        f.write("Id,Predicted\n")
        for position, label in enumerate(y_pred):
            f.write(f"{ids[position]},{label_to_letter[label]}\n")

In [None]:
# Write the SVM predictions; the file is named after the best parameter dict.
# NOTE(review): the name contains quotes, colons and braces, which some file
# systems reject -- confirm this is acceptable.
print_file(str(best_clf.best_params_)+".csv", ids, y_pred)

# neural network

In [None]:
# Metric optimised by the grid search.
score = "f1_macro"

# Stratified 80/20 split of the scaled development features.
X_train, X_test, y_train, y_test = train_test_split(
    X_dev_scaled,
    y_dev,
    test_size=0.2,
    shuffle=True,
    stratify=y_dev,
)

# Only the adam solver is searched; max_iter is left at its default.
params_grid = dict(
    hidden_layer_sizes=[(512, ), (1024, ), (2048, ), (4096, ), (512, 512)],
    learning_rate_init=[0.0001, 0.001, 0.01],
    solver=["adam"],
)

clf_to_evaluate = MLPClassifier()

best_clf = build_classifier(
    X_train, y_train, X_test, y_test, clf_to_evaluate, score, params_grid
)

In [None]:
# Refit an MLP with the best hyper-parameters on the whole development set
# and predict the evaluation set.
clf = MLPClassifier(**best_clf.best_params_)
clf.fit(X_dev_scaled, y_dev)
y_pred = clf.predict(X_eval_scaled)

In [None]:
def print_file(filename, ids, y_pred):
    """Write predictions to `filename` as a CSV with header "Id,Predicted".

    Parameters
    ----------
    filename : path of the file to create (overwritten if it exists).
    ids : identifiers, in the same order as `y_pred`.
    y_pred : integer labels 0..9; each is written as its letter "a".."j".
    """
    # Inverse of the folder-name -> label mapping.
    label_to_letter = dict(enumerate("abcdefghij"))

    with open(filename, "w") as f:
        f.write("Id,Predicted\n")
        for position, label in enumerate(y_pred):
            f.write(f"{ids[position]},{label_to_letter[label]}\n")

In [None]:
# Write the MLP predictions; the file is named after the best parameter dict.
print_file(str(best_clf.best_params_)+".csv", ids, y_pred)

# print tsne

In [None]:
import seaborn as sns

## X_dev_scaled (the scaled feature matrix) must have been computed before running this cell
tsne = TSNE(n_components=2, verbose=1, perplexity=40, n_iter=4000)
tsne_results = tsne.fit_transform(X_dev_scaled)

# One row per recording: the two embedding coordinates and the class label.
df = pd.DataFrame(
    {
        "tsne_one": tsne_results[:, 0],
        "tsne_two": tsne_results[:, 1],
        "y": y_dev,
    }
)

plt.figure(figsize=(22, 14))
sns.scatterplot(
    data=df,
    x="tsne_one",
    y="tsne_two",
    hue="y",
    palette=sns.color_palette("hls", 10),
    legend="full",
    alpha=0.3,
)

### CODE ENDS HERE, here are some preprocessing attempts which didn't improve performance (filtering and removing noisy signals)

### filtering high and low frequencies

In [None]:
# Audio player for the sample recording before band-pass filtering.
ipd.Audio(X_dev[18], rate=sr)

In [None]:
# Spectrogram of the sample recording before band-pass filtering.
print_spectrum(X_dev[18], sr=sr)

In [None]:
import scipy.signal as signal

N = 4  # Filter order
Wn = [2*40./sr, 2*8000./sr]  # Band edges (40 Hz and 8000 Hz) normalised by sr/2
B, A = signal.butter(N, Wn, btype="bandpass")


def _bandpass_in_place(signals, pad=2500):
    """Replace every element of `signals` with its zero-phase filtered version."""
    for position in range(len(signals)):
        # Reflect-pad, filter forwards and backwards, then cut the padding off.
        padded = np.pad(signals[position], (pad, pad), mode="reflect")
        filtered = signal.filtfilt(B, A, padded)
        signals[position] = filtered[pad: len(filtered) - pad]


_bandpass_in_place(X_dev)
_bandpass_in_place(X_eval)

In [None]:
# Audio player for the sample recording after band-pass filtering.
ipd.Audio(X_dev[18], rate=sr)

In [None]:
# Spectrogram of the sample recording after band-pass filtering.
print_spectrum(X_dev[18], sr=sr)

### signal to noise ratio

In [None]:
def signaltonoise(a, axis=0, ddof=0):
    """Return the mean of `a` divided by its standard deviation along `axis`.

    Parameters
    ----------
    a : array-like input.
    axis : axis along which the mean and standard deviation are computed.
    ddof : delta degrees of freedom passed to ``std``.

    Returns
    -------
    numpy.ndarray
        ``mean / std`` element-wise, with 0 wherever the standard deviation
        is 0.
    """
    a = np.asanyarray(a)
    m = a.mean(axis)
    sd = a.std(axis=axis, ddof=ddof)
    # The quotient is computed for every element before np.where selects;
    # silence the division warnings for the sd == 0 entries that are
    # replaced by 0 anyway.
    with np.errstate(divide="ignore", invalid="ignore"):
        ratio = m / sd
    return np.where(sd == 0, 0, ratio)

In [None]:
# Absolute mean/std ratio of every development recording.
snrvet = [np.abs(signaltonoise(clip)) for clip in X_dev]

# Labels of the recordings whose ratio exceeds 0.9.
count = [y_dev[position] for position, ratio in enumerate(snrvet) if ratio > 0.9]

print(len(count))

In [None]:
#ipd.Audio(count[180], rate=sr)

In [None]:
# Distribution of the ratios, restricted to the range [0, 1.5].
plt.hist(snrvet, bins=100, range=(0, 1.5))
plt.show()

In [None]:
# Per-class number of recordings above the threshold.
print(Counter(count))

In [None]:
# Histogram of the labels of the recordings above the threshold.
plt.hist(count, bins=10)
plt.show()