In [39]:
import librosa
import numpy as np
from sklearn.svm import LinearSVC as LSVC
from sklearn.metrics import confusion_matrix

In [192]:
# define the functions needed to read and preprocess the input data
# https://medium.com/comet-ml/applyingmachinelearningtoaudioanalysis-utm-source-kdnuggets11-19-e160b069e88

def read_sounds(files, input_file, n_mfcc=128):
    """Load audio clips and reduce each one to a vector of averaged MFCCs.

    Each clip is read from ``<input_file>/<input_file>/<name>`` with
    ``librosa.load(..., res_type='kaiser_fast')``. Its MFCC matrix is then
    transposed and averaged along the first axis, producing one 1-D vector
    per clip.

    Parameters
    ----------
    files : str or sequence of str
        File name(s) inside the ``<input_file>/<input_file>`` folder. A lone
        name (plain string, ``np.str_`` or 0-d array, which is what
        ``np.loadtxt`` yields for a one-line listing) is treated as a
        one-element list instead of failing or being iterated per character.
    input_file : str
        Data set name; it is used twice to build the nested folder path.
    n_mfcc : int, optional
        Number of MFCCs requested from ``librosa.feature.mfcc``. Defaults to
        128, the value that used to be hard-coded.

    Returns
    -------
    numpy.ndarray
        The per-clip vectors stacked in input order; an empty array when
        ``files`` is empty.
    """
    features = []
    # ndmin=1 promotes a single name to a one-element array so the loop
    # always walks over whole file names.
    for name in np.array(files, ndmin=1):
        path = '/'.join((input_file, input_file, str(name)))
        signal, sample_rate = librosa.load(path, res_type='kaiser_fast')
        coeffs = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=n_mfcc)
        features.append(np.mean(coeffs.T, axis=0))
    return np.array(features)

def read_data(input_file):
    """Read a labelled split: its listing file and the matching audio features.

    ``<input_file>.txt`` is parsed as comma-separated ``name,label`` rows.
    The names are handed to ``read_sounds`` and the labels are converted to
    integers.

    Parameters
    ----------
    input_file : str
        Split name; ``.txt`` is appended to locate the listing and the same
        value is forwarded to ``read_sounds``.

    Returns
    -------
    tuple
        ``(features, labels)`` where ``features`` is whatever ``read_sounds``
        returns and ``labels`` is a 1-D integer array with one entry per row.
    """
    # ndmin=2 keeps the (rows, columns) layout even for a one-line listing, so
    # unpacking always yields two 1-D arrays rather than two string scalars.
    files, labels = np.loadtxt(input_file + '.txt', dtype='str', delimiter=',',
                               ndmin=2, unpack=True)
    # astype already returns a fresh array; no extra np.array copy is needed.
    return read_sounds(files, input_file), labels.astype(int)

def read_test(input_file='test'):
    """Read the test split: file names from the listing plus their features.

    ``<input_file>.txt`` is parsed with the default whitespace delimiter and
    the resulting names are handed to ``read_sounds``.

    Parameters
    ----------
    input_file : str, optional
        Split name (default ``'test'``); ``.txt`` is appended to locate the
        listing and the same value is forwarded to ``read_sounds``.

    Returns
    -------
    tuple
        ``(test_files, features)`` where ``test_files`` is the array of names
        read from the listing and ``features`` is whatever ``read_sounds``
        returns for them.
    """
    # ndmin=1 guarantees a 1-D array of names even when the listing holds a
    # single entry (which would otherwise come back as a 0-d array).
    test_files = np.loadtxt(input_file + '.txt', dtype='str', ndmin=1, unpack=True)
    return test_files, read_sounds(test_files, input_file)

In [54]:
# read the input data: features and labels for the train and validation
# splits, plus the file names and features of the test split

train_data, train_labels = read_data('train')
val_data, val_labels = read_data('validation')
test_files, test_data = read_test()

In [387]:
# declare the model and train it on the training data
# https://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVC.html
# NOTE(review): the features are passed to the classifier unscaled — consider
# whether standardising them first would help; confirm against the docs above.

model = LSVC(dual=False)
model.fit(train_data, train_labels)

LinearSVC(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, loss='squared_hinge', max_iter=1000,
          multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
          verbose=0)

In [388]:
# score on the training data (the bare expression is shown as the cell output)

model.score(train_data, train_labels)

0.742

In [389]:
# score on the validation data (the bare expression is shown as the cell output)

model.score(val_data, val_labels)

0.705

In [None]:
# confusion matrix for the validation data
# (true labels first, predicted labels second — spelled out via keywords)

predicted_val_labels = model.predict(val_data)
confusion_matrix_val = confusion_matrix(
    y_true=val_labels,
    y_pred=predicted_val_labels,
)
print(confusion_matrix_val)

In [None]:
# split the validation confusion matrix into its four cells
# (first index: true label, second index: predicted label)

true_negative, false_positive = confusion_matrix_val[0, 0], confusion_matrix_val[0, 1]
false_negative, true_positive = confusion_matrix_val[1, 0], confusion_matrix_val[1, 1]

In [None]:
# compute precision: true positives divided by all positive predictions

predicted_positives = true_positive + false_positive
# With no positive predictions the ratio would be 0/0 (nan plus a NumPy
# warning); report 0.0 instead, as scikit-learn's own metric does by default.
precision_score = true_positive / predicted_positives if predicted_positives else 0.0
print(precision_score)

In [None]:
# compute recall: true positives divided by all actually positive samples

actual_positives = true_positive + false_negative
# With no positive samples in the validation labels the ratio would be 0/0
# (nan plus a NumPy warning); report 0.0 instead, as scikit-learn's own
# metric does by default.
recall_score = true_positive / actual_positives if actual_positives else 0.0
print(recall_score)

In [385]:
# compute the predictions for the test data and save them as "name,label" rows
# https://stackoverflow.com/questions/36210977/python-numpy-savetxt-header-has-extra-character/36211002

test_labels = model.predict(test_data)
np.savetxt(
    'submission1.txt',
    np.column_stack((test_files, test_labels)),  # one row per test file
    fmt='%s',
    delimiter=',',
    header='name,label',
    comments='',  # empty prefix so the header line is not written with a leading '# '
)