# Multinomial Logistic Regression
- We flatten each spectrogram matrix so that every audio clip becomes a single row of data

In [1]:
import numpy as np
import pickle
from sklearn.linear_model import SGDClassifier
import re
import librosa
import multiprocessing as mp

### Preprocessing

In [2]:
# Integer class id for each emotion label; ids follow the order listed here.
labeldict = {
    emotion: class_id
    for class_id, emotion in enumerate(
        ('Sadness', 'Excited', 'Happiness', 'Anger', 'Frustration', 'Other')
    )
}

In [3]:
def one_hot_encode(label):
    """Return the one-hot vector for an emotion label.

    Args:
        label: A key of ``labeldict`` (e.g. ``'Anger'``).

    Returns:
        1-D float array with one slot per ``labeldict`` entry, holding 1 at
        the label's integer code and 0 elsewhere.

    Raises:
        KeyError: If ``label`` is not a key of ``labeldict``.
    """
    # Size the vector from the mapping so it stays correct if labels are
    # added or removed, instead of repeating the class count by hand.
    one_hot = np.zeros(len(labeldict))
    one_hot[labeldict[label]] = 1
    return one_hot

In [4]:
def one_hot_encode_list(listOfLabels):
    """One-hot encode every label in ``listOfLabels``.

    Returns a NumPy array built from the per-label vectors produced by
    ``one_hot_encode``, in the same order as the input.
    """
    encoded_rows = [one_hot_encode(name) for name in listOfLabels]
    return np.array(encoded_rows)

In [5]:
def get_STFT_and_label(path):
    """Load one audio file and return its magnitude STFT and emotion label.

    The label is the letters-only directory name that follows ``/DATA/`` in
    ``path``, e.g. ``/corpus/DATA/Anger/clip.wav`` yields ``'Anger'``.

    Args:
        path: Audio file path containing ``/DATA/<emotion>/``.

    Returns:
        Tuple ``(STFT, emotion)`` where ``STFT`` is
        ``np.abs(librosa.stft(...))`` of the audio loaded with ``sr=44100``
        and ``emotion`` is the label string taken from the path.

    Raises:
        ValueError: If ``path`` does not contain ``/DATA/<emotion>/``.
    """
    match = re.match(r'.*/DATA/([a-zA-Z]+)/.*', path)
    if match is None:
        # Fail early, before the audio load, and name the offending path
        # rather than surfacing an AttributeError on None.
        raise ValueError(
            f"Cannot extract emotion label from path {path!r}: "
            "expected it to contain '/DATA/<emotion>/'"
        )
    emotion = match.group(1)
    data, _ = librosa.load(path, sr=44100)
    STFT = np.abs(librosa.stft(data))
    return STFT, emotion

In [6]:
def preprocess_input(pathList, n_freq_bins=1025, max_frames=1497):
    """Turn a list of audio paths into a flat feature matrix and integer labels.

    Every file is converted to a magnitude STFT by ``get_STFT_and_label`` in
    a worker pool, zero-padded or trimmed along the time axis to
    ``max_frames`` frames, stored as float16 and flattened to one row.

    Args:
        pathList: Audio file paths accepted by ``get_STFT_and_label``.
        n_freq_bins: Number of STFT rows per clip (the spectrograms in this
            notebook have 1025).
        max_frames: Number of time frames kept per clip; shorter clips are
            zero-padded at the end, longer ones are cut.

    Returns:
        Tuple ``(x, y)``: ``x`` is a float16 array of shape
        ``(len(pathList), n_freq_bins * max_frames)`` and ``y`` is an
        integer array of ``labeldict`` codes (not one-hot encoded).
    """
    with mp.Pool() as p:
        results = p.map(get_STFT_and_label, pathList)

    # Starting from zeros provides the padding for free and replaces one
    # np.pad call per spectrogram row with a single slice copy per clip.
    x = np.zeros((len(results), n_freq_bins, max_frames), dtype=np.float16)
    for row, (stft, _) in enumerate(results):
        frames = min(stft.shape[1], max_frames)
        x[row, :, :frames] = stft[:, :frames]
    # Explicit width (not -1) so an empty pathList still reshapes cleanly.
    x = x.reshape(len(results), n_freq_bins * max_frames)

    # Integer-coded labels in the same order as the rows of x.
    y = np.array([labeldict[emotion] for _, emotion in results], dtype=int)
    return x, y

In [7]:
# Make batches of the pathList:
def create_batches(pathList, batch_size):
    """Split ``pathList`` into consecutive batches of at most ``batch_size`` items.

    Args:
        pathList: Sequence of paths (anything supporting ``len`` and slicing).
        batch_size: Maximum number of paths per batch; must be at least 1.

    Returns:
        List of lists in the original order. Every batch holds exactly
        ``batch_size`` paths except possibly the last, which holds the
        remainder. An empty ``pathList`` yields ``[]``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        # Reject bad sizes up front instead of failing with a
        # ZeroDivisionError part-way through the input.
        raise ValueError(
            f"batch_size must be a positive integer, got {batch_size!r}"
        )
    return [
        list(pathList[start:start + batch_size])
        for start in range(0, len(pathList), batch_size)
    ]

# Loading data:
- We load the data in batches of a predefined size to reduce the memory used during training:

In [8]:
# Read the pickled train and test path collections from the working directory.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load .pkl files that were produced by a trusted source.
with open('train_paths.pkl', 'rb') as f:
    train_paths = pickle.load(f)
with open('test_paths.pkl', 'rb') as f:
    test_paths = pickle.load(f)

In [18]:
def test(path):
    """Return the magnitude STFT of the audio file at ``path``.

    Scratch helper for inspecting the spectrogram shape of a single clip.

    Args:
        path: Audio file path readable by ``librosa.load``.

    Returns:
        ``np.abs(librosa.stft(...))`` of the audio loaded with ``sr=44100``.
    """
    data, _ = librosa.load(path, sr=44100)
    # The previous body returned an undefined ``STFT`` name (NameError) after
    # computing an unused mel spectrogram and label; compute the STFT that
    # the return statement and the recorded (1025, N) shapes refer to.
    STFT = np.abs(librosa.stft(data))
    return STFT

In [19]:
# Display the shape of the array test() returns for the first training path.
test(train_paths[0]).shape

(1025, 208)

In [20]:
# Same check for the second training path, to compare shapes between clips.
test(train_paths[1]).shape

(1025, 201)

## Implementation:

In [21]:
# Load the validation set. Unlike the training data, it is not batched:
# every path in test_paths is preprocessed in one call and kept in memory.
x_val, y_val = preprocess_input(test_paths)

In [23]:
def get_length_audio(path):
    """Return the size of the second axis of the clip's magnitude STFT.

    The audio at ``path`` is loaded with ``sr=44100`` before the transform.
    """
    samples, _ = librosa.load(path, sr=44100)
    magnitude = np.abs(librosa.stft(samples))
    _, n_columns = magnitude.shape
    return n_columns

# Measure every path in test_paths with a worker pool; `results` holds one
# get_length_audio value per path, in input order.
with mp.Pool() as p:
    results = p.map(get_length_audio, test_paths)
# Bare expression so the notebook displays the list.
results

[177,
 356,
 236,
 384,
 572,
 602,
 1205,
 560,
 200,
 99,
 757,
 349,
 301,
 223,
 190,
 723,
 210,
 320,
 140,
 106,
 188,
 342,
 288,
 498,
 639,
 380,
 224,
 160,
 152,
 1037,
 217,
 87,
 722,
 819,
 338,
 580,
 219,
 117,
 579,
 272,
 458,
 440,
 377,
 221,
 458,
 119,
 239,
 197,
 675,
 1194,
 95,
 405,
 489,
 300,
 217,
 665,
 238,
 249,
 427,
 377,
 133,
 304,
 609,
 542,
 1253,
 234,
 1038,
 134,
 638,
 194,
 114,
 1223,
 800,
 295,
 213,
 204,
 109,
 399,
 938,
 898,
 286,
 350,
 201,
 527,
 1289,
 806,
 197,
 188,
 194,
 342,
 642,
 296,
 329,
 159,
 577,
 510,
 232,
 521,
 389,
 522,
 392,
 379,
 564,
 173,
 109,
 194,
 231,
 187,
 563,
 153,
 290,
 308,
 391,
 181,
 396,
 264,
 730,
 756,
 844,
 875,
 191,
 240,
 526,
 143,
 202,
 146,
 328,
 153,
 446,
 124,
 133,
 550,
 1004,
 1391,
 763,
 1319,
 345,
 223,
 288,
 60,
 168,
 499,
 701,
 153,
 181,
 303,
 403,
 191,
 206,
 570,
 181,
 255,
 345,
 239,
 393,
 146,
 388,
 746,
 566,
 313,
 257,
 260,
 1038,
 118,
 311,
 51

In [24]:
# Largest value returned by get_length_audio across the measured paths.
max(results)

1497

## Multinomial logistic regression:

In [35]:
# Number of audio paths preprocessed and fed to the model per training step.
batch_size = 32

In [45]:
# Linear classifier trained with SGD on a logistic loss.
# NOTE(review): newer scikit-learn releases spell this loss 'log_loss' and
# no longer accept 'log' - confirm against the installed version.
model = SGDClassifier(loss='log')

In [46]:
# Group the training paths into lists of batch_size paths each.
training_batch = create_batches(train_paths, batch_size=batch_size)

In [47]:
# Single pass over the training batches: each batch of paths is preprocessed
# on demand and used for one incremental update of the model.
# `classes` lists every integer code in labeldict; presumably needed because
# a single batch may not contain all labels - verify against partial_fit docs.
for batch in training_batch:
    x_train, y_train = preprocess_input(batch)
    model.partial_fit(x_train, y_train, classes = [0,1,2,3,4,5]) # Partial_fit to solve memory problem

In [53]:
# Predicted integer label codes for the validation features.
y_pred = model.predict(x_val)

In [54]:
# Accuracy: fraction of validation samples whose predicted code equals the true code.
count = sum(1 for truth, guess in zip(y_val, y_pred) if truth == guess)
print(f"Accuracy : {count/len(y_val)}")

Accuracy : 0.2816666666666667


In [58]:
# First 20 true label codes, for eyeballing against the predictions.
y_val[:20]

array([1, 5, 1, 0, 1, 4, 3, 2, 2, 5, 0, 2, 2, 1, 5, 3, 2, 3, 2, 2])

In [59]:
# First 20 predicted label codes.
y_pred[:20]

array([5, 1, 1, 0, 3, 3, 0, 3, 3, 5, 0, 1, 1, 3, 5, 3, 3, 1, 5, 5])

In [63]:
# Cross-check the manually computed accuracy with the estimator's own score.
model.score(x_val, y_val)

0.2816666666666667