# Test-set evaluation of the BiTiFGAN encoder with the best validation performance

We select the BiTiFGAN training checkpoint whose representations achieved the highest performance on the validation set, and then evaluate those representations on the SC09 test set.

### Import packages

In [75]:
import os
os.chdir(os.path.join("/", "home", "c-matsty", "Bi-TiFGAN---TensorFlow-1.14", "src"))

import numpy as np
import tensorflow as tf
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

from gantools.model import BiSpectrogramGAN
from gantools.gansystem import GANsystem
from hyperparams.tifgan_hyperparams import get_hyperparams
from feature_evaluation.utils import load_data, load_data_labels

In [2]:
# Set CUDA_VISIBLE_DEVICES to "-1" for this process.
# NOTE(review): the feature-extraction cells further down place the model with
# tf.device('/gpu:0') — confirm that this setting is intended alongside them.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

### Define data paths

In [36]:
# Root directory; the dataset and checkpoint paths below are built from it.
data_dir = os.path.join("/media", "datastore", "c-matsty-data")

In [37]:
# Dataset directory that contains the preprocessed training and test splits used below.
dataset_dir = os.path.join(data_dir, "datasets", "SpeechCommands")

In [38]:
# Preprocessed training split and the sub-directory holding its input data.
train_dir = os.path.join(dataset_dir, "SpeechCommands_Preproc_2_training")
train_input_path = os.path.join(train_dir, "input_data")

In [39]:
# Preprocessed test split and the sub-directory holding its input data.
test_dir = os.path.join(dataset_dir, "SpeechCommands_Preproc_2_test")
test_input_path = os.path.join(test_dir, "input_data")

In [64]:
# Label directories that sit next to the input data of each split.
training_labels_path = os.path.join(train_dir, "labels")
test_labels_path = os.path.join(test_dir, "labels")

### Define path to BiTiFGAN checkpoints

In [49]:
# Directory passed to get_hyperparams() when the BiTiFGAN model is rebuilt below.
checkpoints_path = os.path.join(data_dir, "checkpoints_summaries", "bitifgan-results-sc09-run6-512-gradnorm")

### Define path to BiTiFGAN evaluation results

In [25]:
# Paths of the validation-set evaluation archives, one per classifier
# (random forest and logistic regression), located two directory levels up.
results_dir = os.path.join("..", "..")
results_file_name = "evaluation_over_time_results.npz"
rf_evaluation_results = os.path.join(
    results_dir, "bitifgan_516_gradclip_eval_res_rf_holdoutCV", results_file_name)
lr_evaluation_results = os.path.join(
    results_dir, "bitifgan_516_gradclip_eval_res_lr_holdoutCV", results_file_name)

### Load evaluation results on validation set

In [26]:
# Open both evaluation archives; the selection loop below iterates over the
# keys of rf_results and uses each key to index both archives.
rf_results = np.load(rf_evaluation_results)
lr_results = np.load(lr_evaluation_results)

### Get checkpoint with best validation performance

In [33]:
# For each classifier, remember the update step (archive key) whose validation
# results have the highest mean over column index 2, which this notebook
# treats as the F1 score. The comparison is strict, so the first key that
# reaches the maximum is kept.
best_score_rf, best_score_update_step_rf = -1, 0
best_score_lr, best_score_update_step_lr = -1, 0

for update_step in rf_results.keys():
    # Both archives are indexed with the keys of the random-forest archive.
    rf_scores, lr_scores = rf_results[update_step], lr_results[update_step]

    candidate_rf = np.mean(rf_scores[:, 2])
    candidate_lr = np.mean(lr_scores[:, 2])

    if candidate_rf > best_score_rf:
        best_score_rf, best_score_update_step_rf = candidate_rf, update_step

    if candidate_lr > best_score_lr:
        best_score_lr, best_score_update_step_lr = candidate_lr, update_step

In [31]:
# Report the selected update step and its mean validation score per classifier.
report_template = "Update step with performing representations using {}: {} - Performance {}"
print(report_template.format("Logistic Regression", best_score_update_step_lr, best_score_lr))
print(report_template.format("Random Forest", best_score_update_step_rf, best_score_rf))

Update step with performing representations using Logistic Regression: 20000 - Performance 0.5789920902066363
Update step with performing representations using Random Forest: 32000 - Performance 0.7113905230233338


### Load data

###### Load training data

In [42]:
# Load the training inputs from train_input_path.
X_tr = load_data(train_input_path)

  0%|          | 0/165 [00:00<?, ?it/s]

Loading data


100%|██████████| 165/165 [00:33<00:00,  4.92it/s]


###### Load test data

In [43]:
# Load the test inputs from test_input_path.
X_ts = load_data(test_input_path)

  0%|          | 0/20 [00:00<?, ?it/s]

Loading data


100%|██████████| 20/20 [00:04<00:00,  4.97it/s]


### Load labels

In [69]:
# Load the raw labels of both splits, then map every label to the index it has
# among the sorted unique training labels.
y_train_raw = load_data_labels(training_labels_path)
y_test_raw = load_data_labels(test_labels_path)

label_dict = {label: position for position, label in enumerate(np.unique(y_train_raw))}
encode_labels = np.vectorize(label_dict.get)

y_train = encode_labels(y_train_raw)
y_test = encode_labels(y_test_raw)

100%|██████████| 165/165 [00:00<00:00, 3498.33it/s]
100%|██████████| 20/20 [00:00<00:00, 2504.81it/s]


## Test best performing representations according to Logistic Regression on the test set

### Extract features for both the training and test set with the best performing encoder

In [61]:
# Encode the training and test inputs with the model restored from the
# checkpoint that scored best with Logistic Regression on the validation set.
name = 'commands_md64_8k'
batch_size = 64
with tf.device('/gpu:0'):
    params = get_hyperparams(checkpoints_path, name)
    biwgan = GANsystem(BiSpectrogramGAN, params)

    # Restore the checkpoint once and reuse the same session for both splits,
    # instead of opening a second session and loading the same weights again.
    with tf.Session() as sess:
        biwgan.load(sess=sess, checkpoint=best_score_update_step_lr)

        encoded_splits = []
        for split in (X_tr, X_ts):
            # Feed the split in batches of `batch_size` and collect z_real
            # for every batch (the last batch may be smaller).
            batch_codes = [
                sess.run(biwgan._net.z_real,
                         feed_dict={biwgan._net.X_real: split[start:start + batch_size]})
                for start in range(0, len(split), batch_size)
            ]
            encoded_splits.append(np.vstack(batch_codes))

    # Same order as the loop above: training split first, test split second.
    features_tr, features_test = encoded_splits

Generator 
--------------------------------------------------
     The input is of size (?, 100)
     0 Full layer with 16384 outputs
         Size of the variables: (?, 16384)
     Reshape to (?, 8, 4, 512)
     1 Deconv layer with 512 channels
         Non linearity applied
         Size of the variables: (?, 16, 8, 512)
     2 Deconv layer with 256 channels
         Non linearity applied
         Size of the variables: (?, 32, 16, 256)
     3 Deconv layer with 128 channels
         Non linearity applied
         Size of the variables: (?, 64, 32, 128)
     4 Deconv layer with 64 channels
         Non linearity applied
         Size of the variables: (?, 128, 64, 64)
     5 Deconv layer with 1 channels
         Size of the variables: (?, 256, 128, 1)
    Costum non linearity: <function tanh at 0x7f59c23ecf80>
     The output is of size (?, 256, 128, 1)
--------------------------------------------------

Encoder 
--------------------------------------------------
     The data input i

### Calculate sample weights

In [70]:
# Weight every class by (size of the largest class) / (size of this class),
# computed on the training labels, and expand that to one weight per sample.
print("-Calculating class_weights based on the training data class labels")
label_dict = {label: position for position, label in enumerate(np.unique(y_train))}
class_indices = list(label_dict.values())

class_counts = [int(np.count_nonzero(y_train == class_idx)) for class_idx in class_indices]
largest_class_count = max(class_counts)
class_weights = [largest_class_count / count for count in class_counts]
class_weight_dict = dict(zip(class_indices, class_weights))

# Per-sample weights for both splits, looked up from the training class weights.
train_sample_weight = [class_weight_dict[label] for label in y_train]
test_sample_weight = [class_weight_dict[label] for label in y_test]

-Calculating class_weights based on the training data class labels


### Normalize data

In [71]:
# Standardize both feature sets with the scalar mean and standard deviation
# computed over all entries of the training features.
mean, std = features_tr.mean(), features_tr.std()
features_tr, features_test = [
    (split_features - mean) / std for split_features in (features_tr, features_test)
]

### Train Logistic Regression model

In [79]:
# Multinomial logistic regression with a fixed random_state and max_iter set to 15000.
model = LogisticRegression(multi_class='multinomial', random_state=0, max_iter=15000)

In [80]:
# Fit on the normalized training features, passing the per-sample weights
# derived from the training class frequencies.
model = model.fit(features_tr, y_train, sample_weight=train_sample_weight)

In [81]:
# Evaluate model on the normalized test features.
y_preds = model.predict(features_test)
# Store the report text itself: print() returns None, so assigning the result
# of print(...) would leave `metrics` empty.
metrics = classification_report(y_test, y_preds)
print(metrics)

              precision    recall  f1-score   support

           0       0.60      0.66      0.63       257
           1       0.59      0.59      0.59       270
           2       0.64      0.62      0.63       253
           3       0.58      0.59      0.59       259
           4       0.61      0.60      0.60       248
           5       0.63      0.66      0.64       239
           6       0.88      0.80      0.84       244
           7       0.50      0.54      0.52       267
           8       0.54      0.56      0.55       264
           9       0.69      0.60      0.64       250

    accuracy                           0.62      2551
   macro avg       0.63      0.62      0.62      2551
weighted avg       0.62      0.62      0.62      2551



## Test best performing representations according to Random Forest on the test set

In [82]:
# Encode the training and test inputs with the model restored from the
# checkpoint that scored best with Random Forest on the validation set.
name = 'commands_md64_8k'
batch_size = 64
with tf.device('/gpu:0'):
    params = get_hyperparams(checkpoints_path, name)
    biwgan = GANsystem(BiSpectrogramGAN, params)

    # Restore the checkpoint once and reuse the same session for both splits,
    # instead of opening a second session and loading the same weights again.
    with tf.Session() as sess:
        biwgan.load(sess=sess, checkpoint=best_score_update_step_rf)

        encoded_splits = []
        for split in (X_tr, X_ts):
            # Feed the split in batches of `batch_size` and collect z_real
            # for every batch (the last batch may be smaller).
            batch_codes = [
                sess.run(biwgan._net.z_real,
                         feed_dict={biwgan._net.X_real: split[start:start + batch_size]})
                for start in range(0, len(split), batch_size)
            ]
            encoded_splits.append(np.vstack(batch_codes))

    # Same order as the loop above: training split first, test split second.
    features_tr, features_test = encoded_splits

Generator 
--------------------------------------------------
     The input is of size (?, 100)
     0 Full layer with 16384 outputs
         Size of the variables: (?, 16384)
     Reshape to (?, 8, 4, 512)
     1 Deconv layer with 512 channels
         Non linearity applied
         Size of the variables: (?, 16, 8, 512)
     2 Deconv layer with 256 channels
         Non linearity applied
         Size of the variables: (?, 32, 16, 256)
     3 Deconv layer with 128 channels
         Non linearity applied
         Size of the variables: (?, 64, 32, 128)
     4 Deconv layer with 64 channels
         Non linearity applied
         Size of the variables: (?, 128, 64, 64)
     5 Deconv layer with 1 channels
         Size of the variables: (?, 256, 128, 1)
    Costum non linearity: <function tanh at 0x7f59c23ecf80>
     The output is of size (?, 256, 128, 1)
--------------------------------------------------

Encoder 
--------------------------------------------------
     The data input i

### Normalize data

In [83]:
# Standardize both feature sets with the scalar mean and standard deviation
# computed over all entries of the training features.
mean, std = features_tr.mean(), features_tr.std()
features_tr, features_test = [
    (split_features - mean) / std for split_features in (features_tr, features_test)
]

### Train Random Forest model

In [87]:
# Random forest with n_estimators=400 and a fixed random_state.
model = RandomForestClassifier(n_estimators=400, random_state=0)

In [None]:
# Fit on the normalized training features; train_sample_weight is the list
# computed in the "Calculate sample weights" cell earlier in the notebook.
model = model.fit(features_tr, y_train, sample_weight=train_sample_weight)

In [None]:
# Evaluate model on the normalized test features.
y_preds = model.predict(features_test)
# Store the report text itself: print() returns None, so assigning the result
# of print(...) would leave `metrics` empty.
metrics = classification_report(y_test, y_preds)
print(metrics)