In [1]:
# --- Import Libraries
import copy
import numpy as np
import tensorflow as tf
from tensorflow.keras import Model, Sequential, layers, losses, optimizers
from sklearn import random_projection
from sklearn.metrics import accuracy_score

from data import Dataset

**Notes:** 
* https://link.springer.com/article/10.1007/s10044-018-0697-0
* https://keras.io/examples/vision/grad_cam/

In [2]:
#--- Autoselect GPU
from jarvis.utils.general import gpus
gpus.autoselect()

[ 2021-04-26 12:46:15 ] CUDA_VISIBLE_DEVICES automatically set to: 1           


In [3]:
# --- Prepare Data
path = 'data/ctrl_vs_case.csv'
dataset = Dataset(path)

In [4]:
# --- Model Blocks
conv = lambda x, features, dr=1, name=None : layers.Conv1D(filters=features, kernel_size=1, strides=1, dilation_rate=dr, padding='same', name=name)(x)
elu  = lambda x: layers.ELU()(x)
norm = lambda x: layers.BatchNormalization()(x)
mlp  = lambda x, features, dr=1: elu(norm((conv(x, features, dr))))

# --- Model Architecture
def encoder(x, features):
    for f in features:
        x = mlp(x, f)
    return x

def decoder(x, features):
    features = features[::-1]
    for f in features:
        x = mlp(x, f)
    return x

def network(dataset):
    tf.random.set_seed(0)
    x = dataset.Input
    input_size = x.shape[-1]
    features = [128, 64]
    embed_size = 32
    outputs = {}
    
    x_ = encoder(x, features)
    embedding = mlp(x_, embed_size)
    x_ = decoder(embedding, features)

    classification = conv(embedding, 2, name=dataset.l1)
    reconstruction = conv(x_, input_size, name=dataset.l2)
    
    outputs[dataset.l1] = classification
    outputs[dataset.l2] = reconstruction
    
    return Model(x, outputs)
    
# --- Model Compile
def compile_(model, dataset, lr=1e-4):
    model.compile(
        optimizer=optimizers.Adam(learning_rate=lr), 
        loss={dataset.l1: losses.SparseCategoricalCrossentropy(from_logits=True), 
              dataset.l2: losses.MSE}, 
        metrics={dataset.l1: 'accuracy', 
                 dataset.l2: 'mean_absolute_error'},
        experimental_run_tf_function=False)

    return model

# --- Model Trainer
def train(model, dataset):
    outputs = {}
    outputs[dataset.l1] = dataset.train_labels
    outputs[dataset.l2] = dataset.train_data
    class_weights = {dataset.l1: dataset.weights}
    
    validation = {}
    validation[dataset.l1] = dataset.test_labels
    validation[dataset.l2] = dataset.test_data
    
    history = model.fit(
        x=dataset.train_data,
        y=outputs, 
        batch_size=4, 
        epochs=80, 
        validation_data=(dataset.test_data, validation), 
        validation_freq=10,
        class_weight=class_weights,
    )
    return history


# --- Prepare Model
# del model
model = network(dataset)
model = compile_(model, dataset)

In [5]:
# --- Train Model
history = train(model, dataset)


Train on 114 samples, validate on 49 samples
Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Epoch 38/80
Epoch 39/80
Epoch 40/80
Epoch 41/80
Epoch 42/80
Epoch 43/80
Epoch 44/80
Epoch 45/80
Epoch 46/80
Epoch 47/80
Epoch 48/80
Epoch 49/80
Epoch 50/80
Epoch 51/80
Epoch 52/80
Epoch 53/80
Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80
Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80
Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80
Epoch 76/80
Epoch 77/80
Epoch 78/80
Epoch 79/80
Epoch 80/80


In [6]:
# Nonlinear [1024, 512, 256] embed 128
print(history.history['classification_accuracy'][-1])
print(history.history['reconstruction_mean_absolute_error'][-1])

print(history.history['val_classification_accuracy'][-1])
print(history.history['val_reconstruction_mean_absolute_error'][-1])


# (pred best class 0.8163265306122455)

# no class_weights 
# 0.7920354
# 489.65833
# 0.8134715
# 476.4897

# class_weights
# 0.53539824
# 490.59412
# 0.74093264
# 476.9029

# [256] class_weights
# 0.5132743
# 514.46576
# 0.54404145
# 513.9966

# [128, 64] class_weights embed 32
# 0.53097343
# 518.819
# 0.7098446
# 515.60516

# Deeper better than wider!

0.50442475
518.71136
0.68911916
515.4104


In [17]:
# Linear [1024, 512, 256] embed 128
print(history.history['classification_accuracy'][-1])
print(history.history['reconstruction_mean_absolute_error'][-1])

print(history.history['val_classification_accuracy'][-1])
print(history.history['val_reconstruction_mean_absolute_error'][-1])

# 0.8097345
# 506.56668
# 0.7512953
# 497.98688

0.5176991
506.6457
0.3834197
500.44925


In [7]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 1, 53859)]   0                                            
__________________________________________________________________________________________________
conv1d (Conv1D)                 (None, 1, 128)       6894080     input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 1, 128)       512         conv1d[0][0]                     
__________________________________________________________________________________________________
elu (ELU)                       (None, 1, 128)       0           batch_normalization[0][0]        
______________________________________________________________________________________________

In [11]:
def pred_one_class_acc(y):
    return sum(y/len(y))
    
def eval(model, data, labels):
    # test on whole dataset
    preds = model.predict(data)

    if type(preds) == list: preds = preds[0];
    preds = np.argmax(preds, axis=-1)
    
    print('dominant class acc: {}'.format(pred_one_class_acc(labels)))
    print('model pred acc: {}'.format(accuracy_score(labels, preds)))
    

In [16]:
eval(model, dataset.test_data, dataset.test_labels)

dominant class acc: 0.8163265306122455
model pred acc: 0.5306122448979592


# Machine Learning Stuff...

In [12]:
transformer = random_projection.GaussianRandomProjection()
X_new = transformer.fit_transform(dataset.features.squeeze())
print(X_new.shape)

(163, 4366)


In [13]:
X_new

array([[  7794.5112722 ,  25466.99115922,  50625.08150119, ...,
        -18741.85192469, -14975.12450545, -24716.42935759],
       [ 11071.49525594,  20976.92404719,  39328.86054669, ...,
        -14292.56014547,  -6918.82888456, -19860.47128973],
       [  8011.61620477,  17182.79149171,  40489.7180404 , ...,
        -12251.25835009,  -8394.20052692, -15792.59517772],
       ...,
       [ 13475.77548306,  21790.79481885,  44833.57616194, ...,
        -12127.87087676, -15508.98270711, -16103.80338758],
       [  7025.02032196,  16959.86900662,  42436.73625363, ...,
        -14074.24514158, -10336.02692565, -16806.70632879],
       [ 10728.48657413,  19286.38915573,  44174.01361244, ...,
        -17721.62286345, -15242.08650982, -21161.04696759]])

In [74]:
#https://jakevdp.github.io/PythonDataScienceHandbook/05.10-manifold-learning.html

In [5]:
from sklearn.manifold import LocallyLinearEmbedding
model = LocallyLinearEmbedding(n_neighbors=30, n_components=3, method='modified',
                               eigen_solver='dense')

out = model.fit_transform(dataset.features.squeeze())

In [6]:
fig = pyplot.figure()
ax = Axes3D(fig)
ax.scatter(out[:, 0], out[:, 1], out[:, 2])


plt.show()

NameError: name 'pyplot' is not defined

In [7]:
X = dataset.features.squeeze()

In [91]:
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier


Xtr, Xte, Ytr, Yte = [np.array(data.squeeze()) for data in [dataset.train_data, dataset.test_data, dataset.train_labels, dataset.test_labels]]


def pca(x, n=10, verbose=False):
    model = PCA(n).fit(x)
    if verbose:
        plt.plot(np.cumsum(model.explained_variance_ratio_))
        plt.xlabel('n components')
        plt.ylabel('cumulative variance');
    return model


def results(model, xtr, ytr, xte, yte):
    print('train: {}'.format(accuracy_score(ytr, model.predict(xtr))))
    print('train pick als: {}'.format(sum(ytr)/len(ytr)))
    print('test: {}'.format(accuracy_score(yte, model.predict(xte))))
    print('test pick als: {}'.format(sum(yte)/len(yte)))
    
    

def knn(xtr, ytr, xte, yte, n=3):
    model = KNeighborsClassifier(n_neighbors=n)
    model.fit(xtr, ytr)
    
    results(model, xtr, ytr, xte, yte)
    
    
def rf(xtr, ytr, xte, yte, n=1):
    model = RandomForestClassifier(n_estimators=500, max_depth=2, random_state=0, class_weight='balanced')
    model.fit(xtr, ytr)
    
    results(model, xtr, ytr, xte, yte)

In [65]:
# pca
pc = pca(Xtr)
xtr = pc.transform(Xtr)
xte = pc.transform(Xte)

print('- KNN')
knn(Xtr, Ytr, Xte, Yte)
print('- PCA')
knn(xtr, Ytr, xte, Yte)
print()
print('- RF')
rf(Xtr, Ytr, Xte, Yte)
print('- PCA')
rf(xtr, Ytr, xte, Yte)

- KNN
train: 0.8596491228070176
train pick als: 0.8157894736842105
test: 0.7755102040816326
test pick als: 0.8163265306122449
- PCA
train: 0.868421052631579
train pick als: 0.8157894736842105
test: 0.7551020408163265
test pick als: 0.8163265306122449

- RF
train: 1.0
train pick als: 0.8157894736842105
test: 0.8163265306122449
test pick als: 0.8163265306122449
- PCA
train: 0.9385964912280702
train pick als: 0.8157894736842105
test: 0.7346938775510204
test pick als: 0.8163265306122449
