In [1]:
from generator import LowShotGenerator
from classifier import Classifier
from train import get_trained_classifier_and_data

import numpy as np
import data_utils as du
import collect

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# --- experiment configuration -------------------------------------------
n_clusters = 20              # forwarded to collect.load_quadruplets
n_files = 12                 # how many data files to load
λ = 1.0                      # forwarded to LowShotGenerator as its λ argument
n_samples = 1                # real samples of the novel disease kept for training
n_examples = 20              # target size of the novel-disease training set after generation
disease_name = 'Hernia'      # the novel (held-out) disease
all_diseases = [*range(15)]  # integer labels 0..14

In [3]:
# Load the processed data for n_files data files, then obtain the label
# encoder that is used further down to turn a disease name into its
# integer label.
data_obj = du.get_processed_data(n_files)
le = du.get_label_encoder(data_obj)

fetching data from file #1
fetching data from file #2
fetching data from file #3
fetching data from file #4
fetching data from file #5
fetching data from file #6
fetching data from file #7
fetching data from file #8
fetching data from file #9
fetching data from file #10
fetching data from file #11
fetching data from file #12


In [4]:
# Integer label that the label encoder maps the novel disease's name to.
disease_label_int = le.transform((disease_name,))[0]
print('{0}\'s label is {1}'.format(disease_name, disease_label_int))

# The novel disease is tracked both by name and by integer label.
diseases_to_remove = [disease_name]
unused_diseases = [disease_label_int]

# Every label except the held-out one.
diseases = [label for label in all_diseases if label not in unused_diseases]
print('All remaining diseases: {0}'.format(', '.join(map(str, diseases))))

Hernia's label is 7
All remaining diseases: 0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14


In [5]:
# Fetch the classifier and its train/test arrays for the setup in which
# `diseases_to_remove` is passed as the set of diseases to leave out.
trained = get_trained_classifier_and_data(diseases_to_remove, n_files=n_files)
classifier, X_train, X_test, y_train, y_test = trained
classifier.toggle_trainability()  # make the classifier non-trainable

Loaded classifier and data from files
Loaded classifier weights from a saved model
Classifier is now non-trainable!


In [6]:
# Quadruplet data for the non-held-out diseases only; it is handed to
# LowShotGenerator in the next cell.
quadruplets_data = collect.load_quadruplets(n_clusters=n_clusters, categories=diseases, n_files=n_files)

Loaded centroids from file


In [7]:
# The generator's name records the number of files, the number of clusters
# and the labels of the held-out diseases.
unused_tag = '.'.join(map(str, unused_diseases))
lsg_name = 'lsg_f.{0}_c.{1}_w.{2}'.format(n_files, n_clusters, unused_tag)
lsg = LowShotGenerator(classifier.model, quadruplets_data, λ=λ, name=lsg_name)

Generator summary:
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 6144)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 512)               3146240   
_________________________________________________________________
dense_2 (Dense)              (None, 512)               262656    
_________________________________________________________________
dense_3 (Dense)              (None, 2048)              1050624   
Total params: 4,459,520
Trainable params: 4,459,520
Non-trainable params: 0
_________________________________________________________________
None

Whole model summary:
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 6144)              0         
____________________

In [8]:
# Fit the generator. As the last expression of the cell, the value returned
# by fit() is echoed as the cell's output.
lsg.fit()

Fitting generator
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.engine.training.Model at 0x7f48fc135080>

In [15]:
# Load the quadruplet data again, this time for all diseases (the held-out
# one included), and unpack its four components.
# NOTE(review): none of the names bound here are referenced by the later
# cells visible in this notebook - confirm whether this cell is still needed.
unused_data = collect.load_quadruplets(n_clusters=n_clusters, categories=all_diseases, n_files=n_files)
quadruplets, centroids, cat_to_vectors, original_shape = unused_data

Loaded centroids from file


In [16]:
# Report the few-shot setup; the disease is printed as its integer label,
# not as its name.
print('Generating {0} examples from {1} samples of {2}'.format(n_examples, n_samples, disease_label_int))

Generating 20 examples from 1 samples of 7


In [56]:
# Features/labels of every disease, freshly split with 10% held out for testing.
X, y = du.get_features_and_labels(data_obj)
X_train, X_test, y_train, y_test = du.get_train_test_split(X, y, test_size=0.1)

# Rows whose label column for the novel disease is set (computed once per split).
train_has_disease = y_train[:, disease_label_int] == 1.0
test_has_disease = y_test[:, disease_label_int] == 1.0

# "sub" sets: everything that is NOT the novel disease.
y_test_sub = y_test[~test_has_disease]
X_test_sub = X_test[~test_has_disease]
y_train_sub = y_train[~train_has_disease]
X_train_sub = X_train[~train_has_disease]

# Pool every row of the novel disease from both splits (train rows first).
y_disease = np.concatenate((y_train[train_has_disease], y_test[test_has_disease]))
X_disease = np.concatenate((X_train[train_has_disease], X_test[test_has_disease]))

# Re-split the pooled rows: the first n_samples go to training, the rest to
# testing. `mask` is True for the rows that end up in the test portion.
mask = np.ones(len(y_disease), dtype=bool)
mask[np.arange(n_samples)] = False
y_train_disease, X_train_disease = y_disease[~mask], X_disease[~mask]
y_test_disease, X_test_disease = y_disease[mask], X_disease[mask]

print('Training with {0} samples'.format(len(X_train_disease)))
print('Testing on {0} samples'.format(len(X_test_disease)))

Training with 1 samples
Testing on 109 samples


In [27]:
# Baseline: train on all samples of the other diseases plus only the
# n_samples real samples of the novel disease (no generated examples yet).
X_train = np.concatenate((X_train_sub, X_train_disease), axis=0)
y_train = np.concatenate((y_train_sub, y_train_disease), axis=0)

classifier = Classifier(trainable=True, n_classes=15)
classifier.fit(X_train, y_train)

Train on 73881 samples, validate on 8209 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100


Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.engine.training.Model at 0x7f48f11fe080>

In [28]:
# Held-out accuracy on the test rows that do not belong to the novel disease.
loss, acc = classifier.evaluate(X_test_sub, y_test_sub)
regular_report = 'accuracy for regular diseases is {0}'.format(acc)
print(regular_report)

# Held-out accuracy on the novel disease's test rows (baseline classifier).
loss, acc = classifier.evaluate(X_test_disease, y_test_disease)
novel_report = 'accuracy for novel disease {0} without generated examples is {1}'.format(disease_label_int, acc)
print(novel_report)

accuracy for regular diseases is 0.2127123287671233
accuracy for novel disease 7 without generated examples is 0.0


In [57]:
# now, generating new examples and concat them to the n_samples samples
#
# Always start from the first n_samples rows, i.e. the real samples. The
# augmented array is written back into X_train_disease with the real rows
# first, so without this slice a re-run of the cell would feed previously
# generated rows back into the generator and grow the array on every run.
real_disease_samples = X_train_disease[:n_samples]

# Each real sample contributes (n_examples - n_samples) // n_samples new rows,
# so the total can fall short of n_examples when it is not a multiple of
# n_samples.
new_examples = [lsg.generate(ϕ, n_new=(n_examples - n_samples) // n_samples) for ϕ in real_disease_samples]
X_train_disease = np.concatenate([real_disease_samples] + new_examples)

from sklearn.preprocessing import OneHotEncoder
def onehot_encode(y, n_classes=None):
    """Return a dense one-hot (float64) encoding of integer class labels.

    Implemented with NumPy only. The previous implementation relied on
    ``OneHotEncoder(n_values=...)``; the ``n_values`` argument was deprecated
    in scikit-learn 0.20 and removed in 0.22, so it fails on current versions.

    Parameters
    ----------
    y : array-like of int
        Class labels; flattened to one dimension before encoding.
    n_classes : int, optional
        Width of the encoding. When given, every label must lie in
        ``[0, n_classes)`` and column ``k`` marks class ``k``. When omitted
        (or falsy), one column is produced per distinct value found in ``y``,
        ordered by increasing value.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(y), n_columns)`` containing 0.0 and 1.0.

    Raises
    ------
    ValueError
        If ``n_classes`` is given and a label falls outside ``[0, n_classes)``.
    """
    labels = np.asarray(y).reshape(-1)

    if n_classes:
        width = int(n_classes)
        # Reject out-of-range labels explicitly; negative values would
        # otherwise silently index columns from the end.
        if labels.size and (labels.min() < 0 or labels.max() >= width):
            raise ValueError(
                'labels must lie in [0, {0}); got values in [{1}, {2}]'.format(
                    width, labels.min(), labels.max()))
        columns = labels.astype(np.intp)
    else:
        # One column per distinct label, in sorted order.
        distinct, columns = np.unique(labels, return_inverse=True)
        columns = columns.reshape(-1)
        width = len(distinct)

    one_hot_labels = np.zeros((labels.size, width))
    one_hot_labels[np.arange(labels.size), columns] = 1.0
    return one_hot_labels

# Give every row of the novel-disease training set a one-hot label that marks
# only the novel disease.
# NOTE(review): this also replaces the label rows of the real samples -
# confirm that is intended if a sample can carry more than one label.
disease_labels = np.array([disease_label_int] * len(X_train_disease))
y_train_disease = onehot_encode(disease_labels, 15)

In [58]:
# Retrain from scratch, this time with the novel-disease set that includes
# the generated examples.
print(y_train_sub.shape, y_train_disease.shape)
X_train = np.concatenate((X_train_sub, X_train_disease), axis=0)
y_train = np.concatenate((y_train_sub, y_train_disease), axis=0)

classifier = Classifier(trainable=True, n_classes=15)
classifier.fit(X_train, y_train)

(82089, 15) (20, 15)
Train on 73898 samples, validate on 8211 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100


Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.engine.training.Model at 0x7f48f0bb8048>

In [59]:
# Held-out accuracy on the test rows that do not belong to the novel disease.
loss, acc = classifier.evaluate(X_test_sub, y_test_sub)
regular_report = 'accuracy for regular diseases is {0}'.format(acc)
print(regular_report)

# Held-out accuracy on the novel disease's test rows, for the classifier
# trained with generated examples.
loss, acc = classifier.evaluate(X_test_disease, y_test_disease)
novel_report = 'accuracy for novel disease {0} with generated examples is {1}'.format(disease_label_int, acc)
print(novel_report)

accuracy for regular diseases is 0.1907945205479452
accuracy for novel disease 7 with generated examples is 0.0
