In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_circles

In [None]:
def scatter(x, y=None):
    """Draw 2-D points with ``plt.plot`` on the current matplotlib figure.

    Parameters
    ----------
    x : array-like of shape (n_points, 2)
        Point coordinates. Column 0 is used as the horizontal coordinate and
        column 1 as the vertical coordinate.
    y : array-like of shape (n_points,), optional
        Class labels. Points labeled 0 are drawn with the ``'bo'`` format and
        points labeled 1 with ``'ro'`` (both with ``alpha=0.5``); points with
        any other label are not drawn. When omitted, every point is drawn
        with the ``'ko'`` format and ``alpha=0.05``.

    Returns
    -------
    None
        The function only draws; it neither creates nor shows a figure.
    """
    # Coerce to ndarrays so that plain Python lists work too. Without this,
    # ``y == 0`` on a list evaluates to the scalar ``False`` and silently
    # selects no points instead of raising.
    points = np.asarray(x)
    if y is None:
        plt.plot(points[:, 0], points[:, 1], 'ko', alpha=0.05)
        return

    labels = np.asarray(y)
    for label, fmt in ((0, 'bo'), (1, 'ro')):
        mask = labels == label
        plt.plot(points[mask, 0], points[mask, 1], fmt, alpha=0.5)

In [None]:
def plot_scatter(x, y=None):
    """Open a new square figure, draw the points with ``scatter`` and show it.

    Parameters
    ----------
    x : point coordinates, forwarded unchanged to ``scatter``.
    y : optional labels, forwarded unchanged to ``scatter``.
    """
    side = 6  # figure width and height passed to matplotlib's ``figsize``
    plt.figure(figsize=(side, side))
    scatter(x, y)
    plt.show()

In [None]:
### Define circle distributions

In [None]:
# Shared settings forwarded to every make_circles call in this notebook:
#   noise  -> make_circles(noise=...)
#   factor -> make_circles(factor=...)
noise, factor = 0.1, 0.2

### Training data (small)

In [None]:
# Small labeled training set (10 points). random_state pins the draw so the
# same points are produced on every Restart & Run All; with so few samples an
# unseeded draw makes downstream scores vary noticeably from run to run.
x_train, y_train = make_circles(n_samples=10, noise=noise, factor=factor,
                                random_state=0)
plot_scatter(x_train, y_train)

### Test data

In [None]:
# Labeled hold-out set (100 points) used only for scoring. random_state fixes
# the sample so reported accuracies are reproducible across runs.
x_test, y_test = make_circles(n_samples=100, noise=noise, factor=factor,
                              random_state=1)
plot_scatter(x_test, y_test)

### Unlabeled data drawn from the same distribution (big)

In [None]:
# Large pool of points whose labels are discarded (the `_` target), i.e. the
# "unlabeled" data. random_state fixes the sample so reruns see the same data.
x, _ = make_circles(n_samples=10000, noise=noise, factor=factor,
                    random_state=2)
plot_scatter(x)

### Plot labeled and unlabeled data in the same figure

In [None]:
plt.figure(figsize=(6, 6))
# Draw the dense unlabeled cloud first: matplotlib paints artists in the order
# they are added, so the few labeled training points end up on top of the
# 10000 background points instead of being buried underneath them.
scatter(x)
scatter(x_train, y_train)
# Optional: uncomment to overlay the labeled test points as well.
#scatter(x_test, y_test)
plt.show()

### Build an autoencoder to learn a latent representation

In [None]:
import keras
from keras.layers import Input, Dense
from keras.models import Model

activation = 'tanh'
latent_dim = 4  # width of the bottleneck whose activations are used as features

# Encoder: 2 -> 20 -> 10 -> latent_dim.
input_vector = Input(shape=(2,))
h = Dense(20, activation=activation)(input_vector)
h = Dense(10, activation=activation)(h)
encoded = Dense(latent_dim, activation=activation)(h)

# The decoder layers are created once and kept in a list so the very same
# layer objects (and therefore the same weights) can be applied both inside
# the autoencoder and in the stand-alone decoder, without relying on
# positional indexing into ae.layers.
decoder_layers = [
    Dense(10, activation=activation),
    Dense(20, activation=activation),
    # Linear output: the reconstruction targets are the raw coordinates, and
    # make_circles' outer circle has radius 1 before noise is added, so some
    # targets lie outside (-1, 1), which a tanh output could never reach.
    Dense(2, activation='linear'),
]

# Autoencoder = encoder followed by the decoder layers.
h = encoded
for layer in decoder_layers:
    h = layer(h)

ae = Model(input_vector, h)
ae.summary()

# Stand-alone decoder (latent vector -> reconstructed point), sharing weights
# with the autoencoder.
encoded_input = Input(shape=(latent_dim,))
decoded = encoded_input
for layer in decoder_layers:
    decoded = layer(decoded)
decoder = Model(encoded_input, decoded)

# Stand-alone encoder (point -> latent vector), sharing weights with `ae`.
encoder = Model(input_vector, encoded)

ae.compile(optimizer='rmsprop', loss='mse')

# Train the autoencoder to reproduce its own input using only the unlabeled
# points; no labels are involved in this step.
ae.fit(x, x, batch_size=100, epochs=2, validation_split=0.1)

# Latent features of the labeled sets, consumed by the classifiers below.
x_train_latent = encoder.predict(x_train)
x_test_latent = encoder.predict(x_test)

### Compare the performance of supervised and semi-supervised learning

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Each entry: (printed label, training features, test features).
#   1. raw coordinates only
#   2. autoencoder latent features only
#   3. raw coordinates and latent features side by side
feature_sets = [
    ('Supervised learning:      ', x_train, x_test),
    ('Semi-supervised learning: ', x_train_latent, x_test_latent),
    ('Combined learning:        ',
     np.hstack((x_train, x_train_latent)),
     np.hstack((x_test, x_test_latent))),
]

classifiers = []
scores = []
for label, features_train, features_test in feature_sets:
    # Same fixed seed for every forest: results are reproducible and the
    # three scores differ only because of the features, not the forest's RNG.
    clf = RandomForestClassifier(random_state=0)
    clf.fit(features_train, y_train)
    score = clf.score(features_test, y_test)  # mean accuracy on the test set
    print(label, score)
    classifiers.append(clf)
    scores.append(score)

# Keep the original per-experiment names available for later cells.
c1, c2, c3 = classifiers
s1, s2, s3 = scores