In [55]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt                                                 

from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist,cifar10
from tensorflow.keras.models import Model
import keras
from sklearn.manifold import TSNE
import plotly.express as px



def rounded_accuracy(y_true,y_pred):
  """
  Binary accuracy computed after rounding both targets and predictions.

  Both tensors are passed through `tf.round` and then compared with
  `keras.metrics.binary_accuracy`.
  """
  rounded_targets = tf.round(y_true)
  rounded_outputs = tf.round(y_pred)
  return keras.metrics.binary_accuracy(rounded_targets, rounded_outputs)


def preprocess(array):
    """
    Scale pixel values by 1/255 and reshape to (num_images, 28, 28, 1).

    The input is cast to float32 before scaling; the trailing axis of size 1
    is the channel axis expected by the convolutional layers.
    """
    scaled = array.astype("float32") / 255.0
    num_images = len(scaled)
    return scaled.reshape((num_images, 28, 28, 1))


def noise(array, noise_factor=0.4):
    """
    Adds Gaussian noise to each image in the supplied array.

    Parameters
    ----------
    array : numpy array of images.
    noise_factor : multiplier applied to the standard-normal noise before it
        is added. Defaults to 0.4, the value that used to be hard-coded.

    Returns
    -------
    A new array of the same shape with every value clipped to [0.0, 1.0].

    The noise is drawn from NumPy's global random state, so call
    `np.random.seed(...)` beforehand for reproducible output.
    """
    gaussian = np.random.normal(loc=0.0, scale=1.0, size=array.shape)
    noisy_array = array + noise_factor * gaussian

    # Clip so the noisy images stay in the same [0, 1] range as the inputs.
    return np.clip(noisy_array, 0.0, 1.0)


def display(array1, array2):
    """
    Show the same ten randomly chosen images from both arrays.

    Images from `array1` fill the top row and the matching images from
    `array2` fill the bottom row. Each image is reshaped to 28x28 for plotting.
    """

    n = 10

    # One set of random indices is shared so the two rows line up column by column.
    indices = np.random.randint(len(array1), size=n)
    top_row = array1[indices, :]
    bottom_row = array2[indices, :]

    plt.figure(figsize=(20, 4))
    for column, image_pair in enumerate(zip(top_row, bottom_row), start=1):
        # Offset 0 selects the first subplot row, offset n the second.
        for row_offset, image in zip((0, n), image_pair):
            ax = plt.subplot(2, n, column + row_offset)
            plt.imshow(image.reshape(28, 28))
            plt.gray()
            for axis in (ax.get_xaxis(), ax.get_yaxis()):
                axis.set_visible(False)

    plt.show()

In [None]:
# Load the MNIST train/test splits (images and digit labels).
(train_data, train_labels), (test_data, test_labels) = mnist.load_data()

# Scale the pixels and add the channel axis for both splits.
train_data, test_data = (preprocess(split) for split in (train_data, test_data))

# Noisy counterparts of both splits (train first, then test, so the order of
# random draws is unchanged).
noisy_train_data, noisy_test_data = (noise(split) for split in (train_data, test_data))

# Visual check: clean training digits on top, their noisy versions below.
display(train_data, noisy_train_data)

In [None]:
# Convolutional denoising autoencoder: 28x28x1 image in, 28x28x1 reconstruction out.
# The tensor is called `inputs` (not `input`) so the builtin `input()` is not
# shadowed for the rest of the session.
inputs = layers.Input(shape=(28, 28, 1))

# Encoder: two conv + 2x2 max-pool stages (28 -> 14 -> 7), then a 4-channel
# convolution whose output is the named bottleneck "encoder_output".
x = layers.Conv2D(32, (3, 3), activation="relu", padding="same")(inputs)
x = layers.MaxPooling2D((2, 2), padding="same")(x)
x = layers.Conv2D(32, (3, 3), activation="relu", padding="same")(x)
x = layers.MaxPooling2D((2, 2), padding="same")(x)
x = layers.Conv2D(4, (3, 3), activation="relu", padding="same",name="encoder_output")(x)

# Decoder: two stride-2 transposed convolutions (7 -> 14 -> 28), then a
# single-channel sigmoid convolution producing pixel values in [0, 1].
x = layers.Conv2DTranspose(32, (3, 3), strides=2, activation="relu", padding="same")(x)
x = layers.Conv2DTranspose(32, (3, 3), strides=2, activation="relu", padding="same")(x)
x = layers.Conv2D(1, (3, 3), activation="sigmoid", padding="same")(x)

# Autoencoder: per-pixel binary cross-entropy loss, tracked with rounded accuracy.
autoencoder = Model(inputs, x)
autoencoder.compile(optimizer="adam", loss="binary_crossentropy",metrics = [rounded_accuracy])
autoencoder.summary()

In [None]:
# Train the autoencoder to map noisy images back to their clean originals,
# validating on the noisy/clean test pair after every epoch.
autoencoder.fit(
    noisy_train_data,
    train_data,
    batch_size=128,
    epochs=100,
    shuffle=True,
    validation_data=(noisy_test_data, test_data),
)

In [None]:
# Render the autoencoder architecture to a PNG file.
tf.keras.utils.plot_model(
    autoencoder,
    to_file="autoencoder_mnist.png",
    # What to annotate on each node.
    show_shapes=True,
    show_layer_names=True,
    show_layer_activations=True,
    show_dtype=False,
    # Layout and rendering options.
    rankdir="TB",
    dpi=96,
    expand_nested=False,
    layer_range=None,
)

In [None]:
# Denoise the noisy test images and show them above their reconstructions.
predictions = autoencoder.predict(x=noisy_test_data)
display(array1=noisy_test_data, array2=predictions)

In [65]:
# Sub-model that stops at the autoencoder's "encoder_output" layer, so calling
# it yields the bottleneck activations instead of the reconstruction.
feature_extractor = Model(
    inputs=autoencoder.inputs,
    outputs=autoencoder.get_layer("encoder_output").output,
)

In [66]:
# Encode the clean test images and flatten each bottleneck activation map
# into one feature vector per image.
features = feature_extractor(test_data)
features = features.numpy()
# Derive the row count from the data instead of hard-coding 10000, so the cell
# still works when a subset (or a different split) is encoded.
features = features.reshape(len(features), -1)

In [None]:
# Project the encoded features to 2-D with t-SNE.
# `init="pca"` is passed explicitly: this embedding is described as PCA
# initialised later in the notebook, and the library default for `init`
# differs between scikit-learn releases.
tsne = TSNE(n_components=2, init="pca")
tsne_results = tsne.fit_transform(features)

# Colour each embedded point by its true digit label.
fig = px.scatter(tsne_results, x=0, y=1,color=test_labels,labels={'0': 'tsne-2d-one', '1': 'tsne-2d-two'})
fig.show()

In [9]:
from sklearn.cluster import KMeans
# Elbow method on the 2-D t-SNE embedding: fit K-means for k = 2..19 and
# record the inertia reported by each fit.
# The values are stored exactly as returned. Subtracting constants for selected
# k would manufacture an elbow that the data does not show.
inertias = []
for cluster in range(2,20):
  kmeans = KMeans(n_clusters=cluster).fit(tsne_results)
  inertias.append(kmeans.inertia_)

In [None]:
import seaborn as sns
# Plot inertia against the number of clusters (line plus point markers).
# The k values start at 2 and are sized from `inertias` so x and y always match.
cluster_counts = list(range(2, 2 + len(inertias)))

plt.figure(figsize = (16,8))
plt.plot(cluster_counts, inertias)
# x/y are passed by keyword: recent seaborn releases reject positional x/y.
sns.scatterplot(x=cluster_counts, y=inertias)
plt.xticks(cluster_counts)
plt.xlabel("Number of clusters (k)")
plt.ylabel("Inertia")
plt.title("K-means Elbow Method with Tsne (PCA initialized) Reduced Vectors")
plt.show()

In [68]:
# Cluster the 2-D t-SNE embedding into ten groups and keep each sample's cluster id.
kmeans_tsne = KMeans(n_clusters=10)
kmeans_tsne_labels = kmeans_tsne.fit(tsne_results).labels_

In [None]:
# Same embedding as before, now coloured by the K-means cluster assignment.
fig = px.scatter(
    tsne_results,
    x=0,
    y=1,
    color=kmeans_tsne_labels,
    labels={'0': 'tsne-2d-one', '1': 'tsne-2d-two'},
)
fig.show()

In [None]:
from sklearn import metrics
# Compare the K-means clusters on the t-SNE embedding with the true digit labels.
print(metrics.homogeneity_completeness_v_measure(labels_true=test_labels, labels_pred=kmeans_tsne_labels))
print(metrics.rand_score(labels_true=test_labels, labels_pred=kmeans_tsne_labels))

In [None]:

# Second 2-D t-SNE projection of the same features, this time with random initialisation.
tsne_random = TSNE(init='random', n_components=2)
tsne_random_results = tsne_random.fit_transform(features)

# Colour each embedded point by its true digit label.
fig = px.scatter(
    tsne_random_results,
    x=0,
    y=1,
    color=test_labels,
    labels={'0': 'tsne-2d-one', '1': 'tsne-2d-two'},
)
fig.show()

In [None]:
# Elbow method on the randomly initialised t-SNE embedding: fit K-means for
# k = 2..19 and record the inertia reported by each fit.
# The values are stored exactly as returned. Subtracting constants for selected
# k would manufacture an elbow that the data does not show.
inertias = []
for cluster in range(2,20):
  kmeans = KMeans(n_clusters=cluster).fit(tsne_random_results)
  inertias.append(kmeans.inertia_)

# The k values start at 2 and are sized from `inertias` so x and y always match.
cluster_counts = list(range(2, 2 + len(inertias)))

plt.figure(figsize = (16,8))
plt.plot(cluster_counts, inertias)
# x/y are passed by keyword: recent seaborn releases reject positional x/y.
sns.scatterplot(x=cluster_counts, y=inertias)
plt.xticks(cluster_counts)
plt.xlabel("Number of clusters (k)")
plt.ylabel("Inertia")
plt.title("K-means Elbow Method with Tsne (randomly initialized) Reduced Vectors")
plt.show()

In [73]:
# Cluster the randomly initialised t-SNE embedding into ten groups and keep
# each sample's cluster id.
kmeans_tsne_random = KMeans(n_clusters=10)
kmeans_tsne_random_labels = kmeans_tsne_random.fit(tsne_random_results).labels_

In [None]:
# Randomly initialised embedding, coloured by the K-means cluster assignment.
fig = px.scatter(
    tsne_random_results,
    x=0,
    y=1,
    color=kmeans_tsne_random_labels,
    labels={'0': 'tsne-2d-one', '1': 'tsne-2d-two'},
)
fig.show()

In [None]:
from sklearn import metrics
# Compare the K-means clusters on the random-init t-SNE embedding with the true digit labels.
print(metrics.homogeneity_completeness_v_measure(labels_true=test_labels, labels_pred=kmeans_tsne_random_labels))
print(metrics.rand_score(labels_true=test_labels, labels_pred=kmeans_tsne_random_labels))

In [None]:
# Elbow method directly on the encoder feature vectors (no t-SNE): fit K-means
# for k = 2..19 and record the inertia reported by each fit.
# Only the unmodified inertias are kept. The former `inertias2` list, which held
# values shifted by arbitrary constants, was never plotted and has been removed.
inertias = []
for cluster in range(2,20):
  print(cluster)  # progress indicator
  kmeans = KMeans(n_clusters=cluster).fit(features)
  inertias.append(kmeans.inertia_)

# The k values start at 2 and are sized from `inertias` so x and y always match.
cluster_counts = list(range(2, 2 + len(inertias)))

plt.figure(figsize = (16,8))
plt.plot(cluster_counts, inertias)
# x/y are passed by keyword: recent seaborn releases reject positional x/y.
sns.scatterplot(x=cluster_counts, y=inertias)
plt.xticks(cluster_counts)
plt.xlabel("Number of clusters (k)")
plt.ylabel("Inertia")
plt.title("K-means Elbow Method on representation vectors")
plt.show()

In [76]:
# Cluster the full-dimensional feature vectors into ten groups and keep each
# sample's cluster id.
kmeans_original = KMeans(n_clusters=10)
kmeans_original_labels = kmeans_original.fit(features).labels_

In [None]:
# Compare the K-means clusters on the raw feature vectors with the true digit labels.
print(metrics.homogeneity_completeness_v_measure(labels_true=test_labels, labels_pred=kmeans_original_labels))
print(metrics.rand_score(labels_true=test_labels, labels_pred=kmeans_original_labels))

# Self-Supervised Learning: Rotation Prediction

In [78]:
def _with_rotations(images):
  """
  Build the rotation-prediction dataset for a set of images.

  For every image, emits the image rotated by 0, 1, 2 and 3 quarter turns
  (via `np.rot90`), in that order, together with the number of quarter turns
  as the class label (0..3).

  Returns a `(rotated_images, labels)` pair of Python lists, four entries per
  input image.
  """
  rotated_images = []
  labels = []
  for image in images:
    for quarter_turns in range(4):
      labels.append(quarter_turns)
      rotated_images.append(np.rot90(image, k=quarter_turns))
  return rotated_images, labels


# The same construction is applied to the train and test splits.
train_data_arbitary, train_labels_arbitary = _with_rotations(train_data)
test_data_arbitary, test_labels_arbitary = _with_rotations(test_data)



In [79]:
# Convert the lists of rotated images into single NumPy arrays.
train_data_arbitary, test_data_arbitary = (
    np.array(split) for split in (train_data_arbitary, test_data_arbitary)
)

In [None]:
# Rotation classifier: three tanh convolutions (with one 2x2 max-pool), a
# 128-unit dense layer named "Encoding", and a 4-way softmax output.
model = tf.keras.Sequential([
    layers.Conv2D(filters=32, kernel_size=(3,3), activation='tanh', input_shape=(28,28,1)),
    layers.Conv2D(filters=16, kernel_size=(3,3), activation='tanh'),
    layers.MaxPool2D(2,2),
    layers.Conv2D(filters=16, kernel_size=(3,3), activation='tanh'),
    layers.Flatten(),
    layers.Dense(128, activation='relu', name="Encoding"),
    layers.Dense(4, activation='softmax'),
])

# Categorical cross-entropy expects one-hot encoded targets.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['acc'],
)
model.summary()

In [81]:
import pandas as pd
# One-hot encode the rotation labels for both splits.
# NOTE: this overwrites the label variables in place, so run the cell only once per kernel session.
train_labels_arbitary = pd.get_dummies(train_labels_arbitary).to_numpy()
test_labels_arbitary = pd.get_dummies(test_labels_arbitary).to_numpy()

In [None]:
# Train the rotation classifier, validating on the rotated test images after every epoch.
model.fit(
    x=train_data_arbitary,
    y=train_labels_arbitary,
    batch_size=128,
    epochs=10,
    validation_data=(test_data_arbitary, test_labels_arbitary),
)

In [None]:
# Render the rotation-classifier architecture to a PNG file.
tf.keras.utils.plot_model(
    model,
    to_file="selfsupervised_mnist.png",
    # What to annotate on each node.
    show_shapes=True,
    show_layer_names=True,
    show_layer_activations=True,
    show_dtype=False,
    # Layout and rendering options.
    rankdir="TB",
    dpi=96,
    expand_nested=False,
    layer_range=None,
)

In [None]:
# Sub-model that stops at the classifier's 128-unit dense layer.
# The layer is named "Encoding" where the model is built; asking for any other
# name (the cell previously asked for "one") makes `get_layer` raise.
feature_extractor = Model(
    inputs=model.inputs,
    outputs=model.get_layer(name="Encoding").output,
)

# Encode the clean (unrotated) test images; the dense output is already one
# vector per image.
features = feature_extractor(test_data)
features = features.numpy()

# Project to 2-D with t-SNE. `init="pca"` is explicit because the library
# default differs between scikit-learn releases, and a separate cell covers
# random initialisation.
tsne = TSNE(n_components=2, init="pca")
tsne_results = tsne.fit_transform(features)

# Colour each embedded point by its true digit label.
fig = px.scatter(tsne_results, x=0, y=1,color=test_labels,labels={'0': 'tsne-2d-one', '1': 'tsne-2d-two'})
fig.show()

In [28]:
# Cluster the 2-D t-SNE embedding into ten groups and keep each sample's cluster id.
kmeans_tsne = KMeans(n_clusters=10)
kmeans_tsne_labels = kmeans_tsne.fit(tsne_results).labels_

In [None]:
# t-SNE embedding coloured by the K-means cluster assignment.
fig = px.scatter(
    tsne_results,
    x=0,
    y=1,
    color=kmeans_tsne_labels,
    labels={'0': 'tsne-2d-one', '1': 'tsne-2d-two'},
)
fig.show()

In [None]:
# Compare the K-means clusters on the t-SNE embedding with the true digit labels.
print(metrics.homogeneity_completeness_v_measure(labels_true=test_labels, labels_pred=kmeans_tsne_labels))
print(metrics.rand_score(labels_true=test_labels, labels_pred=kmeans_tsne_labels))

In [None]:
# 2-D t-SNE projection of the current features with random initialisation.
tsne_random = TSNE(init='random', n_components=2)
tsne_random_results = tsne_random.fit_transform(features)

# Colour each embedded point by its true digit label.
fig = px.scatter(
    tsne_random_results,
    x=0,
    y=1,
    color=test_labels,
    labels={'0': 'tsne-2d-one', '1': 'tsne-2d-two'},
)
fig.show()

In [32]:
# Cluster the randomly initialised t-SNE embedding into ten groups and keep
# each sample's cluster id.
kmeans_tsne_random = KMeans(n_clusters=10)
kmeans_tsne_random_labels = kmeans_tsne_random.fit(tsne_random_results).labels_

In [None]:
# Randomly initialised embedding, coloured by the K-means cluster assignment.
fig = px.scatter(
    tsne_random_results,
    x=0,
    y=1,
    color=kmeans_tsne_random_labels,
    labels={'0': 'tsne-2d-one', '1': 'tsne-2d-two'},
)
fig.show()

In [None]:
# Compare the K-means clusters on the random-init t-SNE embedding with the true digit labels.
print(metrics.homogeneity_completeness_v_measure(labels_true=test_labels, labels_pred=kmeans_tsne_random_labels))
print(metrics.rand_score(labels_true=test_labels, labels_pred=kmeans_tsne_random_labels))

In [35]:
# Cluster the full-dimensional feature vectors into ten groups and keep each
# sample's cluster id.
kmeans_original = KMeans(n_clusters=10)
kmeans_original_labels = kmeans_original.fit(features).labels_

In [None]:
# Compare the K-means clusters on the raw feature vectors with the true digit labels.
print(metrics.homogeneity_completeness_v_measure(labels_true=test_labels, labels_pred=kmeans_original_labels))
print(metrics.rand_score(labels_true=test_labels, labels_pred=kmeans_original_labels))