In [1]:
import numpy as np
from tqdm import tqdm
import pandas as pd

import tensorflow as tf
import tensorflow_datasets as tfds

from tensorflow.keras import layers, models
from tensorflow.keras.utils import Sequence
import math

from collections import Counter



In [2]:
class CatDogsDataset(Sequence):
    """Batched, index-addressable view over a collection of samples.

    Item ``idx`` is the slice of ``x_set`` covering samples
    ``idx * batch_size`` up to (not including) ``(idx + 1) * batch_size``.
    The last batch is shorter when ``len(x_set)`` is not a multiple of
    ``batch_size``. Only inputs are returned; there are no labels.
    """

    def __init__(self, x_set, batch_size):
        """Store the samples and the batch size.

        Args:
            x_set: Sliceable collection that supports ``len()``.
            batch_size: Number of samples per batch; must be positive.

        Raises:
            ValueError: If ``batch_size`` is not greater than zero.
        """
        # Fail early: a zero batch size would otherwise only surface later as
        # a ZeroDivisionError in __len__, and a negative one as a negative
        # length.
        if batch_size <= 0:
            raise ValueError(f"batch_size must be positive, got {batch_size!r}")
        # Give the base class the chance to set up its own state; this is a
        # no-op for base classes that define no initialiser.
        super().__init__()
        self.x = x_set
        self.batch_size = batch_size

    def __len__(self):
        """Return the number of batches, counting a trailing partial batch."""
        return math.ceil(len(self.x) / self.batch_size)

    def __getitem__(self, idx):
        """Return batch number ``idx`` as a slice of the stored samples."""
        start = idx * self.batch_size
        return self.x[start:start + self.batch_size]

In [3]:
# Read the two pre-built image arrays from disk (cats first, then dogs) and
# show their shapes as the cell output.
cat_array, dogs_array = (
    np.load(path)
    for path in ("./data/cats_array.npy", "./data/dogs_array.npy")
)
cat_array.shape, dogs_array.shape

((1000, 200, 200, 3), (1000, 200, 200, 3))

In [4]:
# Wrap each image array in a batched dataset that yields 16 images at a time.
cats_dataset, dogs_dataset = (
    CatDogsDataset(images, 16) for images in (cat_array, dogs_array)
)

In [5]:
# No weights are loaded or trained in the cells shown, so the embeddings come
# from the randomly initialised network. Fix TensorFlow's global seed so that
# the initial weights, and therefore the embeddings and the clustering built on
# them, are the same on every Restart & Run All.
tf.random.set_seed(0)

# Small convolutional feature extractor: three conv layers with two max-pooling
# steps, then a dense head that ends in a 256-unit linear embedding layer.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(200, 200, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(256),  # no activation: raw 256-dimensional output
])

In [6]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 198, 198, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 99, 99, 32)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 97, 97, 64)        18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 48, 48, 64)       0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 46, 46, 64)        36928     
                                                                 
 flatten (Flatten)           (None, 135424)            0

In [7]:
# Run every batch of cat images through the model and stack the outputs into a
# single array with one row per image.
cat_batches = []
for data in tqdm(cats_dataset):
    # Force the (N, 200, 200, 3) layout the model expects.
    data = data.reshape(-1, 200, 200, 3)
    cat_batches.append(model(data).numpy())

# Concatenate once at the end rather than re-copying the growing array on
# every iteration. An empty dataset leaves cats_emb as None, as before.
cats_emb = np.concatenate(cat_batches) if cat_batches else None

100%|██████████| 63/63 [00:02<00:00, 21.38it/s]


In [8]:
# Run every batch of dog images through the model and stack the outputs into a
# single array with one row per image.
dog_batches = []
for data in tqdm(dogs_dataset):
    # Force the (N, 200, 200, 3) layout the model expects.
    data = data.reshape(-1, 200, 200, 3)
    dog_batches.append(model(data).numpy())

# Concatenate once at the end rather than re-copying the growing array on
# every iteration. An empty dataset leaves dogs_emb as None, as before.
dogs_emb = np.concatenate(dog_batches) if dog_batches else None

100%|██████████| 63/63 [00:00<00:00, 118.74it/s]


In [9]:
# Persist both embedding arrays; np.save appends the ".npy" extension itself
# because the given file names do not end in it.
np.save(file='./data/cats_emb', arr=cats_emb)
np.save(file='./data/dogs_emb', arr=dogs_emb)

In [11]:
from sklearn.cluster import KMeans

# Stack both embedding sets into one matrix; rows keep the cats-then-dogs order.
X = np.concatenate((cats_emb, dogs_emb))

# Cluster into two groups. n_init is pinned explicitly because scikit-learn
# changed its default from 10 to 'auto' in version 1.4; stating it keeps the
# result independent of the installed version and avoids the FutureWarning
# emitted by the releases in between.
x_kmeans = KMeans(n_clusters=2, n_init=10, random_state=0).fit(X)

In [12]:
# Sanity check: there should be one cluster label per row of X.
x_kmeans.labels_.shape

(2000,)

In [13]:
from sklearn.decomposition import PCA

# Project the embeddings onto their first three principal components for 3-D
# plotting. random_state is fixed because PCA may pick a randomized SVD solver,
# whose output would otherwise vary slightly between runs.
x_pca = PCA(n_components=3, random_state=0).fit_transform(X)

In [14]:
# Build the plotting table in one expression: the three PCA coordinates plus
# the K-Means cluster assigned to each row. Then preview the first rows.
df = (
    pd.DataFrame(x_pca, columns=['x', 'y', 'z'])
    .assign(label=x_kmeans.labels_)
)
df.head()

Unnamed: 0,x,y,z,label
0,-44.271503,7.962452,-0.164273,0
1,-7.006478,25.312286,-17.405674,0
2,28.826828,-16.520014,4.971582,1
3,-19.059313,50.141281,1.383309,0
4,-5.281155,13.163472,-4.736938,0


In [15]:
import plotly.express as px

# Interactive 3-D scatter of the PCA coordinates, coloured by cluster label.
fig = px.scatter_3d(data_frame=df, x='x', y='y', z='z', color='label')
fig.show()