## __Classification: Embeddings Generation__

In [17]:
## importing necessary libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import os

import cv2
from PIL import Image

from sklearn.manifold import TSNE

from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import RandomRotation, RandomContrast, RandomTranslation, RandomBrightness, RandomFlip    
from tensorflow.keras.models import Sequential

In [2]:
## setting up the images directory

images_dir = '../data/classification'

print(os.listdir(images_dir))

['0', '1', '2', '3', '4', '5', '6', '7', '8']


In [11]:
## VGG 16 embeddings

vgg = VGG16(include_top=False, weights='imagenet', input_shape=(112, 112, 3))
vgg.trainable = False
vgg.summary()

In [12]:
## function takes in image numpy arr and ouptputs VGG16 embeddings

def get_vgg16_embeddings(img):
    img = cv2.resize(img, (112, 112))
    img = np.expand_dims(img, axis=0)
    img = img/255.0
    return vgg.predict(img).flatten()

In [13]:
## loading dataset with 9 classses stored in 9 independent lists with 9 lists of labels

def load_dataset(i):
    dataset = []
    labels = []
    
    path = images_dir + '/' + str(i)
    files = os.listdir(path)
    for file in files:
        file_path = os.path.join(path, file)
        img = cv2.imread(file_path)
        if img is None:
            continue
        img = cv2.resize(img, (224, 224))
        img_embedding = get_vgg16_embeddings(img)
        dataset.append(img_embedding)
        labels.append(i)
    return np.array(dataset), np.array(labels)

In [16]:
## load the dataset embeddings

X = []
y = []

for i in range(9):
    dataset, labels = load_dataset(i)
    X.append(dataset)
    y.append(labels)

X = np.concatenate(X)
y = np.concatenate(y)

print(X.shape, y.shape)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11

In [25]:
## implementing t-SNE

tsne = TSNE(n_components=3, random_state=42, verbose=2, perplexity=35, n_iter=800)
x_3d = tsne.fit_transform(X)

[t-SNE] Computing 106 nearest neighbors...
[t-SNE] Indexed 3520 samples in 0.025s...
[t-SNE] Computed neighbors for 3520 samples in 1.648s...
[t-SNE] Computed conditional probabilities for sample 1000 / 3520
[t-SNE] Computed conditional probabilities for sample 2000 / 3520
[t-SNE] Computed conditional probabilities for sample 3000 / 3520
[t-SNE] Computed conditional probabilities for sample 3520 / 3520
[t-SNE] Mean sigma: 4.927012
[t-SNE] Computed conditional probabilities in 0.114s
[t-SNE] Iteration 50: error = 71.7472534, gradient norm = 0.0161118 (50 iterations in 1.242s)
[t-SNE] Iteration 100: error = 68.1567230, gradient norm = 0.0034018 (50 iterations in 1.099s)
[t-SNE] Iteration 150: error = 67.8283386, gradient norm = 0.0013320 (50 iterations in 1.094s)
[t-SNE] Iteration 200: error = 67.8176880, gradient norm = 0.0004872 (50 iterations in 1.199s)
[t-SNE] Iteration 250: error = 67.8133621, gradient norm = 0.0002379 (50 iterations in 1.103s)
[t-SNE] KL divergence after 250 iterat

In [26]:
## cluster labels

names = {
    0: 'Speed Limit',
    1: 'Keep Left',
    2: 'Keep Right',
    3: 'No passing',
    4: 'Priority Road',
    5: 'Road Work',
    6: 'Stop',
    7: 'Yield',
    8: 'No Parking'
}

name = ['Speed Limit', 'Keep Left', 'Keep Right', 'No passing', 'Priority Road', 'Road Work', 'Stop', 'Yield', 'No Parking']

In [27]:
y_names = []
for i in range(len(y)):
    y_names.append(names[y[i]])

## 3D scatter plot with each cluster having class name

df = pd.DataFrame({'x': x_3d[:, 0], 'y': x_3d[:, 1], 'z': x_3d[:, 2], 'label': y_names})

In [None]:
fig = px.scatter_3d(df, x='x', y='y', z='z', color='label', labels={'label': 'Cluster'}, title='t-SNE 3D')
fig.update_traces(marker=dict(size=4))
fig.show()

In [29]:
## doing the same with resnet50



In [30]:
res = ResNet50(include_top=False, weights='imagenet', input_shape=(112, 112, 3))
res.trainable = False
def get_resnet50_embeddings(img):
    img = cv2.resize(img, (112, 112))
    img = np.expand_dims(img, axis=0)
    img = img/255.0
    return res.predict(img).flatten()

In [31]:
## loading dataset with 9 classses stored in 9 independent lists with 9 lists of labels

def load_dataset_resnet(i):
    dataset = []
    labels = []
    
    path = images_dir + '/' + str(i)
    files = os.listdir(path)
    for file in files:
        file_path = os.path.join(path, file)
        img = cv2.imread(file_path)
        if img is None:
            continue
        img = cv2.resize(img, (224, 224))
        img_embedding = get_resnet50_embeddings(img)
        dataset.append(img_embedding)
        labels.append(i)
    return np.array(dataset), np.array(labels)

In [32]:
## load the dataset embeddings

X = []
y = []

for i in range(9):
    dataset, labels = load_dataset_resnet(i)
    X.append(dataset)
    y.append(labels)

X = np.concatenate(X)
y = np.concatenate(y)

print(X.shape, y.shape)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms

In [33]:
## implementing t-SNE

tsne = TSNE(n_components=3, random_state=42, verbose=2, perplexity=35, n_iter=800)
x_3d = tsne.fit_transform(X)

[t-SNE] Computing 106 nearest neighbors...
[t-SNE] Indexed 3520 samples in 0.140s...
[t-SNE] Computed neighbors for 3520 samples in 8.396s...
[t-SNE] Computed conditional probabilities for sample 1000 / 3520
[t-SNE] Computed conditional probabilities for sample 2000 / 3520
[t-SNE] Computed conditional probabilities for sample 3000 / 3520
[t-SNE] Computed conditional probabilities for sample 3520 / 3520
[t-SNE] Mean sigma: 4.050464
[t-SNE] Computed conditional probabilities in 0.103s
[t-SNE] Iteration 50: error = 76.8491669, gradient norm = 0.0093894 (50 iterations in 1.822s)
[t-SNE] Iteration 100: error = 75.6048584, gradient norm = 0.0006230 (50 iterations in 1.809s)
[t-SNE] Iteration 150: error = 75.5658569, gradient norm = 0.0001092 (50 iterations in 1.647s)
[t-SNE] Iteration 200: error = 75.5612640, gradient norm = 0.0000835 (50 iterations in 1.638s)
[t-SNE] Iteration 250: error = 75.5602493, gradient norm = 0.0000769 (50 iterations in 1.684s)
[t-SNE] KL divergence after 250 iterat

In [34]:
## cluster labels

names = {
    0: 'Speed Limit',
    1: 'Keep Left',
    2: 'Keep Right',
    3: 'No passing',
    4: 'Priority Road',
    5: 'Road Work',
    6: 'Stop',
    7: 'Yield',
    8: 'No Parking'
}

name = ['Speed Limit', 'Keep Left', 'Keep Right', 'No passing', 'Priority Road', 'Road Work', 'Stop', 'Yield', 'No Parking']

y_names = []
for i in range(len(y)):
    y_names.append(names[y[i]])

## 3D scatter plot with each cluster having class name

df = pd.DataFrame({'x': x_3d[:, 0], 'y': x_3d[:, 1], 'z': x_3d[:, 2], 'label': y_names})

fig = px.scatter_3d(df, x='x', y='y', z='z', color='label', labels={'label': 'Cluster'}, title='t-SNE 3D')
fig.update_traces(marker=dict(size=4))
fig.show()