Import Libraries

In [65]:
# Data manipulation
import pandas as pd
import numpy as np

# Tensorflow
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow import keras
from tensorflow.keras.preprocessing.image import load_img

config = tf.compat.v1.ConfigProto(gpu_options=tf.compat.v1.GPUOptions(allow_growth=True))
sess = tf.compat.v1.Session(config=config)

# Keras
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image

# VGG16, ResNet50
from keras.applications.vgg16 import VGG16
from keras.applications.resnet50 import preprocess_input, decode_predictions

from keras.models import Model, Sequential

from keras.layers import Input, InputLayer, Convolution2D, MaxPooling2D, Flatten, Dense, Conv2D, Dropout, Lambda, GlobalAveragePooling2D

from keras.utils import to_categorical

# ResNet
from keras.applications.resnet50 import ResNet50

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_extraction.text import CountVectorizer
from IPython.display import display, Image

# Data visualization
import matplotlib.pyplot as plt
from matplotlib.pyplot import imread

# Image
from PIL import Image
import glob
import cv2

from tqdm import tqdm
import os, re, csv

Checking TensorFlow version and GPU Compatibility

In [3]:
print("TF version:", tf.__version__)
physical_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs:", len(physical_devices))

TF version: 2.4.1
Num GPUs: 1


In [4]:
tf.config.list_physical_devices()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

Import Labels

In [5]:
labels = pd.read_csv("labels.csv")
labels.head()

Unnamed: 0,id,breed
0,000bec180eb18c7604dcecc8fe0dba07,boston_bull
1,001513dfcb2ffafc82cccf4d8bbaba97,dingo
2,001cdf01b096e06d78e9e5112d419397,pekinese
3,00214f311d5d2247d5dfe4fe24b2303d,bluetick
4,0021f9ceb3235effd7fcde7f7538ed62,golden_retriever


Checking the labels: every row has both an id and a breed, so there is no missing data

In [6]:
labels.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10222 entries, 0 to 10221
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   id      10222 non-null  object
 1   breed   10222 non-null  object
dtypes: object(2)
memory usage: 159.8+ KB


Total number of unique dog breeds

In [7]:
# Alphabetically sorted list of the distinct breed names
breeds_list = sorted(labels['breed'].unique())

# Number of classes the classifiers must predict
unique_breeds = len(breeds_list)
print("Unique breeds:", unique_breeds)

Unique breeds: 120


Checking sample submission file 

In [8]:
sample = pd.read_csv('sample_submission.csv')

In [9]:
sample

Unnamed: 0,id,affenpinscher,afghan_hound,african_hunting_dog,airedale,american_staffordshire_terrier,appenzeller,australian_terrier,basenji,basset,...,toy_poodle,toy_terrier,vizsla,walker_hound,weimaraner,welsh_springer_spaniel,west_highland_white_terrier,whippet,wire-haired_fox_terrier,yorkshire_terrier
0,000621fb3cbb32d8935728e48679680e,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
1,00102ee9d8eb90812350685311fe5890,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
2,0012a730dfa437f5f3613fb75efcd4ce,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
3,001510bc8570bbeee98c8d80c8a95ec1,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
4,001a5f3114548acdefa3d4da05474c2e,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10352,ffeda8623d4eee33c6d1156a2ecbfcf8,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
10353,fff1ec9e6e413275984966f745a313b0,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
10354,fff74b59b758bbbf13a5793182a9bbe4,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
10355,fff7d50d848e8014ac1e9172dc6762a3,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333


2 ways we will be doing this:

Part 1. Input > Feature Extraction > Classifier > Predictions

Part 2. Input > Neural Network > Predictions

The difference: in Part 2, feature extraction and classification are both done by the neural network itself, while in Part 1 they are separate, more "manual" steps

#### Part 1 

In [125]:
# 224x224, RGB = 3 channels
input_shape = (224, 224, 3)

Mapping each unique breed name to an integer class index

In [11]:
# Breed name -> integer class index, following the sorted order of breeds_list
breeds_arr = {breed: class_idx for class_idx, breed in enumerate(breeds_list)}

Converting training+test image data to array

In [12]:
# Convert training images to array

def images_to_array(data_dir, labels, img_size=(224,224,3)):
    """Load the labelled images into memory and return them shuffled.

    Parameters
    ----------
    data_dir : str
        Folder containing one ``<id>.jpg`` file per row of ``labels``.
    labels : pandas.DataFrame
        Must provide an ``id`` column (file name without extension) and a
        ``breed`` column whose values are keys of the global ``breeds_arr``.
    img_size : tuple
        ``(height, width, channels)`` every image is resized to.

    Returns
    -------
    X : numpy.ndarray
        ``uint8`` array of shape ``(len(labels), height, width, channels)``.
    y : numpy.ndarray
        One-hot labels of shape ``(len(labels), len(breeds_arr))``, permuted
        with the same random order as ``X``.
    """
    # Positional arrays: indexing the Series with `[i]` is label-based and
    # breaks as soon as `labels` does not carry a default 0..n-1 index
    # (e.g. a filtered or train/test-split frame).
    image_ids = labels['id'].to_numpy()
    image_breeds = labels['breed'].to_numpy()
    data_size = len(image_ids)
    num_classes = len(breeds_arr)

    X = np.zeros([data_size, img_size[0], img_size[1], img_size[2]], dtype=np.uint8)
    # int32 rather than uint8 so class indices above 255 cannot wrap around
    y = np.zeros([data_size, 1], dtype=np.int32)

    # Data and labels
    for i in tqdm(range(data_size)):
        img_path = os.path.join(data_dir, image_ids[i] + '.jpg')
        X[i] = load_img(img_path, target_size=img_size[:2])
        y[i] = breeds_arr[image_breeds[i]]

    # One hot encoder. Fixing num_classes keeps the label width stable even
    # when the highest-indexed breed is missing from `labels`.
    y = to_categorical(y, num_classes=num_classes)

    # Shuffle images and labels with one shared permutation
    ind = np.random.permutation(data_size)
    X = X[ind]
    y = y[ind]
    print('Data Size: ', X.shape)
    print('Label Size: ', y.shape)
    return X, y

In [13]:
# Convert test images to array

def images_to_array2(data_dir, labels, img_size = (224,224,3)):
    """Load the unlabelled (test) images into memory, keeping the row order.

    Parameters
    ----------
    data_dir : str
        Folder containing one ``<id>.jpg`` file per row of ``labels``.
    labels : pandas.DataFrame
        Only the ``id`` column (file name without extension) is read.
    img_size : tuple
        Target size; the first two entries are used as ``(height, width)``.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``(len(labels), height, width, 3)`` whose
        rows follow the order of ``labels`` (no shuffling).
    """
    # Positional array: `series[i]` is label-based and fails on frames whose
    # index is not the default 0..n-1 range.
    image_ids = labels['id'].to_numpy()
    data_size = len(image_ids)
    X = np.zeros([data_size, img_size[0], img_size[1], 3], dtype=np.uint8)

    for i in tqdm(range(data_size)):
        img_path = os.path.join(data_dir, image_ids[i] + '.jpg')
        # Same loader as images_to_array, so both paths decode identically
        X[i] = load_img(img_path, target_size=img_size[:2])

    print('Data Size: ', X.shape)
    return X

In [14]:
# Training data

X, y = images_to_array('train/', labels, input_shape)

100%|███████████████████████████████████████████████████████████████████████████| 10222/10222 [00:27<00:00, 366.14it/s]


Data Size:  (10222, 224, 224, 3)
Label Size:  (10222, 120)


In [15]:
# Test data

test_data = images_to_array2('test/', sample, input_shape)

100%|███████████████████████████████████████████████████████████████████████████| 10357/10357 [00:27<00:00, 376.43it/s]

Data Size:  (10357, 224, 224, 3)





Feature Extraction

In [16]:
# Feature Extraction 

def get_features(model_name, data_preprocessor, input_size, data):
    '''
    Extract pooled feature vectors from `data` with an ImageNet-pretrained model.

    model_name        : model constructor, called with weights='imagenet',
                        include_top=False and input_shape=input_size.
    data_preprocessor : function wrapped in a Lambda layer and applied to the
                        raw input before the pretrained model.
    input_size        : shape of a single input sample.
    data              : array of samples passed to predict().

    Returns only the extracted feature array (the extractor model itself is
    not returned).
    '''
    # Pipeline: raw input -> preprocessing -> pretrained backbone -> global average pool
    raw_input = Input(input_size)
    preprocessed = Lambda(data_preprocessor)(raw_input)
    backbone = model_name(weights='imagenet', include_top=False,
                          input_shape=input_size)
    pooled = GlobalAveragePooling2D()(backbone(preprocessed))
    extractor = Model(inputs = raw_input, outputs = pooled)

    # Run the whole dataset through the extractor in batches of 64
    features = extractor.predict(data, batch_size=64, verbose=1)
    print('Feature shape: ', features.shape)
    return features

Using ResNet

In [17]:
# Extracting training features

resnet_features_train = get_features(ResNet50, preprocess_input, input_shape, X)

Feature shape:  (10222, 2048)


In [18]:
# Extracting test features

resnet_features_test = get_features(ResNet50, preprocess_input, input_shape, test_data)

Feature shape:  (10357, 2048)


 Normal NN

Building a simple NN model

In [126]:
# Classifier head trained on the pre-extracted ResNet feature vectors
inputs = keras.Input(shape=resnet_features_train.shape[1:])

# One hidden layer followed by dropout
hidden = Dense(128, activation='relu')(inputs)
hidden = Dropout(0.25)(hidden)

# Classification layer: one softmax probability per breed
predictions = Dense(units=unique_breeds, activation='softmax')(hidden)

f1 = keras.Model(inputs, predictions, name='DNN')
f1.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
f1.summary()

Model: "DNN"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_8 (InputLayer)         [(None, 2048)]            0         
_________________________________________________________________
dense_2 (Dense)              (None, 128)               262272    
_________________________________________________________________
dropout_2 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 120)               15480     
Total params: 277,752
Trainable params: 277,752
Non-trainable params: 0
_________________________________________________________________


Callbacks

In [29]:
# Make sure the checkpoint folder exists so writing the .h5 file cannot fail
# on a missing directory
os.makedirs('models/', exist_ok=True)

# Keep only the model with the lowest validation loss seen so far
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath=os.path.join('models/', 'resnetfeatures_best_val_loss2.h5'),
    save_weights_only=False,
    monitor='val_loss',
    mode='auto',
    save_best_only=True)

# If the validation loss doesn't improve for 9 consecutive epochs, stop training
earlystopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=9)

Training the model

In [30]:
bs = 128
epoch = 50

In [31]:
f1_h = f1.fit(resnet_features_train, y, 
              batch_size=bs, 
              epochs=epoch, 
              validation_split=0.1, 
              callbacks=[model_checkpoint, earlystopping])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50


Prediction on test data

In [32]:
f1_pred = f1.predict(resnet_features_test, batch_size=bs)

Output to submission file

In [33]:
# Work on a copy: plain assignment would only alias `sample`, so filling in
# the predictions would silently overwrite the sample submission frame too.
f1_df = sample.copy()

# Write each breed's predicted probability into the column of the same name
for breed in breeds_list:
    f1_df[breed] = f1_pred[:, breeds_arr[breed]]
f1_df.to_csv('submission_feature_extraction.csv', index=False)

#### Part 2 

In [None]:
# Append the file extension so `id` matches the file names on disk.
# Only ids that do not already end in '.jpg' are touched, so re-running this
# cell cannot produce '.jpg.jpg'.
# NOTE: this edits `labels` in place; the Part 1 loaders append '.jpg'
# themselves, so reload labels.csv before re-running Part 1.
needs_ext = ~labels['id'].str.endswith('.jpg')
labels.loc[needs_ext, 'id'] = labels.loc[needs_ext, 'id'] + '.jpg'

Here we split our labels data into training and validation set

In [None]:
# 80/20 split. A fixed seed makes the split reproducible across kernel
# restarts, and stratifying on breed keeps every class present, in the same
# proportion, in both the training and the validation set.
train, val = train_test_split(
    labels,
    test_size=0.2,
    shuffle=True,
    random_state=42,
    stratify=labels['breed'],
)

In [None]:
print("Train shape:", train.shape)
print("Validation shape:", val.shape)

Image data augmentation


We "create" more training data from the existing data through image manipulation (rotation, zoom, scaling, contrast), giving the neural network more variants of each image to learn from

This will help improve the performance of the neural network

For more info: https://towardsdatascience.com/complete-image-augmentation-in-opencv-31a6b02694f5

In [None]:
# Image generators: augmentation for training, rescaling only for validation/test

# Scale/Normalize pixel value from range [0,255] to [0,1], RGB coeff in 0-255 too high for model to process
pixel_scale = 1. / 255

# Random geometric transforms applied to the training images only
augmentation_options = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

train_datagen = ImageDataGenerator(rescale=pixel_scale, **augmentation_options)
validation_datagen = ImageDataGenerator(rescale=pixel_scale)
test_datagen = ImageDataGenerator(rescale=pixel_scale)


Retrieve our training/val/test sets

In [None]:
# Training / Validation / Test generators

bs = 64

# `classes=breeds_list` pins the breed -> column mapping to the sorted breed
# list, so the training and validation generators (and the submission columns)
# agree even if one split happens to miss a breed.
training_set = train_datagen.flow_from_dataframe(
    dataframe=train,
    directory='train/',
    x_col="id",
    y_col="breed",
    target_size=(224, 224), # Size of each image = 224x224, has to match input_shape
    class_mode="categorical",
    classes=breeds_list,
    batch_size=bs
)

validation_set = validation_datagen.flow_from_dataframe(
    dataframe=val,
    directory='train/',
    x_col="id",
    y_col="breed",
    target_size=(224, 224),
    class_mode="categorical",
    classes=breeds_list,
    batch_size=bs
)

# The test images are unlabelled: yield images only (class_mode=None) and do
# NOT shuffle, otherwise the rows returned by predict() no longer line up
# with test_set.filenames when the submission is assembled.
test_set = test_datagen.flow_from_directory(
    '',
    target_size = (224, 224),
    batch_size = bs,
    classes=['test'],
    class_mode=None,
    shuffle=False
)

#### 2.1 CNN (Convolutional Neural Network)

In [None]:
# Build a small CNN with the Keras functional API
inputs = keras.Input(shape=input_shape)

# Two convolution + max-pooling stages. The input shape comes from `inputs`,
# so it is not repeated (and hard-coded) on the first Conv2D layer.
x = Conv2D(filters=32, kernel_size=3, activation='relu')(inputs)
x = MaxPooling2D(pool_size=2, strides=2)(x)

x = Conv2D(filters=32, kernel_size=3, activation='relu')(x)
x = MaxPooling2D(pool_size=2, strides=2)(x)

# Flatten the feature maps and classify with a small dense head
x = Flatten()(x)
x = Dense(units=64, activation='relu')(x)

predictions = Dense(units=unique_breeds, activation='softmax')(x) # Classification layer

cnn = keras.Model(inputs, predictions, name='CNN')

# Multi-class problem with one-hot labels -> categorical cross-entropy
cnn.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])


View summary of the CNN model

In [None]:
cnn.summary()

In [None]:
model_plot = tf.keras.utils.plot_model(cnn, show_shapes=True)
display(model_plot)

Checkpoints

In [None]:
# Make sure the checkpoint folder exists so writing the .h5 file cannot fail
# on a missing directory
os.makedirs('models/', exist_ok=True)

# Keep only the model with the lowest validation loss seen so far
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath=os.path.join('models/', 'cnn_best_val_loss2.h5'),
    save_weights_only=False,
    monitor='val_loss',
    mode='auto',
    save_best_only=True)

# If the validation loss doesn't improve for 9 consecutive epochs, stop training
earlystopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=9)

# If the validation loss doesn't improve for 3 consecutive epochs, reduce the
# learning rate to `factor` (here 0.01) times its previous value
# New LR = Old LR * factor
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.01, patience=3)

Training the CNN model

In [None]:
epochs = 1

# len() of a Keras iterator is the number of batches per pass, counting the
# final partial batch. Floor division (n // bs) silently dropped that batch,
# so some validation images were never scored.
step_size_train = len(training_set)
step_size_val = len(validation_set)

In [None]:
# Samples // batch size = # of batches
# 1 epoch trains # of batches
# Weights updated after each batch 

history = cnn.fit(training_set,
          epochs=epochs,
          steps_per_epoch=step_size_train,
          validation_data=validation_set,
          validation_steps=step_size_val,
          callbacks=[model_checkpoint, earlystopping, reduce_lr], shuffle=True, verbose=1)

Save final CNN model

In [None]:
cnn.save('models/final_cnn.h5')

Model Evaluation

In [None]:
# Loss - Accuracy 

print(cnn.metrics_names)
cnn.evaluate(validation_set, steps=step_size_val, verbose=1)

Accuracy + Loss plot

In [None]:
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Val'], loc='upper left')
plt.show()

In [None]:
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Val'], loc='upper left')
plt.show()

Prediction on test set

In [None]:
cnn_y_pred = cnn.predict(test_set, verbose=1)

#### 2.2 ResNet50 (Residual Network)

Tackling vanishing gradient

Using pre-trained ResNet model in keras (Transfer Learning)

In [None]:
# Set include_top=False, as we will be using our own classification layer
# Using imagenet weights

resnet = ResNet50(input_shape=input_shape, weights='imagenet', include_top=False)

Building classification layer on top of ResNet

In [None]:
inputs2 = keras.Input(shape=input_shape)

# NOTE(review): at this point `resnet` is the headless ResNet50 base; the name
# is rebound to the full model at the end of this cell, so re-create the base
# before running this cell a second time.
backbone_out = resnet(inputs2)

# Classification head stacked on the convolutional features
head = Flatten()(backbone_out)
head = Dropout(0.25)(head) # Dropout layer reduces overfitting

head = Dense(units=256,activation='relu')(head) # Rectified Linear Unit - Helping vanishing gradient
head = Dropout(0.25)(head)

head = Dense(units=128,activation='relu')(head)
head = Dropout(0.35)(head)

predictions2 = Dense(units=unique_breeds, activation='softmax')(head) # Sum of Prob = 1

resnet = keras.Model(inputs2, predictions2, name='ResNet')

# Multi-class, Labels in one-hot, use categorical_crossentropy
resnet.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])

View Summary of the ResNet model

In [None]:
resnet.summary()

In [None]:
# Plot the ResNet model (this cell previously re-plotted the CNN)
model_plot = tf.keras.utils.plot_model(resnet, show_shapes=True)
display(model_plot)

Checkpoints

In [None]:
# Make sure the checkpoint folder exists so writing the .h5 file cannot fail
# on a missing directory
os.makedirs('models/', exist_ok=True)

# Keep only the model with the lowest validation loss seen so far
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath=os.path.join('models/', 'resnet_best_val_loss2.h5'),
    save_weights_only=False,
    monitor='val_loss',
    mode='auto',
    save_best_only=True)

# If the validation loss doesn't improve for 9 consecutive epochs, stop training
earlystopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=9)

# If the validation loss doesn't improve for 3 consecutive epochs, reduce the
# learning rate to `factor` (here 0.01) times its previous value
# New LR = Old LR * factor
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.01, patience=3)

Training the ResNet model

In [None]:
epochs = 1

In [None]:
# Train the ResNet model. This call previously used `cnn.fit`, which trained
# the CNN a second time and left the ResNet head untrained, while the ResNet
# checkpoint file received CNN weights.
history2 = resnet.fit(training_set,
          epochs=epochs,
          steps_per_epoch=step_size_train,
          validation_data=validation_set,
          validation_steps=step_size_val,
          callbacks=[model_checkpoint, earlystopping, reduce_lr], shuffle=True, verbose=1)

Save final ResNet model

In [None]:
resnet.save('models/final_resnet.h5')

Model Evaluation

In [None]:
print(resnet.metrics_names)
resnet.evaluate(validation_set, steps=step_size_val, verbose=1)

Accuracy + Loss Plot

In [None]:
plt.plot(history2.history['accuracy'])
plt.plot(history2.history['val_accuracy'])
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Val'], loc='upper left')
plt.show()

In [None]:
plt.plot(history2.history['loss'])
plt.plot(history2.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Val'], loc='upper left')
plt.show()

Prediction on test set

In [None]:
resnet_y_pred = resnet.predict(test_set, verbose=1)

Comparison of models

In [None]:
print("CNN:Loss vs Accuracy\n")
cnn.evaluate(validation_set, steps=step_size_val, verbose=1)
print("\n")
print("ResNet:Loss vs Accuracy\n")
resnet.evaluate(validation_set, steps=step_size_val, verbose=1)

Submission, opening the sample file to see requirements

In [None]:
# Load a fresh copy of the sample submission and preview the frame that was
# just read (the cell previously displayed the older `sample` variable)
sample2 = pd.read_csv('sample_submission.csv')
sample2.head()

Get list of filenames

In [None]:
# Recover each image id (file name without folder or extension) in the order
# the generator lists its files. os.path handles both '/' and '\\' separators
# and avoids the unescaped '.' in the old '.jpg' regex, which matched any
# character followed by 'jpg'.
file_list = test_set.filenames
id_list = [os.path.splitext(os.path.basename(name))[0] for name in file_list]

Map each prediction value to its respective class for each image/file

In [None]:
# Replace the ids with the ones recovered from the generator's file names
sample2['id'] = id_list
# Fill every column after `id` with the CNN's predicted probabilities.
# NOTE(review): `resnet_y_pred` is not written to any submission in the cells
# shown here — confirm the CNN is the model meant to be submitted.
# NOTE(review): assumes the prediction columns are in the same order as the
# breed columns of the sample file — verify against the generator's class order.
sample2.iloc[:,1:] = cnn_y_pred
sample2

Output to csv

In [None]:
submission = sample2.set_index('id')
submission.to_csv('submission.csv')

In [None]:
submission.head()

#### Visualizing image features on Tensorboard Projector

Will be using features extracted using ResNet from Part 1

Involves dimensionality reduction (PCA) to display the image embeddings in 3D space in the projector

In [35]:
# Start up tensorboard
%load_ext tensorboard

In [36]:
len(resnet_features_test)

10357

1. Create feature vectors + metadata (labels)

In [37]:
# Export to tsv file for embedding projector

# ResNet features
np.savetxt("resnet_features_test.tsv", resnet_features_test, delimiter="\t")

Labels for test data

In [53]:
# Take the highest value/"predicted" breed for each test img/file:
# mark with 1 every entry equal to its row's maximum, 0 elsewhere
# (a row with tied maxima would therefore get more than one 1)
row_max = f1_pred.max(axis=1, keepdims=True)
meta_arr = (f1_pred == row_max).astype(int)

In [102]:
meta_arr[0]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [57]:
# Convert to DF - Easier to read
# Copy first: plain assignment would alias `sample`, so writing the one-hot
# columns would also overwrite `sample` and every other name bound to it.
meta_df = sample.copy()

for breed in breeds_list:
    meta_df[breed] = meta_arr[:, breeds_arr[breed]]

In [100]:
meta_df

Unnamed: 0,id,affenpinscher,afghan_hound,african_hunting_dog,airedale,american_staffordshire_terrier,appenzeller,australian_terrier,basenji,basset,...,toy_poodle,toy_terrier,vizsla,walker_hound,weimaraner,welsh_springer_spaniel,west_highland_white_terrier,whippet,wire-haired_fox_terrier,yorkshire_terrier
0,000621fb3cbb32d8935728e48679680e,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,00102ee9d8eb90812350685311fe5890,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0012a730dfa437f5f3613fb75efcd4ce,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,001510bc8570bbeee98c8d80c8a95ec1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,001a5f3114548acdefa3d4da05474c2e,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10352,ffeda8623d4eee33c6d1156a2ecbfcf8,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
10353,fff1ec9e6e413275984966f745a313b0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
10354,fff74b59b758bbbf13a5793182a9bbe4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
10355,fff7d50d848e8014ac1e9172dc6762a3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [70]:
# Reversing OHE array

# Label each img/file to its predicted breed
def reverse_OHE(row):
    """Return the label of the first entry in `row` that equals 1.

    The row's own index is scanned (rather than the columns of a global
    DataFrame), so the function works on any one-hot encoded row and does not
    depend on notebook state.

    Parameters
    ----------
    row : pandas.Series
        One row of a one-hot encoded frame; non-matching entries such as a
        string id are simply skipped.

    Returns
    -------
    The index label of the first value equal to 1, or None when no entry is 1.
    """
    for column in row.index:
        if row[column] == 1:
            return column
    return None

In [106]:
reverse_meta_df = pd.DataFrame(meta_df.apply(reverse_OHE, axis=1))

In [107]:
# Joining img id 
reverse_meta_df = reverse_meta_df.join(meta_df['id'])

In [121]:
# Rename headers
reverse_meta_df.columns = ['breed', 'id']

In [122]:
reverse_meta_df

Unnamed: 0,breed,id
0,japanese_spaniel,000621fb3cbb32d8935728e48679680e
1,samoyed,00102ee9d8eb90812350685311fe5890
2,english_setter,0012a730dfa437f5f3613fb75efcd4ce
3,pug,001510bc8570bbeee98c8d80c8a95ec1
4,tibetan_terrier,001a5f3114548acdefa3d4da05474c2e
...,...,...
10352,standard_poodle,ffeda8623d4eee33c6d1156a2ecbfcf8
10353,weimaraner,fff1ec9e6e413275984966f745a313b0
10354,dhole,fff74b59b758bbbf13a5793182a9bbe4
10355,lhasa,fff7d50d848e8014ac1e9172dc6762a3


In [124]:
# Export to tsv file
# to_csv returns None when given a path, so the result is not assigned.
# The header row is kept because the metadata has more than one column.
reverse_meta_df[['id', 'breed']].to_csv('metadata.tsv', sep='\t', index=False)

2. Create sprite image (Collage) to be displayed on the projector

In [None]:
! magick montage train/*.jpg -tile 105x105 -geometry 50x50! sprite.jpg
print("Done")

3. Create config file

In [132]:
# Contents of projector_config.pbtxt: a single embedding entry naming the
# tensor file, its metadata file and the sprite image with its thumbnail size
projector_config = 'embeddings { tensor_path: "resnet_features_test.tsv" metadata_path: "metadata.tsv" sprite { image_path: "sprite.jpg" single_image_dim: 50 single_image_dim: 50 } }'

with open('projector_config.pbtxt', 'w') as config_file:
    config_file.write(projector_config)


Run tensorboard locally

In [None]:
#! tensorboard --logdir logs/tensorboard

Open: http://localhost:6006/#projector

End

References

[1] https://machinelearningmastery.com/convolutional-layers-for-deep-learning-neural-networks/

[2] https://medium.com/@14prakash/understanding-and-implementing-architectures-of-resnet-and-resnext-for-state-of-the-art-image-cf51669e1624

[3] https://machinelearningmastery.com/rectified-linear-activation-function-for-deep-learning-neural-networks/#:~:text=The%20rectified%20linear%20activation%20function,otherwise%2C%20it%20will%20output%20zero.&text=The%20rectified%20linear%20activation%20function%20overcomes%20the%20vanishing%20gradient%20problem,learn%20faster%20and%20perform%20better

[4] https://medium.com/@kumon/visualizing-image-feature-vectors-through-tensorboard-b850ce1be7f1