In [1]:
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
import numpy as np
from tqdm import tqdm
import pickle

In [2]:
with open("./data/model_data.pkl", "rb") as f:
    data = pickle.load(f)

* normalize
* target encode
* prepare training data and labels
* train test split
* ensure optimal class distribution

In [3]:
id2breed = {}
X = []
y = []
for i, breed in tqdm(enumerate(data.keys()), desc='encoding data'):
    arrs = data[breed]
    breed_name = breed.split('-')[1] ##'Chihuahua'
    id2breed[i] = breed_name
    arrs = [arr/255 for arr in arrs] ##normalizing the data
    X.extend(arrs)
    y.extend([i]*len(arrs))
        
breed2id = {v:k for k,v in id2breed.items()}

encoding data: 120it [00:22,  5.40it/s]


In [4]:
import gc
del data
gc.collect()

66

In [5]:
X = np.array(X, dtype=np.float32 , copy=False)
y = np.array(y, dtype=np.int8 , copy=False)

In [6]:
y.shape

(20579,)

In [14]:
with open ('Xs1.npy', 'wb') as f:
    np.save(f, X)
    
with open ('ys1.npy', 'wb') as f:
    np.save(f, y)

# Modelling 

In [28]:
def create_model():
    model = keras.Sequential()
    model.add(keras.layers.Conv2D(64, 3, activation = 'relu', input_shape=X.shape[1:]))
    model.add(keras.layers.MaxPool2D(2))
    model.add(keras.layers.Dropout(0.2))
    
    model.add(keras.layers.Conv2D(128, 3, activation = 'relu'))
    model.add(keras.layers.MaxPool2D(2))
    model.add(keras.layers.Dropout(0.2))
    
    model.add(keras.layers.Conv2D(256, 3, activation = 'relu'))
    model.add(keras.layers.MaxPool2D(2))
    model.add(keras.layers.Dropout(0.2))
    
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(512))
    model.add(keras.layers.Dense(120, activation = 'softmax'))
    
    model.compile(optimizer = 'adam',
                 loss = 'categorical_crossentropy',
                 metrics = ['binary_accuracy', 'accuracy'])
    return model

In [29]:
model = create_model()

In [30]:
model(X[0].reshape(1, 128,128,3))

<tf.Tensor: shape=(1, 120), dtype=float32, numpy=
array([[0.00790411, 0.00837276, 0.00822549, 0.0084664 , 0.00899715,
        0.0085634 , 0.00883573, 0.00884341, 0.00789522, 0.00722962,
        0.00687683, 0.00854253, 0.00940577, 0.00845031, 0.01025973,
        0.00820877, 0.00900011, 0.00942375, 0.00799709, 0.0076754 ,
        0.00820741, 0.00708072, 0.00910622, 0.0083728 , 0.00794689,
        0.00814093, 0.00765346, 0.00908156, 0.00770216, 0.00865217,
        0.00840248, 0.0080416 , 0.00824281, 0.00782723, 0.00944236,
        0.00950228, 0.0081868 , 0.0081222 , 0.00786532, 0.00828376,
        0.00823817, 0.00818458, 0.00847102, 0.00875408, 0.00887699,
        0.00862029, 0.00935389, 0.00856779, 0.0078031 , 0.0079511 ,
        0.00806982, 0.00755861, 0.00823098, 0.00822857, 0.0080976 ,
        0.00972242, 0.00825278, 0.0086606 , 0.00789291, 0.00775813,
        0.00866633, 0.00860864, 0.00828754, 0.00774018, 0.00760204,
        0.00890044, 0.00792442, 0.00816381, 0.00810272, 0.00944907

* class distribution
* test train split
* one hot encode targets

In [48]:
from tensorflow.keras.utils import to_categorical
ys = to_categorical(y)

In [50]:
from sklearn.model_selection import train_test_split

In [53]:
train_test_split(X, ys, test_size=0.3)

[array([[[[0.03137255, 0.02745098, 0.01568627],
          [0.03137255, 0.02745098, 0.01176471],
          [0.03137255, 0.02745098, 0.01176471],
          ...,
          [0.33333333, 0.27843137, 0.23529412],
          [0.33333333, 0.27843137, 0.24313725],
          [0.3372549 , 0.28235294, 0.24705882]],
 
         [[0.03921569, 0.03529412, 0.01960784],
          [0.03921569, 0.03921569, 0.01960784],
          [0.03921569, 0.03921569, 0.01960784],
          ...,
          [0.33333333, 0.27843137, 0.23529412],
          [0.33333333, 0.27843137, 0.24313725],
          [0.3372549 , 0.28235294, 0.24705882]],
 
         [[0.04313725, 0.03921569, 0.02745098],
          [0.04313725, 0.03921569, 0.02352941],
          [0.04313725, 0.03921569, 0.02352941],
          ...,
          [0.32941176, 0.2745098 , 0.23137255],
          [0.32941176, 0.2745098 , 0.23921569],
          [0.3372549 , 0.28235294, 0.24705882]],
 
         ...,
 
         [[0.48627451, 0.38823529, 0.43137255],
          [0.44313