# Loading in Data with TensorFlow and Keras



In [None]:
# Dog breeds to classify; the order of this list is the class order used throughout the notebook.
breeds = [
    "beagle",
    "bernese_mountain_dog",
    "doberman",
    "labrador_retriever",
    "siberian_husky",
]


In [None]:
import tensorflow as tf
from tensorflow import keras


In [None]:
# Keyword arguments shared by both dataset-loading calls

args = dict(
    labels="inferred",
    label_mode="categorical",  # each breed is one category
    batch_size=32,  # 32 images are loaded and processed at once
    image_size=(256, 256),  # resize all images to the same size
    seed=1,  # fixed seed so the same sequence of random numbers is generated on every run
    validation_split=0.2,  # 20% of the data is held out to validate the model
    class_names=breeds,
)

In [None]:
# Set up the training dataset (the part of the data not held out by `validation_split`)
train = tf.keras.utils.image_dataset_from_directory(
    directory="images", subset="training", **args
)

In [None]:
# Set up the validation dataset (kept under the name `test` in this notebook)
test = tf.keras.utils.image_dataset_from_directory(
    directory="images", subset="validation", **args
)

In [None]:
# Display the dataset object to confirm that loading worked
train
# A BatchDataset repr indicates that the training data has been loaded into TensorFlow

# Exploring Images in Dataset

In [None]:
# Get the first batch (one batch holds 32 images, per `batch_size` in `args`)
first = train.take(1)
first

In [None]:
# Unpack the single batch in `first` into its image tensor and its label tensor
images, labels = next(iter(first))

In [None]:
# Pick the first image of the batch
first_image = images[0]

# Each image is stored as three colour channels [red, green, blue],
# i.e. TF separates the three main colours into three matrices.
# Show the top-left 3x3 corner of channel 0.
first_image[0:3, 0:3, 0]

In [None]:
from PIL import Image

# Convert the image tensor to an 8-bit NumPy array and render it as a PIL image
Image.fromarray(first_image.numpy().astype("uint8"))

In [None]:
# Label vector of the first image in the batch
labels[0]

# Example output: numpy=array([0., 0., 0., 0., 1.]) -- the 1 in the last position indicates that this image is a husky

# Training an Initial Convolutional Layer

In [None]:
# Performance optimisation: after the first pass, serve the data from memory
# instead of the hard drive, and prefetch upcoming batches.

autotune = tf.data.AUTOTUNE
train = train.cache().prefetch(buffer_size=autotune)
test = test.cache().prefetch(buffer_size=autotune)

In [None]:
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

num_breeds = len(breeds)

model = Sequential([
    # Rescale the pixel data into a range that is easier for the network to work with
    layers.Rescaling(1.0 / 255),
    # Convolutional layer that scans over the images
    layers.Conv2D(filters=16, kernel_size=3, padding="same", activation="relu", input_shape=(256, 256, 3)),
    # Flatten the features the network has created so they can feed the prediction layers
    layers.Flatten(),
    # Dense layer leading into the prediction layer
    layers.Dense(units=128, activation="relu"),
    # Output layer: one score per breed
    layers.Dense(units=num_breeds),
])


In [None]:
# Cross-entropy loss for categorical classification; from_logits=True because the
# model's final Dense layer has no activation, so its outputs are raw scores.
categorical_loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True)

model.compile(optimizer="adam", loss=categorical_loss, metrics=["accuracy"])

In [None]:
# Train for 5 epochs, evaluating on the validation dataset after each epoch
history = model.fit(train, validation_data=test, epochs=5, verbose=1)

# Looking at Model Error


In [None]:
# Print the layer-by-layer summary of the trained model
model.summary()

In [None]:
import pandas as pd

# Per-epoch metrics recorded during training, one column per metric
history_df = pd.DataFrame.from_dict(history.history)

# Table view (uncomment to inspect the raw numbers instead of the chart)
#history_df[["accuracy", "val_accuracy"]]

# Graph view: training accuracy vs. validation accuracy
accuracy_columns = ["accuracy", "val_accuracy"]
history_df[accuracy_columns].plot()


# Overfitting Occurred

# Improving Model Accuracy

In [None]:
def train_model(network, epochs=5):
    """Build, compile and train a fresh Sequential model from a list of layers.

    Each layer in ``network`` is re-created from its config before the model
    is assembled. Layer objects keep their weights, so wrapping the same
    instances in a new ``Sequential`` would continue training from whatever
    an earlier call left behind; rebuilding them makes every call start from
    newly initialised weights, which keeps successive experiments comparable.

    Uses the notebook-level ``train`` and ``test`` datasets.

    Args:
        network: Ordered list of Keras layers (nested models are allowed).
        epochs: Number of epochs to train for.

    Returns:
        A ``(history_df, model)`` tuple: a DataFrame built from the metrics
        recorded by ``fit``, and the trained model.
    """
    # Re-instantiate every layer so no weights are shared with previous runs
    fresh_layers = [
        layer.__class__.from_config(layer.get_config()) for layer in network
    ]
    model = Sequential(fresh_layers)

    model.compile(optimizer='adam',
                  loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
                  metrics=['accuracy'])

    history = model.fit(
      train,
      validation_data=test,
      epochs=epochs
    )
    history_df = pd.DataFrame.from_dict(history.history)
    return history_df, model

   



In [None]:
network = [
  tf.keras.layers.Rescaling(1./255),  # Scale the pixel values by 1/255 so the network gets small inputs
  layers.Conv2D(16, 4, padding='same', activation='relu', input_shape=(256,256,3)), # Increase size of window to 4
  layers.MaxPooling2D(), # Reduces overfitting and makes the model run faster by reducing the number of parameters
  # The deeper conv layers receive pooled feature maps, not 256x256x3 images,
  # so they must not declare an input_shape.
  layers.Conv2D(32, 4, padding='same', activation='relu'), # More filters, to let it pick up higher-level features
  layers.MaxPooling2D(),
  layers.Conv2D(64, 4, padding='same', activation='relu'),
  layers.MaxPooling2D(),

  layers.Dropout(.2), # Randomly sets some of the outputs to zero during training

  layers.Flatten(), # Flatten the features the network has created so they can feed the prediction layers
  layers.Dense(128, activation='relu'), # Dense layer leading into the prediction layer
  layers.Dense(len(breeds)) # Output layer: one score per breed
]

history_df, model = train_model(network)

In [None]:
# Plot training accuracy vs. validation accuracy for the deeper network
history_df[["accuracy", "val_accuracy"]].plot()

# Still overfitting, but the result has improved

# Augmenting Our Data


In [None]:
# Generate more varied training data for the neural network by randomly
# transforming the images.

augmentation_layers = [
    # Flip the image left/right
    layers.RandomFlip("horizontal", seed=1),
    # Rotate by a random angle of up to 20% of a full turn (about +/-72 degrees)
    layers.RandomRotation(0.2, seed=1),
    # Zoom in or out by up to 20%
    layers.RandomZoom(0.2, seed=1),
]

data_augmentation = tf.keras.Sequential(augmentation_layers)



In [None]:
# Put the augmentation block in front of the existing layer list
full_network = [data_augmentation] + network

In [None]:
# Train the network that includes data augmentation
history_df, model = train_model(full_network)

In [None]:
# Plot training accuracy vs. validation accuracy for the augmented network
history_df[["accuracy", "val_accuracy"]].plot()

# Investigating Model Error

In [None]:
# Predict one score per breed for every image in the validation dataset
# NOTE(review): later cells iterate `test` again to collect labels and images; this
# presumably relies on the cached dataset yielding batches in the same order -- confirm.
preds = model.predict(test)

In [None]:
import numpy as np

# Index of the highest-scoring breed for each image
predicted_class = preds.argmax(axis=1)

predicted_class

In [None]:
# Gather the label tensors of every batch and stack them into one array
label_batches = [batch_labels for _, batch_labels in test]
actual_labels = np.concatenate(label_batches, axis=0)
actual_labels


In [None]:
# Turn each one-hot label row into its class index
actual_class = actual_labels.argmax(axis=1)
actual_class

In [None]:
import itertools

# Convert every batch to 8-bit arrays and flatten the batches into one list of images
actual_image = [
    single_image
    for batch_images, _ in test
    for single_image in batch_images.numpy().astype("uint8")
]

In [None]:
# Wrap each image array in a PIL image so it can be rendered later
actual_image = list(map(Image.fromarray, actual_image))

In [None]:
# One row per image: predicted class index, actual class index and the image itself
prediction_rows = zip(predicted_class, actual_class, actual_image)
pred_df = pd.DataFrame(prediction_rows, columns=["prediction", "actual", "image"])

In [None]:
# Replace each predicted class index with its breed name
pred_df["prediction"] = pred_df["prediction"].apply(breeds.__getitem__)

In [None]:
# Replace each actual class index with its breed name
pred_df["actual"] = pred_df["actual"].apply(breeds.__getitem__)

In [None]:
# Preview the first rows of the prediction table
pred_df.head()

In [None]:
import base64
import io

def image_formatter(img):
    """Render an image as an inline HTML ``<img>`` tag.

    The image is encoded as PNG in memory and embedded as a base64 data URI,
    so the resulting HTML needs no external file.

    Args:
        img: Object with a ``save(file, format)`` method, such as a PIL image.

    Returns:
        An HTML ``<img>`` string whose ``src`` is a ``data:image/png`` URI.
    """
    with io.BytesIO() as buffer:
        img.save(buffer, 'png')
        img_str = base64.b64encode(buffer.getvalue()).decode()
    # The payload is PNG, so the data URI must declare image/png, not image/jpeg
    return f'<img src="data:image/png;base64,{img_str}">'

# Show the first ten predictions, formatting the "image" column with image_formatter
pred_df.head(10).style.format({'image': image_formatter})