# Introduction to Convolutional Neural Networks and Computer Vision with TensorFlow

Computer Vision: Practice of writing algorithms which can discover patterns in visual data. e.g. Camera of a self-driving car recognizing other cars in view.

The images being worked with are from the Food101 dataset
(101 different classes of food): https://www.kaggle.com/dansbecker/food-101

We've modified it to only use two classes (pizza & steak) using the image data modification notebook

**Note:** We start with a smaller dataset so we can experiment quickly and figure out what works (or better yet what doesn't work) before scaling up.

In [None]:
import zipfile

# Get data
!wget -nc --no-clobber https://storage.googleapis.com/ztm_tf_course/food_vision/pizza_steak.zip

# Unzip (the context manager closes the archive even if extraction fails)
with zipfile.ZipFile("pizza_steak.zip") as zip_ref:
  zip_ref.extractall()

In [None]:
# Inspect data
!ls pizza_steak/train/pizza
# !ls pizza_steak/train/steak


In [None]:
import os
# Report how many sub-directories and files each folder under pizza_steak contains
for dirpath, dirnames, filenames in os.walk("pizza_steak"):
  dir_count, file_count = len(dirnames), len(filenames)
  print(f"There are {dir_count} directories and {file_count} images in '{dirpath}'.")

In [None]:
# Another way to find out how many images are in a folder
num_steak_images_train = len(os.listdir("pizza_steak/train/steak"))
num_steak_images_train


In [None]:
# To visualize our images, first let's get the class names programmatically
import pathlib
import numpy as np
data_dir = pathlib.Path("pizza_steak/train")
# Class names are the sub-directory names; stray files (e.g. .DS_Store) are skipped
# so they cannot show up as bogus classes
class_names = np.array(sorted(item.name for item in data_dir.glob("*") if item.is_dir()))
class_names

In [None]:
# Visualize images
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import random

def view_random_image(target_dir, target_class):
  """
  Displays a random image of one class, prints its shape and returns it.

  Args:
    target_dir: Directory containing the class folders, as a string
      (a trailing slash is optional).
    target_class: Name of the class folder to pick the image from, as a string.

  Returns:
    The image as returned by matplotlib.image.imread.
  """
  # Setup the target dir (we'll view images from here); os.path.join works
  # whether or not target_dir ends with a path separator
  target_folder = os.path.join(target_dir, target_class)

  # Get random img file name (sample returns a one-element list)
  random_image = random.sample(os.listdir(target_folder), 1)[0]

  # Read in the image and plot it using matplotlib
  img = mpimg.imread(os.path.join(target_folder, random_image))
  plt.imshow(img)
  plt.title(target_class)
  plt.axis(False)

  print(f"Image shape: {img.shape}")
  return img


In [None]:
# Try the function out on a random pizza image from the training set
img = view_random_image("pizza_steak/train/","pizza")

In [None]:
import tensorflow as tf
tf.constant(img)

In [None]:
# View img shape
img.shape # returns (height, width, color channels)

In [None]:
# Normalize the values
img/255. # max pixel value 255

## An end-to-end example

Build a convolutional neural network to find patterns in our images.

* Load our images
* Preprocess our images (normalize)
* Build a CNN to find patterns
* Compile the CNN
* Fit the CNN to our training data

In [None]:
import random
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import Sequential
# Created a class for creating and fitting models cuz I got sick of writing it over and over again.
# set the seed
tf.random.set_seed(42)
class ComputerVisionModel():
  """
  Helper that loads image data from directories and builds, fits and inspects
  Keras models on it.

  Creating an instance immediately reads `train_dir` and `test_dir` with
  `ImageDataGenerator.flow_from_directory` and exposes the batched data as
  `train_data` and `valid_data`.

  Args:
    train_dir: Directory of training images.
    test_dir: Directory of images used as validation data.
    batch_size: Number of images per batch.
    target_size: Size every image is resized to when loaded.
    class_mode: Label format passed to `flow_from_directory`
      (binary or sparse, categorical, raw or multi_output, input or none).
    seed: Seed passed to `flow_from_directory`.
    epochs: Number of epochs `fit` trains for.
    rescale: Factor handed to `ImageDataGenerator(rescale=...)`.
  """
  def __init__(self, train_dir, test_dir, batch_size=32, target_size=(224,224), class_mode="binary", seed=42, epochs=5, rescale=1./255):
    self.train_dir = train_dir
    self.test_dir = test_dir
    self.batch_size = batch_size
    self.target_size = target_size
    self.class_mode = class_mode # binary or sparse, categorical, raw or multi_output, input or none
    self.seed = seed
    self.epochs = epochs
    # Preprocess data (normalize pixel data from images)
    self.train_datagen = ImageDataGenerator(rescale=rescale)
    self.valid_datagen = ImageDataGenerator(rescale=rescale)
    # Import data from directories and turn it into batches
    self.train_data = self._load_batches(self.train_datagen, self.train_dir)
    self.valid_data = self._load_batches(self.valid_datagen, self.test_dir)

  def _load_batches(self, datagen, directory):
    """
    Returns the batch iterator `datagen` creates for `directory`, using this
    instance's batch size, target size, class mode and seed
    """
    return datagen.flow_from_directory(
        directory=directory,
        batch_size=self.batch_size,
        target_size=self.target_size,
        class_mode=self.class_mode,
        seed=self.seed
        )

  def create_model(self, layers):
    """
    Returns an (uncompiled) Sequential model built from the given list of layers
    """
    # The architecture is entirely determined by the layers the caller passes in
    model = Sequential(layers)
    return model

  def fit(self, model):
    """
    Fits the (already compiled) model on the training batches for `self.epochs`
    epochs, validating on the validation batches. Returns whatever `model.fit` returns.
    """
    history = model.fit(
        self.train_data,
        epochs=self.epochs,
        # one step per batch, so every epoch covers each iterator once
        steps_per_epoch=len(self.train_data),
        validation_data=self.valid_data,
        validation_steps=len(self.valid_data)
        )
    return history

  def plot_loss_curves(self, history):
    """
    Plots separate curves for loss and accuracy (training vs validation).
    Takes the history returned by fit; expects the "loss", "val_loss",
    "accuracy" and "val_accuracy" keys in history.history. Returns nothing.
    """
    loss = history.history["loss"]
    val_loss = history.history["val_loss"]

    accuracy = history.history["accuracy"]
    val_accuracy = history.history["val_accuracy"]

    epochs = range(len(loss)) # How many epochs did we run for

    # Plot loss (drawn on the current figure)
    plt.plot(epochs, loss, label="training_loss")
    plt.plot(epochs, val_loss, label="val_loss")
    plt.title("loss")
    plt.xlabel("epochs")
    plt.legend()

    # Plot accuracy on a new figure
    plt.figure()
    plt.plot(epochs, accuracy, label="training_accuracy")
    plt.plot(epochs, val_accuracy, label="val_accuracy")
    plt.title("accuracy")
    plt.xlabel("epochs")
    plt.legend()

  def view_random_training_image(self, target_class):
    """
    Takes target_class as string
    Displays a random training image with class and shape
    returns the image
    """
    # Use this instance's training directory (not a module-level variable) so the
    # image always comes from the dataset this object was created with
    target_folder = os.path.join(self.train_dir, target_class)

    # Get random img file name (sample returns a one-element list)
    random_image = random.sample(os.listdir(target_folder), 1)[0]

    # Read in the image and plot it using matplotlib
    img = mpimg.imread(os.path.join(target_folder, random_image))
    plt.imshow(img)
    plt.title(target_class)
    plt.axis(False)

    print(f"Image shape: {img.shape}")
    return img
 

In [None]:
# remember to switch runtime to use GPU `runtime>change runtime type`
model_initializer = ComputerVisionModel(train_dir="pizza_steak/train", test_dir="pizza_steak/test")

In [None]:
# Tiny VGG-style CNN: two (Conv2D, Conv2D, MaxPool2D) blocks followed by a sigmoid output
model = model_initializer.create_model(layers=[
        tf.keras.layers.Conv2D(
            filters=10, # same filter count as every other conv layer in this architecture
            kernel_size=3,
            activation="relu",
            input_shape=(224, 224, 3)
        ),
        tf.keras.layers.Conv2D(10, 3, activation="relu"),
        tf.keras.layers.MaxPool2D(
            pool_size=2,
            padding="valid"
            ),
        tf.keras.layers.Conv2D(10, 3, activation="relu"),
        tf.keras.layers.Conv2D(10, 3, activation="relu"),
        tf.keras.layers.MaxPool2D(2),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(1, activation="sigmoid") # single output neuron for binary classification
        ])

# Compile CNN
model.compile(
    loss="binary_crossentropy",
    optimizer=tf.keras.optimizers.Adam(),
    metrics=["accuracy"]
    )
# Fit on the batches held by model_initializer
history_1 = model_initializer.fit(model=model)

In [None]:
history_1.history

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# Create a 2nd model: dense layers only (no convolutional layers), for comparison
# Set random seed
tf.random.set_seed(42)

# Create
model_2 = model_initializer.create_model(
    layers=[
            tf.keras.layers.Flatten(input_shape=(224, 224, 3)), # dense layers expect a 1-dimensional vector as input
            tf.keras.layers.Dense(4, activation='relu'),
            tf.keras.layers.Dense(4, activation='relu'),
            tf.keras.layers.Dense(1, activation='sigmoid')
            ])

# Compile the model (same loss, optimizer and metric as the first model)
model_2.compile(
    loss="binary_crossentropy",
    optimizer=tf.keras.optimizers.Adam(),
    metrics=["accuracy"]
    )

# Fit on the batches held by model_initializer
history_2 = model_initializer.fit(model=model_2)

In [None]:
# Same as above, just add an extra layer and add more neurons
tf.random.set_seed(42)

# Create
model_3 = model_initializer.create_model(
    layers=[
     tf.keras.layers.Flatten(input_shape=(224, 224, 3)),
     tf.keras.layers.Dense(100, activation="relu"),
     tf.keras.layers.Dense(100, activation="relu"),
     tf.keras.layers.Dense(100, activation="relu"),
     tf.keras.layers.Dense(1, activation="sigmoid")
    ]
)

# Compile
model_3.compile(loss='binary_crossentropy',
                optimizer=tf.keras.optimizers.Adam(),
                metrics=["accuracy"])

# Fit
history_3 = model_initializer.fit(model=model_3)

In [None]:
model_3.summary()

## Binary Classification

1. Visualize, Visualize, Visualize
2. Preprocess the data (normalize)
3. Create a model (start with a baseline)
4. Fit the model
5. Evaluate
6. Adjust different parameters and improve the model
7. Repeat until satisfied

### Visualize

In [None]:

plt.figure()
plt.subplot(1, 2, 1)
steak_img = view_random_image("pizza_steak/train/", "steak")

plt.subplot(1, 2, 2)
pizza_img = view_random_image("pizza_steak/train/", "pizza")


### Preprocess Data

In [None]:
# Define directory dataset paths
train_dir = "pizza_steak/train/"
test_dir = "pizza_steak/test/"

Organize the data into **batches**

Batch: small subset of data.  Rather than look at all ~15k images at one time, a model might only look at 32 at a time.

**It does this for a couple of reasons:**
1. 15,000 images (or more) might not fit into the memory of your processor (GPU).
2. Trying to learn the patterns in 15k images in one hit could result in the model not being able to learn very well.


**Why batch size of 32?**
32 is a generally accepted mini-batch size.

However, mini-batch size is a hyperparameter that needs to be tuned according to your data and chosen model for optimal performance.

With smaller batch sizes the estimate of gradient in each epoch is more noisy but it helps the algorithm to avoid local minima. But it also makes training less efficient if you go too low as the weights will jump around too much and the cost will converge much more slowly.

In [None]:
# Checking the GPU
!nvidia-smi

In [None]:
# Create train and test data generator
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# When loading images, divide all pixel values by 255, this normalizes all the images
rescale_val = 1/255.

model_initializer = ComputerVisionModel(
    train_dir="pizza_steak/train/", 
    test_dir="pizza_steak/test/",
    batch_size = 32, 
    target_size=(224,224), 
    class_mode="binary", 
    seed=42, 
    epochs=5, 
    rescale=rescale_val
    )

In [None]:
# Get the next batch of images/labels in train_data; the built-in next() is used
# because the iterator's .next() method is not available in every Keras version
images, labels = next(model_initializer.train_data)
images[:2], images[0].shape

In [None]:
images[7], images[7].shape

In [None]:
# View labels for the first batch
labels

### 3. Create a CNN model (start with a baseline)

A baseline is a relatively simple model or existing result that you setup when beginning a machine learning experiment.  As you keep experimenting, you try to beat the baseline.

**Note:** In deep learning, there is an almost infinite number of architectures you could create. So one of the best ways to get started is to begin with something simple, see if it works on your data, and then introduce complexity as required. (Look up which current model is performing best in the field of your problem.)

#### Check out [Papers With Code](https://paperswithcode.com/)

In [None]:
# Create a model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPool2D, Activation

# Set the random seed
tf.random.set_seed(42)

# Baseline CNN: three Conv2D layers, then Flatten and a single sigmoid output neuron
model_4 = model_initializer.create_model(
    layers=[
            Conv2D(
                filters=10, # number of filters (sliding windows) going across an input (higher = more complex model)
                kernel_size=3, # (3,3), # size of the sliding window across an input
                strides=1, # (1,1), # size of the step the sliding window takes across an input
                padding="valid",
                activation="relu",
                input_shape=(224, 224, 3) # input layer (specify input shape)
            ),
            Conv2D(10, 3, activation="relu"),
            Conv2D(10, 3, activation="relu"),
            Flatten(),
            Dense(1, activation="sigmoid") # output layer (working with binary classification so only 1 output neuron)
            ])

In [None]:
# Compile
model_4.compile(loss='binary_crossentropy',
                optimizer=Adam(),
                metrics=["accuracy"])

In [None]:
# Fit
history_4 = model_initializer.fit(model_4)

In [None]:
# Evaluate - plot training curves
import pandas as pd
pd.DataFrame(history_4.history).plot(figsize=(10,7))

**Note:** When a model's **validation loss starts to increase**, it's likely that the model is **overfitting** the training dataset.

It's learning the patterns in the training dataset *too well* and thus the model's ability to generalize to unseen data will be diminished.

In [None]:
model_initializer.plot_loss_curves(history_4)


### Adjust the model parameters

**Fitting a machine learning model comes in 3 steps:**

1. Create a baseline
2. Beat the baseline by overfitting a larger model
3. Reduce overfitting

**Ways to induce overfitting:**

* Increase # of conv layers
* Increase # of conv filters
* Add another dense layer to the output of our flattened layer

**Ways to reduce overfitting:**

* Add data augmentation
* Add regularization layers (such as MaxPool2D)
* Add more data...

**Note:** Reducing overfitting is also known as **regularization**

In [None]:
# Set the random seed
tf.random.set_seed(42)
# Create a new model: three Conv2D layers (10 filters, kernel size 3), each followed by a MaxPool2D layer
model_5 = model_initializer.create_model(
    layers=[
            Conv2D(10, 3, activation="relu", input_shape=(224, 224, 3)),
            MaxPool2D(pool_size=2),
            Conv2D(10, 3, activation="relu"),
            MaxPool2D(),
            Conv2D(10, 3, activation="relu"),
            MaxPool2D(),
            Flatten(),
            Dense(1, activation="sigmoid") # single output neuron with sigmoid activation
    ])

In [None]:
# Compile
model_5.compile(loss='binary_crossentropy',
                optimizer=Adam(),
                metrics=["accuracy"])
# Fit
history_5 = model_initializer.fit(model_5)

In [None]:
# Plot the training curves, then print the architecture (two statements, so the
# cell does not echo a meaningless (None, None) tuple)
model_initializer.plot_loss_curves(history_5)
model_5.summary()

### MaxPool2D()
Selects 1 max pixel value from all pixels within range of kernel

### Conv2D
Slides matrix of weights, multiplies by pixel value then sums all results.

### Data Augmentation

In [None]:
# Create ImageDataGenerator training instance with data augmentation
train_datagen_augmented = ImageDataGenerator(rescale=1/255.,
                                             rotation_range=20, # rotate by up to 20 degrees (the value is in degrees, not a fraction)
                                             shear_range=0.2, # how much do you want to shear an image?
                                             zoom_range=0.2, # how much do you want to zoom?
                                             width_shift_range=0.2, # move image around on X axis (fraction of total width)
                                             height_shift_range=0.3, # move image around on Y axis (fraction of total height)
                                             horizontal_flip=True) # do you want to flip an image?

# Create ImageDataGenerator without data augmentation
train_datagen = ImageDataGenerator(rescale=1/255.)

# Create ImageDataGenerator without data augmentation for the test dataset
test_datagen = ImageDataGenerator(rescale=1/255.)




**Question:** What is data augmentation?

Data Augmentation: the process of altering our training data, leading it to have more diversity and in turn allowing our models to learn more generalizable (hopefully) patterns.

Altering might mean adjusting the rotation of an image, flipping it, cropping it, or something similar.

Let's write some code to visualize data augmentation...

In [None]:
# Import data and augment it from training directory

IMG_SIZE = (224, 224)
print("Augmented training data:")
train_data_augmented = train_datagen_augmented.flow_from_directory(train_dir,
                                                                   target_size=IMG_SIZE,
                                                                   batch_size=32,
                                                                   class_mode="binary",
                                                                   shuffle=False) # for demonstration purposes only, usually true

# Create non-augmented train data batches
print("Non-augmented training data:")
train_data = train_datagen.flow_from_directory(train_dir, 
                                              target_size=IMG_SIZE,
                                              batch_size=32,
                                              class_mode="binary",
                                              shuffle=False)     
# Create non-augmented test data batches
print("Non-augmented test data:")
test_data = test_datagen.flow_from_directory(test_dir, 
                                              target_size=IMG_SIZE,
                                              batch_size=32,
                                              class_mode="binary")                                                      

**Note:** Data augmentation is usually only performed on the training data. Using `ImageDataGenerator`'s built-in data augmentation parameters, our images are left as they are in the directories but are modified as they're loaded into the model.

Let's visualize the augmented data:

In [None]:
# Get sample data batches; the built-in next() is used because the iterator's
# .next() method is not available in every Keras version
images, labels = next(train_data)
augmented_images, augmented_labels = next(train_data_augmented) # note: labels aren't augmented, only data


In [None]:
# Show original image and augmented image
import random
# Pick an index that is valid for the batch that was actually loaded, rather than
# assuming the batch always holds exactly 32 images
random_number = random.randint(0, len(images) - 1)

print(f'Showing image number: {random_number}')
plt.imshow(images[random_number])
plt.title('Original Image')
plt.axis(False)
# Start a new figure so the augmented image does not overwrite the original
plt.figure()


plt.imshow(augmented_images[random_number])
plt.title('Augmented Image')
plt.axis(False)


<table>
<colgroup><col width="214px"><col></colgroup>
<tbody><tr><th colspan="2"><h2><span>ImageDataGenerator Args</span></h2></th></tr>
<tr>
<td>
<code translate="no" dir="ltr">featurewise_<wbr>center</code><a id="featurewise_center"></a>
</td>
<td>
Boolean. Set input mean to 0 over the dataset,
feature-wise.
</td>
</tr><tr>
<td>
<code translate="no" dir="ltr">samplewise_<wbr>center</code><a id="samplewise_center"></a>
</td>
<td>
Boolean. Set each sample mean to 0.
</td>
</tr><tr>
<td>
<code translate="no" dir="ltr">featurewise_<wbr>std_<wbr>normalization</code><a id="featurewise_std_normalization"></a>
</td>
<td>
Boolean. Divide inputs by std of the
dataset, feature-wise.
</td>
</tr><tr>
<td>
<code translate="no" dir="ltr">samplewise_<wbr>std_<wbr>normalization</code><a id="samplewise_std_normalization"></a>
</td>
<td>
Boolean. Divide each input by its std.
</td>
</tr><tr>
<td>
<code translate="no" dir="ltr">zca_<wbr>epsilon</code><a id="zca_epsilon"></a>
</td>
<td>
epsilon for ZCA whitening. Default is 1e-6.
</td>
</tr><tr>
<td>
<code translate="no" dir="ltr">zca_<wbr>whitening</code><a id="zca_whitening"></a>
</td>
<td>
Boolean. Apply ZCA whitening.
</td>
</tr><tr>
<td>
<code translate="no" dir="ltr">rotation_<wbr>range</code><a id="rotation_range"></a>
</td>
<td>
Int. Degree range for random rotations.
</td>
</tr><tr>
<td>
<code translate="no" dir="ltr">width_<wbr>shift_<wbr>range</code><a id="width_shift_range"></a>
</td>
<td>
Float, 1-D array-like or int<p></p>

<ul>
<li>float: fraction of total width, if &lt; 1, or pixels if &gt;= 1.</li>
<li>1-D array-like: random elements from the array.</li>
<li>int: integer number of pixels from interval <code translate="no" dir="ltr">(-width_shift_range,
+width_shift_range)</code> - With <code translate="no" dir="ltr">width_shift_range=2</code> possible values
are integers <code translate="no" dir="ltr">[-1, 0, +1]</code>, same as with <code translate="no" dir="ltr">width_shift_range=[-1, 0,
+1]</code>, while with <code translate="no" dir="ltr">width_shift_range=1.0</code> possible values are floats
in the interval [-1.0, +1.0).
</li></ul></td>
</tr><tr>
<td>
<code translate="no" dir="ltr">height_<wbr>shift_<wbr>range</code><a id="height_shift_range"></a>
</td>
<td>
Float, 1-D array-like or int<p></p>

<ul>
<li>float: fraction of total height, if &lt; 1, or pixels if &gt;= 1.</li>
<li>1-D array-like: random elements from the array.</li>
<li>int: integer number of pixels from interval <code translate="no" dir="ltr">(-height_shift_range,
+height_shift_range)</code> - With <code translate="no" dir="ltr">height_shift_range=2</code> possible values
are integers <code translate="no" dir="ltr">[-1, 0, +1]</code>, same as with <code translate="no" dir="ltr">height_shift_range=[-1, 0,
+1]</code>, while with <code translate="no" dir="ltr">height_shift_range=1.0</code> possible values are floats
in the interval [-1.0, +1.0).
</li></ul></td>
</tr><tr>
<td>
<code translate="no" dir="ltr">brightness_<wbr>range</code><a id="brightness_range"></a>
</td>
<td>
Tuple or list of two floats. Range for picking a
brightness shift value from.
</td>
</tr><tr>
<td>
<code translate="no" dir="ltr">shear_<wbr>range</code><a id="shear_range"></a>
</td>
<td>
Float. Shear Intensity (Shear angle in counter-clockwise
direction in degrees)
</td>
</tr><tr>
<td>
<code translate="no" dir="ltr">zoom_<wbr>range</code><a id="zoom_range"></a>
</td>
<td>
Float or [lower, upper]. Range for random zoom. If a float,
<code translate="no" dir="ltr">[lower,<wbr> upper] = [1-zoom_<wbr>range,<wbr> 1+zoom_<wbr>range]</code>.
</td>
</tr><tr>
<td>
<code translate="no" dir="ltr">channel_<wbr>shift_<wbr>range</code><a id="channel_shift_range"></a>
</td>
<td>
Float. Range for random channel shifts.
</td>
</tr><tr>
<td>
<code translate="no" dir="ltr">fill_<wbr>mode</code><a id="fill_mode"></a>
</td>
<td>
One of {"constant", "nearest", "reflect" or "wrap"}. Default is
'nearest'. Points outside the boundaries of the input are filled
according to the given mode:
<ul>
<li>'constant': kkkkkkkk|abcd|kkkkkkkk (cval=k)</li>
<li>'nearest':  aaaaaaaa|abcd|dddddddd</li>
<li>'reflect':  abcddcba|abcd|dcbaabcd</li>
<li>'wrap':  abcdabcd|abcd|abcdabcd
</li></ul></td>
</tr><tr>
<td>
<code translate="no" dir="ltr">cval</code><a id="cval"></a>
</td>
<td>
Float or Int. Value used for points outside the boundaries when
<code translate="no" dir="ltr">fill_<wbr>mode = "constant"</code>.
</td>
</tr><tr>
<td>
<code translate="no" dir="ltr">horizontal_<wbr>flip</code><a id="horizontal_flip"></a>
</td>
<td>
Boolean. Randomly flip inputs horizontally.
</td>
</tr><tr>
<td>
<code translate="no" dir="ltr">vertical_<wbr>flip</code><a id="vertical_flip"></a>
</td>
<td>
Boolean. Randomly flip inputs vertically.
</td>
</tr><tr>
<td>
<code translate="no" dir="ltr">rescale</code><a id="rescale"></a>
</td>
<td>
rescaling factor. Defaults to None. If None or 0, no rescaling is
applied, otherwise we multiply the data by the value provided (after
applying all other transformations).
</td>
</tr><tr>
<td>
<code translate="no" dir="ltr">preprocessing_<wbr>function</code><a id="preprocessing_function"></a>
</td>
<td>
function that will be applied on each input. The
function will run after the image is resized and augmented.
The function should take one argument: one image (Numpy tensor with
rank 3), and should output a Numpy tensor with the same shape.
</td>
</tr><tr>
<td>
<code translate="no" dir="ltr">data_<wbr>format</code><a id="data_format"></a>
</td>
<td>
Image data format, either "channels_first" or
"channels_last". "channels_last" mode means that the images should have
shape <code translate="no" dir="ltr">(samples,<wbr> height,<wbr> width,<wbr> channels)</code>, "channels_first" mode means
that the images should have shape <code translate="no" dir="ltr">(samples,<wbr> channels,<wbr> height,<wbr> width)</code>.
It defaults to the <code translate="no" dir="ltr">image_<wbr>data_<wbr>format</code> value found in your Keras config
file at <code translate="no" dir="ltr">~/<wbr>.<wbr>keras/<wbr>keras.<wbr>json</code>. If you never set it, then it will be
"channels_last".
</td>
</tr><tr>
<td>
<code translate="no" dir="ltr">validation_<wbr>split</code><a id="validation_split"></a>
</td>
<td>
Float. Fraction of images reserved for validation
(strictly between 0 and 1).
</td>
</tr><tr>
<td>
<code translate="no" dir="ltr">dtype</code><a id="dtype"></a>
</td>
<td>
Dtype to use for the generated arrays.
</td>
</tr>
</tbody></table>

### Build a model and train using augmented data

Same as model 5 but using augmented data

In [None]:
# Import data and augment it from training directory

IMG_SIZE = (224, 224)
print("Augmented training data:")
train_data_augmented_shuffled = train_datagen_augmented.flow_from_directory(train_dir,
                                                                   target_size=IMG_SIZE,
                                                                   batch_size=32,
                                                                   class_mode="binary",
                                                                   shuffle=True) 

# Create non-augmented train data batches
print("Non-augmented training data:")
train_data_shuffled = train_datagen.flow_from_directory(train_dir, 
                                              target_size=IMG_SIZE,
                                              batch_size=32,
                                              class_mode="binary",
                                              shuffle=True)     
# Create non-augmented test data batches
print("Non-augmented test data:")
test_data = test_datagen.flow_from_directory(test_dir, 
                                              target_size=IMG_SIZE,
                                              batch_size=32,
                                              class_mode="binary")     

# Create new model (same layers as model_5)
model_6 = model_initializer.create_model(layers=[
            Conv2D(10, 3, activation="relu", input_shape=(224, 224, 3)),
            MaxPool2D(pool_size=2),
            Conv2D(10, 3, activation="relu"),
            MaxPool2D(),
            Conv2D(10, 3, activation="relu"),
            MaxPool2D(),
            Flatten(),
            Dense(1, activation="sigmoid")
    ])
# Compile
model_6.compile(loss='binary_crossentropy',
                optimizer=Adam(),
                metrics=["accuracy"])
# Fit
# NOTE(review): this trains on train_data_augmented (created earlier with shuffle=False),
# not on train_data_augmented_shuffled defined in this cell - presumably intentional so
# the effect of shuffling can be compared with the next model; confirm
history_6 = model_6.fit(
    train_data_augmented,
    epochs=5,
    steps_per_epoch=len(train_data_augmented),
    validation_data=test_data,
    validation_steps=len(test_data)
    )

In [None]:
# Check training curves
model_initializer.plot_loss_curves(history_6)

### Repeat until satisfied

Options:
* Increase the number of model layers (e.g. add more `Conv2D` / `MaxPool2D` layers)
* Increase the number of filters in each convolutional layer
* Train for longer (more epochs)
* Find an ideal learning rate
* Get more data (give the model more opportunities to learn)
* Use **transfer learning** to leverage what another image model has learned and adjust it for our own use case.

**Practice:** Recreate the model on the CNN explainer website (same as model_1) and see how it performs on the augmented shuffled training data.


In [None]:
# Create the model (same as model_5 and model_6)
model_7 = Sequential([
  Conv2D(10, 3, activation='relu', input_shape=(224, 224, 3)),
  MaxPool2D(),
  Conv2D(10, 3, activation='relu'),
  MaxPool2D(),
  Conv2D(10, 3, activation='relu'),
  MaxPool2D(),
  Flatten(),
  Dense(1, activation='sigmoid')
])

# Compile the model
model_7.compile(loss='binary_crossentropy',
                optimizer=Adam(),
                metrics=['accuracy'])

# Fit the model
history_7 = model_7.fit(train_data_augmented_shuffled, # now the augmented data is shuffled
                        epochs=5,
                        steps_per_epoch=len(train_data_augmented_shuffled),
                        validation_data=test_data,
                        validation_steps=len(test_data))

In [None]:
# Create a function to import and resize an img to a size that's usable in our model
# Download custom image
import matplotlib.image as mpimg
!wget -nc --no-clobber https://raw.githubusercontent.com/mrdbourke/tensorflow-deep-learning/main/images/03-steak.jpeg

def load_and_prep_image(filename, img_shape=224):
  """
  Reads a JPEG image from filename, turns it into a tensor and reshapes it
  to (img_shape, img_shape, 3) with values rescaled to between 0 and 1.

  Args:
    filename: Path of the JPEG file to load.
    img_shape: Height and width (in pixels) to resize the image to.

  Returns:
    The resized, rescaled image tensor.
  """

  # Read in the image
  img = tf.io.read_file(filename)
  # Decode the read file into a tensor; channels=3 forces RGB output so that
  # grayscale JPEGs still match the 3-channel input the models expect
  img = tf.image.decode_jpeg(img, channels=3)
  # Resize
  img = tf.image.resize(img, size=[img_shape, img_shape])
  # Rescale the image (get all values between 0 and 1 {normalized})
  img = img/255.
  return img
# Load in and preprocess a custom image
steak = load_and_prep_image(filename='03-steak.jpeg')

pred=model_7.predict(tf.expand_dims(steak, axis=0))
        

Looks like our custom image is being put through our model. However, it currently outputs only a prediction probability; wouldn't it be nice if we could visualize the image as well as the model's prediction?

In [None]:
# Remind ourself of the class names
class_names


In [None]:
# We can index the predicted class by rounding the prediction probability and indexing on the class names
pred_class = class_names[int(tf.round(pred)[0][0])]
pred_class

In [None]:
def pred_and_plot(model, filename, class_names):
  """
  Imports an image located at filename, makes a prediction on it with
  a trained model and plots the image with the predicted class as the title.

  Works for binary models (a single output probability) and for multi-class
  models (one output probability per class).

  Args:
    model: Trained model whose predict method accepts a batch of images.
    filename: Path of the image to predict on.
    class_names: Class names, indexable by the predicted class index.
  """
  # Import the target image and preprocess it
  img = load_and_prep_image(filename)

  # Make a prediction (the model expects a batch, so add a batch dimension)
  pred = model.predict(tf.expand_dims(img, axis=0))

  # Get the predicted class
  if len(pred[0]) > 1:
    # Multi-class: one probability per class, take the most likely one
    pred_class = class_names[pred[0].argmax()]
  else:
    # Binary: a single probability, round it to 0 or 1
    pred_class = class_names[int(tf.round(pred)[0][0])]

  # Plot the image and predicted class
  plt.imshow(img)
  plt.title(f"Prediction: {pred_class}")
  plt.axis(False)



In [None]:
# Test our model on a custom image
pred_and_plot(model_7, "03-steak.jpeg", class_names)


In [None]:
# Download another test custom image and make a prediction on it
!wget -nc --no-clobber https://raw.githubusercontent.com/mrdbourke/tensorflow-deep-learning/main/images/03-pizza-dad.jpeg
pizza = load_and_prep_image(filename='03-pizza-dad.jpeg')
# Test our model on a custom image
pred_and_plot(model_7, "03-pizza-dad.jpeg", class_names)



In [None]:
# Try my own image
pred_and_plot(model_7, "drive/MyDrive/tensorflow_course/steak_custom.jpg", class_names)

# Multi-class Image Classification

1. Become one with the data
2. Preprocess the data (get it ready for a model)
3. Create a model (start with a baseline)
4. Fit the model (overfit it to make sure it works)
5. Evaluate the model
6. Adjust different hyperparameters and improve the model (try to beat baseline/reduce overfitting)
7. Repeat until satisfied

In [None]:
import zipfile
!wget -nc --no-clobber https://storage.googleapis.com/ztm_tf_course/food_vision/10_food_classes_all_data.zip

# Unzip our data (the context manager closes the archive even if extraction fails)
with zipfile.ZipFile('10_food_classes_all_data.zip', 'r') as zip_ref:
  zip_ref.extractall()

In [None]:
import os

# Report how many sub-directories and files each folder of the 10-class dataset contains
for dirpath, dirnames, filenames in os.walk("10_food_classes_all_data"):
  dir_count, file_count = len(dirnames), len(filenames)
  print(f"There are {dir_count} directories and {file_count} images in '{dirpath}'.")

In [None]:
# Setup the train and test directories
train_dir = '10_food_classes_all_data/train/'
test_dir = '10_food_classes_all_data/test/'

### Visualize

In [None]:
import pathlib
import numpy as np
data_dir = pathlib.Path(train_dir)
# Class names are the sub-directory names; stray files (e.g. .DS_Store) are skipped
# so they cannot show up as bogus classes
class_names = np.array(sorted(item.name for item in data_dir.glob('*') if item.is_dir()))
print(class_names)


In [None]:
# View an image: pick one of the class names at random and display a random
# training image from that class. view_random_image returns the image it
# displayed, which is kept in `img`.
import random
img = view_random_image(target_dir=train_dir,
                        target_class=random.choice(class_names))

### Preprocess the data (prepare it for the model)

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Rescale: every pixel value is multiplied by 1/255
# (maps the values into the 0-1 range, assuming 8-bit input images)
train_datagen = ImageDataGenerator(rescale=1/255.)
test_datagen = ImageDataGenerator(rescale=1/255.)

# Load data in from directories and turn it into batches:
# images are resized to 224x224, delivered 32 at a time, and
# class_mode='categorical' requests one label vector per image (multi-class)
train_data = train_datagen.flow_from_directory(train_dir,
                                               target_size=(224,224),
                                               batch_size=32,
                                               class_mode='categorical')

# Same settings for the test split so both iterators produce identically shaped batches
test_data = test_datagen.flow_from_directory(test_dir,
                                             target_size=(224,224), 
                                             batch_size=32,
                                             class_mode='categorical')



### Create a model (start with a baseline)

In [None]:
# Create a model w/ baseline (clone of cnn explainer)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense
from tensorflow.keras.optimizers import Adam

# Baseline CNN: two blocks of (Conv2D, Conv2D, MaxPool2D) followed by Flatten
# and a 10-unit softmax output. Every conv layer uses 10 filters with a 3x3
# kernel; the input shape matches the 224x224 RGB images from the generators.
model_8 = Sequential([
  Conv2D(10, 3, activation='relu', input_shape=(224, 224, 3)),
  Conv2D(10, 3, activation='relu'),
  MaxPool2D(),
  Conv2D(10, 3, activation='relu'),
  Conv2D(10, 3, activation='relu'),
  MaxPool2D(),
  Flatten(),
  Dense(10, activation='softmax') # changed to have 10 output neurons (10 classes) and use softmax activation function
])

### Compile the model

In [None]:
# Compile the model.
# categorical_crossentropy goes with the class_mode='categorical' labels
# produced by the data iterators; Adam is used with its default settings.
model_8.compile(loss='categorical_crossentropy',
                optimizer=Adam(),
                metrics=['accuracy'])


### Fit the model

In [None]:
# Fit the model for 5 epochs, validating on the test data after every epoch.
# The returned History object is kept for plotting the loss curves later.
history_8 = model_8.fit(train_data, # training batches (rescaled only, no augmentation)
                        epochs=5,
                        steps_per_epoch=len(train_data), # one full pass over the training batches per epoch
                        validation_data=test_data,
                        validation_steps=len(test_data)) # validate on every test batch

### Evaluate the model

In [None]:
# Evaluate on the test data (reports the loss and the accuracy metric the model was compiled with)
model_8.evaluate(test_data)

In [None]:
# check out the model's loss curves on the 10 classes

def plot_loss_curves(history):
    """
    Plots the training and validation curves recorded in a History object.

    Two separate figures are drawn: one for loss and one for accuracy.

    Args:
      history: object returned by model.fit(); its .history dict must contain
        the keys "loss", "val_loss", "accuracy" and "val_accuracy".

    Returns:
      None. The figures are created through matplotlib.pyplot.
    """
    metrics = history.history

    loss = metrics["loss"]
    val_loss = metrics["val_loss"]

    accuracy = metrics["accuracy"]
    val_accuracy = metrics["val_accuracy"]

    epochs = range(len(loss)) # How many epochs did we run for

    # Plot loss on a new figure of its own, so the curve never lands on a
    # figure that is still active from earlier plotting
    plt.figure()
    plt.plot(epochs, loss, label="training_loss")
    plt.plot(epochs, val_loss, label="val_loss")
    plt.title("loss")
    plt.xlabel("epochs")
    plt.legend()

    # Plot accuracy on a second figure
    plt.figure()
    plt.plot(epochs, accuracy, label="training_accuracy")
    plt.plot(epochs, val_accuracy, label="val_accuracy")
    plt.title("accuracy")
    plt.xlabel("epochs")
    plt.legend()

# Plot the training history of the baseline model (model_8)
plot_loss_curves(history_8)

What do these loss curves tell us?

Our model is **overfitting** the training set
It's getting great results on the training data but fails to generalize well to unseen data and performs poorly on the test dataset

### Adjust model parameters

Due to its performance on the training data, it's clear our model is learning something, but it's not generalizing well to unseen data.

Let's try to fix overfitting by:

* **Get more data** - Having more data gives a model more opportunity to learn diverse patterns
* **Simplify the model** - If the current model is overfitting the data, it may be too complicated of a model.  One way to simplify a model is to: reduce # of layers or reduce # of hidden units in layers
* **Use data augmentation** - Data augmentation manipulates the training data in such a way as to add more diversity to it (without altering the original data)
* **Use transfer learning** - Leverages the patterns another model has learned on similar data to your own and allows you to use those patterns on your own dataset.


In [None]:
# Try simplifying the model first
# Remove 2 convolutional layers from the model
# (compared with model_8, each block now has a single Conv2D before its MaxPool2D)
model_9 = Sequential([
  Conv2D(10, 3, activation='relu', input_shape=(224, 224, 3)),
  MaxPool2D(),
  Conv2D(10, 3, activation='relu'),
  MaxPool2D(),
  Flatten(),
  Dense(10, activation='softmax') # changed to have 10 output neurons (10 classes) and use softmax activation function
])
# Same loss, optimizer and metric as model_8 so the two runs are comparable
model_9.compile(loss='categorical_crossentropy',
                optimizer=Adam(),
                metrics=['accuracy'])

history_9 = model_9.fit(train_data, # training batches (rescaled only, no augmentation)
                        epochs=5,
                        steps_per_epoch=len(train_data),
                        validation_data=test_data,
                        validation_steps=len(test_data))

In [None]:
# Print both architectures for comparison. They are called as separate
# statements so the cell does not echo a meaningless (None, None) tuple.
model_8.summary()
model_9.summary()

In [None]:
# Check out loss curves of the simplified model (model_9)
plot_loss_curves(history_9)

### Trying to reduce overfitting with data augmentation

Ideally, we want to:
* Reduce overfitting (get the train and validation loss curves closer)
* Improve validation accuracy

In [None]:
# Create ImageDataGenerator training instance with data augmentation.
# rotation_range is expressed in degrees (unlike the fractional ranges below),
# so it is set to 20; the previous value of 0.2 meant at most 0.2 degrees,
# i.e. effectively no rotation at all.
train_datagen_augmented = ImageDataGenerator(rescale=1/255.,
                                             rotation_range=20, # rotate by up to 20 degrees (degrees, not a fraction)
                                             zoom_range=0.2, # how much do you want to zoom?
                                             width_shift_range=0.2, # move image around on X axis (fraction of width)
                                             height_shift_range=0.3, # move image around on Y axis (fraction of height)
                                             horizontal_flip=True) # do you want to flip an image?


In [None]:
# Import data and augment it from training directory
IMG_SIZE = (224, 224) # size every image is resized to
print("Augmented training data:")
train_data_augmented = train_datagen_augmented.flow_from_directory(train_dir,
                                                                   target_size=IMG_SIZE,
                                                                   batch_size=32,
                                                                   class_mode="categorical") # multi-class labels; no shuffle argument is passed, so the default applies
  

### Model Cloning

In [None]:
# Create a model and fit with augmented data
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense
from tensorflow.keras.optimizers import Adam

# Clone model 8: clone_model builds a new model with the same architecture.
# Per the Keras documentation the clone gets newly initialised weights rather
# than model_8's trained ones, so model_10 starts from scratch.
model_10 = tf.keras.models.clone_model(model_8)

# Compile the cloned model (same loss, optimizer and metric as model_8)
model_10.compile(loss="categorical_crossentropy",
                 optimizer=Adam(),
                 metrics=["accuracy"])

# Fit on the augmented training data; validation still uses the un-augmented test data
history_10 = model_10.fit(
    train_data_augmented, 
    epochs=5,
    steps_per_epoch=len(train_data_augmented),
    validation_data=test_data,
    validation_steps=len(test_data)
    )

In [None]:
# Baseline (model_8, trained without augmentation) on the test data, for comparison with model_10
model_8.evaluate(test_data)

In [None]:
# Same architecture trained on augmented data (model_10) on the test data
model_10.evaluate(test_data)

### Repeat until satisfied

Keep going, continually trying to bring our loss curves closer together and trying to improve the validation/test accuracy.

How?

By running lots of experiments:
* Restructuring our model's architecture (increasing layers/hidden units)
* Adjust the learning rate
* Try different methods of data augmentation (adjust the hyperparameters in our ImageDataGenerator instance)
* Training for longer (e.g. 10 epochs instead of 5)

* Transfer Learning: Take a model's pre-learned patterns from one problem and tweak them to suit your own problem.  For example, take a model trained on pictures of cars to recognize trucks.

### Making a prediction with our trained model

Let's use our trained model to make some predictions on our own custom images

In [None]:
# Reminder for classes (the array built from the sub-folders of train_dir)
class_names

In [None]:
# Download some custom images

!wget -nc --no-clobber https://raw.githubusercontent.com/mrdbourke/tensorflow-deep-learning/main/images/03-hamburger.jpeg
!wget -nc --no-clobber https://raw.githubusercontent.com/mrdbourke/tensorflow-deep-learning/main/images/03-pizza-dad.jpeg
!wget -nc --no-clobber https://raw.githubusercontent.com/mrdbourke/tensorflow-deep-learning/main/images/03-steak.jpeg
!wget -nc --no-clobber https://raw.githubusercontent.com/mrdbourke/tensorflow-deep-learning/main/images/03-sushi.jpeg

In [None]:
# Remake pred_and_plot to take multiclass
def pred_and_plot(model, filename, class_names):
  """
  Predicts the class of the image stored at filename and plots the result.

  The image is loaded through load_and_prep_image, passed to model as a batch
  of one, and shown with "Prediction: <class>" as the plot title. When the
  model emits more than one value per image the class is chosen with argmax;
  with a single output value the value is rounded and used as the index.
  """
  image = load_and_prep_image(filename)

  # predict() works on batches, so add a leading batch axis
  batch = tf.expand_dims(image, axis=0)
  scores = model.predict(batch)

  # More than one output value per image -> multi-class, otherwise binary
  is_multiclass = len(scores[0]) > 1
  if is_multiclass:
    class_index = tf.argmax(scores[0])
  else:
    class_index = int(tf.round(scores)[0][0])
  pred_class = class_names[class_index]

  # Show the image with the predicted class as title, axes hidden
  plt.imshow(image)
  plt.title(f"Prediction: {pred_class}")
  plt.axis(False)


In [None]:
# Make a prediction using model_10 on one of the downloaded custom images
# (class_names is the 10-class array, matching model_10's 10 outputs)
pred_and_plot(model=model_10, 
              filename="03-hamburger.jpeg", 
              class_names=class_names)

### Saving and loading our trained model

In [None]:
# Save a model to the path 'saved_trained_model_10'
# NOTE(review): no file extension is given; which on-disk format this produces
# (and whether an extension-less path is accepted) depends on the installed
# Keras version — confirm
model_10.save('saved_trained_model_10')

In [None]:
# Load in a trained model from the path used when saving and evaluate it on the test data
loaded_model_10 = tf.keras.models.load_model('saved_trained_model_10')
loaded_model_10.evaluate(test_data)

In [None]:
# Compare our loaded model to our existing model: evaluate the in-memory
# model_10 on the same test data (the results are expected to match the loaded model's)
model_10.evaluate(test_data)