##### Copyright 2019 The TensorFlow Authors.

In [None]:
#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Convolutional Neural Network (CNN)

<table class="tfo-notebook-buttons" align="left">
  <td>
    <a target="_blank" href="https://www.tensorflow.org/tutorials/images/cnn">
    <img src="https://www.tensorflow.org/images/tf_logo_32px.png" />
    View on TensorFlow.org</a>
  </td>
  <td>
    <a target="_blank" href="https://colab.research.google.com/github/tensorflow/docs/blob/master/site/en/tutorials/images/cnn.ipynb">
    <img src="https://www.tensorflow.org/images/colab_logo_32px.png" />
    Run in Google Colab</a>
  </td>
  <td>
    <a target="_blank" href="https://github.com/tensorflow/docs/blob/master/site/en/tutorials/images/cnn.ipynb">
    <img src="https://www.tensorflow.org/images/GitHub-Mark-32px.png" />
    View source on GitHub</a>
  </td>
  <td>
    <a href="https://storage.googleapis.com/tensorflow_docs/docs/site/en/tutorials/images/cnn.ipynb"><img src="https://www.tensorflow.org/images/download_logo_32px.png" />Download notebook</a>
  </td>
</table>

This tutorial demonstrates training a simple [Convolutional Neural Network](https://developers.google.com/machine-learning/glossary/#convolutional_neural_network) (CNN) to classify [CIFAR images](https://www.cs.toronto.edu/~kriz/cifar.html). Because this tutorial uses the [Keras Sequential API](https://www.tensorflow.org/guide/keras/overview), creating and training your model will take just a few lines of code.


### Import TensorFlow

In [None]:
import tensorflow as tf

from tensorflow.keras import datasets, layers, models
import matplotlib.pyplot as plt

from PIL import Image
import os

import numpy as np

### Download and prepare the CIFAR10 dataset


The CIFAR10 dataset contains 60,000 color images in 10 classes, with 6,000 images in each class. The dataset is divided into 50,000 training images and 10,000 testing images. The classes are mutually exclusive and there is no overlap between them.

In [None]:
# Download CIFAR10 (cached by Keras after the first call) as train / test splits
(train_images, train_labels), (test_images, test_labels) = datasets.cifar10.load_data()

# Normalize pixel values to be between 0 and 1
train_images, test_images = train_images / 255.0, test_images / 255.0

# A notebook cell only displays its last expression, so the shapes are
# gathered into one tuple instead of being written as three bare expressions
# (of which the first two would be silently discarded).
train_images.shape, test_images.shape, train_labels.shape


In [None]:
# Python program to convert numpy array to image

# import required libraries
import numpy as np
from PIL import Image as im

# define a main function
def main():
    """Build a 1024 x 720 greyscale test image from a NumPy array.

    Prints the array's type, its shape before and after reshaping, and the
    array itself, then converts the array to a PIL image.

    Returns:
        The PIL image created from the reshaped array.
    """

    # create a numpy array from scratch using the arange function.
    # 1024 x 720 = 737280 is the number of pixels.
    # np.uint8 holds integers from 0 to 255 (no negative numbers), so the
    # values cannot actually count up to 737279.
    # NOTE(review): they are expected to wrap around modulo 256 -- confirm
    # on the NumPy version in use.
    array = np.arange(0, 737280, 1, np.uint8)

    # check type of array
    print(type(array))

    # the array is still one-dimensional here: a single row of 737280 pixels
    print(array.shape)

    # Reshape the array into a familiar resolution
    array = np.reshape(array, (1024, 720))

    # show the shape of the array
    print(array.shape)

    # show the array
    print(array)

    # create an image object from the array. `Image` is the PIL module
    # imported at the top of the notebook; the `im` alias is avoided because
    # a later cell rebinds `im` to an image instance, which would break this
    # function when it is re-run.
    data = Image.fromarray(array)

    # saving the final output as a PNG file
    # data.save('gfg_dummy_pic.png')

    # hand the image back so the caller can display or save it
    return data

# driver code
# Only call main() when the module name is "__main__", i.e. when this code is
# the entry point rather than being imported from elsewhere.
if __name__ == "__main__":

    # function call
    main()

In [None]:
# Test creating image from array

# Use uint8 pixel data and let PIL infer the mode from the array's dtype and
# shape. Forcing `mode=` onto an array of a different dtype makes PIL
# reinterpret the raw bytes instead of converting the values.
gray_pixels = np.full((100, 100), 0, dtype=np.uint8)     # 2-D array    -> greyscale
rgb_pixels = np.full((100, 100, 3), 0, dtype=np.uint8)   # 3 channels   -> colour

# Distinct names are used for the images so that the `im` alias of the PIL
# module (bound in the previous cell) is not overwritten.
gray_image = Image.fromarray(gray_pixels)
rgb_image = Image.fromarray(rgb_pixels)

# Every pixel was filled with 0, so the top-left pixel is 0 in both images.
# print() is used because a cell only displays its last bare expression.
print(gray_image.mode, gray_image.getpixel((0, 0)))
print(rgb_image.mode, rgb_image.getpixel((0, 0)))



In [None]:
# train_images[0,:,:,:].shape

# train_images[0,:,:,:]

# Image.fromarray(train_images[0,:,:,:])

In [None]:
# Load a single image to check the PIL -> NumPy conversion

# open the sample picture
sample_path = 'Training Set BW/01_k-0p082.jpg'
img = Image.open(sample_path)

# np.asarray() converts the PIL image into a NumPy array
numpydata = np.asarray(img)

# report the type of the converted object, then its shape
for detail in (type(numpydata), numpydata.shape):
    print(detail)


In [None]:
# Load images into arrays and create labels

# directory holding the .jpg images
directory = 'Training Set BW'

IMAGE_SHAPE = (480, 640, 3)         # height x width x RGB channels every kept image must have
DOUBLE_SIZE_SHAPE = (960, 1280, 3)  # images of this size are downsampled by 2 to IMAGE_SHAPE

filenames = []     # name of every image that is kept, in the same order as the arrays below
kept_images = []   # one IMAGE_SHAPE array per kept image, pixel values divided by 255
kept_labels = []   # 0 = not distorted, 1 = distorted

# os.listdir() returns entries in arbitrary order; sorting makes the image
# order (and therefore the seeded train/test split) reproducible.
for filename1 in sorted(os.listdir(directory)):
    f1 = os.path.join(directory, filename1)

    # check the file name itself (not the joined path) for the extension
    if not (os.path.isfile(f1) and filename1.endswith(".jpg")):
        continue

    # load image and convert to a numpy array; the context manager closes the file
    with Image.open(f1) as img:
        img_array = np.asarray(img) / 255

    # downsample double-size images by a factor of 2 in both directions
    if img_array.shape == DOUBLE_SIZE_SHAPE:
        img_array = img_array[0:-1:2, 0:-1:2, :]

    # Comparing the whole shape tuple also copes with 2-D (single-channel)
    # arrays, for which indexing shape[2] would raise an IndexError.
    if img_array.shape != IMAGE_SHAPE:
        print(f"skipping {filename1}: unsupported image shape {img_array.shape}")
        continue

    # Name, pixels and label are recorded together, and only for images that
    # are kept, so the three collections always stay aligned.
    filenames.append(filename1)
    kept_images.append(img_array)
    # label images based on if "k" is in filename
    # NOTE(review): this matches a "k" anywhere in the name -- confirm that
    # no undistorted file name contains the letter.
    kept_labels.append(1 if "k" in filename1 else 0)

# Stack once at the end instead of calling np.append() per image, which
# copies the whole dataset on every iteration.
if kept_images:
    all_distortion_images = np.stack(kept_images, axis=0)   # [N x height x width x 3]
else:
    all_distortion_images = np.zeros((0,) + IMAGE_SHAPE)
all_distortion_labels = np.array(kept_labels, dtype=int).reshape(-1, 1)   # [N x 1]

assert len(filenames) == all_distortion_images.shape[0] == all_distortion_labels.shape[0]

# display both shapes (a cell only shows its last expression)
all_distortion_images.shape, all_distortion_labels.shape



In [None]:
# split data into train and test data

N_TRAIN = 40   # number of images used for training; all remaining images form the test set
SPLIT_SEED = 42

n_images = all_distortion_images.shape[0]

# With N_TRAIN or fewer images the test set would silently be empty and the
# validation / evaluation steps further down would have nothing to work on.
if n_images <= N_TRAIN:
    raise ValueError(
        f"Need more than {N_TRAIN} images to leave a non-empty test set, found {n_images}"
    )

np.random.seed(SPLIT_SEED) # set random seed

idx = np.random.permutation(n_images) # get permutation

idx_train = idx[:N_TRAIN] # training indices
idx_test = idx[N_TRAIN:] # test indices

# split all data and labels into training and test sets
# (this rebinds train_images / test_images / train_labels / test_labels,
# replacing the CIFAR arrays loaded earlier in the notebook)
train_images = all_distortion_images[idx_train]
test_images = all_distortion_images[idx_test]

train_labels = all_distortion_labels[idx_train]
test_labels = all_distortion_labels[idx_test]

# show the labels of each split as flat vectors
print(train_labels[:, 0])

print(test_labels[:, 0])



In [None]:
# print(filenames)

# print("k" in filenames[k1] for k1 in range(0,len(filenames)))

# print(all_distortion_labels)

In [None]:
# print(filenames[idx_train[3]])

# # print(["k" in filenames[k1] for k1 in range(0,len(filenames))])

# print(all_distortion_labels[idx_train[3]])

### Verify the data

To verify that the dataset looks correct, let's plot the first 12 images from the training set and display the file name and class name below each image:


In [None]:
# class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
#                'dog', 'frog', 'horse', 'ship', 'truck']

# plt.figure(figsize=(10,10))
# for i in range(25):
#     plt.subplot(5,5,i+1)
#     plt.xticks([])
#     plt.yticks([])
#     plt.grid(False)
#     plt.imshow(train_images[i])
#     # The CIFAR labels happen to be arrays, 
#     # which is why you need the extra index
#     plt.xlabel(class_names[train_labels[i][0]])
# plt.show()

In [None]:
class_names = ['undistorted', 'distorted']

# 6 x 2 grid: one panel for each of the first 12 training images
fig, axes = plt.subplots(6, 2, figsize=(8, 24))
for k1, ax in enumerate(axes.flat):
    # hide ticks and grid lines so only the picture is visible
    ax.set_xticks([])
    ax.set_yticks([])
    ax.grid(False)
    ax.imshow(train_images[k1])
    # the labels are stored as an N x 1 array, hence the extra column index;
    # idx_train maps the position in the training set back to the file name
    ax.set_xlabel(filenames[idx_train[k1]] + " : " + class_names[train_labels[k1, 0]])
plt.show()


### Create the convolutional base

The code below defines the convolutional base using a common pattern: a stack of [Conv2D](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Conv2D) and [MaxPooling2D](https://www.tensorflow.org/api_docs/python/tf/keras/layers/MaxPool2D) layers.

As input, a CNN takes tensors of shape (image_height, image_width, color_channels), ignoring the batch size. If you are new to these dimensions, color_channels refers to (R,G,B). In this example, you will configure your CNN to process inputs of shape (480, 640, 3), which is the format of the distortion images loaded above. You can do this by passing the argument `input_shape` to your first layer.


In [None]:
# Convolutional base: three 3x3 convolutions (32, 64 and 64 filters) with a
# 2x2 max-pooling step after each of the first two. The first layer fixes the
# input to 480 x 640 images with 3 colour channels.
conv_base_layers = [
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(480, 640, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
]

model = models.Sequential()
for conv_layer in conv_base_layers:
    model.add(conv_layer)

# model = models.Sequential()
# model.add(layers.Conv2D(32, (4, 4), activation='relu', input_shape=(32, 32, 3)))
# model.add(layers.MaxPooling2D((2, 2)))
# model.add(layers.Conv2D(64, (3, 3), activation='relu'))
# model.add(layers.MaxPooling2D((2, 2)))
# model.add(layers.Conv2D(64, (3, 3), activation='relu'))

Let's display the architecture of your model so far:

In [None]:
# Print the summary of the model as built so far (convolutional base only)
model.summary()

Above, you can see that the output of every Conv2D and MaxPooling2D layer is a 3D tensor of shape (height, width, channels). The width and height dimensions tend to shrink as you go deeper in the network. The number of output channels for each Conv2D layer is controlled by the first argument (e.g., 32 or 64). Typically,  as the width and height shrink, you can afford (computationally) to add more output channels in each Conv2D layer.

### Add Dense layers on top

To complete the model, you will feed the last output tensor from the convolutional base (of shape (116, 156, 64)) into one or more Dense layers to perform classification. Dense layers take vectors as input (which are 1D), while the current output is a 3D tensor. First, you will flatten (or unroll) the 3D output to 1D, then add one or more Dense layers on top. The distortion dataset has 2 output classes (undistorted and distorted), so you use a final Dense layer with 2 outputs.

In [None]:
# Classification head: unroll the feature maps into a vector, pass it through
# one hidden layer, then produce one output per class.
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
# The final layer has no activation, so it emits raw scores (logits). The
# loss used when the model is compiled is created with from_logits=True and
# therefore expects logits; a sigmoid here would contradict that setting.
# np.argmax over the two outputs still picks the predicted class; apply
# tf.nn.softmax to the outputs if probabilities are needed.
model.add(layers.Dense(2))
# model.add(layers.Dense(1))

Here's the complete architecture of your model:

In [None]:
# Print the summary of the complete model, including the Dense layers
model.summary()

The network summary shows that (116, 156, 64) outputs were flattened into vectors of shape (1158144) before going through two Dense layers.

### Compile and train the model

In [None]:

# compile model
# NOTE(review): from_logits=True tells the loss that the model outputs raw
# scores -- verify that the final Dense layer really has no activation.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])

# train model for 10 epochs, scoring the test split after every epoch
history = model.fit(
    train_images,
    train_labels,
    epochs=10,
    validation_data=(test_images, test_labels),
)


### Evaluate the model

In [None]:
# dir(history.history)

In [None]:
# Plot training and validation accuracy per epoch, then score the test set

fig1, ax = plt.subplots(figsize=(12, 6))
# each curve is labelled with the name of the history entry it shows
for metric in ('accuracy', 'val_accuracy'):
    ax.plot(history.history[metric], label=metric)
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.set_ylim([0.2, 1.2])
ax.grid()
ax.legend(loc='lower right')

# loss and accuracy of the trained model on the test images
test_loss, test_acc = model.evaluate(test_images, test_labels, verbose=2)


In [None]:
# Accuracy returned by model.evaluate() on the test images
print(test_acc)

Your simple CNN has achieved a test accuracy of over 70%. Not bad for a few lines of code! For another CNN style, check out the [TensorFlow 2 quickstart for experts](https://www.tensorflow.org/tutorials/quickstart/advanced) example that uses the Keras subclassing API and `tf.GradientTape`.

In [None]:
# dir(history.history)

In [None]:
# train_images

In [None]:
k1 = 1  # index of the test image to display

# draw the image with its true class name underneath
fig1, ax = plt.subplots(figsize=(8, 8))
ax.imshow(test_images[k1])
ax.set_xlabel(class_names[test_labels[k1, 0]])
plt.show()

In [None]:
# Model outputs for every test image (displayed as the cell's result)
model.predict(test_images)

# model.predict(train_images)

In [None]:
# test_images[0].reshape(1,32,32,3).shape

In [None]:
kim = 1  # index of the test image to classify

# Run the model once and reuse the result. Slicing with kim:kim + 1 keeps the
# leading batch axis, so no hard-coded reshape to the image size is needed.
scores = model.predict(test_images[kim:kim + 1])[0]
predicted_index = np.argmax(scores)

# model outputs for the two classes
print(scores)

# index of the larger output, and the class name it stands for
print(predicted_index)

print(class_names[predicted_index])

# true class of the same image: indexed with kim, not with the unrelated
# k1 that happens to be left over from an earlier cell
print(class_names[test_labels[kim, 0]])

In [None]:
# Validate model accuracy

# model outputs for all test images, one row per image
prediction = model.predict(test_images)

# the column with the larger output is the predicted class (0 or 1)
predicted_class = np.argmax(prediction, axis=1)

# true labels as a flat vector (they are stored as an N x 1 array)
true_class = test_labels[:, 0]

print("Predicted classes for test images:")
print(predicted_class)

print("Test image labels:")
print(true_class)

# count the matches once and reuse the count for both reports
n_correct = np.sum(predicted_class == true_class)
n_test = len(test_labels)

print(f"Number of correct predictions out of number of test images: {n_correct} / {n_test}")

print(f"Model accuracy on test images: {n_correct / n_test}")

In [None]:
# class_names[np.argmax(model.predict(test_images[1].reshape(1,32,32,3)))]

In [None]:
# np.argmax(model.predict(test_images),axis=1)

# test_labels.reshape(-1,)

# np.argmax(model.predict(test_images),axis=1)


# test_labels.reshape(-1,) == np.argmax(model.predict(test_images),axis=1)

# sum(test_labels.reshape(-1,) == np.argmax(model.predict(test_images),axis=1))

# sum(test_labels.reshape(-1,) == np.argmax(model.predict(test_images),axis=1)) / test_labels.shape[0]