<a href="https://colab.research.google.com/github/ParthivNaresh/CNN_Models_Parthiv_Naresh/blob/Practice%2FInceptionModel/CNN_Model_2019_August_Parthiv.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
#!pip install -U -q PyDrive
import os
import random
import numpy as np
import pandas as pd
from PIL import Image
from shutil import move
import tensorflow as tf
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from google.colab import auth
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from oauth2client.client import GoogleCredentials
from sklearn.model_selection import train_test_split
from tensorflow.keras import Model
from tensorflow.python.keras.utils import plot_model
from tensorflow.python.keras.layers import Input
from tensorflow.python.keras.optimizers import SGD, RMSprop
from tensorflow.python.keras.layers.core import Dense, Flatten
from tensorflow.python.keras.layers.convolutional import Conv2D, MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.image import img_to_array, load_img

import tensorflow.contrib.eager as tfe

tf.enable_eager_execution()

In [0]:
# Mount Google Drive at /content/drive; every data path configured in this
# notebook lives under "/content/drive/My Drive/".
from google.colab import drive
drive.mount('/content/drive')

In [0]:
# ---------------------------------------------------------------------------
# Notebook configuration: dataset locations and training threshold
# ---------------------------------------------------------------------------

# NOTE(review): these two values are not read by any other visible cell;
# presumably they document where the dataset archive came from - confirm.
zip_data_file = "https://s3-ap-southeast-1.amazonaws.com/he-public-data/DL%23+Beginner.zip"
zip_extract_location = "/content/drive/My Drive/"

# Root folder of the project on the mounted Drive
google_drive_location = "/content/drive/My Drive/CNN_Project_1_Animals_Data"

# Directory paths keep their trailing "/" because other cells build file
# paths from them by plain string concatenation.
data_directory = google_drive_location + "/data/"
training_directory = google_drive_location + "/train/"
testing_directory = google_drive_location + "/test/"
predicting_directory = google_drive_location + "/predict/"
labels_directory = google_drive_location + "/animals_labels_train.csv"
visualizations = google_drive_location + '/Visualizations/'

# Training accuracy above which the callback saves the weights and stops training
callback_cutoff_accuracy = 0.6

# Label table: one row per image, columns normalised to lower case
data = pd.read_csv(labels_directory).rename(columns={'Image_id':'image_id','Animal':'animal'})
# Labels use "+" in place of spaces. Replace it literally (regex=False) so the
# result does not depend on the pandas default for `regex`, and so no invalid
# "\+" escape sequence is needed in the pattern string.
data['animal'] = data.animal.str.replace('+', ' ', regex=False)

In [0]:
class split_data_training_test():
    '''
    Splits one flat folder of images into a training and a testing set.

    This data splitting class assumes that the data is initially presented in one
    folder and has already been shuffled: the files are split in listing order
    (shuffle=False) with a ratio of 80% training and 20% testing.
    Categorization of the data into subfolders based on their labels is done in
    categorize_data().

    data_path: folder holding all the image files (sub-folders are ignored).
    '''

    def __init__(self, data_path):
        self.data_path = data_path
        # Only regular files are candidates; directories are skipped
        self.all_the_data = [
            image_id for image_id in os.listdir(self.data_path)
            if os.path.isfile(os.path.join(self.data_path, image_id))
        ]

        # One call yields both halves of the split
        self.train_list, self.test_list = train_test_split(
            self.all_the_data, train_size = 0.8, shuffle=False)
        print(str(len(self.train_list)) + " images placed in the training set")
        print(str(len(self.test_list)) + " images placed in the test set")

    def _move_files(self, file_names, destination):
        '''Moves every file in file_names from data_path into destination.'''
        # shutil.move renames the file to `destination` when that folder does
        # not exist yet, so make sure it is there first.
        os.makedirs(destination, exist_ok=True)
        for file_name in file_names:
            move(os.path.join(self.data_path, file_name), destination)

    def move_training_to(self, train_path):
        '''Moves the training share of the files into train_path.'''
        self.train_path = train_path
        self._move_files(self.train_list, self.train_path)

    def move_test_to(self, test_path):
        '''Moves the testing share of the files into test_path.'''
        self.test_path = test_path
        self._move_files(self.test_list, self.test_path)


In [0]:
class categorize_data():
    '''
    Sorts labelled images into one sub-folder per label.

    This categorization class assumes that the data has been presented in the
    following format:
    1 csv file (loaded as a dataframe) with all the image ids in an 'image_id'
    column and their corresponding labels in an 'animal' column
    2 subfolders (training and testing) that hold a series of images with their
    file names as their image ids
    in_directory() creates a folder for every unique label inside the given folder
    and moves each image into the folder of its label.
    This will make it easier to use the data in image generators later on.
    '''

    # Initializes the distinct categories in the animal column
    def __init__(self, data):
        self.data = data
        self.animal_categories = list(self.data.animal.unique())

    def in_directory(self, directory):
        '''
        Moves every labelled image file in `directory` into `directory/<animal>/`.
        Files whose name has no row in the dataframe are reported and left in place.
        '''
        self.directory = directory

        # Makes a sub-directory for every animal in the specified directory
        for animal in self.animal_categories:
            os.makedirs(os.path.join(self.directory, animal), exist_ok=True)

        # image_id -> animal lookup built once; the first row wins for a
        # duplicated image_id, as a positional lookup of the first match would.
        unique_rows = self.data.drop_duplicates(subset='image_id', keep='first')
        label_of = dict(zip(unique_rows['image_id'], unique_rows['animal']))

        # Iterates through every file in the specified directory and moves it
        # to the folder of the animal it is labelled with in the csv
        for image_id in os.listdir(self.directory):
            candidate = os.path.join(self.directory, image_id)
            if not os.path.isfile(candidate):
                continue
            if image_id not in label_of:
                print("No label found for " + image_id + ", leaving it in place")
                continue
            self.this_file = candidate
            # os.path.join keeps the path valid on every platform (a hard-coded
            # "\\" separator is not a directory separator on Linux/Colab)
            self.destination = os.path.join(self.directory, label_of[image_id], "")
            # Moves the image to the appropriate animal folder
            move(self.this_file, self.destination)


In [0]:
class display_images():
    '''
    Quick display of random images from one category.

    This class assumes that all the data has been moved into appropriately labeled
    subfolders in the training or testing folders.

    animal: name of the category sub-folder.
    directory: folder that contains the category sub-folders.
    '''

    def __init__(self, animal, directory):
        self.animal = animal
        self.directory = directory
        # Proper join: works whether or not `directory` ends with a separator
        self.animal_folder = os.path.join(self.directory, self.animal)
        self.number_of_images = len(os.listdir(self.animal_folder))
        print("Total training " + self.animal +  " images: ", self.number_of_images)

    def numberOfTimes(self, number):
        '''Shows `number` randomly picked images (with replacement) from the folder.'''
        # List the folder once instead of once per picked image
        folder_contents = os.listdir(self.animal_folder)
        if not folder_contents:
            # np.random.randint(0, 0) would raise; there is nothing to show
            print("No images found in " + self.animal_folder)
            self.animal_folder_images = []
            return

        # Random positions in the folder listing
        random_list = [np.random.randint(0, len(folder_contents)) for _ in range(number)]

        # Paths of the randomly picked images
        self.animal_folder_images = [os.path.join(self.animal_folder, folder_contents[image_index])
                                     for image_index in random_list]

        for file_path in self.animal_folder_images:
            plt.imshow(mpimg.imread(file_path))
            plt.axis('off')
            plt.show()

# Show four randomly picked images from the "wolf" folder of the training set
display_images("wolf", training_directory).numberOfTimes(4)


In [0]:
# Functional-API model: a small convolutional stem, two Inception-style blocks
# (parallel branches concatenated on the last axis) and a dense classifier head.
# Input images are 150 x 150 with 3 channels.
input_img = Input(shape=(150, 150, 3))

# Stem: two 3x3 convolutions followed by a 2x2 max-pool ...
layer_1 = Conv2D(16, (3,3), activation='relu')(input_img)
layer_2 = Conv2D(32, (3,3), activation='relu')(layer_1)
layer_3 = MaxPooling2D((2,2))(layer_2)

# ... then one more 3x3 convolution and 2x2 max-pool
layer_4 = Conv2D(64, (3,3), activation='relu')(layer_3)
layer_5 = MaxPooling2D((2,2))(layer_4)
    
### 1st layer of the first inception block: five parallel branches fed by layer_5
### (three 1x1 convolutions and two stride-1 max-pools, all with 'same' padding)
layer_1_1_inception = Conv2D(20, (1,1), padding='same', activation='relu')(layer_5)
layer_1_2_inception = MaxPooling2D((2,2), strides=(1,1), padding='same')(layer_5)
layer_1_3_inception = Conv2D(12, (1,1), padding='same', activation='relu')(layer_5)
layer_1_4_inception = MaxPooling2D((3,3), strides=(1,1), padding='same')(layer_5)
layer_1_5_inception = Conv2D(12, (1,1), padding='same', activation='relu')(layer_5)

### 2nd layer: one convolution per branch (1xN kernels on the convolution
### branches, 1x1 kernels on the pooling branches)
layer_2_1_inception = Conv2D(12, (1,3), padding='same', activation='relu')(layer_1_1_inception)
layer_2_2_inception = Conv2D(24, (1,1), padding='same', activation='relu')(layer_1_2_inception)
layer_2_3_inception = Conv2D(12, (1,5), padding='same', activation='relu')(layer_1_3_inception)
layer_2_4_inception = Conv2D(24, (1,1), padding='same', activation='relu')(layer_1_4_inception)
layer_2_5_inception = Conv2D(12, (1,3), padding='same', activation='relu')(layer_1_5_inception)

### 3rd layer: the three convolution branches continue with the matching Nx1
### kernel; the two pooling branches end at the 2nd layer
layer_3_1_inception = Conv2D(12, (3,1), padding='same', activation='relu')(layer_2_1_inception)
layer_3_2_inception = Conv2D(12, (5,1), padding='same', activation='relu')(layer_2_3_inception)
layer_3_3_inception = Conv2D(12, (3,1), padding='same', activation='relu')(layer_2_5_inception)

# Merge the five branch outputs of the first block along axis 3
mid_1 = tf.keras.layers.concatenate([layer_3_1_inception, layer_2_2_inception,
                                     layer_3_2_inception, layer_2_4_inception,
                                     layer_3_3_inception], axis = 3)

# Two convolution + max-pool stages between the inception blocks
layer_6 = Conv2D(84, (3,3), activation='relu')(mid_1)
layer_7 = MaxPooling2D((2,2))(layer_6)

layer_8 = Conv2D(96, (3,3), activation='relu')(layer_7)
layer_9 = MaxPooling2D((2,2))(layer_8)

### 4th layer: start of the second inception block, three branches fed by layer_9
layer_4_1_inception = Conv2D(24, (1,1), padding='same', activation='relu')(layer_9)
layer_4_2_inception = MaxPooling2D((2,2), strides=(1,1), padding='same')(layer_9)
layer_4_3_inception = Conv2D(24, (1,1), padding='same', activation='relu')(layer_9)

### 5th layer: layer_4_3_inception fans out into a 1x3 and a 3x1 convolution
layer_5_1_inception = Conv2D(24, (3,3), padding='same', activation='relu')(layer_4_1_inception)
layer_5_2_inception = Conv2D(16, (1,1), padding='same', activation='relu')(layer_4_2_inception)
layer_5_3_inception = Conv2D(24, (1,3), padding='same', activation='relu')(layer_4_3_inception)
layer_5_4_inception = Conv2D(24, (3,1), padding='same', activation='relu')(layer_4_3_inception)

### 6th layer: layer_5_1_inception fans out into a 1x3 and a 3x1 convolution
layer_6_1_inception = Conv2D(24, (1,3), padding='same', activation='relu')(layer_5_1_inception)
layer_6_2_inception = Conv2D(24, (3,1), padding='same', activation='relu')(layer_5_1_inception)

# Merge the five branch outputs of the second block along axis 3
mid_2 = tf.keras.layers.concatenate([layer_5_2_inception, layer_5_3_inception,
                                     layer_5_4_inception, layer_6_1_inception,
                                     layer_6_2_inception], axis = 3)

# Classifier head: flatten, three ReLU dense layers, then a 30-way softmax
flat_1 = Flatten()(mid_2)

dense_1 = Dense(600, activation='relu')(flat_1)
dense_2 = Dense(300, activation='relu')(dense_1)
dense_3 = Dense(150, activation='relu')(dense_2)
output = Dense(30, activation='softmax')(dense_3)

my_model = Model([input_img], output)

# Write a diagram of the model graph (with shapes and layer names) to the project folder
plot_model(my_model,
           to_file = google_drive_location + '/InceptionModel.png',
           show_shapes=True,
           show_layer_names=True)

# Print the model summary
my_model.summary()

def list_layers(model=None, count=8):
    """Prints "<layer class> - <layer name>" for the first layers of a model.

    model: object exposing a `layers` sequence; defaults to the notebook's my_model.
    count: how many leading layers to list (default 8).
    Returns None; the listing is only printed.
    """
    if model is None:
        model = my_model
    for layer in model.layers[:count]:
        # The class name is read from the type itself rather than parsed out
        # of the text of str(layer)
        print(type(layer).__name__ + " - " + layer.name)

# list_layers() only prints and has no return statement, so `a` is always None
a = list_layers()

# Configure training: Adam optimizer, categorical cross-entropy loss, and
# accuracy tracked as a metric
my_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [0]:
class visualization_by_layer():
    '''
    Renders what every layer of my_model produces for one random image.

    The constructor does all the work: it picks a random image from the `animal`
    sub-folder of the training directory, runs it through my_model, draws the
    feature maps of every layer with a 4-dimensional output as one horizontal
    strip, and stacks all strips into `output_by_layers.jpg` inside the
    visualizations folder. The per-layer strip images are temporary and are
    deleted once the combined picture has been saved.

    animal: name of the category sub-folder inside the training directory.
    '''

    # (layer-name prefix, display name, ((title label, get_config() key), ...))
    _LAYER_TITLES = (
        ('conv2d', "Convolutional 2D",
         (("Filters", 'filters'), ("Kernel Size", 'kernel_size'), ("Strides", 'strides'))),
        ('max_pooling', "Max Pooling",
         (("Pool Size", 'pool_size'), ("Strides", 'strides'))),
        ('concatenate', "Concatenate",
         (("Axis", 'axis'),)),
    )

    def __init__(self, animal):
        # Creates a list of every image file in the specified animal folder
        self.path = os.path.join(training_directory, animal)
        self.all_the_data = [
            image_id for image_id in os.listdir(self.path)
            if os.path.isfile(os.path.join(self.path, image_id))
        ]
        # Randomly chooses one image and prepares it as a batch of one,
        # scaled by 1/255
        img_path = random.choice(self.all_the_data)
        img = load_img(os.path.join(self.path, img_path), target_size=(150, 150))
        x = img_to_array(img)
        x = x.reshape((1,) + x.shape)
        x /= 255.0

        # Avoids the input layer as part of the outputs
        successive_layers = [layer for layer in my_model.layers
                             if not layer.name.startswith('input')]
        successive_outputs = [layer.output for layer in successive_layers]
        visualization_model = tf.keras.models.Model(inputs = my_model.input,
                                                    outputs = successive_outputs)
        successive_feature_maps = visualization_model.predict(x)

        os.makedirs(visualizations, exist_ok=True)
        # One figure is opened per layer; silence the "too many figures" warning
        plt.rcParams.update({'figure.max_open_warning': 0})

        list_of_layers = []
        for layer, feature_map in zip(successive_layers, successive_feature_maps):
            # Only for the conv/maxpool/concatenate layers, not the
            # flatten/fully-connected layers
            if len(feature_map.shape) != 4:
                continue

            n_features = feature_map.shape[-1]
            display_grid = self._tile_feature_map(feature_map)

            width = 20. / n_features
            height = 10. / n_features
            plt.figure(figsize=(width * n_features, height))

            title = self._layer_title(layer.name.lower(), layer.get_config())
            if title is not None:
                plt.title(title)

            plt.grid(False)
            # Displays the plot in the console
            plt.imshow(display_grid, aspect='auto', cmap='viridis')
            # Saves the output of every layer in the specified folder so they
            # can be combined later
            layer_file = os.path.join(visualizations, str(len(list_of_layers)) + '.png')
            plt.savefig(layer_file, bbox_inches='tight')
            list_of_layers.append(layer_file)

        if not list_of_layers:
            # No layer produced a 4-dimensional output: nothing to combine
            return

        self._stack_images(list_of_layers,
                           os.path.join(visualizations, 'output_by_layers.jpg'))

        # Removes only the temporary files that each contain the output of one
        # layer; anything else stored in the folder is left untouched
        for layer_file in list_of_layers:
            os.remove(layer_file)

    @staticmethod
    def _tile_feature_map(feature_map):
        '''Lays the channels of a (1, rows, cols, n_features) map side by side.

        Each channel is standardised, scaled by 64, shifted by 128 and clipped
        to 0..255. Returns an array of shape (rows, cols * n_features).
        '''
        rows, cols = feature_map.shape[1], feature_map.shape[2]
        n_features = feature_map.shape[-1]
        display_grid = np.zeros((rows, cols * n_features))
        for i in range(n_features):
            # Work on a copy so the caller's array is not modified
            channel = feature_map[0, :, :, i].copy()
            channel -= channel.mean()
            spread = channel.std()
            # A constant channel has zero spread; dividing by it would fill
            # the tile with NaN
            if spread > 0:
                channel /= spread
            channel *= 64
            channel += 128
            # Tile each filter into a horizontal grid
            display_grid[:, i * cols : (i + 1) * cols] = np.clip(channel, 0, 255).astype('uint8')
        return display_grid

    @staticmethod
    def _layer_title(layer_name, layer_config):
        '''Builds the plot title for a layer, or returns None for unknown layer types.

        layer_name: lower-cased layer name, matched by prefix.
        layer_config: the dictionary returned by the layer's get_config().
        '''
        for prefix, display_name, properties in visualization_by_layer._LAYER_TITLES:
            if layer_name.startswith(prefix):
                return display_name + "".join(
                    ", " + label + ": " + str(layer_config[config_key])
                    for label, config_key in properties)
        return None

    @staticmethod
    def _stack_images(image_paths, output_path):
        '''Stacks the images top to bottom and saves the result to output_path.'''
        tiles = []
        for image_path in image_paths:
            # Read each file once and close it before it gets deleted later
            with Image.open(image_path) as handle:
                tiles.append(handle.copy())

        # The strips have more or less the same width, however some are wider
        # than others by a few pixels, which would leave black space beside the
        # narrower ones, so the minimum width is taken instead of the maximum
        min_width = min(tile.size[0] for tile in tiles)
        total_height = sum(tile.size[1] for tile in tiles)

        new_im = Image.new('RGB', (min_width, total_height))
        y_offset = 0
        for tile in tiles:
            new_im.paste(tile, (0, y_offset))
            y_offset += tile.size[1]

        new_im.save(output_path)
            
                
# Build the layer-by-layer feature-map picture for a random "wolf" training image
visualization_by_layer("wolf")

In [10]:
# Test data: pixel values are only multiplied by 1/255, no augmentation
validation_datagen = ImageDataGenerator(rescale = 1./255.)

# Training data: the same rescaling plus random augmentations
training_datagen = ImageDataGenerator(
        horizontal_flip=True,
        zoom_range=0.2,
        shear_range=0.2,
        height_shift_range=0.2,
        width_shift_range=0.2,
        rotation_range=40,
        rescale = 1./255.)

# Both generators read the images from one sub-folder per class and label
# them by folder name; images are resized to 150x150 and served in
# batches of 26 with categorical (one-hot) labels.
train_generator = training_datagen.flow_from_directory(
        directory=training_directory,
        class_mode='categorical',
        target_size=(150,150),
        batch_size = 26)

validation_generator = validation_datagen.flow_from_directory(
        directory=testing_directory,
        class_mode='categorical',
        target_size=(150,150),
        batch_size = 26)

Found 10410 images belonging to 30 classes.
Found 2600 images belonging to 30 classes.


In [0]:
class myCallback(tf.keras.callbacks.Callback):
    """Saves the weights and stops training once training accuracy passes the cutoff.

    The cutoff is the notebook-level callback_cutoff_accuracy; the weights are
    written to V3_Parthiv_Attempt_1.h5 inside google_drive_location.
    """

    def on_epoch_end(self, epoch, logs=None):
        """Checks the epoch's training accuracy against the cutoff.

        epoch: index of the epoch that just finished (unused).
        logs: metrics dictionary supplied by Keras; may be None.
        """
        logs = logs or {}
        # The accuracy metric is logged as 'acc' by the TF 1.x Keras this
        # notebook targets; 'accuracy' is accepted too so the check does not
        # silently compare None when the key name differs.
        accuracy = logs.get('acc', logs.get('accuracy'))
        if accuracy is None:
            return

        if accuracy > callback_cutoff_accuracy:
            # google_drive_location is the project folder defined in the
            # configuration cell (external_drive_location was never defined)
            self.model.save_weights(os.path.join(google_drive_location,
                                                 "V3_Parthiv_Attempt_1.h5"))
            print("\nReached {:.0%} accuracy so cancelling training!".format(
                callback_cutoff_accuracy))
            self.model.stop_training = True

In [0]:
# Train for at most 20 epochs of 50 batches each; validation also uses 50
# batches per epoch. myCallback stops training early once its accuracy
# cutoff is passed.
history = my_model.fit_generator(
train_generator,
validation_data = validation_generator,
steps_per_epoch = 50,
epochs = 20,
validation_steps = 50,
verbose = 1,
callbacks=[myCallback()])

# Per-epoch metrics. The accuracy keys are 'acc'/'val_acc' in the TF 1.x Keras
# this notebook targets; the 'accuracy' spelling is accepted as a fallback.
acc = history.history.get('acc', history.history.get('accuracy'))
val_acc = history.history.get('val_acc', history.history.get('val_accuracy'))
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(acc))

# Figure 1: accuracy
plt.plot(epochs, acc, 'r', label='Training accuracy')
plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(loc=0)

# Figure 2: loss (this figure used to be opened but left empty)
plt.figure()
plt.plot(epochs, loss, 'r', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(loc=0)

plt.show()

In [0]:
# Colab upload widget; imported here because this cell is its only user
from google.colab import files

# Opens the upload dialog; `uploaded` maps each chosen file name to its content
uploaded = files.upload()

# Class index -> folder (label) name, taken from the training generator
index_to_label = {index: label for label, index in train_generator.class_indices.items()}

for fn in uploaded.keys():
  path = fn
  # load_img / img_to_array are the helpers imported at the top of the notebook
  img = load_img(path, target_size=(150, 150))
  x = img_to_array(img)
  # Same 1/255 rescaling the training and validation generators apply
  x /= 255.0
  x = np.expand_dims(x, axis=0)

  images = np.vstack([x])
  # The trained network in this notebook is my_model
  classes = my_model.predict(images, batch_size=10)
  print(fn)
  print(classes)
  print("Predicted: " + index_to_label[int(np.argmax(classes[0]))])