In [None]:
from google_images_download import google_images_download   #importing the library

response = google_images_download.googleimagesdownload()   #class instantiation

arguments = {"keywords":"dragon ball z anime, dbz anime,goku transparent, vegeta anime,piccolo anime","limit":100,"print_urls":True}   #creating list of arguments
paths = response.download(arguments)   #passing the arguments to the function
print(paths)   #printing absolute paths of the downloaded images

In [None]:
import split_folders

# Split with a ratio.
# To only split into training and validation set, set a tuple to `ratio`, i.e, `(.8, .2)`.
split_folders.ratio('downloads', output="dataset", seed=1337, ratio=(.8, .2)) # default values

# Split val/test with a fixed number of items e.g. 100 for each set.
# To only split into training and validation set, use a single number to `fixed`, i.e., `10`.
# split_folders.fixed('downloads', output="dataset", seed=1337, fixed=(100, 100), oversample=False) # default values

# Convolutional Neural Network

## Part 1 - Building the CNN

In [1]:
# Importing the Keras libraries and packages
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten
from keras.layers import Dense

Using TensorFlow backend.


In [2]:
# Initialising the CNN
classifier = Sequential()

In [5]:
# Step 1 - Convolution
classifier.add(Conv2D(32, (3, 3), input_shape=(64, 64, 3), activation='relu'))

'''
* 32 feature detector-->32 feature maps (size of 5x5). (64 if using a GPU)
* feature detector consists of 3 rows and 3 columns.

* input_shape= shape of input image (converting all images to fixed size/image format)
* since the images are color images, they will be converted into 3-D arrays.
* 64x64 format (128 or 256 if using a GPU)

* activation function 'relu' is most commonly used for CNN.
'''

In [6]:
# Step 2 - Max Pooling
classifier.add(MaxPooling2D(pool_size=(2, 2)))

'''reduces the size of our feature maps into 2x2 (reducing the complexity of the model).'''

# Adding a second convolutional layer
# classifier.add(Conv2D(32, (3, 3), activation='relu'))
# classifier.add(MaxPooling2D(pool_size=(2, 2)))

In [7]:
# Step 3 - Flattening
classifier.add(Flatten())

In [8]:
# Step 4 - Full connection
classifier.add(Dense(units=128, activation='relu'))

'''
* units= number of nodes in the hidden layer(full connection).

* 128 is not too small, but it can be higher if using GPU.

* activation function 'relu' is most commonly used for CNN.
'''

# Final output layer
classifier.add(Dense(units=1, activation='sigmoid'))

'''
* activation function 'sigmoid' because we have a binary outcome. If we have more, need to use 'softmax'.
'''

In [9]:
# Compiling the CNN
classifier.compile(
    optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

''' 
* 'adam' is a good stochastic gradient descent optimizer for CNN.

* cross entropy loss function is usually used for classifcation problems. Because we have a binary outcome, we
use 'binary_crossentropy'. If we had more we'd need to use 'categorical_crossentropy'.
'''

## Part 2 - Fitting the CNN to the images

### Image Preprocessing

In [10]:
from keras.preprocessing.image import ImageDataGenerator

'''
* Image Data Generator is the first step the Image Augmentation which will help prevent overfitting
(when we have few data points to train our model).

* It creates many batches of the images, and for each batch it will apply some random transformations to them so
we'll get many more diverse images within these batches.'''


train_datagen = ImageDataGenerator(rescale=1./255,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   horizontal_flip=True)


test_datagen = ImageDataGenerator(rescale=1./255)

'''
* rescale the images to a standard size
* shear_range is a geomatrical transformation of the pixels.
* zoom_range is a random zoom we apply to the images.
'''

training_set = train_datagen.flow_from_directory('dataset/train',
                                                 target_size=(64, 64),
                                                 batch_size=32,
                                                 class_mode='binary')

test_set = test_datagen.flow_from_directory('dataset/val',
                                            target_size=(64, 64),
                                            batch_size=32,
                                            class_mode='binary')

Found 8000 images belonging to 2 classes.
Found 2000 images belonging to 2 classes.


### Fit Generator

In [12]:
classifier.fit_generator(training_set,
                         steps_per_epoch=8000,
                         epochs=25,
                         validation_data=test_set,
                         validation_steps=2000)

'''
* steps_per epoch should equal the number of images in the training set.
* can use more than 25 epochs if using a GPU.
* validation_steps should equal the number of images in the testing set.
'''

Epoch 1/25
  34/8000 [..............................] - ETA: 1:38:33 - loss: 0.6448 - acc: 0.6259

KeyboardInterrupt: 