# Importing the necessary packages.

In [1]:
from keras.models import Sequential
from keras.layers import Convolution2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten
from keras.layers import Dense
from keras.preprocessing.image import ImageDataGenerator

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


# Initialising the CNN.

In [2]:
classifier = Sequential()

# First convolution layer.
I decided to use 32 feature maps 3x3 pixels, input shape is the same as original pictures which is 50x50 pixels and there is another dimension because pictures are in RGB scale. To get rid of linearity in the pictures I used ReLU functions (Rectified Linear Unit).

In [3]:
classifier.add(Convolution2D(32, 3, 3, input_shape = (50, 50, 3), activation = 'relu'))
classifier.add(MaxPooling2D(pool_size = (2, 2)))

  """Entry point for launching an IPython kernel.


# Second convolution layer.
Using one convolutional layer resulted in unsatisfying results which were not very bad but could be better. To achieve that improvement I added anothet convolutional layer, the same as the first one and this action resulted in better accuracy which you will see int the last step. 

In [4]:
classifier.add(Convolution2D(32, 3, 3, activation = 'relu'))
classifier.add(MaxPooling2D(pool_size = (2, 2)))

  """Entry point for launching an IPython kernel.


# Flattening.

In [5]:
classifier.add(Flatten())


# Full connection.
There is on hidden layer in this fully connected neural network, it consists of 128 neurons and the their activation fucntion is rectifier function. For the output layer I chose sigmoid function which I believe is a good choice for this example. 

In [6]:
classifier.add(Dense(output_dim = 128, activation = 'relu'))
classifier.add(Dense(output_dim = 1, activation = 'sigmoid'))

  """Entry point for launching an IPython kernel.
  


# Compiling the CNN.
Optimizer was chosen to be adam which is one of the stochastic gradient descent algorithms, loss function is binary crossentropy, if there were more than two categories, categorical crossentropy woudl be a better choice, but in this there are two categories: pictures with invasive ductal carcinoma (1) and whitout it (0). 

In [7]:
classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

# Fitting the CNN to the images.
Code for image preprocessing comes from Keras documentation, it contains everything that is needed for those pictures to be useful for presented convolutional neural network. ImageDataGenerator is used for training set so there would be some variety which prevents overfitting. Just in case of some pictues having different size than 50x50 pixels target_size is set to (50, 50). CNN uses 7042 pictures as trainig set and 1759 as test set, description of these pictures is in README file. Number of epocs equal to 25 is enough to achieve good results.

In [8]:
train_datagen = ImageDataGenerator(rescale = 1./255,
                                   shear_range = 0.2,
                                   zoom_range = 0.2,
                                   horizontal_flip = True)

test_datagen = ImageDataGenerator(rescale = 1./255)

training_set = train_datagen.flow_from_directory('data/training_set',
                                                 target_size = (50, 50),
                                                 batch_size = 32,
                                                 class_mode = 'binary')

test_set = test_datagen.flow_from_directory('data/test_set',
                                            target_size = (50, 50),
                                            batch_size = 32,
                                            class_mode = 'binary')

classifier.fit_generator(training_set,
                         samples_per_epoch = 7042,
                         nb_epoch = 25,
                         validation_data = test_set,
                         nb_val_samples = 1759)

Found 7042 images belonging to 2 classes.
Found 1759 images belonging to 2 classes.




Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x1b4bd58ae10>

# Results.
After last epoc you can see that accuracy for trainig set is 87.74% and fot test set 84.71%. Both values are above 80% which is a great outcome and they are close to eachother and that provides the information that overfitting did not occur. I could probably obtain higher accuracy by using more pictures and more hidden layers or neurons in the hidden layer but that would increase the needed computing power.