# In [None]:
'''
The code below is inspired by the link you put on the board: https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html

We built our model with only 2 classes: bottles and cans. Indeed, with this lab, we learned that
pre-processing the data takes a LONG LONG LONG LONG time. We actually downloaded the images from ImageNet, and the download program
is available in the public drive. However, a lot of corrupted images appeared, so cleaning all of them was tedious.

We have 700 images for each class for the training.
We have 290 images for each class for the validation.

The final accuracy depends on the number of epochs we use. With 50 epochs, the final accuracy is about 80%.
We know that if we used another network architecture (such as VGG16, which you mentioned), we would get a better accuracy.
The problem is that it was quite difficult for our PC to run the program, which is why we focused on the convnet.
'''
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras import backend as K
import warnings
warnings.simplefilter("ignore")  #ignore warnings about "Possibly corrupt EXIF data" -> images...


# Every image is resized to 150x150 so that all network inputs share the same dimensions.
img_width = img_height = 150

# Root folders of the training and validation images.
train_data_dir = 'data2/train'
validation_data_dir = 'data2/validation'

# Total image counts over both classes (700 + 700 for training, 290 + 290 for validation).
nb_train_samples = 1400
nb_validation_samples = 580

epochs = 50
batch_size = 10  # number of images fed to the network per step

# The position of the channel axis depends on how the Keras backend is configured.
input_shape = (
    (3, img_width, img_height)
    if K.image_data_format() == 'channels_first'
    else (img_width, img_height, 3)
)
    
    
# Build the CNN: three convolution + max-pooling stages followed by a small
# fully connected classifier ending in a single sigmoid unit (bottle vs. can).
model = Sequential([
    # Stage 1: 32 filters of shape 3x3; as the first layer it declares the input shape.
    Conv2D(32, (3, 3), input_shape=input_shape),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Stage 2: 32 filters of shape 3x3.
    Conv2D(32, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Stage 3: 64 filters of shape 3x3.
    Conv2D(64, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Classifier head: flatten the 3D feature maps into a 1D vector, then
    # a 64-unit hidden layer with dropout and one sigmoid output unit.
    Flatten(),
    Dense(64),
    Activation('relu'),
    Dropout(0.5),
    Dense(1),
    Activation('sigmoid'),
])

# Binary cross-entropy matches the single sigmoid output; RMSprop is the
# gradient-descent variant used to minimise it, and accuracy is reported
# as the evaluation metric.
model.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# The training set is small, so extra training images are derived from the
# existing ones on the fly: pixel values are rescaled to [0, 1] and random
# shear, zoom and horizontal flips are applied.
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    horizontal_flip=True,
    zoom_range=0.2,
    shear_range=0.2,
)

# Validation images are only rescaled; no augmentation is applied to them.
test_datagen = ImageDataGenerator(rescale=1.0 / 255)

# Batches of augmented training images read from the training folder.
# class_mode='binary' because there are only two classes.
train_generator = train_datagen.flow_from_directory(
    directory=train_data_dir,
    class_mode='binary',
    batch_size=batch_size,
    target_size=(img_width, img_height),
)

# Same setup for the validation folder, using the rescale-only generator.
validation_generator = test_datagen.flow_from_directory(
    directory=validation_data_dir,
    class_mode='binary',
    batch_size=batch_size,
    target_size=(img_width, img_height),
)

# Train the model from the two generators. Each epoch feeds
# nb_train_samples // batch_size batches to the network (1400 // 10 = 140
# with the values configured in this file) and then evaluates on
# nb_validation_samples // batch_size validation batches (580 // 10 = 58).
model.fit_generator(
    train_generator,
    epochs=epochs,
    steps_per_epoch=nb_train_samples // batch_size,
    validation_data=validation_generator,
    validation_steps=nb_validation_samples // batch_size,
)

# As recommended in the tutorial, always save the weights once training is done.
model.save_weights('first_try.h5')

'''
CONCLUSION:
When our data teachers told us that data scientists spend 80% of their time cleaning the data, we did not realize
how right they were.
'''