In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive.
# force_remount=True asks for a fresh mount even if one is already in place.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

In [None]:
%cd drive/MyDrive/Security_copy/Andres2Windows/Andres/Advance_compuation/Project3/
# !tar -xvf fastai-datasets-kaggle-galaxy-zoo-the-galaxy-challenge-1.tar
# !ls images_training_rev1/
# !ls images_test_rev1/
# !tar -xzvf images_training_rev1.tar.gz
# !pwd
# !ls images_training_rev1/

In [None]:
import pandas as pd
from keras.preprocessing.image import ImageDataGenerator
# This is to run here in my pc
# import os
# os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
# os.environ["CUDA_VISIBLE_DEVICES"] = ""

def append_ext(filename):
    """Return ``filename`` with the ``.jpg`` extension appended.

    ``filename`` must be a string; it is concatenated with the extension
    as-is, without checking whether an extension is already present.
    """
    extension = ".jpg"
    return filename + extension


def Wrapper1ImageDataGenerator():
    """
    Build the ``ImageDataGenerator`` that defines the image augmentation.

    Settings and their motivation:
      fill_mode='nearest': points that fall outside the boundaries of the
        input image after a transformation are filled from the nearest
        pixel. For a well centered galaxy this is the background color, so
        the choice makes physical sense.
      rescale=1/255: multiplies every pixel value by 1/255, which maps
        8-bit intensities (0..255) onto the range 0..1.
      rotation_range=180: random rotations between -180 and 180 degrees;
        the morphology of a galaxy does not depend on its orientation.
      width_shift_range / height_shift_range=0.1: small random shifts only,
        because some galaxies already sit close to the image border.
      horizontal_flip / vertical_flip: random mirroring, justified in the
        same way as the rotations.
      validation_split=0.2: reserves 20% of the samples for the
        "validation" subset, a conventional split.

    Returns:
      The configured ``ImageDataGenerator`` instance.
    """
    augmentation = dict(
        fill_mode='nearest',
        rescale=1. / 255,
        rotation_range=180,
        width_shift_range=0.1,
        height_shift_range=0.1,
        horizontal_flip=True,
        vertical_flip=True,
        validation_split=0.2,
    )
    return ImageDataGenerator(**augmentation)


def Wrapper1DataGenerator(dict_params):
    """
    Create a dataframe-backed batch iterator for one subset of the data.

    Required keys of ``dict_params``:
      data_generator: object exposing ``flow_from_dataframe`` (for example
        the generator returned by ``Wrapper1ImageDataGenerator``).
      dataframe: table passed through as ``dataframe``; it must contain an
        "id" column with the image file names and the target columns.
      directory: folder that contains the image files.
      subset_name: subset to draw from, e.g. "training" or "validation".

    Optional keys (the defaults reproduce the former hard-coded values):
      y_col: names of the target columns. Defaults to the module-level
        ``classes`` list, which is only looked up when this key is absent.
      batch_size: samples per batch, default 64 (a power of two, chosen on
        the assumption that it gives better performance).
      target_size: size the images are resized to, default (224, 224).
      shuffle: whether to shuffle the samples, default True.

    ``class_mode`` is "raw" because the targets are plain numerical values:
    the probabilities of belonging to each class.

    Returns:
      Whatever ``flow_from_dataframe`` returns (a batch iterator).
    """
    # Resolve the global lazily so callers that pass "y_col" do not depend on
    # `classes` having been defined by another notebook cell.
    if "y_col" in dict_params:
        target_columns = dict_params["y_col"]
    else:
        target_columns = classes

    return dict_params["data_generator"].flow_from_dataframe(
        dataframe=dict_params["dataframe"],
        directory=dict_params["directory"],
        x_col="id",
        y_col=target_columns,
        subset=dict_params["subset_name"],
        batch_size=dict_params.get("batch_size", 64),
        seed=123,  # fixed seed keeps shuffling/augmentation reproducible
        shuffle=dict_params.get("shuffle", True),
        class_mode="raw",
        target_size=dict_params.get("target_size", (224, 224)))



In [None]:
# Target column names "Class<question>.<answer>": the tuple gives the number of
#   answers for questions 1..11, which yields the 37 columns Class1.1 .. Class11.6.
classes = [
    "Class%d.%d" % (question, answer)
    for question, n_answers in enumerate((3, 2, 2, 2, 4, 2, 3, 7, 3, 3, 6),
                                         start=1)
    for answer in range(1, n_answers + 1)
]

# Probabilities of every galaxy for each of the classes above.
traindf = pd.read_csv('training_solutions_rev1.csv')

# New "id" column = GalaxyID as text + ".jpg", so that each row carries the
#   file name of its image.
traindf["id"] = traindf['GalaxyID'].astype(str).map(append_ext)

# Generator object that holds the augmentation settings.
datagen = Wrapper1ImageDataGenerator()

# Shared description of how batches are produced; only the subset differs
#   between the training and the validation iterator.
params = dict(dataframe=traindf, data_generator=datagen,
              subset_name="training", directory="images_training_rev2")
train_generator = Wrapper1DataGenerator(params)

params.update(subset_name="validation")
valid_generator = Wrapper1DataGenerator(params)

# Number of whole batches per epoch; the floor division leaves out a final
#   partial batch, if there is one.
STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
STEP_SIZE_VALID = valid_generator.n // valid_generator.batch_size


Next, we use ResNet-50 as the base network:

ResNet-50 is a convolutional neural network that is 50 layers deep. Its pre-trained version has been trained on more than a million images from the ImageNet database [1] and can classify images into 1000 object categories, such as keyboard, mouse, pencil, and many animals.

In [None]:
from keras.applications.resnet50 import ResNet50
from keras.layers import Flatten, Dense
from keras.models import Model
from keras import optimizers
from keras.callbacks import Callback
from keras.callbacks import ModelCheckpoint, Callback, EarlyStopping
from keras.models import load_model
import tensorflow as tf


# Build the network: a ResNet-50 base whose last layer is replaced by a new
#   output layer with one unit per target column.
img_shape = (224, 224, 3)
# include_top=False leaves out ResNet-50's own final classification layers.
resnet_model = ResNet50(include_top=False, input_shape=img_shape)
# Flatten the output of the last ResNet layer before the Dense output layer.
x = Flatten()(resnet_model.output)
# Output layer (number of outputs = 37); sigmoid keeps every output in [0, 1].
x = Dense(len(classes), activation='sigmoid')(x)
# Assemble the modified model.
model = Model(inputs=resnet_model.input, outputs=x)

# In order to obtain better results every layer is set to trainable, i.e. the
#   whole network is fine-tuned rather than only the new output layer.
for layer in model.layers:
    layer.trainable = True

# `learning_rate` is the current name of this argument; `lr` is only kept by
#   Keras as a deprecated alias.
optimizer = optimizers.Adam(learning_rate=0.001, decay=5e-4)

# The mean squared error is the training loss.
# NOTE(review): the targets are continuous probabilities, so "accuracy" is
#   probably not an informative metric here. It is kept because the
#   checkpoint file names are built from `val_accuracy`; confirm before changing.
model.compile(optimizer, loss='mse', metrics=["accuracy"])

# The callbacks are used to save the values of the current state in the training

# Class to save the losses in training and validation
class LossHistory(Callback):
    """Callback that records the loss values reported during training.

    Attributes:
      losses: training loss reported at the end of every batch.
      val_losses: validation loss reported at the end of every epoch.
    """

    def on_train_begin(self, logs=None):
        # Start from empty histories every time training begins.
        self.losses = []
        self.val_losses = []

    def on_batch_end(self, batch, logs=None):
        # `logs` defaults to None instead of a shared mutable dict.
        logs = logs or {}
        self.losses.append(logs.get('loss'))

    def on_epoch_end(self, epoch, logs=None):
        # Validation runs once per epoch, so 'val_loss' only appears in the
        # epoch-end logs; reading it at batch end always stored None.
        logs = logs or {}
        self.val_losses.append(logs.get('val_loss'))

# Callback instance that records the losses reported during training.
history = LossHistory()

# Make the algorithm stop if there is no improvement: `val_loss` is monitored
#   and training ends after 5 epochs without improvement (patience=5), on the
#   assumption that further epochs will not give better results.
early_stopping = EarlyStopping(
    monitor='val_loss', patience=5, verbose=1, mode='auto')


class CheckPointCallBacks(tf.keras.callbacks.ModelCheckpoint):
    """ModelCheckpoint whose file names continue an earlier epoch count.

    When training is resumed, Keras numbers the epochs from the start again.
    This callback keeps its own counter, starting at ``init_epoch``, and uses
    it in the checkpoint file name.

    Args:
      filepath: initial checkpoint path handed to ``ModelCheckpoint``; it is
        replaced by the generated name at the end of every epoch.
      verbose: verbosity level forwarded to ``ModelCheckpoint``.
      save_best_only: forwarded to ``ModelCheckpoint``.
      init_epoch: number of epochs already completed in earlier runs.
    """

    def __init__(self, filepath, verbose, save_best_only, init_epoch):
        super().__init__(filepath=filepath, verbose=verbose,
                         save_best_only=save_best_only)
        self.effective_epoch = init_epoch

    def on_epoch_end(self, epoch, logs=None):
        self.effective_epoch += 1
        # The epoch number is filled in here; the `{val_accuracy}` placeholder
        # is left for ModelCheckpoint to format from `logs`.
        self.filepath = "train_param/weights-improvement-%02d-{val_accuracy:.2f}.hdf5" % (self.effective_epoch)
        # Delegate to ModelCheckpoint so the model is actually written. Without
        # this call the override replaced the saving logic and nothing was saved.
        super().on_epoch_end(epoch, logs)


# Resume training from the checkpoint written after epoch 10.
actual_epoch = 10
filepath = "train_param/weights-improvement-10-0.76.hdf5"
# load_model returns the saved model, which replaces any `model` built earlier.
model = load_model(filepath)
checkpointer = CheckPointCallBacks(filepath=filepath, verbose=2,
                                   save_best_only=True,
                                   init_epoch=actual_epoch)

# NOTE: the former `model.save_weights(filepath.format(epoch=30))` call was
#   removed. `filepath` has no `{epoch}` placeholder, so the call wrote a
#   weights-only file over the full-model checkpoint that was just loaded,
#   leaving a file that `load_model` cannot read back.

# Keras counts these epochs from 1 again (up to 30 more); the checkpointer
#   keeps the continued count, starting from `actual_epoch`.
hist = model.fit_generator(
    train_generator,
    steps_per_epoch=STEP_SIZE_TRAIN,
    validation_data=valid_generator,
    validation_steps=STEP_SIZE_VALID,
    epochs=30,
    callbacks=[history, checkpointer, early_stopping])


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30

In [None]:
import matplotlib.pyplot as plt

plt.figure(figsize=(12, 8))
plt.plot(hist.epoch, hist.history['loss'], label='Training Loss')
plt.plot(hist.epoch, hist.history['val_loss'], label='Validation', linestyle='--')
plt.title("RMSE vs Epochs")
plt.xlabel("Epochs")
plt.ylabel("RMSE")
plt.legend()
plt.savefig("TrainingResult2.png")
plt.show()
!ls

In [None]:
# Load a saved model from disk and rebind `model` to it.
# NOTE(review): this path is the epoch-10 checkpoint that training was resumed
#   from, not a file written by the run above. Confirm that it really is the
#   best result obtained.
from keras.models import load_model

model = load_model("train_param/weights-improvement-10-0.76.hdf5")


In [None]:
# Predict over the training generator. `steps` has to be a whole number of
# batches: the ceiling division also covers a final partial batch, whereas the
# former true division passed a float.
# NOTE(review): train_generator is created with shuffle=True and random
#   augmentation, so the rows of the result do not follow the order of
#   `traindf`. Confirm that this is intended.
model.predict_generator(
    train_generator,
    steps=(train_generator.n + train_generator.batch_size - 1)
    // train_generator.batch_size,
    verbose=1)