# **Python Colab Notebook**

**Kaggle Competition** "Usos del suelo desde el espacio"

**Equipo** "gnh_2020"

**Integrantes** Gabriel Cervera, Nicolás Delgado, Héctor Quintero

# **Librerías necesarias**

In [None]:
import csv
import cv2
import numpy as np
import matplotlib.pyplot as plt
import os
import pathlib
import pandas as pd
import scipy

%tensorflow_version 2.x
import tensorflow as tf

from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [None]:
# Display the GPU device name that TensorFlow reports for this runtime
tf.test.gpu_device_name()

# **Cargar los datos**

In [None]:
# Mount Google Drive at /content/drive; the dataset and checkpoint paths
# used throughout the notebook live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Folder (on the mounted Drive) that contains the competition images
DATA_DIR = '/content/drive/My Drive/Competicio_Redes_Neuronales'
# Class names: the entries of train/train, in sorted order
_train_root = os.path.join(DATA_DIR, 'train', 'train')
CLASS_NAMES = os.listdir(_train_root)
CLASS_NAMES.sort()
print(CLASS_NAMES)

In [None]:
# Read every training image from its class folder; the folder name is the
# label of each image it contains.
train_images = []
train_labels = []

for class_name in CLASS_NAMES:
  class_dir = os.path.join(DATA_DIR, 'train', 'train', class_name)
  for file_name in os.listdir(class_dir):
    image_path = os.path.join(class_dir, file_name)
    image = cv2.imread(image_path)
    if image is None:
      # cv2.imread returns None instead of raising when a file cannot be
      # decoded; skip it so images and labels stay aligned and the saved
      # array contains only real images.
      print('Skipping unreadable image: ' + image_path)
      continue
    train_images.append(image)
    train_labels.append(class_name)

# Cache both arrays inside DATA_DIR. The '/' separator matters: the loading
# cell reads DATA_DIR + '/train_images.npy' and DATA_DIR + '/train_labels.npy'.
# The labels file must contain train_labels, not a second copy of the images.
np.save(DATA_DIR + '/train_images.npy', train_images)
np.save(DATA_DIR + '/train_labels.npy', train_labels)



## Train

In [None]:
# Load the cached training images and their labels from DATA_DIR
train_images, train_labels = (
    np.load(os.path.join(DATA_DIR, cached_file))
    for cached_file in ('train_images.npy', 'train_labels.npy')
)

In [None]:
# Sanity check: display the shapes of the loaded images and labels
np.shape(train_images), np.shape(train_labels)

## Separación en Sub-Train y Validación

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 15% of the training set for validation; the fixed random_state
# keeps the split reproducible between runs.
_split = train_test_split(
    train_images,
    train_labels,
    test_size=0.15,
    random_state=42,
)
sub_train_images, validation_images, sub_train_labels, validation_labels = _split

In [None]:
# Sanity check: display the shapes of the sub-train and validation splits
np.shape(sub_train_images), np.shape(sub_train_labels), np.shape(validation_images), np.shape(validation_labels)

## Test

In [None]:
# Test images (cached array stored in DATA_DIR)
test_images = np.load(os.path.join(DATA_DIR, 'test_images.npy'))

# File names of the test images, in the order os.listdir returns them.
# NOTE(review): presumably test_images.npy was built in this same order;
# verify, because the submission cell pairs test_names[i] with prediction i.
path = DATA_DIR + '/validacion/validacion/'
test_names = list(os.listdir(path))

In [None]:
# Sanity check: display the shapes of the test images and of their file-name list
np.shape(test_images), np.shape(test_names)

# Modelo

## Creación

In [None]:
IMG_SHAPE = (256, 256, 3)

def create_model():
  """Build and compile the transfer-learning classifier.

  The network is an ImageNet-pretrained ResNet50 backbone (no top, global
  average pooling) followed by a 45-unit softmax layer.

  Returns:
    The compiled tf.keras.Sequential model.
  """
  # Pretrained backbone (replace this line to try another pretrained model)
  base_model = tf.keras.applications.ResNet50(input_shape=IMG_SHAPE, include_top=False, pooling = 'avg', weights='imagenet')

  # Start from an empty Sequential model and stack the backbone on it
  model = tf.keras.Sequential()

  model.add(base_model)

  model.add(keras.layers.Flatten())
  # Optional hidden layers, left disabled:
  # model.add(keras.layers.Dense(1024, activation='relu'))
  # model.add(keras.layers.Dense(512, activation='relu'))
  # model.add(keras.layers.Dense(256, activation='relu'))
  # model.add(keras.layers.Dense(128, activation='relu'))
  # model.add(keras.layers.Dense(64, activation='relu'))

  # Classification layer: one softmax output per class
  model.add(keras.layers.Dense(45, activation='softmax'))

  # The last layer already outputs softmax probabilities, so the loss must
  # be told it is NOT receiving logits; from_logits=True would apply softmax
  # a second time inside the loss. 'learning_rate' is the current name of
  # the deprecated 'lr' argument.
  model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.0001), loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False), metrics=['accuracy'])

  return model

In [None]:
# Creando el modelo
model = create_model()
model.summary()

### Cargando modelos guardados (opcional)

In [None]:
# Restore a checkpoint written by the training loop and score it on the
# validation split. The training loop saves under '/content/drive/My Drive/',
# so the checkpoint has to be read from that same folder.
model.load_weights('/content/drive/My Drive/model_resnet50_epoch_12.h5')
validation_loss, validation_acc = model.evaluate(validation_images, validation_labels, batch_size = 1)

(validation_loss, validation_acc)

## Entrenando el Modelo

In [None]:
# Train one epoch per iteration so a checkpoint can be written after each epoch
for epoch_number in range(1, 21):
  history = model.fit(
      sub_train_images,
      sub_train_labels,
      epochs=1,
      validation_data=(validation_images, validation_labels),
  )

  # Checkpoint file is named after the 1-based epoch number
  model.save_weights('/content/drive/My Drive/model_resnet50_epoch_{}.h5'.format(epoch_number))

## Entrenando modelo con todos los datos

In [None]:
# Train on the complete training set (no validation split), one epoch per
# iteration, writing a checkpoint after each epoch.
# NOTE(review): these file names coincide with the ones written by the
# sub-train loop, so those checkpoints get overwritten; confirm this is intended.
for epoch_number in range(1, 11):
  history = model.fit(train_images, train_labels, epochs=1)

  # Checkpoint file is named after the 1-based epoch number
  model.save_weights('/content/drive/My Drive/model_resnet50_epoch_{}.h5'.format(epoch_number))

In [None]:
# Realizando predicciones
# Predicted class index for every test image. predict_classes is deprecated
# (and missing from recent TensorFlow releases); for a softmax classifier the
# documented equivalent is the argmax over the output of predict().
predictions = np.argmax(model.predict(test_images), axis=-1)

# Modelo con ImageDataGenerator

## Generadores

In [None]:
batch_size = 32
_target_size = (256, 256)

# Training batches: pixel values scaled by 1/255, with rotation (range 45)
# and horizontal-flip augmentation, read from the class folders of train/train.
_train_augmenter = keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    rotation_range=45,
    horizontal_flip=True,
)
train_generator = _train_augmenter.flow_from_directory(
    DATA_DIR + '/train/train',
    shuffle=True,
    target_size=_target_size,
    batch_size=batch_size,
    class_mode='categorical',
)

# Validation batches: only the 1/255 scaling, no augmentation
_validation_rescaler = keras.preprocessing.image.ImageDataGenerator(rescale=1./255)
validation_generator = _validation_rescaler.flow_from_directory(
    DATA_DIR + '/validacion/validacion (separadas)',
    target_size=_target_size,
    batch_size=batch_size,
    class_mode='categorical',
)

In [None]:
# Display the class-name -> class-index mapping used by the validation generator
validation_generator.class_indices

## Creación

In [None]:
# Creando el modelo
model = create_model()
model.summary()

## Cargando modelos guardados (Opcional)

In [None]:
# Restore a saved checkpoint, then score it on the validation generator.
# NOTE(review): the file name mentions densenet201 while create_model as
# written builds ResNet50; presumably the backbone line was edited before
# this run. Verify the architecture matches the checkpoint.
_checkpoint_path = '/content/drive/My Drive/model_densenet201_generatorV2_12_epoch_14.h5'
model.load_weights(_checkpoint_path)

_scores = model.evaluate_generator(generator=validation_generator, verbose=1)
validation_loss, validation_acc = _scores
(validation_loss, validation_acc)

## Entrenando el modelo

In [None]:
# Train one epoch per iteration and write a checkpoint after each epoch.
# The step counts come from len(generator), the number of batches needed to
# cover the generator's images once, instead of hard-coded sample counts that
# silently go stale when the dataset or batch_size changes.
for j in range(0, 30):
  print("Epoch: " + str(j) + " ---------- ")

  model.fit_generator(
          train_generator,
          steps_per_epoch = len(train_generator),
          epochs = 1,
          validation_data = validation_generator,
          validation_steps = len(validation_generator))

  # Checkpoint file is named after the 1-based epoch number
  model.save_weights('/content/drive/My Drive/model_densenet201_generatorV2_12_epoch_' + str((j+1)*1) + '.h5')

## Generando predicciones

In [None]:
# Realizando predicciones
validation_generator.reset()
predictions = model.predict_generator(validation_generator, workers=0)

results = []
for i in range(len(predictions)):
  results.append((validation_generator.filenames[validation_generator.index_array[i]].split('/')[1], predictions[i]))
results = sorted(results)

# Predicciones para fusión de modelos
densenet201_generator_predictions = []
for i in range(len(results)):
  densenet201_generator_predictions.append(results[i][1])

# Predicciones propias del modelo
predictions = np.argmax(densenet201_generator_predictions, axis=-1)

In [None]:
# Display the output vector stored at index 1 as a spot check
densenet201_generator_predictions[1]

# Fusion de modelos

## Cargando modelos

In [None]:
# Model Xception
json_file = open('/content/drive/My Drive/Competicio_Redes_Neuronales/modelos_fusion/model_xception.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
model_xception = keras.models.model_from_json(loaded_model_json)
model_xception.load_weights("/content/drive/My Drive/Competicio_Redes_Neuronales/modelos_fusion/model_xception.h5")
model_xception.compile(optimizer=tf.keras.optimizers.RMSprop(lr=0.0001), loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True), metrics=['accuracy'])

# Model Resnet50
json_file = open('/content/drive/My Drive/Competicio_Redes_Neuronales/modelos_fusion/model_resnet50.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
model_resnet50 = keras.models.model_from_json(loaded_model_json)
model_resnet50.load_weights("/content/drive/My Drive/Competicio_Redes_Neuronales/modelos_fusion/model_resnet50.h5")
model_resnet50.compile(optimizer=tf.keras.optimizers.RMSprop(lr=0.0001), loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True), metrics=['accuracy'])

# Model Resnet152
json_file = open('/content/drive/My Drive/Competicio_Redes_Neuronales/modelos_fusion/model_resnet152.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
model_resnet152 = keras.models.model_from_json(loaded_model_json)
model_resnet152.load_weights("/content/drive/My Drive/Competicio_Redes_Neuronales/modelos_fusion/model_resnet152.h5")
model_resnet152.compile(optimizer=tf.keras.optimizers.RMSprop(lr=0.0001), loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True), metrics=['accuracy'])

# Model DenseNet121
json_file = open('/content/drive/My Drive/Competicio_Redes_Neuronales/modelos_fusion/model_densenet121.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
model_densenet121 = keras.models.model_from_json(loaded_model_json)
model_densenet121.load_weights("/content/drive/My Drive/Competicio_Redes_Neuronales/modelos_fusion/model_densenet121.h5")
model_densenet121.compile(optimizer=tf.keras.optimizers.RMSprop(lr=0.0001), loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True), metrics=['accuracy'])

# Model DenseNet169
json_file = open('/content/drive/My Drive/Competicio_Redes_Neuronales/modelos_fusion/model_densenet169.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
model_densenet169 = keras.models.model_from_json(loaded_model_json)
model_densenet169.load_weights("/content/drive/My Drive/Competicio_Redes_Neuronales/modelos_fusion/model_densenet169.h5")
model_densenet169.compile(optimizer=tf.keras.optimizers.RMSprop(lr=0.0001), loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True), metrics=['accuracy'])

# Model DenseNet201
json_file = open('/content/drive/My Drive/Competicio_Redes_Neuronales/modelos_fusion/model_densenet201.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
model_densenet201 = keras.models.model_from_json(loaded_model_json)
model_densenet201.load_weights("/content/drive/My Drive/Competicio_Redes_Neuronales/modelos_fusion/model_densenet201.h5")
model_densenet201.compile(optimizer=tf.keras.optimizers.RMSprop(lr=0.0001), loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True), metrics=['accuracy'])

## Vote fusion

In [None]:
# Class index predicted by each fusion model for every test image.
# predict_classes is deprecated and only ever existed on Sequential models;
# the argmax over predict() works for any model and gives the same class.
xception_predictions = np.argmax(model_xception.predict(test_images), axis=-1)
resnet50_predictions = np.argmax(model_resnet50.predict(test_images), axis=-1)
resnet152_predictions = np.argmax(model_resnet152.predict(test_images), axis=-1)
densenet121_predictions = np.argmax(model_densenet121.predict(test_images), axis=-1)
densenet169_predictions = np.argmax(model_densenet169.predict(test_images), axis=-1)
densenet201_predictions = np.argmax(model_densenet201.predict(test_images), axis=-1)

In [None]:
# scipy.stats is a submodule: a bare 'import scipy' does not guarantee it is
# loaded, so import it explicitly here.
from scipy import stats

# One row per voting model, one column per test image.
# NOTE(review): densenet169 is left out of the vote, exactly as in the
# original cell; presumably a deliberate model selection.
votes = np.stack([xception_predictions, resnet50_predictions, resnet152_predictions, densenet121_predictions, densenet201_predictions])

# Most voted class per image, computed for however many images there are
# instead of a hard-coded 3080. np.ravel flattens the result whether this
# SciPy version keeps the reduced axis or drops it.
predictions = np.ravel(stats.mode(votes, axis=0)[0]).tolist()


## Otras medidas

In [None]:
# Full output vector of every fusion model for every test image. These
# reuse (and so replace) the variable names that held class indices for the vote.
(
    xception_predictions,
    resnet50_predictions,
    resnet152_predictions,
    densenet121_predictions,
    densenet169_predictions,
    densenet201_predictions,
) = (
    fusion_model.predict(test_images)
    for fusion_model in (
        model_xception,
        model_resnet50,
        model_resnet152,
        model_densenet121,
        model_densenet169,
        model_densenet201,
    )
)

## Media Armónica

Consideremos $n$ modelos distintos para fusionar. Para cada imagen, cada modelo devuelve un vector de $45$ elementos en el que el elemento de la posición $k$ es la probabilidad de pertenencia a la clase $k$. Sean pues:
$$X_1 = \text{modelo } 1,$$
$$X_2 = \text{modelo } 2,$$
$$ \vdots $$
$$X_n = \text{modelo } n,$$
El modelo $j$  viene dado por la siguiente matriz:
$$ X_j =
\left( \begin{array}{cccc}
 p_{j,1}^1 & p_{j,1}^2 & \cdots & p_{j,1}^{45} \\ 
 p_{j,2}^{1} & p_{j,2}^{2} & \cdots & p_{j,2}^{45} \\
 \vdots & \vdots & \ddots & \vdots \\
 p_{j,3080}^{1} & p_{j,3080}^{2} & \cdots & p_{j,3080}^{45}
\end{array} \right) $$
Podemos aplicar distintas medidas de centralización para obtener probabilidades medias a partir de todos los modelos. Calcularemos la media armónica de las matrices, obteniendo así una matriz de probabilidades que fusiona los $n$ modelos. La media armónica de las probabilidades de que la primera imagen pertenezca a la clase 1 viene dada por:
$$H = \frac{n}{1/p_{1,1}^1 + 1/p_{2,1}^1 + \cdots + 1/p_{n,1}^1}. $$
La media armónica resulta poco influida por la existencia de determinados valores mucho más grandes que el conjunto de los otros, siendo en cambio sensible a valores mucho más pequeños que el conjunto. La media armónica no está definida en el caso de que exista algún valor nulo.

In [None]:
# Harmonic mean of the five selected models' outputs, then the class with
# the highest fused score for every image.
_harmonic_inputs = (
    xception_predictions,
    resnet152_predictions,
    densenet121_predictions,
    densenet169_predictions,
    densenet201_predictions,
)
_reciprocal_sum = sum(1.0/model_output for model_output in _harmonic_inputs)
predictions = np.argmax(5.0/_reciprocal_sum, axis=-1)

## Media Geométrica

In [None]:
  # Geometric mean of four models: fourth root of the element-wise product
  # of their outputs, then the class with the highest fused score per image.
  _joint = xception_predictions * resnet152_predictions
  _joint = densenet121_predictions * _joint
  _joint = densenet201_predictions * _joint
  predictions = np.argmax(np.power(_joint, 0.25), axis=-1)

## Modelo más seguro de su clasificación

In [None]:
# Nos quedamos con la predicción del modelo que más seguro esté de su desición
predictions = []

for img in range(3080):
  m = np.max(densenet201_generator_predictions[img])
  i = np.argmax(densenet201_generator_predictions[img])

  if(m <= np.max(densenet201_predictions[img])):
    m = np.max(densenet201_predictions[img])
    i = np.argmax(densenet201_predictions[img])
  
  predictions.append(i)

# Guardando predicciones

In [None]:
# Generando archivo .csv
with open('/content/drive/My Drive/submission.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(["ID", "Category"])
    for i in range(len(predictions):
          writer.writerow([str(test_names[i]), str(predictions[i])])