In [16]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import cv2
import re
from tqdm import tqdm
sns.set()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [17]:
# Mount Google Drive so files stored there are reachable from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')
# Make the project folder the working directory; the relative paths used later
# in the notebook (dataset folders, checkpoint, submission file) resolve from here.
%cd "/content/drive/My Drive/Diplomado ML/Python for datascience/Image Challenging"

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/My Drive/Diplomado ML/Python for datascience/Image Challenging


# **DATA**

In [0]:
# Side length, in pixels, of the square every image is resized to.
img_size=150

In [0]:
# Read the training images; the class of each image is encoded in its file name.
path = "ia-pucp-diplomado-img-challenge/train/train/"
# Group 1 captures everything before the first "_<digit>" in the file name,
# which is used as the class label.
regex = re.compile('(.*?)(_[0-9])')
x, y = [], []
skipped = []  # entries that could not be labelled or decoded
# os.listdir() gives no ordering guarantee; sorting keeps the sample order, and
# therefore the seeded train/validation split built from it, reproducible.
for p in tqdm(sorted(os.listdir(path))):
    match = regex.search(p)
    if match is None:
        # No "_<digit>" in the name, so no label can be derived from it.
        skipped.append(p)
        continue
    category = match.group(1)
    img_array = cv2.imread(os.path.join(path, p))  # BGR array, or None if unreadable
    if img_array is None:
        # cv2.imread returns None instead of raising for files it cannot decode;
        # passing that None on to cvtColor would fail with an opaque error.
        skipped.append(p)
        continue
    new_img_array = cv2.cvtColor(img_array, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR; convert to RGB
    new_img_array = cv2.resize(new_img_array, dsize=(img_size, img_size))  # uniform image size
    x.append(new_img_array)  # images as arrays
    y.append(category)  # matching class labels
if skipped:
    print("Skipped %d unusable file(s): %s" % (len(skipped), skipped))

In [20]:
# Stack the image list into one array shaped
# (batch, height, width, channels); channels is 3 because the images are RGB.
x = np.reshape(np.array(x), (-1, img_size, img_size, 3))
y = np.array(y)
(x.shape, y.shape)

((1341, 150, 150, 3), (1341,))

# **DATA SPLIT Y LABEL ENCODER**

In [0]:
from sklearn.preprocessing import LabelEncoder
# Map class names to integer ids. `encoder` is kept so predictions can be
# decoded back to class names later in the notebook.
# NOTE: `y` is overwritten in place, so re-running this cell would re-fit the
# encoder on the integer ids instead of the original class names.
encoder = LabelEncoder().fit(y)
y = encoder.transform(y)

In [0]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the labelled images for validation; the seed is fixed.
X_train, X_test, y_train, y_test = train_test_split(
    x, y,
    test_size=0.2,
    random_state=2019,
)
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

# **IMAGE GENERATION**

In [0]:
from keras.preprocessing.image import ImageDataGenerator

img_width, img_height, channels = 150, 150, 3
batch_size = 32

# Training images are rescaled to [0, 1] and randomly zoomed / flipped.
# featurewise_center / featurewise_std_normalization are deliberately NOT set:
# they only take effect after `train_datagen.fit(...)` has computed the dataset
# statistics, and no such call is visible in this notebook, so Keras would just
# warn and skip them. The validation and test images are only rescaled, so
# keeping the flags out also keeps training inputs consistent with them.
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    zoom_range=0.2,
    horizontal_flip=True)

# Validation images get the same rescaling and no augmentation.
test_datagen = ImageDataGenerator(rescale=1. / 255)

train_generator = train_datagen.flow(X_train, y_train, batch_size=batch_size)

validation_generator = test_datagen.flow(X_test, y_test, batch_size=batch_size)

# **Transfer learning**

In [0]:
from keras import applications
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras import backend as K

In [0]:
# VGG16 with ImageNet weights; include_top=False requests the network without
# its top (classifier) layers so a custom head can be attached instead.
vgg16 = applications.VGG16(weights='imagenet', include_top=False, 
                           input_shape=(img_width, img_height, channels))

In [0]:
# Classification head that sits on top of the VGG16 output.
top_model = Sequential([
    Flatten(input_shape=vgg16.output_shape[1:]),  # VGG16 output -> flat vector
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax'),  # 10 output probabilities
])

In [0]:
# Full network: every VGG16 layer in order, followed by the classification head.
model = Sequential(list(vgg16.layers) + [top_model])

In [0]:
# Freeze every layer except the last one (the classification head), so only
# the head's weights are updated during training.
n_frozen = len(model.layers) - 1
for idx in range(n_frozen):
    model.layers[idx].trainable = False

In [0]:
# Labels are integer-encoded, hence the sparse categorical cross-entropy loss.
model.compile(optimizer='rmsprop',loss='sparse_categorical_crossentropy',metrics=['accuracy'])
# Print the layer-by-layer summary of the assembled model.
model.summary()

In [0]:
# Stop training early to limit overfitting.
# min_delta: threshold used to decide whether val_loss is still improving;
# patience: how many epochs without improvement are tolerated before stopping.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10, min_delta=0.0001)
# Save only the best model seen so far, judged by validation accuracy.
# NOTE(review): the logged metric name depends on the Keras version
# ('val_acc' vs 'val_accuracy') — confirm it matches the installed release.
mc = ModelCheckpoint('best_model.hdf5', monitor='val_acc', verbose=1, save_best_only=True, mode='max')

In [226]:
# Train the classification head on augmented batches.
# NOTE: the `es` / `mc` callbacks are intentionally not passed here (as in the
# original cell), so this run neither stops early nor writes 'best_model.hdf5'.
# To enable them, add `callbacks=[es, mc]` to the call.
model.fit_generator(
    train_generator,
    validation_data=validation_generator,
    epochs=10,  # 100 was used in an earlier experiment
    steps_per_epoch=60,
    # Exactly one pass over the validation set per epoch. A larger fixed value
    # makes the generator wrap around, so some validation samples are counted
    # more often than others in val_loss / val accuracy.
    validation_steps=len(validation_generator))

Epoch 1/10




Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fdcf7071c18>

# **Evaluación**

In [97]:
# Read the (unlabelled) test images; file names are kept for the submission file.
path = "ia-pucp-diplomado-img-challenge/test/test/"
x_test = []
test_names = []
skipped_test = []  # entries that could not be decoded as images
# Sorted for a deterministic order across runs and file systems.
for p in tqdm(sorted(os.listdir(path))):
    img_array = cv2.imread(os.path.join(path, p))  # BGR array, or None if unreadable
    if img_array is None:
        # cv2.imread returns None instead of raising for files it cannot decode;
        # passing that None on to cvtColor would fail with an opaque error.
        skipped_test.append(p)
        continue
    new_img_array = cv2.cvtColor(img_array, cv2.COLOR_BGR2RGB)  # same preprocessing as training
    new_img_array = cv2.resize(new_img_array, dsize=(img_size, img_size))
    x_test.append(new_img_array)
    # Record the name only after a successful load so that test_names and
    # x_test always stay aligned index by index.
    test_names.append(p)
if skipped_test:
    print("Skipped %d unreadable test file(s): %s" % (len(skipped_test), skipped_test))

100%|██████████| 530/530 [04:05<00:00,  2.27it/s]


In [98]:
# Stack the test images into one (batch, height, width, channels) array.
x_test = np.reshape(np.array(x_test), (-1, img_size, img_size, 3))
np.shape(x_test)

(530, 150, 150, 3)

In [0]:
# Scale pixel values by 1/255, matching the `rescale` used by the generators.
# NOTE: this overwrites x_test, so running the cell twice rescales twice.
x_test = np.true_divide(x_test.astype(np.float32), 255)

**Cargando el modelo**

In [0]:
# Load the best model from the checkpoint file.
# NOTE(review): the prediction cell below calls `model`, not `best_model`, and
# the checkpoint callback is commented out in the training call, so this file
# presumably comes from an earlier run — confirm it exists and is intended.
from keras.models import load_model
best_model=load_model('best_model.hdf5')

**Predicción**

In [227]:
# Class probabilities for every test image, from the model trained above.
precomputed_test = model.predict(x_test)
print(np.shape(precomputed_test))

(530, 10)


In [228]:
# Inspect the predicted probability vector of the first test image.
precomputed_test[0]

array([0.18195708, 0.00568192, 0.01182612, 0.00595256, 0.02952376,
       0.0255202 , 0.55525136, 0.02405876, 0.1560277 , 0.00420056],
      dtype=float32)

In [0]:
# Index of the highest-probability class for each test image.
prueba = list(np.argmax(precomputed_test, axis=1))

In [0]:
# Turn the predicted integer ids back into the labels the encoder was fitted on.
classes=encoder.inverse_transform(prueba)

In [0]:
# Write the submission file: one row per test image, ordered by file name.
data = {'file': test_names, 'label': classes}
df_sub = pd.DataFrame(data).sort_values(by='file')
df_sub.to_csv('submission2.csv', index=False)