In [1]:
# Importando bibiliotecas 
import tensorflow 
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
import keras_tuner 
from google.cloud import aiplatform

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import numpy
import pandas
import json, os

In [1]:
# Declarando variáveis
REGION = "us-central1"
PROJECT_ID = "ml-demos-garrido"
MODEL_PATH='gs://'+PROJECT_ID+'-bucket/model/'
DATASET_PATH='gs://'+PROJECT_ID+'/area_cover_dataset.csv'
PIPELINE_ROOT = 'gs://'+PROJECT_ID
MODEL_ARTIFACTS_LOCATION ='gs://'+PROJECT_ID+'-bucket/'

In [3]:
# Trazendo os dados para a RAM
area_cover_dataframe = pandas.read_csv(DATASET_PATH)

In [4]:
# Pré-processamento de dados (indexing)
def index(dataframe):
    
    categoricals = dataframe.dtypes.loc[lambda x: x == 'O'].index.tolist()
    reverse_transform = {}
    
    for col in categoricals:
        numerical = pandas.Series(dataframe[col].unique()).reset_index().set_index(0).to_dict()["index"]
        reverse_transform[col] = numerical
        dataframe[col] = dataframe[col].map(numerical)
        
    return dataframe

In [5]:
# Pré-processamento (extração de features e scaling)
indexed_dataframe = index(area_cover_dataframe)
features_dataframe = indexed_dataframe.drop("Area_Cover", axis = 1)
standard_scaler = StandardScaler()

scaled_features = pandas.DataFrame(standard_scaler.fit_transform(features_dataframe),
                                   columns=features_dataframe.columns)

In [6]:
# Pré-processamento (categorical -> numerical)
labels_dataframe = indexed_dataframe["Area_Cover"].values
categorical_labels = to_categorical(labels_dataframe)

In [7]:
# Train test split
dfx_train, dfx_val, dfy_train, dfy_val = train_test_split(scaled_features.values, 
                                                          categorical_labels, test_size=0.2)

In [8]:
# Arquitetura da rede de classificação
def build_model(hptune):
    model = Sequential()
    model.add(Dense(10, input_shape = (12,), activation = 'relu'))
    model.add(Dropout(.2))
    model.add(Dense(7, activation = 'softmax'))
    
    learning_rate = hptune.Choice('learning_rate', 
                                 values=[1e-2, 1e-3, 1e-4])

    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='mse',
                  metrics=['accuracy'])
    
    return model

In [9]:
# Tuning de hiperparâmetros
tuner = keras_tuner.Hyperband(build_model, overwrite=True, objective='accuracy')

2022-05-27 19:35:13.129803: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


In [10]:
## Definição de early-stopping
stop_early = tensorflow.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

# Keras tuning search
tuner.search(dfx_train, dfy_train, epochs=50, validation_split=0.2, callbacks=[stop_early])

# Obtendo os hiperparâmetros ótimos
best_hyperparameters=tuner.get_best_hyperparameters(num_trials=10)[0]

Trial 3 Complete [00h 00m 11s]
accuracy: 0.6892343759536743

Best accuracy So Far: 0.6892343759536743
Total elapsed time: 00h 00m 35s
INFO:tensorflow:Oracle triggered exit
INFO:tensorflow:Oracle triggered exit


In [12]:
# Treinando o novo modelo com os hiperparâmetros encontrados
model = tuner.hypermodel.build(best_hyperparameters)
history = model.fit(dfx_train, dfy_train, epochs=50, validation_split=0.2)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [None]:
# Recuperando melhor validation accuracy histórica
val_acc_per_epoch = history.history['val_accuracy']
best_epoch = val_acc_per_epoch.index(max(val_acc_per_epoch)) + 1
print('Best epoch: %d' % (best_epoch,))

In [None]:
# Avaliando test loss e accuracy
eval_result = model.evaluate(dfx_val, dfy_val)
print("[Model test loss, test accuracy]:", eval_result)

In [None]:
# Redefinindo o tuner do modelo com os melhores hiperparâmetros
hypermodel = tuner.hypermodel.build(best_hyperparameters)
# Retreinando o modelo com os melhores hiperparâmetros
hypermodel.fit(dfx_train, dfy_train, epochs=best_epoch, validation_split=0.2)

In [None]:
# Avaliando test loss e accuracy para o modelo final
eval_result = hypermodel.evaluate(dfx_val, dfy_val)
print("[Hypermodel test loss, test accuracy]:", eval_result)

In [None]:
# Salvando os pesos do modelo
hypermodel.save(MODEL_PATH)