# Teste de TensorFlow - GPU

In [1]:
import tensorflow as tf

# Ask TensorFlow for the physical devices of type "GPU"; an empty list is falsy.
lista_gpu = tf.config.list_physical_devices("GPU")
# Last expression of the cell, so the chosen string is shown as the cell output.
"Tem GPU" if lista_gpu else "Não tem GPU"

2023-01-25 16:28:52.725201: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-01-25 16:28:52.731272: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-01-25 16:28:52.731382: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-01-25 16:28:52.731431: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1953] TensorFlow was not built with CUDA kernel binaries compatible with compute capability 8.6. CUDA kernels will be jit-compiled from PTX, which could take 30 minutes or longer.


'Tem GPU'

In [2]:
# Turn on the memory-growth option for every GPU found in the previous cell.
for gpu in lista_gpu:
    tf.config.experimental.set_memory_growth(gpu, True)

# Imports

In [None]:
import json

import numpy as np
import tensorflow as tf
from datasets import Dataset, Value, concatenate_datasets
from keras import backend as K
from keras.utils.np_utils import to_categorical
from transformers import AutoTokenizer
from transformers import TFAutoModelForSequenceClassification
from transformers import create_optimizer

## Lendo Dataset

In [None]:
# Alternative CSV source, currently disabled:
# dataset = Dataset.from_csv("../../data/datasets/dataset_e-commerce/olist.csv")
# Load the dataset from the JSON file.
dataset = Dataset.from_json("../../data/dataset_olist.json")

In [None]:
# Show the dataset summary right after loading.
dataset

## Renomeando as colunas do dataset

In [None]:
# Rename the three columns used by the rest of the notebook, one call per
# column, applied in the same order as before.
dataset = (
    dataset
    .rename_column("review_text", "text")
    .rename_column("polarity", "labels")
    .rename_column("kfold_polarity", "grupos")
)

In [None]:
# Show the dataset summary after the column renames.
dataset

## Convertendo os resultados para int

In [None]:
# Inspect the column types before the cast below.
dataset.features

In [None]:
# Work on a copy of the current schema, declare "labels" as int32 in it,
# and cast the dataset to that schema.
# NOTE: `Value` must be imported from `datasets` for this cell to run.
novo_tipo = dataset.features.copy()
novo_tipo["labels"] = Value("int32")
dataset = dataset.cast(novo_tipo)

In [None]:
# Inspect the column types again to confirm the cast.
dataset.features

## Quantidade de labels

In [None]:
# Distinct values present in the "labels" column.
set(dataset["labels"])


## Modelo

In [None]:
# Checkpoint identifier passed to both the tokenizer and the model loaders below.
model_id = 'neuralmind/bert-base-portuguese-cased'

### Baixando o tokenizador

In [None]:
# Load the tokenizer that belongs to the `model_id` checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_id)

### Baixando o modelo

In [None]:
# Load the `model_id` checkpoint with a sequence-classification head of 2 labels.
model = TFAutoModelForSequenceClassification.from_pretrained(model_id, num_labels=2)

### Tokenizando as entradas

In [None]:
# Longest sequence the loaded model accepts. Kept as a plain int so that
# `tokenize_dataset` does not reference the whole model object.
max_tokens = model.config.max_position_embeddings


def tokenize_dataset(data):
    """Tokenize the "text" entries of a batch.

    Texts are padded to the longest one in the batch and truncated to
    `max_tokens`, so an unusually long text cannot exceed the model's
    position-embedding limit at training time.

    Args:
        data: batch mapping passed by `Dataset.map(..., batched=True)`;
            only its "text" entry is read.

    Returns:
        The tokenizer output for the batch, as TensorFlow tensors.
    """
    return tokenizer(
        data["text"],
        padding=True,
        truncation=True,
        max_length=max_tokens,
        return_tensors="tf",
    )


dataset = dataset.map(tokenize_dataset, batched=True)

In [None]:
# Inspect the columns added by the tokenization step.
dataset.features

## Filtrando Dataset

In [None]:
# Distinct "labels" values before filtering.
set(dataset["labels"])

In [None]:
# Keep only the rows whose "labels" value is not None.
dataset = dataset.filter(lambda x: x["labels"] is not None)

In [None]:
# Distinct "labels" values after filtering.
set(dataset["labels"])

## Transformando os labels

In [None]:
# Convert the integer "labels" column into categorical vectors.
labels_lista = to_categorical(dataset["labels"])

In [None]:
# Number of encoded label rows.
len(labels_lista)

In [None]:
# Store the encoded labels in a new "label" column; the integer "labels" column is kept.
dataset = dataset.add_column("label", labels_lista.tolist())

In [None]:
# Inspect the schema now that the "label" column exists.
dataset.features

In [None]:
# Preview only the first rows of the integer "labels" column instead of
# dumping the whole column into the notebook output.
dataset[:5]["labels"]

In [None]:
# Preview only the first rows of the encoded "label" column instead of
# dumping the whole column into the notebook output.
dataset[:5]["label"]

## Dividindo os dados

In [None]:
def group_by(dataset):
    """Split `dataset` into one filtered dataset per distinct "grupos" value.

    Args:
        dataset: object exposing `dataset["grupos"]` and `dataset.filter(fn)`.

    Returns:
        A list with one filtered dataset per distinct value, in the
        iteration order of the set of values.
    """
    valores_distintos = set(dataset["grupos"])
    # `filter` runs immediately for each value, so the lambda sees the
    # value of the current iteration.
    return [
        dataset.filter(lambda linha: linha["grupos"] == valor)
        for valor in valores_distintos
    ]

In [None]:
# One dataset per distinct "grupos" value.
dataset_lista = group_by(dataset)

In [None]:
# Number of groups produced by the split.
len(dataset_lista)

In [None]:
# Schema of the first group.
dataset_lista[0].features

### Métricas

In [None]:
def func_precision(y_true, y_pred):
    """Precision computed on the arg-max of each row of the two inputs.

    Both arguments must expose `.numpy()`, which is why the model is
    compiled with `run_eagerly=True`. A fresh `tf.keras.metrics.Precision`
    is created on every call, so nothing is accumulated between calls.

    Args:
        y_true: target values; presumably one row per example - confirm with caller.
        y_pred: predicted values with the same layout as `y_true`.

    Returns:
        The metric result as a NumPy scalar.
    """
    classes_reais = y_true.numpy().argmax(axis=1)
    classes_previstas = y_pred.numpy().argmax(axis=1)
    metrica = tf.keras.metrics.Precision()
    metrica.update_state(classes_reais, classes_previstas)
    return metrica.result().numpy()

def func_recall(y_true, y_pred):
    """Recall computed on the arg-max of each row of the two inputs.

    Both arguments must expose `.numpy()`, which is why the model is
    compiled with `run_eagerly=True`. A fresh `tf.keras.metrics.Recall`
    is created on every call, so nothing is accumulated between calls.

    Args:
        y_true: target values; presumably one row per example - confirm with caller.
        y_pred: predicted values with the same layout as `y_true`.

    Returns:
        The metric result as a NumPy scalar.
    """
    classes_reais = y_true.numpy().argmax(axis=1)
    classes_previstas = y_pred.numpy().argmax(axis=1)
    metrica = tf.keras.metrics.Recall()
    metrica.update_state(classes_reais, classes_previstas)
    return metrica.result().numpy()

def func_f1(y_true, y_pred):
    """F1 score built from `func_precision` and `func_recall`.

    `K.epsilon()` is added to the denominator so the division is defined
    when precision and recall are both zero.

    Args:
        y_true: target values, forwarded unchanged to both helpers.
        y_pred: predicted values, forwarded unchanged to both helpers.

    Returns:
        2 * (precision * recall) / (precision + recall + epsilon).
    """
    p = func_precision(y_true, y_pred)
    r = func_recall(y_true, y_pred)
    produto = p * r
    soma = p + r + K.epsilon()
    return 2 * (produto / soma)

### Configurações

In [None]:
# Batch size used when building the tf.data datasets.
num_batchs = 16
# Number of epochs used to size the learning-rate schedule.
num_epochs = 3
# Total optimizer steps, derived from the size of the first group only.
# NOTE(review): the training cell trains on a different number of rows - confirm this is intended.
num_train_steps = (len(dataset_lista[0])  // num_batchs) * num_epochs
# `create_optimizer` returns the optimizer together with its learning-rate schedule.
optimizer, lr_schedule = create_optimizer(
    init_lr=2e-5,
    num_train_steps=num_train_steps,
    weight_decay_rate=0.01,
    num_warmup_steps=0,
)

In [None]:
# Show the optimizer object created above.
optimizer

In [None]:
# Show the learning-rate schedule object created above.
lr_schedule

## Removendo Colunas

In [None]:
# Columns that are not needed for training. The fold column is called
# "grupos" (it was renamed earlier in the notebook), not "group".
colunas_para_remover = ["text", "grupos"]

# `fold` is used as the loop variable so the global `dataset` is not overwritten.
for index, fold in enumerate(dataset_lista):
    # Drop only the columns that are still present, so re-running this cell
    # is harmless instead of raising on already-removed columns.
    colunas_presentes = [
        coluna for coluna in colunas_para_remover if coluna in fold.column_names
    ]
    if colunas_presentes:
        dataset_lista[index] = fold.remove_columns(colunas_presentes)

In [None]:
# Schema of the first group after the column removal.
dataset_lista[0].features

### Treinando o modelo

In [None]:
resultados = []

# Upper bounds on the rows used per fold (the same quick-run limits as before).
# They are capped by the real split sizes below so short folds do not raise.
max_exemplos_treino = 1000
max_exemplos_teste = 100

# `dataset_teste` is the held-out fold; the global `dataset` is left untouched.
for index, dataset_teste in enumerate(dataset_lista):
    ## Training split: every fold except the held-out one
    dataset_treino = concatenate_datasets(dataset_lista[:index] + dataset_lista[index + 1:])
    print(dataset_treino)
    n_treino = min(max_exemplos_treino, len(dataset_treino))
    n_teste = min(max_exemplos_teste, len(dataset_teste))

    ## Fresh model and optimizer for every fold.
    ## Reusing them would carry weights trained on earlier folds (which contain
    ## this fold's test rows) and an already-decayed learning-rate schedule.
    model_fold = TFAutoModelForSequenceClassification.from_pretrained(model_id, num_labels=2)
    # Same hyperparameters as the configuration cell, with the step count
    # sized for the rows actually used in this fold.
    optimizer_fold, _ = create_optimizer(
        init_lr=2e-5,
        num_train_steps=max(1, (n_treino // num_batchs) * num_epochs),
        weight_decay_rate=0.01,
        num_warmup_steps=0,
    )

    tf_dataset_treino = model_fold.prepare_tf_dataset(
        dataset_treino.select(range(n_treino)),
        batch_size=num_batchs,
        shuffle=True,
        tokenizer=tokenizer,
    )
    ## Test split: not shuffled, so no trailing partial batch is dropped
    tf_dataset_teste = model_fold.prepare_tf_dataset(
        dataset_teste.select(range(n_teste)),
        batch_size=num_batchs,
        shuffle=False,
        tokenizer=tokenizer,
    )

    ## Model
    model_fold.compile(
        optimizer=optimizer_fold,
        loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
        metrics=[
            tf.keras.metrics.CategoricalAccuracy(),
            func_precision,
            func_recall,
            func_f1
        ],
        # The custom metrics call `.numpy()`, which needs eager execution.
        run_eagerly=True
    )
    # Train for the number of epochs the learning-rate schedule was sized for.
    history = model_fold.fit(tf_dataset_treino, epochs=num_epochs)

    ## Evaluate on the held-out fold and keep JSON-serialisable results
    metricas = model_fold.evaluate(tf_dataset_teste, return_dict=True)
    resultados.append(
        {"fold": index, **{nome: float(valor) for nome, valor in metricas.items()}}
    )

In [None]:
# Show the results list built by the training cell.
resultados

### Salvando o resultado

In [None]:
# Serialise the results with a 4-space indent first, then write the text
# to the output file in one go.
resultados_json = json.dumps(resultados, indent=4)
with open("../resultados.json", "w") as arquivo:
    arquivo.write(resultados_json)