In [None]:
import torch
# Pick the compute device once for the whole notebook: use CUDA when PyTorch
# reports it as available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device} device")

"""
Seguimiento de loss y accuracy, e impresión de gráficos.
"""
import matplotlib.pyplot as plt
# Metric history shared by the training and evaluation loops: train_loop
# appends to the *_train lists and test_loop appends to the *_test lists,
# one value per call.
arr_loss_train, arr_loss_test = [], []
arr_acc_train, arr_acc_test = [], []
def loss_plot(epochs, loss):
    """Plot loss values against the epoch index using pyplot.

    Args:
        epochs: Number of epochs; the x axis is 0, 1, ..., epochs - 1.
        loss: Loss values to draw, expected to hold one entry per epoch.
    """
    epoch_axis = list(range(epochs))
    plt.plot(epoch_axis, loss)
    plt.ylabel("Loss")
    plt.xlabel("epochs")
def acc_plot(epochs,acc):
    """Plot accuracy values against the epoch index using pyplot.

    Args:
        epochs: Number of epochs; the x axis is 0, 1, ..., epochs - 1.
        acc: Accuracy values to draw, expected to hold one entry per epoch.
    """
    epoch_axis = list(range(epochs))
    plt.plot(epoch_axis, acc)
    plt.ylabel("Accuracy")
    plt.xlabel("epochs")

In [None]:
"""
Algoritmo de entrenamiento y de validación de accuracy.
"""
def train_loop(dataloader, model, loss_fn, optimizer):
    """Train ``model`` for one epoch and record that epoch's loss and accuracy.

    Each batch is moved to the module-level ``device``, run through the model
    and used for one optimizer step. Progress is printed every 100 batches.
    When the epoch finishes, the loss of the final batch is appended to
    ``arr_loss_train`` and the fraction of correctly classified samples is
    appended to ``arr_acc_train``. If the dataloader yields no batches,
    nothing is recorded.

    Args:
        dataloader: Iterable of ``(X, y)`` batches that also exposes
            ``dataloader.dataset``; its length is used as the sample count.
        model: Network to train; called as ``model(X)``.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar tensor.
        optimizer: Optimizer exposing ``zero_grad()`` and ``step()``.
    """
    print_every_x_batches = 100
    size = len(dataloader.dataset)
    correct = 0
    loss = None  # stays None when the dataloader yields no batches
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X = X.to(device)
        y = y.to(device)
        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Count the samples whose highest-scoring class matches the label
        correct += (pred.argmax(1) == y).type(torch.float).sum().item()

        # Print loss during training; this printed value is not recorded.
        if batch % print_every_x_batches == 0:
            # Keep the float under its own name: rebinding ``loss`` here would
            # replace the tensor and break the recording step after the loop
            # whenever the last batch is also a printed batch.
            loss_value, current = loss.item(), batch * len(X)
            print(f"loss: {loss_value:>7f}  [{current:>5d}/{size:>5d}]")

    if loss is None:
        # No batch was processed, so there is no loss or accuracy to record.
        return
    arr_loss_train.append(loss.item())
    arr_acc_train.append(correct / size)

    


def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0
    model.eval() # pone a la red en moodo evaluacion, desactiva capas dropout.
    with torch.no_grad(): # desactiva el proceso de calculo y guardado de valores intermedios
        for X, y in dataloader:
            X = X.to(device)
            y = y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
    arr_loss_test.append(test_loss)
    arr_acc_test.append(correct)

In [None]:
"""
Instanciar el modelo cuyos hiperparámetros se van a ajustar.
"""
from modelo_convolucional import RedConvolucional
# Move the network to the same device the training/evaluation loops move each
# batch to; otherwise weights and inputs end up on different devices when CUDA
# is available. Assumes RedConvolucional is a torch.nn.Module (``.to`` returns
# the module itself) — TODO confirm.
model = RedConvolucional().to(device)

"""
Definir función de costo y optimizador.
Explicación sobre la optimización:
https://pytorch.org/tutorials/beginner/basics/optimization_tutorial.html
"""
import torch
from torch import nn
# CrossEntropyLoss expects the raw "logits" output: there is no need to pass it
# through softmax first, because the loss computes that internally.
# https://pytorch.org/docs/stable/generated/torch.nn.CrossEntropyLoss.html
loss_function = torch.nn.CrossEntropyLoss()
# The optimizer must be given the model parameters so that it can update them.
optimizer = torch.optim.Adam(params=model.parameters())

# aca abajo, lo importante para la optimizacion de hiperparametros

In [4]:
"""
Importar funciones de utilidad.
"""
import os
from functools import partial

from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler

from cargar_datos import cargar_datasets, cargar_dataloaders, classes


def main(num_samples=10, max_num_epochs=10, gpus_per_trial=0.5):
    """Run a Ray Tune hyperparameter search and evaluate the best trial.

    Builds the search space, runs ``train_cifar`` under an ASHA scheduler,
    prints the best trial's configuration and metrics, then rebuilds the
    model from that trial's checkpoint and prints its test accuracy.

    Args:
        num_samples: Number of configurations sampled from the search space.
        max_num_epochs: Value passed to the ASHA scheduler as ``max_t``.
        gpus_per_trial: GPU share requested for each trial (may be fractional).

    Raises:
        RuntimeError: If no trial reported the ``loss`` metric, so no best
            trial can be selected.
    """
    data_dir = os.path.abspath("./data")
    # Load the datasets once up front so that, if they are not downloaded yet,
    # they get downloaded and verified here; later loads during the trials can
    # then skip that step.
    cargar_datasets(data_dir)

    # Search space: Ray Tune picks the concrete values for each trial during
    # the hyperparameter search.
    config = {
        "batch_size": tune.choice([8, 16, 32]),
        "learning_rate": tune.loguniform(1e-4, 1e-1),
        # convolutional network
        "cant_filtros_conv1": tune.choice([6, 12, 18]),
        "kernel_size_maxpool1": tune.choice([2, 3]),
        "cant_filtros_conv2": tune.choice([16, 22, 28]),
        "kernel_size_maxpool2": tune.choice([2, 3]),
        "full_l1": tune.choice([120, 140, 160]),
        "full_l2": tune.choice([84, 104, 124])
    }

    scheduler = ASHAScheduler(
        metric="loss",
        mode="min",
        max_t=max_num_epochs,
        grace_period=1,
        reduction_factor=2)
    reporter = CLIReporter(
        parameter_columns=list(config.keys()),
        metric_columns=["loss", "accuracy", "training_iteration"])
    result = tune.run(
        # partial pre-binds data_dir, so Tune only has to supply ``config``
        partial(train_cifar, data_dir=data_dir),
        resources_per_trial={"cpu": 2, "gpu": gpus_per_trial},
        config=config,
        num_samples=num_samples,
        scheduler=scheduler,
        progress_reporter=reporter)

    best_trial = result.get_best_trial("loss", "min", "last")
    if best_trial is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise RuntimeError(
            "No trial reported the 'loss' metric; cannot select a best trial.")
    print("Best trial config: {}".format(best_trial.config))
    print("Best trial final validation loss: {}".format(
        best_trial.last_result["loss"]))
    print("Best trial final validation accuracy: {}".format(
        best_trial.last_result["accuracy"]))

    # The search space above defines "full_l1"/"full_l2"; it has no "l1"/"l2"
    # keys, so those lookups could only raise KeyError.
    # NOTE(review): ``Net`` and ``test_accuracy`` (used below) are not defined
    # in the visible part of this notebook. Presumably the model to rebuild is
    # RedConvolucional configured from ``best_trial.config`` and the evaluation
    # should use a local helper — TODO confirm and adapt.
    best_trained_model = Net(
        best_trial.config["full_l1"], best_trial.config["full_l2"])
    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if gpus_per_trial > 1:
            best_trained_model = nn.DataParallel(best_trained_model)
    best_trained_model.to(device)

    # NOTE(review): the visible ``train_cifar`` only prints its arguments and
    # saves no checkpoint, so this load presumably cannot succeed until the
    # trial function writes one — TODO.
    best_checkpoint_dir = best_trial.checkpoint.value
    # map_location lets a checkpoint written on one device (e.g. a GPU worker)
    # be loaded onto the device chosen above.
    model_state, _ = torch.load(
        os.path.join(best_checkpoint_dir, "checkpoint"), map_location=device)
    best_trained_model.load_state_dict(model_state)

    test_acc = test_accuracy(best_trained_model, device)
    print("Best trial test set accuracy: {}".format(test_acc))


def train_cifar(config, data_dir="pepe"):
    """Trial function handed to Ray Tune; currently a stub that echoes its inputs.

    Args:
        config: Hyperparameter values for this trial.
        data_dir: Dataset directory; ``main`` binds it via ``functools.partial``.

    NOTE(review): nothing is reported back to Tune here, while ``main`` sets up
    its scheduler with ``metric="loss"``. The training/validation logic and the
    metric reporting still have to be added — TODO.
    """
    for value in (config, data_dir):
        print(value)


if __name__ == "__main__":
    # Entry point: a single sampled configuration, up to 10 epochs, and no GPU
    # requested per trial. Change ``gpus_per_trial`` here to give trials a GPU.
    main(
        num_samples=1,
        max_num_epochs=10,
        gpus_per_trial=0,
    )




Files already downloaded and verified
Files already downloaded and verified
== Status ==
Current time: 2022-11-13 00:00:37 (running for 00:00:00.13)
Memory usage on this node: 7.0/15.4 GiB 
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: None
Resources requested: 2.0/20 CPUs, 0/1 GPUs, 0.0/5.79 GiB heap, 0.0/2.9 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /home/braian/ray_results/train_cifar_2022-11-13_00-00-37
Number of trials: 1/1 (1 RUNNING)
+-------------------------+----------+---------------------+--------------+-----------------+----------------------+------------------------+----------------------+------------------------+-----------+-----------+
| Trial name              | status   | loc                 |   batch_size |   learning_rate |   cant_filtros_conv1 |   kernel_size_maxpool1 |   cant_filtros_conv2 |   kernel_size_maxpool2 |   full_l1 |   full_l2 |
|-------------------------+----------+-------

TuneError: Traceback (most recent call last):
  File "/home/braian/miniconda3/envs/tesis/lib/python3.9/site-packages/ray/tune/execution/trial_runner.py", line 853, in _wait_and_handle_event
    self._on_training_result(
  File "/home/braian/miniconda3/envs/tesis/lib/python3.9/site-packages/ray/tune/execution/trial_runner.py", line 978, in _on_training_result
    self._process_trial_results(trial, result)
  File "/home/braian/miniconda3/envs/tesis/lib/python3.9/site-packages/ray/tune/execution/trial_runner.py", line 1061, in _process_trial_results
    decision = self._process_trial_result(trial, result)
  File "/home/braian/miniconda3/envs/tesis/lib/python3.9/site-packages/ray/tune/execution/trial_runner.py", line 1098, in _process_trial_result
    self._validate_result_metrics(flat_result)
  File "/home/braian/miniconda3/envs/tesis/lib/python3.9/site-packages/ray/tune/execution/trial_runner.py", line 1194, in _validate_result_metrics
    raise ValueError(
ValueError: Trial returned a result which did not include the specified metric(s) `loss` that `AsyncHyperBandScheduler` expects. Make sure your calls to `tune.report()` include the metric, or set the TUNE_DISABLE_STRICT_METRIC_CHECKING environment variable to 1. Result: {'trial_id': '599f1_00000', 'experiment_id': '14f2e30a2234411d83c3bfab52d52225', 'date': '2022-11-13_00-00-38', 'timestamp': 1668308438, 'pid': 22295, 'hostname': 'braian-pc-linux', 'node_ip': '192.168.0.187', 'done': True, 'config/batch_size': 8, 'config/learning_rate': 0.0019577033484982163, 'config/cant_filtros_conv1': 18, 'config/kernel_size_maxpool1': 3, 'config/cant_filtros_conv2': 16, 'config/kernel_size_maxpool2': 3, 'config/full_l1': 140, 'config/full_l2': 104}
