In [None]:
from pathlib import Path

import numpy as np
import pandas as pd
import time
import torch
import torchvision.transforms as transforms
from torchvision.datasets import MNIST
from src.utils import compute_metrics, create_autoencoder, create_tied_autoencoder, create_data_loader

# Fix the NumPy and PyTorch random number generators to one shared seed so
# that repeated runs of the notebook start from the same random state.
SEED = 17
np.random.seed(SEED)
torch.manual_seed(SEED)

In [None]:
# Show whether PyTorch can see a CUDA device. The training cells below pick
# "cuda" when this is True and fall back to "cpu" otherwise.
torch.cuda.is_available()

In [None]:
def get_list_params(path):
    """Parse a whitespace-separated hyper-parameter file into a list of dicts.

    Every non-blank line is expected to hold, in this order::

        activation epochs latent_dim num_layers rate_reduce

    ``activation`` is kept as a string, ``epochs``, ``latent_dim`` and
    ``num_layers`` are converted to ``int`` and ``rate_reduce`` to ``float``.
    Fields beyond the fifth are ignored. Blank or whitespace-only lines are
    skipped instead of aborting the parse.

    Parameters
    ----------
    path : str or os.PathLike
        File to read.

    Returns
    -------
    list of dict
        One dict per parsed line, in file order, with the keys
        ``"activation"``, ``"epochs"``, ``"latent_dim"``, ``"num_layers"``
        and ``"rate_reduce"``.

    Raises
    ------
    OSError
        If ``path`` cannot be opened (e.g. the file does not exist).
    IndexError
        If a non-blank line has fewer than five fields.
    ValueError
        If a numeric field cannot be converted.
    """
    keys = ("activation", "epochs", "latent_dim", "num_layers", "rate_reduce")
    params = []
    with open(path) as f:
        for line in f:
            fields = line.split()
            if not fields:
                # A stray empty line (common at the end of hand-edited files)
                # carries no configuration; skip it rather than crash.
                continue
            fields[1] = int(fields[1])
            fields[2] = int(fields[2])
            fields[3] = int(fields[3])
            fields[4] = float(fields[4])
            # zip stops at the shorter input, so extra trailing fields are dropped.
            params.append(dict(zip(keys, fields)))
    return params

In [None]:
# Experiment-wide settings shared by every training cell below.
data_path = "../pytorch_data/"  # dataset root handed to create_data_loader
batch_size = 256                # mini-batch size for both loaders
iters = 15                      # independent training runs per configuration

# Only a tensor conversion is applied to the images.
transform = transforms.Compose([transforms.ToTensor()])

# Build the two loaders with identical settings; only the last positional
# argument differs (True first, then False).
# NOTE(review): presumably that flag selects the train vs. test split of
# MNIST -- confirm against src.utils.create_data_loader.
train_loader, test_loader = (
    create_data_loader(data_path, MNIST, transform, batch_size, flag)
    for flag in (True, False)
)

In [None]:
# Work out which configurations still have to be run for the plain autoencoder:
# everything listed in params.txt minus what the progress log already records.
folder = Path("./")
params = get_list_params(folder / "params.txt")

# The progress log is only created when the training cell appends its first
# finished configuration, so on a fresh run it does not exist yet. Treat that
# as "nothing evaluated so far" instead of failing with FileNotFoundError.
eval_params_path = folder / "eval_params_vanila.txt"
eval_params = get_list_params(eval_params_path) if eval_params_path.exists() else []

params = [p for p in params if p not in eval_params]

In [None]:
# Benchmark the plain autoencoder: for every pending configuration, build the
# model `iters` times via create_autoencoder, collect the metrics of each run,
# write them to one CSV per configuration and log the configuration as done.

# Loop-invariant values: the compute device and the per-sample input shape.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
input_dim = train_loader.dataset.data.shape[1:]

# NOTE(review): 504 looks like the total number of configurations in
# params.txt, so that `j` continues the numbering of already finished runs --
# confirm; this only lines up if finished configurations form a prefix of the file.
for j, p in enumerate(params, 504 - len(params)):
    latent_dim = p["latent_dim"]
    rate_reduce = p["rate_reduce"]
    num_layers = p["num_layers"]
    activation = p["activation"]
    out_activation = None
    epochs = p["epochs"]
    out_file = folder / f"data/vanila_ae/ae_{j}.csv"
    # Create the output directory before training: otherwise a missing folder
    # is only discovered by to_csv after all `iters` runs have finished.
    out_file.parent.mkdir(parents=True, exist_ok=True)

    # Hidden layer widths: the flattened input size divided by successive
    # powers of rate_reduce, never allowed to drop below the latent size.
    layers_dim = np.array(
        [np.prod(input_dim) // rate_reduce ** (depth + 1) for depth in range(num_layers)],
        dtype=int
    )
    layers_dim[layers_dim < latent_dim] = latent_dim

    ae_params = {
        "input_dim": input_dim,
        "latent_dim": latent_dim,
        "activation": activation,
        "out_activation": out_activation,
        "device": device,
        "layers_dim": list(layers_dim),
    }
    fit_params = {
        "optimizer": torch.optim.Adam,
        "epochs": epochs,
        "loss": torch.nn.MSELoss,
    }
    print(
        f"Num epochs: {epochs} -- Latent dim: {latent_dim} -- reduce x{rate_reduce} -- L {num_layers}"
    )

    records = []  # one metrics dict per run
    for i in range(1, iters + 1):
        print(f"Iteration {i}")
        print("Create model")
        start = time.time()
        # NOTE(review): create_autoencoder receives the train loader and the
        # fit settings, so it presumably also trains the model -- confirm.
        model = create_autoencoder(ae_params, train_loader, fit_params)
        t = int(time.time() - start)
        print(f"Time: {t // 60}-{t % 60}")

        print("Compute metrics")
        metrics = compute_metrics(model, train_loader, test_loader)
        print(f"Loss: Train - {metrics['eval_train']:.3f} -- Test - {metrics['eval_test']:.3f}")
        print(f"KNN accuracy: {metrics['accuracy']: .3f}")
        print(f"KMeans v-measure: {metrics['v_measure']: .3f}\n\n")

        # Tag the metrics with the run index and the configuration they belong to.
        metrics["iteration"] = i
        metrics["epochs"] = epochs
        metrics["latent_dim"] = latent_dim
        metrics["rate_reduce"] = rate_reduce
        metrics["num_layers"] = num_layers
        metrics["activation"] = activation

        records.append(metrics)
        del model  # drop the reference before the next run builds a new model

    df = pd.DataFrame(records)
    df.to_csv(out_file, index=False)

    # Record the configuration as finished only after its CSV is on disk, in
    # the same field order that get_list_params reads back.
    with open(folder / "eval_params_vanila.txt", "a") as f:
        s = f"{activation} {epochs} {latent_dim} {num_layers} {rate_reduce}\n"
        f.write(s)

In [None]:
# Work out which configurations still have to be run for the tied autoencoder:
# everything listed in params.txt minus what the progress log already records.
folder = Path("./")
params = get_list_params(folder / "params.txt")

# The progress log is only created when the training cell appends its first
# finished configuration, so on a fresh run it does not exist yet. Treat that
# as "nothing evaluated so far" instead of failing with FileNotFoundError.
eval_params_path = folder / "eval_params_tied.txt"
eval_params = get_list_params(eval_params_path) if eval_params_path.exists() else []

params = [p for p in params if p not in eval_params]

# Passed to create_tied_autoencoder as ae_params["alpha"] in the next cell.
alpha = 0

In [None]:
# Benchmark the tied autoencoder with the current `alpha`: for every pending
# configuration, build the model `iters` times via create_tied_autoencoder,
# collect the metrics of each run, write them to one CSV per configuration and
# log the configuration as done.

# Loop-invariant values: the compute device and the per-sample input shape.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
input_dim = train_loader.dataset.data.shape[1:]

# NOTE(review): 504 looks like the total number of configurations in
# params.txt, so that `j` continues the numbering of already finished runs --
# confirm; this only lines up if finished configurations form a prefix of the file.
for j, p in enumerate(params, 504 - len(params)):
    latent_dim = p["latent_dim"]
    rate_reduce = p["rate_reduce"]
    num_layers = p["num_layers"]
    activation = p["activation"]
    out_activation = None
    epochs = p["epochs"]
    out_file = folder / f"data/tied_ae/ae_{j}.csv"
    # Create the output directory before training: otherwise a missing folder
    # is only discovered by to_csv after all `iters` runs have finished.
    out_file.parent.mkdir(parents=True, exist_ok=True)

    # Hidden layer widths: the flattened input size divided by successive
    # powers of rate_reduce, never allowed to drop below the latent size.
    layers_dim = np.array(
        [np.prod(input_dim) // rate_reduce ** (depth + 1) for depth in range(num_layers)],
        dtype=int
    )
    layers_dim[layers_dim < latent_dim] = latent_dim

    ae_params = {
        "input_dim": input_dim,
        "latent_dim": latent_dim,
        "activation": activation,
        "out_activation": out_activation,
        "alpha": alpha,
        "device": device,
        "layers_dim": list(layers_dim),
    }
    fit_params = {
        "optimizer": torch.optim.Adam,
        "epochs": epochs,
        "loss": torch.nn.MSELoss,
    }
    print(
        f"Num epochs: {epochs} -- Latent dim: {latent_dim} -- reduce x{rate_reduce} -- L {num_layers}"
    )

    records = []  # one metrics dict per run
    for i in range(1, iters + 1):
        print(f"Iteration {i}")
        print("Create model")
        start = time.time()
        # NOTE(review): create_tied_autoencoder receives the train loader and
        # the fit settings, so it presumably also trains the model -- confirm.
        model = create_tied_autoencoder(ae_params, train_loader, fit_params)
        t = int(time.time() - start)
        print(f"Time: {t // 60}-{t % 60}")

        print("Compute metrics")
        metrics = compute_metrics(model, train_loader, test_loader)
        print(f"Loss: Train - {metrics['eval_train']:.3f} -- Test - {metrics['eval_test']:.3f}")
        print(f"KNN accuracy: {metrics['accuracy']: .3f}")
        print(f"KMeans v-measure: {metrics['v_measure']: .3f}")

        # Tag the metrics with the run index and the configuration they belong to.
        metrics["iteration"] = i
        metrics["epochs"] = epochs
        metrics["latent_dim"] = latent_dim
        metrics["rate_reduce"] = rate_reduce
        metrics["num_layers"] = num_layers
        metrics["activation"] = activation

        records.append(metrics)
        del model  # drop the reference before the next run builds a new model

    df = pd.DataFrame(records)
    df.to_csv(out_file, index=False)

    # Record the configuration as finished only after its CSV is on disk, in
    # the same field order that get_list_params reads back.
    with open(folder / "eval_params_tied.txt", "a") as f:
        s = f"{activation} {epochs} {latent_dim} {num_layers} {rate_reduce}\n"
        f.write(s)

In [None]:
# Work out which configurations still have to be run for the tied autoencoder
# with a non-zero alpha: everything listed in params.txt minus what the
# progress log already records.
folder = Path("./")
params = get_list_params(folder / "params.txt")

# The progress log is only created when the training cell appends its first
# finished configuration, so on a fresh run it does not exist yet. Treat that
# as "nothing evaluated so far" instead of failing with FileNotFoundError.
eval_params_path = folder / "eval_params_tied_ort.txt"
eval_params = get_list_params(eval_params_path) if eval_params_path.exists() else []

params = [p for p in params if p not in eval_params]

# Passed to create_tied_autoencoder as ae_params["alpha"] in the next cell.
# NOTE(review): judging by the "tied_ort" file names this is presumably the
# weight of an orthogonality penalty -- confirm in src.utils.
alpha = 0.01

In [None]:
# Benchmark the tied autoencoder with the current (non-zero) `alpha`: for every
# pending configuration, build the model `iters` times via
# create_tied_autoencoder, collect the metrics of each run, write them to one
# CSV per configuration and log the configuration as done.

# Loop-invariant values: the compute device and the per-sample input shape.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
input_dim = train_loader.dataset.data.shape[1:]

# NOTE(review): 504 looks like the total number of configurations in
# params.txt, so that `j` continues the numbering of already finished runs --
# confirm; this only lines up if finished configurations form a prefix of the file.
for j, p in enumerate(params, 504 - len(params)):
    latent_dim = p["latent_dim"]
    rate_reduce = p["rate_reduce"]
    num_layers = p["num_layers"]
    activation = p["activation"]
    out_activation = None
    epochs = p["epochs"]
    out_file = folder / f"data/tied_ort_ae/ae_{j}.csv"
    # Create the output directory before training: otherwise a missing folder
    # is only discovered by to_csv after all `iters` runs have finished.
    out_file.parent.mkdir(parents=True, exist_ok=True)

    # Hidden layer widths: the flattened input size divided by successive
    # powers of rate_reduce, never allowed to drop below the latent size.
    layers_dim = np.array(
        [np.prod(input_dim) // rate_reduce ** (depth + 1) for depth in range(num_layers)],
        dtype=int
    )
    layers_dim[layers_dim < latent_dim] = latent_dim

    ae_params = {
        "input_dim": input_dim,
        "latent_dim": latent_dim,
        "activation": activation,
        "out_activation": out_activation,
        "alpha": alpha,
        "device": device,
        "layers_dim": list(layers_dim),
    }
    fit_params = {
        "optimizer": torch.optim.Adam,
        "epochs": epochs,
        "loss": torch.nn.MSELoss,
    }
    print(
        f"Num epochs: {epochs} -- Latent dim: {latent_dim} -- reduce x{rate_reduce} -- L {num_layers}"
    )

    records = []  # one metrics dict per run
    for i in range(1, iters + 1):
        print(f"Iteration {i}")
        print("Create model")
        start = time.time()
        # NOTE(review): create_tied_autoencoder receives the train loader and
        # the fit settings, so it presumably also trains the model -- confirm.
        model = create_tied_autoencoder(ae_params, train_loader, fit_params)
        t = int(time.time() - start)
        print(f"Time: {t // 60}-{t % 60}")

        print("Compute metrics")
        metrics = compute_metrics(model, train_loader, test_loader)
        print(f"Loss: Train - {metrics['eval_train']:.3f} -- Test - {metrics['eval_test']:.3f}")
        print(f"KNN accuracy: {metrics['accuracy']: .3f}")
        print(f"KMeans v-measure: {metrics['v_measure']: .3f}")

        # Tag the metrics with the run index and the configuration they belong to.
        metrics["iteration"] = i
        metrics["epochs"] = epochs
        metrics["latent_dim"] = latent_dim
        metrics["rate_reduce"] = rate_reduce
        metrics["num_layers"] = num_layers
        metrics["activation"] = activation

        records.append(metrics)
        del model  # drop the reference before the next run builds a new model

    df = pd.DataFrame(records)
    df.to_csv(out_file, index=False)

    # Record the configuration as finished only after its CSV is on disk, in
    # the same field order that get_list_params reads back.
    with open(folder / "eval_params_tied_ort.txt", "a") as f:
        s = f"{activation} {epochs} {latent_dim} {num_layers} {rate_reduce}\n"
        f.write(s)