# Univariate regression to predict the wind speed field

In [None]:
import torch
import torch.nn as nn
import numpy as np
import random

import sys

sys.path.append("../..")

from src.logging_metrics import MetricsLogger
from src.data_loaders import get_wake_dataloaders
import src.plotting as plotting
from src.evaluation import evaluate_model

# Seed every random number generator this notebook relies on so that repeated
# runs are reproducible: PyTorch, NumPy and Python's built-in `random` module
# (the latter is used further down to shuffle the fields that get plotted).
torch.manual_seed(0)
np.random.seed(0)
random.seed(0)

In [None]:
# Hyperparameters
# In the univariate setting the batch size is expressed as a multiplier of the
# number of cells (i.e. the batch size should be divisible by the number of cells).
BATCH_MULTIPLIER = 8
EPOCHS = 500
LR = 0.0001  # before it was 0.01

# Architecture settings; they are read as module-level globals by the model
# class defined further down in this notebook.
ACTIVATION_FUNCTION = nn.ReLU()
N_MAPPING = 256  # Number of Fourier mappings (the Fourier layer outputs 2 * N_MAPPING values: sin and cos)
HIDDEN_LAYERS_UNITS = [256, 256, 256]  # before it was [128, 256, 32]

# Name of the data folder; it is also used as a sub-folder of the saved-model path.
FACTORS_FOLDER = "discr_factors_x2_30_y-2_2_step0.125_TIstep0.01_CTstep0.01"
DATA_FOLDER = f"../../data/{FACTORS_FOLDER}/"

# Reduction factor applied to each input variable when building the training
# set; the mapping is passed to the data loader factory and is also encoded in
# the model name through the tag built below.
INPUT_VAR_TO_TRAIN_REDUCTION_FACTOR = {"ti": 4, "ct": 4}  # , 'x/D': 12, 'y/D': 12}
# Tag of the form "training_factors=<var><factor>-<var><factor>...", with any
# "/" stripped from the variable part so the tag can be used in a file name.
train_reduc_factor_string = "training_factors={}".format(
    "-".join(
        f"{var}{factor}"
        for var, factor in INPUT_VAR_TO_TRAIN_REDUCTION_FACTOR.items()
    ).replace("/", "")
)
# Alternative setup: restrict training to explicit value ranges instead.
# INPUT_VAR_TO_TRAIN_RANGES = {"ti": [(0.15, 0.4)], "ct": [(0.3, 0.7)]}
# train_range_string = "training_ranges=" + "-".join(
#    [
#        f"{var}{r[0]}-{r[1]}"
#        for var, ranges in INPUT_VAR_TO_TRAIN_RANGES.items()
#        for r in ranges
#    ]
# )
EXPERIMENT = "interpolation"

# Flags describing the input layout of this experiment.
CONSIDER_WS = False
COORDS_AS_INPUT = True  # univariate setting
N_COORDINATES = 2  # Number of input coordinates (x and y)

# The model name encodes the architecture and the training-set reduction so
# that each configuration gets its own checkpoint file.
MODEL_NAME = "_".join(
    (
        "univariate_NeRF",
        f"{N_MAPPING}fourier-features",
        f"layers{'-'.join(map(str, HIDDEN_LAYERS_UNITS))}",
        f"{train_reduc_factor_string}",
    )
)
if CONSIDER_WS:
    # NOTE(review): DATA_FOLDER was derived from the original FACTORS_FOLDER
    # and is not recomputed after this replacement -- confirm that is intended.
    MODEL_NAME = f"{MODEL_NAME}_consider_ws"
    FACTORS_FOLDER = FACTORS_FOLDER.replace("TIstep0.01_CTstep0.01", "reducedTI-CT")
# Built after the optional adjustments above so it always reflects the final
# folder and model name.
BEST_MODEL_PATH = f"../../saved_models/{FACTORS_FOLDER}/{MODEL_NAME}.pt"
print(f"Model name: {MODEL_NAME}")
print(f"Best model path: {BEST_MODEL_PATH}")

# Human-readable summary of the run, handed to the evaluation routine.
# NOTE(review): the text ends with an unmatched ")"; it is kept unchanged here
# because the description may be used to identify previously stored results.
MODEL_DESCRIPTION = (
    f"{MODEL_NAME}_{FACTORS_FOLDER}: "
    f"act. func. {type(ACTIVATION_FUNCTION).__name__} (not last layer), "
    f"batch multiplier {BATCH_MULTIPLIER}, {EPOCHS} epochs, lr={LR})"
)
print(f"Model description: {MODEL_DESCRIPTION}")

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"\nDevice: {DEVICE}")

In [None]:
class FourierLayer(nn.Module):
    """Encode inputs as sines and cosines of a learnable linear projection.

    The projection matrix is initialised from a standard normal distribution
    and registered as an ``nn.Parameter``, so it is part of ``parameters()``
    and of the module's ``state_dict``.

    Args:
        n_features: size of the last dimension of the input tensor.
        n_mapping: number of rows of the projection matrix; the layer outputs
            ``2 * n_mapping`` values per sample (sines followed by cosines).
    """

    def __init__(self, n_features: int, n_mapping: int):
        super().__init__()
        # Drawn from N(0, 1); one row per mapping, one column per input feature.
        initial_coefficients = torch.randn(n_mapping, n_features)
        self.coefficients = nn.Parameter(initial_coefficients)

    def forward(self, x):
        """Return ``[sin(2*pi*x@C^T), cos(2*pi*x@C^T)]`` joined on the last axis."""
        angles = (2 * torch.pi * x) @ self.coefficients.t()
        return torch.cat((angles.sin(), angles.cos()), dim=-1)


class NeRF2D(nn.Module):
    """MLP regressor whose coordinate inputs are first Fourier-encoded.

    Every input row is expected to be laid out as
    ``[other input features..., coordinates...]``: the trailing
    ``n_coordinates`` columns are passed through a ``FourierLayer`` and the
    encoding is concatenated with the remaining columns before the MLP.

    The encoding width (``N_MAPPING``), the hidden layer sizes
    (``HIDDEN_LAYERS_UNITS``) and the activation (``ACTIVATION_FUNCTION``) are
    read from the module-level settings of this notebook.

    Args:
        n_coordinates: number of trailing input columns holding coordinates.
        n_features: number of leading input columns holding other features.
        output_space: size of the model output per sample.
    """

    def __init__(self, n_coordinates: int, n_features: int, output_space: int):
        super().__init__()
        # Remembered so that forward() splits the input consistently with the
        # Fourier layer instead of assuming exactly two coordinates.
        self.n_coordinates = n_coordinates
        self.fourier_layer = FourierLayer(n_coordinates, N_MAPPING)

        # The first MLP layer receives the sin and cos encodings
        # (2 * N_MAPPING values) plus the non-coordinate features.
        layer_units = [N_MAPPING * 2 + n_features, *HIDDEN_LAYERS_UNITS]
        mlp_layers = []
        for first, second in zip(layer_units, layer_units[1:]):
            mlp_layers += [nn.Linear(first, second), ACTIVATION_FUNCTION]
        # last layer not activated
        mlp_layers.append(nn.Linear(layer_units[-1], output_space))
        self.mlp = nn.Sequential(*mlp_layers)

    def forward(self, inputs):
        """Encode the coordinate columns and run the MLP on the joined tensor.

        Args:
            inputs: 2-D tensor whose last ``n_coordinates`` columns are the
                coordinates and whose leading columns are the other features.

        Returns:
            The output of the MLP for every input row.
        """
        # Split on an explicit column index (rather than a negative slice) so
        # the split also behaves when n_coordinates differs from 2.
        split_at = inputs.shape[-1] - self.n_coordinates
        input_features = inputs[:, :split_at]
        coords = inputs[:, split_at:]

        # Pass coordinates through the Fourier Layer
        fourier_out = self.fourier_layer(coords)

        # Concatenate Fourier layer output with other input features
        mlp_input = torch.cat((fourier_out, input_features), dim=-1)

        # Feed-forward through the rest of the neural network
        return self.mlp(mlp_input)

In [None]:
# Build the train/validation/test data loaders for the configured dataset.
# `valid_dataloader` is pre-set to None; the call below overwrites it with the
# validation loader returned by the factory.
valid_dataloader = None
train_dataloader, valid_dataloader, test_dataloader = get_wake_dataloaders(
    DATA_FOLDER,
    consider_ws=CONSIDER_WS,
    coords_as_input=COORDS_AS_INPUT,
    # train_perc=0.6,
    # test_perc=0.2,
    # validation_perc=0.2,
    input_var_to_train_reduction_factor=INPUT_VAR_TO_TRAIN_REDUCTION_FACTOR,
    # input_var_to_train_ranges=INPUT_VAR_TO_TRAIN_RANGES,
    batch_multiplier=BATCH_MULTIPLIER,
)
# A bare `len(...)` expression in the middle of a cell is silently discarded,
# so print the number of training batches explicitly.
print(f"Number of training batches: {len(train_dataloader)}")
print(valid_dataloader)

In [None]:
# Peek at a single batch to show the shapes of the tensors the loader yields.
first_batch = next(iter(train_dataloader), None)
if first_batch is not None:
    print(first_batch[0].shape, first_batch[1].shape)

# Derive the model dimensions from the training dataset: the second dimension
# of `inputs` / `outputs` gives the number of input / output columns, and the
# inputs consist of N_COORDINATES coordinates plus the remaining features.
train_dataset = train_dataloader.dataset
input_space = train_dataset.inputs.shape[1]
output_space = train_dataset.outputs.shape[1]
n_features = input_space - N_COORDINATES
print(
    f"{input_space=} ({N_COORDINATES} coordinates and {n_features} other input features)\t{output_space=}"
)

In [None]:
from pathlib import Path

# Model, optimiser and loss for the univariate regression.
model = NeRF2D(N_COORDINATES, n_features, output_space).to(DEVICE)

optimizer = torch.optim.Adam((p for p in model.parameters() if p.requires_grad), lr=LR)
loss_function = torch.nn.MSELoss()

metrics_logger = MetricsLogger(name=MODEL_NAME, automatic_save_after=2)

# Make sure the checkpoint folder exists so that torch.save below cannot fail
# on a missing directory after an epoch has already been trained.
Path(BEST_MODEL_PATH).parent.mkdir(parents=True, exist_ok=True)

# Start from +inf so the first validation epoch always produces a checkpoint,
# whatever the scale of the loss.
best_vloss = float("inf")
for epoch in range(EPOCHS):
    # --- TRAINING ---
    model.train()
    epoch_tloss = 0.0
    for batch in train_dataloader:
        x, y = batch[0].to(DEVICE), batch[1].to(DEVICE)
        optimizer.zero_grad()
        prediction = model(x)
        tloss = loss_function(prediction, y)
        tloss.backward()
        optimizer.step()

        # MSELoss averages over the batch; weight by the batch size so the
        # epoch loss below is a per-sample average.
        epoch_tloss += tloss.item() * x.size(0)

    avg_tloss = epoch_tloss / len(train_dataloader.sampler)
    metrics_logger.log_metric(epoch, "Training loss", avg_tloss)

    # --- VALIDATION ---
    if valid_dataloader:
        model.eval()
        epoch_vloss = 0.0
        # No gradients are needed for validation: skip building the autograd
        # graph to save memory and time.
        with torch.no_grad():
            for batch in valid_dataloader:
                x, y = batch[0].to(DEVICE), batch[1].to(DEVICE)
                prediction = model(x)
                vloss = loss_function(prediction, y)

                epoch_vloss += vloss.item() * x.size(0)

        avg_vloss = epoch_vloss / len(valid_dataloader.sampler)
        metrics_logger.log_metric(epoch, "Validation loss", avg_vloss)
        # Track best performance, and save the model's state
        if avg_vloss < best_vloss:
            best_vloss = avg_vloss
            torch.save(model.state_dict(), BEST_MODEL_PATH)

In [None]:
# Plot the metrics logged during training (start_from_epoch=2 is passed to the
# logger; presumably it hides the first epochs from the plot -- confirm in
# MetricsLogger), then ask the logger to save the collected metrics.
metrics_logger.plot_metrics_by_epoch(start_from_epoch=2)
metrics_logger.save_metrics()

# Evaluation and testing

In [None]:
# Load the best checkpoint written during training into a fresh model.
model = NeRF2D(N_COORDINATES, n_features, output_space).to(DEVICE)
# map_location remaps the stored tensors onto the current device, so a
# checkpoint saved on a GPU can also be restored on a CPU-only machine.
model.load_state_dict(torch.load(BEST_MODEL_PATH, map_location=DEVICE))
model.eval()

# Forwarded to evaluate_model as its `save_results` argument.
save_results = False

# evaluation on trainset
evaluate_model(
    model,
    data=train_dataloader,
    data_type="train",
    model_description=MODEL_DESCRIPTION,
    save_results=save_results,
    experiment=EXPERIMENT,
)

print("\n")

# evaluation on testset
evaluate_model(
    model,
    data=test_dataloader,
    data_type="test",
    model_description=MODEL_DESCRIPTION,
    save_results=save_results,
    experiment=EXPERIMENT,
)

In [None]:
# Plot the true and the predicted wake field for up to ten randomly chosen
# fields of the test set.
test_dataset = test_dataloader.dataset
num_cells = test_dataset.num_cells
# presumably every field contributes `num_cells` samples to the dataset, so
# this is the number of complete fields -- verify against the dataset class
num_fields = len(test_dataset) // num_cells
field_indices = [*range(num_fields)]
random.shuffle(field_indices)
fields_to_plot = field_indices[:10]

# Gradients are not needed while producing predictions for the plots.
with torch.no_grad():
    for field_idx in fields_to_plot:
        plot_params = test_dataset.get_parameters_for_plotting_univariate(
            model, field_idx
        )
        ti, ct, ws, wake_field, predicted_wake_field = plot_params

        plotting.plot_maps(
            test_dataset.X_grid,
            test_dataset.Y_grid,
            wake_field,
            predicted_wake_field,
            ti,
            ct,
            ws,
            error_to_plot="absolute",
        )