# Univariate regression to predict the wind speed field

In [7]:
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import numpy as np

from data_loaders import get_wake_dataloaders

# define random seeds for Neural Networks
torch.manual_seed(0)
np.random.seed(0)

In [8]:
DATAFRAME_FILEPATH = "data/discr_factors_x2_50_y-1_1_step0.125/"
BEST_MODEL_PATH = "models/univariate_NN"

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.manual_seed(0)

# hyperparameters
BATCH_SIZE = 256
EPOCHS = 10
LR = 0.0000001

In [10]:
valid_dataloader = None
train_dataloader, valid_dataloader, test_dataloader = get_wake_dataloaders(DATAFRAME_FILEPATH,
                                                          consider_ws=False,
                                                          coords_as_input=True,
                                                          train_perc=0.6,
                                                          test_perc=0.2,
                                                          validation_perc=0.2,
                                                          batch_size=BATCH_SIZE)
len(train_dataloader)

30960

In [12]:
input_space = train_dataloader.dataset.x.shape[1]
output_space = train_dataloader.dataset.y.shape[1]

class UnivariateNN(nn.Module):
    def __init__(self, input_space, output_space) -> None:
        super(UnivariateNN, self).__init__()
        self.layers = nn.Sequential(
            nn.Linear(input_space, 50),
            nn.ReLU(),
            nn.Linear(50, 250),
            nn.ReLU(),
            nn.Linear(250, output_space),
            nn.ReLU() #TODO?
        )

    def forward(self, x):
        return self.layers(x)

model = UnivariateNN(input_space, output_space).to(DEVICE)

optimizer = torch.optim.Adam(
    (p for p in model.parameters() if p.requires_grad), lr=LR
)
loss_function = torch.nn.MSELoss()

print("Starting training")
epoch_to_metrics = dict()
best_vloss = 1_000_000.
for epoch in range(EPOCHS):
    '''TRAINING'''
    model.train(True)
    running_tloss = 0
    epoch_accs = torch.FloatTensor()
    for batch in iter(train_dataloader):
        x, y = batch[0].to(DEVICE), batch[1].to(DEVICE)
        optimizer.zero_grad()
        prediction = model(x)
        tloss = loss_function(prediction, y)
        tloss.backward()
        #torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5) TODO
        optimizer.step()

        running_tloss += tloss.item()
        #TODO accuracy or other metric computation????

    avg_tloss = running_tloss / len(train_dataloader)
    print(f"{epoch=} -> Training loss={avg_tloss}", end="\t")
    epoch_to_metrics[epoch] = [avg_tloss]

    '''VALIDATION'''
    if valid_dataloader:
        model.train(False)
        running_vloss = 0.0
        for batch in iter(valid_dataloader):
            x, y = batch[0].to(DEVICE), batch[1].to(DEVICE)
            prediction = model(x)
            vloss = loss_function(prediction, y)

            running_vloss += vloss.item()

        avg_vloss = running_vloss / len(valid_dataloader)
        print(f"Validation loss={avg_vloss}")
        epoch_to_metrics[epoch].append(avg_vloss)
        # Track best performance, and save the model's state
        if avg_vloss < best_vloss:
            best_vloss = avg_vloss
            torch.save(model.state_dict(), BEST_MODEL_PATH)
    else:
        print()        

epoch_to_metrics

Starting training
epoch=0 -> Training loss=0.012114214747235313	Validation loss=0.0018776357266046557
epoch=1 -> Training loss=0.0011128074865387875	Validation loss=0.0008930278765967248
epoch=2 -> Training loss=0.0006918291025265327	Validation loss=0.0006936189759290851
epoch=3 -> Training loss=0.0005794045013155215	Validation loss=0.0006067763143943165
epoch=4 -> Training loss=0.0005163988303864653	Validation loss=0.0005494558098421351
epoch=5 -> Training loss=0.00047178651029541874	Validation loss=0.0005073598401451142
epoch=6 -> Training loss=0.0004381583325676877	Validation loss=0.0004745879350113527
epoch=7 -> Training loss=0.00041146563345214666	Validation loss=0.00044785888156496505
epoch=8 -> Training loss=0.00038937425475542726	Validation loss=0.00042531104850888283
epoch=9 -> Training loss=0.0003705589495490901	Validation loss=0.00040575966543687645


{0: [0.012114214747235313, 0.0018776357266046557],
 1: [0.0011128074865387875, 0.0008930278765967248],
 2: [0.0006918291025265327, 0.0006936189759290851],
 3: [0.0005794045013155215, 0.0006067763143943165],
 4: [0.0005163988303864653, 0.0005494558098421351],
 5: [0.00047178651029541874, 0.0005073598401451142],
 6: [0.0004381583325676877, 0.0004745879350113527],
 7: [0.00041146563345214666, 0.00044785888156496505],
 8: [0.00038937425475542726, 0.00042531104850888283],
 9: [0.0003705589495490901, 0.00040575966543687645]}

In [None]:
"""
metrics_df = pd.DataFrame(epoch_to_loss_acc).transpose().reset_index().rename(columns={"index": "Epoch", 0: "Loss", 1: "Speed avg error in wake prediction"})
metrics_df.plot(x="Epoch", y="Loss")
"""

'\nmetrics_df = pd.DataFrame(epoch_to_loss_acc).transpose().reset_index().rename(columns={"index": "Epoch", 0: "Loss", 1: "Speed avg error in wake prediction"})\nmetrics_df.plot(x="Epoch", y="Loss")\n'

In [None]:
"""
metrics_df.plot(x="Epoch", y="Speed avg error in wake prediction")
"""

'\nmetrics_df.plot(x="Epoch", y="Speed avg error in wake prediction")\n'

# Testing

In [13]:
# loading best one

model = UnivariateNN(input_space, output_space).to(DEVICE)
model.load_state_dict(torch.load(BEST_MODEL_PATH))
model.train(False)

total_loss = 0
with torch.no_grad():
    for batch in iter(test_dataloader):
        # to device
        x, y = batch[0].to(DEVICE), batch[1].to(DEVICE)

        prediction = model(x)
        tloss = loss_function(prediction, y)

        total_loss += tloss.item()

avg_loss = total_loss / len(test_dataloader) / BATCH_SIZE #TODO
print(f"Testing results: avg loss={avg_loss}")


Testing results: avg loss=1.3076779867828094e-06
