In [114]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

In [115]:
# Path of the CSV file that WakeFieldDataset loads with pd.read_csv.
# It is a relative path, so it is resolved against the current working directory.
DATAFRAME_FILEPATH = "data/wake_dataframe.csv"

In [116]:
# Number of samples per mini-batch handed to the DataLoader.
BATCH_SIZE = 64
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Seed PyTorch's global random number generator.
torch.manual_seed(0)

# Number of full passes over the training DataLoader.
EPOCHS = 500

# Width of the model's input layer.
INPUT_VARIABLES = 5
# Width of the model's output layer; also the number of trailing CSV columns
# that WakeFieldDataset treats as regression targets.
OUTPUT_VARIABLES = 3

# Module-level scaler that WakeFieldDataset fits on the input columns.
min_max_scaler = MinMaxScaler() # or StandardScaler ?

In [117]:
class WakeFieldDataset(Dataset):
    """Map-style dataset of (scaled inputs, targets) pairs read from a CSV file.

    The last ``output_variables`` columns of the CSV are the regression
    targets; every column before them is an input feature. Inputs are passed
    through ``scaler``; targets are left unscaled.

    Attributes:
        x: float32 tensor with one row of scaled input features per sample.
        y: float32 tensor with one row of raw target values per sample.
    """

    def __init__(self, dataframe_csv, output_variables=None, scaler=None, fit_scaler=True):
        """Load the CSV, scale the inputs and convert both halves to tensors.

        Args:
            dataframe_csv: Path (or file-like object) accepted by ``pd.read_csv``.
            output_variables: Number of trailing columns used as targets.
                Defaults to the notebook-level ``OUTPUT_VARIABLES``.
            scaler: Object exposing ``fit_transform`` / ``transform``.
                Defaults to the notebook-level ``min_max_scaler``.
            fit_scaler: When True (default) the scaler is fitted on this
                dataset's inputs. Pass False to reuse a scaler that was already
                fitted elsewhere (e.g. on a training split) without refitting it.

        Raises:
            ValueError: If ``output_variables`` is not positive or leaves no
                input column.
        """
        # Resolve the defaults lazily so the notebook globals are only needed
        # when the caller does not supply explicit values.
        if output_variables is None:
            output_variables = OUTPUT_VARIABLES
        if scaler is None:
            scaler = min_max_scaler

        # A count of 0 would turn ``iloc[:, :-0]`` into an empty feature frame.
        if output_variables <= 0:
            raise ValueError(
                f"output_variables must be positive, got {output_variables}"
            )

        self._dataframe = pd.read_csv(dataframe_csv)
        total_columns = self._dataframe.shape[1]
        if output_variables >= total_columns:
            raise ValueError(
                f"output_variables={output_variables} leaves no input column "
                f"in a frame with {total_columns} column(s)"
            )

        features = self._dataframe.iloc[:, :-output_variables]
        targets = self._dataframe.iloc[:, -output_variables:]

        if fit_scaler:
            scaled_features = scaler.fit_transform(features)
        else:
            scaled_features = scaler.transform(features)

        self.x = torch.tensor(scaled_features, dtype=torch.float32)
        self.y = torch.tensor(targets.values, dtype=torch.float32)

    def __len__(self):
        """Return the number of samples (rows of the CSV)."""
        return len(self.x)

    def __getitem__(self, idx):
        """Return the ``(inputs, targets)`` tensors stored at position ``idx``."""
        return self.x[idx], self.y[idx]

In [118]:
# Build the dataset from the CSV; constructing it also fits the shared min_max_scaler.
ds = WakeFieldDataset(DATAFRAME_FILEPATH)
#TODO split in training and test sets

# Until that split exists, the whole dataset is used for training (shuffled batches).
train_dataloader = DataLoader(ds, batch_size=BATCH_SIZE, shuffle=True)
#test_dataloader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=True)
# Number of batches per epoch; not displayed because it is not the cell's last expression.
len(train_dataloader)
# First (scaled inputs, targets) sample, shown as the cell output.
ds[0]

(tensor([0.0000, 0.2846, 0.0000, 0.0000, 0.8332]),
 tensor([9.6202, 0.7610, 3.4080]))

### Multivariate regression to predict the wind speed field

In [None]:
# Fully connected regression network: INPUT_VARIABLES -> 50 -> 250 -> OUTPUT_VARIABLES.
model = nn.Sequential(
    nn.Linear(INPUT_VARIABLES, 50),
    nn.ReLU(),
    nn.Linear(50, 250),
    nn.ReLU(),
    nn.Linear(250, OUTPUT_VARIABLES),
    nn.ReLU() #TODO? this final ReLU clamps every prediction to be non-negative
).to(DEVICE)

lr = 0.0000001
optimizer = torch.optim.Adam(
    (p for p in model.parameters() if p.requires_grad), lr=lr
)
loss_function = torch.nn.MSELoss()
model.train()

print("Starting training")
# Maps epoch index -> sum of the per-batch MSE losses seen during that epoch.
epoch_to_loss_acc = dict()
for epoch in range(EPOCHS):
    epoch_loss = 0
    for x, y in train_dataloader:
        # The model lives on DEVICE, so every batch has to be moved there as
        # well; otherwise the forward pass fails whenever CUDA is selected.
        x, y = x.to(DEVICE), y.to(DEVICE)

        # Gradients accumulate across backward() calls, so they must be
        # cleared before each batch for the step to use only this batch.
        optimizer.zero_grad()

        # Calling the module (rather than .forward) keeps registered hooks working.
        prediction = model(x)

        # loss and step
        loss = loss_function(prediction, y)
        loss.backward()
        #torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5) # TODO
        optimizer.step()

        epoch_loss += loss.item()

        #TODO accuracy or other metric computation

    print(f"{epoch=} -> {epoch_loss=}")
    epoch_to_loss_acc[epoch] = epoch_loss

epoch_to_loss_acc

In [None]:
# Per-epoch training loss as a two-column frame ("Epoch", "Loss").
# `epoch_to_loss_acc` maps each epoch to a single scalar loss, so the columns
# are built explicitly: handing a dict of scalars straight to pd.DataFrame
# raises a ValueError because no index can be inferred.
metrics_df = pd.DataFrame(
    {
        "Epoch": list(epoch_to_loss_acc.keys()),
        "Loss": list(epoch_to_loss_acc.values()),
    }
)
# The trailing semicolon hides the Axes repr in the notebook output.
metrics_df.plot(x="Epoch", y="Loss", title="Training loss per epoch");

In [None]:
# Disabled cell: the training loop does not compute this metric yet (see its
# "accuracy or other metric computation" TODO), so there is no such column to
# plot. The call is wrapped in a string literal so that it does not run.
"""
metrics_df.plot(x="Epoch", y="Speed avg error in wake prediction")
"""