### Model Training
This notebook is an example of how to train the model from a CSV file containing the dataset.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from ANN import ANN, optimParameters, createDataLoader, normUnitvar, cross_entropy
import copy
import torch
import pandas as pd
import pickle

from torch.utils.cpp_extension import CUDA_HOME
# Show the CUDA_HOME value that PyTorch's extension utilities resolved
print(CUDA_HOME)

# Use the GPU when PyTorch reports CUDA as available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

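# Optional one-off preprocessing step: merge the data from the different files.
# NOTE: the path below is machine-specific; point it at your local data directory before uncommenting.
# from create_data import merge_files
# merge_files("C:/Users/Woute/Documents/Studie/Master_AES/CEGM2003 Data Science and AI/Morph_3/dsaie_morph3/data/satellite/averages/average_training_r2")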

None
Using device: cpu


In [20]:
# Paths to the datasets: `path` supplies the training rows, `path2` the test rows
path = "test_r1_data/undersampled_merged_features.csv"
path2 = "test_r1_data/merged_features.csv"

# Year held out as the test set (its rows are excluded from the training data)
test_year = 2020

# Seed the global RNGs so reruns give the same results.
# NOTE(review): assumes createDataLoader / ANN draw from the torch or NumPy
# global generators -- confirm against ANN.py.
seed = 0
np.random.seed(seed)
torch.manual_seed(seed)

# Columns used as model inputs and as the prediction target
features = ['distance', 'sin_angle', 'cos_angle', 'river_width', 'water_in_range']
target_variable = ['next_year_water']

# Parameters
lambda__values = [0.005] # Regularization parameter
input_dim = len(features) # Number of features (derived so it cannot drift from the list above)
output_dim = 1 # Number of outputs
hidden_layers = 1 # Number of hidden layers
hidden_nodes = 5  # Number of nodes in the hidden layer
activation_function = 'relu' # Activation function

# Load dataset
data = pd.read_csv(path)
data2 = pd.read_csv(path2)
data_test = data2[data2['year'] == test_year]
data_training = data[data['year'] != test_year]

# Fail early with a clear message instead of training/evaluating on nothing
if data_training.empty:
    raise ValueError(f"No training rows left in '{path}' after excluding year {test_year}.")
if data_test.empty:
    raise ValueError(f"No test rows found in '{path2}' for year {test_year}.")

# Build the normalizer from the training features only; the same normalizer
# object is then applied to the test features below.
X = torch.tensor(data_training[features].values, dtype=torch.float32).to(device)
X_normalizer = normUnitvar(X)

X_test = torch.tensor(data_test[features].values, dtype=torch.float32).to(device)
X_test_norm = X_normalizer.normalize(X_test)

# Store the targets in tensors (one column per entry in target_variable)
targets = torch.tensor(data_training[target_variable].values, dtype=torch.int8).to(device)
T_test = torch.tensor(data_test[target_variable].values, dtype=torch.int8).to(device)

# Normalized training features
X_norm = X_normalizer.normalize(X)

# Wrap the normalized features and targets in a dataset and obtain the
# training / validation loaders from it
train_loader, val_loader = createDataLoader(torch.utils.data.TensorDataset(X_norm, targets), batch_size=64)

# Track the best model over all regularization values.
# Everything is initialised up front so a failed sweep is detected here
# rather than as a NameError in a later cell.
min_loss = float('inf')
best_lambda = None
best_model = None
best_metrics = None

for lambda_val in lambda__values:
    # Initialize a fresh model for this regularization value
    model = ANN(input_dim, hidden_layers, hidden_nodes, output_dim, activation_function).to(device)
    w = model.parameters()

    # Train model
    model, val_loss, metrics = optimParameters(model, w, train_loader, val_loader, lambda_val, n_epochs=2000)

    print(f"Model with lambda = {lambda_val} has a loss of {val_loss}\n")

    # Keep a copy of the best model so far.
    # A NaN loss never compares as smaller, so such a run is skipped.
    if val_loss < min_loss:
        min_loss = val_loss
        best_lambda = lambda_val
        best_model = copy.deepcopy(model)
        best_metrics = metrics

if best_model is None:
    raise RuntimeError(
        "No model was selected: lambda__values is empty or no run produced a finite validation loss."
    )

print(f"Best model for lambda={best_lambda} with a loss {min_loss}.")

# Save the best model and the metrics
# torch.save(best_model, f"models/best_model_{hidden_layers}_{hidden_nodes}_{best_lambda}.pth")
# np.save(f"models/metrics_{hidden_layers}_{hidden_nodes}_{best_lambda}.npy", best_metrics)

# Save the training normalizer
# with open("models/data_normalizer_class.pkl", 'wb') as output:
#     pickle.dump(X_normalizer, output, pickle.HIGHEST_PROTOCOL)

Epoch: 0, Validation Loss: 0.6934574246406555
Epoch: 20, Validation Loss: 0.6935567855834961
Final epoch: 26, loss: 0.693700909614563, best model at epoch 5 with loss 0.693143367767334
Model with lambda = 0.005 has a loss of 0.693143367767334

Best model for lambda=0.005 with a loss 0.693143367767334.


In [21]:
# Determine the test loss.
# Switch to evaluation mode first; this only matters if the model contains
# layers that behave differently during training (e.g. dropout, batch norm).
best_model.eval()

# No gradients are needed for evaluation, so skip building the autograd graph
with torch.no_grad():
    t_hat = best_model(X_test_norm.to(device))
    test_loss = torch.nn.BCELoss()(t_hat.float(), T_test.float())

print(f"Test loss for the best model is: {test_loss}")

Test loss for the best model is: 0.6904420256614685
