# Special kursus

## Import of modules

In [1]:
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   
os.environ["CUDA_VISIBLE_DEVICES"]=f'{0}'
from __future__ import annotations

import torch

from modules import dataHandler, dataprocessing, models
from modules import scalers as scaling

%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU, and
# report which one was picked.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"cuda {torch.cuda.current_device()}" if device.type == "cuda" else "cpu")

cpu


## Load data

Loads all the datasets: regions, TS profiles, bathymetry, GHRSST and SMOS.\
The TS profiles are also cleaned and split into different regions.

In [3]:
# Unpack everything returned by dataHandler.load_all, one name per line and
# grouped by the dataset each value belongs to.
(
    # Regions and TS profiles
    regions,
    ts_profiles,
    # Bathymetri
    bathymetri_lat,
    bathymetri_lon,
    bathymetri_topography,
    # GHRSST
    ghrsst_lat,
    ghrsst_lon,
    ghrsst_sst,
    ghrsst_times,
    ghrsst_fraction_time,
    ghrsst_time_bnds,
    # SMOS
    smos_sss,
    smos_time,
    smos_lat,
    smos_lon,
) = dataHandler.load_all(verbose=True)

Bathymetri [2/4]          

cannot be safely cast to variable data type
  bathymetri_topography: np.ndarray = bathymetri['bedrock_topography'][:].data[bathymetri_lat > 60, :]
cannot be safely cast to variable data type
  bathymetri_topography: np.ndarray = bathymetri['bedrock_topography'][:].data[bathymetri_lat > 60, :]


SMOS [4/4]            

In [4]:
# Assign the TS profiles to regions; the regions are tagged with CRS 4326
# before being handed to the splitter.
ts_areas = dataprocessing.split_regions(
    ts_profiles,
    regions.set_crs(4326),
)

  points = gp.GeoDataFrame(ts_profiles.point, columns=['geometry']).set_crs(4326)
  return np.array([getattr(profile, __name) for profile in self.profiles], dtype=self.__get_type(out))
  exec(code_obj, self.user_global_ns, self.user_ns)


## Creation of training, validation and testing datasets

The following code splits the dataset into three datasets (training, validation and testing).\
Data outside the GHRSST time range is discarded.

In [5]:
# Time window covered by GHRSST; it is passed to the splitter as the bounds.
min_time = min(ghrsst_fraction_time)
max_time = max(ghrsst_fraction_time)

# Boundaries between the training, validation and test periods.
# NOTE(review): presumably calendar years; whether they are inclusive depends
# on split_data_set, so confirm there.
training_end, validation_end = 2012, 2015

train, val, test, areas = dataprocessing.split_data_set(
    ts_areas,
    training_end,
    validation_end,
    min_time,
    max_time,
)

# Training

Converts the dict objects into dataloaders containing torch tensors and scales each feature

In [6]:
# Scaler class chosen for each feature. The insertion order of this mapping is
# the order in which the scalers are handed to process_data.
feature_scalers = {
    "lat": scaling.MeanScaling,
    "lon": scaling.MeanScaling,
    "year": scaling.MeanScaling,
    "decimal year": scaling.MeanScaling,
    "sss": scaling.MeanScaling,
    "sst": scaling.MeanScaling,
    "surface depth": scaling.MeanScaling,
    "bathymetri": scaling.MinMaxScaling,
    "salinity profile": scaling.MeanScaling,
    "temperature profile": scaling.MeanScaling,
}
scalers = list(feature_scalers.values())

# Build the three data loaders; `scalers` is rebound to whatever process_data
# returns in its first slot.
scalers, train_loader, val_loader, test_loader = dataprocessing.process_data(
    train=train,
    val=val,
    test=test,
    bathymetri_lat=bathymetri_lat,
    bathymetri_lon=bathymetri_lon,
    bathymetri_topography=bathymetri_topography,
    scalers=scalers,
)

Trains the model and saves it

In [7]:
# Seed torch's global RNG right before training so that re-running this cell
# (or the whole notebook) starts from the same random state.
# NOTE(review): train_model may also draw from other RNGs (numpy / random);
# that is not visible from this notebook, so seed those too if it does.
SEED = 0
torch.manual_seed(SEED)

# Train on the training loader, validating on the validation loader; the
# call returns the training and validation loss.
training_loss, validation_loss = models.train_model(train_loader, val_loader)

Model:
	RNN:
		Input 8, Output 21, Layers 2
	Linear:
		Input 21, Output 21, Layers 1
Optimizer:
	lr 0.5, momentum 0.8 weight decay 0.0001

[  1/100] Salinity Training Loss 1.49739, lr 0.500 | Validation Loss 1.21736, MedAE 0.865
[ 10/100] Salinity Training Loss 0.75499, lr 0.490 | Validation Loss 0.48421, MedAE 0.312
[ 20/100] Salinity Training Loss 0.60842, lr 0.457 | Validation Loss 0.46368, MedAE 0.301
[ 30/100] Salinity Training Loss 0.56430, lr 0.403 | Validation Loss 0.46259, MedAE 0.309
[ 40/100] Salinity Training Loss 0.58190, lr 0.335 | Validation Loss 0.48199, MedAE 0.329
[ 50/100] Salinity Training Loss 0.54648, lr 0.258 | Validation Loss 0.45904, MedAE 0.312
[ 60/100] Salinity Training Loss 0.54855, lr 0.181 | Validation Loss 0.44789, MedAE 0.299
[ 70/100] Salinity Training Loss 0.51197, lr 0.110 | Validation Loss 0.44737, MedAE 0.298
[ 80/100] Salinity Training Loss 0.49636, lr 0.053 | Validation Loss 0.44973, MedAE 0.300
[ 90/100] Salinity Training Loss 0.49082, lr 0.016 