In [1]:
!pip install -f https://download.pytorch.org/whl/cu101/torch_stable.html torch==1.8.0+cu101 torchvision==0.9.0
!pip install wandb==0.12.1

Looking in links: https://download.pytorch.org/whl/cu101/torch_stable.html


In [1]:
from tqdm import tqdm
from os import listdir
from random import *
seed(0)


import pandas as pd

from pylab import *

import torch
import torch.nn as nn
import torch.nn.functional as F

from torch import optim
from torch.utils.data import DataLoader, random_split

from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_percentage_error

import wandb

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device = "cuda:1"

### Data trepanation

In [2]:
dataset = 'barents'
folder_train, folder_val, folder_test = dataset + '/train/maps/', dataset + '/valid/maps/', dataset + '/test/maps/'
train_files, val_files, test_files = sorted([file for file in listdir(folder_train)])[1346:],\
                                     sorted([file for file in listdir(folder_val)]),\
                                     sorted([file for file in listdir(folder_test)]) # + 1200

sample = torch.load(folder_train + train_files[1])
grid = torch.load(dataset + "/train/grid.pt")
coverage = pd.read_csv(dataset + "/train/coverage.csv")

len(train_files), len(val_files), len(test_files)

(1812, 366, 365)

In [3]:
def preprocess_image(tensor: torch.Tensor) -> torch.Tensor:
    return (torch.nan_to_num(tensor, nan=-10.0) + grid['land']*10)

In [4]:
images_train = [preprocess_image(torch.load(folder_train + train_files[i])["jaxa.sic"])\
                            for i in tqdm(range(0, len(train_files)))]

images_val = [preprocess_image(torch.load(folder_val + val_files[i])["jaxa.sic"])\
                            for i in tqdm(range(0, len(val_files)))]

def handler(data: dict):
    try:
        return data["jaxa.sic"]
    except:
        return data["jaxa.sic_asc"]
    
images_test = [preprocess_image(handler(torch.load(folder_test + test_files[i])))\
                            for i in tqdm(range(0, len(test_files)))]

images_tensor_train, images_tensor_val, images_tensor_test = torch.stack(images_train).to(device),\
                                                             torch.stack(images_val).to(device),\
                                                             torch.stack(images_test).to(device)
images_tensor_train.shape

100%|██████████| 1812/1812 [00:32<00:00, 56.35it/s]
100%|██████████| 366/366 [00:06<00:00, 55.70it/s]
100%|██████████| 365/365 [00:06<00:00, 56.78it/s]


torch.Size([1812, 360, 500])

In [5]:
def average(data: list) -> float:
    return sum(data)/len(data)

def load_data(batch_size, images_tensor, d_in, d_out):
    in_batch, out_batch = [], []
    for i in range(batch_size):
        start_point = choice(range(len(images_tensor) - d_in - d_out))
        in_batch.append(images_tensor[start_point:start_point+d_in])
        out_batch.append(images_tensor[start_point+d_in:start_point+d_in+d_out])
    return torch.stack(in_batch), torch.stack(out_batch)

def train(model, criterion, d_in, d_out, epochs, batch_size, lr, eval_step):
    optimizer = optim.Adam(model.parameters(), lr=lr) #weight_decay=1e-8, momentum=0.9
#    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max', patience=2)  # goal: maximize Dice score
#    grad_scaler = torch.cuda.amp.GradScaler(enabled=amp)
    for epoch in range(1, epochs+1):
#        print("Current epoch", epoch)
        model.train()
        images_in, images_out = load_data(batch_size, images_tensor_train, d_in, d_out)
        optimizer.zero_grad()
        model_out = model(images_in)
        loss = criterion(model_out, images_out)
        loss.backward(retain_graph=True)
        optimizer.step()
        if epoch % eval_step == 0:
            mae_total, rmse_total, mape_total = [], [], []
            
            total_test_len = len(images_tensor_test)
            start_point, steps = 0, int(total_test_len/(d_in + d_out))
            
            for i in range(steps):
                images_in, images_out = images_tensor_test[start_point:start_point+d_in],\
                                        images_tensor_test[start_point+d_in:start_point+d_in+d_out]
                
                start_point += d_in + d_out
            
                model_out = model(images_in[None, :, :, :])
                
                criterion_mse = nn.MSELoss()
                
                loss_mse = criterion_mse(model_out, images_out[None, :, :, :])
                loss_mae = criterion(model_out, images_out[None, :, :, :])
                
                mae_total.append(loss_mae.detach().cpu().numpy())
                rmse_total.append(math.sqrt(loss_mse.detach().cpu().numpy()))
                
            
            wandb.log(
                {
                 "test/test_MAE": average(mae_total), 
                 "test/test_RMSE": average(rmse_total)
                }
            )
            
            print('Epoch ', epoch, ', test MAE - ', average(mae_total))

In [6]:
architecture = "VGG19_default"
i = 0
wandb.init(project="SeaIcePrediction", entity="eighonet", group=architecture)
wandb.run.name = architecture + "_" + str(i)  # "gs_3_ffd_3_128_test_MAEloss_lr_10^{-4}_10000"
wandb.run.save()

[34m[1mwandb[0m: Currently logged in as: [33msbercv[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.21 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade
2022-07-25 10:17:49.337534: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1




True

In [4]:
from torchvision import datasets, transforms, models

In [19]:
model = models.vgg19(pretrained=False)
#model = resnet50()



In [20]:
model.features[0] = nn.Conv2d(7, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
model = torch.nn.Sequential(*(list(model.children())[:-2]))

In [21]:
device = 'cuda'
model = model.to(device)

In [22]:
test_input = torch.zeros(8, 7, 360, 500).to(device)

In [23]:
model(test_input).shape

torch.Size([8, 512, 5, 7])

In [24]:
conv_int1 = nn.Conv2d(256, 256, kernel_size=3, padding=1, bias=False).to(device)
conv_int2 = nn.Conv2d(128, 128, kernel_size=3, padding=1, bias=False).to(device)

up1 = nn.ConvTranspose2d(512, 512 // 2, kernel_size=4, stride=(4,4)).to(device)
up2 = nn.ConvTranspose2d(256, 256 // 2, kernel_size=2, stride=(2,2)).to(device)
up3 = nn.ConvTranspose2d(128, 128 // 42, kernel_size=(9,5), stride=(9,9)).to(device)

#print(resnet152_aug(test_input).shape)
print(conv_int1(up1(model(test_input))).shape)
#print(up2(up1(model(test_input))).shape)
print(conv_int2(up2(up1(model(test_input)))).shape)
print(up3(conv_int2(up2(up1(model(test_input))))).shape)
#print(up4(up3(conv_int2(up2(up1(model(test_input)))))).shape)
#print(up5(up4(up3(up2(up1(model(test_input)))))).shape)
#print(up6(up5(up4(up3(up2(up1(model(test_input))))))).shape)

torch.Size([8, 256, 20, 28])
torch.Size([8, 128, 40, 56])
torch.Size([8, 3, 360, 500])


In [25]:
vgg19_aug = nn.Sequential(
    model,
    nn.ConvTranspose2d(512, 512 // 2, kernel_size=4, stride=(4,4)),
    nn.Conv2d(256, 256, kernel_size=3, padding=1, bias=False),
    nn.ReLU(inplace=True),
    nn.ConvTranspose2d(256, 256 // 2, kernel_size=2, stride=(2,2)),
    nn.Conv2d(128, 128, kernel_size=3, padding=1, bias=False).to(device),
    nn.ReLU(inplace=True),
    nn.ConvTranspose2d(128, 128 // 42, kernel_size=(9,5), stride=(9,9))
).to(device)

In [26]:
vgg19_aug(test_input).shape

torch.Size([8, 3, 360, 500])

In [27]:
torch.save(vgg19_aug, 'vgg_test.pt')

In [14]:
model.requires_grad = False

In [17]:
from models.unet.unet.unet_model import UNet

epochs = 1000
d_in, d_out = 7, 3
batch_size = 8
lr = 1e-3
eval_step = 20
criterion = nn.L1Loss()

train(vgg19_aug, criterion, d_in, d_out, epochs, batch_size, lr, eval_step)

Epoch  20 , test MAE -  12.69529300265842
Epoch  40 , test MAE -  12.684488084581163
Epoch  60 , test MAE -  12.67928738064236
Epoch  80 , test MAE -  12.676917182074654
Epoch  100 , test MAE -  12.67539554172092
Epoch  120 , test MAE -  12.67483859592014
Epoch  140 , test MAE -  12.674826727973091
Epoch  160 , test MAE -  12.674792819552952
Epoch  180 , test MAE -  12.674880981445312
Epoch  200 , test MAE -  12.674822489420572
Epoch  220 , test MAE -  12.674756368001303
Epoch  240 , test MAE -  12.6749267578125
Epoch  260 , test MAE -  12.674857245551216
Epoch  280 , test MAE -  12.674819946289062
Epoch  300 , test MAE -  12.674749586317274
Epoch  320 , test MAE -  12.674841139051649
Epoch  340 , test MAE -  12.674908108181423
Epoch  360 , test MAE -  12.674836052788628
Epoch  380 , test MAE -  12.674738566080729
Epoch  400 , test MAE -  12.674845377604166
Epoch  420 , test MAE -  12.674830966525608
Epoch  440 , test MAE -  12.674806382921007
Epoch  460 , test MAE -  12.67477077907986

In [91]:
train(vgg19bn_aug, criterion, d_in, d_out, 50000, 16, 1e-4, eval_step)

Epoch  20 , test MAE -  7.720959133572048
Epoch  40 , test MAE -  7.728277418348524
Epoch  60 , test MAE -  7.720995585123698
Epoch  80 , test MAE -  7.707149081759983
Epoch  100 , test MAE -  7.712307400173611
Epoch  120 , test MAE -  7.705301072862413
Epoch  140 , test MAE -  7.704624599880642
Epoch  160 , test MAE -  7.704344007703993
Epoch  180 , test MAE -  7.698826260036892
Epoch  200 , test MAE -  7.708406236436632
Epoch  220 , test MAE -  7.70106675889757
Epoch  240 , test MAE -  7.6994069417317705
Epoch  260 , test MAE -  7.708365546332465
Epoch  280 , test MAE -  7.702963087293837
Epoch  300 , test MAE -  7.7069286770290795
Epoch  320 , test MAE -  7.7067921956380205
Epoch  340 , test MAE -  7.701663547092014
Epoch  360 , test MAE -  7.702557881673177
Epoch  380 , test MAE -  7.703909132215712
Epoch  400 , test MAE -  7.70243665907118
Epoch  420 , test MAE -  7.699701097276476
Epoch  440 , test MAE -  7.697693718804254
Epoch  460 , test MAE -  7.696612040201823
Epoch  480 , t

KeyboardInterrupt: 