In [2]:
import os
import sys

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

# The project-local `dataloader` module lives outside the notebook directory,
# so its folder has to be on sys.path before the import below.
sys.path.insert(1, '/scratch2/biomass_estimation/code/ml')
from dataloader import *
import wandb

In [3]:
import pickle

# Read the serialized normalization statistics (file opened in binary mode)
with open('../data/normalization_values.pkl', 'rb') as f:
    data = pickle.loads(f.read())

# Show the loaded statistics so they can be inspected
print(data)

{'S2_bands': {'B01': {'mean': 0.13021514, 'std': 0.017152175, 'min': 1e-04, 'max': 1.1213, 'p1': 0.1273, 'p99': 0.1074}, 'B02': {'mean': 0.1363337, 'std': 0.018509913, 'min': 1e-04, 'max': 1.8768, 'p1': 0.1366, 'p99': 0.1128}, 'B03': {'mean': 0.16427371, 'std': 0.02087248, 'min': 0.0411, 'max': 1.7888, 'p1': 0.1692, 'p99': 0.1364}, 'B04': {'mean': 0.13865142, 'std': 0.025569845, 'min': 0.0121, 'max': 1.7232, 'p1': 0.1445, 'p99': 0.1184}, 'B05': {'mean': 0.20296873, 'std': 0.028621713, 'min': 0.0672, 'max': 1.6344, 'p1': 0.2157, 'p99': 0.1591}, 'B06': {'mean': 0.38582557, 'std': 0.070499, 'min': 0.0758, 'max': 1.6699, 'p1': 0.3286, 'p99': 0.2766}, 'B07': {'mean': 0.4361872, 'std': 0.086211845, 'min': 0.0573, 'max': 1.6645, 'p1': 0.3621, 'p99': 0.2278}, 'B08': {'mean': 0.4448093, 'std': 0.08623231, 'min': 0.0737, 'max': 1.6976, 'p1': 0.3588, 'p99': 0.2122}, 'B8A': {'mean': 0.4580875, 'std': 0.08798952, 'min': 0.0772, 'max': 1.6709, 'p1': 0.3775, 'p99': 0.26}, 'B09': {'mean': 0.45806482, 

### creating the dataset mapping (train, test, val)

In [4]:
import h5py
import pandas as pd
import pickle
import os
from sklearn.model_selection import train_test_split

# Split container: maps each split name to the list of tile names assigned to it.
data = {'train': [], 'val': [], 'test': []}
path_h5 = '/scratch2/biomass_estimation/code/ml/data/data_no_outliers/'

# Collect the top-level keys of every .h5 file in `path_h5`.
# Presumably each key is one Sentinel-2 tile id - TODO confirm.
# os.listdir() returns entries in arbitrary order; sorting the file names makes
# the order of `all_tiles`, and therefore the seeded split below, reproducible.
all_tiles = []
for fname in sorted(os.listdir(path_h5)):
    if fname.endswith('.h5'):
        with h5py.File(os.path.join(path_h5, fname), 'r') as f:
            # Get the list of all tiles in the file
            all_tiles.extend(list(f.keys()))

# Drop repeated names (keeping first-seen order): a tile that occurs in more
# than one file must not be assigned to two different splits.
all_tiles = list(dict.fromkeys(all_tiles))

# 65 % of the tiles go to training; the remaining 35 % are divided 40/60
# between validation and test.
train_tiles, test_and_val_tiles = train_test_split(all_tiles, test_size=0.35, random_state=42)
val_tile, test_tile = train_test_split(test_and_val_tiles, test_size=0.6, random_state=42)
data['val'].extend(val_tile)
data['test'].extend(test_tile)
data['train'].extend(train_tiles)

print("training tiles: ", len(data['train']))
print(data['train'])
print("validation tiles: ", len(data['val']))
print(data['val'])
print("testing tiles: ", len(data['test']))
print(data['test'])

# Pickle the split mapping (a plain dict of lists) and save it to a file
with open('/scratch2/biomass_estimation/code/ml/data/mapping.pkl', 'wb') as f:
    pickle.dump(data, f)

training tiles:  45
['53UNQ', '52UFV', '53TPN', '53ULQ', '51UWQ', '53UNT', '52UFU', '51UXS', '52UFC', '51UVP', '53UMR', '50UQA', '51UVT', '51TWM', '53UNS', '51TVM', '52UFA', '54UUU', '54UVV', '54UVU', '51UVR', '51UYT', '52UGB', '51UYS', '53ULT', '51UXR', '53UPS', '54UUC', '51UUQ', '52UEA', '54UVA', '54UUA', '52UFB', '52UDA', '53TPL', '54UUV', '50TPT', '53UMT', '51UWS', '50TQT', '51UXQ', '51UWR', '53UPT', '51UVQ', '53UMS']
validation tiles:  10
['51UVS', '52UEV', '52UEC', '53UPP', '51TVN', '52UEB', '52UDC', '54UUB', '53ULR', '51UUP']
testing tiles:  15
['53TQN', '53UPQ', '51UWT', '52UCA', '51TWN', '51UUT', '51TVL', '50TPS', '52UGU', '51UWP', '53UMQ', '54UVC', '52UDB', '50UQB', '53UNR']


### defining loss and training args

In [5]:
class RMSE(nn.Module):
    """
    Weighted root-mean-square error.

    Computes ``sqrt(mean(weights * (prediction - target) ** 2))`` where the
    mean runs over every element of the weighted squared error.
    """

    def __init__(self):
        super().__init__()
        # reduction='none' keeps the per-element squared errors so they can be
        # weighted before they are averaged.
        self.mse = torch.nn.MSELoss(reduction='none')

    def forward(self, prediction, target, weights = 1):
        """
        Args:
            prediction: tensor of predicted values.
            target: tensor of reference values, broadcastable with `prediction`.
            weights: scalar or tensor multiplied element-wise with the squared
                errors; the default of 1 gives the plain RMSE.

        Returns:
            Zero-dimensional tensor holding the weighted RMSE.
        """
        # Implemented as `forward` (not `__call__`) so that nn.Module.__call__
        # keeps running its hook machinery; `RMSE()(pred, target)` still works.
        # prediction = prediction[:, 0]
        return torch.sqrt(torch.mean(weights * self.mse(prediction, target)))


class Args:
    """
    Plain settings object; an instance is passed to `GEDIDataset` as `args=`.

    NOTE(review): how each flag is interpreted is decided inside the dataset
    code, which is not visible here - confirm the meanings there.
    """

    def __init__(self):
        settings = {
            'latlon': True,
            # Names of the twelve bands listed for the run
            'bands': ['B01', 'B02', 'B03', 'B04', 'B05', 'B06', 'B07', 'B08', 'B8A', 'B09', 'B11', 'B12'],
            'bm': True,
            # Patch extent; the training loops read the prediction at index [7, 7]
            'patch_size': [15, 15],
            'norm_strat': 'pct',
            'norm': False,
        }
        # Expose every setting as an instance attribute, in declaration order.
        for key, value in settings.items():
            setattr(self, key, value)

# Settings instance shared by every dataset constructed in the cells below
args = Args()

# HDF5 files handed to the dataset as `fnames`
fnames = [
    'data_nonan_0-5.h5',
    'data_nonan_1-5.h5',
    'data_nonan_2-5.h5',
    'data_nonan_3-5.h5',
    'data_nonan_4-5.h5',
]

### defining the UNetFCN model

In [6]:
class UNetFCN(nn.Module):
    def __init__(self,
                 in_features=18,
                 channel_dims = (16, 32, 64, 128, 64, 32, 16),
                 num_outputs=1,
                 kernel_size=3,
                 stride=1):
        """
        A fully convolutional network: a stack of Conv2d -> BatchNorm2d -> ReLU
        blocks followed by a 1x1 output convolution.

        Despite the class name there are no down-/up-sampling stages and no
        skip connections; the channel widths merely widen and then narrow.

        Args:
            in_features: number of channels of the input tensor.
            channel_dims: output channels of each convolutional block, in order.
            num_outputs: number of channels produced by the output convolution.
            kernel_size: kernel size shared by all convolutional blocks.
            stride: stride shared by all convolutional blocks.
        """
        # Zero-argument super(): the previous super(SimpleFCN, self) named a
        # different class and failed (NameError on a fresh kernel, TypeError
        # once SimpleFCN exists).
        super().__init__()
        self.relu = nn.ReLU(inplace = True)
        # "Same" padding for odd kernels at stride 1, so the output keeps the
        # spatial size of the input (equals the former fixed value 1 for the
        # default kernel_size=3).
        padding = kernel_size // 2
        layers = list()
        for i in range(len(channel_dims)):
            in_channels = in_features if i == 0 else channel_dims[i-1]
            layers.append(nn.Conv2d(in_channels=in_channels,
                                    out_channels=channel_dims[i],
                                    kernel_size=kernel_size, stride=stride, padding=padding))
            layers.append(nn.BatchNorm2d(num_features=channel_dims[i]))
            # The ReLU has no parameters, so one instance is reused in every block.
            layers.append(self.relu)
        self.conv_layers = nn.Sequential(*layers)

        # 1x1 convolution mapping the last feature map to `num_outputs` channels
        self.conv_output = nn.Conv2d(in_channels=channel_dims[-1], out_channels=num_outputs, kernel_size=1,
                                     stride=1, padding=0, bias=True)

    def forward(self, x):
        """
        Run the convolutional stack and the output convolution on `x`.

        With the default arguments a (N, in_features, H, W) input yields a
        (N, num_outputs, H, W) output.
        """
        x = self.conv_layers(x)
        x = self.conv_output(x)

        return x

Training the UNetFCN model for 10 epochs with SGD and a learning rate of 0.001 (model3)

In [7]:

# Hyper-parameters of this run.
NUM_EPOCHS = 10
LEARNING_RATE = 0.001
BATCH_SIZE = 512
NUM_WORKERS = 8
LOG_EVERY = 20  # batches between progress logs / saved prediction patches

# Only the prediction at the centre pixel of each patch is compared with the
# target: index 7 for the 15x15 patches configured in `args.patch_size`.
# Assumes the model output has the same spatial size as the input patch - TODO confirm.
center_row, center_col = (side // 2 for side in args.patch_size)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
dataset_paths = {'h5':'/scratch2/biomass_estimation/code/ml/data', 'norm': '/scratch2/biomass_estimation/code/ml/data', 'map': '/scratch2/biomass_estimation/code/ml/data/'}

# np.save() does not create missing directories.
os.makedirs('training_predictions', exist_ok=True)

wandb.init()
try:
    model = UNetFCN()
    wandb.watch(model, log_freq=100)
    model = model.to(device)

    # Loss function and optimizer. The loss module is built once and shared by
    # the training and validation loops.
    criterion = RMSE()
    optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE)

    ds_training = GEDIDataset(dataset_paths, fnames = fnames, chunk_size = 1, mode = 'train', args = args)
    trainloader = DataLoader(dataset = ds_training, batch_size = BATCH_SIZE, shuffle = True, num_workers = NUM_WORKERS)
    ds_validation = GEDIDataset(dataset_paths, fnames = fnames, chunk_size = 1, mode = 'val', args = args)
    validloader = DataLoader(dataset = ds_validation, batch_size = BATCH_SIZE, shuffle = False, num_workers = NUM_WORKERS)

    min_valid_loss = float('inf')
    plotnr = 0  # running index that prefixes every saved prediction file

    for epoch in range(NUM_EPOCHS):
        # ---- training ----
        train_loss = 0.0
        model.train()
        for i, (inputs, targets) in enumerate(trainloader, start=1):
            inputs, targets = inputs.to(device), targets.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            # squeeze(1) removes only the channel axis, so a final batch holding
            # a single sample keeps its batch dimension.
            # Assumes targets has shape (batch,) - TODO confirm.
            loss = criterion(outputs[:, :, center_row, center_col].squeeze(1), targets)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

            if i % LOG_EVERY == 0:
                print(f'Epoch {epoch+1} \t Batch {i} \t Training Loss: {train_loss / i}')
                wandb.log({'train_loss': train_loss / i})
                # Keep the predicted patch of the first sample for later inspection.
                np.save(f'training_predictions/{plotnr}training_outputs_epoch{epoch+1}_batch{i}_unet.npy', outputs[0,0].detach().cpu().numpy())
                plotnr+=1

        # ---- validation ----
        valid_loss = 0.0
        model.eval()
        # No gradients are needed here; skipping the autograd graph saves memory.
        with torch.no_grad():
            for i, (inputs, targets) in enumerate(validloader, start=1):
                inputs, targets = inputs.to(device), targets.to(device)

                outputs = model(inputs)
                loss = criterion(outputs[:, :, center_row, center_col].squeeze(1), targets)
                valid_loss += loss.item()
                if i % LOG_EVERY == 0:
                    print(f'Epoch {epoch+1} \t Batch {i} \t Validation Loss: {valid_loss / i}')
                    wandb.log({'valid_loss': valid_loss / i})
                    np.save(f'training_predictions/{plotnr}validation_outputs_epoch{epoch+1}_batch{i}_unet.npy', outputs[0,0].detach().cpu().numpy())
                    plotnr+=1

        print(f'Epoch {epoch+1} Training Loss: {train_loss / len(trainloader)} Validation Loss: {valid_loss / len(validloader)}')

        # Checkpoint whenever the summed validation loss improves.
        if min_valid_loss > valid_loss:
            print(f'Validation Loss Decreased({min_valid_loss}--->{valid_loss}) Saving The Model')
            min_valid_loss = valid_loss

            # Saving State Dict
            # NOTE(review): the SimpleFCN training cell saves to this same file
            # name, so a later run overwrites this checkpoint.
            torch.save(model.state_dict(), 'saved_model3.pth')

        print(f"Epoch {epoch+1} completed")
finally:
    # Close the wandb run even when training is interrupted (e.g. by a
    # KeyboardInterrupt) so it is not left open for the next wandb.init().
    wandb.finish()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mdsenti[0m ([33mdose[0m). Use [1m`wandb login --relogin`[0m to force relogin


Epoch 1 	 Batch 20 	 Training Loss: 136.9248016357422
Epoch 1 	 Batch 40 	 Training Loss: 135.94439373016357
Epoch 1 	 Batch 60 	 Training Loss: 135.69731305440266
Epoch 1 	 Batch 80 	 Training Loss: 135.44433317184448
Epoch 1 	 Batch 100 	 Training Loss: 134.57679115295412
Epoch 1 	 Batch 120 	 Training Loss: 132.8104523976644
Epoch 1 	 Batch 140 	 Training Loss: 130.16272577558246
Epoch 1 	 Batch 160 	 Training Loss: 126.6805968284607
Epoch 1 	 Batch 180 	 Training Loss: 122.05983365376791
Epoch 1 	 Batch 200 	 Training Loss: 116.77051052093506
Epoch 1 	 Batch 220 	 Training Loss: 111.78964432803068
Epoch 1 	 Batch 240 	 Training Loss: 107.39167443911235
Epoch 1 	 Batch 260 	 Training Loss: 103.57279670421894
Epoch 1 	 Batch 280 	 Training Loss: 100.37322741917201
Epoch 1 	 Batch 300 	 Training Loss: 97.46230312347411
Epoch 1 	 Batch 320 	 Training Loss: 95.06554889678955
Epoch 1 	 Batch 340 	 Training Loss: 92.8478827196009
Epoch 1 	 Batch 360 	 Training Loss: 90.85709658728706
Epoc

KeyboardInterrupt: 

In [18]:
class SimpleFCN(nn.Module):
    def __init__(self,
                 in_features=18,
                 channel_dims = (16, 32, 64),
                 num_outputs=1,
                 kernel_size=1,
                 stride=1):
        """
        A simple fully convolutional neural network: a stack of
        Conv2d -> BatchNorm2d -> ReLU blocks followed by a 1x1 output
        convolution. With the default kernel_size=1 every output pixel depends
        only on the input pixel at the same position.

        Args:
            in_features: number of channels of the input tensor.
            channel_dims: output channels of each convolutional block, in order.
            num_outputs: number of channels produced by the output convolution.
            kernel_size: kernel size shared by all convolutional blocks.
            stride: stride shared by all convolutional blocks.
        """
        super().__init__()
        self.relu = nn.ReLU(inplace = True)
        # "Same" padding for odd kernels at stride 1. The former fixed
        # padding=1 added a zero border in every 1x1 block, growing a 15x15
        # patch to 21x21 and shifting the map by 3 pixels, so output[7, 7] was
        # computed from input pixel (4, 4) instead of the patch centre.
        padding = kernel_size // 2
        layers = list()
        for i in range(len(channel_dims)):
            in_channels = in_features if i == 0 else channel_dims[i-1]
            layers.append(nn.Conv2d(in_channels=in_channels,
                                    out_channels=channel_dims[i],
                                    kernel_size=kernel_size, stride=stride, padding=padding))
            layers.append(nn.BatchNorm2d(num_features=channel_dims[i]))
            # The ReLU has no parameters, so one instance is reused in every block.
            layers.append(self.relu)
        self.conv_layers = nn.Sequential(*layers)

        # 1x1 convolution mapping the last feature map to `num_outputs` channels
        self.conv_output = nn.Conv2d(in_channels=channel_dims[-1], out_channels=num_outputs, kernel_size=1,
                                     stride=1, padding=0, bias=True)

    def forward(self, x):
        """
        Run the convolutional stack and the output convolution on `x`.

        With the default arguments a (N, in_features, H, W) input yields a
        (N, num_outputs, H, W) output.
        """
        x = self.conv_layers(x)
        x = self.conv_output(x)

        return x

In [19]:

# Hyper-parameters of this run.
NUM_EPOCHS = 10
LEARNING_RATE = 0.001
BATCH_SIZE = 512
NUM_WORKERS = 8
LOG_EVERY = 20  # batches between progress logs / saved prediction patches

# Only the prediction at the centre pixel of each patch is compared with the
# target: index 7 for the 15x15 patches configured in `args.patch_size`.
# Assumes the model output has the same spatial size as the input patch - TODO confirm.
center_row, center_col = (side // 2 for side in args.patch_size)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
dataset_paths = {'h5':'/scratch2/biomass_estimation/code/ml/data', 'norm': '/scratch2/biomass_estimation/code/ml/data', 'map': '/scratch2/biomass_estimation/code/ml/data/'}

# np.save() does not create missing directories.
os.makedirs('training_predictions', exist_ok=True)

wandb.init()
try:
    model = SimpleFCN()
    wandb.watch(model, log_freq=100)
    model = model.to(device)

    # Loss function and optimizer. The loss module is built once and shared by
    # the training and validation loops.
    criterion = RMSE()
    optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE)

    ds_training = GEDIDataset(dataset_paths, fnames = fnames, chunk_size = 1, mode = 'train', args = args)
    trainloader = DataLoader(dataset = ds_training, batch_size = BATCH_SIZE, shuffle = True, num_workers = NUM_WORKERS)
    ds_validation = GEDIDataset(dataset_paths, fnames = fnames, chunk_size = 1, mode = 'val', args = args)
    validloader = DataLoader(dataset = ds_validation, batch_size = BATCH_SIZE, shuffle = False, num_workers = NUM_WORKERS)

    min_valid_loss = float('inf')
    plotnr = 0  # running index that prefixes every saved prediction file

    for epoch in range(NUM_EPOCHS):
        # ---- training ----
        train_loss = 0.0
        model.train()
        for i, (inputs, targets) in enumerate(trainloader, start=1):
            inputs, targets = inputs.to(device), targets.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            # squeeze(1) removes only the channel axis, so a final batch holding
            # a single sample keeps its batch dimension.
            # Assumes targets has shape (batch,) - TODO confirm.
            loss = criterion(outputs[:, :, center_row, center_col].squeeze(1), targets)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

            if i % LOG_EVERY == 0:
                print(f'Epoch {epoch+1} \t Batch {i} \t Training Loss: {train_loss / i}')
                wandb.log({'train_loss': train_loss / i})
                # Keep the predicted patch of the first sample for later inspection.
                np.save(f'training_predictions/{plotnr}training_outputs_epoch{epoch+1}_batch{i}_simple.npy', outputs[0,0].detach().cpu().numpy())
                plotnr+=1

        # ---- validation ----
        valid_loss = 0.0
        model.eval()
        # No gradients are needed here; skipping the autograd graph saves memory.
        with torch.no_grad():
            for i, (inputs, targets) in enumerate(validloader, start=1):
                inputs, targets = inputs.to(device), targets.to(device)

                outputs = model(inputs)
                loss = criterion(outputs[:, :, center_row, center_col].squeeze(1), targets)
                valid_loss += loss.item()
                if i % LOG_EVERY == 0:
                    print(f'Epoch {epoch+1} \t Batch {i} \t Validation Loss: {valid_loss / i}')
                    wandb.log({'valid_loss': valid_loss / i})
                    np.save(f'training_predictions/{plotnr}validation_outputs_epoch{epoch+1}_batch{i}_simple.npy', outputs[0,0].detach().cpu().numpy())
                    plotnr+=1

        print(f'Epoch {epoch+1} Training Loss: {train_loss / len(trainloader)} Validation Loss: {valid_loss / len(validloader)}')

        # Checkpoint whenever the summed validation loss improves.
        if min_valid_loss > valid_loss:
            print(f'Validation Loss Decreased({min_valid_loss}--->{valid_loss}) Saving The Model')
            min_valid_loss = valid_loss

            # Saving State Dict
            # NOTE(review): the UNetFCN training cell saves to this same file
            # name, so this run overwrites that checkpoint.
            torch.save(model.state_dict(), 'saved_model3.pth')

        print(f"Epoch {epoch+1} completed")
finally:
    # Close the wandb run even when training is interrupted (e.g. by a
    # KeyboardInterrupt) so it is not left open for the next wandb.init().
    wandb.finish()

VBox(children=(Label(value='0.002 MB of 0.034 MB uploaded\r'), FloatProgress(value=0.06857834308869878, max=1.…

0,1
train_loss,██▇▆▅▃▃▂▁▁

0,1
train_loss,86.89493


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112199035576649, max=1.0…

Epoch 1 	 Batch 20 	 Training Loss: 138.49563331604003
Epoch 1 	 Batch 40 	 Training Loss: 138.43864040374757
Epoch 1 	 Batch 60 	 Training Loss: 138.04681104024252
Epoch 1 	 Batch 80 	 Training Loss: 137.43556051254274
Epoch 1 	 Batch 100 	 Training Loss: 137.06648147583007
Epoch 1 	 Batch 120 	 Training Loss: 136.8797592163086
Epoch 1 	 Batch 140 	 Training Loss: 136.6051773071289
Epoch 1 	 Batch 160 	 Training Loss: 136.54829759597777
Epoch 1 	 Batch 180 	 Training Loss: 136.22900093926324
Epoch 1 	 Batch 200 	 Training Loss: 136.0531792449951
Epoch 1 	 Batch 220 	 Training Loss: 135.88046205694025


KeyboardInterrupt: 