In [1]:
import torch
import torch.nn as nn
from dataloader import *

In [2]:
import pickle

# Open the file in binary mode for reading
with open('data/normalization_values.pkl', 'rb') as f:
    data = pickle.load(f)

# Now you can analyze the data
print(data)

{'S2_bands': {'B01': {'mean': 0.13021514, 'std': 0.017152175, 'min': 1e-04, 'max': 1.1213, 'p1': 0.1273, 'p99': 0.1074}, 'B02': {'mean': 0.1363337, 'std': 0.018509913, 'min': 1e-04, 'max': 1.8768, 'p1': 0.1366, 'p99': 0.1128}, 'B03': {'mean': 0.16427371, 'std': 0.02087248, 'min': 0.0411, 'max': 1.7888, 'p1': 0.1692, 'p99': 0.1364}, 'B04': {'mean': 0.13865142, 'std': 0.025569845, 'min': 0.0121, 'max': 1.7232, 'p1': 0.1445, 'p99': 0.1184}, 'B05': {'mean': 0.20296873, 'std': 0.028621713, 'min': 0.0672, 'max': 1.6344, 'p1': 0.2157, 'p99': 0.1591}, 'B06': {'mean': 0.38582557, 'std': 0.070499, 'min': 0.0758, 'max': 1.6699, 'p1': 0.3286, 'p99': 0.2766}, 'B07': {'mean': 0.4361872, 'std': 0.086211845, 'min': 0.0573, 'max': 1.6645, 'p1': 0.3621, 'p99': 0.2278}, 'B08': {'mean': 0.4448093, 'std': 0.08623231, 'min': 0.0737, 'max': 1.6976, 'p1': 0.3588, 'p99': 0.2122}, 'B8A': {'mean': 0.4580875, 'std': 0.08798952, 'min': 0.0772, 'max': 1.6709, 'p1': 0.3775, 'p99': 0.26}, 'B09': {'mean': 0.45806482, 

In [3]:
import h5py
import pandas as pd
import pickle
import os

# Initialize an empty dictionary to store the data
data = {'train': [], 'val': [], 'test': []} 
path_h5 = '/scratch2/biomass_estimation/code/ml/data'

# Iterate over all the h5 files
for fname in os.listdir(path_h5):
    if fname.endswith('.h5'):
        with h5py.File(os.path.join(path_h5, fname), 'r') as f:
            # Get the list of all tiles in the file
            all_tiles = list(f.keys())
            
            # Select one tile for validation, one for testing, and the rest for training
            val_tile = all_tiles[0:2]
            test_tile = all_tiles[2:5]
            train_tiles = all_tiles[5:]
            
            # Add the selected tiles to the dictionary
            data['val'].extend(val_tile)
            data['test'].extend(test_tile)
            data['train'].extend(train_tiles)

print("training tiles: ", len(data['train']))
print("validation tiles: ", len(data['val']))
print("testing tiles: ", len(data['test']))
# Pickle the DataFrame and save it to a file
with open('/scratch2/biomass_estimation/code/ml/data/mapping2.pkl', 'wb') as f:
    pickle.dump(data, f)

training tiles:  45
validation tiles:  10
testing tiles:  15


In [4]:
class SimpleFCN(nn.Module):
    def __init__(self,
                 in_features=18,
                 channel_dims = (16, 32, 64, 128),
                 num_outputs=1,
                 kernel_size=3,
                 stride=1):
        """
        A simple fully convolutional neural network.
        """
        super(SimpleFCN, self).__init__()
        self.relu = nn.ReLU(inplace = True)
        layers = list()
        for i in range(len(channel_dims)):
            in_channels = in_features if i == 0 else channel_dims[i-1]
            layers.append(nn.Conv2d(in_channels=in_channels, 
                                    out_channels=channel_dims[i], 
                                    kernel_size=kernel_size, stride=stride, padding=1))
            layers.append(nn.BatchNorm2d(num_features=channel_dims[i]))
            layers.append(self.relu)
        print(layers)
        self.conv_layers = nn.Sequential(*layers)
        
        self.conv_output = nn.Conv2d(in_channels=channel_dims[-1], out_channels=num_outputs, kernel_size=1,
                                     stride=1, padding=0, bias=True)
        # self.fc = nn.Linear(15*15*num_outputs, 1)  # Fully connected layer to get a single output value

    def forward(self, x):
        x = self.conv_layers(x)
        # print(x.shape)
        x = self.conv_output(x)
        # x = x.flatten(start_dim=1)
        # predictions = self.fc(x)
        # return predictions.squeeze()  # Remove the extra dimension
        return x
    
model = SimpleFCN()

[Conv2d(18, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), ReLU(inplace=True), Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), ReLU(inplace=True), Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), ReLU(inplace=True), Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), ReLU(inplace=True)]


In [7]:
import torch
from torch import nn, optim
from dataloader import *

model = SimpleFCN()
if torch.cuda.is_available():
    model = model.cuda()
# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

class RMSE(nn.Module):
    """ 
        Weighted RMSE.
    """

    def __init__(self):
        super(RMSE, self).__init__()
        self.mse = torch.nn.MSELoss(reduction='none')
        
    def __call__(self, prediction, target, weights = 1):
        # prediction = prediction[:, 0]
        return torch.sqrt(torch.mean(weights * self.mse(prediction,target)))


class Args:
    def __init__(self):
        self.latlon = True
        self.bands = ['B01', 'B02', 'B03', 'B04', 'B05', 'B06', 'B07', 'B08', 'B8A', 'B09', 'B11', 'B12']
        self.bm = True
        self.patch_size = [15,15]
        self.norm_strat = 'pct'
        self.norm = False

args = Args()
fnames = ['data_nonan_0-5.h5', 'data_nonan_1-5.h5', 'data_nonan_2-5.h5', 'data_nonan_3-5.h5', 'data_nonan_4-5.h5']
mode = 'train'
ds_training = GEDIDataset({'h5':'/scratch2/biomass_estimation/code/ml/data', 'norm': '/scratch2/biomass_estimation/code/ml/data', 'map': '/scratch2/biomass_estimation/code/ml/data/'}, fnames = fnames, chunk_size = 1, mode = mode, args = args)
trainloader = DataLoader(dataset = ds_training, batch_size = 512, shuffle = True, num_workers = 8)
mode = 'val'
ds_validation = GEDIDataset({'h5':'/scratch2/biomass_estimation/code/ml/data', 'norm': '/scratch2/biomass_estimation/code/ml/data', 'map': '/scratch2/biomass_estimation/code/ml/data/'}, fnames = fnames, chunk_size = 1, mode = mode, args = args)
validloader = DataLoader(dataset = ds_validation, batch_size = 512, shuffle = False, num_workers = 8)

min_valid_loss = float('inf')
# Training loop
for epoch in range(100):  # 100 epochs
    train_loss = 0.0
    model.train()
    i=0
    for inputs, targets in trainloader:
        i+=1
        if torch.cuda.is_available():
            inputs, targets = inputs.cuda(), targets.cuda()

        optimizer.zero_grad()
        outputs = model(inputs)
        # print("inputs.shape: ", inputs.shape)
        # print("targets.shape: ", targets.shape)
        # # # print(outputs)
        # print("outputs.shape: ", outputs.shape)
        # loss1 = criterion(outputs[:,:,7,7].squeeze(), targets)
        loss = RMSE()(outputs[:,:,7,7].squeeze(), targets)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        # print(loss.item())
        if i%20==0:
            print(f'Epoch {epoch+1} \t Batch {i} \t Training Loss: {train_loss / i}')

    
    valid_loss = 0.0
    i=0
    model.eval()
    for inputs, targets in validloader:
        i+=1
        if torch.cuda.is_available():
            inputs, targets = inputs.cuda(), targets.cuda()
        
        outputs = model(inputs)
        loss = criterion(outputs[:,:,7,7].squeeze(),targets)
        loss = RMSE()(outputs[:,:,7,7].squeeze(), targets)
        valid_loss += loss.item()
        if i%20==0:
            print(f'Epoch {epoch+1} \t Batch {i} \t Validation Loss: {valid_loss / i}')
 
    print(f'Epoch {epoch+1} Training Loss: {train_loss / len(trainloader)} Validation Loss: {valid_loss / len(validloader)}')
     
    if min_valid_loss > valid_loss:
        print(f'Validation Loss Decreased({min_valid_loss}--->{valid_loss}) Saving The Model')
        min_valid_loss = valid_loss
         
        # Saving State Dict
        torch.save(model.state_dict(), 'saved_model2.pth')


    print(f"Epoch {epoch+1} completed")

[Conv2d(18, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), ReLU(inplace=True), Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), ReLU(inplace=True), Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), ReLU(inplace=True), Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), ReLU(inplace=True)]
Epoch 1 	 Batch 20 	 Training Loss: 109.57520904541016
Epoch 1 	 Batch 40 	 Training Loss: 98.65093517303467
Epoch 1 	 Batch 60 	 Training Loss: 87.56623617808025
Epoch 1 	 Batch 80 	 Training Loss: 79.36099495887757
Epoch 1 	 Batch 100 	 Training Loss: 74.23469905853271
Epoch 1 	 Batch 120 	 Training Loss: 70.8660120964050

In [None]:
import torch
from torch import nn, optim
from dataloader import *

model = SimpleFCN()
if torch.cuda.is_available():
    model = model.cuda()
# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

class RMSE(nn.Module):
    """ 
        Weighted RMSE.
    """

    def __init__(self):
        super(RMSE, self).__init__()
        self.mse = torch.nn.MSELoss(reduction='none')
        
    def __call__(self, prediction, target, weights = 1):
        # prediction = prediction[:, 0]
        return torch.sqrt(torch.mean(weights * self.mse(prediction,target)))


class Args:
    def __init__(self):
        self.latlon = True
        self.bands = ['B01', 'B02', 'B03', 'B04', 'B05', 'B06', 'B07', 'B08', 'B8A', 'B09', 'B11', 'B12']
        self.bm = True
        self.patch_size = [15,15]
        self.norm_strat = 'pct'
        self.norm = False

args = Args()
fnames = ['data_nonan_0-5.h5', 'data_nonan_1-5.h5', 'data_nonan_2-5.h5', 'data_nonan_3-5.h5', 'data_nonan_4-5.h5']
mode = 'train'
ds_training = GEDIDataset({'h5':'/scratch2/biomass_estimation/code/ml/data', 'norm': '/scratch2/biomass_estimation/code/ml/data', 'map': '/scratch2/biomass_estimation/code/ml/data/'}, fnames = fnames, chunk_size = 1, mode = mode, args = args)
trainloader = DataLoader(dataset = ds_training, batch_size = 512, shuffle = True, num_workers = 8)
mode = 'val'
ds_validation = GEDIDataset({'h5':'/scratch2/biomass_estimation/code/ml/data', 'norm': '/scratch2/biomass_estimation/code/ml/data', 'map': '/scratch2/biomass_estimation/code/ml/data/'}, fnames = fnames, chunk_size = 1, mode = mode, args = args)
validloader = DataLoader(dataset = ds_validation, batch_size = 512, shuffle = False, num_workers = 8)

min_valid_loss = float('inf')
# Training loop
for epoch in range(100):  # 10 epochs
    train_loss = 0.0
    model.train()
    i=0
    for inputs, targets in trainloader:
        i+=1
        if torch.cuda.is_available():
            inputs, targets = inputs.cuda(), targets.cuda()

        optimizer.zero_grad()
        outputs = model(inputs)
        # print("inputs.shape: ", inputs.shape)
        # print("targets.shape: ", targets.shape)
        # # # print(outputs)
        # print("outputs.shape: ", outputs.shape)
        # loss1 = criterion(outputs[:,:,7,7].squeeze(), targets)
        loss = RMSE()(outputs[:,:,7,7].squeeze(), targets)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        # print(loss.item())
        if i%20==0:
            print(f'Epoch {epoch+1} \t Batch {i} \t Training Loss: {train_loss / i}')

    
    valid_loss = 0.0
    i=0
    model.eval()
    for inputs, targets in validloader:
        i+=1
        if torch.cuda.is_available():
            inputs, targets = inputs.cuda(), targets.cuda()
        
        outputs = model(inputs)
        loss = criterion(outputs[:,:,7,7].squeeze(),targets)
        loss = RMSE()(outputs[:,:,7,7].squeeze(), targets)
        valid_loss += loss.item()
        if i%20==0:
            print(f'Epoch {epoch+1} \t Batch {i} \t Validation Loss: {valid_loss / i}')
 
    print(f'Epoch {epoch+1} Training Loss: {train_loss / len(trainloader)} Validation Loss: {valid_loss / len(validloader)}')
     
    if min_valid_loss > valid_loss:
        print(f'Validation Loss Decreased({min_valid_loss}--->{valid_loss}) Saving The Model')
        min_valid_loss = valid_loss
         
        # Saving State Dict
        torch.save(model.state_dict(), 'saved_model2.pth')


    print(f"Epoch {epoch+1} completed")

[Conv2d(18, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), ReLU(inplace=True), Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), ReLU(inplace=True), Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), ReLU(inplace=True), Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), ReLU(inplace=True)]
Epoch 1 	 Batch 20 	 Training Loss: 109.57520904541016
Epoch 1 	 Batch 40 	 Training Loss: 98.65093517303467
Epoch 1 	 Batch 60 	 Training Loss: 87.56623617808025
Epoch 1 	 Batch 80 	 Training Loss: 79.36099495887757
Epoch 1 	 Batch 100 	 Training Loss: 74.23469905853271
Epoch 1 	 Batch 120 	 Training Loss: 70.8660120964050