In [23]:
import torch
import torch.nn as nn
import netCDF4 as nc
import numpy as np
import os
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms


In [24]:
class NetCDFDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.file_list = [f for f in os.listdir(root_dir) if f.endswith('.nc')]

    def __len__(self):
        return len(self.file_list)

    def __getitem__(self, idx):
        file_path = os.path.join(self.root_dir, self.file_list[idx])
        
        # Load NetCDF data
        dataset = nc.Dataset(file_path)
        data = dataset.variables['t2m'][:].astype(np.float32)  # Adjust 'data' to the variable name in your file
        dataset.close()
        
        # Reshape the data to (1, 50, 721, 1440)
        data = data.reshape(1, 50, 721, 1440)
        return torch.tensor(data)

In [25]:
# Define your data directories
gfs_biased_dir = 'GFS/'
era5_unbiased_dir = 'ERA5/'

In [26]:
# Create data loaders for GFS (biased) and ERA5 (unbiased) data
gfs_dataset = NetCDFDataset(root_dir=gfs_biased_dir)
era5_dataset = NetCDFDataset(root_dir=era5_unbiased_dir)

In [27]:
batch_size = 1  # Adjust batch size as needed
shuffle = False
num_workers = 0  # Adjust the number of workers for data loading

In [28]:
gfs_data_loader = DataLoader(gfs_dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers)
era5_data_loader = DataLoader(era5_dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers)

In [29]:
class Autoencoder(nn.Module):
    def __init__(self):
        super(Autoencoder, self).__init__()

        # Encoder
        self.encoder = nn.Sequential(
            nn.Conv3d(1, 64, kernel_size=(3,3,3), padding=(1,1,1)),
            nn.ReLU(True),
            #nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))
        )
        
        # Bottleneck (no further reduction of dimensions)

        # Decoder
        self.decoder = nn.Sequential(
            nn.ConvTranspose3d(64, 1, kernel_size=(3,3,3), padding=(1,1,1)),
            nn.Sigmoid()
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded



In [32]:
# Define the loss function and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(autoencoder.parameters(), lr=0.01)

In [33]:
# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    autoencoder.train()
    for gfs_data, era5_data in zip(gfs_data_loader, era5_data_loader):
        optimizer.zero_grad()
        outputs = autoencoder(gfs_data)
        loss = criterion(outputs, era5_data)
        loss.backward()
        optimizer.step()
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [1/10], Loss: 77059.1016
Epoch [2/10], Loss: 76529.3516
Epoch [3/10], Loss: 76518.3125
Epoch [4/10], Loss: 76518.3125
Epoch [5/10], Loss: 76518.3125
Epoch [6/10], Loss: 76518.3125
Epoch [7/10], Loss: 76518.3125
Epoch [8/10], Loss: 76518.3125
Epoch [9/10], Loss: 76518.3125
Epoch [10/10], Loss: 76518.3125


In [10]:
first = nn.Conv3d(1, 64, kernel_size=(3,3,3), padding=(1,1,1))

In [11]:
gfs_data.shape

torch.Size([1, 1, 50, 721, 1440])

In [12]:
outputs = first(gfs_data)

In [13]:
outputs.shape

torch.Size([1, 64, 50, 721, 1440])

In [14]:
second = nn.ReLU(True)

In [15]:
output = second(outputs)

In [16]:
output.shape

torch.Size([1, 64, 50, 721, 1440])

In [61]:
third = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))

In [62]:
out = third(output)

In [63]:
out.shape

torch.Size([1, 64, 25, 360, 720])

In [17]:
fourth = nn.ConvTranspose3d(64, 1, kernel_size=(3,3,3), padding=(1,1,1))

In [18]:
inp = fourth(output)

In [19]:
inp.shape

torch.Size([1, 1, 50, 721, 1440])

In [51]:
nn.Conv3d(1, 64, kernel_size=3, padding=1),
            nn.ReLU(True),
            #nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))gfs_data

In [16]:
outputs = autoencoder(gfs_data)

In [52]:
outputs.shape

torch.Size([1, 64, 50, 721, 1440])

In [9]:
# Define the model
class Autoencoder(nn.Module):
    def __init__(self):
        super(Autoencoder, self).__init__()
        
        # Encoder
        self.encoder = nn.Sequential(
            nn.Conv3d(in_channels=50, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2)),
            nn.LSTM(input_size=64, hidden_size=128, num_layers=2, batch_first=True, bidirectional=True)
        )
        
        # Bottleneck (will be completed soon)
        
        # Decoder
        self.decoder = nn.Sequential(
            nn.LSTM(input_size=256, hidden_size=128, num_layers=2, batch_first=True, bidirectional=True),
            nn.ConvTranspose3d(in_channels=128, out_channels=50, kernel_size=3, padding=1),
            nn.Sigmoid()
        )


autoencoder = Autoencoder()

In [10]:
# Define the loss function and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(autoencoder.parameters(), lr=0.001)