In [3]:
import torch
import sys
import h5py
# Folder that is added to the import path so the Model_* modules imported
# further down can be found. Replace the "..." with the local location.
models_path = "...\\Models architecture" # Replace the ...
# Guarded append: re-running this cell must not stack duplicate entries on sys.path.
if models_path not in sys.path:
    sys.path.append(models_path)

# Importing galaxy image data into PyTorch dataloaders

In [2]:
class dataset(torch.utils.data.Dataset):
    """Map-style PyTorch dataset backed by an HDF5 file.

    The file is opened read-only when the object is created and its top-level
    keys are recorded in ``self.datasets`` (in the order h5py lists them).
    Samples are read from the first key; when the file holds exactly two keys,
    the matching entry of the second key is returned alongside it.

    The handle stays open for the lifetime of the object so items can be read
    on demand. Call :meth:`close`, or use the object as a context manager, to
    release it. Indexing or taking ``len()`` after closing is an error.

    Args:
        file: Path to the HDF5 file.

    Raises:
        ValueError: If the file has no top-level keys.
    """

    def __init__(self, file):
        self.file = file
        self.hdf = h5py.File(file, 'r')
        self.datasets = list(self.hdf.keys())
        if not self.datasets:
            # Fail here with a clear message instead of an IndexError in __len__ later.
            self.close()
            raise ValueError("HDF5 file %r contains no datasets" % (file,))

    def __len__(self):
        """Return the number of entries stored under the first key."""
        return len(self.hdf[self.datasets[0]])

    def __getitem__(self, idx):
        """Return the sample at ``idx``.

        Returns:
            ``(x, z)`` when the file holds exactly two keys (entry ``idx`` of
            the first and second key respectively), otherwise just ``x`` from
            the first key.
        """
        x = self.hdf[self.datasets[0]][idx]
        if len(self.datasets) == 2:
            z = self.hdf[self.datasets[1]][idx]
            return x, z
        return x

    def close(self):
        """Close the underlying HDF5 handle; safe to call more than once."""
        handle = getattr(self, 'hdf', None)
        if handle is not None:
            handle.close()
            self.hdf = None

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()


In [3]:
# Locations of the two HDF5 galaxy-image files (replace the "..." with the local folder).
# NOTE(review): the notes that used to sit on these two lines said "only 128*128 galaxy
# images" for the cosmic survey file and "158*158 galaxy images + their redshifts" for the
# deep-field file. That disagrees with the "_with_zphot" file name and with the
# model-selection comments further down (Model_158vae for cosmic_survey_data,
# Model_128vae for deepfield_data) — confirm against the files which description is right.
cosmic_survey_data = '...\\cosmos_25.2_all_with_zphot.h5'
deepfield_data = '...\\cosmos.h5'

In [4]:
# Seed the global RNG so the run is repeatable.
torch.manual_seed(0)

# Wrap the selected HDF5 file in a map-style dataset. galaxy_dataset[integer] yields an
# (x, z) pair when the file holds two datasets and a single x otherwise
# (presumably (image, redshift) vs. image only — verify against the file).
galaxy_dataset = dataset(cosmic_survey_data)

# 90 % / 10 % split into a training set and a validation set.
data_size = len(galaxy_dataset)
train_size = int(0.9*data_size)
val_size = data_size - train_size

# Dedicated, seeded generator so the split itself is identical on every run.
Generator = torch.Generator().manual_seed(0)
train_set, val_set = torch.utils.data.random_split(
    galaxy_dataset,
    [train_size, val_size],
    generator=Generator,
)

# Mini-batch loaders for both subsets.
# NOTE(review): the validation loader is shuffled as well; validation normally does not
# need it — consider shuffle=False there once per-iteration ordering is known not to matter.
batchsize = 64
train_loader, val_loader = (
    torch.utils.data.DataLoader(subset, batch_size=batchsize, shuffle=True)
    for subset in (train_set, val_set)
)


# Training the model

In [5]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [6]:
# For the deepfield_data, use Model_128vae
# For the cosmic_survey_data, use Model_158vae 
from Model_158vae import VariationalAutoencoder

A brief note on the hyperparameter $\beta$ before training the model:
- To train the model with a fixed $\beta$, use `vae.train_time(*args)`;
- To train the model with a $\beta$ that varies across epochs (e.g., $\beta$ increasing at each epoch), use `vae.traintab(*args)`. In that case $\beta$ must be a tensor of shape `torch.Size([num_epochs])`.

In [7]:
# Model parameters, passed positionally to the VariationalAutoencoder constructor.
# Presumably nc = image channels, nf = base number of feature maps, z_dim = latent
# dimension — verify against the model file.
nc, nf, z_dim = 1, 64, 64
vae = VariationalAutoencoder(nc, nf, z_dim).to(device)

# Training parameters:
num_epochs = 1
lr = 1e-4
Beta = 0.1  # fixed beta, so train_time is used below

# Time to train! The model only needs to be switched to training mode once.
vae.train()
train_loss, val_loss, mse, kl = vae.train_time(train_loader, val_loader, epochs=num_epochs, learning_rate=lr, beta=Beta)

# Each returned term is an array holding its value at every iteration.
# See the Loss.ipynb file to turn them into per-epoch losses and plot them.

  0%|          | 0/1 [00:00<?, ?it/s]

# Saving the model and the losses

In [8]:
# Training settings gathered in one place; nothing is written to disk in this cell.
hyperparameters = {
    'batch size': batchsize,
    'epochs': num_epochs,
    'beta': Beta,
    'learning rate': lr,
    'z_dim': z_dim,
}

In [27]:
# Build the tag that identifies beta in the output file names.
# isinstance (rather than an exact type comparison) also recognises Tensor subclasses.
if isinstance(Beta, torch.Tensor):
    # A beta schedule has no single value to print, so ask for a short label; spaces are removed.
    beta_behaviour = input("Enter a word to describe beta's behaviour").replace(" ","")
    beta_tag = beta_behaviour
else:
    beta_tag = str(Beta)

# Both file names share the same "z<latent size>_beta<tag>" stem.
loss_file_name = f"z{z_dim}_beta{beta_tag}_loss.pt"
weights_file_name = f"z{z_dim}_beta{beta_tag}_weights.pt"

# Destination of the loss file for each data set (replace the "..." with the target folder).
loss_path_deepfield = "...\\" + loss_file_name
loss_path_cosmic_survey = "...\\" + loss_file_name

# Uncomment to write the losses and hyperparameters; use the path matching the data set trained on.
#torch.save([train_loss, val_loss, mse, kl, hyperparameters], loss_path_deepfield)

z64_betaIncreasing_loss.pt
