In [1]:
import sys
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
from torch.nn import functional as F
import random
from sklearn.model_selection import train_test_split
import torch.utils.data as data_utils
import time
import math
import matplotlib.pyplot as plt
import subprocess
import shutil

import psutil # or import multiprocessing

annoying_print = False
do_print_to_file = True

total_cpus = psutil.cpu_count()

seed = 4734
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(42) 
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [2]:
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

if torch.cuda.is_available():
    num_gpus = torch.cuda.device_count()
    print(f'Número de GPUs disponibles: {num_gpus}')

Using cuda device
Número de GPUs disponibles: 4


### DATOS

In [3]:
# Cargar datos numpy normalizados - imagenes y velocidades

# Rutas de trabajo
npy_folder = '/disk2/alma/tesisSpace/'
numpy_file_name_image = "numpyFile_simulation_images_real"
numpy_file_name_velocity_x = "./numpyFile_simulation_velocity_x_real"
numpy_file_name_velocity_y = "./numpyFile_simulation_velocity_y_real"
numpy_file_name_coordinates = "./numpyFile_simulation_velocity_coordinates"

def load_normalize_data():
    
    # Mensaje de depuración
    print("Ready to load NUMPY images_normalize from " + npy_folder)
    sys.stdout.flush()

    # Carga del archivo de imagenes
    try:
        data_norm_img = np.load(npy_folder + numpy_file_name_image + ".npy", mmap_mode=None)
    except:
        print("ERROR loading file: " + npy_folder + numpy_file_name_image + ".npy")
        sys.stdout.flush()
        sys.exit(1)
    else:
        print("The file " + npy_folder + numpy_file_name_image + ".npy was loaded.")
        sys.stdout.flush()

    # Carga del archivo de velocidades por el momento se toman velocidades verticales y horizontales por separado (Velocidades del dust1)

    try:
        data_norm_vx = np.load(npy_folder + numpy_file_name_velocity_x + ".npy", mmap_mode=None)
    except:
        print("ERROR loading file: " + npy_folder + numpy_file_name_velocity_x + ".npy")
        sys.stdout.flush()
        sys.exit(1)
    else:
        print("The file " + npy_folder + numpy_file_name_velocity_x + ".npy was loaded.")
        sys.stdout.flush()

    try:
        data_norm_vy = np.load(npy_folder + numpy_file_name_velocity_y + ".npy", mmap_mode=None)
    except:
        print("ERROR loading file: " + npy_folder + numpy_file_name_velocity_y + ".npy")
        sys.stdout.flush()
        sys.exit(1)
    else:
        print("The file " + npy_folder + numpy_file_name_velocity_y + ".npy was loaded.")
        sys.stdout.flush()

    # Cargar coordenadas X e Y de las velocidades (n, 2, 300, 300) 0 --> X / 1 --> Y
    try:
        data_coordinates_v = np.load(npy_folder + numpy_file_name_coordinates + ".npy", mmap_mode=None)
    except:
        print("ERROR loading file: " + npy_folder + numpy_file_name_coordinates + ".npy")
        sys.stdout.flush()
        sys.exit(1)
    else:
        print("The file " + npy_folder + numpy_file_name_coordinates + ".npy was loaded.")
        sys.stdout.flush()

    
    # Retorno de las matrices numpy
    return data_norm_img, data_norm_vx, data_norm_vy, data_coordinates_v


In [4]:
# Separa conjunto de entrenamiento, validacion y test considerando imagenes, velcoidades en x, velocidades en y
# y coordinadas de velcidades X e Y (en un mismo arreglo (n, 2, npix, npix)). Luego de esta funcion se debe selecionar
# con que velocidades se quiere trabajar el modelo. Las coordenadas estan porsi se quiere graficar o estudiar las velocidades
# obtenidas.
def adapt_training_data_PyTorch(data_img_norm, data_vel_norm_vx, data_vel_norm_vy, data_coordinates):
    print("\n ADAPTING DATA FOR PYTORCH___________________________")
    
    # Imprimir detalles iniciales del array de datos
    print("I1 dtype =", data_img_norm.dtype, "- shape =", data_img_norm.shape)
    print("Vx1 dtype =", data_vel_norm_vx.dtype, "- shape =", data_vel_norm_vx.shape)
    print("Vy1 dtype =", data_vel_norm_vy.dtype, "- shape =", data_vel_norm_vy.shape)
    print("Coordinates dtype =", data_coordinates.dtype, "- shape =", data_coordinates.shape)
    
    # Expandir el array de imágenes para incluir el canal como una dimensión adicional
    # (batch_size, time_step, pix, pix) ---> (batch_size, time_step, pix, pix, 1)
    data_img_norm = np.expand_dims(data_img_norm, axis=4)
    print("I2 dtype =", data_img_norm.dtype, "- shape =", data_img_norm.shape)

    # Expandir el array de velocidades para incluir el canal como una dimensión adicional
    # (batch_size, pix, pix) ---> (batch_size, pix, pix, 1)
    data_vel_norm_vx = np.expand_dims(data_vel_norm_vx, axis=3)
    print("xV2 dtype =", data_vel_norm_vx.dtype, "- shape =", data_vel_norm_vx.shape)

    data_vel_norm_vy = np.expand_dims(data_vel_norm_vy, axis=3)
    print("Vy2 dtype =", data_vel_norm_vy.dtype, "- shape =", data_vel_norm_vy.shape)

    print("\n")
    sys.stdout.flush()

    print("READY TO SELECT IMAGES TO TRAIN, VALIDATE & TEST______________________")
    sys.stdout.flush()
    
    # Inicialización de listas de índices
    random_list_indexes_train = []
    random_list_indexes_valid = []
    random_list_indexes_test = []
    
    total_data_len = data_img_norm.shape[0]

    # Crear índices para dividir los datos
    indices = np.arange(total_data_len)
    
    # Dividir los datos en conjuntos de entrenamiento + validación y prueba
    random_indexes_valid, random_indexes_test = train_test_split(
        indices, test_size=0.05, random_state=23)
    
    # Dividir los datos de entrenamiento + validación en entrenamiento y validación
    random_indexes_train, random_indexes_valid = train_test_split(
        random_indexes_valid, test_size=0.4, random_state=23)
    
    # Selección de imágenes para los conjuntos de entrenamiento, validación y prueba
    train_images = data_img_norm[random_indexes_train, :, :, :]
    valid_images = data_img_norm[random_indexes_valid, :, :, :]
    test_images = data_img_norm[random_indexes_test, :, :, :]
    print("Train_images dtype =", train_images.dtype, "- shape =", train_images.shape)
    print("Valid_images =", valid_images.dtype, "- shape =", valid_images.shape)
    print("Test_images =", test_images.dtype, "- shape =", test_images.shape)
    print("\n")
    sys.stdout.flush()

    # Separar velocidades en x
    train_velocity_vx = data_vel_norm_vx[random_indexes_train, :, :, :]
    valid_velocity_vx = data_vel_norm_vx[random_indexes_valid, :, :, :]
    test_velocity_vx = data_vel_norm_vx[random_indexes_test, :, :, :]
    print("Train_velocity x dtype =", train_velocity_vx.dtype, "- shape =", train_velocity_vx.shape)
    print("Valid_velocity x dtype =", valid_velocity_vx.dtype, "- shape =", valid_velocity_vx.shape)
    print("Test_velocity x dtype =", test_velocity_vx.dtype, "- shape =", test_velocity_vx.shape)
    print("\n")
    sys.stdout.flush()

    # Separar velocidades en y
    train_velocity_vy = data_vel_norm_vy[random_indexes_train, :, :, :]
    valid_velocity_vy = data_vel_norm_vy[random_indexes_valid, :, :, :]
    test_velocity_vy = data_vel_norm_vy[random_indexes_test, :, :, :]
    print("Train_velocity y dtype =", train_velocity_vy.dtype, "- shape =", train_velocity_vy.shape)
    print("Valid_velocity y dtype =", valid_velocity_vy.dtype, "- shape =", valid_velocity_vy.shape)
    print("Test_velocity y dtype =", test_velocity_vy.dtype, "- shape =", test_velocity_vy.shape)
    print("\n")
    sys.stdout.flush()

    # Separar coordenadas de velocidades
    train_coordinates = data_coordinates[random_indexes_train, :, :, :]
    valid_coordinates = data_coordinates[random_indexes_valid, :, :, :]
    test_coordinates = data_coordinates[random_indexes_test, :, :, :]
    print("Train_coordinates dtype =", train_coordinates.dtype, "- shape =", train_coordinates.shape)
    print("Valid_coordinates dtype =", valid_coordinates.dtype, "- shape =", valid_coordinates.shape)
    print("Test_coordinates dtype =", test_coordinates.dtype, "- shape =", test_coordinates.shape)
    print("\n")
    sys.stdout.flush()
    
    # No hay etiquetas, así que solo se devuelven las imágenes
    return train_images, valid_images, test_images, train_velocity_vx, valid_velocity_vx, test_velocity_vx,\
    train_velocity_vy, valid_velocity_vy, test_velocity_vy, train_coordinates, valid_coordinates, test_coordinates, total_data_len


In [5]:
# Preparar dataset para el entrenamiento

def prepare_datasets(train_images, valid_images, test_images, train_velocity, valid_velocity, test_velocity, batch_size):

    print("\n READY TO PREPARE DATASETS______________________")
    sys.stdout.flush()
    
    # Convertir a tensores de PyTorch
    torch_Tensor_train_images = torch.FloatTensor(train_images)
    torch_Tensor_valid_images = torch.FloatTensor(valid_images)
    torch_Tensor_test_images = torch.FloatTensor(test_images)
    print( "Torch_Tensor_train_images.size() = ", torch_Tensor_train_images.size())
    print( "Torch_Tensor_valid_images.size() = ", torch_Tensor_valid_images.size())
    print( "Torch_Tensor_test_images.size() = ", torch_Tensor_test_images.size())
    print("\n")
    sys.stdout.flush()
    
    torch_Tensor_train_velocitys = torch.FloatTensor(train_velocity)
    torch_Tensor_valid_velocitys = torch.FloatTensor(valid_velocity)
    torch_Tensor_test_velocitys = torch.FloatTensor(test_velocity)
    print( "Torch_Tensor_train_velocitys.size() = ", torch_Tensor_train_velocitys.size())
    print( "Torch_Tensor_valid_velocitys.size() = ", torch_Tensor_valid_velocitys.size())
    print( "Torch_Tensor_test_velocitys.size() = ", torch_Tensor_test_velocitys.size())
    print("\n")
    sys.stdout.flush()
    
    # Crear datasets con ProtoplanetaryDiskDataset
    train_dataset =  data_utils.TensorDataset(torch_Tensor_train_images, torch_Tensor_train_velocitys)
    valid_dataset = data_utils.TensorDataset(torch_Tensor_valid_images, torch_Tensor_valid_velocitys)
    test_dataset = data_utils.TensorDataset(torch_Tensor_test_images, torch_Tensor_test_velocitys)
    print( "Train_dataset = ", train_dataset)
    print( "Valid_dataset = ", valid_dataset)
    print( "Test_dataset = ", test_dataset)
    print("\n")
    sys.stdout.flush()
    
    # Crear dataloaders
    train_dataloader = data_utils.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True)
    valid_dataloader = data_utils.DataLoader(valid_dataset, batch_size=batch_size, shuffle=True, pin_memory=True)
    test_dataloader = data_utils.DataLoader(test_dataset, batch_size=batch_size, shuffle=True, pin_memory=True)
    print( "Train_dataloader = ", train_dataloader)
    print( "Valid_dataloader = ", valid_dataloader)
    print( "Test_dataloader = ", test_dataloader)
    print("\n")
    sys.stdout.flush()

    return train_dataloader, valid_dataloader, test_dataloader


In [None]:
# Procesar datos

#Cargar datos (batch_size, time_step, pix, pix) image
data_img_norm, data_vx_norm, data_vy_norm, data_coordinates = load_normalize_data()

num_img = 263
pix_prueba = 300
batch = 2
start_step = 5
end_step = 19
time_steps = end_step - start_step

data_img_norm = data_img_norm[0:num_img, start_step:end_step, 0:pix_prueba, 0:pix_prueba]
data_vy_norm = data_vy_norm[0:num_img, 0:pix_prueba, 0:pix_prueba]

print("Image Shape", data_img_norm.shape)
print("Image Max: ", data_img_norm.max(), " Image Min: ", data_img_norm.min())

print("Vx Shape", data_vx_norm.shape)
print("Vx Max: ", data_vx_norm.max(), " Vx Min: ", data_vx_norm.min())

print("Vy Shape", data_vy_norm.shape)
print("Vy Max: ", data_vy_norm.max(), " Vy Min: ", data_vy_norm.min())

print("Coordinates Shape", data_coordinates.shape)

# Adaptar los datos para PyTorch y generar conjuntos de entrenamiento 
train_images, valid_images, test_images, _, _, _, train_velocity_y, valid_velocity_y, test_velocity_y, _, _, _, batch_size = \
adapt_training_data_PyTorch(data_img_norm, data_vx_norm, data_vy_norm, data_coordinates)


# Crear datasets (considerar velocidad que se desee)
train_loader, valid_loader, test_loader = prepare_datasets(train_images, valid_images, test_images, train_velocity_y, 
                                                           valid_velocity_y, test_velocity_y, batch_size=batch)


### MODELO

In [7]:
#First Densely Layer GPU and Second Densely Layer GPU
# Tensor dimension in (batch, time, n, m, channels) out (batch, time, new channels, n, m) (es necesario permutar salida para conv por posicion de channels)
# Input parameters:
# in_channels, out_channels --> canales de entrada y salida
# num_labels --> ver
# split_gpus, parallel_data --> bool para evaluar uso de gpu
class FirstDenseLayer(nn.Module):
    def __init__(self, in_channels, out_channels, num_labels, split_gpus, parallel_data):
        super(FirstDenseLayer, self).__init__()
        self.split_gpus = split_gpus
        self.parallel_data = parallel_data

        input_num_DENSE_01 = "---"
        print( "Densely Layer  --- in_channels , out_channels , "+\
        "input_num_DENSE_01 , num_labels ===> " , \
        in_channels , out_channels , input_num_DENSE_01 , num_labels)
        sys.stdout.flush()

        self.linear = nn.Linear(in_channels , out_channels)# 1 layer: in 1 (imagen en escala de grises) / out 64

    def forward(self, tensor):

        if annoying_print :
          print('001 tensor device==> {}, shape==> {}, type==> {}\n'.format(\
          tensor.device, tensor.shape, tensor.type()))
          sys.stdout.flush()

        tensor = self.linear(tensor)

        if annoying_print :
          print('002 tensor device==> {}, shape==> {}, type==> {}\n'.format(\
          tensor.device, tensor.shape, tensor.type()))
          sys.stdout.flush()

        tensor = tensor.permute(0, 1, 4, 2, 3)

        if annoying_print :
          print('003 tensor device==> {}, shape==> {}, type==> {}\n'.format(\
          tensor.device, tensor.shape, tensor.type()))
          sys.stdout.flush()
        return tensor


In [8]:
# LSTMConv2D Layer GPU
# Input shape in (batch_size, time_steps, in_channels, height, width) out (batch_size, time_steps, out_channels, height, width)
# Input parameters:
#input_size = canales de entrada
#hidden_size = filters
#kernel_size = tamaño del kernel de conv
#num_layers = numero de capas lstm
#bias = bool para determinar uso de sesgo
#output_size = canales de salida
#split_gpus, parallel_data = bools para evaluar uso de gpu
class Conv2dLSTMCell(nn.Module):
    def __init__(self, input_size, hidden_size, kernel_size, bias=True):
        super(Conv2dLSTMCell, self).__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size

        if isinstance(kernel_size, tuple) and len(kernel_size) == 2:
            self.kernel_size = kernel_size
        elif isinstance(kernel_size, int):
            self.kernel_size = (kernel_size, kernel_size)
        else:
            raise ValueError("Invalid kernel size.")
        
        self.padding = self.calculate_padding(self.kernel_size)      

        self.bias = bias
        self.x2h = nn.Conv2d(in_channels=input_size, out_channels=hidden_size * 4,
                             kernel_size=self.kernel_size, padding=self.padding, bias=bias)

        self.h2h = nn.Conv2d(in_channels=hidden_size, out_channels=hidden_size * 4,
                             kernel_size=self.kernel_size, padding=self.padding, bias=bias)
        self.Wc = None
        self.reset_parameters()
        
    # Padding for 'same' output size
    def calculate_padding(self, kernel_size):
        return (kernel_size[0] // 2, kernel_size[1] // 2)
        
    def reset_parameters(self):
        std = 1.0 / np.sqrt(self.hidden_size)
        for w in self.parameters():
            w.data.uniform_(-std, std)

    def forward(self, input, hx=None):
        # Inputs:
        #       input: of shape (batch_size, input_size, height_size, width_size)
        #       hx: of shape (batch_size, hidden_size, height_size, width_size)
        # Outputs:
        #       hy: of shape (batch_size, hidden_size, height_size, width_size)
        #       cy: of shape (batch_size, hidden_size, height_size, width_size)
        if annoying_print :
            print("Input cellLSTMConv: ", input.shape)
            sys.stdout.flush()
        
        if self.Wc == None:
            if torch.cuda.is_available():
                self.Wc = nn.Parameter(torch.zeros(1, self.hidden_size * 3, input.size(-2), input.size(-1), device='cuda:2'))
            else:
                self.Wc = nn.Parameter(torch.zeros(1, self.hidden_size * 3, input.size(-2), input.size(-1)))
            
        if hx is None:
            if torch.cuda.is_available():
                #times_step, out_channeles, w, h 
                hx = torch.zeros(input.size(0), self.hidden_size, input.size(-2), input.size(-1), device='cuda:2')
                hx = (hx, hx)
            else:
                hx = torch.zeros(input.size(0), self.hidden_size, input.size(-2), input.size(-1))
                hx = (hx, hx)                
        hx, cx = hx
        
        gates = self.x2h(input) + self.h2h(hx)

        # Get gates (i_t, f_t, g_t, o_t)
        input_gate, forget_gate, cell_gate, output_gate = gates.chunk(4, 1)

        Wci, Wcf, Wco = self.Wc.chunk(3, 1)

        i_t = torch.sigmoid(input_gate + Wci * cx)
        f_t = torch.sigmoid(forget_gate + Wcf * cx)
        g_t = torch.tanh(cell_gate)

        cy = f_t * cx + i_t * torch.tanh(g_t)
        o_t = torch.sigmoid(output_gate + Wco * cy)

        hy = o_t * torch.tanh(cy)
        
        return (hy, cy)

class Conv2dLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, kernel_size, num_layers, bias, output_size, split_gpus, parallel_data):
        super(Conv2dLSTM, self).__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size

        if type(kernel_size) == tuple and len(kernel_size) == 2:
            self.kernel_size = kernel_size
        elif type(kernel_size) == int:
            self.kernel_size = (kernel_size, kernel_size)
        else:
            raise ValueError("Invalid kernel size.")

        self.padding = self.calculate_padding(self.kernel_size)   
        self.num_layers = num_layers
        self.bias = bias
        self.output_size = output_size
        self.split_gpus = split_gpus
        self.parallel_data = parallel_data
        
        input_num_CONV_01 = "---"
        print( "LSTMConv Layer  --- filters, kernel_size, padding,  "+\
        "input_num_DENSE_01 , num_layers ===> " , \
        self.hidden_size, kernel_size, self.padding, input_num_CONV_01 , num_layers)
        sys.stdout.flush()

        self.rnn_cell_list = nn.ModuleList()
        self.rnn_cell_list.append(Conv2dLSTMCell(self.input_size, self.hidden_size,
                                                 self.kernel_size, self.bias))
        
        for l in range(1, self.num_layers):
            self.rnn_cell_list.append(Conv2dLSTMCell(self.hidden_size, self.hidden_size,
                                                     self.kernel_size, self.bias))

        self.conv = nn.Conv2d(in_channels=self.hidden_size, out_channels=self.output_size,
                             kernel_size=self.kernel_size, padding=self.padding, bias=self.bias)
        
    def calculate_padding(self, kernel_size):
        return (kernel_size[0] // 2, kernel_size[1] // 2)

    def forward(self, input, hx=None):

        if annoying_print :
          print('001 LSTMConv2D tensor device==> {}, shape==> {}, type==> {}\n'.format(\
          input.device, input.shape, input.type()))
          sys.stdout.flush()

        if hx is None:
            if torch.cuda.is_available():
                # num_capas, batch_size, times_step, out_channeles, w, h   
                h0 = torch.zeros(self.num_layers, input.size(0), self.hidden_size, input.size(-2), input.size(-1), device='cuda:2')
            else:
                h0 = torch.zeros(self.num_layers, input.size(0), self.hidden_size, input.size(-2), input.size(-1))
        else:
             h0 = hx
        
        outs = []
        hidden = list()
            
        # Inicializa los estados ocultos h0 y de celda c0 (mismas dimensiones) para todas las capas 
        for layer in range(self.num_layers):
            hidden.append((h0[layer], h0[layer]))

        #Por cada paso de tiempo aplica LSTMConv2D por capa por celda
        for t in range(input.size(1)):
            for layer in range(self.num_layers):
                if layer == 0:
                    hidden_l = self.rnn_cell_list[layer](
                        input[:, t, :],
                        (hidden[layer][0],hidden[layer][1])
                        )
                else:
                    hidden_l = self.rnn_cell_list[layer](
                        hidden[layer - 1][0],
                        (hidden[layer][0], hidden[layer][1])
                        )

                hidden[layer] = hidden_l

            outs.append(hidden_l[0])

        out = outs[-1].squeeze()

        out = self.conv(out)

        if annoying_print :
          print('002 LSTMConv2D tensor device==> {}, shape==> {}, type==> {}\n'.format(\
          out.device, out.shape, out.type()))
          sys.stdout.flush()

        dims = list(range(out.ndim))
        dims[-3], dims[-2], dims[-1] = dims[-2], dims[-1], dims[-3]
        tensor = out.permute(dims)

        if annoying_print :
          print('003 LSTMConv2D tensor device==> {}, shape==> {}, type==> {}\n'.format(\
          out.device, tensor.shape, tensor.type()))
          sys.stdout.flush()

        return tensor

In [9]:
#Second Densely Layer GPU and Second Densely Layer GPU
# Tensor dimension in (batch, n, m, channels) out (batch, n, m, new channels)
# Input parameters:
# in_channels, out_channels --> canales de entrada y salida
# num_labels --> ver
# split_gpus, parallel_data --> bool para evaluar uso de gpu
class SecondDenseLayer(nn.Module):
    def __init__(self, in_channels, out_channels, num_labels, split_gpus, parallel_data):
        super(SecondDenseLayer, self).__init__()
        self.split_gpus = split_gpus
        self.parallel_data = parallel_data

        input_num_DENSE_01 = "---"
        print( "Densely Layer  --- in_channels , out_channels , "+\
        "input_num_DENSE_01 , num_labels ===> " , \
        in_channels , out_channels , input_num_DENSE_01 , num_labels)
        sys.stdout.flush()

        self.linear = nn.Linear(in_channels , out_channels)# 1 layer: in 128 (imagen en escala de grises) / out 128

    def forward(self, tensor):

        if annoying_print :
          print('001 Second tensor device==> {}, shape==> {}, type==> {}\n'.format(\
          tensor.device, tensor.shape, tensor.type()))
          sys.stdout.flush()

        tensor = self.linear(tensor)

        if annoying_print :
          print('002 Second tensor device==> {}, shape==> {}, type==> {}\n'.format(\
          tensor.device, tensor.shape, tensor.type()))
          sys.stdout.flush()

        return tensor

In [10]:
# Capa extra para prueba
class ThridDenseLayer(nn.Module):
    def __init__(self, in_channels, out_channels, num_labels, split_gpus, parallel_data):
        super(ThridDenseLayer, self).__init__()
        self.split_gpus = split_gpus
        self.parallel_data = parallel_data

        input_num_DENSE_01 = "---"
        print( "Densely Layer  --- in_channels , out_channels , "+\
        "input_num_DENSE_01 , num_labels ===> " , \
        in_channels , out_channels , input_num_DENSE_01 , num_labels)
        sys.stdout.flush()

        self.linear = nn.Linear(in_channels , out_channels)# 1 layer: in 128 (imagen en escala de grises) / out 64

    def forward(self, tensor):

        if annoying_print :
          print('001 Third tensor device==> {}, shape==> {}, type==> {}\n'.format(\
          tensor.device, tensor.shape, tensor.type()))
          sys.stdout.flush()

        tensor = self.linear(tensor)

        if annoying_print :
          print('002 Third tensor device==> {}, shape==> {}, type==> {}\n'.format(\
          tensor.device, tensor.shape, tensor.type()))
          sys.stdout.flush()

        return tensor

In [11]:
# Output Layer GPU
# Tensor dimension in (batch, n, m, channels) (toma la ultima salida de lstm) out (batch, n, m)
# Input parameters:
# in_channels, out_channels --> canales de entrada y salida
# num_labels --> ver
# split_gpus, parallel_data --> bool para evaluar uso de gpu
class OutputDenseLayer(nn.Module):
    def __init__(self, in_channels, out_channels, num_labels, split_gpus, parallel_data):
        super(OutputDenseLayer, self).__init__()
        self.split_gpus = split_gpus
        self.parallel_data = parallel_data

        input_num_DENSE_OUT_01 = "---"
        print( "Densely Output Layer  --- in_channels , out_channels , "+\
        "input_num_DENSE_OUT_01 , num_labels ===> " , \
        in_channels , out_channels , input_num_DENSE_OUT_01 , num_labels)
        sys.stdout.flush()

        self.linear = nn.Linear(in_channels , out_channels) # in 128 / out 1 -- volver a dimension de entrada (change exo 128 to 64)

    def forward(self, tensor):

        if annoying_print :
          print('001 out tensor device==> {}, shape==> {}, type==> {}\n'.format(\
          tensor.device, tensor.shape, tensor.type()))
          sys.stdout.flush()

        tensor = self.linear(tensor)
        
        if annoying_print :
          print('002 out tensor device==> {}, shape==> {}, type==> {}\n'.format(\
          tensor.device, tensor.shape, tensor.type()))
          sys.stdout.flush()
            
        return tensor

In [12]:
class DeepSanne(nn.Module):
    def __init__(self):
        super(DeepSanne, self).__init__()

        channels = 1
        self.num_channels = 64
        filter = 128
        kernel_size = (1,1)
        num_labels = 1 
        bias = True

        self.module0 = FirstDenseLayer(channels, self.num_channels, num_labels, True, False)
        self.module1 = Conv2dLSTM(self.num_channels, filter, kernel_size, num_labels, bias, filter, True, False)
        self.module2 = SecondDenseLayer(filter, filter, num_labels, True, False)
        self.module2_1 = ThridDenseLayer(filter, self.num_channels, num_labels, True, False)
        self.module3 = OutputDenseLayer(self.num_channels, 1, num_labels, True, False)
        #self.module3 = OutputDenseLayer(filter, 1, num_labels, True, False)

    def forward( self, tensor):

        print('Input device==> {}, shape==> {}, type==> {}\n'.format(\
        tensor.device, tensor.shape, tensor.type()))
        sys.stdout.flush()

        tensor = self.module0(tensor)
        print('Input after module0 device==> {}, shape==> {}, type==> {}\n'.format(\
        tensor.device, tensor.shape, tensor.type()))
        sys.stdout.flush()

        tensor = self.module1(tensor)
        print('Input after module1 device==> {}, shape==> {}, type==> {}\n'.format(\
        tensor.device, tensor.shape, tensor.type()))
        sys.stdout.flush()

        tensor = self.module2(tensor)
        print('Input after module2 device==> {}, shape==> {}, type==> {}\n'.format(\
        tensor.device, tensor.shape, tensor.type()))
        sys.stdout.flush()

        # expppp
        tensor = self.module2_1(tensor)
        print('Input after module2 device==> {}, shape==> {}, type==> {}\n'.format(\
        tensor.device, tensor.shape, tensor.type()))
        sys.stdout.flush()

        tensor = self.module3(tensor)
        print('Input after module3 device==> {}, shape==> {}, type==> {}\n'.format(\
        tensor.device, tensor.shape, tensor.type()))
        sys.stdout.flush()

        return tensor

In [13]:
 class ModelParallel(DeepSanne) :
    def __init__(self, *args, **kwargs):
      super(ModelParallel, self).__init__( *args , **kwargs )

    # Distibuir tensor layers en cuda
      self.seq0 = nn.Sequential(self.module0).to('cuda:0')
      self.seq1 = nn.Sequential(self.module1).to('cuda:2')
      self.seq2 = nn.Sequential(self.module2).to('cuda:3')
      self.seq2_1 = nn.Sequential(self.module2_1).to('cuda:3')
      self.seq3 = nn.Sequential(self.module3).to('cuda:3')

    def forward(self, tensor):

      # Conectar (pasar) resultados entre capas
      tensor = self.seq0(tensor).to('cuda:2')
      tensor = self.seq1(tensor).to('cuda:3')
      tensor = self.seq2(tensor).to('cuda:3')
      tensor = self.seq2_1(tensor).to('cuda:3')
      tensor = self.seq3(tensor)

      return tensor

### ENTRENAMIENTO

In [14]:
# Función para liberar memoria antes de empezar
def liberar_memoria():
    torch.cuda.empty_cache()
    torch.cuda.reset_peak_memory_stats()
    torch.cuda.reset_accumulated_memory_stats()

In [15]:
liberar_memoria()

In [None]:
# Crear una instancia del modelo
model = ModelParallel()

# Definir la función de pérdida y el optimizador
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
#optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
#optimizer = torch.optim.RMSprop(model.parameters(), lr=0.001, alpha=0.99)
#optimizer = torch.optim.Adagrad(model.parameters(), lr=0.001)


## ENTRENAR MODELO ###
print("\n READY TO TRAIN THE MODEL______________________")
sys.stdout.flush()

epochs = 10

train_summaries = []
training_times = []
valid_summaries = []
test_summaries = []
start_time_tot = time.time()
for epoch in range(epochs):

    liberar_memoria()
    #print("\n \n Antes:")
    #os.system('nvidia-smi')
    start_time = time.time()
    model.train()

    train_loss = 0.0
    with tqdm(total=len(train_loader), desc=f"Epoch {epoch+1}/{epochs}", unit="batch") as progress_bar:
        for images, velocities in train_loader:
            optimizer.zero_grad()
            outputs = model(images.to('cuda:0')).to('cuda:3')
            loss = criterion(outputs, velocities.to('cuda:3'))
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            progress_bar.set_postfix(train_loss=f"{loss.item():.4f}")
            progress_bar.update()

    train_loss /= len(train_loader)
    train_rmse = math.sqrt(train_loss)  # Calcular RMSE a partir de MSE
    train_summaries.append(train_rmse)

    model.eval()
    valid_loss = 0.0
    with torch.no_grad():
        for images, velocities in valid_loader:
            outputs = model(images.to('cuda:0')).to('cuda:3')
            loss = criterion(outputs, velocities.to('cuda:3'))
            valid_loss += loss.item()

    valid_loss /= len(valid_loader)
    valid_rmse = math.sqrt(valid_loss)  # Calcular RMSE a partir de MSE
    valid_summaries.append(valid_rmse)

    print(f"Epoch {epoch+1}/{epochs}, Training RMSE: {train_rmse:.4f}, Validation RMSE: {valid_rmse:.4f}")

    #print("\n \n Despues:")
    #os.system('nvidia-smi')
    end_time = time.time()
    training_times.append(end_time - start_time)
        
# Evaluar en el conjunto de prueba
model.eval()
test_loss = 0.0
with torch.no_grad():
    with tqdm(total=len(test_loader), desc="Testing", unit="batch") as progress_bar:
        for images, velocities in test_loader:
            outputs = model(images.to('cuda:0')).to('cuda:3')
            loss = criterion(outputs, velocities.to('cuda:3'))
            test_loss += loss.item()
            progress_bar.set_postfix(test_loss=f"{loss.item():.4f}")
            progress_bar.update()

    test_loss /= len(test_loader)
    test_rmse = math.sqrt(test_loss)  # Calcular RMSE a partir de MSE
    test_summaries.append(test_rmse)
    
    print(f"Test RMSE: {test_rmse:.4f}")

# Guardar los resúmenes de entrenamiento, validación y prueba
with open('train_summaries.txt', 'w') as f:
    for epoch, loss in enumerate(train_summaries, 1):
        f.write(f"Epoch {epoch}, Training RMSE: {loss:.4f}\n")

with open('valid_summaries.txt', 'w') as f:
    for epoch, loss in enumerate(valid_summaries, 1):
        f.write(f"Epoch {epoch}, Validation RMSE: {loss:.4f}\n")

with open('test_summaries.txt', 'w') as f:
    for epoch, loss in enumerate(test_summaries, 1):
        f.write(f"Epoch {epoch}, Test RMSE: {loss:.4f}\n")


# Guardar modelo
npix = 300
ntn = len(train_images)
nv = len(valid_images)
ntt = len(test_images)
model_name = f'modelo_npix{npix}_ntn{ntn}_nv{nv}_ntt{ntt}_b{batch}_ts{time_steps}.pth'
torch.save(model.state_dict(), model_name)

end_time_tot = time.time()
print(end_time_tot - start_time_tot)

Densely Layer  --- in_channels , out_channels , input_num_DENSE_01 , num_labels ===>  1 64 --- 1
LSTMConv Layer  --- filters, kernel_size, padding,  input_num_DENSE_01 , num_layers ===>  128 (1, 1) (0, 0) --- 1
Densely Layer  --- in_channels , out_channels , input_num_DENSE_01 , num_labels ===>  128 128 --- 1
Densely Layer  --- in_channels , out_channels , input_num_DENSE_01 , num_labels ===>  128 64 --- 1
Densely Output Layer  --- in_channels , out_channels , input_num_DENSE_OUT_01 , num_labels ===>  64 1 --- 1

 READY TO TRAIN THE MODEL______________________


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
  return F.mse_loss(input, target, reduction=self.reduction)
Epoch 1/10: 100%|████████████████████████████████████████████████| 62/62 [01:23<00:00,  1.35s/batch, train_loss=0.0131]


Epoch 1/10, Training RMSE: 0.1765, Validation RMSE: 0.1537


Epoch 2/10: 100%|████████████████████████████████████████████████| 62/62 [01:22<00:00,  1.33s/batch, train_loss=0.0366]


Epoch 2/10, Training RMSE: 0.1542, Validation RMSE: 0.1535


Epoch 3/10: 100%|████████████████████████████████████████████████| 62/62 [01:22<00:00,  1.33s/batch, train_loss=0.0227]


Epoch 3/10, Training RMSE: 0.1539, Validation RMSE: 0.1535


Epoch 4/10: 100%|████████████████████████████████████████████████| 62/62 [01:22<00:00,  1.33s/batch, train_loss=0.0147]


Epoch 4/10, Training RMSE: 0.1537, Validation RMSE: 0.1535


Epoch 5/10: 100%|████████████████████████████████████████████████| 62/62 [01:22<00:00,  1.33s/batch, train_loss=0.0394]


Epoch 5/10, Training RMSE: 0.1543, Validation RMSE: 0.1535


Epoch 6/10: 100%|████████████████████████████████████████████████| 62/62 [01:22<00:00,  1.33s/batch, train_loss=0.0117]


Epoch 6/10, Training RMSE: 0.1538, Validation RMSE: 0.1536


Epoch 7/10: 100%|████████████████████████████████████████████████| 62/62 [01:22<00:00,  1.33s/batch, train_loss=0.0454]


Epoch 7/10, Training RMSE: 0.1545, Validation RMSE: 0.1535


Epoch 8/10: 100%|████████████████████████████████████████████████| 62/62 [01:22<00:00,  1.33s/batch, train_loss=0.0300]


Epoch 8/10, Training RMSE: 0.1541, Validation RMSE: 0.1536


Epoch 9/10:  87%|█████████████████████████████████████████▊      | 54/62 [01:11<00:10,  1.35s/batch, train_loss=0.0310]

In [None]:
# Graficar pérdidas
epochs_range = range(1, epochs + 1)
plt.figure(figsize=(10, 6))
plt.plot(epochs_range, train_summaries, label='Training RMSE')
plt.plot(epochs_range, valid_summaries, label='Validation RMSE')
plt.xlabel('Epochs')
plt.ylabel('RMSE')
plt.title('Training and Validation RMSE per Epoch')
plt.legend()
plt.grid(True)
plt.savefig('rmse_per_epoch.png') 
plt.show()

In [None]:
def plotImage(image_data):
    plt.figure()
    plt.imshow(image_data)
    plt.colorbar()
    plt.title('Imagen FITS')
    plt.xlabel('Pixel X')
    plt.ylabel('Pixel Y')
    plt.gca().invert_yaxis() 
    plt.savefig('Image-disk.png')
    plt.show()

def plotVelocity(v_real, v_model, pix):

    x = np.linspace(0, 300, pix)
    y = np.linspace(0, 300, pix)
    X, Y = np.meshgrid(x, y)
    
    plt.figure(figsize=(14, 6))
    
    plt.subplot(1, 2, 1)
    plt.pcolormesh(X, Y, v_real)
    plt.colorbar(label='Velocidad FARGO3D')
    plt.xlabel('X')
    plt.ylabel('Y')
    plt.title('Componente de Velocidad según FARGO3D')
    
    plt.subplot(1, 2, 2)
    plt.pcolormesh(X, Y, v_model)
    plt.colorbar(label='Velocidad Model')
    plt.xlabel('X')
    plt.ylabel('Y')
    plt.title('Componente de Velocidad según Model')
    
    plt.tight_layout()
    plt.savefig('Velocity-result-comparison.png')
    plt.show()

In [None]:
"""
# Cargar el modelo entrenado para pruebas unitarias
model_name = 'modelo_npix300_ntn123_nv82_ntt11_b2_ts14.pth'
model = ModelParallel()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

model.load_state_dict(torch.load(model_name), strict=False)
"""

In [None]:
# Evaluar visualmente los resultados
model.eval()
all_predictions = np.empty((0, *(npix, npix, 1)))
all_real = np.empty((0, *(npix, npix, 1)))
all_test_images = np.empty((0, *(14, npix, npix, 1)))
# Hacer predicciones
with torch.no_grad():
    for i, (images, velocities) in enumerate(test_loader):
        predicciones = model(images.to('cuda:0'))
        #predicciones = predicciones[np.newaxis, :, :, :]
        if predicciones.shape != torch.Size([batch, 300, 300, 1]):
            print(f"Ignoring prediction at index {i} with shape {predicciones.shape}")
            continue
        loss = criterion(predicciones.to('cuda:3'), velocities.to('cuda:3'))
        all_predictions = np.append(all_predictions, predicciones.cpu().numpy(), axis=0)
        all_real = np.append(all_real, velocities.cpu().numpy(), axis=0)
        all_test_images = np.append(all_test_images, images.cpu().numpy(), axis=0)

print(all_predictions.shape)
print(all_real.shape)
print(all_test_images.shape)

In [None]:
id = 0
plotImage(all_test_images[id][9].reshape((300, 300)))
plotVelocity(all_real[id].reshape((300, 300)), all_predictions[id].reshape((300,300)), 300)
y_true = all_real[id].reshape((300, 300))
y_pred = all_predictions[id].reshape((300,300))

# Calcular RMSE
mse = np.mean((y_true - y_pred) ** 2)
rmse = np.sqrt(mse)
print(f"RMSE: {rmse:.4f}")

# Calcular MAE
mae = np.mean(np.abs(y_true - y_pred))
print(f"MAE: {mae:.4f}")

# Calcular SSIM
from skimage.metrics import structural_similarity as ssim
ssim_index, _ = ssim(y_true, y_pred, data_range=y_true.max() - y_true.min(), full=True)
print(f"SSIM: {ssim_index:.4f}")

# Calcular el mapa de errores
error_map = np.abs(y_true - y_pred)

# Visualizar el mapa de errores
plt.figure(figsize=(12, 6))
plt.imshow(error_map, cmap='inferno', interpolation='nearest')
plt.colorbar(label='Error Absoluto')
plt.title('Mapa de Errores')
plt.xlabel('X')
plt.ylabel('Y')
plt.gca().invert_yaxis() 
plt.savefig('Error_Map.png')
plt.show()