In [None]:
from google.colab import drive
drive.mount("/content/gdrive")

import time
import os
import glob
import random
import json
import subprocess
import sys
import gc

import numpy as np
import matplotlib.pyplot as plt
from collections import OrderedDict
import xml.etree.ElementTree

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
def set_deterministic():
    """Ask PyTorch to use deterministic algorithms wherever it can.

    When CUDA is available, cuDNN autotuning (``benchmark``) is switched off
    and cuDNN is put into deterministic mode. The global deterministic-
    algorithms switch is then enabled.
    """
    if torch.cuda.is_available():
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True

    # ``torch.set_deterministic`` is the old, deprecated spelling; prefer the
    # current API and only fall back to the old one on builds that lack it.
    if hasattr(torch, "use_deterministic_algorithms"):
        torch.use_deterministic_algorithms(True)
    else:
        torch.set_deterministic(True)
    
    
def set_all_seeds(seed):
    """Seed every random number generator used in this file with ``seed``.

    Also exports the seed through the ``PL_GLOBAL_SEED`` environment variable.
    """
    os.environ["PL_GLOBAL_SEED"] = str(seed)
    seeders = (
        random.seed,                 # Python stdlib RNG
        np.random.seed,              # NumPy global RNG
        torch.manual_seed,           # PyTorch default generator
        torch.cuda.manual_seed_all,  # PyTorch CUDA generators
    )
    for seed_fn in seeders:
        seed_fn(seed)
def compute_accuracy(model, data_loader, device):
    """Return the reconstruction accuracy of ``model`` over ``data_loader``.

    The score is ``1 - sum((targets - recon)**2) / sum(targets**2)``, with
    both sums accumulated over *every* batch of the loader (the previous
    implementation only reported the ratio of the first batch).

    Args:
        model: Module called as ``model(features)``; switched to eval mode.
        data_loader: Iterable of ``(features, targets)`` tensor pairs.
        device: Device the batches are moved to before the forward pass.

    Returns:
        The score rounded to 5 decimals, or ``nan`` when the summed squared
        targets are zero (which includes an empty loader).
    """
    model.eval()
    residual_energy = 0.0
    target_energy = 0.0
    with torch.no_grad():
        for features, targets in data_loader:
            targets = targets.to(device)
            features = features.to(device)

            recon = model(features)

            # Reduce on the device and pull back a single scalar per batch
            # instead of copying whole tensors to NumPy several times.
            residual_energy += torch.sum((targets - recon) ** 2).item()
            target_energy += torch.sum(targets ** 2).item()

    if target_energy == 0.0:
        return np.float64(np.nan)

    return np.round(1 - residual_energy / target_energy, 5)
    

def compute_epoch_loss_autoencoder(model, data_loader, loss_fn, device):
    """Return the loss of ``model`` over ``data_loader``, averaged per example.

    Each batch loss is computed with ``reduction='sum'`` and the grand total
    is divided by the number of examples (``targets.size(0)`` summed over the
    batches), so the value is a per-example sum, not a per-element mean.

    Args:
        model: Module called as ``model(features)``; switched to eval mode.
        data_loader: Iterable of ``(features, targets)`` tensor pairs.
        loss_fn: Callable ``loss_fn(predictions, targets, reduction='sum')``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Whatever ``loss_fn`` returns, summed and divided by the example count
        (a 0-dim tensor for losses such as ``F.mse_loss``; callers in this
        file use ``.item()`` on it).

    Raises:
        ValueError: If the loader yields no examples.
    """
    model.eval()
    total_loss, num_examples = 0., 0
    with torch.no_grad():
        for features, targets in data_loader:
            features = features.to(device)
            targets = targets.to(device)

            predictions = model(features)
            total_loss += loss_fn(predictions, targets, reduction='sum')
            num_examples += targets.size(0)
            # The batch tensors are simply rebound on the next iteration;
            # copying them back to the CPU first only wastes a transfer.

    if num_examples == 0:
        raise ValueError('data_loader yielded no examples')

    return total_loss / num_examples
def train_autoencoder_v1(num_epochs, model, optimizer, 
                         train_loader, device, loss_fn=None, 
                         skip_epoch_stats=False,
                         save_model=None):
    """Train ``model`` on ``train_loader`` and return the logged losses.

    Args:
        num_epochs: Number of passes over ``train_loader``.
        model: Module called as ``model(features)``.
        optimizer: Optimizer already bound to the model parameters.
        train_loader: Sized iterable of ``(features, targets)`` tensor pairs.
        device: Device each batch is moved to.
        loss_fn: Callable ``loss_fn(predictions, targets)``; defaults to
            ``F.mse_loss``.
        skip_epoch_stats: When False, the full training loss is re-evaluated
            after every epoch with ``compute_epoch_loss_autoencoder``.
        save_model: Optional path; when given, the final ``state_dict`` is
            written there with ``torch.save``.

    Returns:
        Dict with ``'train_loss_per_batch'`` (one float per optimizer step)
        and ``'train_loss_per_epoch'`` (one float per epoch, empty when
        ``skip_epoch_stats`` is True).
    """
    log_dict = {'train_loss_per_batch': [],
                'train_loss_per_epoch': []}

    if loss_fn is None:
        loss_fn = F.mse_loss

    start_time = time.time()
    for epoch in range(num_epochs):

        model.train()
        for batch_idx, (features, targets) in enumerate(train_loader):

            features = features.to(device)
            targets = targets.to(device)

            # FORWARD AND BACK PROP
            predictions = model(features)
            loss = loss_fn(predictions, targets)
            optimizer.zero_grad()
            loss.backward()

            # UPDATE MODEL PARAMETERS
            optimizer.step()

            # LOGGING
            # One host sync per step; the batch tensors are rebound on the
            # next iteration, so no explicit copy back to the CPU is needed.
            batch_loss = loss.item()
            log_dict['train_loss_per_batch'].append(batch_loss)

            print('Epoch: %03d/%03d | Batch %04d/%04d | Loss: %.4f'
                  % (epoch+1, num_epochs, batch_idx+1,
                       len(train_loader), batch_loss))

        if not skip_epoch_stats:
            model.eval()

            with torch.set_grad_enabled(False):  # save memory during inference
                train_loss = compute_epoch_loss_autoencoder(
                    model, train_loader, loss_fn, device)
                print('***Epoch: %03d/%03d | Loss: %.3f' % (
                      epoch+1, num_epochs, train_loss))
                log_dict['train_loss_per_epoch'].append(train_loss.item())

        print('Time elapsed: %.2f min' % ((time.time() - start_time)/60))

    print('Total Training Time: %.2f min' % ((time.time() - start_time)/60))
    if save_model is not None:
        torch.save(model.state_dict(), save_model)

    return log_dict
def plot_training_loss(minibatch_losses, num_epochs, averaging_iterations=100, custom_label=''):
    """Plot the per-minibatch loss together with its running average.

    Args:
        minibatch_losses: Sequence of loss values, one per iteration.
        num_epochs: Unused; kept so existing callers keep working.
        averaging_iterations: Window length of the running average. It is
            clamped to the number of losses, because a window longer than
            the data would make ``np.convolve(..., mode='valid')`` return a
            flat, mis-scaled line instead of an average.
        custom_label: Suffix appended to both legend labels.
    """
    num_iterations = len(minibatch_losses)

    plt.figure()
    ax1 = plt.subplot(1, 1, 1)
    ax1.plot(range(num_iterations),
             (minibatch_losses), label=f'Minibatch Loss{custom_label}')
    ax1.set_xlabel('Iterations')
    ax1.set_ylabel('Loss')

    window = min(averaging_iterations, num_iterations)
    if window > 0:  # nothing to average for empty input / non-positive window
        ax1.plot(np.convolve(minibatch_losses,
                             np.ones(window,)/window,
                             mode='valid'),
                 label=f'Running Average{custom_label}')
    ax1.legend()

def plot_accuracy(train_acc, valid_acc):
    """Plot training and validation accuracy against the 1-based epoch index.

    The x axis has one point per entry of ``train_acc``; ``valid_acc`` is
    plotted against the same axis.
    """
    epochs = np.arange(1, len(train_acc) + 1)

    for series, legend_label in ((train_acc, 'Training'),
                                 (valid_acc, 'Validation')):
        plt.plot(epochs, series, label=legend_label)

    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
def _arrays_dir(Rai, Raf, t, mode):
    """Return the directory holding the ``.npy`` arrays of time ``t`` for ``mode``."""
    subdirs = {
        'no_masking': '/snapshots/',
        'diffusion_mask': '/masks/diffusion/',
        'convection_mask': '/masks/convection/',
    }
    if mode not in subdirs:
        raise ValueError(
            'unknown mode %r; expected one of %s' % (mode, sorted(subdirs)))
    return ('/content/gdrive/My Drive/Project/Results/' + Rai + '_' + Raf
            + '_13_1_19' + subdirs[mode] + str(t) + '/arrays')


def _load_indexed_arrays(arrays_dir, num_indices):
    """Load every ``<i>_*.npy`` file in ``arrays_dir`` for ``i < num_indices``."""
    arrays = []
    for i in range(num_indices):
        # An index may match zero or several files; extending a flat list
        # copes with both, unlike np.ravel on a ragged list of lists.
        for file_path in glob.glob(arrays_dir + '/' + str(i) + '_' + '*.npy'):
            # NOTE(review): allow_pickle=True executes pickled payloads; only
            # safe while these files are produced by this project itself.
            arrays.append(np.load(file_path, allow_pickle=True))
    return arrays


def load_data(Rai, Raf, ti, tf, mode, max_index=2140):
    """Load the input (time ``ti``) and target (time ``tf``) arrays.

    The number of indices to read is the number of files found in the
    ``ti`` directory (printed for reference), capped at ``max_index``
    inclusive. The same index range is used for the ``tf`` directory.

    Args:
        Rai, Raf: Strings forming the ``<Rai>_<Raf>_13_1_19`` results folder.
        ti, tf: Identifiers of the input and target sub-folders.
        mode: ``'no_masking'``, ``'diffusion_mask'`` or ``'convection_mask'``.
        max_index: Largest file index that is loaded (default 2140).

    Returns:
        ``(data_x, data_y)``: two lists of arrays, ordered by file index.

    Raises:
        ValueError: If ``mode`` is not one of the supported values.
        FileNotFoundError: If the ``ti`` array directory cannot be walked.
    """
    x_dir = _arrays_dir(Rai, Raf, ti, mode)
    try:
        _, _, files = next(os.walk(x_dir))
    except StopIteration:
        # os.walk yields nothing for a missing/unreadable directory.
        raise FileNotFoundError('cannot list directory: ' + x_dir) from None
    file_count = len(files)
    print(file_count)

    num_indices = min(file_count, max_index + 1)

    data_x = _load_indexed_arrays(x_dir, num_indices)
    data_y = _load_indexed_arrays(_arrays_dir(Rai, Raf, tf, mode), num_indices)

    return data_x, data_y
def swap(i,j):
  """Return ``i`` and ``j`` with their first two axes exchanged."""
  return i.swapaxes(0, 1), j.swapaxes(0, 1)

def unsqueeze(i,j):
  """Return ``i`` and ``j`` with a new leading axis of length 1."""
  with_leading_axis = tuple(torch.unsqueeze(t, 0) for t in (i, j))
  return with_leading_axis[0], with_leading_axis[1]
def split_x_y_list(data_x, data_y, train_size, seed):
  """Shuffle paired samples reproducibly and split them into train/test.

  A single permutation drawn from ``random.Random(seed)`` is applied to both
  lists, so ``data_x[k]`` always stays paired with ``data_y[k]``. For lists
  of equal length this yields the same order as sampling each list
  separately with the same seed.

  Args:
    data_x, data_y: Equally long sequences of inputs and targets.
    train_size: Fraction of the samples placed in the training part.
    seed: Seed of the shuffle.

  Returns:
    ``(data_x_train, data_y_train, data_x_test, data_y_test)`` as lists.

  Raises:
    ValueError: If the two sequences differ in length (independent shuffles
      would silently break the pairing in that case).
  """
  num_samples = len(data_x)
  if len(data_y) != num_samples:
    raise ValueError('data_x and data_y must have the same length, got %d and %d'
                     % (num_samples, len(data_y)))

  order = random.Random(seed).sample(range(num_samples), num_samples)
  data_x = [data_x[k] for k in order]
  data_y = [data_y[k] for k in order]

  middle_index = int(np.round(num_samples * train_size))

  return (data_x[:middle_index], data_y[:middle_index],
          data_x[middle_index:], data_y[middle_index:])
def generate_loader(data_x, data_y, TRAIN_SIZE, BATCH_SIZE, SEED):
  """Split the samples and return pre-batched train and test "loaders".

  Both return values are plain lists of ``(x_batch, y_batch)`` tensor tuples,
  not ``DataLoader`` objects. Every batch gets a length-1 axis inserted at
  position 1 (``unsqueeze`` at axis 0 followed by ``swap`` of axes 0 and 1).
  The shape of every batch is printed as a sanity check.
  """
  x_train, y_train, x_test, y_test = split_x_y_list(data_x, data_y, TRAIN_SIZE, SEED)

  def _as_batches(xs, ys):
    # list of arrays -> one ndarray -> float tensor -> batched tuples
    dataset = TensorDataset(torch.Tensor(np.array(xs)),
                            torch.Tensor(np.array(ys)))
    batches = DataLoader(dataset, batch_size = BATCH_SIZE)
    return [swap(*unsqueeze(x_batch, y_batch)) for (x_batch, y_batch) in batches]

  def _report(batches):
    for image_x, image_y in batches:
      print('Image x batch dimensions:', image_x.size())
      print('Image y batch dimensions:', image_y.size())

  train_loader = _as_batches(x_train, y_train)
  test_loader = _as_batches(x_test, y_test)

  # Checking the dataset
  print('Training Set:\n')
  _report(train_loader)

  print('\nTesting Set:')
  _report(test_loader)

  return train_loader, test_loader

##########################
### MODEL
##########################


class Reshape(nn.Module):
    """Module that views its input with the shape given at construction."""

    def __init__(self, *args):
        super().__init__()
        # Positional arguments are kept as the target shape tuple.
        self.shape = args

    def forward(self, x):
        target_shape = self.shape
        return x.view(target_shape)


class Trim(nn.Module):
    """Module that crops axes 2 and 3 of its input to at most 256 entries."""

    def __init__(self, *args):
        # Positional arguments are accepted but ignored.
        super().__init__()

    def forward(self, x):
        crop = slice(None, 256)
        return x[:, :, crop, crop]


class AutoEncoder(nn.Module):
    """Convolutional autoencoder with an ``m``-dimensional bottleneck.

    Encoder: four unpadded 4x4, stride-1 convolutions (1 -> 4 -> 16 -> 16 -> 16
    channels) followed by a linear projection to ``m`` features.
    Decoder: the mirror image, built from transposed convolutions, with no
    activation after the last layer.

    Each convolution shrinks both spatial axes by 3, and the linear layers
    are sized for a 16 x 244 x 244 feature map, so the network only accepts
    1 x 256 x 256 inputs.
    """

    def __init__(self, m):
        super().__init__()

        conv_opts = dict(stride=(1, 1), kernel_size=(4, 4), padding=0)
        flat_features = 16 * 244 * 244  # = 952576 values in the last conv map

        self.encoder = nn.Sequential(
            nn.Conv2d(1, 4, **conv_opts),
            nn.LeakyReLU(0.01),
            nn.Conv2d(4, 16, **conv_opts),
            nn.LeakyReLU(0.01),
            nn.Conv2d(16, 16, **conv_opts),
            nn.LeakyReLU(0.01),
            nn.Conv2d(16, 16, **conv_opts),
            nn.Flatten(),
            nn.Linear(flat_features, m),
        )

        self.decoder = nn.Sequential(
            nn.Linear(m, flat_features),
            Reshape(-1, 16, 244, 244),
            nn.ConvTranspose2d(16, 16, **conv_opts),
            nn.LeakyReLU(0.01),
            nn.ConvTranspose2d(16, 16, **conv_opts),
            nn.LeakyReLU(0.01),
            nn.ConvTranspose2d(16, 4, **conv_opts),
            nn.LeakyReLU(0.01),
            nn.ConvTranspose2d(4, 1, **conv_opts),
        )

    def forward(self, x):
        """Encode ``x`` to the bottleneck and decode it again."""
        return self.decoder(self.encoder(x))
def run_cae(Rai, Raf, ti, tf, mode, TRAIN_SIZE, BATCH_SIZE, SEED, RANDOM_SEED, LEARNING_RATE, NUM_EPOCHS, DEVICE, MIN_M, MAX_M, NUM_M):
  """Train, evaluate and save one autoencoder per bottleneck size.

  Loads the data for ``mode``, builds the train/test batches, and for every
  value in ``m_values`` (currently the single value 100) trains an
  ``AutoEncoder``, computes its test accuracy and writes ``model.pt``,
  ``test_acc.npy`` and ``log_dict.npy`` to
  ``.../<Rai>_<Raf>_<ti>_<tf>/models/<word>/<m>_RS<RANDOM_SEED>/``.
  The output directory is created if needed; files from an earlier run with
  the same settings are overwritten.

  ``MIN_M``, ``MAX_M`` and ``NUM_M`` are accepted but currently unused.
  """
  data_x, data_y = load_data(Rai, Raf, ti, tf, mode)

  train_loader, test_loader = generate_loader(data_x, data_y, TRAIN_SIZE, BATCH_SIZE, SEED)

  m_values = np.array([100])

  print(m_values)

  # Output sub-folder for the mode; 'word' is the fallback for unknown modes.
  word = {'diffusion_mask': 'diffusion',
          'convection_mask': 'convection',
          'no_masking': 'base'}.get(mode, 'word')

  models_root = ('/content/gdrive/My Drive/Project/Results/' + Rai + '_' + Raf
                 + '_' + str(ti) + '_' + str(tf) + '/models/' + word)

  for i, m in enumerate(m_values):

    m = int(m)

    print(m)
    print()

    if i != 0:
      # Drop the previous run's objects and release cached GPU memory
      # before the next model is allocated.
      time.sleep(30)

      model = model.cpu()
      del model
      del optimizer
      del test_acc
      gc.collect()
      torch.cuda.empty_cache()

      time.sleep(30)

    set_all_seeds(RANDOM_SEED)

    # NOTE(review): this first model/optimizer pair is discarded right away.
    # It is kept on purpose: building it consumes random numbers after the
    # seed is set, so removing it would change the initial weights of the
    # model that is actually trained below.
    model = AutoEncoder(m)
    model = model.to(DEVICE)

    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

    model = model.cpu()
    del model
    del optimizer
    gc.collect()
    torch.cuda.empty_cache()

    model = AutoEncoder(m)
    model = model.to(DEVICE)

    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

    log_dict = train_autoencoder_v1(num_epochs=NUM_EPOCHS, model=model, 
                                    optimizer=optimizer,
                                    train_loader=train_loader, device = DEVICE,
                                    skip_epoch_stats=True)

    test_acc = compute_accuracy(model, test_loader, DEVICE)

    out_dir = models_root + '/' + str(m) + '_RS' + str(RANDOM_SEED)

    # makedirs(exist_ok=True) instead of mkdir: a re-run or a missing parent
    # folder must not throw away a model that has just finished training.
    os.makedirs(out_dir, exist_ok=True)

    torch.save(model.state_dict(), out_dir + '/model.pt')

    np.save(out_dir + '/test_acc.npy', test_acc, allow_pickle=True)
    np.save(out_dir + '/log_dict.npy', log_dict, allow_pickle=True)

    print(test_acc)
  