# In [None]:
from google.colab import drive
drive.mount("/content/gdrive")

import matplotlib.pyplot as plt
import numpy as np
from numpy.random import randint
import sklearn as sk
from sklearn.mixture import GaussianMixture
from sklearn.decomposition import SparsePCA
from scipy.io import loadmat
import h5py
from scipy import sparse, linalg
from scipy.optimize import curve_fit, root
from scipy.integrate import odeint
from scipy.interpolate import interp1d
import shutil
import os
import time

# Global Matplotlib font configuration: use a serif family and put
# 'Times New Roman' at the front of the serif fallback list.
plt.rcParams['font.family'] = 'serif'
plt.rcParams['font.serif'] = ['Times New Roman'] + plt.rcParams['font.serif']


import matplotlib as mpl
from matplotlib.colors import ListedColormap

# Seaborn colormap
# Discrete colormap built from seaborn's 'deep' palette with white prepended,
# so that index 0 maps to white and the palette colours start at index 1.
import seaborn as sns
sns_list = sns.color_palette('deep').as_hex()
sns_list.insert(0, '#ffffff')  # Insert white at zero position
sns_cmap = ListedColormap(sns_list)

# Short module-level alias; segment_and_mask reads `cm` (and `cm.N`) when
# colouring the cluster plots.
cm = sns_cmap

# Hex colour list; these values appear to match Matplotlib's default
# ten-colour ('tab10') cycle.
mpl_colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728',
            '#9467bd', '#8c564b', '#e377c2', '#7f7f7f',
            '#bcbd22', '#17becf']

from sklearn.datasets import fetch_openml
from sklearn.decomposition import PCA
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

import time
import os
import glob
import random
import json
import subprocess
import sys
import gc

def load_data(Rai, Raf, ti, tf, mode):
  """Load the paired arrays stored for one Ra range and one (ti, tf) pair.

  The arrays are read from
  ``Results/<Rai>_<Raf>_<ti>_<tf>/<mode dir>/<ti>_0/arrays`` (first list) and
  ``.../<tf>_1/arrays`` (second list). Files are looked up by their numeric
  prefix ``<i>_*.npy`` for ``i`` in ``0 .. file_count - 1``, where
  ``file_count`` is the number of files in the ``<ti>_0/arrays`` directory.

  Parameters
  ----------
  Rai, Raf : str
      Already-formatted Rayleigh-number strings used in the directory name.
  ti, tf : int or float
      Initial and final times; converted with ``str()`` for the path.
  mode : str
      One of ``'no_masking'``, ``'diffusion_mask'``, ``'convection_mask'``.
      Any other value yields two empty lists (no error is raised).

  Returns
  -------
  (list, list)
      Arrays loaded from the ``<ti>_0`` directory and from the ``<tf>_1``
      directory, each ordered by file index.
  """
  # Sub-path (below the run directory) that holds the arrays for each mode.
  mode_dirs = {
    'no_masking': '/snapshots/',
    'diffusion_mask': '/masks/diffusion/',
    'convection_mask': '/masks/convection/',
  }

  if mode not in mode_dirs:
    # Unrecognised modes have always produced empty results; keep that.
    return [], []

  run_dir = '/content/gdrive/My Drive/Project/Results/' + Rai + '_' + Raf + '_' + str(ti) + '_' + str(tf)
  dir_x = run_dir + mode_dirs[mode] + str(ti) + '_0/arrays'
  dir_y = run_dir + mode_dirs[mode] + str(tf) + '_1/arrays'

  # The number of files in the first directory fixes the index range
  # that is used for both directories.
  _, _, files = next(os.walk(dir_x))
  file_count = len(files)

  def load_indexed(array_dir):
    # Flatten the glob results directly instead of np.ravel-ing a list of
    # lists, which fails when an index matches zero or several files.
    loaded = []
    for i in range(file_count):
      for file_path in glob.glob(array_dir + '/' + str(i) + '_' + '*.npy'):
        # NOTE(review): allow_pickle=True executes pickled objects; only
        # safe because these files are produced by this project itself.
        loaded.append(np.load(file_path, allow_pickle=True))
    return loaded

  data_x = load_indexed(dir_x)
  data_y = load_indexed(dir_y)

  return data_x, data_y
def scientific_to_string(Ra):
  """Format a number as a path-friendly scientific-notation string.

  The value is rendered with three decimals in exponent notation, the
  decimal point is replaced by ``'p'`` and any ``'+'`` sign is dropped,
  e.g. ``1000.0 -> '1p000e03'``.
  """
  formatted = '{:.3e}'.format(Ra)
  return formatted.replace('.', 'p').replace('+', '')


def create_time_list(ti, inc, tf):
  """Return the times ``ti, ti + inc, ti + 2*inc, ...`` as a list.

  Values are produced by repeated addition and the list stops with the
  first value that is not smaller than ``tf``, so the last entry can exceed
  ``tf`` when ``tf - ti`` is not a multiple of ``inc``. The list always
  contains at least ``ti``.
  """
  times = [ti]

  # Keep stepping from the most recent value until tf is reached or passed.
  while times[-1] < tf:
    times.append(times[-1] + inc)

  return times


def create_result_dirs(Rai, Raf, ti, inc, tf):
  """Create the results directory tree for one run and return the Ra strings.

  The run directory is ``Results/<Rai_s>_<Raf_s>_<ti>_<inc>_<tf>`` and
  contains, for every time ``t`` in ``create_time_list(ti, inc, tf)``:

  * ``snapshots/<t>/{arrays,images}``
  * ``segmentations/<t>/{arrays,images}``
  * ``masks/diffusion/<t>/{arrays,images}``
  * ``masks/convection/<t>/{arrays,images}``

  plus ``models/{diffusion,convection,base}``.

  Directories are created with ``os.makedirs(..., exist_ok=True)``, so the
  call is idempotent: re-running with the same arguments, or after a
  partially failed run, no longer raises ``FileExistsError``.

  Parameters
  ----------
  Rai, Raf : float
      Initial and final Rayleigh numbers.
  ti, inc, tf : int or float
      Start time, time increment and end time.

  Returns
  -------
  (str, str)
      ``Rai`` and ``Raf`` formatted by ``scientific_to_string``.
  """
  time_list = create_time_list(ti, inc, tf)

  Rai_s = scientific_to_string(Rai)
  Raf_s = scientific_to_string(Raf)

  main_directory = Rai_s + '_' + Raf_s + '_' + str(ti) + '_' + str(inc) + '_' + str(tf)

  root_path = '/content/gdrive/My Drive/Project/Results/'
  run_path = os.path.join(root_path, main_directory)

  # Only leaf directories are listed; makedirs creates the parents.
  per_time_parents = ['snapshots', 'segmentations', 'masks/diffusion', 'masks/convection']
  folders = [
    parent + '/' + str(t) + '/' + leaf
    for parent in per_time_parents
    for t in time_list
    for leaf in ('arrays', 'images')
  ]
  folders += ['models/diffusion', 'models/convection', 'models/base']

  for folder in folders:
    os.makedirs(os.path.join(run_path, folder), exist_ok=True)

  return Rai_s, Raf_s


def create_Ra_path_list(Rai, Raf):
  """Build the list of HDF5 snapshot paths for Rayleigh numbers in [Rai, Raf].

  Every entry of ``all_snapshot_groups`` is expected to be named so that
  its last eight characters are the Rayleigh number with ``'p'`` in place of
  the decimal point, and so that it contains ``snapshots_<index>_``.

  The entry whose name contains ``scientific_to_string(Rai)`` defines the
  starting index; the following ``n - 1`` indices are then collected, where
  ``n`` is the number of entries whose Rayleigh number lies in
  ``[Rai, Raf]``.

  Parameters
  ----------
  Rai, Raf : float
      Inclusive bounds of the Rayleigh-number range.

  Returns
  -------
  list of str
      Paths of the form ``<group>/<name>/<name>_s1/<name>_s1_p0.h5``.

  Raises
  ------
  ValueError
      If an entry name does not end in a parsable number, or if no entry
      matches ``Rai`` (previously an ``UnboundLocalError``).
  """
  groups_root = '/content/gdrive/My Drive/Project/all_snapshot_groups'
  Rai_s = scientific_to_string(Rai)

  # Scan the directory once and reuse the names for both passes.
  names = [entry.name for entry in os.scandir(groups_root)]

  n = 0
  Ra_list = []
  start_index = None

  for name in names:
    subdir_Ra_num = float(name[-8:].replace('p', '.'))

    # Count the entries inside the requested range.
    if Rai <= subdir_Ra_num <= Raf:
      n += 1

    # The entry matching Rai gives the index to start from.
    if Rai_s in name:
      Ra_list.append(name)
      start = name.find("snapshots_") + len("snapshots_")
      end = name.find(name[-9:])
      start_index = int(name[start:end])

  if start_index is None:
    raise ValueError(
      'no snapshot group matching ' + Rai_s + ' found in ' + groups_root)

  # Append the entries for the remaining indices, in index order.
  for i in range(start_index + 1, start_index + n):
    substring = 'snapshots_' + str(i) + '_'
    Ra_list.extend(name for name in names if substring in name)

  Ra_path_list = [groups_root + '/' + el + '/' + el + '_s1/' + el + '_s1_p0.h5' for el in Ra_list]

  return Ra_path_list
def getGradient(f, zm):
  """
  Differentiate a field along axis 0 on a (possibly non-uniform) grid.

  f        is a matrix of the field
  zm       is a matrix of the coordinate along axis 0, same shape as f
  returns  f_dz, a matrix of the derivative of f with respect to zm

  Interior rows use the centred difference between the two neighbouring
  rows; the first and last rows use a one-sided difference.
  """
  # Shifts for np.roll(): -1 brings the next row to the current position,
  # +1 brings the previous row.
  nxt, prv = -1, 1

  delta_f = np.roll(f, nxt, axis=0) - np.roll(f, prv, axis=0)
  delta_z = np.roll(zm, nxt, axis=0) - np.roll(zm, prv, axis=0)
  f_dz = delta_f / delta_z

  # np.roll wraps around, so the edge rows are overwritten with
  # one-sided differences.
  f_dz[0, :] = (f[0, :] - f[1, :]) / (zm[0, :] - zm[1, :])
  f_dz[-1, :] = (f[-1, :] - f[-2, :]) / (zm[-1, :] - zm[-2, :])

  return f_dz

def get_flux_terms(Ta, ua, wa, sn, P, xm, zm):
  """Compute the two row-averaged flux terms for one snapshot.

  The snapshot ``sn`` of the temperature ``Ta`` and of ``wa`` is transposed
  and flipped along both axes. Row means (axis 1) are then broadcast back to
  the full grid so each returned array has the same shape as ``T``.

  The grid size is taken from the data instead of being fixed to 256x256.
  The time derivative and the ``u`` field that used to be computed here were
  never used; dropping them also removes the ``Ta[sn + 1]`` / ``Ta[sn - 1]``
  accesses that failed on the last snapshot and wrapped around on the first.

  Parameters
  ----------
  Ta, wa : array-like indexed by snapshot
      Temperature and ``w`` fields; ``Ta[sn]`` and ``wa[sn]`` must be 2-D.
  ua, xm : unused
      Kept so that existing callers do not need to change.
  sn : int or float
      Snapshot index; converted with ``int()``.
  P : float
      Scalar multiplying the gradient term.
  zm : ndarray
      Coordinate matrix passed to ``getGradient``; same shape as ``T``.

  Returns
  -------
  (wT_bar, PT_bar_dz, T)
      Row mean of ``w * T``, ``P`` times the axis-0 gradient of the row mean
      of ``T``, and the re-oriented temperature field.
  """
  sn = int(sn)
  T = np.flip(Ta[sn].T)
  w = np.flip(wa[sn].T)

  n_cols = T.shape[1]

  # keepdims leaves a column vector that is tiled across all columns.
  T_bar = np.tile(np.mean(T, axis=1, keepdims=True), (1, n_cols))
  wT_bar = np.tile(np.mean(np.multiply(w, T), axis=1, keepdims=True), (1, n_cols))

  PT_bar_dz = P * getGradient(T_bar, zm)

  return wT_bar, PT_bar_dz, T


def create_masks(T, clustermap):
  """Split ``T`` into two row-wise masked copies based on ``clustermap``.

  A row counts as "set" when every entry of that row of ``clustermap`` is
  non-zero. ``mask_diff`` keeps only the set rows of ``T`` (all other rows
  are zeroed); ``mask_conv`` keeps only the remaining rows.

  The number of rows is taken from ``T`` rather than being fixed to 256, so
  grids of any height are handled.

  Parameters
  ----------
  T : ndarray
      Field to mask; not modified.
  clustermap : ndarray
      Cluster labels with at least as many rows as ``T``.

  Returns
  -------
  (mask_diff, mask_conv) : tuple of ndarray
      Copies of ``T`` with the complementary sets of rows zeroed.
  """
  mask_diff = T.copy()
  mask_conv = T.copy()

  cluster_rows = np.asarray(clustermap)[:T.shape[0]]

  # Reduce over everything except the row axis: True where the row is set.
  row_all_set = cluster_rows.all(axis=tuple(range(1, cluster_rows.ndim)))

  mask_diff[~row_all_set] = 0
  mask_conv[row_all_set] = 0

  return mask_diff, mask_conv


def create_masks_smooth(T, clustermap, diffusivity=15):
  """Build a tapered "diffusion" mask of T and its complement.

  Two passes modify a copy of ``T`` in place, the second one on the array
  flipped along all axes, and the result is flipped back. The complement
  is returned as ``T - mask_diff``.

  NOTE(review): presumably the intent is to fade the field out linearly
  over ``diffusivity`` rows past the edge of the cluster-1 region -- confirm
  with the author. The loop bounds 256 and 128 are hard-coded.

  Parameters
  ----------
  T : ndarray
      Field to mask; not modified (a copy is edited).
  clustermap : ndarray
      Cluster labels, indexed by row for ``i`` in ``range(256)``.
  diffusivity : int, optional
      Number of rows after which a row is forced to zero; also the divisor
      of the per-row decrement.

  Returns
  -------
  (mask_diff, mask_conv)
      The tapered mask and ``T - mask_diff``.
  """
  mask_diff = T.copy()
  # This copy is never used: mask_conv is reassigned before returning.
  mask_conv = T.copy()
  
  for i in range(256):

    # True when row i of clustermap contains at least one zero entry.
    if clustermap[i,:].all() != 1:

      count = 1

      # Only rows below index 128 are touched, so nothing happens for i >= 128.
      for n in range(i, 128):

        # New row n = (|previous row| - |row i| / diffusivity) * sign(row i).
        # On the first iteration n == i, so row i itself is overwritten and
        # later iterations read the updated row i. For n == 0 the index
        # n - 1 is -1, i.e. the last row.
        mask_diff[n,:] = (np.absolute(mask_diff[n - 1, :]) - (1/diffusivity)*np.absolute(mask_diff[i, :]))*np.sign(mask_diff[i, :])

        count +=1

        # There is no break: once count exceeds diffusivity every remaining
        # row up to index 127 is set to zero.
        if count > diffusivity:

          mask_diff[n,:] = 0

  # np.flip without an axis reverses every axis.
  mask_diff = np.flip(mask_diff)

  # Second pass: same procedure on the flipped array. clustermap itself is
  # not flipped, so the row test uses the original orientation.
  for i in range(256):

    if clustermap[i,:].all() != 1:
 
      count = 1

      for n in range(i, 128):

        mask_diff[n,:] = (np.absolute(mask_diff[n - 1, :]) - (1/diffusivity)*np.absolute(mask_diff[i, :]))*np.sign(mask_diff[i, :])

        count +=1

        if count > diffusivity:

          mask_diff[n,:] = 0

  # Restore the original orientation.
  mask_diff = np.flip(mask_diff)

  # Whatever the diffusion mask does not retain goes to the convection mask.
  mask_conv = T - mask_diff

  return mask_diff, mask_conv


def _plot_segmentation(x, z, T, clustermap, cluster_idx, features, mask_diff, mask_conv, sn, labels):
  """Show the five-panel diagnostic figure for one snapshot."""
  fig, axs = plt.subplots(1, 5, figsize=(25, 4))

  temp_plot = axs[0].pcolor(x, z, np.flip(T))

  # Cluster ids are shifted by one so that colormap index 0 (white) is unused.
  axs[1].pcolor(x, z, np.flip(clustermap + 1), cmap=cm, vmin=-0.4, vmax=cm.N-0.4)

  scatter_plot = axs[2].scatter(features[:, 0], features[:, 1], 10, cluster_idx + 1, cmap=cm)
  scatter_plot.set_clim([-.4, cm.N-0.4])

  diff_plot = axs[3].pcolor(x, z, np.flip(mask_diff))
  conv_plot = axs[4].pcolor(x, z, np.flip(mask_conv))

  for ax in axs:
    ax.tick_params(axis='both', which='major', labelsize=6)
    ax.tick_params(axis='both', which='minor', labelsize=6)
    ax.autoscale(enable=True, axis='both', tight=None)

  # (x label, y label, title) for each panel, left to right.
  panel_text = [
    ('$x$', '$z$', 'Temperature at t = ' + str(np.round(sn*0.1, 1)) +'s'),
    ('$x$', '$z$', 'GMM Segmentation'),
    (labels[0], labels[1], 'GMM Clustering'),
    ('$x$', '$z$', 'Diffusion Mask'),
    ('$x$', '$z$', 'Convection Mask'),
  ]
  for ax, (xlabel, ylabel, title) in zip(axs, panel_text):
    ax.set_xlabel(xlabel, fontsize=10)
    ax.set_ylabel(ylabel, fontsize=10)
    ax.set_title(title, fontsize=12)

  for plot in (temp_plot, diff_plot, conv_plot):
    plot.set_clim(-0.5, 0.5)

  plt.show()

  # Release the figure so that long runs do not accumulate open figures.
  plt.close(fig)

  print()


def segment_and_mask(Ta, ua, wa, ti, inc, tf, Ra, x, z):
  """Segment each requested snapshot with a 2-component GMM and mask it.

  For every time in ``create_time_list(ti, inc, tf)`` the snapshot index is
  ``round(time * 10)``. The two flux terms from ``get_flux_terms`` are
  clustered with a Gaussian mixture model, the cluster map is rewritten as
  two bands of label 1 at the top and bottom of the grid with label 0 in
  between, and ``create_masks_smooth`` produces the masks. A diagnostic
  figure is shown for every snapshot.

  Changes from the earlier version: the snapshot index is rounded instead
  of truncated (accumulated float times such as 0.7999999999999999 used to
  select the previous snapshot); the cluster map is reshaped to the shape
  of the field rather than ``[len(x), len(z)]``; and the grid height is read
  from the data instead of being fixed to 256.

  Parameters
  ----------
  Ta, ua, wa : array-like indexed by snapshot
      Fields forwarded to ``get_flux_terms``.
  ti, inc, tf : int or float
      Start time, time increment and end time.
  Ra : float
      Rayleigh number; ``P = 1 / sqrt(Ra)``.
  x, z : 1-D ndarray
      Grid coordinates.

  Returns
  -------
  (T_l, clustermap_l, mask_diff_l, mask_conv_l, thickness_l)
      One entry per time: temperature field, banded cluster map, diffusion
      mask, convection mask and ``z[-1] - z[-cnt]``, where ``cnt`` is the
      number of leading rows of label 1 in the first column.
  """
  time_list = create_time_list(ti, inc, tf)

  # Round rather than truncate: times built by repeated addition can sit
  # just below the intended value.
  index_list = [int(round(e * 10)) for e in time_list]

  P = 1/(np.sqrt(Ra))

  xm, zm = np.meshgrid(x, z)

  T_l, clustermap_l, mask_diff_l, mask_conv_l, thickness_l = [], [], [], [], []

  labels = [r'$\overline{wT}$', r'$P \overline{T}_z$']

  nc = 2  # Number of clusters
  seed = 5

  for sn in index_list:

    wT_bar, PT_bar_dz, T = get_flux_terms(Ta, ua, wa, sn, P, xm, zm)

    # One row per grid point, one column per flux term.
    features = np.vstack([wT_bar.flatten('F'), PT_bar_dz.flatten('F')]).T

    # Fit the Gaussian mixture model and label every grid point.
    model = GaussianMixture(n_components=nc, random_state=seed)
    model.fit(features)
    cluster_idx = model.predict(features)

    # Undo the column-major flattening using the shape of the field itself.
    clustermap = np.reshape(cluster_idx, wT_bar.shape, order='F')

    # GMM labels are arbitrary; make the cluster at the first grid point 1.
    if clustermap[0,0] == 0:
      clustermap = 1 - clustermap
      cluster_idx = 1 - cluster_idx

    n_rows = clustermap.shape[0]

    # Number of leading rows whose first-column label is 1.
    not_one = np.flatnonzero(clustermap[:, 0] != 1)
    cnt = int(not_one[0]) if not_one.size else n_rows

    thickness = z[-1] - z[-cnt]
    thickness_l.append(thickness)

    # Replace the interior with label 0 and mirror the top band of label 1
    # onto the bottom cnt rows.
    clustermap[cnt:(n_rows - cnt)] = 0
    clustermap[(n_rows - cnt):] = 1

    mask_diff, mask_conv = create_masks_smooth(T, clustermap)

    T_l.append(T)
    clustermap_l.append(clustermap)
    mask_diff_l.append(mask_diff)
    mask_conv_l.append(mask_conv)

    _plot_segmentation(x, z, T, clustermap, cluster_idx, features, mask_diff, mask_conv, sn, labels)

  return T_l, clustermap_l, mask_diff_l, mask_conv_l, thickness_l
def generate_data(Rai, Raf, ti, inc, tf):
  """Segment every simulation in [Rai, Raf] and save the results to disk.

  The function creates the results directory tree, then, for each HDF5
  file returned by ``create_Ra_path_list``, reads the ``T``, ``u`` and ``w``
  tasks, runs ``segment_and_mask`` and writes one ``.npy`` file per time for
  the snapshot, the segmentation and both masks. ``Ra_list.npy`` and
  ``thickness_list.npy`` are rewritten after every simulation, so partial
  results survive an interrupted run.

  The HDF5 file is now opened read-only in a ``with`` block, so its handle
  is closed as soon as the arrays have been read.

  Parameters
  ----------
  Rai, Raf : float
      Inclusive Rayleigh-number range.
  ti, inc, tf : int or float
      Start time, time increment and end time.

  Returns
  -------
  (Ra_list, thickness_all_list)
      The Rayleigh numbers processed and, for each, the list of thickness
      values returned by ``segment_and_mask``.
  """
  time_list = create_time_list(ti, inc, tf)

  Ra_list = []
  thickness_all_list = []

  x = np.ravel(np.load('/content/gdrive/My Drive/Project/Other/x256.npy'))
  z = np.ravel(np.load('/content/gdrive/My Drive/Project/Other/z256.npy'))

  Rai_s, Raf_s = create_result_dirs(Rai, Raf, ti, inc, tf)

  path_list = create_Ra_path_list(Rai, Raf)

  # Run directory created by create_result_dirs above.
  results_dir = '/content/gdrive/My Drive/Project/Results/' + Rai_s + '_' + Raf_s + '_' + str(ti) + '_' + str(inc) + '_' + str(tf)

  for count, path in enumerate(path_list):

    # Read the fields fully into memory so the file can be closed right away.
    with h5py.File(path, 'r') as f:
      Ta = f['tasks']['T'][:]
      ua = f['tasks']['u'][:]
      wa = f['tasks']['w'][:]

    # The eight characters before the '_s1_p0.h5' suffix hold the Rayleigh
    # number with 'p' standing in for the decimal point.
    Ra_s = path[-17:-9]
    print(count, Ra_s)
    Ra = float(Ra_s.replace('p', '.'))

    T_l, clustermap_l, mask_diff_l, mask_conv_l, thickness_l = segment_and_mask(Ta, ua, wa, ti, inc, tf, Ra, x, z)

    Ra_list.append(Ra)
    thickness_all_list.append(thickness_l)

    file_name = str(count) + '_' + Ra_s + '.npy'

    for t, T, clustermap, mask_diff, mask_conv in zip(time_list, T_l, clustermap_l, mask_diff_l, mask_conv_l):

      np.save(results_dir + '/snapshots/' + str(t) + '/arrays/' + file_name, T, allow_pickle = True)
      np.save(results_dir + '/segmentations/' + str(t) + '/arrays/' + file_name, clustermap, allow_pickle = True)
      np.save(results_dir + '/masks/' + 'diffusion/' + str(t) + '/arrays/' + file_name, mask_diff, allow_pickle = True)
      np.save(results_dir + '/masks/' + 'convection/' + str(t) + '/arrays/' + file_name, mask_conv, allow_pickle = True)

    # Checkpoint the summary lists after every simulation.
    np.save(results_dir + '/Ra_list.npy', Ra_list)
    np.save(results_dir + '/thickness_list.npy', thickness_all_list)

  return Ra_list, thickness_all_list
