In [1]:
%%capture
import tensorflow as tf
from tensorflow import keras
from scipy.io import loadmat
from tensorflow.keras import regularizers
import os
import time
import numpy as np
import gc
import shutil
from tensorflow.keras.models import model_from_json
!pip install tqdm
!pip install h5py
from tqdm import tqdm_notebook
import matplotlib.pyplot as plt
from tensorflow.keras import backend as K
from sklearn.metrics import r2_score
from sklearn import preprocessing

In [2]:
def scale(x):
    
    return (x - np.mean(x)) / np.std(x)

In [3]:
def preprocess(x):
    
    return x[:,:,:int(0.75*x.shape[2])]

In [4]:
os.chdir("/home/pk2573")
u = preprocess(scale(loadmat('u_F_xyz_T1.mat')["u_F"]))
v = preprocess(scale(loadmat('v_F_xyz_T1.mat')["v_F"]))
w = preprocess(scale(loadmat('w_F_xyz_T1.mat')["w_F"]))
tau_12 = preprocess(scale(loadmat('tau12_xyz_T1.mat')["tau12"]))
tau_13 = preprocess(scale(loadmat('tau13_xyz_T1.mat')["tau13"]))
tau_23 = preprocess(scale(loadmat('tau23_xyz_T1.mat')["tau23"]))

#### Switches

In [5]:
all_taus = False
significance = True

### 1. Generae Input datasets

#### 1.1 Create Input dataset (u, v, w, TKE, $\theta$) and save batches to disk

In [6]:
x = np.array([u, v, w])
x = np.transpose(x, [1, 2, 3, 0])
size = 7
x = np.pad(x, ((size,size), (size,size), (size,size), (0,0)), 'constant', constant_values = 0)
print(x.shape)
del u
del v
del w
gc.collect()

(160, 110, 70, 3)


11

In [9]:
if not os.path.exists("/home/pk2573/Data"):
    os.mkdir("/home/pk2573/Data")
    os.mkdir("/home/pk2573/Data/Data")
    os.mkdir("/home/pk2573/Data/Labels")
    os.mkdir("/home/pk2573/Data/Labels/Tau_23")
    os.mkdir("/home/pk2573/Data/Labels/Tau_12")
    os.mkdir("/home/pk2573/Data/Labels/Tau_13")
else:
    shutil.rmtree("/home/pk2573/Data")
    os.mkdir("/home/pk2573/Data")
    os.mkdir("/home/pk2573/Data/Data")
    os.mkdir("/home/pk2573/Data/Labels")
    os.mkdir("/home/pk2573/Data/Labels/Tau_23")
    os.mkdir("/home/pk2573/Data/Labels/Tau_12")
    os.mkdir("/home/pk2573/Data/Labels/Tau_13")

In [10]:
sample = []
y_tau_23 = []
y_tau_12 = []
y_tau_13 = []
counter = 1
num_files = 10

x_range = np.arange(size, x.shape[0] - size)
np.random.shuffle(x_range)
y_range = np.arange(size, x.shape[1] - size)
np.random.shuffle(y_range)
z_range = np.arange(size, x.shape[2] - size)
np.random.shuffle(z_range)

for i in tqdm_notebook(x_range):
    for j in y_range:
        for k in z_range:
            sample.append(x[i - size: i + size + 1, j - size: j + size + 1, k - size: k + size + 1, :])
            y_tau_23.append(tau_23[i - size][j - size][k - size])
            y_tau_12.append(tau_12[i - size][j - size][k - size])
            y_tau_13.append(tau_13[i - size][j - size][k - size])
        
            if len(sample) == int(((x.shape[0] - size * 2) * (x.shape[1] - size * 2) * (x.shape[2] - size * 2)) / num_files):
                os.chdir("/home/pk2573/Data/Data")
                np.save(str(counter), np.array(sample))
                os.chdir("/home/pk2573/Data/Labels/Tau_23")
                np.save(str(counter), np.array(y_tau_23))
                os.chdir("/home/pk2573/Data/Labels/Tau_12")
                np.save(str(counter), np.array(y_tau_12))
                os.chdir("/home/pk2573/Data/Labels/Tau_13")
                np.save(str(counter), np.array(y_tau_13))
                
                del sample
                del y_tau_13
                del y_tau_12
                del y_tau_23
                gc.collect()
                sample = []
                y_tau_23 = []
                y_tau_12 = []
                y_tau_13 = []
                counter += 1

HBox(children=(IntProgress(value=0, max=146), HTML(value='')))




### 2. Build Model 

In [11]:
class DNN(object):
    
    def __init__(self, activation, initializer, regularizer, input_shape):
        self.activation = activation
        self.initializer = initializer
        self.regularizer = regularizer
        self.input_shape = input_shape
        
        pass 
    
    def create_model(self):
        model = keras.Sequential([
            tf.keras.layers.Flatten(input_shape = self.input_shape),
            tf.keras.layers.Dense(128, 
                             activation = self.activation, 
                             kernel_regularizer = self.regularizer, 
                             kernel_initializer = self.initializer),
            tf.keras.layers.Dropout(0.2),
            tf.keras.layers.Dense(64, 
                             activation = self.activation, 
                             kernel_regularizer = self.regularizer, 
                             kernel_initializer = self.initializer),
            tf.keras.layers.Dropout(0.2),
            tf.keras.layers.Dense(32, 
                             activation = self.activation, 
                             kernel_regularizer = self.regularizer, 
                             kernel_initializer = self.initializer),
            tf.keras.layers.Dense(1)
        ])
        
        return model

### 3. Run Model

In [None]:
if significance:
    activation = tf.nn.relu
    initializer = None
    regularizer = None
    epochs = 1
    batch_size = 1000
    val_split = 0.2

    datasets = ["Tau_12"]#, "Tau_13", "Tau_23"]
    results = {}
    i = 1
    for name in datasets:
        x_test = []
        y_test = []
        print("\nPredicting  " + name)
        input_shape = (2 * size + 1, 2 * size + 1, 2 * size + 1, 3)
        neural_net = DNN(activation, initializer, regularizer, input_shape)
        model = neural_net.create_model()
        model.compile(optimizer = tf.train.AdamOptimizer(), loss = "mse")
        
        for i in range(50):
            for j in range(1, num_files + 1):
                print("Fitting on Dataset " + str(j))
                x = np.load("/home/pk2573/Data/Data/" + str(j) + ".npy")
                y = np.load("/home/pk2573/Data/Labels/" + name + "/" + str(j) + ".npy")

                mask =  np.random.rand(x.shape[0]) < 0.80
                x_train = x[mask,:,:,:,:]
                y_train = y[mask]
                x_test = x[~mask,:,:,:,:]
                y_test = y[~mask]
                del y
                del mask
                gc.collect()

                model.fit(x_train, y_train, epochs = epochs, validation_split = 0.2, batch_size = batch_size, verbose = 0)
                del x_train
                del y_train
                gc.collect()

                y_pred = model.predict(x_test).flatten()
                gc.collect()
                y_true = y_test.flatten()
                print("R^2: %.4f" % r2_score(y_true, y_pred))
                print("Correlation: %.4f\n" % np.corrcoef(y_pred, y_true)[0, 1])
        
                del x
                del x_test
                del y_test
                gc.collect()
                i += 1


Predicting  Tau_12
Fitting on Dataset 1
R^2: 0.0831
Correlation: 0.2964

Fitting on Dataset 2
R^2: 0.1057
Correlation: 0.3335

Fitting on Dataset 3
R^2: 0.1435
Correlation: 0.3890

Fitting on Dataset 4
R^2: 0.1279
Correlation: 0.3582

Fitting on Dataset 5
R^2: 0.1566
Correlation: 0.4202

Fitting on Dataset 6
R^2: 0.1804
Correlation: 0.4408

Fitting on Dataset 7
R^2: 0.2178
Correlation: 0.4766

Fitting on Dataset 8
R^2: 0.2160
Correlation: 0.4904

Fitting on Dataset 9
R^2: 0.1944
Correlation: 0.4526

Fitting on Dataset 10
R^2: 0.2314
Correlation: 0.5085

Fitting on Dataset 1
R^2: 0.2660
Correlation: 0.5435

Fitting on Dataset 2
R^2: 0.2424
Correlation: 0.4958

Fitting on Dataset 3
R^2: 0.2436
Correlation: 0.5015

Fitting on Dataset 4
R^2: 0.2842
Correlation: 0.5489

Fitting on Dataset 5
R^2: 0.2696
Correlation: 0.5546

Fitting on Dataset 6
R^2: 0.2757
Correlation: 0.5337

Fitting on Dataset 7
R^2: 0.3031
Correlation: 0.5712

Fitting on Dataset 8
R^2: 0.3111
Correlation: 0.5691

Fitting