# Preprocess data

This script processes part of the data that was generated on MARCC (assumed to have been copied to the folder /Users/audreyolivier/Data/AbaqusModel_v3/) and saves only the inputs / outputs needed for network training.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import pickle
%run utils_v3

In [2]:
# Inputs passed to the extraction helper; `input_names` are display labels
# (matplotlib/LaTeX math strings), one per input column.
n_inputs = 4
input_names = ['$vf$', '$E_f$ (GPa)', '$b_{m}$ (MPa)', '$c_{m}$']
kwargs_inputs = {'n_inputs': n_inputs}

# Alternative 6-output configuration (also reports $a_{eff}$ and does not pass
# `a_fixed`), kept for reference:
#n_outputs = 6
#output_names = ['$a_{eff}$ (MPa)', '$b_{eff}$ (MPa)', '$c_{eff}$', '$E_{eff}$ (GPa)', r'$\nu_{eff}$',
#                '$p_{0.9 GPa}$ (%)']
#kwargs_outputs = {'return_nu': True, 'threshold_perc_ac_yield': 0.01, 'return_pcov':False,
#                 'threshold_mises': 0.9e3}

# Active configuration: 5 outputs, with `a_fixed` set to 400.
# NOTE: the nu label must be a raw string; in a normal string literal '\n' is a
# newline escape, which would turn the label into '$' + newline + 'u_eff$'.
n_outputs = 5
output_names = ['$b_{eff}$ (MPa)', '$c_{eff}$', '$E_{eff}$ (GPa)', r'$\nu_{eff}$', '$p_{0.9 GPa}$ (%)']
kwargs_outputs = {'return_nu': True, 'a_fixed': 400., 'return_pcov':False, 'threshold_mises': 0.9e3}


def check_fun(p, x):
    """Return True when both output pickles of job ``x`` exist under ``p``.

    Parameters
    ----------
    p : str
        Folder prefix; it is concatenated directly with the file name, so it
        must end with a path separator.
    x : int
        Job number used to build the ``JOB-<x>_outputs_*.pkl`` file names.

    Returns
    -------
    bool
        True only if both the volume-averages file and the centroid file exist.
    """
    return (os.path.isfile(p + 'JOB-{}_outputs_volume_averages.pkl'.format(x))
            and os.path.isfile(p + 'JOB-{}_outputs_centroid.pkl'.format(x)))


# Set to True to (over)write the pickled datasets; `name_ext` is appended to
# every dataset file name that the cells below read or write.
do_save = False
name_ext = '_bis_a[400]_test.pkl'

### Data to look at aleatoric uncertainties

In [5]:
# Build the aleatoric-uncertainty dataset: job numbers are split into n
# consecutive groups of n jobs each (presumably the jobs of one group share
# the same inputs, per the `nsame` tag in the folder name -- confirm).
n = 20

extension_file = '_v3_inputs[4]_ndata[{}]_nsame[{}]_lhs'.format(n, n)
path_IO = '/Users/audreyolivier/Data/AbaqusModel_v3/IOfolder'+extension_file+'/'

groups_inpts, groups_outpts = [], []
for j in range(n):
    # Check each job's output files once, then reuse the list for both arrays:
    # this halves the filesystem hits and guarantees that row i of the inputs
    # and row i of the outputs always refer to the same job.
    valid_jobs = [job_nb for job_nb in range(j * n, (j + 1) * n) if check_fun(path_IO, job_nb)]
    groups_inpts.append(np.array([extract_input_vars(job_nb, path_IO, extension_file, **kwargs_inputs)
                                  for job_nb in valid_jobs]))
    groups_outpts.append(np.array([extract_output_vars2(job_nb, path_IO, **kwargs_outputs)
                                   for job_nb in valid_jobs]))
if do_save:
    # One array per group; groups may have different numbers of rows because
    # jobs without output files are skipped.
    with open('data_aleatoric_n[{}]'.format(n) + name_ext, 'wb') as f:
        pickle.dump({'groups_inputs': groups_inpts, 'groups_outputs': groups_outpts}, f)

In [6]:
# Sanity check: reload the aleatoric dataset file and print, for inputs then
# outputs, the number of groups followed by the shape of each group array.
n = 20
aleatoric_file = 'data_aleatoric_n[{}]'.format(n) + name_ext
with open(aleatoric_file, 'rb') as f:
    d = pickle.load(f)

for key in ('groups_inputs', 'groups_outputs'):
    print(len(d[key]))
    print([grp.shape for grp in d[key]])

20
[(15, 4), (15, 4), (15, 4), (15, 4), (15, 4), (14, 4), (14, 4), (15, 4), (15, 4), (12, 4), (15, 4), (15, 4), (15, 4), (15, 4), (15, 4), (15, 4), (14, 4), (15, 4), (15, 4), (14, 4)]
20
[(15, 5), (15, 5), (15, 5), (15, 5), (15, 5), (14, 5), (14, 5), (15, 5), (15, 5), (12, 5), (15, 5), (15, 5), (15, 5), (15, 5), (15, 5), (15, 5), (14, 5), (15, 5), (15, 5), (14, 5)]


### Training data: beta and random distributions

In [7]:
# Training set drawn from the "random" design (200 jobs requested).
extension_file = '_v3_inputs[4]_ndata[200]_nsame[1]_random'
path_IO = '/Users/audreyolivier/Data/AbaqusModel_v3/IOfolder'+extension_file+'/'

# Check each job's output files once and reuse the list for both arrays, so
# inputs and outputs are guaranteed to be row-aligned (and the filesystem is
# queried once per job instead of twice).
valid_jobs = [job_nb for job_nb in range(200) if check_fun(path_IO, job_nb)]
a = np.array([extract_input_vars(job_nb, path_IO, extension_file, **kwargs_inputs)
              for job_nb in valid_jobs])
b = np.array([extract_output_vars2(job_nb, path_IO, **kwargs_outputs)
              for job_nb in valid_jobs])
if do_save:
    # Only the first 100 valid jobs are stored.
    with open('training_random' + name_ext, 'wb') as f:
        pickle.dump({'inpts': a[:100], 'outpts': b[:100]}, f)

In [8]:
# Training set drawn from the "beta" design (120 jobs requested).
extension_file = '_v3_inputs[4]_ndata[120]_nsame[1]_beta'
path_IO = '/Users/audreyolivier/Data/AbaqusModel_v3/IOfolder'+extension_file+'/'

# Check each job's output files once and reuse the list for both arrays, so
# inputs and outputs are guaranteed to be row-aligned (and the filesystem is
# queried once per job instead of twice).
valid_jobs = [job_nb for job_nb in range(120) if check_fun(path_IO, job_nb)]
a = np.array([extract_input_vars(job_nb, path_IO, extension_file, **kwargs_inputs)
              for job_nb in valid_jobs])
b = np.array([extract_output_vars2(job_nb, path_IO, **kwargs_outputs)
              for job_nb in valid_jobs])
if do_save:
    # Only the first 50 valid jobs are stored.
    with open('training_beta' + name_ext, 'wb') as f:
        pickle.dump({'inpts': a[:50], 'outpts': b[:50]}, f)

In [9]:
# Second training set drawn from the "beta" design.
extension_file = '_v3_inputs[4]_ndata[100]_nsame[1]_beta_2'
path_IO = '/Users/audreyolivier/Data/AbaqusModel_v3/IOfolder'+extension_file+'/'

# NOTE(review): the folder name advertises ndata[100] but job numbers 0-119
# are scanned; job numbers without output files are skipped by check_fun, so
# this looks harmless if only 100 jobs exist -- confirm.
# Check each job's output files once and reuse the list for both arrays, so
# inputs and outputs are guaranteed to be row-aligned (and the filesystem is
# queried once per job instead of twice).
valid_jobs = [job_nb for job_nb in range(120) if check_fun(path_IO, job_nb)]
a = np.array([extract_input_vars(job_nb, path_IO, extension_file, **kwargs_inputs)
              for job_nb in valid_jobs])
b = np.array([extract_output_vars2(job_nb, path_IO, **kwargs_outputs)
              for job_nb in valid_jobs])
if do_save:
    # Only the first 50 valid jobs are stored.
    with open('training_beta_2' + name_ext, 'wb') as f:
        pickle.dump({'inpts': a[:50], 'outpts': b[:50]}, f)

In [10]:
# Sanity check: reload the stored beta_2 training set and report the shape of
# its input array.
beta_2_file = 'training_beta_2' + name_ext
with open(beta_2_file, 'rb') as f:
    d = pickle.load(f)
print(d['inpts'].shape)

(50, 4)


### Test data: randomly distributed

In [11]:
# Test set drawn from the "random" design (120 jobs requested); note that this
# reads a different folder than the 200-job random training set.
extension_file = '_v3_inputs[4]_ndata[120]_nsame[1]_random'
path_IO = '/Users/audreyolivier/Data/AbaqusModel_v3/IOfolder'+extension_file+'/'

# Check each job's output files once and reuse the list for both arrays, so
# inputs and outputs are guaranteed to be row-aligned (and the filesystem is
# queried once per job instead of twice).
valid_jobs = [job_nb for job_nb in range(120) if check_fun(path_IO, job_nb)]
a = np.array([extract_input_vars(job_nb, path_IO, extension_file, **kwargs_inputs)
              for job_nb in valid_jobs])
b = np.array([extract_output_vars2(job_nb, path_IO, **kwargs_outputs)
              for job_nb in valid_jobs])
if do_save:
    # Only the first 50 valid jobs are stored.
    with open('testing_random' + name_ext, 'wb') as f:
        pickle.dump({'inpts': a[:50], 'outpts': b[:50]}, f)

In [12]:
# Sanity check: reload the stored random test set and report the shape of its
# input array.
testing_file = 'testing_random' + name_ext
with open(testing_file, 'rb') as f:
    d = pickle.load(f)
print(d['inpts'].shape)

(50, 4)
