In [1]:
import numpy as np
import pickle

In [2]:
dirname='/global/homes/a/abbatej/'
with open(dirname+'final_data.pkl', 'rb') as f: 
    data=pickle.load(f, encoding='latin1')

In [4]:
num_train_shots=70
num_val_shots=30

In [5]:
shots = sorted(data.keys())

In [6]:
#print the available
print(data[shots[1]].keys())

dict_keys(['pinj', 'gasB', 'curr', 'gasA', 'curr_target', 'e_temp', 'time', 'e_dens', 'tinj', 'i_dens'])


In [7]:
# specify the signals we want
sigs_1d = sorted(['e_temp'])
sigs_0d = sorted(['tinj', 'pinj', 'curr_target'])
sigs = sigs_0d+sigs_1d

In [9]:
print(set(sigs).issubset(data[shots[1]].keys()))

True


In [10]:
# first get the indices that contain all the data we need
# (both train and validation)
# simultaneously, get the means and stds for the sigs
train_shots=[]
val_shots=[]

means={}
stds={}

for sig in sigs:
    means[sig] = 0
    stds[sig] = 0

for shot in shots:
    if set(sigs).issubset(data[shot].keys()):
        if (len(train_shots)<num_train_shots):
            train_shots.append(shot)
            for sig in sigs:
                means[sig] += np.mean(data[shot][sig])
                stds[sig] += np.std(data[shot][sig])
        elif (len(val_shots)<num_val_shots):
            val_shots.append(shot)
        else:
            break
            
for sig in sigs:
    means[sig] = means[sig] / len(train_shots)
    stds[sig] = stds[sig] / len(train_shots)

In [11]:
lookback = 10
delay = 1

def normalize(obj, mean, std):
    return (obj-mean)/std

def make_final_data(my_shots):
    final_data=[]
    final_target=[]
    for shot in my_shots:
        num_timesteps=len(data[shot][sigs[0]])
        for end_time in range(lookback, num_timesteps-delay):
            final_data.append([])
            for time in range(end_time-lookback,end_time):
                #target
                final_target.append([])
                for sig in sigs_1d:
                    final_target[-1].extend(normalize(data[shot][sig][end_time+delay], means[sig], stds[sig]))
                #data
                final_data[-1].append([])
                for sig in sigs_0d:
                    # time+1 below because the 0d signals are actuators
                    final_data[-1][-1].append(normalize(data[shot][sig][time+delay], means[sig], stds[sig]))
                for sig in sigs_1d:
                    final_data[-1][-1].extend(normalize(data[shot][sig][time], means[sig], stds[sig]))
    return (final_data, final_target)

train_tuple = make_final_data(train_shots)
train_data = train_tuple[0]
train_target = train_tuple[1]

val_tuple = make_final_data(val_shots)
val_data = val_tuple[0]
val_target = val_tuple[1]

In [12]:
# note the shape reflects (num_samples, lookback, num_features)
# num_features has 22 rho values for each 1d sig and 1 value for each 0d sig
print(np.array(train_target).shape)

(149320, 22)


In [13]:
with open(dirname+'train_data.pkl', 'wb') as f: 
    pickle.dump(train_data, f)

In [14]:
with open(dirname+'train_target.pkl', 'wb') as f: 
    pickle.dump(train_target, f)

In [15]:
with open(dirname+'val_data.pkl', 'wb') as f: 
    pickle.dump(val_data, f)

In [16]:
with open(dirname+'val_target.pkl', 'wb') as f: 
    pickle.dump(val_target, f)