# How to quickly save and load data with numpy

Sometimes your code generates data that doesn't need to be saved in a pretty, human-readable way, but that we would still like to store somewhere so that we can retrieve it later.
Numpy has a couple of pretty nifty functions for that and a general overview can be found here:
https://docs.scipy.org/doc/numpy/reference/routines.io.html

We will be focusing on numpy.savez(), a function that lets you save several arrays to a single .npz file, which is basically a zip archive.

In [None]:
# Let's get our numpy in
import numpy as np
import os  # so that we can delete the file later

In [None]:
# Generate some sample arrays with different shapes and contents
x = np.arange(10)
# The three inner lists have different lengths (and mixed content), so they cannot
# form a regular 2-D array. dtype=object stores each list as one element of a 1-D
# array; without it, recent NumPy versions raise a ValueError for ragged input.
y = np.array(([2,3,4,6,4,4,5,4,4,5,6,4,5,56,6,5,3,4], [4,3,4,56,6,7,8,9,7,6,6,], ['Hello!', 'Why', 'not', 'strings?']), dtype=object)
z = np.ones((13,13))
many = np.array([[3,4,5], [6,4,52]])
more = np.ones((4,17))

In [None]:
# Sanity-check the dimensions of every array before writing anything to disk
for label, array in (
    ('x shape: ', x),
    ('y shape: ', y),
    ('z shape: ', z),
    ('many shape: ', many),
    ('more shape: ', more),
):
    print(label, array.shape)

In [None]:
# Confirm the Python type of every array that gets saved below.
# 'many' is included so that all five arrays are covered.
print('x type: ', type(x))
print('y type: ', type(y))
print('z type: ', type(z))
print('many type: ', type(many))
print('more type: ', type(more))

In [None]:
# Write all arrays into a single .npz archive.

# Base name of the output file (it is read back later as filename + '.npz')
filename = 'numpy_file_1'

# Every keyword handed to savez becomes the key of that array inside the archive
np.savez(filename, **{
    'name_1': x,
    'name_2': y,
    'name_3': z,
    'name_4': many,
    'name_5': more,
})

print('This will save it to your current directory btw, whereever you opened this notebook in.')

In [None]:
# Now that we saved it, load it back into this notebook.
# The file on disk carries the '.npz' extension, so it is appended to the base name here.
in_data = np.load(filename+'.npz')

# Check what data is hidden in our loaded file:
# .files lists the names of the arrays stored in the archive.
print(in_data.files)

In [None]:
# Want to access the array that was saved under the key 'name_4'? Here we go!
# Arrays are retrieved from the loaded file by indexing it with their key.
short = in_data['name_4']
print(short)

In [None]:
# Inspect the retrieved array: its Python type first, then its dimensions
for label, value in (
    ('Type of short: ', type(short)),
    ('Shape of short: ', short.shape),
):
    print(label, value)

In [None]:
short

# Release the file handle that np.load() keeps on the .npz archive before deleting it;
# removing a file that is still open fails on Windows and leaks the handle elsewhere.
# 'short' has already been read into memory, so it stays usable afterwards.
in_data.close()

# And delete the file now, so that we don't clutter your current working directory.
os.remove(filename+'.npz')

## Random tests on saved data

In [None]:
# Directory that the three files below are loaded from
path = '/Users/ilaginja/Documents/data_from_repos/hst_marg_data/outputs'

# File names of the three .npz archives to compare
fname1, fname2, fname3 = (
    'marginalization_results_testing.npz',
    'masked_marginalization_results_testing.npz',
    'unmasked_marginalization_results_testing.npz',
)

In [None]:
# Load the three archives from the directory given by `path`.
# NOTE(review): allow_pickle=True lets np.load unpickle Python objects stored in the
# files, which can execute arbitrary code - only use it on files from a trusted source.
data1 = np.load(os.path.join(path, fname1), allow_pickle=True)
data2 = np.load(os.path.join(path, fname2), allow_pickle=True)
data3 = np.load(os.path.join(path, fname3), allow_pickle=True)

In [None]:
# Show the array names stored in each of the three archives, in load order
for archive in (data1, data2, data3):
    print(archive.files)

In [None]:
# Walk through every key of the first archive and print the matching entry
# from all three archives underneath each other for a side-by-side comparison.
for key in data1.files:
    print(key)
    for archive in (data1, data2, data3):
        print(archive[key])
    # Blank separator between keys
    print('\n')