# File Compression

In [14]:
import numpy as np
import os, glob
import h5py            # pip install -U h5py

For a short introduction to `h5py`, see: [```https://www.christopherlovell.co.uk/blog/2016/04/27/h5py-intro.html```](https://www.christopherlovell.co.uk/blog/2016/04/27/h5py-intro.html)


Originally all the files were stored using the ```.npy``` format. However, the cost and solution matrices of these instances are huge, so the resulting files were too big. Here we convert each file into an HDF5 file (a dictionary-like container) so that it can be split into smaller pieces and read back later.

In [27]:
# Base name (without extension) of the example matrix. It doubles as the output
# folder, the .h5 file name and the HDF5 dataset key.
# The cost matrix is used here because the round-trip check further down reads
# the '..._S2_Cost' dataset back and compares it against `c`.
name = 'data256_1002-data32_1003_p=S2_Cost'
c = np.load(name + '.npy')

In [28]:
# Make sure the destination folder exists first: opening the file below fails
# if the parent directory is missing.
os.makedirs(name, exist_ok=True)
# Open <name>/<name>.h5 for writing.
hf = h5py.File(name + '/' + name + '.h5', 'w')

In [29]:
# Store the matrix as a single dataset keyed by `name`, and always release the
# file handle, even if writing the dataset fails.
try:
    hf.create_dataset(name, data = c)
finally:
    hf.close()

---

Now we split this file into several compressed files so that we can upload everything to GitHub. This is based on the following repo: https://github.com/TomaszGolan/hdf5_manipulator

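Then run the following in a terminal, replacing ```name``` with the base name of the file:
```bash
    ./hdf5_manipulator-master/split.py --input name/name.h5 --size 5000

```
and delete the ```name.h5``` file once the split has finished.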

To read the files back, first merge the pieces into a single file from a terminal:

```bash
    ./hdf5_manipulator-master/merge.py --input name/name --output data.h5
```

In [21]:
# Open the merged file (the --output of merge.py) in read-only mode.
hg = h5py.File('data.h5', mode = 'r')

In [25]:
# Read the dataset stored under `name` back into memory as a NumPy array.
# Indexing the file (instead of calling .get) raises a KeyError when the dataset
# is missing, rather than silently wrapping None in an array.
h5Cost = np.array( hg[name] )

Let's check it:

In [26]:
# np.allclose broadcasts its arguments, so also require identical shapes to make
# sure the split/merge round trip did not change the layout of the matrix.
h5Cost.shape == c.shape and np.allclose(h5Cost, c)

True

---

# Iterative loop

## Costs

In [34]:
# Base names (extension stripped) of every cost matrix in the working directory.
cost_files = glob.glob("*Cost.npy")
Names = [os.path.splitext(npy_path)[0] for npy_path in cost_files]
Names

['data256_1002-data32_1003_p=S2_Cost',
 'data256_1002-data32_1003_p=1_Cost',
 'data256_1002-data32_1003_p=4_Cost',
 'data256_1002-data32_1003_p=1.5_Cost',
 'data256_1002-data32_1003_p=2_Cost',
 'data256_1002-data32_1003_p=∞_Cost']

In [37]:
# Convert every cost matrix to <name>/<name>.h5 and print the command that splits it.
for name in Names:
    # Create the output folder; exist_ok keeps the cell re-runnable and tolerates
    # a folder that was already created earlier.
    os.makedirs(name, exist_ok=True)
    c = np.load(name + '.npy')                             # Load matrix
    # The context manager closes the h5 file even if writing the dataset fails.
    with h5py.File(name + '/' + name + '.h5', 'w') as hf:  # Create h5 file
        hf.create_dataset(name, data = c)                  # Store matrix under its own name
    # Now run this in terminal:
    print('./hdf5_manipulator-master/split.py --input ' + name +'/'+name+'.h5 --size 5000')

./hdf5_manipulator-master/split.py --input data256_1002-data32_1003_p=S2_Cost/data256_1002-data32_1003_p=S2_Cost.h5 --size 5000
./hdf5_manipulator-master/split.py --input data256_1002-data32_1003_p=1_Cost/data256_1002-data32_1003_p=1_Cost.h5 --size 5000
./hdf5_manipulator-master/split.py --input data256_1002-data32_1003_p=4_Cost/data256_1002-data32_1003_p=4_Cost.h5 --size 5000
./hdf5_manipulator-master/split.py --input data256_1002-data32_1003_p=1.5_Cost/data256_1002-data32_1003_p=1.5_Cost.h5 --size 5000
./hdf5_manipulator-master/split.py --input data256_1002-data32_1003_p=2_Cost/data256_1002-data32_1003_p=2_Cost.h5 --size 5000
./hdf5_manipulator-master/split.py --input data256_1002-data32_1003_p=∞_Cost/data256_1002-data32_1003_p=∞_Cost.h5 --size 5000


In [40]:
# Delete the unsplit .h5 files (run the split commands printed above first).
# The S2 cost file is skipped and stays on disk.
for name in Names:
    if name == 'data256_1002-data32_1003_p=S2_Cost':
        continue
    os.remove(name + '/' + name + '.h5')

## Solutions

In [41]:
# Base names (extension stripped) of every solution matrix in the working directory.
sol_files = glob.glob("*Sol.npy")
Names = [os.path.splitext(npy_path)[0] for npy_path in sol_files]
Names

['data256_1002-data32_1003_p=S2_Sol',
 'data256_1002-data32_1003_p=1_Sol',
 'data256_1002-data32_1003_p=2_Sol',
 'data256_1002-data32_1003_p=1.5_Sol',
 'data256_1002-data32_1003_p=∞_Sol',
 'data256_1002-data32_1003_p=4_Sol']

In [44]:
# Convert every solution matrix to <name>/<name>.h5 and print the command that splits it.
for name in Names:
    # Create the output folder; exist_ok keeps the cell re-runnable and tolerates
    # a folder that was already created earlier.
    os.makedirs(name, exist_ok=True)
    c = np.load(name + '.npy')                             # Load matrix
    # The context manager closes the h5 file even if writing the dataset fails.
    with h5py.File(name + '/' + name + '.h5', 'w') as hf:  # Create h5 file
        hf.create_dataset(name, data = c)                  # Store matrix under its own name
    # Now run this in terminal:
    print('./hdf5_manipulator-master/split.py --input ' + name +'/'+name+'.h5 --size 5000')

./hdf5_manipulator-master/split.py --input data256_1002-data32_1003_p=S2_Sol/data256_1002-data32_1003_p=S2_Sol.h5 --size 5000
./hdf5_manipulator-master/split.py --input data256_1002-data32_1003_p=1_Sol/data256_1002-data32_1003_p=1_Sol.h5 --size 5000
./hdf5_manipulator-master/split.py --input data256_1002-data32_1003_p=2_Sol/data256_1002-data32_1003_p=2_Sol.h5 --size 5000
./hdf5_manipulator-master/split.py --input data256_1002-data32_1003_p=1.5_Sol/data256_1002-data32_1003_p=1.5_Sol.h5 --size 5000
./hdf5_manipulator-master/split.py --input data256_1002-data32_1003_p=∞_Sol/data256_1002-data32_1003_p=∞_Sol.h5 --size 5000
./hdf5_manipulator-master/split.py --input data256_1002-data32_1003_p=4_Sol/data256_1002-data32_1003_p=4_Sol.h5 --size 5000


In [45]:
# Delete every unsplit .h5 file (run the split commands printed above first).
for name in Names:
    os.remove(name + '/' + name + '.h5')

---