# .npz files from patchlets

- Create .npz files from the patchlets previously generated by 04_sampling

In [1]:
import os

# Show the kernel's current working directory.
working_dir = os.getcwd()
print(working_dir)

/data/lscalambrin/proyecto_integrador/segmentation


In [2]:
import os

import numpy as np
import pandas as pd 
from tqdm.auto import tqdm

from functools import partial 
from concurrent.futures import ProcessPoolExecutor

from fd.utils import prepare_filesystem, multiprocess
from fd.create_npz_files import (
    CreateNpzConfig, 
    extract_npys, 
    concatenate_npys, 
    save_into_chunks
)

In [3]:
# Single root shared by every dataset location of this run; change it here to
# relocate all inputs/outputs at once instead of editing each path separately.
BASE_DIR = '/data/lscalambrin/proyecto_integrador/segmentation/pergamino'

# NOTE(review): save_patch is not used by any cell visible in this notebook — confirm it is still needed.
save_patch = os.path.join(BASE_DIR, 'eopatches')
save_patchlet = os.path.join(BASE_DIR, 'patchlets')          # input: patchlet folders
save_patchlet_npz = os.path.join(BASE_DIR, 'patchlets_npz')  # output: .npz chunks
df_path = os.path.join(BASE_DIR, 'patchlet-info.csv')        # output: CSV read back later as `df`

# NOTE(review): bucket_name and the AWS fields look like placeholders, since every
# path above is on the local filesystem — confirm. Never commit real credentials here.
config = CreateNpzConfig(
    bucket_name='bucket-name',
    aws_access_key_id='',
    aws_secret_access_key='',
    aws_region='eu-central-1',
    patchlets_folder=save_patchlet,
    output_folder=save_patchlet_npz,
    output_dataframe=df_path,
    chunk_size=50)  # number of samples stored per .npz chunk

  and should_run_async(code)


### List of available patchlets

In [4]:
# Build the full path of every entry found in the patchlets folder.
patchlets_root = config.patchlets_folder
entry_names = os.listdir(patchlets_root)
patchlets = [os.path.join(patchlets_root, name) for name in entry_names]

In [5]:
# Number of patchlet paths collected from config.patchlets_folder.
len(patchlets)

498

In [6]:
# Bind the shared config up front so the remaining argument can be supplied per patchlet.
# NOTE(review): presumably kept as functools.partial (not a lambda) so the callable can be
# pickled for worker processes — confirm against fd.utils.multiprocess.
partial_fn = partial(extract_npys, cfg=config)

In [7]:
# Number of workers handed to multiprocess.
N_WORKERS = 24
npys = multiprocess(partial_fn, patchlets, max_workers=N_WORKERS)

  0%|          | 0/498 [00:00<?, ?it/s]

In [8]:
# Combine the per-patchlet results returned by multiprocess into a single dict;
# its keys are inspected in the next cell.
npys_dict = concatenate_npys(npys)

In [9]:
# List the entries produced by concatenate_npys.
npys_dict.keys()

dict_keys(['X', 'y_boundary', 'y_extent', 'y_distance', 'timestamps', 'eop_names'])

In [10]:
# NOTE(review): presumably writes the .npz chunks (config.chunk_size samples each) into
# config.output_folder and the index CSV to config.output_dataframe — confirm in fd.create_npz_files.
save_into_chunks(config, npys_dict)

#### Check that results make sense 

In [11]:
# os.listdir returns entries in arbitrary order and also lists non-chunk entries
# (e.g. checkpoint folders). Keep only .npz files and sort them so that len(npzs)
# counts chunks only and npzs[0] is the same file on every run.
# The sort is lexicographic ("_10" comes before "_2"); that is enough for a stable pick.
npzs = sorted(
    name for name in os.listdir(config.output_folder)
    if name.endswith('.npz')
)

In [12]:
# Number of entries found in config.output_folder.
len(npzs)

114

### Reading an npz for testing

In [13]:
# Pass the path itself rather than an open() handle: np.load then opens the file and
# the returned NpzFile owns it (released by test_npz.close()), instead of leaving an
# anonymous handle open until garbage collection. The archive must stay open here
# because later cells read arrays from it lazily.
# NOTE(security): allow_pickle=True can execute arbitrary code when loading untrusted
# files; only use it on chunks produced by this pipeline.
test_npz_path = os.path.join(config.output_folder, npzs[0])
test_npz = np.load(test_npz_path, allow_pickle=True)

#### chunk_size=50
Each chunk holds 50 elements used to train the network. These elements come from different eopatches and different dates; see the `df.head` output below for an example.

In [14]:
# Shapes of the 'X', 'y_extent' and 'timestamps' arrays stored in the loaded chunk.
tuple(test_npz[key].shape for key in ('X', 'y_extent', 'timestamps'))

((50, 256, 256, 4), (50, 256, 256, 1), (50,))

In [15]:
# Give pandas the path directly so it opens and closes the file itself;
# wrapping the path in open() leaves a file handle that is never closed.
df = pd.read_csv(config.output_dataframe)

In [16]:
# First 52 rows of the index table.
# NOTE(review): 52 was presumably chosen to run just past the first chunk boundary (chunk_size=50) — confirm.
df.iloc[:52]

Unnamed: 0,chunk,eopatch,patchlet,chunk_pos,timestamp
0,patchlets_field_delineation_0.npz,eopatch_15,/data/lscalambrin/proyecto_integrador/segmenta...,0,2017-09-19 14:00:39
1,patchlets_field_delineation_0.npz,eopatch_15,/data/lscalambrin/proyecto_integrador/segmenta...,1,2017-10-14 14:00:51
2,patchlets_field_delineation_0.npz,eopatch_15,/data/lscalambrin/proyecto_integrador/segmenta...,2,2017-11-23 14:00:51
3,patchlets_field_delineation_0.npz,eopatch_15,/data/lscalambrin/proyecto_integrador/segmenta...,3,2017-11-28 14:00:39
4,patchlets_field_delineation_0.npz,eopatch_15,/data/lscalambrin/proyecto_integrador/segmenta...,4,2017-12-18 14:00:39
5,patchlets_field_delineation_0.npz,eopatch_15,/data/lscalambrin/proyecto_integrador/segmenta...,5,2017-12-28 14:00:39
6,patchlets_field_delineation_0.npz,eopatch_15,/data/lscalambrin/proyecto_integrador/segmenta...,6,2018-01-07 14:00:49
7,patchlets_field_delineation_0.npz,eopatch_15,/data/lscalambrin/proyecto_integrador/segmenta...,7,2018-01-22 14:00:51
8,patchlets_field_delineation_0.npz,eopatch_15,/data/lscalambrin/proyecto_integrador/segmenta...,8,2018-03-03 14:00:51
9,patchlets_field_delineation_0.npz,eopatch_15,/data/lscalambrin/proyecto_integrador/segmenta...,9,2018-03-13 14:00:51


In [17]:
# Total number of rows in the table read from config.output_dataframe.
df.shape[0]

  and should_run_async(code)


5670