# Preprocessing of Temperature and Breizhcrops datasets
In the end, we would like to have a dataset in which the satellite data are associated with the cumulative temperature sum.

We need to create an HDF5 (h5py) file like the ones provided by BreizhCrops. The values have a different spacing, according to the cumulative temperature sum.

In [10]:
import sys
sys.path.append("..")
import os 
import glob
import netCDF4
import h5py
from data import BreizhCrops
import numpy as np
import geopandas as gpd
from shapely.geometry import Polygon
from utils.temperature_data.load import get_time_data, get_temperature_data, get_lon_data, get_lat_data, load_nc_files

In [5]:
# Root folder holding the datasets used by this notebook.
# HOME is preferred, then USERPROFILE (Windows). If neither variable is set,
# fall back to os.path.expanduser so os.path.join never receives None
# (which would raise a TypeError).
home_dir = os.environ.get("HOME", os.environ.get("USERPROFILE"))
if home_dir is None:
    home_dir = os.path.expanduser("~")
elects_data_root = os.path.join(home_dir, "elects_data")
data_path = os.path.join(elects_data_root, "temperature")

# Load the netCDF temperature files located under data_path and print how many
# datasets came back (presumably one per daily file -- confirm in
# utils.temperature_data.load).
nc_datasets = load_nc_files(datapath=data_path)
print("length: ", len(nc_datasets))

length:  365


In [None]:
# Inspect the metadata of the 24h-mean 2 m air temperature variable for one
# arbitrarily chosen dataset (position 5 in the loaded list).
sample_nc = nc_datasets[5]
sample_nc.variables["Temperature_Air_2m_Mean_24h"]

<class 'netCDF4._netCDF4.Variable'>
float32 Temperature_Air_2m_Mean_24h(time, lat, lon)
    _FillValue: -9999.0
    long_name: 2 meter air temperature (00-00LT)
    units: K
    temporal_aggregation: Mean 00-00LT
    missing_value: -9999.0
unlimited dimensions: time
current shape = (1, 21, 44)
filling on

In [6]:
# Load the 2017 "eval" partition of BreizhCrops from the local data root.
# return_id=True is requested so identifiers accompany the samples
# (exact return format is defined by data.BreizhCrops -- TODO confirm).
breizhcrops_path = os.path.join(elects_data_root, "breizhcrops")
sequence_length = 150
test_ds = BreizhCrops(
    root=breizhcrops_path,
    partition="eval",
    sequencelength=sequence_length,
    return_id=True,
    year=2017,
)

1555075632 1555075632


loading data into RAM: 100%|██████████| 122614/122614 [00:44<00:00, 2731.04it/s]


In [13]:
# The wrapper exposes the underlying dataset as `.ds`; take its HDF5 file
# location and its index table, then print both (path first, table second).
inner_ds = test_ds.ds
h5path = inner_ds.h5path
index = inner_ds.index
print(h5path, index, sep="\n")

C:\Users\anyam\elects_data\breizhcrops\2017\L1C\frh04.h5
             id CODE_CULTU                   path     meanCLD  sequencelength  \
idx                                                                             
0       6017564        PTR  csv/frh04/6017564.csv  780.075742             102   
6       6054675        PTR  csv/frh04/6054675.csv  731.428571              49   
7       6018665        PTR  csv/frh04/6018665.csv  710.530612              49   
8       6019318        PTR  csv/frh04/6019318.csv  770.534653             101   
9       6019435        RGA  csv/frh04/6019435.csv  600.625715              49   
...         ...        ...                    ...         ...             ...   
158327  5057907        PPH  csv/frh04/5057907.csv  633.608050              50   
158328  5057908        MIS  csv/frh04/5057908.csv  640.096150              50   
158330  5057910        MIS  csv/frh04/5057910.csv  662.517416              50   
158331  5057975        MIS  csv/frh04/5057975.csv  6

In [18]:
# Pick one entry of the index table (position j) and read the array stored
# under its `path` key in the HDF5 file.
j = 0
row = index.iloc[j]
print("row: ", row)

with h5py.File(h5path, "r") as dataset:
    print("dataset: ", dataset)
    print("row.path: ", row.path)
    # Look the entry up once and reuse the handle for the print and the read.
    parcel_entry = dataset[row.path]
    print("dataset[(row.path)]: ", parcel_entry)
    # Convert to an in-memory ndarray while the file is still open.
    X = np.array(parcel_entry)

row:  id                              6017564
CODE_CULTU                          PTR
path              csv/frh04/6017564.csv
meanCLD                      780.075742
sequencelength                      102
classid                               8
classname             temporary meadows
region                            frh04
Name: 0, dtype: object
dataset:  <HDF5 file "frh04.h5" (mode r)>
row.path:  csv/frh04/6017564.csv
dataset[(row.path)]:  <HDF5 dataset "6017564.csv": shape (102, 17), type "<f8">


In [15]:
# Display the array read from the HDF5 file for the selected index row
# (the repr is abbreviated by NumPy for large arrays).
X

array([[5.43357429e+03, 3.36485714e+01, 4.60258857e+03, ...,
        0.00000000e+00, 1.02400000e+03, 1.48331520e+18],
       [3.17295714e+03, 3.01085714e+01, 2.93972286e+03, ...,
        0.00000000e+00, 1.02400000e+03, 1.48357440e+18],
       [6.35816000e+03, 4.87482857e+02, 2.24155429e+03, ...,
        0.00000000e+00, 1.02400000e+03, 1.48417920e+18],
       ...,
       [7.21186857e+03, 2.90000000e+01, 5.38022857e+03, ...,
        0.00000000e+00, 1.02400000e+03, 1.51398720e+18],
       [8.77835143e+03, 4.54970000e+03, 4.36862000e+03, ...,
        0.00000000e+00, 1.02400000e+03, 1.51424640e+18],
       [2.41632286e+03, 1.46920000e+02, 1.79072857e+03, ...,
        0.00000000e+00, 0.00000000e+00, 1.51441920e+18]])