In [2]:
import preprocessing as prep
import os,sys
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib
from datetime import datetime, timedelta, time
import glob
from generate_atmosphere import LidarProfile,RadiosondeProfile
import miscLidar as mscLid
from molecular import rayleigh_scattering
import global_settings as gs
import pandas as pd

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import xarray as xr
%matplotlib inline


In [4]:
import torch, torchvision
def get_device():
    """Return the torch device this notebook should run on.

    Returns:
        torch.device: ``cuda:0`` when ``torch.cuda.is_available()`` reports a
        usable CUDA device, otherwise ``cpu``.
    """
    # Guard clause: without CUDA there is nothing to choose, use the CPU.
    if not torch.cuda.is_available():
        return torch.device('cpu')
    return torch.device('cuda:0')


# Resolve the device once at module level so later cells can reuse it.
device = get_device()

In [None]:
# Disabled scratch cell: the code below is wrapped in a bare string literal, so
# it is only evaluated as a string and never executed.
# NOTE(review): it refers to `ds`, `start_time` and `end_time`, none of which are
# defined in the visible cells - define them before re-enabling this cell.
'''
print ('sub dataset')
sub_ds = ds.attbsc.sel(Time =slice(start_time,end_time), Wavelength = 532,drop = True)
#sub_ds.sel(Wavelength = 532,drop = True)
sub_ds.plot(cmap='turbo')
#sub_df = sub_ds.to_pandas()
#print(sub_df)
'''

In [18]:
class customDataSet(torch.utils.data.Dataset):
    """Map-style dataset over a calibration database stored as a CSV file.

    Every CSV row is one sample. A row holds the paths of two datasets
    (``lidar_path`` and ``molecular_path``), the time period of the sample
    (``start_time_period`` / ``end_time_period``) and the target values
    (``LC``, ``LC_std``, ``r0``, ``r1``).

    Attributes:
        data: the database loaded with ``pd.read_csv``.
        key: column names identifying a database entry (not used by the class
            itself).
        X_features / Y_features: column names of the input paths / targets.
        profiles: variable taken from each input dataset, paired by position
            with ``X_features``.
        X_scales, X_powers, Y_scales, Y_powers: per-profile / per-target
            transform parameters looked up from ``scales`` and ``powers``.
        top_height: extent of the height window above each dataset's minimum
            ``Height``.
    """

    # Defaults keyed by the profile and target names that __init__ looks up.
    # The original defaults used 'liconst', 'uncertainty_liconst' and 'delta_r',
    # which never matched Y_features and raised KeyError on construction.
    DEFAULT_SCALES = {'range_corr': 1E-6, 'attbsc': 1E+3,
                      'LC': 1E-9, 'LC_std': 1E-9,
                      'r0': 1E-3, 'r1': 1E-3}
    DEFAULT_POWERS = {'range_corr': 0.2, 'attbsc': 0.2,
                      'LC': 0.2, 'LC_std': 0.2,
                      'r0': 1, 'r1': 1}

    def __init__(self, csv_file, top_height=15.0, scales=None, powers=None):
        """
        Args:
            csv_file (string): Path to the csv file of the database (anything
                accepted by ``pd.read_csv``).
            top_height (float): Size of the height window, added to the minimum
                ``Height`` of each dataset. Same units as the ``Height``
                coordinate (presumably km - TODO confirm).
            scales (dict | None): Multiplicative factor per profile / target
                name. ``None`` selects ``DEFAULT_SCALES``.
            powers (dict | None): Exponent per profile / target name. ``None``
                selects ``DEFAULT_POWERS``.

        Raises:
            KeyError: if ``scales`` or ``powers`` lacks an entry for one of the
                profiles or target features.
        """
        # None instead of dict literals avoids shared mutable default arguments.
        scales = dict(self.DEFAULT_SCALES) if scales is None else scales
        powers = dict(self.DEFAULT_POWERS) if powers is None else powers

        self.data = pd.read_csv(csv_file)
        self.key = ['date', 'wavelength', 'cali_method', 'telescope',
                    'cali_start_time', 'cali_stop_time',
                    'start_time_period', 'end_time_period']
        self.Y_features = ['LC', 'LC_std', 'r0', 'r1']
        self.X_features = ['lidar_path', 'molecular_path']
        self.profiles = ['range_corr', 'attbsc']

        # Fail early with a readable message instead of a bare KeyError('LC').
        required = self.profiles + self.Y_features
        for label, table in (('scales', scales), ('powers', powers)):
            missing = [name for name in required if name not in table]
            if missing:
                raise KeyError(f"{label} is missing entries for: {missing}")

        self.X_scales = [scales[profile] for profile in self.profiles]
        self.X_powers = [powers[profile] for profile in self.profiles]
        self.X_power = self.X_powers  # alias: the original attribute name
        self.Y_scales = [scales[feature] for feature in self.Y_features]
        self.Y_powers = [powers[feature] for feature in self.Y_features]
        self.top_height = top_height

    def __len__(self):
        """Return the number of rows (samples) in the database."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load the sample stored in database row ``idx``.

        Args:
            idx: row label, used with ``self.data.loc``.

        Returns:
            dict: ``{'x': X, 'y': Y}`` where ``X`` is a list with one selected
            profile per input dataset and ``Y`` is the row restricted to
            ``Y_features``.
        """
        # load data
        row = self.data.loc[idx, :]
        X_paths = row[self.X_features]
        datasets = [prep.load_dataset(path) for path in X_paths]

        # Height window per dataset: [min Height, min Height + top_height].
        hslices = []
        for ds in datasets:
            bottom = ds.Height.min().values.tolist()
            hslices.append(slice(bottom, bottom + self.top_height))
        tslice = slice(row.start_time_period, row.end_time_period)

        # self.profiles, not a bare `profiles`: the bare name only resolved when
        # a kernel-global of that name happened to exist.
        X = [ds.sel(Time=tslice, Height=hslice)[profile]
             for ds, profile, hslice in zip(datasets, self.profiles, hslices)]
        Y = row[self.Y_features]

        # TODO: transform the sample (units scale and gamma correction)

        # TODO: convert to tensors

        return {'x': X, 'y': Y}

In [None]:
# Date range covered by the database file.
start_date = datetime(year=2017, month=9, day=1)
end_date = datetime(year=2017, month=10, day=31)

# The file name embeds both ends of the range formatted as YYYY-MM-DD.
csv_path = f"dataset_{start_date.strftime('%Y-%m-%d')}_{end_date.strftime('%Y-%m-%d')}.csv"

# Load the database and show it as the cell output.
dataset = pd.read_csv(csv_path)
dataset

In [19]:
# NOTE(review): the first argument is `csv_path`, a str, not a Dataset. The
# DataLoader therefore indexes the characters of the file name, which is why the
# loop over `dataloader` prints batches of single characters. Presumably an
# instance such as `customDataSet(csv_path)` was intended - TODO confirm.
dataloader = torch.utils.data.DataLoader ( csv_path , batch_size = 4 ,
                                           shuffle = True )

In [20]:


# Iterate once over the loader and print every batch together with its index.
for i_batch , sample_batched in enumerate ( dataloader ) :
    print ( i_batch , sample_batched )

0 ['1', '_', '-', '-']
1 ['c', '.', 't', 'a']
2 ['1', '0', '9', 's']
3 ['t', '7', '3', '0']
4 ['2', 's', '0', '1']
5 ['0', '7', '1', '-']
6 ['1', 'v', '0', '-']
7 ['2', 'd', '_', 'a']
8 ['e']


In [38]:
# Stand-alone copy of the dataset configuration, used by the following cells to
# step through the sample-loading logic outside of the class.
top_height = 15.0

# Multiplicative factor per profile / target name.
scales = {
    'range_corr': 1E-6,
    'attbsc': 1E+3,
    'LC': 1E-9,
    'LC_std': 1E-9,
    'r0': 1E-3,
    'r1': 1E-3,
}

# Exponent per profile / target name.
powers = {
    'range_corr': 0.2,
    'attbsc': 0.2,
    'LC': 0.2,
    'LC_std': 0.2,
    'r0': 1,
    'r1': 1,
}

data = pd.read_csv(csv_path)

# Column groups of the database.
key = [
    'date', 'wavelength', 'cali_method', 'telescope',
    'cali_start_time', 'cali_stop_time',
    'start_time_period', 'end_time_period',
]
Y_features = ['LC', 'LC_std', 'r0', 'r1']
X_features = ['lidar_path', 'molecular_path']
profiles = ['range_corr', 'attbsc']

# Transform parameters, ordered like `profiles` and `Y_features`.
X_scales = [scales[name] for name in profiles]
X_powers = [powers[name] for name in profiles]
Y_scales = [scales[name] for name in Y_features]
Y_powers = [powers[name] for name in Y_features]

In [41]:
# Pick one database row by label and open the two datasets it points to.
idx = 1625  # NOTE(review): hard-coded row label; presumably an arbitrary example - confirm
row = data.loc [ idx , : ]
X_paths = row[X_features]  # the 'lidar_path' and 'molecular_path' entries of the row
datasets = [prep.load_dataset(path) for path in X_paths]

In [42]:
# One height slice per dataset: from that dataset's minimum Height up to
# minimum Height + top_height.
hslices  = [slice(ds.Height.min().values.tolist(),ds.Height.min().values.tolist()+top_height)
            for ds in datasets]
# Time window of the sample, taken from the row's period columns.
tslice = slice ( row.start_time_period , row.end_time_period )

In [93]:
# Cut the time/height window out of each dataset and keep only its profile
# variable; datasets, profiles and hslices are paired by position.
X = [ds.sel( Time = tslice , Height = hslice)[profile]
     for ds,profile,hslice in zip(datasets,profiles,hslices)]
# Target values of the row as a plain Python list.
Y = row[Y_features].tolist()

print(f"x:{X} \n y : {Y}")

x:[<xarray.DataArray 'range_corr' (Height: 2008, Time: 60)>
array([[ 9.47471875e+04,  1.19496406e+05,  8.90777188e+04, ...,
         1.30978422e+05,  8.86548047e+04,  1.00681961e+05],
       [ 9.15771875e+04,  9.18939531e+04,  7.74540781e+04, ...,
         1.05641562e+05,  9.16088438e+04,  2.45590047e+05],
       [ 6.48639922e+04,  1.43152047e+05,  1.34751453e+05, ...,
         1.76731469e+05,  1.51479375e+05,  2.11867234e+05],
       ...,
       [-1.11136336e+08, -1.01473544e+08,  1.28525000e+08, ...,
         9.37317600e+07, -1.10170544e+08,  1.05329608e+08],
       [ 1.12205576e+08, -1.01574264e+08, -9.47996480e+07, ...,
        -1.29627424e+08, -1.10279896e+08, -1.18018064e+08],
       [-1.11356992e+08, -1.01675016e+08,  3.52487520e+08, ...,
        -1.29756008e+08, -1.10389288e+08,  1.05538736e+08]], dtype=float32)
Coordinates:
  * Time        (Time) datetime64[ns] 2017-10-23T05:30:00 ... 2017-10-23T05:5...
  * Height      (Height) float64 0.3088 0.3162 0.3237 0.3312 ... 15.29 15.

In [94]:
def scale_X(X, scales):
    """Multiply every profile in ``X`` by the factor at the same position in ``scales``.

    Args:
        X: sequence of objects accepted by ``xr.apply_ufunc`` (xarray
            DataArrays in this notebook), e.g. the selected profiles.
        scales: sequence of multiplicative factors (e.g. ``X_scales``).

    Returns:
        list: one scaled object per (scale, profile) pair. Pairing stops at the
        shorter of the two inputs.
    """
    scaled = []
    for factor, profile in zip(scales, X):
        # The lambda runs inside this iteration, so it sees the current factor.
        scaled.append(xr.apply_ufunc(lambda values: values * factor, profile))
    return scaled

def pow_X(X, powers):
    """Raise every profile in ``X`` to the exponent at the same position in ``powers``.

    Note: as the notebook output shows, negative values raised to a fractional
    exponent (e.g. 0.2) come out as NaN.

    Args:
        X: sequence of objects accepted by ``xr.apply_ufunc`` (xarray
            DataArrays in this notebook).
        powers: sequence of exponents (e.g. ``X_powers``).

    Returns:
        list: one transformed object per (power, profile) pair. Pairing stops
        at the shorter of the two inputs.
    """
    raised = []
    for exponent, profile in zip(powers, X):
        # The lambda runs inside this iteration, so it sees the current exponent.
        raised.append(xr.apply_ufunc(lambda values: values ** exponent, profile))
    return raised

def scale_Y(Y, scales):
    """Multiply every target value in ``Y`` by the factor at the same position in ``scales``.

    Args:
        Y: sequence of target values.
        scales: sequence of multiplicative factors (e.g. ``Y_scales``).

    Returns:
        list: ``value * factor`` for each pair. Pairing stops at the shorter of
        the two inputs.
    """
    scaled = []
    for factor, value in zip(scales, Y):
        scaled.append(value * factor)
    return scaled

def pow_Y(Y, powers):
    """Raise every target value in ``Y`` to the exponent at the same position in ``powers``.

    Args:
        Y: sequence of target values.
        powers: sequence of exponents (e.g. ``Y_powers``).

    Returns:
        list: ``value ** exponent`` for each pair. Pairing stops at the shorter
        of the two inputs.
    """
    raised = []
    for exponent, value in zip(powers, Y):
        raised.append(value ** exponent)
    return raised

In [95]:
# Apply the unit scaling first and the power transform second, to both the
# input profiles and the target values.
X_trans = pow_X(scale_X(X, X_scales),X_powers)
Y_trans = pow_Y(scale_Y(Y, Y_scales), Y_powers)
print(f"x:{X_trans} \n y : {Y_trans}")

x:[<xarray.DataArray 'range_corr' (Height: 2008, Time: 60)>
array([[0.6241849 , 0.65383923, 0.6165294 , ..., 0.66594744, 0.6159429 ,
        0.63181555],
       [0.6199511 , 0.62037945, 0.5995271 , ..., 0.6379211 , 0.619994  ,
        0.7551655 ],
       [0.57862973, 0.6778904 , 0.6697407 , ..., 0.7070706 , 0.6855998 ,
        0.7331836 ],
       ...,
       [       nan,        nan, 2.6411772 , ..., 2.4795756 ,        nan,
        2.538108  ],
       [2.5704129 ,        nan,        nan, ...,        nan,        nan,
               nan],
       [       nan,        nan, 3.231683  , ...,        nan,        nan,
        2.539115  ]], dtype=float32)
Coordinates:
  * Time        (Time) datetime64[ns] 2017-10-23T05:30:00 ... 2017-10-23T05:5...
  * Height      (Height) float64 0.3088 0.3162 0.3237 0.3312 ... 15.29 15.3 15.3
    Wavelength  uint64 ..., <xarray.DataArray 'attbsc' (Height: 2008, Time: 60)>
array([[0.27150634, 0.2715065 , 0.27150664, ..., 0.2715148 , 0.27151495,
        0.2715151 ]

In [None]:
# Disabled scratch cell: the code below is wrapped in a bare string literal, so
# it is only evaluated as a string and never executed.
# NOTE(review): it reads `sub_ds`, which in the visible cells is only assigned
# inside another disabled cell - define it before re-enabling this cell.
'''
Xtensor = torch.from_numpy(sub_ds.values).float().to(device)
print(type(Xtensor), Xtensor.shape)
from PIL import Image
plt.figure()
plt.imshow(Xtensor.cpu() ,#torchvision.transforms.ToPILImage()(Xtensor),
           vmin=Xtensor.min().item(),
           vmax= Xtensor.max().item(),  cmap = 'turbo',aspect='auto')#, interpolation="bicubic")
plt.gca().invert_yaxis()
plt.colorbar()
#tensor_image = Xtensor.view(Xtensor.shape[0], Xtensor.shape[1])

plt.show()
'''