Skip to content

Commit

Permalink
Merge pull request #41 from Addalin/overlap_updates
Browse files Browse the repository at this point in the history
Overlap updates
  • Loading branch information
Addalin committed Aug 1, 2021
2 parents 1cc19f1 + 8326bb7 commit 032c110
Show file tree
Hide file tree
Showing 5 changed files with 169 additions and 95 deletions.
2 changes: 1 addition & 1 deletion data/stations.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
station_name, location, longitude, latitude, altitude, start_bin_height, end_bin_height, n_bins, pt_bin, dt, gdas1_folder, gdastxt_folder, lidar_src_calib_folder,lidar_src_folder,molecular_dataset,lidar_dataset_calib,db_file,aeronet_folder,aeronet_name,generation_folder,gen_signal_dataset,gen_lidar_dataset,gen_aerosol_dataset,gen_bg_dataset,gen_density_dataset, lidar_dataset, bg_dataset
haifa, Haifa,35.02,32.8,230,78.75,22485.66016,3000,250, 50 * 1E-9, H:\data_haifa\DATA FROM TROPOS\GDAS\haifa, H:\data_haifa\DATA FROM TROPOS\GDAS\haifa_preproc, H:\data_haifa\DATA FROM TROPOS\data\level1a\PollyXT_TROPOS,H:\data_haifa\DATA FROM TROPOS\data\level0\PollyXT_TROPOS,D:\data_haifa\DATA FROM TROPOS\molecular_dataset,D:\data_haifa\DATA FROM TROPOS\lidar_dataset_calib,H:\data_haifa\DATA FROM TROPOS\data\level1a\PollyXT_TROPOS\pollyxt_tropos_calibration.db,D:\data_haifa\AERONET,Technion_Haifa_IL,D:\data_haifa\GENERATION\params_dataset,D:\data_haifa\GENERATION\signal_dataset,D:\data_haifa\GENERATION\lidar_dataset,D:\data_haifa\GENERATION\aerosol_dataset,D:\data_haifa\GENERATION\bg_dataset,D:\data_haifa\GENERATION\density_dataset,D:\data_haifa\DATA FROM TROPOS\lidar_dataset,D:\data_haifa\DATA FROM TROPOS\bg_dataset
haifa_shubi, Haifa,35.02,32.8,230,78.75,22485.66016,3000, 250,50 * 1E-9, data_example/gdas, /media/shubi/My Passport/data_haifa/DATA FROM TROPOS/GDAS/haifa_preproc, /media/shubi/My Passport/data_haifa/DATA FROM TROPOS/data/level1a/PollyXT_TROPOS,/home/shubi/PycharmProjects/learning_lidar/data/DATA FROM TROPOS/data/level0,/media/shubi/My Passport/data_haifa/DATA FROM TROPOS/molecular_dataset,/home/shubi/PycharmProjects/learning_lidar/data/DATA FROM TROPOS/lidar_dataset_calib,/home/shubi/PycharmProjects/learning_lidar/data/data_example/pollyxt_tropos_calibration.db,D:\data_haifa\AERONET,Technion_Haifa_IL,/home/shubi/PycharmProjects/learning_lidar/data/GENERATION/bg_dataset,/home/shubi/PycharmProjects/learning_lidar/data/GENERATION/signal_dataset,/home/shubi/PycharmProjects/learning_lidar/data/GENERATION/lidar_dataset,/home/shubi/PycharmProjects/learning_lidar/data/GENERATION/aerosol_dataset,/home/shubi/PycharmProjects/learning_lidar/data/GENERATION/bg_dataset,/home/shubi/PycharmProjects/learning_lidar/data/GENERATION/density_dataset, /home/shubi/PycharmProjects/learning_lidar/data/DATA FROM TROPOS/lidar_dataset, /home/shubi/PycharmProjects/learning_lidar/data/DATA FROM TROPOS/bg_dataset
haifa_shubi, Haifa,35.02,32.8,230,78.75,22485.66016,3000, 250,50 * 1E-9, data_example/gdas, /media/shubi/My Passport/data_haifa/DATA FROM TROPOS/GDAS/haifa_preproc, /media/shubi/My Passport/data_haifa/DATA FROM TROPOS/data/level1a/PollyXT_TROPOS,/home/shubi/PycharmProjects/learning_lidar/data/DATA FROM TROPOS/data/level0,/media/shubi/My Passport/data_haifa/DATA FROM TROPOS/molecular_dataset,/home/shubi/PycharmProjects/learning_lidar/data/DATA FROM TROPOS/lidar_dataset_calib,/home/shubi/PycharmProjects/learning_lidar/data/data_example/pollyxt_tropos_calibration.db,D:\data_haifa\AERONET,Technion_Haifa_IL,/home/shubi/PycharmProjects/learning_lidar/data/GENERATION/params_dataset,/home/shubi/PycharmProjects/learning_lidar/data/GENERATION/signal_dataset,/home/shubi/PycharmProjects/learning_lidar/data/GENERATION/lidar_dataset,/home/shubi/PycharmProjects/learning_lidar/data/GENERATION/aerosol_dataset,/home/shubi/PycharmProjects/learning_lidar/data/GENERATION/bg_dataset,/home/shubi/PycharmProjects/learning_lidar/data/GENERATION/density_dataset, /home/shubi/PycharmProjects/learning_lidar/data/DATA FROM TROPOS/lidar_dataset, /home/shubi/PycharmProjects/learning_lidar/data/DATA FROM TROPOS/bg_dataset
76 changes: 33 additions & 43 deletions learning_lidar/generation/daily_signals_generation.py
Original file line number Diff line number Diff line change
@@ -1,70 +1,60 @@
import datetime
import os
import logging
from multiprocessing import Pool, cpu_count
from itertools import repeat
import pandas as pd
import xarray as xr

import learning_lidar.utils.global_settings as gs
import learning_lidar.generation.generation_utils as gen_utils
import learning_lidar.utils.utils as utils
import learning_lidar.utils.vis_utils as vis_utils
import learning_lidar.generation.daily_signals_generations_utils as gen_sig_utils
from learning_lidar.utils.utils import create_and_configer_logger, get_base_arguments


# TODO: add 2 flags - Debug and save figure.

def generate_daily_lidar_measurement(station, day_date, save_ds=True, update_overlap_only=False):
def generate_daily_lidar_measurement(station: gs.Station, day_date: datetime.date, save_ds: bool = True)\
-> (xr.Dataset, xr.Dataset):
ds_total = gen_sig_utils.calc_total_optical_density(station=station, day_date=day_date)
signal_ds = gen_sig_utils.calc_lidar_signal(station, day_date, ds_total)
measure_ds = gen_sig_utils.calc_daily_measurement(station=station, day_date=day_date, signal_ds=signal_ds,
update_overlap_only=False)

overlap_params = pd.read_csv("../../data/overlap_params.csv", index_col=0)
try:
# TODO Generalize. Currently adapted to 4 months only - (4,5,9,10)
overlap_params_index = {4: 0, 5: 1, 9: 2, 10: 3}[day_date.month]
overlap_param = overlap_params.loc[overlap_params_index, :].values
except KeyError:
raise KeyError(f"This month is not currently supported in overlap function.")
if save_ds:
# TODO: check that the range_corr_p is added to measure_ds, and that the LCNET is uploading the new paths
# (especially if range_corr_p ) . and if so, save only 2 single files of measure_ds, and signal_ds to save
# time and space
# NOTE: saving to separated datasets (for the use of the learning phase),
# is done in dataseting.prepare_generated_samples()
gen_utils.save_generated_dataset(station, measure_ds, data_source='lidar', save_mode='single')
gen_utils.save_generated_dataset(station, signal_ds, data_source='signal', save_mode='single')

if update_overlap_only:
month_folder = utils.get_month_folder_name(station.gen_lidar_dataset, day_date) # drop
nc_path = os.path.join(month_folder, f"{day_date.strftime('%Y_%m_%d')}_{station.location}_generated_lidar.nc") # drop
# TODO: create get_daily_measure_ds similar to get_daily_gen_param_ds (and move it inside calc_daily_measurement use only a flag UPDATE_measure)
return measure_ds, signal_ds

measure_ds = gen_sig_utils.calc_daily_measurement(station, day_date, overlap_params=overlap_param,
signal_ds=None, measure_ds_path=nc_path)

if save_ds:
# NOTE: saving to separated datasets (for the use of the learning phase),
# is done in dataseting.prepare_generated_samples()
gen_utils.save_generated_dataset(station, measure_ds, data_source='lidar', save_mode='single')
def update_daily_lidar_measurement(station: gs.Station, day_date: datetime.date, save_ds: bool = True) -> xr.Dataset:
"""
Updates a measure ds with overlap.
return measure_ds
:param station: gs.station() object of the lidar station
:param day_date: datetime.date object of the required date
:param save_ds: bool, whether to save the generated measure ds or not
:return: xr.Dataset with the generated measure ds
"""

measure_ds = gen_sig_utils.calc_daily_measurement(station, day_date, signal_ds=None, update_overlap_only=True)

if save_ds:
# NOTE: saving to separated datasets (for the use of the learning phase),
# is done in dataseting.prepare_generated_samples()
gen_utils.save_generated_dataset(station, measure_ds, data_source='lidar', save_mode='single')





else:
ds_total = gen_sig_utils.calc_total_optical_density(station=station, day_date=day_date)
signal_ds = gen_sig_utils.calc_lidar_signal(station, day_date, ds_total)
measure_ds = gen_sig_utils.calc_daily_measurement(station=station, day_date=day_date, signal_ds=signal_ds,
overlap_params=overlap_param)

if save_ds:
# TODO: check that the range_corr_p is added to measure_ds, and that the LCNET is uploading the new paths
# (especially if range_corr_p ) . and if so, save only 2 single files of measure_ds, and signal_ds to save
# time and space
# NOTE: saving to separated datasets (for the use of the learning phase),
# is done in dataseting.prepare_generated_samples()
gen_utils.save_generated_dataset(station, measure_ds, data_source='lidar', save_mode='single')
gen_utils.save_generated_dataset(station, signal_ds, data_source='signal', save_mode='single')

return measure_ds, signal_ds
return measure_ds


def daily_signals_generation_main(params):

vis_utils.set_visualization_settings()
gen_sig_utils.PLOT_RESULTS = params.plot_results
# TODO: Toggle PLOT_RESULTS to True - doesn't seem to work. Omer - works for me. Adi, please check again..
Expand All @@ -83,9 +73,9 @@ def daily_signals_generation_main(params):
num_days = len(days_list)
num_processes = 1 if gen_sig_utils.PLOT_RESULTS else min((cpu_count() - 1, num_days))

func = generate_daily_lidar_measurement if not params.update_overlap_only else update_daily_lidar_measurement
with Pool(num_processes) as p:
p.starmap(generate_daily_lidar_measurement, zip(repeat(station), days_list,
repeat(params.save_ds), repeat(params.update_overlap_only)))
p.starmap(func, zip(repeat(station), days_list, repeat(params.save_ds)))

logger.info(f"\nDone generating lidar signals & measurements "
f"for period: [{start_date.strftime('%Y-%m-%d')},{end_date.strftime('%Y-%m-%d')}]")
Expand Down
48 changes: 17 additions & 31 deletions learning_lidar/generation/daily_signals_generations_utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import datetime
import logging
import os
from datetime import timedelta
Expand Down Expand Up @@ -366,49 +367,34 @@ def calc_range_corr_measurement(station, day_date, pn_ds, r2_ds):
return pr2n_ds


def calc_daily_measurement(station, day_date, overlap_params, signal_ds, measure_ds_path=None):
def calc_daily_measurement(station: gs.Station, day_date: datetime.date, signal_ds: xr.Dataset,
update_overlap_only: bool=False) -> xr.Dataset:
"""
Generate Lidar measurement, by combining background signal and the lidar signal,
and then creating Poisson signal, which is the measurement of the mean lidar signal.
If measure_ds_path is given, an existing ds is loaded for the measurement, instead of computed from scratch
If update_overlap_only is given, an existing ds is loaded for the lidar and signal, instead of computed from scratch
:param overlap_params: list of parameters for overlap sigmoid function
:param measure_ds_path: path to existing measure ds
:param update_overlap_only: bool, whether to load a precomputed lidar and signal dataset or not
:param station: gs.station() object of the lidar station
:param day_date: datetime.date object of the required date
:param signal_ds: xr.Dataset(), containing the daily lidar signal (clean)
:return: measure_ds: xr.Dataset(), containing the daily lidar measurement (with background and applied photon noise)
"""
"""
if update:
load measure ds
load signal ds (pass signal_ds=None)
# get the ingredients
if update_overlap_only:
measure_ds = gen_utils.get_daily_gen_ds(station, day_date, type_='lidar')
signal_ds = gen_utils.get_daily_gen_ds(station, day_date, type_='signal')
p_mean = measure_ds.p_mean
p_bg = get_daily_bg(station, day_date)

p_bg = get_daily_bg(station, day_date) # daily background: p_bg
# Expand p_bg to coordinates : 'Wavelength','Height', 'Time
bg_ds = p_bg.broadcast_like(signal_ds.range_corr)
p_mean = calc_mean_measurement(station, day_date, signal_ds, bg_ds) if not update
overlap_ds = get_daily_overlap() (load daily overlap params (similar to load daily measurement) , sigmoid, attr)
"""
# get the inigridients
if measure_ds_path:
measure_ds = xr_utils.load_dataset(measure_ds_path)
bg_ds = measure_ds.p_bg
p_mean = measure_ds.p_mean
else:
p_bg = get_daily_bg(station, day_date) # daily background: p_bg
# Expand p_bg to coordinates : 'Wavelength','Height', 'Time
bg_ds = p_bg.broadcast_like(signal_ds.range_corr)
p_mean = calc_mean_measurement(station, day_date, signal_ds, bg_ds) # me

Height_indx = p_mean.Height
# Apply Overlap function
# TODO use gen_utils.create_ratio(total_bins=Height_indx.size, mode='overlap')?
# It has additional options.
overlap = sigmoid(Height_indx, *overlap_params)
overlap_ds = xr.Dataset(data_vars={'overlap': ('Height', overlap)},
coords={'Height': Height_indx.values},
attrs={'name': 'Overlap Function'})

p_mean = calc_mean_measurement(station, day_date, signal_ds, bg_ds) if not update_overlap_only else p_mean

overlap_ds = gen_utils.get_daily_overlap(station, day_date, height_indx=p_mean.Height)

# here the calculation starts
p_mean = xr.apply_ufunc(lambda x, r: (x * r),
p_mean, overlap_ds.overlap,
Expand Down
67 changes: 63 additions & 4 deletions learning_lidar/generation/generation_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@
from matplotlib import pyplot as plt
from scipy.ndimage import gaussian_filter1d
from tqdm import tqdm
import xarray as xr

import learning_lidar.preprocessing.preprocessing_utils as prep_utils
import learning_lidar.utils.xr_utils as xr_utils
from learning_lidar.generation.generate_density_utils import PLOT_RESULTS
from learning_lidar.utils.global_settings import eps
import learning_lidar.utils.global_settings as gs


def get_gen_dataset_file_name(station, day_date, wavelength='*',
Expand Down Expand Up @@ -72,6 +74,8 @@ def save_generated_dataset(station, dataset, data_source='lidar', save_mode='bot
base_folder = station.gen_density_dataset
elif data_source == 'bg':
base_folder = station.gen_bg_dataset
else:
raise Exception("Unsupported data_source.")
month_folder = prep_utils.get_month_folder_name(base_folder, date_datetime)

xr_utils.get_daily_ds_date(dataset)
Expand Down Expand Up @@ -118,7 +122,6 @@ def get_month_gen_params_path(station, day_date, type='density_params'):
'density_params' - for density sampler generator,
'bg'- for generated background signal,
'LC' - for generated Lidar Constant signal
# TODO : add 'overlap'
:param station: gs.station() object of the lidar station
:param day_date: datetime.date object of the required date
:return: str. Path to monthly dataset of generation parameters.
Expand All @@ -129,23 +132,45 @@ def get_month_gen_params_path(station, day_date, type='density_params'):
_, monthdays = calendar.monthrange(year, month)
month_end_day = datetime(year, month, monthdays, 0, 0)

folder_name = prep_utils.get_month_folder_name(station.generation_folder, day_date)

nc_name = f"generated_{type}_{station.location}_{month_start_day.strftime('%Y-%m-%d')}_" \
f"{month_end_day.strftime('%Y-%m-%d')}.nc"

folder_name = prep_utils.get_month_folder_name(station.generation_folder, day_date)

gen_source_path = os.path.join(folder_name, nc_name)
return gen_source_path


def get_daily_ds_path(station: gs.Station, day_date: datetime.date, type_: str) -> str:
    """
    Get the path to the daily generated measure (lidar) or signal dataset file.

    :param station: gs.station() object of the lidar station
    :param day_date: datetime.date object of the required date
    :param type_: str, 'lidar' for measure dataset. 'signal' for signal dataset
    :return: str. Path to the daily generated dataset file: the file name from
     get_gen_dataset_file_name(), joined under the folder returned by
     prep_utils.get_month_folder_name() for the station's generated lidar / signal folder.
    :raises ValueError: if type_ is neither 'lidar' nor 'signal'
    """
    if type_ == 'lidar':
        parent_folder = station.gen_lidar_dataset
    elif type_ == 'signal':
        parent_folder = station.gen_signal_dataset
    else:
        # ValueError subclasses Exception, so callers catching Exception keep working.
        raise ValueError(f"Unsupported type '{type_}'. Should be 'lidar' or 'signal'")

    month_folder = prep_utils.get_month_folder_name(parent_folder, day_date)
    # wavelength='*' : the daily file is not requested for one specific wavelength
    file_name = get_gen_dataset_file_name(station, day_date, wavelength='*', data_source=type_)
    gen_source_path = os.path.join(month_folder, file_name)
    return gen_source_path


def get_month_gen_params_ds(station, day_date, type='density_params'):
"""
Returns the monthly parameters of density creation as a dataset.
:param type: type of generated parameter:
'density_params' - for density sampler generator,
'bg'- for generated background signal,
'LC' - for generated Lidar Constant signal
# TODO : add 'overlap'
'overlap' - for generated overlap parameters
:param station: gs.station() object of the lidar station
:param day_date: datetime.date object of the required date
:return: day_params_ds: xarray.Dataset(). Monthly dataset of generation parameters.
Expand Down Expand Up @@ -173,6 +198,40 @@ def get_daily_gen_param_ds(station, day_date, type='density_params'):
return day_params_ds


def get_daily_gen_ds(station: gs.Station, day_date: datetime.date, type_: str) -> xr.Dataset:
    """
    Load the daily generated measure (lidar) or signal dataset of a station.

    The file location is resolved by get_daily_ds_path() and the file is read
    with xr_utils.load_dataset().

    :param station: gs.station() object of the lidar station
    :param day_date: datetime.date object of the required date
    :param type_: str, should be one of 'signal' / 'lidar'
    :return: xarray.Dataset(). The dataset loaded from the daily dataset path.
    """
    return xr_utils.load_dataset(get_daily_ds_path(station, day_date, type_))


def get_daily_overlap(station: gs.Station, day_date: datetime.date, height_indx: xr.DataArray) -> xr.Dataset:
    """
    Build the overlap function over the given heights from the stored overlap parameters.

    :param station: gs.station() object of the lidar station
    :param day_date: datetime.date object of the required date
    :param height_indx: xr.DataArray, stores the height index per Height
    :return: xr.Dataset holding an 'overlap' variable along the 'Height' coordinate
    """
    # The overlap parameters are fetched through the generation-parameters loader (type='overlap')
    params_ds = get_month_gen_params_ds(station, day_date, type='overlap')

    # All stored parameter values are passed positionally to the sigmoid
    # NOTE(review): assumes the flattened values match sigmoid's positional arguments - confirm
    sigmoid_args = params_ds.to_array().values
    overlap_values = sigmoid(height_indx, *sigmoid_args)

    return xr.Dataset(data_vars={'overlap': ('Height', overlap_values)},
                      coords={'Height': height_indx.values},
                      attrs={'name': 'Overlap Function'})


def dt2binscale(dt_time, res_sec=30):
"""
TODO consider to move this function to Station ?
Expand Down
71 changes: 55 additions & 16 deletions learning_lidar/generation/overlap.ipynb

Large diffs are not rendered by default.

0 comments on commit 032c110

Please sign in to comment.