In [1]:
import os,sys
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib
from datetime import datetime, timedelta, time
import glob
from generate_atmosphere import LidarProfile,RadiosondeProfile
import miscLidar as mscLid
from molecular import rayleigh_scattering
import global_settings as gs
import pandas as pd
import preprocessing as prep
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import xarray as xr
%matplotlib inline
from IPython.display import display

  # Load amsmath for LaTeX-rendered text. The preamble is given as a single
  # string: Matplotlib deprecated list values for this rcParam in 3.3 and later
  # removed support, while a plain string is accepted by old and new versions.
  plt.rcParams['text.latex.preamble'] = r"\usepackage{amsmath}"


In [2]:
# Three font-size tiers reused by every rc group configured below.
SMALL_SIZE, MEDIUM_SIZE, BIG_SIZE = 12, 14, 16

# default text size
plt.rc('font', size=SMALL_SIZE)
# axes: title size first, then x/y label size
plt.rc('axes', titlesize=BIG_SIZE, labelsize=MEDIUM_SIZE)
# tick labels on both axes
plt.rc('xtick', labelsize=SMALL_SIZE)
plt.rc('ytick', labelsize=SMALL_SIZE)
# legend entries
plt.rc('legend', fontsize=SMALL_SIZE)
# figure-level title
plt.rc('figure', titlesize=BIG_SIZE)

In [3]:
def get_time_slice_dataset(dataset, start_time, end_time):
    """Select the part of `dataset` whose `Time` coordinate lies in a window.

    Parameters
    ----------
    dataset : object exposing ``.sel(Time=...)``
        The notebook passes the molecular dataset; any object with a
        compatible ``sel`` method works.
    start_time, end_time :
        Window bounds, forwarded unchanged as ``slice(start_time, end_time)``.
        How the bounds are interpreted (e.g. inclusivity) is decided by
        ``dataset.sel`` itself.

    Returns
    -------
    Whatever ``dataset.sel`` returns for that ``Time`` slice.
    """
    time_window = slice(start_time, end_time)
    return dataset.sel(Time=time_window)

def humanbytes(B):
    """Return the given byte count as a human friendly string.

    Parameters
    ----------
    B : int, float or numeric string
        Number of bytes; converted with ``float(B)``.

    Returns
    -------
    str
        ``'<float> Byte'`` / ``'<float> Bytes'`` below 1024, otherwise the
        value scaled to KB, MB, GB or TB (powers of 1024) with two decimals.
        ``None`` is returned when no branch matches (e.g. NaN input).
    """
    B = float(B)
    KB = float(1024)
    MB = float(KB ** 2)  # 1,048,576
    GB = float(KB ** 3)  # 1,073,741,824
    TB = float(KB ** 4)  # 1,099,511,627,776

    if B < KB:
        # The previous test `0 == B > 1` is a chained comparison that can never
        # be true, so the unit was always the singular 'Byte'. Only exactly one
        # byte is singular.
        unit = 'Byte' if B == 1 else 'Bytes'
        return '{0} {1}'.format(B, unit)
    elif KB <= B < MB:
        return '{0:.2f} KB'.format(B / KB)
    elif MB <= B < GB:
        return '{0:.2f} MB'.format(B / MB)
    elif GB <= B < TB:
        return '{0:.2f} GB'.format(B / GB)
    elif TB <= B:
        return '{0:.2f} TB'.format(B / TB)

In [4]:
'''set day'''
# Calendar day (midnight) that the later cells pass to the prep.* helpers.
day_date = datetime(year=2017, month=9, day=13)
print(day_date)

2017-09-13 00:00:00


In [5]:
'''set station'''
# Build the station settings object for Haifa from global_settings; the later
# cells pass `haifa_station` to every prep.* call.
print('load station')
haifa_station = gs.Station('Haifa')
#haifa_station.location
# Bare last expression: the notebook shows the object's repr as the cell output.
haifa_station

load station


Station()

In [6]:
# Set paths for small tests
#lidar_parent_folder = 'H:\data_haifa\DATA FROM TROPOS\data\level1a\PollyXT_TROPOS'
#gdas_parent_folder = 'H:\data_haifa\DATA FROM TROPOS\GDAS\haifa'
#lidar_parent_folder = haifa_station.lidar_src_folder
# changing the path for dev

# Disabled overrides: uncomment to point the station at the local
# 'data examples' folders instead of its configured folders.
# (Previously kept inside a string literal; real comments make it clear that
# nothing here is executed.)
#haifa_station.gdas1_folder = os.path.join('.','data examples','gdas')
#haifa_station.gdastxt_folder  = os.path.join('.','data examples','gdas_txt')
#haifa_station.molecular_dataset =  os.path.join('.','data examples','molecular_dataset')
#haifa_station.lidar_dataset = os.path.join('.','data examples','lidar_dataset')
#haifa_station.lidar_src_folder = os.path.join('.','data examples','netcdf')

# Print the station so the folders actually in effect are recorded in the output.
print(haifa_station)

<class 'global_settings.Station'>: {'name': 'Haifa', 'location': 'Haifa', 'lon': 35.02, 'lat': 32.8, 'altitude': 229.0, 'start_bin_height': 78.75, 'end_bin_height': 22485.66016, 'n_bins': 3000, 'dt': 5.0000000000000004e-08, 'gdas1_folder': 'H:\\data_haifa\\DATA FROM TROPOS\\GDAS\\haifa', 'gdastxt_folder': 'H:\\data_haifa\\DATA FROM TROPOS\\GDAS\\haifa_preproc', 'lidar_src_folder': 'H:\\data_haifa\\DATA FROM TROPOS\\data\\level1a\\PollyXT_TROPOS', 'molecular_dataset': 'H:\\data_haifa\\DATA FROM TROPOS\\molecular_dataset', 'lidar_dataset': 'H:\\data_haifa\\DATA FROM TROPOS\\lidar_dataset', 'db_file': 'H:\\data_haifa\\DATA FROM TROPOS\\data\\level1a\\PollyXT_TROPOS\\pollyxt_tropos_calibration.db'}


In [7]:
# Switches for the GDAS -> txt conversion. With CONVERT_GDAS off the cell does
# nothing; SINGLE_DAY chooses between converting `day_date` plus the following
# day, or the hard-coded September 2017 period.
CONVERT_GDAS = False
SINGLE_DAY = False

if CONVERT_GDAS and SINGLE_DAY:
	# convert cur day (gdas files from TROPOS to txt)
	gdastxt = prep.convert_daily_gdas(haifa_station, day_date)
	print('gdas_txt',gdastxt)

	# convert next day (gdas files from TROPOS to txt)
	next_day = day_date + timedelta(days=1)
	gdastxt_nextday = prep.convert_daily_gdas(haifa_station, next_day)
	print('gdas_txt next day',gdastxt_nextday)
elif CONVERT_GDAS:
	# convert gdas files for a period
	start_day = datetime(2017, 9, 1)
	end_day = datetime(2017, 9, 30)
	chunk_paths = prep.convert_periodic_gdas(haifa_station, start_day, end_day)
	print(chunk_paths)

In [8]:
# Optional check: fetch the daily molecular profiles for one wavelength and
# plot the beta profile. Disabled by default.
SHOW_MOL_DF = False
if SHOW_MOL_DF:
	'''set parameters'''
	# presumably `.G` is the green-channel wavelength in nm — TODO confirm
	lambda_nm = gs.LAMBDA_nm().G
	# NOTE(review): 'Km' looks like a height-units argument while the y-label
	# below says [1/m] — confirm the units of the returned profiles.
	df_sigma , df_beta = prep.get_daily_molecular_profiles( haifa_station , day_date , lambda_nm , 'Km' )
	# Visualizing molecular profiles

	# NOTE(review): if df_beta is a DataFrame, pandas' .plot() may open its own
	# figure and leave the one created here empty — confirm.
	plt.figure()
	ax = df_beta.plot()
	ax.set_ylabel(r'$\beta_{\rm mol}[1/m]$')
	plt.show()

In [9]:
# For profiling, uncomment the line below. IPython cell magics must be the
# first line of the cell, so keep it at the very top.
# %%prun -s cumulative
# Optional step: build the daily molecular dataset `ds` for `day_date`.
# In the visible cells this is the only place `ds` is assigned, and several
# later cells read it.
CREATE_MOL_DS = False
if CREATE_MOL_DS:
	print(f"Create daily molecular dataset for {day_date}")
	ds =  prep.generate_daily_molecular(haifa_station,day_date,optim_size = True)
	print(ds)

In [10]:
# Optional step: save the molecular dataset `ds` through prep.save_molecular_dataset.
# `ds` is only assigned inside the CREATE_MOL_DS branch (in the visible cells),
# so enabling this flag without that one fails with NameError on a fresh kernel.
SAVE_MOL_DS = False
if SAVE_MOL_DS:
	print('Saving dataset to:')
	# presumably returns the list of written file paths; save_mode='both' is
	# assumed to write per-channel files plus a combined one — TODO confirm
	netcdfs = prep.save_molecular_dataset(haifa_station,ds,save_mode='both')
	print(netcdfs)

In [11]:
# Optional round-trip check: load the last entry of `netcdfs` and print it.
# `netcdfs` is only assigned inside the SAVE_MOL_DS branch, so this needs that
# flag enabled as well.
TEST_LOAD_DS = False
if TEST_LOAD_DS:
	# presumably the last path is the combined (all-wavelength) file — TODO confirm
	netcdf = netcdfs[-1]
	ds_load = prep.load_dataset(netcdf)
	print(ds_load)

In [12]:
# Second load check, reusing the TEST_LOAD_DS flag from the previous cell:
# load the second-to-last entry of `netcdfs` (also rebinding `netcdf`).
if TEST_LOAD_DS:
	# presumably a single-profile/per-channel file — TODO confirm against the save order
	netcdf = netcdfs[-2]
	ds_load_prof = prep.load_dataset(netcdf)
	print(ds_load_prof)

In [13]:
# Optional visualization of the molecular dataset `ds`: for each of its first
# three data variables, draw a Time x Height plot faceted by Wavelength and
# title it with the variable's 'info' attribute, the location and the date.
# Requires `ds` from the CREATE_MOL_DS cell.
VIS_DS = False
if VIS_DS:
	profileTypes = list(ds.data_vars)[0:3]
	#list(ds.data_vars)[3:]
	# Hour:minute tick labels for the time axis.
	xfmt = mdates.DateFormatter('%H:%M')
	# NOTE(review): `timetics` is computed but not used anywhere else in this cell.
	timetics = ds.Time.dt.strftime('%H:%M')
	timetics

	# NOTE(review): `row` is only referenced by the commented-out lines below.
	for row,prof in enumerate(profileTypes):
		ds_var = ds[prof]
		g = ds_var.plot(x='Time',y='Height',col='Wavelength',
		                  col_wrap = 3,cmap = 'turbo')# , figsize = (5,8)) #TODO: figure out how to enlarge the figsize, it doesn't work properly :-(
		# Apply the same time-axis formatting to every facet: HH:MM labels,
		# one major tick every 4 hours, unrotated and centred.
		for ax in g.axes.flat:
			ax.xaxis.set_major_formatter(xfmt)
			ax.xaxis_date()
			ax.get_xaxis().set_major_locator(mdates.HourLocator(interval=4))
			plt.setp( ax.get_xticklabels(), rotation=0,horizontalalignment='center')
			#if row in [0,1]:
			#	ax.xaxis.set_ticklabels([])
			#	ax.set_xlabel(None)
			#if row in [1,2]:
			#	ax.set_title(None)

		# Build a dd/mm/YYYY string from the dataset's `date` value.
		# assumes ds.date is a datetime64[ns] scalar, so .tolist() yields integer
		# nanoseconds since the epoch (hence the /1e9) — TODO confirm
		# NOTE(review): datetime.utcfromtimestamp is deprecated since Python 3.12.
		date_64 = ds.date.values
		date_datetime = datetime.utcfromtimestamp(date_64.tolist()/1e9)
		date_str = date_datetime.strftime('%d/%m/%Y')
		# y > 1 places the title above the top edge of the figure.
		plt.suptitle('{} - {} {}'.format(ds_var.attrs['info'],ds.attrs['location'],date_str), y = 1.05)
		#plt.tight_layout()
		plt.show()

In [14]:
# Optional visualization: call prep.visualize_ds_profile_chan on `ds` once per
# wavelength with profile_type 'attbsc' (presumably attenuated backscatter —
# TODO confirm). Requires `ds` from the CREATE_MOL_DS cell.
VIS_ATTBSC = False
if VIS_ATTBSC:
	wavelengths = ds.Wavelength.values.tolist()
	for wav in wavelengths:
		 prep.visualize_ds_profile_chan(ds, wav,profile_type = 'attbsc')

In [15]:
# Optional demo: pull the `beta` variable of `ds` at Wavelength 355 and convert
# it to a pandas object for display. Requires `ds` from the CREATE_MOL_DS cell.
RUN_PD= False
if RUN_PD:
	print('retrieve a sub-dataframe from a dataset - according wavelength')
	df_test_beta = ds.beta.sel(Wavelength=355).to_pandas()
	display(df_test_beta)

In [16]:
# Optional demo: cut a ~30-minute window out of `ds` with get_time_slice_dataset.
# Requires `ds` from the CREATE_MOL_DS cell.
# NOTE(review): the window is on 2017-09-02 while `day_date` is 2017-09-13 —
# confirm the daily dataset actually covers this time.
RUN_SLICE= False
if RUN_SLICE:
	print('retrieve a sub-dataframe from a dataset - according time slice')

	start_time = datetime(2017,9,2,0,30)
	end_time = start_time + timedelta(minutes = 29.5) #datetime(2017,9,1,12,30)
	# 30-second grid over the window.
	# NOTE(review): `timestamps` is only referenced by commented-out code below.
	timestamps = pd.date_range ( start = start_time , end = end_time  ,
	                             freq = timedelta ( seconds = 30 ) )
	#slice_beta= mol_xr.loc['beta'].to_pandas().loc[:,timestamps]
	print('slicing data set by time')
	sub_ds = get_time_slice_dataset(ds, start_time, end_time)#  timestamps).plot()#=slice(start_time,start_time+timedelta(hours = 6)))

	display(sub_ds)

In [17]:
# Optional report: estimate the in-memory size of `ds` by converting each of
# its first three data variables, per wavelength, to pandas and summing
# memory_usage(deep=True). Requires `ds` from the CREATE_MOL_DS cell.
CALC_SIZE = False
if CALC_SIZE:
	profiles = list(ds.data_vars)[0:3]
	total_size =0
	for prof in profiles:
		for wav in ds.Wavelength.values:
			tmp_prof = (ds[prof]).sel(Wavelength=wav).to_pandas()
			cursize = tmp_prof.memory_usage(deep=True).sum()
			print('profile:{}, wavelength: {}, size:{}'.format(prof,wav,humanbytes(cursize)))
			total_size+=cursize

	# "at least": only the first three data variables are counted.
	print('total size of data set is at least:{}'.format(humanbytes(total_size)))


In [18]:
# Build the daily range-corrected lidar dataset for `day_date` (enabled).
# The result `ds_range_corr_daily` is consumed by the save cell and, when
# enabled, by the lidar visualization cell.
CREATE_LIDAR_DS = True
if CREATE_LIDAR_DS:
	print('get range corrected from TROPOS')
	ds_range_corr_daily =  prep.get_daily_range_corr(haifa_station, day_date,height_units ='Km',
                                                 optim_size = True, verbose = False)
	display(ds_range_corr_daily)

get range corrected from TROPOS


In [19]:
# Save the range-corrected dataset (enabled). Needs `ds_range_corr_daily` from
# the CREATE_LIDAR_DS cell (or the LOAD_LIDAR_DS cell).
# In the recorded output, save_mode='both' produced one file per wavelength
# (355, 532, 1064) followed by a combined file as the last list entry.
SAVE_LIDAR_DS= True
if SAVE_LIDAR_DS:
	print('Save range corrected dataset')
	lidar_ncpaths =  prep.save_range_corr_dataset(haifa_station,ds_range_corr_daily,
	                                              save_mode='both')
	print(lidar_ncpaths)


Save range corrected dataset
['H:\\data_haifa\\DATA FROM TROPOS\\lidar_dataset\\2017\\09\\2017_09_13_Haifa_range_corr_355_lidar.nc', 'H:\\data_haifa\\DATA FROM TROPOS\\lidar_dataset\\2017\\09\\2017_09_13_Haifa_range_corr_532_lidar.nc', 'H:\\data_haifa\\DATA FROM TROPOS\\lidar_dataset\\2017\\09\\2017_09_13_Haifa_range_corr_1064_lidar.nc', 'H:\\data_haifa\\DATA FROM TROPOS\\lidar_dataset\\2017\\09\\2017_09_13_Haifa_range_corr_lidar.nc']


In [21]:
# Look up the prepared lidar dataset paths for this station and day.
# This rebinds `lidar_ncpaths` (also assigned by the save cell). In the recorded
# outputs both lists hold the same four paths but in a different order (this one
# starts with the 1064 file), so positional indexing such as [0] depends on
# which cell ran last; [-1] is the combined file in both.
lidar_ncpaths= prep.get_prep_dataset_paths ( haifa_station , day_date , data_source = 'lidar')
lidar_ncpaths

['H:\\data_haifa\\DATA FROM TROPOS\\lidar_dataset\\2017\\09\\2017_09_13_Haifa_range_corr_1064_lidar.nc',
 'H:\\data_haifa\\DATA FROM TROPOS\\lidar_dataset\\2017\\09\\2017_09_13_Haifa_range_corr_355_lidar.nc',
 'H:\\data_haifa\\DATA FROM TROPOS\\lidar_dataset\\2017\\09\\2017_09_13_Haifa_range_corr_532_lidar.nc',
 'H:\\data_haifa\\DATA FROM TROPOS\\lidar_dataset\\2017\\09\\2017_09_13_Haifa_range_corr_lidar.nc']

In [22]:
# Optional: load the combined range-corrected file instead of regenerating it.
# Rebinds `ds_range_corr_daily`, the same name the CREATE_LIDAR_DS cell assigns.
LOAD_LIDAR_DS = False
if LOAD_LIDAR_DS:
	print( 'Load range corr in all channels')

	# Last entry is the all-channel file in both recorded `lidar_ncpaths` outputs.
	netcdf = lidar_ncpaths[-1]
	ds_range_corr_daily =  prep.load_dataset(netcdf)
	print(ds_range_corr_daily)

In [23]:
# Second load check, reusing the LOAD_LIDAR_DS flag: load a single-channel file.
if LOAD_LIDAR_DS:
	print( 'Load range corr per channel')
	# Which channel [0] refers to depends on how `lidar_ncpaths` was produced:
	# per the recorded outputs it is the 1064 file after the path-lookup cell,
	# but the 355 file straight after the save cell.
	netcdf = lidar_ncpaths[0]
	ds_load_pr2_chan = prep.load_dataset(netcdf)
	print(ds_load_pr2_chan)

In [24]:
# Optional visualization: call prep.visualize_ds_profile_chan on the
# range-corrected dataset once per wavelength, with profile_type 'range_corr'.
# presumably USE_RANGE='MID' selects a preset colour/value range — TODO confirm
VIS_LIDAR_DS = False
if VIS_LIDAR_DS:
	print('visualizing with range_plot bounding values:')
	wavelengths = ds_range_corr_daily.Wavelength.values.tolist()
	for wav in wavelengths:

		 prep.visualize_ds_profile_chan(ds_range_corr_daily, wav,profile_type = 'range_corr', USE_RANGE='MID')


In [25]:
# Optional check: list the TROPOS 'profiles' files found for `day_date` under
# the station's lidar source folder and keep the first one in `prof_path`.
TEST_PROFILES_PATHS=False
if TEST_PROFILES_PATHS:
	profiles_paths = prep.get_TROPOS_dataset_paths(haifa_station.lidar_src_folder, day_date, file_type = 'profiles')
	print(profiles_paths)
	# NOTE(review): raises IndexError if no profile file is found for the day.
	prof_path = profiles_paths[0]

In [26]:
## AERONET : https://aeronet.gsfc.nasa.gov/cgi-bin/data_display_aod_v3?site=Technion_Haifa_IL&nachal=0&year=2017&month=5&day=19&aero_water=0&level=3&if_day=0&if_err=0&place_code=10&DATA_TYPE=-999&year_or_month=3
## It is possible to merge with Terra MODIS or Aqua MODIS.
# / TODO: locate the function that downloads sunphotometer data to cameranetwork (maybe Shubi knows this)
# / TODO: ask which MODIS product is relevant to our purpose.