In [1]:
import numpy as np
import matplotlib.pyplot as plt
import xarray as xr
import pandas as pd
from netCDF4 import Dataset
import random

from ast_io import load_data
from ast_io import ast_tool
from ast_io import shuffle_data


1. Select an existing CML netCDF file to start with (e.g. '/pd/data/CML/data/processed/proc_graf_hess_2019.002/timeseries/cnn_detection_standard_gapfill/proc_cnn082_gapstandard_2018_05.nc').
2. Check whether this file has already been copied for AST processing.
3. If not, open the file with xarray, add the ast_processed and anomaly variables, and save it to netCDF with '_ast_2021.001' appended to the file name.


4. If yes, or once step 3 has been performed, open the netCDF file using netCDF4 and extract the cml_id and ast_processed variables.
5. Create a cml_list containing the CML ids in random order.
6. Pick the first CML id from the list.
7. Check if ast_processed is True.
8. If yes, skip the CML.
9. If no, load this CML's txrx series using netCDF4.
10. Use the AST to create numpy arrays of the same shape as txrx containing True or False for the flags.
11. Overwrite the anomaly variables for this CML in the netCDF file.
12. Pick the next CML from the list and return to step 7.

In [2]:
# Input/output locations used throughout the notebook.
in_file = '../data/ast_example_cml_raw.nc'
save_file = '../data/ast_example_cml_ast.nc'
rado_file = '../data/ast_example_cml_radar.nc'
out_file = '../data/ast_example_cml_ast_temp.nc'

# Work on a scratch copy: save_file is only read here, all later cells use out_file.
# The context manager guarantees the source handle is released even if the
# write fails, so the cell can simply be re-run.
with xr.open_dataset(save_file) as ds_template:
    ds_template.to_netcdf(out_file)

# load_data / shuffle_data come from ast_io; they provide the handles and the
# shuffled CML order that the selection tool consumes.
ast, ds, rado = load_data(in_file, out_file, rado_file)
cml_id_shuf, ast_proc_shuf = shuffle_data(ast)

Time Series Selection Tool:

In [3]:
# Switch matplotlib to the interactive widget (ipympl) backend for the tool's figure.
%matplotlib widget
# Start the time series selection tool on the loaded data and the shuffled CML order.
# NOTE(review): ast_tool is defined in ast_io; presumably it stores the chosen
# flags in out_file via the `ast` handle — confirm against ast_io.
ast_tool(ds, ast,rado, cml_id_shuf, ast_proc_shuf)

HBox(children=(VBox(children=(Button(description='next', style=ButtonStyle()), Button(description='previous', …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [4]:
# Close the `ast` handle returned by load_data before re-reading out_file.
ast.close()

# Pull channel_1 fully into memory. The context manager closes the handle that
# open_dataset created, so the file is free again before load_data reopens it
# at the end of this cell.
with xr.open_dataset(out_file) as ds_on_disk:
    ds_ast_check = ds_on_disk.sel(channel_id='channel_1').load()

n_cmls = len(ds_ast_check.cml_id.values)
# A CML counts as processed once its ast_processed entry is no longer NaN.
proc_cmls = ds_ast_check.cml_id.values[~np.isnan(ds_ast_check.ast_processed).values]
ds_ast_check = ds_ast_check.sel(cml_id=proc_cmls)
print('Number of processed CMLs: ', len(proc_cmls), '/',n_cmls)

if len(proc_cmls) > 0:
    # Share of flagged samples per flag, over all samples of the processed CMLs.
    for flag_name in ('OK', 'periodical_mode', 'flux_above_base', 'flux_below_base', 'step'):
        flag_values = ds_ast_check[flag_name].values
        share = np.sum(flag_values) / flag_values.size * 100
        print('percentage of ' + flag_name + ': ', np.round(share, decimals=2))

# Reopen the data and reshuffle so the selection tool can be started again.
ast, ds, rado = load_data(in_file, out_file, rado_file)
cml_id_shuf, ast_proc_shuf = shuffle_data(ast)

Number of processed CMLs:  4 / 4
percentage of OK:  97.62
percentage of periodical_mode:  0.0
percentage of flux_above_base:  1.89
percentage of flux_below_base:  1.16
percentage of step:  0.0


In [5]:
# Load channel_1 of the reference ("ready") flags and of the flags in out_file
# into memory, in that order, for the side-by-side comparison plot.
ds_ast_compare, ds_ast_user = (
    xr.open_dataset(nc_path).sel(channel_id='channel_1').load()
    for nc_path in ('../data/ast_example_cml_ast_ready.nc', out_file)
)

In [8]:
cml_number = 3  # 1-based position of the CML along the cml_id dimension

# Flags that are merged into the single red "anomaly" shading.
ANOMALY_FLAGS = ('periodical_mode', 'flux_above_base', 'flux_below_base', 'step')


def _flag_mask(flag):
    """Return a boolean mask for a flag series, counting NaN as not flagged.

    The reset cell fills unlabelled flags with NaN; a plain bool cast (which is
    what fill_between applies to `where`) would turn those NaNs into True.
    """
    flag_values = np.asarray(flag, dtype=float)
    return ~np.isnan(flag_values) & (flag_values != 0)


def _plot_flagged_txrx(axis, cml, title):
    """Plot the txrx series of one CML on `axis` and shade OK / anomaly periods.

    Parameters
    ----------
    axis : matplotlib Axes to draw on.
    cml : dataset of a single CML providing `txrx`, `time`, `OK` and the
        variables listed in ANOMALY_FLAGS.
    title : title of the panel.
    """
    txrx = cml.txrx
    txrx.plot(ax=axis)
    lower, upper = float(txrx.min()), float(txrx.max())
    times = cml.time.values

    axis.fill_between(times, lower, upper, where=_flag_mask(cml.OK),
                      alpha=0.1, color='green', label='OK')
    any_anomaly = np.any([_flag_mask(cml[name]) for name in ANOMALY_FLAGS], axis=0)
    axis.fill_between(times, lower, upper, where=any_anomaly,
                      alpha=0.1, color='red', label='anomaly')

    # Set the title after txrx.plot so it replaces the automatic one.
    axis.set_title(title)
    # The labels above are only visible once a legend is drawn.
    axis.legend(loc='upper right')


# Guard against 0 / negative values, which isel would silently wrap around.
n_available = min(ds_ast_user.sizes['cml_id'], ds_ast_compare.sizes['cml_id'])
if not 1 <= cml_number <= n_available:
    raise ValueError('cml_number must be between 1 and %d, got %r' % (n_available, cml_number))

fig, ax = plt.subplots(2,1, figsize=(15,8), sharex=True)
_plot_flagged_txrx(ax[0], ds_ast_user.isel(cml_id=cml_number-1), 'User flags')
_plot_flagged_txrx(ax[1], ds_ast_compare.isel(cml_id=cml_number-1), 'Expert flags')
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [9]:
# Release both comparison datasets once the plot has been produced.
for opened_ds in (ds_ast_compare, ds_ast_user):
    opened_ds.close()

In [6]:
# generate a clean ast file to start over
# All label variables are overwritten with NaN (= "not processed yet") and the
# result is written to the file the setup cell copies from.
# A dedicated local name is used so the notebook-wide `ds` consumed by ast_tool
# is not clobbered, and the context manager closes the source file afterwards.
with xr.open_dataset('../data/ast_example_cml_ast_ready.nc') as ds_ready:
    for var_name in ('ast_processed', 'periodical_mode', 'flux_above_base',
                     'flux_below_base', 'step', 'OK'):
        # Keep each variable's own dimensions instead of hardcoding them.
        ds_ready[var_name] = (
            ds_ready[var_name].dims,
            np.full_like(ds_ready[var_name].values, np.nan),
        )
    ds_ready.to_netcdf('../data/ast_example_cml_ast.nc')