In [1]:
%load_ext autoreload
%autoreload 2
import multiprocessing
from pathlib import Path
import shutil
import numpy as np
import thor.data as data
import thor.data.dispatch as dispatch
import thor.grid as grid
import thor.option as option
import thor.track as track
import thor.analyze as analyze
import thor.parallel as parallel
import thor.visualize as visualize

notebook_name = "gridrad_demo.ipynb"


## You are using the Python ARM Radar Toolkit (Py-ART), an open source
## library for working with weather radar data. Py-ART is partly
## supported by the U.S. Department of Energy as part of the Atmospheric
## Radiation Measurement (ARM) Climate Research Facility, an Office of
## Science user facility.
##
## If you use this software to prepare a publication, please cite:
##
##     JJ Helmus and SM Collis, JORS 2016, doi: 10.5334/jors.119



In [6]:
urls = [line.strip() for line in f]

ValueError: I/O operation on closed file.

In [15]:


parent_local = "/home/ewan/Documents/"
parent_remote = "https://data.rda.ucar.edu"


In [16]:
import concurrent.futures
import time
# Load list of urls from file
urls = []
with open("/home/ewan/Documents/THOR/workflow/gadi/extracted_urls.txt", "r") as f:
    urls = [line.strip() for line in f]
# Download data


In [17]:
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
    futures = []
    for url in urls:
        time.sleep(1)
        data.utils.download(url, parent_remote, parent_local)
    parallel.check_futures(futures)


2024-10-17 15:36:11,056 - thor.data.utils - INFO - /home/ewan/Documents/d841006/volumes/2010/20100120/nexrad_3d_v4_2_20100120T180000Z.nc already exists.
2024-10-17 15:36:12,059 - thor.data.utils - INFO - /home/ewan/Documents/d841006/volumes/2010/20100120/nexrad_3d_v4_2_20100120T181000Z.nc already exists.
2024-10-17 15:36:13,061 - thor.data.utils - INFO - /home/ewan/Documents/d841006/volumes/2010/20100120/nexrad_3d_v4_2_20100120T182000Z.nc already exists.
2024-10-17 15:36:14,066 - thor.data.utils - INFO - /home/ewan/Documents/d841006/volumes/2010/20100120/nexrad_3d_v4_2_20100120T183000Z.nc already exists.
2024-10-17 15:36:15,070 - thor.data.utils - INFO - /home/ewan/Documents/d841006/volumes/2010/20100120/nexrad_3d_v4_2_20100120T184000Z.nc already exists.
2024-10-17 15:36:16,074 - thor.data.utils - INFO - /home/ewan/Documents/d841006/volumes/2010/20100120/nexrad_3d_v4_2_20100120T185000Z.nc already exists.
2024-10-17 15:36:17,079 - thor.data.utils - INFO - /home/ewan/Documents/d841006/vo

KeyboardInterrupt: 

In [None]:

# Download data
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
    futures = []
    for url in urls:
        time.sleep(1)
        futures.append(
            executor.submit(data.utils.download, url, parent_local, parent_remote)
        )
    parallel.check_futures(futures)

In [2]:
# Parent directory for saving outputs
base_local = Path.home() / "THOR_output"
start = "2010-01-20T18:00:00"
end = "2010-01-21T03:30:00"
event_start = "2010-01-20"

period = parallel.get_period(start, end)
intervals = parallel.get_time_intervals(start, end, period=period)

output_parent = base_local / "runs/gridrad_demo"
if output_parent.exists():
    shutil.rmtree(output_parent)
options_directory = output_parent / "options"

# Create the data_options dictionary
converted_options = {"save": True, "load": False, "parent_converted": None}
gridrad_options = data.gridrad.gridrad_data_options(
    start=start, end=end, converted_options=converted_options, event_start=event_start
)
lon_range = [-102, -89]
lat_range = [27, 39]
era5_pl_options = data.era5.data_options(
    start=start, end=end, longitude_range=lon_range, latitude_range=lat_range
)
args_dict = {"start": start, "end": end, "data_format": "single-levels"}
args_dict.update({"longitude_range": lon_range, "latitude_range": lat_range})
era5_sl_options = data.era5.data_options(**args_dict)

data_options = option.consolidate_options(
    [gridrad_options, era5_pl_options, era5_sl_options]
)

dispatch.check_data_options(data_options)
data.option.save_data_options(data_options, options_directory=options_directory)
gridrad_options = data_options["gridrad"]

2024-10-17 14:32:59,227 - thor.utils - DEBUG - Saving options to /home/ewan/THOR_output/runs/gridrad_demo/options/data.yml


In [4]:

# Create the grid_options dictionary using the first file in the cpol dataset
grid_options = grid.create_options(
    name="geographic", regrid=False, altitude_spacing=None, geographic_spacing=None
)
grid.check_options(grid_options)
grid.save_grid_options(grid_options, options_directory=options_directory)

# Create the track_options dictionary
track_options = option.default_track_options(dataset="gridrad")
# Modify the default options for gridrad. Because grids so large we now use a distinct
# global flow box for each object.
track_options.levels[1].objects[0].tracking.global_flow_margin = 70
track_options.levels[1].objects[0].tracking.unique_global_flow = False
track_options.to_yaml(options_directory / "track.yml")

# Create the display_options dictionary
visualize_options = {
    obj: visualize.option.runtime_options(obj, save=True, style="presentation")
    for obj in ["mcs"]
}
visualize_options = None

2024-10-17 14:33:21,044 - thor.utils - DEBUG - Saving options to /home/ewan/THOR_output/runs/gridrad_demo/options/grid.yml


In [9]:
times = data.utils.generate_times(data_options["gridrad"])
tracks = track.simultaneous_track(
    times,
    data_options,
    grid_options,
    track_options,
    visualize_options,
    output_directory=output_parent
)

2024-10-17 14:55:49,495 - thor.track - INFO - Beginning thor run. Saving output to /home/ewan/THOR_output/runs/gridrad_demo.
2024-10-17 14:55:49,496 - thor.track - INFO - Beginning simultaneous tracking.
2024-10-17 14:55:49,558 - thor.track - INFO - Processing 2010-01-20T18:00:00.
2024-10-17 14:55:49,560 - thor.data.gridrad - INFO - Updating gridrad dataset for 2010-01-20T18:00:00.
2024-10-17 14:55:49,561 - thor.data.gridrad - INFO - Converting gridrad data from nexrad_3d_v4_2_20100120T180000Z.nc
2024-10-17 14:55:49,562 - thor.data.gridrad - DEBUG - Converting GridRad dataset at time 2010-01-20T18:00:00.000000000.
2024-10-17 14:55:49,798 - thor.data.gridrad - DEBUG - Filtering GridRad data
2024-10-17 14:55:51,112 - thor.data.gridrad - DEBUG - Removing clutter from the GridRad data
2024-10-17 14:55:51,220 - thor.data.gridrad - DEBUG - Removing speckles from the GridRad data
2024-10-17 14:55:52,314 - thor.data.gridrad - DEBUG - Removing low level clutter from the GridRad data
2024-10-17 

KeyboardInterrupt: 

In [24]:
with multiprocessing.Pool(initializer=parallel.initialize_process) as pool:
    results = []
    for i, time_interval in enumerate(intervals):
        args = [i, time_interval, data_options.copy(), grid_options.copy()]
        args += [track_options.model_copy(deep=True), visualize_options] 
        args += [output_parent, "gridrad"]
        args = tuple(args)
        results.append(pool.apply_async(parallel.track_interval, args))
    pool.close()
    pool.join()
    parallel.check_results(results)

2024-10-16 21:51:14,671 - thor.option - DEBUG - Saving options to /home/ewan/THOR_output/runs/gridrad_demo/interval_0/options/data.yml
2024-10-16 21:51:14,720 - thor.option - DEBUG - Saving options to /home/ewan/THOR_output/runs/gridrad_demo/interval_0/options/grid.yml
2024-10-16 21:51:14,827 - thor.option - DEBUG - Saving options to /home/ewan/THOR_output/runs/gridrad_demo/interval_1/options/data.yml
2024-10-16 21:51:14,882 - thor.option - DEBUG - Saving options to /home/ewan/THOR_output/runs/gridrad_demo/interval_1/options/grid.yml
2024-10-16 21:51:14,914 - thor.track - INFO - Beginning thor run. Saving output to /home/ewan/THOR_output/runs/gridrad_demo/interval_0.
2024-10-16 21:51:14,918 - thor.track - INFO - Beginning simultaneous tracking.
2024-10-16 21:51:15,010 - thor.track - INFO - Processing 2010-01-20T18:00:00.
2024-10-16 21:51:15,013 - thor.data.gridrad - INFO - Updating gridrad dataset for 2010-01-20T18:00:00.
2024-10-16 21:51:15,016 - thor.data.gridrad - INFO - Converting 

In [25]:
parallel.stitch_run(output_parent, intervals, cleanup=True)

2024-10-16 21:55:23,541 - thor.parallel - INFO - Stitching all attribute, mask and record files.
2024-10-16 21:55:31,322 - thor.parallel - INFO - Stitching record files.
2024-10-16 21:55:31,425 - thor.write.attribute - DEBUG - Writing attribute dataframe to /home/ewan/THOR_output/runs/gridrad_demo/records/filepaths/gridrad.csv
2024-10-16 21:55:31,428 - thor.write.attribute - DEBUG - Saving attribute metadata to /home/ewan/THOR_output/runs/gridrad_demo/records/filepaths/gridrad.yml
2024-10-16 21:55:31,436 - thor.parallel - INFO - Stitching attribute files.
2024-10-16 21:55:31,809 - thor.write.attribute - DEBUG - Writing attribute dataframe to /home/ewan/THOR_output/runs/gridrad_demo/attributes/mcs/anvil/core.csv
2024-10-16 21:55:31,815 - thor.write.attribute - DEBUG - Saving attribute metadata to /home/ewan/THOR_output/runs/gridrad_demo/attributes/mcs/anvil/core.yml
2024-10-16 21:55:32,086 - thor.write.attribute - DEBUG - Writing attribute dataframe to /home/ewan/THOR_output/runs/gridra

In [27]:
analysis_options = analyze.mcs.analysis_options()
analyze.mcs.process_velocities(output_parent)
analyze.mcs.quality_control(output_parent, analysis_options)
analyze.mcs.classify_all(output_parent)

2024-10-16 21:57:13,125 - thor.write.attribute - DEBUG - Writing attribute dataframe to /home/ewan/THOR_output/runs/gridrad_demo/analysis/velocities.csv
2024-10-16 21:57:13,130 - thor.write.attribute - DEBUG - Saving attribute metadata to /home/ewan/THOR_output/runs/gridrad_demo/analysis/velocities.yml
2024-10-16 21:57:13,750 - thor.write.attribute - DEBUG - Writing attribute dataframe to /home/ewan/THOR_output/runs/gridrad_demo/analysis/quality.csv
2024-10-16 21:57:13,759 - thor.write.attribute - DEBUG - Saving attribute metadata to /home/ewan/THOR_output/runs/gridrad_demo/analysis/quality.yml
2024-10-16 21:57:13,996 - thor.write.attribute - DEBUG - Writing attribute dataframe to /home/ewan/THOR_output/runs/gridrad_demo/analysis/classification.csv
2024-10-16 21:57:14,003 - thor.write.attribute - DEBUG - Saving attribute metadata to /home/ewan/THOR_output/runs/gridrad_demo/analysis/classification.yml


Unnamed: 0_level_0,Unnamed: 1_level_0,stratiform_offset,inflow,relative_stratiform_offset,tilt,propagation
time,universal_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-20 18:00:00,1,leading,right,left,down-shear,down-shear
2010-01-20 18:00:00,2,leading,right,left,down-shear,shear-perpendicular
2010-01-20 18:10:00,1,leading,right,left,down-shear,down-shear
2010-01-20 18:10:00,2,leading,right,left,shear-perpendicular,shear-perpendicular
2010-01-20 18:20:00,1,leading,right,left,shear-perpendicular,down-shear
...,...,...,...,...,...,...
2010-01-21 03:10:00,20,right,front,right,shear-perpendicular,down-shear
2010-01-21 03:10:00,21,leading,front,leading,down-shear,down-shear
2010-01-21 03:10:00,23,trailing,front,trailing,up-shear,down-shear
2010-01-21 03:20:00,20,right,front,right,shear-perpendicular,down-shear


In [3]:
figure_options = visualize.option.horizontal_attribute_options(
    "mcs_velocity_analysis", style="presentation", attributes=["velocity", "offset"]
)
start_time = np.datetime64("2010-01-20T18:00")
end_time = np.datetime64(np.datetime64("2010-01-21T03:30"))
args = [output_parent, start_time, end_time, figure_options]
args_dict = {"parallel_figure": True, "dt": 5400, "by_date": False}
visualize.attribute.mcs_series(*args, **args_dict)

2024-10-16 21:59:26,974 - thor.data.gridrad - DEBUG - Converting GridRad dataset at time 2010-01-20T18:00:00.000000000.
2024-10-16 21:59:27,157 - thor.data.gridrad - DEBUG - Filtering GridRad data
2024-10-16 21:59:27,862 - thor.data.gridrad - DEBUG - Removing clutter from the GridRad data
2024-10-16 21:59:27,928 - thor.data.gridrad - DEBUG - Removing speckles from the GridRad data
2024-10-16 21:59:28,355 - thor.data.gridrad - DEBUG - Removing low level clutter from the GridRad data
2024-10-16 21:59:28,604 - thor.data.gridrad - DEBUG - Removing speckles from the GridRad data
2024-10-16 21:59:28,965 - thor.data.gridrad - DEBUG - Got domain mask for gridrad.
2024-10-16 21:59:29,063 - thor.visualize.attribute - DEBUG - Getting grid from dataset at time 2010-01-20T18:00:00.000000000.
2024-10-16 21:59:29,064 - thor.data.gridrad - DEBUG - Got grid from dataset at time 2010-01-20T18:00:00.000000000.
2024-10-16 21:59:29,065 - thor.visualize.attribute - DEBUG - Rebuilding processed grid for time