In [4]:
from datetime import datetime, timedelta
import numpy as np
import os
import xarray as xr
from tqdm import tqdm

from dask.distributed import Client, LocalCluster

# Launch a local Dask cluster and connect a client to it.
# Layout: 20 workers with one thread each, every worker capped at 4GB of
# memory; the diagnostics dashboard is requested on port 8787.
cluster = LocalCluster(
    dashboard_address=8787,
    memory_limit='4GB',
    threads_per_worker=1,
    n_workers=20,
)
client = Client(cluster)

# Print the dashboard link reported by the cluster itself.
print(f'Visit me @ {cluster.dashboard_link}')

# List every entry of the directory holding the per-frame predictions.
filenames = os.listdir('../../data/predicted/v_0.5')

# Extract timestamps and convert to datetime objects.
# Only entries named `predicted_<timestamp>.nc` are parsed: the rest of the
# script rebuilds file names with exactly that prefix and suffix, and any
# other directory entry (hidden files, checkpoint folders, ...) would make
# the split/strptime below raise.
timestamps = [
    datetime.strptime(f.split('_')[1].split('.nc')[0], "%Y-%m-%dT%H:%M:%S")
    for f in filenames
    if f.startswith('predicted_') and f.endswith('.nc')
]

# Sort the timestamps chronologically so neighbouring entries can be compared
timestamps.sort()

# Find consecutive sequences: runs of timestamps in which each timestamp is
# at most `treshold_delta` after the previous one. Only runs containing at
# least `consecutive_length` timestamps are kept.
consecutive_sequences = []
current_sequence = []

# NOTE: the spelling `treshold_delta` is kept as-is because other cells may
# refer to this module-level name.
treshold_delta = timedelta(seconds=10)
consecutive_length = 20

for timestamp in timestamps:
    # A gap larger than the threshold closes the run collected so far.
    if current_sequence and timestamp - current_sequence[-1] > treshold_delta:
        if len(current_sequence) >= consecutive_length:
            consecutive_sequences.append(current_sequence)
        current_sequence = []
    current_sequence.append(timestamp)

# The last run is never closed by a following gap, so it is checked here.
# It must satisfy the same minimum length as every other run; an empty
# `timestamps` list simply yields no sequences.
if len(current_sequence) >= consecutive_length:
    consecutive_sequences.append(current_sequence)

# Map every timestamp of every sequence back to the file name it was parsed
# from, keeping one list of file names per sequence.
consecutive_filenames = [
    [f"predicted_{dt.strftime('%Y-%m-%dT%H:%M:%S')}.nc" for dt in run]
    for run in consecutive_sequences
]

# Number of sequences found (only displayed when this is the last
# expression of a notebook cell).
len(consecutive_filenames)

# Concatenate the frames of each sequence along a new `time` dimension and
# store the result as one NetCDF file per sequence.
for sequence in tqdm(consecutive_filenames):
    # The timestamps of the first and last frame name the output file.
    sequence_start = sequence[0].split('_')[1].split('.nc')[0]
    sequence_end = sequence[-1].split('_')[1].split('.nc')[0]

    # Check if the sequence is already saved
    filename = f"../../data/predicted/pushbroom/v_0.2/{sequence_start}_{sequence_end}.nc"
    if os.path.exists(filename):
        print(f"Sequence {sequence_start} to {sequence_end} already saved")
        continue

    # Make sure the output directory exists before writing into it.
    os.makedirs(os.path.dirname(filename), exist_ok=True)

    # Write under a temporary name first and rename only after the write has
    # finished. Otherwise an interrupted run would leave a truncated file at
    # `filename`, which the existence check above would treat as complete.
    partial_filename = f"{filename}.part"

    frames = []
    try:
        for frame in sequence:
            frames.append(
                xr.open_dataset(
                    f"../../data/predicted/v_0.5/{frame}",
                    chunks={'x': -1, 'y': -1},
                )
            )
        ds = xr.concat(frames, dim='time')
        ds.to_netcdf(partial_filename, engine='h5netcdf')
    finally:
        # Release the per-frame file handles whether or not the write
        # succeeded, so they do not pile up across sequences.
        for frame_ds in frames:
            frame_ds.close()

    os.replace(partial_filename, filename)
    print(f"Saved sequence {sequence_start} to {sequence_end}")


Visit me @ http://127.0.0.1:34275/status


  6%|▌         | 1/18 [02:11<37:07, 131.00s/it]

Saved sequence 2022-03-20T10:35:00 to 2022-03-20T10:50:00


 11%|█         | 2/18 [02:54<21:14, 79.67s/it] 

Saved sequence 2022-03-20T11:20:00 to 2022-03-20T11:26:00


 17%|█▋        | 3/18 [03:38<15:46, 63.10s/it]

Saved sequence 2022-03-21T11:39:00 to 2022-03-21T11:44:00


 22%|██▏       | 4/18 [08:54<38:01, 162.96s/it]

Saved sequence 2022-03-28T10:28:00 to 2022-03-28T11:05:00


 28%|██▊       | 5/18 [10:04<28:03, 129.50s/it]

Saved sequence 2022-03-28T13:13:30 to 2022-03-28T13:21:30


 33%|███▎      | 6/18 [12:08<25:34, 127.84s/it]

Saved sequence 2022-03-28T14:12:30 to 2022-03-28T14:27:00


 39%|███▉      | 7/18 [13:39<21:10, 115.49s/it]

Saved sequence 2022-03-29T14:27:00 to 2022-03-29T14:37:30


 44%|████▍     | 8/18 [14:29<15:49, 94.94s/it] 

Saved sequence 2022-03-30T09:46:30 to 2022-03-30T09:52:00


 50%|█████     | 9/18 [15:37<12:56, 86.33s/it]

Saved sequence 2022-04-01T09:25:00 to 2022-04-01T09:32:30


 56%|█████▌    | 10/18 [20:26<19:52, 149.04s/it]

Saved sequence 2022-04-01T10:21:00 to 2022-04-01T10:54:00


 61%|██████    | 11/18 [23:23<18:22, 157.50s/it]

Saved sequence 2022-04-01T11:22:30 to 2022-04-01T11:43:30


 67%|██████▋   | 12/18 [27:43<18:52, 188.68s/it]

Saved sequence 2022-04-01T12:20:00 to 2022-04-01T12:51:00


 72%|███████▏  | 13/18 [29:32<13:42, 164.51s/it]

Saved sequence 2022-04-01T13:36:00 to 2022-04-01T13:52:30


 78%|███████▊  | 14/18 [30:16<08:32, 128.05s/it]

Saved sequence 2022-04-04T09:18:30 to 2022-04-04T09:23:30


 83%|████████▎ | 15/18 [35:25<09:07, 182.58s/it]

Saved sequence 2022-04-04T12:24:00 to 2022-04-04T13:01:00


 89%|████████▉ | 16/18 [38:15<05:57, 178.80s/it]

Saved sequence 2022-04-04T13:19:30 to 2022-04-04T13:40:00


 94%|█████████▍| 17/18 [41:33<03:04, 184.68s/it]

Saved sequence 2022-04-04T14:08:00 to 2022-04-04T14:30:00


100%|██████████| 18/18 [43:39<00:00, 145.51s/it]

Saved sequence 2022-04-08T09:22:00 to 2022-04-08T09:40:00



