<a href="https://colab.research.google.com/github/your-repo/your-project/blob/main/v2/nb/generate_sequences.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Generate Sequential Dataset

This notebook generates sequential datasets from processed wildfire data. It:
1. Loads the processed data from per-year parquet batch files
2. Creates sequences using sliding windows
3. Saves the sequences as a compressed npz file, with a JSON metadata file alongside it

In [None]:
# Imports used throughout the notebook. `drive` mounts Google Drive below;
# `runtime` is used by the final cell (`runtime.unassign()`).
from google.colab import drive, runtime
import os
import time
from datetime import datetime
import numpy as np
import pandas as pd
import random
import json
import gc

# Mount Google Drive at /content/drive; every data path configured in this
# notebook lives under that mount point.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
"""
NOTE: use hyperparams to name output dataset

"""
# NOTE: test dataset
# OUTPUT_DIR = '/content/drive/My Drive/Colab Notebooks/wildfire/data/test'
# YEARS = range(2023, 2024)  # 2021-2022

# Update paths and parameters
BASE_INPUT_DIR = '/content/drive/My Drive/Colab Notebooks/wildfire/new_data/processed_scaled'
# OUTPUT_DIR = '/content/drive/My Drive/Colab Notebooks/wildfire/new_data/train'
OUTPUT_DIR = '/content/drive/My Drive/Colab Notebooks/wildfire/new_data/test_detect'
# YEARS = [2022, 2021, 2020]
# YEARS = [2019, 2020, 2021, 2022, 2023, 2024]
YEARS = [2024]
# YEARS = [2025]  # test new multi-labels pipelines

INPUT_DIRS = [os.path.join(BASE_INPUT_DIR, str(year)) for year in YEARS]
WINDOW_SIZE = 15 # 7, 30
PREDICTION_OFFSET = 0 # 7, 15, 30
NEG_POS_RATIO = 4 # 5
TEST_MODE = False

# LABEL = 'targetY'
LABELS = ['targetY', 'targetY_prob', 'targetY_o1', 'targetY_o1_prob',
          'targetY_o2', 'targetY_o2_prob', 'targetY_o3', 'targetY_o3_prob']

In [None]:
"""
Generate Sequential Dataset

This script generates sequential datasets from processed wildfire data. It:
1. Loads location mapping and processes each batch file
2. Creates sequences using sliding windows
3. Saves the sequences as compressed npz files by batch
"""


# Copy preprocessing code
# NUMERIC_FEATURES = [
#     'LST_Day_1km', 'LST_Night_1km', 'Emis_31', 'Emis_32',
#     'relative_humidity_2m_above_ground', 'u_component_of_wind_10m_above_ground',
#     'v_component_of_wind_10m_above_ground', 'precipitable_water_entire_atmosphere'
# ]
NUMERIC_FEATURES = ['LST_Day_1km', 'LST_Night_1km',
       'Emis_31', 'Emis_32', 'dewpoint_temperature_2m',
       'temperature_2m', 'soil_temperature_level_1',
       'surface_net_thermal_radiation', 'u_component_of_wind_10m',
       'v_component_of_wind_10m', 'surface_pressure', 'total_precipitation',
       'elevation', 'NDVI']

GROUP_COLS = ['longitude', 'latitude']

random.seed(42)
np.random.seed(42)

def load_mapping(input_dir):
    """Read ``location_mapping.parquet`` from ``input_dir``.

    Returns the mapping DataFrame indexed by a ``(longitude, latitude)``
    MultiIndex. If the stored index is not already a MultiIndex, it is
    rebuilt from the index values, which are then treated as tuples.
    """
    mapping_df = pd.read_parquet(os.path.join(input_dir, 'location_mapping.parquet'))

    # Nothing to do when the parquet file already carries the MultiIndex.
    if isinstance(mapping_df.index, pd.MultiIndex):
        return mapping_df

    level_names = ['longitude', 'latitude']
    mapping_df.index = pd.MultiIndex.from_tuples(mapping_df.index, names=level_names)
    return mapping_df

def generate_sequences_from_group(group_df, window_size, location_mapping_idx, prediction_offset=0, feature_cols=None, year=None):
    """Build sliding-window sequences for one location, split by primary label.

    Rows of ``group_df`` are sorted by ``date``; every run of ``window_size``
    consecutive rows becomes one sequence of ``feature_cols`` values.

    Labelling, for every column in ``LABELS``:
        * ``prediction_offset == 0``: the label is read from the LAST row of
          the window (row ``i + window_size - 1``).
        * ``prediction_offset > 0``: the label is aggregated over the
          ``prediction_offset`` rows starting at the last row of the window
          (maximum for ``*_prob`` columns, "any value == 1" otherwise).

    A sequence is "positive" when its ``targetY`` label equals 1, otherwise
    "negative".

    Args:
        group_df: Rows of a single location; must contain ``date``, the
            feature columns and every column in ``LABELS``.
        window_size: Number of consecutive rows per sequence.
        location_mapping_idx: Identifier stored in each sequence's metadata.
        prediction_offset: Size of the label aggregation window (see above).
        feature_cols: Feature column names; defaults to ``NUMERIC_FEATURES``.
        year: Value stored under ``'year'`` in each sequence's metadata.

    Returns:
        ``(X_pos, y_pos, meta_pos, X_neg, y_neg, meta_neg)``. ``X_*`` is an
        array of stacked windows, ``y_*`` a dict mapping label name to an
        array, ``meta_*`` a list of metadata dicts. Any element is ``None``
        when its class has no sequences; all six are ``None`` when the group
        has fewer than ``window_size + prediction_offset`` rows.
    """
    if feature_cols is None:
        feature_cols = NUMERIC_FEATURES

    group_df = group_df.sort_values('date').reset_index(drop=True)
    n_rows = len(group_df)
    if n_rows < window_size + prediction_offset:
        return None, None, None, None, None, None

    features = group_df[feature_cols].values
    targets = {label: group_df[label].values for label in LABELS}

    # Format each date once here instead of looking up and formatting the
    # start/end date again for every window inside the loop.
    date_strs = [
        d.strftime('%Y-%m-%d') if isinstance(d, pd.Timestamp) else d
        for d in group_df['date'].tolist()
    ]

    Xs_pos, seq_meta_pos = [], []
    ys_pos = {label: [] for label in LABELS}
    Xs_neg, seq_meta_neg = [], []
    ys_neg = {label: [] for label in LABELS}

    for i in range(n_rows - window_size - prediction_offset + 1):
        last = i + window_size - 1  # index of the final row inside the window

        if prediction_offset > 0:
            # NOTE(review): this window starts AT the last input row and so
            # never reaches the final row of the group; if the intent was
            # "the next `prediction_offset` rows after the window" the slice
            # should start at `last + 1`. Behaviour kept as-is - confirm.
            target_dict = {}
            for label in LABELS:
                target_window = targets[label][last:last + prediction_offset]
                if '_prob' in label:
                    # Probability targets: highest probability in the window.
                    target_dict[label] = np.max(target_window) if len(target_window) > 0 else 0
                else:
                    # Binary targets: 1 if any row in the window is 1.
                    target_dict[label] = 1 if np.any(target_window == 1) else 0
        else:
            # No offset: label comes from the last row of the window itself.
            target_dict = {label: targets[label][last] for label in LABELS}

        seq_metadata = {
            'location_mapping_idx': location_mapping_idx,
            'window_start': i,
            'window_size': window_size,
            'start_date': date_strs[i],
            'end_date': date_strs[last],
            'prediction_offset': prediction_offset,
            'year': year
        }

        # The primary target (targetY) decides the positive/negative split.
        if target_dict['targetY'] == 1:
            X_bucket, y_bucket, meta_bucket = Xs_pos, ys_pos, seq_meta_pos
        else:
            X_bucket, y_bucket, meta_bucket = Xs_neg, ys_neg, seq_meta_neg

        X_bucket.append(features[i:i + window_size])
        for label in LABELS:
            y_bucket[label].append(target_dict[label])
        meta_bucket.append(seq_metadata)

    return (np.array(Xs_pos) if Xs_pos else None,
            {k: np.array(v) for k, v in ys_pos.items()} if ys_pos[LABELS[0]] else None,
            seq_meta_pos if seq_meta_pos else None,
            np.array(Xs_neg) if Xs_neg else None,
            {k: np.array(v) for k, v in ys_neg.items()} if ys_neg[LABELS[0]] else None,
            seq_meta_neg if seq_meta_neg else None)

def _concat_nonempty(arrays, dtype=None):
    """Concatenate the non-empty arrays in ``arrays``; empty array if none remain."""
    kept = [arr for arr in arrays if arr.size > 0]
    if not kept:
        return np.array([], dtype=dtype)
    merged = np.concatenate(kept)
    return merged if dtype is None else merged.astype(dtype)


def process_batch_file(batch_path, mapping_df, window_size, prediction_offset=0, neg_pos_ratio=3, year=None):
    """Turn one parquet batch file into a shuffled, class-balanced sequence set.

    Steps:
        1. Read the batch; for ``year == 2024`` keep only months 8-10.
        2. For every (longitude, latitude) group found in ``mapping_df``,
           generate positive and negative sequences.
        3. Keep all positives and a random sample of at most
           ``positives * neg_pos_ratio`` negatives.
        4. Shuffle and number the sequences (``meta['sequence_idx']``).

    Args:
        batch_path: Path of the parquet batch file.
        mapping_df: Location mapping indexed by (longitude, latitude) with a
            ``mapping_idx`` column.
        window_size: Rows per sequence.
        prediction_offset: Passed through to sequence generation.
        neg_pos_ratio: Maximum number of negatives kept per positive.
        year: Year of the batch; stored in metadata and used for the 2024
            month filter.

    Returns:
        ``(X, y, metadata)`` where ``y`` maps each label in ``LABELS`` to an
        array, or ``(None, None, None)`` when the batch has no usable rows,
        no positive samples, or any error occurs (the error is printed).
    """
    # Bind the name before the try block: the cleanup in `finally` deletes it,
    # which would raise UnboundLocalError if reading the file itself failed.
    batch_df = None
    try:
        batch_df = pd.read_parquet(batch_path)

        # Add filter for 2024 data to only include months 8-10
        if year == 2024:
            batch_df['month'] = pd.to_datetime(batch_df['date']).dt.month
            batch_df = batch_df[batch_df['month'].between(8, 10)]
            if len(batch_df) == 0:
                print(f"No data for months 8-10 in batch {os.path.basename(batch_path)}, skipping...")
                return None, None, None

        all_X_pos = []
        all_y_pos = {label: [] for label in LABELS}
        all_meta_pos = []
        all_X_neg = []
        all_y_neg = {label: [] for label in LABELS}
        all_meta_neg = []

        # Group once and reuse the object for both the count and the loop.
        grouped = batch_df.groupby(GROUP_COLS)
        total_groups = grouped.ngroups
        print(f"Processing {total_groups} location groups in batch...")

        for group_idx, ((lon, lat), group) in enumerate(grouped, 1):
            if group_idx % 10000 == 0:
                print(f"Processing group {group_idx}/{total_groups}...")

            try:
                # Get the location's mapping_idx from mapping
                location_mapping_idx = mapping_df.loc[(lon, lat)]['mapping_idx']
            except KeyError:
                print(f"Warning: Location ({lon}, {lat}) not found in mapping. Skipping...")
                continue

            X_pos, y_pos, meta_pos, X_neg, y_neg, meta_neg = generate_sequences_from_group(
                group,
                window_size=window_size,
                location_mapping_idx=location_mapping_idx,
                prediction_offset=prediction_offset,
                year=year
            )

            # Collect positive and negative cases separately
            if X_pos is not None and y_pos is not None:
                all_X_pos.append(X_pos)
                for label in LABELS:
                    if len(y_pos[label]) > 0:  # Only add if array is not empty
                        all_y_pos[label].append(y_pos[label])
                all_meta_pos.extend(meta_pos)
            if X_neg is not None and y_neg is not None:
                all_X_neg.append(X_neg)
                for label in LABELS:
                    if len(y_neg[label]) > 0:  # Only add if array is not empty
                        all_y_neg[label].append(y_neg[label])
                all_meta_neg.extend(meta_neg)

        # A batch without positives is skipped entirely.
        if not all_X_pos:
            print(f"No positive samples in batch {os.path.basename(batch_path)}, skipping...")
            return None, None, None

        # Combine positive cases; probabilities as float32, binary labels as int8.
        X_pos_combined = np.concatenate(all_X_pos)
        y_pos_combined = {
            label: _concat_nonempty(
                all_y_pos[label],
                dtype=np.float32 if '_prob' in label else np.int8,
            )
            for label in LABELS
        }

        # If no positive samples, skip this batch
        if len(y_pos_combined[LABELS[0]]) == 0:
            print(f"No positive samples in batch {os.path.basename(batch_path)}, skipping...")
            return None, None, None

        # Final combination starts with all positive samples.
        all_X = [X_pos_combined]
        all_y = {label: [y_pos_combined[label]] for label in LABELS}
        all_metadata = list(all_meta_pos)

        # Add negative samples if we have them
        if all_X_neg:
            try:
                X_neg_combined = np.concatenate(all_X_neg)
                y_neg_combined = {
                    label: _concat_nonempty(all_y_neg[label]) for label in LABELS
                }

                n_neg_total = len(y_neg_combined[LABELS[0]])
                n_neg_keep = min(n_neg_total, int(len(y_pos_combined[LABELS[0]]) * neg_pos_ratio))

                if n_neg_keep > 0:
                    # Randomly sample negative cases (without replacement).
                    neg_indices = np.random.choice(n_neg_total, n_neg_keep, replace=False)
                    X_neg_combined = X_neg_combined[neg_indices]
                    for label in LABELS:
                        y_neg_combined[label] = y_neg_combined[label][neg_indices]
                    all_meta_neg = [all_meta_neg[i] for i in neg_indices]

                    # Append only after everything was sampled successfully so
                    # X, y and metadata cannot get out of step.
                    all_X.append(X_neg_combined)
                    for label in LABELS:
                        all_y[label].append(y_neg_combined[label])
                    all_metadata.extend(all_meta_neg)
                else:
                    print("No negative samples to keep after ratio calculation")
            except Exception as e:
                # Deliberate best-effort: a failure here keeps the positives.
                print(f"Error processing negative samples: {str(e)}")
                print("Continuing with positive samples only")
        else:
            print("No negative samples in batch, continuing with positive samples only")

        # Combine all samples
        X = np.concatenate(all_X)
        y = {k: np.concatenate(v) for k, v in all_y.items()}

        # Shuffle the combined dataset, applying one permutation to X, y and metadata.
        shuffle_idx = np.random.permutation(len(y[LABELS[0]]))
        X = X[shuffle_idx]
        for label in LABELS:
            y[label] = y[label][shuffle_idx]
        metadata = [all_metadata[i] for i in shuffle_idx]

        # Add sequence indices
        for idx, meta in enumerate(metadata):
            meta['sequence_idx'] = idx

        print(f"Final batch dataset composition: {np.sum(y[LABELS[0]])} positive, {len(y[LABELS[0]]) - np.sum(y[LABELS[0]])} negative samples")
        return X, y, metadata

    except Exception as e:
        print(f"Error processing batch file: {str(e)}")
        return None, None, None
    finally:
        # Explicitly drop the batch frame and force garbage collection
        del batch_df
        gc.collect()

def trace_failed_prediction(sequence_idx, metadata_path, processed_data_base_dir):
    """Trace a sequence back to the source rows it was built from.

    Args:
        sequence_idx: ``sequence_idx`` of the sequence to look up.
        metadata_path: Path of the JSON metadata file (a list of per-sequence
            dicts with ``sequence_idx``, ``year``, ``location_mapping_idx``,
            ``window_start`` and ``window_size``).
        processed_data_base_dir: Folder containing one sub-folder per year
            with ``location_mapping.parquet`` and the batch files.

    Returns:
        DataFrame with the ``window_size`` date-sorted rows of the sequence's
        location, starting at ``window_start``.

    Raises:
        StopIteration: If no metadata entry has the given ``sequence_idx``.
        IndexError: If the ``location_mapping_idx`` is not in the mapping.
    """
    # Load sequence metadata
    with open(metadata_path, 'r') as f:
        sequence_metadata = json.load(f)

    # Get specific sequence metadata
    seq_meta = next(meta for meta in sequence_metadata
                   if meta['sequence_idx'] == sequence_idx)

    # Load location mapping for the year
    year = seq_meta['year']
    year_dir = os.path.join(processed_data_base_dir, str(year))
    mapping_df = pd.read_parquet(os.path.join(year_dir, 'location_mapping.parquet'))

    # Find location information using mapping_idx; the row's index label is
    # unpacked as the (longitude, latitude) pair.
    location_info = mapping_df[mapping_df['mapping_idx'] == seq_meta['location_mapping_idx']].iloc[0]
    longitude, latitude = location_info.name
    batch_file = location_info['batch_file']

    # Load and filter batch data
    batch_df = pd.read_parquet(os.path.join(year_dir, batch_file))
    location_data = batch_df[
        (batch_df['longitude'] == longitude) &
        (batch_df['latitude'] == latitude)
    ].sort_values('date')

    # Sequence generation restricts 2024 batches to months 8-10 BEFORE
    # windowing, so `window_start` counts rows of the filtered data. Apply the
    # same filter here, otherwise the slice below returns the wrong rows.
    if year == 2024:
        months = pd.to_datetime(location_data['date']).dt.month
        location_data = location_data[months.between(8, 10)]

    # Extract the specific window
    start = seq_meta['window_start']
    window_data = location_data.iloc[start:start + seq_meta['window_size']]

    return window_data

def generate_and_save_sequences(input_dirs, output_dir, window_size, prediction_offset=0,
                              neg_pos_ratio=3, test_mode=False):
    """Generate sequences from all years and save them as a single dataset.

    Each entry of ``input_dirs`` is a per-year folder whose base name is the
    year. Every year is processed batch by batch, shuffled and written to
    ``<output_dir>/temp/temp_<year>.npz`` plus ``temp_<year>_metadata.json``.
    The yearly files are then merged, shuffled again and written to
    ``<output_dir>/sequences_y<min>-<max>_w<window>_o<offset>_r<ratio>[_test]``
    as ``.npz`` (``X`` plus one array per label) and ``_metadata.json``.
    Temporary files are intentionally left in place.

    Args:
        input_dirs: Per-year input folders (base name must parse as an int).
        output_dir: Destination folder; created if missing.
        window_size: Rows per sequence.
        prediction_offset: Passed through to sequence generation.
        neg_pos_ratio: Maximum number of negatives kept per positive.
        test_mode: When true, only the first batch file of each year is
            processed and ``_test`` is appended to the output name.

    Returns:
        None. Nothing is written to the final files when no year produced data.
    """
    start_time = time.time()
    print(f"\n[{datetime.now().strftime('%H:%M:%S')}] Starting sequence generation")
    print(f"Parameters: window_size={window_size}, prediction_offset={prediction_offset}, "
          f"neg_pos_ratio={neg_pos_ratio}, test_mode={test_mode}")

    os.makedirs(output_dir, exist_ok=True)
    temp_dir = os.path.join(output_dir, 'temp')
    os.makedirs(temp_dir, exist_ok=True)

    # Create filename with hyperparameters. The year range comes from the
    # folders actually passed in, not from a notebook-level global, so the
    # name always matches the data being processed.
    years = [int(os.path.basename(d)) for d in input_dirs]
    years_str = f"{min(years)}-{max(years)}" if years else "none"
    base_filename = f"sequences_y{years_str}_w{window_size}_o{prediction_offset}_r{neg_pos_ratio}"
    if test_mode:
        base_filename += "_test"

    year_files = []  # (path prefix, number of sequences) per saved year
    total_sequences = 0
    total_locations = 0
    total_positive = 0
    sequence_shape = None  # per-sequence shape, taken from the first saved year

    # Process each year's data separately
    for input_dir in input_dirs:
        year = os.path.basename(input_dir)
        print(f"\nProcessing year: {year}")

        year_X = []
        year_y = {k: [] for k in LABELS}
        year_metadata = []

        # Load mapping for this year
        mapping_df = load_mapping(input_dir)
        batch_groups = list(mapping_df.groupby('batch_file'))

        if test_mode:
            test_batch_count = 1  # Only the first batch file is processed in test mode
            original_count = len(batch_groups)
            batch_groups = batch_groups[:test_batch_count]
            print(f"\n[TEST MODE] Processing {len(batch_groups)} out of {original_count} batch files")

        for batch_idx, (batch_file, locations) in enumerate(batch_groups, 1):
            print(f"\n[{datetime.now().strftime('%H:%M:%S')}] Processing {year} batch {batch_idx}/{len(batch_groups)}")

            # Process batch file
            batch_path = os.path.join(input_dir, batch_file)
            try:
                X, y, metadata = process_batch_file(
                    batch_path,
                    mapping_df,
                    window_size=window_size,
                    prediction_offset=prediction_offset,
                    neg_pos_ratio=neg_pos_ratio,
                    year=int(year)
                )

                if X is not None and y is not None and metadata is not None:
                    # Convert numpy types to Python native types in metadata
                    # so the metadata can be written with json.dump.
                    for meta in metadata:
                        meta['sequence_idx'] = int(meta['sequence_idx'])
                        meta['location_mapping_idx'] = int(meta['location_mapping_idx'])
                        meta['window_start'] = int(meta['window_start'])
                        meta['window_size'] = int(meta['window_size'])
                        meta['prediction_offset'] = int(meta['prediction_offset'])
                        if 'year' in meta:
                            meta['year'] = int(meta['year'])
                        # Convert dates to string format if they're not already
                        if isinstance(meta.get('start_date'), pd.Timestamp):
                            meta['start_date'] = meta['start_date'].strftime('%Y-%m-%d')
                        if isinstance(meta.get('end_date'), pd.Timestamp):
                            meta['end_date'] = meta['end_date'].strftime('%Y-%m-%d')

                    year_X.append(X)
                    for label in LABELS:
                        year_y[label].append(y[label])
                    year_metadata.extend(metadata)
                    total_locations += len(locations)

            finally:
                gc.collect()

        # Combine and save year's data
        if year_X:  # Only save if we have data for this year
            year_X_combined = np.concatenate(year_X)
            year_y_combined = {k: np.concatenate(v) for k, v in year_y.items()}

            # Shuffle year's data
            year_shuffle_idx = np.random.permutation(len(year_y_combined[LABELS[0]]))
            year_X_combined = year_X_combined[year_shuffle_idx]
            for label in LABELS:
                year_y_combined[label] = year_y_combined[label][year_shuffle_idx]
            year_metadata = [year_metadata[i] for i in year_shuffle_idx]

            # Update sequence indices for the year
            for idx, meta in enumerate(year_metadata):
                meta['sequence_idx'] = idx

            # Remember the per-sequence shape so the merged array is sized
            # from the data itself rather than from notebook-level globals.
            if sequence_shape is None:
                sequence_shape = tuple(year_X_combined.shape[1:])

            # Save year's data to temporary files
            year_filename = f"temp_{year}"
            year_path = os.path.join(temp_dir, year_filename)
            np.savez_compressed(f"{year_path}.npz", X=year_X_combined, **year_y_combined)

            with open(f"{year_path}_metadata.json", 'w') as f:
                json.dump(year_metadata, f)

            year_files.append((year_path, len(year_y_combined[LABELS[0]])))
            total_sequences += len(year_y_combined[LABELS[0]])
            total_positive += np.sum(year_y_combined[LABELS[0]])

            print(f"\nYear {year} stats:")
            print(f"  Sequences: {len(year_y_combined[LABELS[0]])}")
            print(f"  Positive samples: {np.sum(year_y_combined[LABELS[0]])}")

            # Clear memory
            del year_X_combined, year_y_combined, year_metadata
            gc.collect()

    # Combine all years' data
    if year_files:
        print("\nCombining all years' data...")

        # Calculate indices for each year in final dataset
        current_idx = 0
        final_metadata = []

        # First pass: load and update metadata
        for year_path, year_size in year_files:
            with open(f"{year_path}_metadata.json", 'r') as f:
                year_metadata = json.load(f)

            # Offset the per-year indices into the merged dataset
            for meta in year_metadata:
                meta['sequence_idx'] += current_idx

            final_metadata.extend(year_metadata)
            current_idx += year_size

        # Second pass: combine data
        final_X = np.zeros((total_sequences,) + sequence_shape, dtype=np.float32)
        final_y_dict = {}
        for label in LABELS:
            # Use float32 for probability targets, int8 for binary targets
            dtype = np.float32 if '_prob' in label else np.int8
            final_y_dict[label] = np.zeros(total_sequences, dtype=dtype)

        current_idx = 0
        for year_path, year_size in year_files:
            # The context manager closes the underlying npz file handle.
            with np.load(f"{year_path}.npz") as year_data:
                final_X[current_idx:current_idx + year_size] = year_data['X']
                for label in LABELS:
                    final_y_dict[label][current_idx:current_idx + year_size] = year_data[label]
            current_idx += year_size

            # Clear memory after each year
            gc.collect()

        # Final shuffle of complete dataset
        final_shuffle_idx = np.random.permutation(len(final_y_dict[LABELS[0]]))
        final_X = final_X[final_shuffle_idx]
        for label in LABELS:
            final_y_dict[label] = final_y_dict[label][final_shuffle_idx]
        final_metadata = [final_metadata[i] for i in final_shuffle_idx]

        # Update sequence indices after final shuffle
        for idx, meta in enumerate(final_metadata):
            meta['sequence_idx'] = idx

        # Save final combined dataset with all labels
        save_dict = {'X': final_X}
        for label in LABELS:
            save_dict[label] = final_y_dict[label]
        np.savez_compressed(os.path.join(output_dir, f'{base_filename}.npz'), **save_dict)
        with open(os.path.join(output_dir, f'{base_filename}_metadata.json'), 'w') as f:
            json.dump(final_metadata, f)

        # Clean up temporary files
        # NOTE: disable for now
        # for year_path, _ in year_files:
        #     os.remove(f"{year_path}.npz")
        #     os.remove(f"{year_path}_metadata.json")
        # os.rmdir(temp_dir)

        total_duration = time.time() - start_time
        print(f"\n[{datetime.now().strftime('%H:%M:%S')}] Sequence generation summary:")
        print(f"Total time: {total_duration:.2f} seconds")
        print(f"Total sequences: {total_sequences}")
        print(f"Total locations: {total_locations}")
        print(f"Total positive samples: {total_positive} ({(total_positive/total_sequences)*100:.2f}%)")

In [None]:
# Generate and save sequences for the configured years, using the
# hyperparameters defined in the configuration cell.
generate_and_save_sequences(
    input_dirs=INPUT_DIRS,
    output_dir=OUTPUT_DIR,
    window_size=WINDOW_SIZE,
    prediction_offset=PREDICTION_OFFSET,
    neg_pos_ratio=NEG_POS_RATIO,
    test_mode=TEST_MODE
)


[13:31:01] Starting sequence generation
Parameters: window_size=15, prediction_offset=0, neg_pos_ratio=4, test_mode=False

Processing year: 2024

[13:31:04] Processing 2024 batch 1/16
Processing 50000 location groups in batch...
Processing group 10000/50000...
Processing group 20000/50000...
Processing group 30000/50000...
Processing group 40000/50000...
Processing group 50000/50000...
Final batch dataset composition: 66 positive, 264 negative samples

[13:35:11] Processing 2024 batch 2/16
Processing 50000 location groups in batch...
Processing group 10000/50000...
Processing group 20000/50000...
Processing group 30000/50000...
Processing group 40000/50000...
Processing group 50000/50000...
Final batch dataset composition: 99 positive, 396 negative samples

[13:38:52] Processing 2024 batch 3/16
Processing 50000 location groups in batch...
Processing group 10000/50000...
Processing group 20000/50000...
Processing group 30000/50000...
Processing group 40000/50000...
Processing group 500

In [None]:
# Unassign the Colab runtime once the cells above have finished
# (presumably to stop using compute after the long run - confirm).
runtime.unassign()