---
title: Duration
---

There are multiple ways to define the duration of a discontinuity. Here are some possibilities:

- Maximum distance method: calculate the distance matrix between any two points of the discontinuity, and then take the time interval between the two points whose distance is maximum.
- Maximum derivative method: define $d^* = \max( | dB / dt | )$, and then take the time interval bounded by where $| dB/dt |$ decreases to $c_1 d^*$, where $c_1$ is an empirically chosen constant (e.g. 0.25).

Notes:

- An accurate determination of the time interval is important for obtaining the other discontinuity parameters, since it is the first step in the process of finding them.

Caveats:

- The method to find the time interval should not be specific to the choice of coordinate system

In [50]:
#| default_exp propeties/duration
#| export
from scipy.spatial import distance_matrix
import xarray as xr
import numpy as np
import pandas as pd
from xarray_einstats import linalg
from typing import Literal

## Maximum distance method

In [64]:
#| export
def ts_max_distance(
    ts: xr.DataArray, coord: str = "time"
):
    """Return the two coordinate values whose samples are farthest apart.

    Every row of `ts.data` is treated as one point. The pairwise distance
    matrix between all points is computed with
    `scipy.spatial.distance_matrix` (Euclidean by default), and the pair of
    points with the largest separation is located.

    Parameters
    ----------
    - ts (xr.DataArray): samples to analyse; `ts.data` must be 2-D (points x components).
    - coord (str): name of the coordinate whose values are returned for the two points.

    Returns
    -------
    - np.ndarray: the two values of `ts[coord]` at the most distant pair of points.
    """
    points = ts.data
    pairwise = distance_matrix(points, points)
    # argmax works on the flattened matrix; convert back to (row, column).
    row, col = np.unravel_index(np.argmax(pairwise), pairwise.shape)
    return ts[coord].values[[row, col]]

In [155]:
#| code-summary: test for ts_max_distance function
# Sample a half circle in the x-y plane (the third component stays zero), so
# the two farthest points are the first and the last samples.
time = pd.date_range('2000-01-01', periods=10)
x = np.linspace(0, np.pi, 10)
data = np.column_stack([np.sin(x), np.cos(x), np.zeros_like(x)])
ts = xr.DataArray(data, dims=['time', 'space'], coords={'time': time})
start, end = ts_max_distance(ts)
assert (start, end) == (time[0], time[-1])

## Maximum derivative method

In [49]:
#| export
# Default fraction of the peak derivative magnitude used as the threshold.
THRESHOLD_RATIO = 1 / 4


def ts_max_derivative(vec: xr.DataArray, threshold_ratio=THRESHOLD_RATIO):
    """Locate the interval around the peak of the time-derivative magnitude.

    The input is differentiated along "time" (per second), the norm over
    'v_dim' is taken, and the sample with the largest magnitude defines
    `d_star` / `d_time`. The interval bounds are then searched on both sides
    of `d_time` with `find_start_end_times`, using
    `threshold = d_star * threshold_ratio`.

    Parameters
    ----------
    - vec (xr.DataArray): data with a "time" coordinate and a 'v_dim' dimension.
    - threshold_ratio: fraction of the peak magnitude used as the threshold.

    Returns
    -------
    - tuple: (start_time, end_time, d_time, d_star, threshold).

    Raises
    ------
    - ValueError: if the derivative magnitude contains only NaN values.
    """
    derivative = vec.differentiate("time", datetime_unit="s")
    # The gradient at the two edge samples is not reliable, so drop them.
    interior = derivative.isel(time=slice(1, -1))
    magnitude = linalg.norm(interior, dims='v_dim')

    if magnitude.isnull().all():
        raise ValueError("The differentiated vector magnitude contains only NaN values. Cannot compute duration.")

    # Position, value and time of the largest derivative magnitude.
    peak_index = magnitude.argmax(dim="time")
    d_star = magnitude[peak_index].item()
    d_time = magnitude.time[peak_index].values

    threshold = d_star * threshold_ratio
    start_time, end_time = find_start_end_times(magnitude, d_time, threshold)

    return start_time, end_time, d_time, d_star, threshold

def calc_d_duration(vec: xr.DataArray, d_time, threshold) -> pd.Series:
    """Compute the interval bounds around `d_time` for a known threshold.

    The input is differentiated along "time" (per second) and reduced to its
    norm over 'v_dim'; the bounds are then looked up with
    `find_start_end_times`. Unlike `ts_max_derivative`, the edge samples of
    the derivative are kept here.

    Parameters
    ----------
    - vec (xr.DataArray): data with a "time" coordinate and a 'v_dim' dimension.
    - d_time: time around which the interval is searched.
    - threshold: magnitude below which the interval is considered to end.

    Returns
    -------
    - pd.Series: entries 'd_tstart' and 'd_tstop'.
    """
    magnitude = linalg.norm(
        vec.differentiate("time", datetime_unit="s"), dims='v_dim'
    )
    t_start, t_stop = find_start_end_times(magnitude, d_time, threshold)
    bounds = {
        'd_tstart': t_start,
        'd_tstop': t_stop,
    }
    return pd.Series(bounds)
 
def find_start_end_times(vec_diff_mag: xr.DataArray, d_time, threshold) -> tuple[pd.Timestamp, pd.Timestamp]:
    """Find the interval bounds on either side of `d_time`.

    The start is the last time at or before `d_time` where the magnitude is
    below `threshold`; the end is the first time at or after `d_time` where
    it is below `threshold`. Each bound is whatever
    `get_time_from_condition` returns, which is None when no sample on that
    side falls below the threshold.
    """
    # Search backwards: everything up to (and including) d_time.
    start_time = get_time_from_condition(
        vec_diff_mag.sel(time=slice(None, d_time)), threshold, "last_below"
    )
    # Search forwards: everything from d_time onwards.
    end_time = get_time_from_condition(
        vec_diff_mag.sel(time=slice(d_time, None)), threshold, "first_below"
    )
    return start_time, end_time


def get_time_from_condition(vec: xr.DataArray, threshold, condition_type) -> pd.Timestamp:
    """Return the time of the first or last sample of `vec` below `threshold`.

    Parameters
    ----------
    - vec (xr.DataArray): values with a `time` coordinate.
    - threshold: samples strictly below this value qualify.
    - condition_type: "first_below" picks the earliest qualifying sample,
      "last_below" the latest one.

    Returns
    -------
    - The `time` value of the selected sample, or None when no sample is
      below the threshold.

    Raises
    ------
    - ValueError: if `condition_type` is not one of the two supported values.
    """
    if condition_type not in ("first_below", "last_below"):
        raise ValueError(f"Unknown condition_type: {condition_type}")

    # Both modes share the same mask; they only differ in which hit is kept.
    pick = 0 if condition_type == "first_below" else -1
    hits = np.where(vec < threshold)[0]

    if hits.size == 0:
        return None
    return vec.time[hits[pick]].values

In [None]:
# | export
# Names of the duration-finding methods accepted by `calc_duration`.
AvailableMethod = Literal["distance", "derivative"]


def calc_duration(ts: xr.DataArray, method: AvailableMethod = "distance", **kwargs):
    """Compute the duration of a discontinuity with the selected method.

    Parameters
    ----------
    - ts (xr.DataArray): time series handed to the selected method.
    - method: "distance" dispatches to `ts_max_distance`, "derivative" to
      `ts_max_derivative`.
    - **kwargs: forwarded unchanged to the selected method.

    Returns
    -------
    - Whatever the selected method returns.

    Raises
    ------
    - ValueError: if `method` is not a supported name (previously this case
      silently returned None).
    """
    if method == "distance":
        return ts_max_distance(ts, **kwargs)
    if method == "derivative":
        return ts_max_derivative(ts, **kwargs)
    raise ValueError(
        f"Unknown method: {method!r}; expected 'distance' or 'derivative'"
    )

## Obsolete code

This code is obsolete because the time windows now overlap, so there is no need to handle magnetic discontinuities that occur at the boundary of a time window.

In [None]:
def calc_candidate_d_duration(candidate, data) -> pd.Series:
    """Return the duration bounds of a candidate, recomputing them if needed.

    When both 'd_tstart' and 'd_tstop' are already set they are returned as
    they are. Otherwise the candidate data is fetched with
    `get_candidate_data(..., neighbor=1)` and the bounds are recomputed with
    `calc_d_duration` from the candidate's 'd_time' and 'threshold'.

    Any exception is reported on stdout together with the candidate and then
    re-raised.

    Returns
    -------
    - pd.Series: entries 'd_tstart' and 'd_tstop'.
    """
    try:
        bounds_known = pd.notnull(candidate['d_tstart']) and pd.notnull(candidate['d_tstop'])
        if bounds_known:
            return pd.Series({
                'd_tstart': candidate['d_tstart'],
                'd_tstop': candidate['d_tstop'],
            })

        # At least one bound is missing: recompute from the candidate's data.
        candidate_data = get_candidate_data(candidate, data, neighbor=1)
        return calc_d_duration(
            candidate_data, candidate['d_time'], candidate['threshold']
        )
    except Exception as e:
        # Reported with print rather than through a logger.
        print(f"Error for candidate {candidate} at {candidate['time']}: {str(e)}")
        raise e


### Calibrates candidate duration

This calibration is based on the assumption that the magnetic discontinuity is symmetric in time about its center, which is not always true.

So instead of calibrating the duration, we drop the events.
- Cons: Might bias the statistics of the occurrence rate.
- Pros: More robust results about the properties of the magnetic discontinuity.

In [None]:
def calibrate_candidate_duration(
    candidate: pd.Series, data: xr.DataArray, data_resolution, ratio=3/4
):
    """
    Calibrates the candidate duration.
    - If only one of 'd_tstart' or 'd_tstop' is provided, calculates the missing one by mirroring the provided one about 'd_time'.
    - Then, if there are not enough points between 'd_tstart' and 'd_tstop', returns None for both.
    - If neither is provided, returns None for both without looking at the data.

    Parameters
    ----------
    - candidate (pd.Series): The input candidate with potential missing 'd_tstart' or 'd_tstop'. 'd_time' is read when exactly one of them is missing.
    - data (xr.DataArray): Data with a `time` coordinate, used to count the samples between 'd_tstart' and 'd_tstop'.
    - data_resolution: The duration is divided by this value to get the expected number of samples (presumably the sampling interval as a timedelta; confirm with the caller).
    - ratio: Fraction of the expected number of samples that must be exceeded for the interval to be kept.

    Returns
    -------
    - pd.Series: The calibrated 'd_tstart' and 'd_tstop'.
    """
    # Plain booleans so the branching below does not depend on the exact
    # boolean type returned by pandas.
    has_start = bool(pd.notnull(candidate['d_tstart']))
    has_stop = bool(pd.notnull(candidate['d_tstop']))

    if not (has_start or has_stop):
        return pd.Series({
            'd_tstart': None,
            'd_tstop': None,
        })

    if has_start and has_stop:
        d_tstart = candidate['d_tstart']
        d_tstop = candidate['d_tstop']
    elif has_start:
        # Mirror the start about d_time to estimate the stop.
        d_tstart = candidate['d_tstart']
        d_tstop = candidate['d_time'] - d_tstart + candidate['d_time']
    else:
        # Mirror the stop about d_time to estimate the start.
        d_tstop = candidate['d_tstop']
        d_tstart = candidate['d_time'] - d_tstop + candidate['d_time']

    duration = d_tstop - d_tstart
    num_of_points_between = data.time.sel(time=slice(d_tstart, d_tstop)).count().item()

    # Too few samples compared with what the resolution implies: drop the bounds.
    expected_points = duration / data_resolution
    if num_of_points_between <= expected_points * ratio:
        d_tstart = None
        d_tstop = None

    return pd.Series({
        'd_tstart': d_tstart,
        'd_tstop': d_tstop,
    })

In [None]:
def calibrate_candidates_duration(candidates, sat_fgm, data_resolution):
    """Calibrate the duration of every candidate that lacks a start or stop time.

    Rows of `candidates` whose 'd_tstart' or 'd_tstop' is null are passed
    row by row to `calibrate_candidate_duration`, and the results are written
    back with `candidates.update`, so `candidates` is modified in place.

    Parameters
    ----------
    - candidates: DataFrame with 'd_tstart' and 'd_tstop' columns.
    - sat_fgm: data forwarded to `calibrate_candidate_duration` as `data`.
    - data_resolution: forwarded to `calibrate_candidate_duration`.

    Returns
    -------
    - The same `candidates` object.
    """
    # Boolean-mask selection is used instead of
    # `candidates.query('d_tstart.isnull() | d_tstop.isnull()')`, which is not
    # implemented in `modin`.
    temp_candidates = candidates.loc[
        lambda df: df["d_tstart"].isnull() | df["d_tstop"].isnull()
    ]

    if temp_candidates.empty:
        # Nothing to calibrate; no pipeline stage is needed.
        return candidates

    calibrate_duration = pdp.ApplyToRows(
        lambda candidate: calibrate_candidate_duration(
            candidate, sat_fgm, data_resolution
        ),
        func_desc="calibrating duration parameters if needed",
    )

    # The stage already closes over `sat_fgm` and `data_resolution`; it must be
    # applied to the candidates directly rather than called with those values.
    temp_candidates_updated = calibrate_duration.apply(temp_candidates)
    candidates.update(temp_candidates_updated)
    return candidates