---
title: "SWD Properties"
---

## Setup

In [1]:
#| hide
import polars as pl
import polars.selectors as cs

from loguru import logger
from ids_finder.utils.basic import load_catalog

%load_ext autoreload
%autoreload 2

In [6]:
from ids_finder import ROOT_DIR

catalog = load_catalog(ROOT_DIR)

Connect `python` with `R` kernel

In [None]:
%load_ext rpy2.ipython
from beforerr.r import py2rpy_polars
import rpy2.robjects as robjects
# Handle to the embedded R session provided by rpy2.
r = robjects.r
# Evaluate utils.R inside that session
# (presumably defines helpers such as save_plot used by later %%R cells — TODO confirm).
r.source('utils.R')

# Converter object handed to the %R / %%R magics below through `-c conv_pl`.
conv_pl = py2rpy_polars()

### Loading all datasets from different sources

In [None]:
from beforerr.basics import pmap
from ids_finder.utils.analysis import filter_tranges_ds


In [None]:
from ids_finder.datasets import cIDsDataset

# One candidate dataset per spacecraft, all built with the same tau / ts settings.
sta_dataset, jno_dataset, thb_dataset = (
    cIDsDataset(sat_id=sat_id, tau=60, ts=1, catalog=catalog)
    for sat_id in ("STA", "JNO", "THB")
)

In [None]:
# Lazy frame loaded from the 'THB.STATE.inter_data_sw' catalog entry; only its
# `start` and `end` columns are used here.
thb_inter_state_sw: pl.LazyFrame = catalog.load('THB.STATE.inter_data_sw')
# Unpacking the collected two-column frame binds one column to each name.
start, end = thb_inter_state_sw.select(['start', 'end']).collect()

# Presumably keeps only THB data inside those (start, end) ranges, i.e. the
# solar-wind intervals suggested by the `_sw` naming — TODO confirm against
# filter_tranges_ds.
thb_sw_dataset = filter_tranges_ds(thb_dataset, (start, end))

In [None]:
# Datasets combined below; THB enters through its time-range-filtered variant.
all_datasets = [sta_dataset, jno_dataset, thb_sw_dataset]

In [None]:
# Stack the `candidates` frame of every dataset. how="diagonal" takes the union
# of the columns and fills columns that a frame lacks with nulls.
all_candidates_l0 : pl.DataFrame = pl.concat(
    all_datasets | pmap(lambda x: x.candidates),
    how="diagonal",
)

## Processing datasets

Some extreme values are present in the data. We will remove them.

In [None]:
# Event properties that are summarised and inspected throughout the notebook.
NVARS = ['d_star', 'L_mn', 'L_mn_norm', 'j0', 'j0_norm', 'duration', 'v_mn']
DISPLAY_VARS = ['time', 'sat', *NVARS]


def check_candidates(df):
    """Return the `describe()` summary of the NVARS columns of `df`."""
    property_columns = df.select(NVARS)
    return property_columns.describe()

check_candidates(all_candidates_l0)

In [None]:
from datetime import timedelta
def process_candidates_l1(raw_df: pl.DataFrame):
    "clean data to remove extreme values"

    df = raw_df.filter(
        pl.col("d_star") < 100, # exclude JUNO extreme values
        pl.col('v_mn') > 10,
        pl.col('duration') < timedelta(seconds=60),
        # pl.col("j0") < 100
    ).with_columns(
        pl.col('radial_distance').fill_null(1) # by default, fill with 1 AU
    ).with_columns(
        r_bin = pl.col('radial_distance').round(),
        j0_norm_log = pl.col('j0_norm').log10(),
        L_mn_norm_log = pl.col('L_mn_norm').log10(),
    )

    logger.info(
        f"candidates_l1: {len(df)}, with effective ratio: {len(df) / len(raw_df):.2%}"
    )

    return df

all_events_l1 = process_candidates_l1(all_candidates_l0)
%R -i all_events_l1 -c conv_pl
check_candidates(all_events_l1)


In [None]:
# all_events_l1 : pl.LazyFrame = catalog.load('events.l1.ALL_sw_ts_1s_tau_60s')
# JUNO_events_l1 : pl.LazyFrame = catalog.load('events.l1.JNO_ts_1s_tau_60s')

In [None]:
# Juno-only subset of the L1 events; the line magic also copies it into the R session.
jno_candidates_l1 = all_events_l1.filter(pl.col('sat') == 'JNO')
%R -i jno_candidates_l1 -c conv_pl

In [None]:
from ids_finder.utils.analysis import filter_before_jupiter
from ids_finder.utils.analysis import link_coord2dim

In [None]:
def process_events_l2(raw_df: pl.DataFrame, avg_window="30d"):
    """Build the L2 (time-averaged) dataset from L1 events.

    Steps, in order:
    1. Apply `filter_before_jupiter` (could arguably move to L1).
    2. Average numeric and duration columns per satellite in windows of
       `avg_window`, recording the number of events as `id_count`.
    3. Drop windows with 25 events or fewer, then upsample so every satellite
       has a regular time grid again.
    4. Apply `link_coord2dim` to link time and radial distance.
    """
    time_col = "time"

    selected = filter_before_jupiter(raw_df)

    windowed_means = (
        selected.sort(time_col)
        .group_by_dynamic(time_col, every=avg_window, by="sat")
        .agg(cs.numeric().mean(), cs.duration().mean(), id_count=pl.count())
    )

    # Require enough events per window; this also removes the windows with
    # extremely large JUNO thickness.
    well_sampled = windowed_means.filter(pl.col("id_count") > 25)

    regular_grid = (
        well_sampled.sort(time_col)
        .upsample(time_col, every=avg_window, by="sat", maintain_order=True)
        .with_columns(pl.col("sat").forward_fill())
    )

    return link_coord2dim(regular_grid)

In [None]:
# Time-averaged (L2) events using the default averaging window of process_events_l2.
all_events_l2 = all_events_l1.pipe(process_events_l2)

In [None]:
# Summary statistics of the averaged event properties.
inspect_df = all_events_l2[NVARS]
inspect_df.describe()

In [None]:
from ids_finder.utils.analysis import n2_normalize

# NOTE(review): n2_normalize is defined elsewhere; later cells read an `L_mn_n2`
# column from this frame, so it presumably adds `<var>_n2` columns — confirm.
all_events_l2_n2 = n2_normalize(all_events_l2, NVARS)

## Orientation

In [None]:
import hvplot.polars
import warnings

# Suppress specific FutureWarning from pandas in Holoviews
warnings.filterwarnings(
    "ignore", category=FutureWarning, module="holoviews.core.data.pandas"
)


def dist_plot(df: "pl.DataFrame | pl.LazyFrame", var, by="sat"):
    """Density plot of column `var`, one 300x300 subplot per value of `by`.

    The subplots are arranged in a single column. The cells below pass the
    eager L1 frame, so the annotation covers `pl.DataFrame` as well as the
    previously declared `pl.LazyFrame`.
    """
    return df.hvplot.density(var, by=by, subplots=True, width=300, height=300).cols(1)

In [None]:
# Per-satellite density plots of the Vl_x, Vl_y and Vl_z columns, combined into one layout.
(
    dist_plot(all_events_l1, "Vl_x")
    + dist_plot(all_events_l1, "Vl_y")
    + dist_plot(all_events_l1, "Vl_z")
)

In [None]:
import pandas # noqa
import hvplot.pandas
import panel.widgets as pnw
# NOTE(review): this slider is not referenced by any cell visible in this notebook.
slider = pnw.FloatSlider(name='Vl_x', start=-1, end=1)


In [None]:
# Cut on |Vl_x| that separates the two orientation subsets compared below.
thresold = 0.67


def temp_plot(l1_df, y="j0_norm"):
    """Plot the L2 time average of `y` against time, one curve per satellite."""
    averaged = process_events_l2(l1_df)
    plot_options = dict(
        x="time", y=y, by="sat", hover_cols=["id_count"], width=300, height=300, legend=False
    )
    return averaged.hvplot(**plot_options)


def _orientation_row(y):
    """Three panels for `y`: all events, |Vl_x| above the cut, |Vl_x| below the cut."""
    abs_vl_x = pl.col("Vl_x").abs()
    subsets = (
        all_events_l1,
        all_events_l1.filter(abs_vl_x > thresold),
        all_events_l1.filter(abs_vl_x < thresold),
    )
    everything, above_cut, below_cut = (temp_plot(subset, y=y) for subset in subsets)
    return everything + above_cut + below_cut


(_orientation_row("j0_norm") + _orientation_row("L_mn_norm")).cols(3)

## Plotting function

Plotting function for Level 1 data.

Similar to the `geom_bin2d` function, but with added functionality:

- Normalize the counts within each x-axis bin
- Add peak values
- Add mean values with error bars

In [None]:
%%R
library(scales)
# Helper function to calculate summary statistics for x-binned data.
#
# data:  data frame holding both columns.
# x_col: name (string) of the column to bin.
# y_col: name (string) of the column to summarise.
# x_seq: increasing vector of bin edges; every x value is replaced by the left
#        edge of the bin it falls in (the last bin is closed on the right).
#
# Returns one row per occupied x-bin with the columns mean_y, sd_y and se_y.
calculate_summary <- function(data, x_col, y_col, x_seq) {
  data %>%
    # .data refers to the frame flowing through the pipe rather than to the
    # function argument, so the lookup stays correct whatever is piped in.
    mutate(!!x_col := x_seq[findInterval(.data[[x_col]], x_seq, rightmost.closed = TRUE)]) %>%
    group_by(!!sym(x_col)) %>%
    summarise(
      mean_y = mean(!!sym(y_col), na.rm = TRUE),
      sd_y = sd(!!sym(y_col), na.rm = TRUE),
      # mean and sd ignore missing values, so the standard error must divide by
      # the number of non-missing values rather than by the row count n().
      se_y = sd_y / sqrt(sum(!is.na(!!sym(y_col))))
    )
}


# 2D binned map of y_col against x_col, normalized within every x-bin.
#
# data:   data frame holding both columns.
# x_col:  name (string) of the x column.
# y_col:  name (string) of the y column.
# x_bins: number of equal-width bins along x.
# y_bins: number of equal-width bins along y.
# y_lim:  optional c(lower, upper); rows with y outside it are dropped before
#         any log transform.
# log_y:  if TRUE, y is replaced by log10(y) before binning.
#
# Returns a ggplot with tiles of the per-x-bin fraction, a dashed line through
# the most populated y-bin of every x-bin, and the per-x-bin mean with
# +/- one standard deviation error bars.
plot_binned_data <- function(data, x_col, y_col, x_bins, y_bins, y_lim = NULL, log_y = FALSE) {
  x_sym <- sym(x_col)
  y_sym <- sym(y_col)

  # If y_lim is provided, filter the data
  if (!is.null(y_lim)) {
    data <- data %>%
      filter(!!y_sym >= y_lim[1], !!y_sym <= y_lim[2])
  }

  # If log_y is TRUE, transform y_col to log scale
  if (log_y) {
    # log10 of zero or negative values gives -Inf / NaN, which would make the
    # bin edges below impossible to build; such rows cannot be shown on a log
    # axis anyway.
    data <- data %>% filter(!!y_sym > 0)
    data[[y_col]] <- log10(data[[y_col]])
    y_label <- paste("Log10", y_col)
  } else {
    y_label <- y_col
  }

  # Missing or infinite coordinates would turn min()/max() into NA/Inf.
  data <- data %>% filter(is.finite(!!x_sym), is.finite(!!y_sym))
  if (nrow(data) == 0) {
    stop("plot_binned_data: no finite rows left to bin after filtering", call. = FALSE)
  }

  # Define bins for x and y based on the input parameters
  x_seq <- seq(min(data[[x_col]]), max(data[[x_col]]), length.out = x_bins + 1)
  y_seq <- seq(min(data[[y_col]]), max(data[[y_col]]), length.out = y_bins + 1)

  # Replace every coordinate by the left edge of its bin, count the rows per
  # cell, then turn the counts into fractions of their x-bin.
  data_binned_normalized <- data %>%
    mutate(
      !!x_col := x_seq[findInterval(.data[[x_col]], x_seq, rightmost.closed = TRUE)],
      !!y_col := y_seq[findInterval(.data[[y_col]], y_seq, rightmost.closed = TRUE)]
    ) %>%
    count(!!x_sym, !!y_sym) %>%
    group_by(!!x_sym) %>%
    mutate(n = n / sum(n)) %>%
    ungroup()

  p <- ggplot() +
    geom_tile(data = data_binned_normalized, aes(x = !!x_sym, y = !!y_sym, fill = n))

  # Calculate mode for each x-bin; with_ties = FALSE keeps exactly one row per
  # bin (the lowest tied y-bin) so the line has a single value at every x.
  modes <- data_binned_normalized %>%
    group_by(!!x_sym) %>%
    slice_max(n, n = 1, with_ties = FALSE) %>%
    ungroup()

  # Add the mode line
  p <- p +
    geom_line(data = modes, aes(x = !!x_sym, y = !!y_sym, group = 1), linetype = "dashed")

  data_xbinned <- calculate_summary(data, x_col, y_col, x_seq)

  # Note: ggline will produce another figure, so we use geom_line instead
  p <- p +
    geom_errorbar(
      data = data_xbinned,
      aes(x = !!x_sym, ymin = mean_y - sd_y, ymax = mean_y + sd_y),
      width = 0.2
    ) +
    geom_line(data = data_xbinned, aes(x = !!x_sym, y = mean_y))

  p <- p + labs(y = y_label) + # Set y-axis label
    scale_fill_viridis_c() +
    # scale_fill_viridis_c(trans = 'log', labels = label_number(accuracy = 0.001)) +
    theme_pubr(base_size = 16, legend = "r")

  return(p)
}

Plotting function for Level 2 averaged data.

In [None]:
%%R
# Shared builder for the two-panel Level 2 figures.
#
# df:          averaged data; needs a `sat` column plus the columns named below.
# x_var:       name (string) of the x column, also used as the x-axis label.
# y_var:       name of the column shown in the upper panel; y_lab is its label.
# y_var_norm:  name of the column shown in the lower panel; y_lab_norm is its label.
#
# Returns the upper panel stacked on the lower panel.
plot_util <- function(df, x_var, y_var, y_lab, y_var_norm, y_lab_norm) {
  # Line + point panel of one column, coloured and dashed by satellite
  satellite_panel <- function(column, axis_title) {
    ggplot(
      df,
      aes(
        x = .data[[x_var]],
        y = .data[[column]],
        color = .data$sat,
        linetype = .data$sat
      )
    ) +
      geom_line() +
      geom_point() +
      labs(y = axis_title)
  }

  # Labels, theme and palette applied to both panels
  shared_elements <- list(
    labs(x = x_var, color="Satellites", linetype="Satellites"),
    theme_pubr(base_size = 16),
    theme(legend.text = element_text(size=16)),
    scale_color_okabeito(palette = "black_first")
  )

  # Upper panel drops its x label, lower panel drops its legend
  upper <- ggpar(satellite_panel(y_var, y_lab) + shared_elements, xlab=FALSE)
  lower <- ggpar(satellite_panel(y_var_norm, y_lab_norm) + shared_elements, legend = "none")

  stacked <- upper / lower
  return(stacked)
}

Histogram

## Thickness

Note: we want a different y-axis title (label) for each panel rather than different facet titles, and it is not clear how to do this with `facet_wrap` after `pivot_longer`. The two quantities also have different units, so it is better to plot them separately and then combine the plots.

### Evolution

In [None]:
%%R
# Two-panel thickness figure: L_mn on top, L_mn_norm below.
plot_thickness <- function(df, x_var = "time") {
  plot_util(
    df,
    x_var = x_var,
    y_var = "L_mn",
    y_lab = "Thickness (km)",
    y_var_norm = "L_mn_norm",
    y_lab_norm = "Normalized thickness (d_i)"
  )
}

In [None]:
%%R -i all_events_l2 -c conv_pl
# Thickness (raw and normalized) against time, using the default x_var = "time".
p <- plot_thickness(all_events_l2)
p <- ggpar(p, xlab="Time")
print(p)

# NOTE(review): save_plot is not defined in this notebook (presumably utils.R);
# it is assumed to write the most recently drawn plot — confirm.
save_plot("thickness_time")

In [None]:
%%R -i all_events_l2_n2 -c conv_pl
# Same layout as plot_thickness, but the lower panel shows the L_mn_n2 column.
plot_thickness_n2 <- function(df, x_var = "time") {
  plot_util(
    df,
    x_var = x_var,
    y_var = "L_mn",
    y_lab = "Thickness (km)",
    y_var_norm = "L_mn_n2",
    y_lab_norm = "Normalized thickness (d_i)"
  )
}

p <- plot_thickness_n2(all_events_l2_n2, x_var = "ref_radial_distance")
print(p)


In [None]:
%%R
# Thickness (raw and normalized) against the ref_radial_distance column.
p <- plot_thickness(all_events_l2, x_var="ref_radial_distance")
p <- ggpar(p, xlab="Referred Radial Distance (AU)")
print(p)

# NOTE(review): save_plot is defined outside this notebook; assumed to write the
# most recently drawn plot — confirm.
save_plot("thickness_r")

### Map

In [None]:
%%R
# No y range restriction for the raw thickness.
y_lim <- NULL
# Juno only: distribution of log10(L_mn) within each of 8 radial-distance bins (32 y-bins).
p <- plot_binned_data(jno_candidates_l1, x_col = "radial_distance", y_col = "L_mn", x_bins = 8, y_bins = 32, y_lim = y_lim, log_y = TRUE)
p <- ggpar(p, xlab="Radial Distance (AU)", ylab="Log Thickness (km)")
print(p)

# NOTE(review): save_plot is defined outside this notebook; confirm what it writes.
save_plot("thickness_r_dist")

In [None]:
%%R
# Same map for L_mn_norm; rows are limited to [0, 100] before the log transform.
y_lim <- c(0,100)
p <- plot_binned_data(jno_candidates_l1, x_col = "radial_distance", y_col = "L_mn_norm", x_bins = 8, y_bins = 32, y_lim = y_lim, log_y = TRUE)
p <- p + labs(x = "Radial Distance (AU)", y= expression(Log~Normalized~Thickness~(d[i])))
print(p)

save_plot("thickness_N1_r_dist")

### Histogram

In [None]:
%%R
x <- "L_mn_norm"
x_lim <- c(0,60)
facet_var <- "r_bin"

# The L1 frame passed to R is `all_events_l1`; no `all_candidates_l1` object is
# ever created or transferred, so the histogram must read from `all_events_l1`
# (which carries the `r_bin` column used for faceting).
p <- plot_limited_histogram(all_events_l1, x = x, x_lim = x_lim , bins = 10, facet_var=facet_var)
print(p)

save_plot("thickness_N1_r_hist")

In [None]:
%%R
x <- "L_mn_norm_log"
x_lim <- c(0, 2)
facet_var <- "r_bin"

# Same source frame as above: `all_events_l1` also holds `L_mn_norm_log`.
p <- plot_limited_histogram(all_events_l1, x = x, x_lim = x_lim , bins = 10, facet_var=facet_var)
print(p)

save_plot("thickness_N1_log_r_hist")

## Current intensity

### Mean value

In [None]:
%%R
# Two-panel current-intensity figure: j0 on top, j0_norm below.
plot_j <- function(df, x_var = "time") {
  plot_util(
    df,
    x_var = x_var,
    y_var = "j0",
    y_lab = "J (nA/m^2)",
    y_var_norm = "j0_norm",
    y_lab_norm = "Normalized J (J_A)"
  )
}

In [None]:
%%R
# Current intensity (raw and normalized) against time.
p <- plot_j(all_events_l2, x_var="time")
p <- ggpar(p, xlab="Time")
print(p)

# NOTE(review): save_plot is defined outside this notebook; assumed to write the
# most recently drawn plot — confirm.
save_plot("current_time")

In [None]:
%%R
# Current intensity (raw and normalized) against the ref_radial_distance column.
p <- plot_j(all_events_l2, x_var="ref_radial_distance")
p <- ggpar(p, xlab="Referred Radial Distance (AU)")
print(p)

save_plot("current_r")

### Map

In [None]:
%%R
# Rows are limited to j0 within [0, 15] before the log transform.
y_lim <- c(0, 15)
# Juno only: distribution of log10(j0) within each of 8 radial-distance bins (32 y-bins).
p <- plot_binned_data(jno_candidates_l1, x_col = "radial_distance", y_col = "j0", x_bins = 8, y_bins = 32, y_lim = y_lim, log_y = TRUE)
p <- p + labs(x = "Radial Distance (AU)", y= expression(Log~J~(nA~m^-2)))
print(p)

# NOTE(review): save_plot is defined outside this notebook; confirm what it writes.
save_plot("current_r_dist")

In [None]:
%%R
# Same map for j0_norm, limited to [0, 1] before the log transform.
p<-plot_binned_data(jno_candidates_l1, x_col = "radial_distance", y_col = "j0_norm", x_bins = 8, y_bins = 32, y_lim = c(0, 1), log_y = TRUE)
p <- p + labs(x = "Radial Distance (AU)", y= expression(Log~Normalized~J~(J[A])))
print(p)

save_plot("current_N1_r_dist")

### Histogram

In [None]:
%%R
x <- "j0_norm"
x_lim <- c(0, 1)
facet_var <- "r_bin"

# The L1 frame passed to R is `all_events_l1`; no `all_candidates_l1` object is
# ever created or transferred, so the histogram must read from `all_events_l1`
# (which carries the `r_bin` column used for faceting).
p <- plot_limited_histogram(all_events_l1, x = x, x_lim = x_lim, bins = 8, facet_var=facet_var)
print(p)

save_plot("current_N1_r_hist")

In [None]:
%%R
x <- "j0_norm_log"
x_lim <- c(-2, 0)
facet_var <- "r_bin"

# Same source frame as above: `all_events_l1` also holds `j0_norm_log`.
p <- plot_limited_histogram(all_events_l1, x = x, x_lim = x_lim, bins = 8, facet_var=facet_var)
print(p)

save_plot("current_N1_log_r_hist")

## Map of thickness and current intensity

In [None]:
%R -i all_events_l1 -c conv_pl

Why does binning over `L_mn_norm_log` not work as expected?

In [None]:
def bin_df(df: pl.DataFrame, col_to_bin, bins=10):
    """Summarise `df` within quantile bins of `col_to_bin`.

    The column is cut into `bins` quantile bins; for every bin the median of
    each numeric column and the number of rows (`bin_count`) are returned.
    The helper bin-label column itself is dropped from the result.
    """
    binned_col = f"{col_to_bin}_bin"

    bin_label = pl.col(col_to_bin).qcut(bins).alias(binned_col)
    numeric_medians = cs.numeric().median()
    rows_per_bin = pl.count().alias("bin_count")

    labelled = df.with_columns(bin_label)
    per_bin = labelled.group_by(binned_col).agg(numeric_medians, rows_per_bin)
    return per_bin.drop(binned_col)

col_to_bin="L_mn_norm_log"
# col_to_bin="j0_norm_log"

all_candidates_l1_L_binned = pl.concat(
    [
        data.pipe(bin_df, col_to_bin=col_to_bin, bins=64).with_columns(sat= pl.lit(name))
        for name, data in all_events_l1.group_by("sat")
    ]
)

jno_events_l1_L_binned = pl.concat(
    [
        data.pipe(bin_df, col_to_bin=col_to_bin, bins=64).with_columns(sat= pl.lit(name))
        for name, data in jno_candidates_l1.group_by("r_bin")
    ]
)
%R -i all_candidates_l1_L_binned -c conv_pl


In [None]:
%%R -w 1000 -h 500
# Creating a list of layers for the binned data
# model <- lm(j0_norm_log ~ L_mn_norm_log, data = all_candidates_l1_L_binned)
# slope <- coef(model)[2]

# These layers carry no aes() of their own: they inherit x/y from the plot
# they are added to. binned_layer is reused by the next cell.
binned_layer <- list(
  geom_line(data = all_candidates_l1_L_binned, color = 'blue'),
  geom_point(data = all_candidates_l1_L_binned, color = 'blue'), 
  geom_smooth(data = all_candidates_l1_L_binned, method = "glm", color = 'red')
)

# Plot creation
# Raster of the 2D density of all L1 events with the binned medians on top,
# one panel per satellite with independent axes.
p <- ggplot(mapping = aes(x = L_mn_norm_log, y = j0_norm_log)) +
  stat_density_2d(data = all_events_l1, aes(fill = after_stat(density)), geom = "raster", contour = FALSE) +
  binned_layer +
  facet_wrap(~ sat, scales = "free")

  
# Print the plot
print(p)


In [None]:
%%R
# The geoms in binned_layer define no aesthetics of their own, so the x/y
# mapping has to live at the plot level for them to inherit it; with an empty
# ggplot() they fail for lack of x and y.
p <- ggplot(mapping = aes(x = L_mn_norm_log, y = j0_norm_log)) +
  geom_point(data = all_events_l1) +
  binned_layer +
  facet_wrap(~ sat, scales = "free")

print(p)


In [None]:
%%R
# Least-squares line through the log-transformed values of all events pooled
# together (one fit shared by every satellite panel).
lm_fit <- lm(
  formula = j0_norm_log ~ L_mn_norm_log,
  data = all_events_l1
)

# First coefficient is the intercept, second the slope.
intercept <- coefficients(lm_fit)[1]
slope <- coefficients(lm_fit)[2]

# Scatter of the log-log values per satellite with the pooled fit overlaid.
p <- ggplot(data = all_events_l1, mapping = aes(x = L_mn_norm_log, y = j0_norm_log)) +
  facet_wrap(~ sat, scales = "free") +
  labs(x = "Log10(L_mn_norm)", y = "Log10(j0_norm)") +
  geom_point() +
  geom_abline(intercept = intercept, slope = slope, color = 'blue', size = 1)

print(p)

In [None]:
%%R
# Plot creation
# Binned medians per satellite with a glm smooth and the fitted-line equation
# (stat_regline_equation) printed in every panel.
p <- ggplot(all_candidates_l1_L_binned, aes(x = L_mn_norm_log, y = j0_norm_log)) +
    geom_line(color = 'blue') +
    geom_point(color = 'blue') +
    geom_smooth(method = "glm", color = 'red') +
    facet_wrap(~ sat, scales = "free") +
    stat_regline_equation()

  
# Print the plot
print(p)


In [None]:
%%R -i jno_events_l1_L_binned -c conv_pl
# Plot creation
# Juno binned medians, one panel per r_bin value, with a glm smooth and the
# fitted-line equation (stat_regline_equation) printed in every panel.
p <- ggplot(jno_events_l1_L_binned, aes(x = L_mn_norm_log, y = j0_norm_log)) +
    geom_line(color = 'blue') +
    geom_point(color = 'blue') +
    geom_smooth(method = "glm", color = 'red') +
    facet_wrap(~ r_bin, scales = "free") +
    stat_regline_equation()

  
# Print the plot
print(p)


In [None]:
%%R -i jno_candidates_l1 -c conv_pl

# 2D density of normalized thickness vs normalized current for Juno, one row
# per r_bin value, on log-log axes. after_stat(density) replaces the deprecated
# ..density.. notation and matches the density plot earlier in this notebook.
p <- ggplot(jno_candidates_l1, aes(x = L_mn_norm, y = j0_norm)) +
  stat_density_2d(aes(fill = after_stat(density)), geom = "raster", contour = FALSE) +
  facet_wrap(~ r_bin, nrow = length(unique(jno_candidates_l1$r_bin))) +
  scale_x_log10() +
  scale_y_log10() +
  labs(fill = "Density")


print(p)
