### Fetch the MMS data for domain adaptation and put it into the desired form

In [1]:
import pyspedas
import pytplot
import numpy as np
import xarray as xr
import h5py
import datetime as dt
from IPython.display import clear_output
import sys  # for debugging
import os
from scipy.constants import physical_constants

In [2]:
# Slicing / smoothing parameters (these depend on which model is being used)
window, stride = 30, 10  # passed below as the sliding-window length and the step between kept windows
n_avg_width = 10  # for smoothing the plasma freq

# Physical constants (for unit conversion and plasma freq calc.), taken as the
# value entry of each scipy `physical_constants` record:
#   c   - speed of light in vacuum, m/s
#   e   - elementary charge, coulombs
#   m_e - electron mass, kg
#   e_0 - vacuum electric permittivity, Farads/m
c, e, m_e, e_0 = (
    physical_constants[name][0]
    for name in (
        "speed of light in vacuum",
        "elementary charge",
        "electron mass",
        "vacuum electric permittivity",
    )
)

# assumed tail current sheet density, cm^-3
n_cs = 0.625

### Read in the tail region times created by mms_region_to_time.ipynb

In [3]:
# Read the stored interval list and decode every entry to a Python string in one step.
with h5py.File('/tigress/kendrab/analysis-notebooks/mms_data/interval_times.h5', 'r') as file:
    # `[()]` pulls the whole 'times' dataset into memory; each element is then decoded
    times = np.vectorize(lambda x: x.decode())(file['times'][()])

### Process one time segment at a time to limit memory use

In [4]:
def loop_end(i):
    """Run the end-of-iteration housekeeping and return the next loop index.

    Meant to be called at the end of every pass through the main loop, whether
    the pass finished normally or is being skipped.

    Parameters
    ----------
    i : int
        Index of the interval that was just handled.

    Returns
    -------
    int
        ``i + 1``, the index of the next interval.
    """
    # drop the loaded spacecraft data from pytplot to free up memory
    pytplot.del_data()
    # on every 15th step (i = 14, 29, 44, ...) wipe the cell output to free memory too
    time_to_clear = (i % 15) == 14
    if time_to_clear:
        clear_output(wait=True)
        print(f"Cleared output at step {i}")
    next_i = i + 1
    return next_i

In [None]:
# Plasma frequency for the assumed current-sheet density n_cs. It depends only on
# constants, so it is computed once here rather than on every loop iteration.
n_cs_m3 = n_cs*(100)**3  # cm^-3 to m^-3
plasma_freq = np.sqrt(n_cs_m3*e**2/e_0/m_e)

# Main loop: for each interval in `times`
#   1. skip it if its output file already exists,
#   2. load FGM (all four probes) and EDP (probe 1) burst data, skipping the interval if any is missing,
#   3. compute the curlometer current, put E on the FGM time base, rotate E and j to GSM,
#   4. rescale B, E, j with the plasma-frequency based factors,
#   5. split the series at data gaps, cut each gap-free segment into sliding windows,
#   6. write the stacked windows to an HDF5 file.
# `loop_end(i)` is called exactly once per interval, on every exit path.
i=0
while i < times.shape[0]:  # while loop instead of for loop bc time.shape can change in the loop
    # cut up intervals that are too long and will crash the process TRY WITHOUT DOING THIS NOW
    # num_files = pyspedas.mms.fpi(trange=times[i], probe='1', data_rate='brst', datatype='des-moms',
    #                             time_clip=True, available=True, varnames=["mms1_des_numberdensity_brst"])
    # while len(num_files) > 10:  # should be a generous limit
    #     # datetimes to halve the time interval
    #     start_time = dt.datetime.strptime(times[i][0], '%Y-%m-%d/%H:%M:%S')
    #     end_time = dt.datetime.strptime(times[i][1], '%Y-%m-%d/%H:%M:%S')
    #     mid_time = start_time + (end_time - start_time)/2
    #     # back to strings
    #     start_time = start_time.strftime('%Y-%m-%d/%H:%M:%S')
    #     end_time = end_time.strftime('%Y-%m-%d/%H:%M:%S')
    #     mid_time = mid_time.strftime('%Y-%m-%d/%H:%M:%S')
    #     times = np.insert(times, i, [start_time, mid_time], axis=0)  # half step becomes new ith interval, original now i+1
    #     times[i+1] = [mid_time, end_time]  # updating original interval to be second half step  
    #     print(f"Split long interval {[start_time, end_time]} into {times[i]}, {times[i+1]}")
    #     num_files = pyspedas.mms.fpi(trange=times[i], probe='1', data_rate='brst', datatype='des-moms',
    #                             time_clip=True, available=True, varnames=["mms1_des_numberdensity_brst"])
    # okay we have our dataset now    
    # Build a filesystem-friendly file name from the interval's start/end strings
    start_time_filestr = times[i,0].replace('/','T').replace(':','-')
    end_time_filestr = times[i,1].replace('/','T').replace(':','-')
    outfile = f'/tigress/kendrab/analysis-notebooks/mms_data/mms_slices/{start_time_filestr}_{end_time_filestr}.h5'
    # SKIP if file exists already
    if os.path.exists(outfile):
        i = loop_end(i)
        print(f"{outfile} already exists. Skipping")
        continue
    # get the E and B field data (and density for plasma freq calc). Skip if no data.
    # vars_tmp = pyspedas.mms.fpi(trange=times[i], probe='1', data_rate='brst', datatype='des-moms',
    #                             time_clip=True, varnames=["mms1_des_numberdensity_brst"])    
    # if vars_tmp is None: 
    #     print(f"Missing FPI data for interval {times[i]}")
    #     i = loop_end(i)
    #     continue
    # Load B field + ephemeris for all four probes (all four are needed for the curlometer).
    # A `continue` inside this inner loop would only move on to the next probe, so a missing
    # probe is recorded in a flag and the *interval* is skipped after the inner loop ends.
    fgm_complete = True
    for j in range(1,5):
        vars_tmp = pyspedas.mms.fgm(trange=times[i], probe=str(j), data_rate='brst', time_clip=True,
                                    varnames=[f"mms{j}_fgm_b_gsm_brst_l2", f"mms{j}_fgm_r_gse_brst_l2"], get_fgm_ephemeris=True)
        if vars_tmp is None:
            print(f"Missing FGM data for MMS {j}, interval {times[i]}")
            fgm_complete = False
            break  # no point loading the remaining probes for this interval
    if not fgm_complete:
        i = loop_end(i)
        continue

    vars_tmp = pyspedas.mms.edp(trange=times[i], probe='1', data_rate='brst', time_clip=True,
                                varnames=["mms1_edp_dce_gse_brst_l2"]) 
    if vars_tmp is None:
        print(f"Missing EDP data for interval {times[i]}")
        i = loop_end(i)
        continue
    
    # Debug output: what was loaded and its dimensions
    print(pytplot.data_quants.keys())
    print(pytplot.data_quants["mms1_fgm_b_gsm_brst_l2"].shape,
          pytplot.data_quants["mms1_edp_dce_gse_brst_l2"].shape)
    print(pytplot.data_quants["mms1_fgm_b_gsm_brst_l2"])
    print(pytplot.data_quants["mms1_edp_dce_gse_brst_l2"])
    # remove duplicates from edp data 
    pytplot.data_quants["mms1_edp_dce_gse_brst_l2"] = pytplot.data_quants["mms1_edp_dce_gse_brst_l2"].drop_duplicates(dim='time', keep='first')

    # Find curlometer j- need to move to GSE for this
    fields=[]
    pos = []
    for j in range(1,5):
        pyspedas.cotrans(name_in = f"mms{j}_fgm_b_gsm_brst_l2", name_out = f"mms{j}_fgm_b_gse_brst_l2",
                         coord_in='gsm', coord_out='gse')
        fields.append(f"mms{j}_fgm_b_gse_brst_l2")
        pos.append(f"mms{j}_fgm_r_gse_brst_l2")
    pyspedas.fgm.curlometer(fields=fields, positions=pos) # jtotal in A/m^2

    # Interpolate E (and possibly B if we want to) to a lower data rate
    # (E is resampled onto the MMS1 FGM time stamps so B and E share one time axis)
    pytplot.data_quants["mms1_edp_dce_gse_brst_l2"] = \
        pytplot.data_quants["mms1_edp_dce_gse_brst_l2"].interp(method="linear", assume_sorted=False,
                                                               time=pytplot.data_quants["mms1_fgm_b_gsm_brst_l2"].time)
    
    # use pyspedas to transform E field and j data to GSM coordinates
    pyspedas.cotrans(name_in="mms1_edp_dce_gse_brst_l2", name_out="mms1_edp_dce_gsm_brst_l2", coord_in='gse', coord_out='gsm')
    pyspedas.cotrans(name_in="jtotal", name_out="jtotal_gsm", coord_in='gse', coord_out='gsm')
    
    # Convert E, B, J to typical PIC units e = 1, m_e = 1, c = 1, d_e = 1, w_pe = 1
    pytplot.data_quants["mms1_fgm_b_gsm_brst_l2"] *= 10**(-9)/m_e*e/plasma_freq  # T/nT*m_e/kg*C/e*(wpe^-1*s) -> units of m_e wpe / e #TODO FIX THESE
    pytplot.data_quants["mms1_edp_dce_gsm_brst_l2"] *= 10**(-3)/m_e*e/plasma_freq/c  # V/mV*m_e/kg*C/e*(wpe^-1*s)*(c / m/s) -> units of m_e wpe c / e 
    pytplot.data_quants["jtotal_gsm"] *= c*c/plasma_freq**3/e  # units of e wpe^3/c^2 or e wpe / de^2
    
    # group the data to get rid of data gaps
    # A gap is any step between consecutive B samples that exceeds 1.1x the median step;
    # the sample just before each gap becomes a bin edge, so each bin is one gap-free segment.
    next_time_interval = np.diff(pytplot.data_quants["mms1_fgm_b_gsm_brst_l2"].time)
    timestep_max = 1.1*np.median(next_time_interval) # bigger than a timestep to avoid float inaccuracy nonsense
    pre_gap_idxs = np.nonzero(next_time_interval > timestep_max)
    bin_idxs = [0,] + list(pre_gap_idxs[0]) + [-1,]
    groups_B_cots = pytplot.data_quants["mms1_fgm_b_gsm_brst_l2"].groupby_bins("time", bins=pytplot.data_quants["mms1_fgm_b_gsm_brst_l2"].time[bin_idxs],
                                                                              include_lowest=True)
    groups_E_cots = pytplot.data_quants["mms1_edp_dce_gsm_brst_l2"].groupby_bins("time", bins=pytplot.data_quants["mms1_edp_dce_gsm_brst_l2"].time[bin_idxs],
                                                                              include_lowest=True)
    # j is grouped the same way as B and E, using its own time coordinate for the bin edges
    # NOTE(review): assumes jtotal_gsm shares the MMS1 FGM time base so bin_idxs lines up - confirm
    groups_j_cots = pytplot.data_quants["jtotal_gsm"].groupby_bins("time", bins=pytplot.data_quants["jtotal_gsm"].time[bin_idxs],
                                                                   include_lowest=True)
    # make the data into slices
    sliced_B_list=[]
    sliced_E_list=[]
    sliced_j_list=[]
    sliced_time_list=[]
    # each item yielded by a groupby is a (bin label, data) pair; only the data is needed
    for (_, B_seg), (_, E_seg), (_, j_seg) in zip(groups_B_cots, groups_E_cots, groups_j_cots):
        # sliding_window_view raises ValueError if a segment is shorter than the window,
        # which would abort the whole run; such segments cannot yield a slice, so skip them
        if min(B_seg.shape[0], E_seg.shape[0], j_seg.shape[0]) < window:
            continue
        # windows run along axis 0 (time); [::stride] keeps every `stride`-th window
        B_slices = np.lib.stride_tricks.sliding_window_view(B_seg.values, window, axis=0)[::stride,:,:].copy()
        E_slices = np.lib.stride_tricks.sliding_window_view(E_seg.values, window, axis=0)[::stride,:].copy()
        j_slices = np.lib.stride_tricks.sliding_window_view(j_seg.values, window, axis=0)[::stride,:].copy()
        time_slices = np.lib.stride_tricks.sliding_window_view(B_seg.time.values, window, axis=0)[::stride,:].copy()
        
        sliced_B_list.append(B_slices)
        sliced_E_list.append(E_slices)
        sliced_j_list.append(j_slices)
        sliced_time_list.append(time_slices)

    # np.concatenate cannot handle an empty list, so skip intervals that produced no slices
    if not sliced_B_list:
        print(f"No gap-free segment of at least {window} samples for interval {times[i]}. Skipping")
        i = loop_end(i)
        continue

    # save sliced data
    sliced_B = np.concatenate(sliced_B_list, axis=0)
    sliced_E = np.concatenate(sliced_E_list, axis=0)
    sliced_j = np.concatenate(sliced_j_list, axis=0)
    # time stamps go through datetime objects so they can be stored as ASCII strings
    sliced_time = np.concatenate(sliced_time_list, axis=0, dtype='datetime64[us]')
    sliced_time = sliced_time.astype(object)
    sliced_time = np.vectorize(lambda x: x.strftime('%Y-%m-%dT%H:%M:%S.%f').encode('ascii'))(sliced_time)

    with h5py.File(outfile,'w') as file:
        file.create_dataset('B', data=sliced_B)
        file.create_dataset('E', data=sliced_E)
        file.create_dataset('j', data=sliced_j)
        file.create_dataset('time', data=sliced_time)
    
    i = loop_end(i)

Cleared output at step 269
Loading pydata/mms1/fpi/brst/l2/des-moms/2018/07/08/mms1_fpi_brst_l2_des-moms_20180708114223_v3.3.0.cdf
Time clip returns empty data.
The name mms1_des_errorflags_brst is currently not in pytplot
The name mms1_des_compressionloss_brst is currently not in pytplot
The name mms1_des_pitchangdist_lowen_brst is currently not in pytplot
The name mms1_des_pitchangdist_miden_brst is currently not in pytplot
The name mms1_des_pitchangdist_highen_brst is currently not in pytplot
The name mms1_des_errorflags_brst_moms is currently not in pytplot
The name mms1_des_errorflags_brst_moms is currently not in pytplot
The name mms1_des_compressionloss_brst_moms is currently not in pytplot
The name mms1_des_compressionloss_brst_moms is currently not in pytplot
Problem reading the variable: mms1_des_compressionloss_brst_moms
The name mms1_dis_compressionloss_brst_moms is currently not in pytplot
The name mms1_dis_compressionloss_brst_moms is currently not in pytplot
Problem read

  duck_array_version = LooseVersion("0.0.0")



['gse', 'gsm']
Running transformation: subgse2gsm
Output variable: mms1_edp_dce_gsm_brst_l2


  duck_array_version = LooseVersion("0.0.0")

  duck_array_version = LooseVersion("0.0.0")



Loading pydata/mms1/fpi/brst/l2/des-moms/2018/07/09/mms1_fpi_brst_l2_des-moms_20180709044253_v3.3.0.cdf
Loading pydata/mms1/fpi/brst/l2/des-moms/2018/07/09/mms1_fpi_brst_l2_des-moms_20180709045623_v3.3.0.cdf
Loading pydata/mms1/fpi/brst/l2/des-moms/2018/07/09/mms1_fpi_brst_l2_des-moms_20180709050623_v3.3.0.cdf
Loading pydata/mms1/fpi/brst/l2/des-moms/2018/07/09/mms1_fpi_brst_l2_des-moms_20180709102743_v3.3.0.cdf
Loading pydata/mms1/fpi/brst/l2/des-moms/2018/07/09/mms1_fpi_brst_l2_des-moms_20180709110403_v3.3.0.cdf
Loading pydata/mms1/fpi/brst/l2/des-moms/2018/07/09/mms1_fpi_brst_l2_des-moms_20180709110823_v3.3.0.cdf
Loading pydata/mms1/fpi/brst/l2/des-moms/2018/07/09/mms1_fpi_brst_l2_des-moms_20180709110953_v3.3.0.cdf
Loading pydata/mms1/fpi/brst/l2/des-moms/2018/07/09/mms1_fpi_brst_l2_des-moms_20180709111123_v3.3.0.cdf
Loading pydata/mms1/fpi/brst/l2/des-moms/2018/07/09/mms1_fpi_brst_l2_des-moms_20180709111303_v3.3.0.cdf
Time clip was applied to: mms1_des_numberdensity_brst
The name m

  duck_array_version = LooseVersion("0.0.0")



['gse', 'gsm']
Running transformation: subgse2gsm
Output variable: mms1_edp_dce_gsm_brst_l2


  duck_array_version = LooseVersion("0.0.0")

  duck_array_version = LooseVersion("0.0.0")



Loading pydata/mms1/fpi/brst/l2/des-moms/2018/07/12/mms1_fpi_brst_l2_des-moms_20180712003253_v3.3.0.cdf
Loading pydata/mms1/fpi/brst/l2/des-moms/2018/07/12/mms1_fpi_brst_l2_des-moms_20180712003303_v3.3.0.cdf
Loading pydata/mms1/fpi/brst/l2/des-moms/2018/07/12/mms1_fpi_brst_l2_des-moms_20180712014243_v3.3.0.cdf
Loading pydata/mms1/fpi/brst/l2/des-moms/2018/07/12/mms1_fpi_brst_l2_des-moms_20180712014713_v3.3.0.cdf
Loading pydata/mms1/fpi/brst/l2/des-moms/2018/07/12/mms1_fpi_brst_l2_des-moms_20180712034403_v3.3.0.cdf
Loading pydata/mms1/fpi/brst/l2/des-moms/2018/07/12/mms1_fpi_brst_l2_des-moms_20180712035343_v3.3.0.cdf
Loading pydata/mms1/fpi/brst/l2/des-moms/2018/07/12/mms1_fpi_brst_l2_des-moms_20180712035523_v3.3.0.cdf
Loading pydata/mms1/fpi/brst/l2/des-moms/2018/07/12/mms1_fpi_brst_l2_des-moms_20180712035903_v3.3.0.cdf
Loading pydata/mms1/fpi/brst/l2/des-moms/2018/07/12/mms1_fpi_brst_l2_des-moms_20180712040453_v3.3.0.cdf
Loading pydata/mms1/fpi/brst/l2/des-moms/2018/07/12/mms1_fpi_brs

  duck_array_version = LooseVersion("0.0.0")



['gse', 'gsm']
Running transformation: subgse2gsm
Output variable: mms1_edp_dce_gsm_brst_l2


  duck_array_version = LooseVersion("0.0.0")

  duck_array_version = LooseVersion("0.0.0")



Loading pydata/mms1/fpi/brst/l2/des-moms/2018/07/14/mms1_fpi_brst_l2_des-moms_20180714205623_v3.3.0.cdf
Time clip returns full data set.
The name mms1_des_errorflags_brst is currently not in pytplot
The name mms1_des_compressionloss_brst is currently not in pytplot
The name mms1_des_pitchangdist_lowen_brst is currently not in pytplot
The name mms1_des_pitchangdist_miden_brst is currently not in pytplot
The name mms1_des_pitchangdist_highen_brst is currently not in pytplot
The name mms1_des_errorflags_brst_moms is currently not in pytplot
The name mms1_des_errorflags_brst_moms is currently not in pytplot
The name mms1_des_compressionloss_brst_moms is currently not in pytplot
The name mms1_des_compressionloss_brst_moms is currently not in pytplot
Problem reading the variable: mms1_des_compressionloss_brst_moms
The name mms1_dis_compressionloss_brst_moms is currently not in pytplot
The name mms1_dis_compressionloss_brst_moms is currently not in pytplot
Problem reading the variable: mms1_d

  duck_array_version = LooseVersion("0.0.0")



['gse', 'gsm']
Running transformation: subgse2gsm
Output variable: mms1_edp_dce_gsm_brst_l2


  duck_array_version = LooseVersion("0.0.0")

  duck_array_version = LooseVersion("0.0.0")



Loading pydata/mms1/fpi/brst/l2/des-moms/2018/07/16/mms1_fpi_brst_l2_des-moms_20180716205543_v3.3.0.cdf
Time clip returns empty data.
The name mms1_des_errorflags_brst is currently not in pytplot
The name mms1_des_compressionloss_brst is currently not in pytplot
The name mms1_des_pitchangdist_lowen_brst is currently not in pytplot
The name mms1_des_pitchangdist_miden_brst is currently not in pytplot
The name mms1_des_pitchangdist_highen_brst is currently not in pytplot
The name mms1_des_errorflags_brst_moms is currently not in pytplot
The name mms1_des_errorflags_brst_moms is currently not in pytplot
The name mms1_des_compressionloss_brst_moms is currently not in pytplot
The name mms1_des_compressionloss_brst_moms is currently not in pytplot
Problem reading the variable: mms1_des_compressionloss_brst_moms
The name mms1_dis_compressionloss_brst_moms is currently not in pytplot
The name mms1_dis_compressionloss_brst_moms is currently not in pytplot
Problem reading the variable: mms1_dis_

  duck_array_version = LooseVersion("0.0.0")



['gse', 'gsm']
Running transformation: subgse2gsm
Output variable: mms1_edp_dce_gsm_brst_l2


  duck_array_version = LooseVersion("0.0.0")

  duck_array_version = LooseVersion("0.0.0")



Loading pydata/mms1/fpi/brst/l2/des-moms/2018/07/17/mms1_fpi_brst_l2_des-moms_20180717173113_v3.3.0.cdf
Loading pydata/mms1/fpi/brst/l2/des-moms/2018/07/17/mms1_fpi_brst_l2_des-moms_20180717182153_v3.3.0.cdf
Loading pydata/mms1/fpi/brst/l2/des-moms/2018/07/17/mms1_fpi_brst_l2_des-moms_20180717183903_v3.3.0.cdf
Loading pydata/mms1/fpi/brst/l2/des-moms/2018/07/17/mms1_fpi_brst_l2_des-moms_20180717184113_v3.3.0.cdf
Loading pydata/mms1/fpi/brst/l2/des-moms/2018/07/17/mms1_fpi_brst_l2_des-moms_20180717184223_v3.3.0.cdf
Loading pydata/mms1/fpi/brst/l2/des-moms/2018/07/17/mms1_fpi_brst_l2_des-moms_20180717184323_v3.3.0.cdf
Loading pydata/mms1/fpi/brst/l2/des-moms/2018/07/17/mms1_fpi_brst_l2_des-moms_20180717184933_v3.3.0.cdf
Loading pydata/mms1/fpi/brst/l2/des-moms/2018/07/17/mms1_fpi_brst_l2_des-moms_20180717185123_v3.3.0.cdf
Loading pydata/mms1/fpi/brst/l2/des-moms/2018/07/17/mms1_fpi_brst_l2_des-moms_20180717185323_v3.3.0.cdf
Loading pydata/mms1/fpi/brst/l2/des-moms/2018/07/17/mms1_fpi_brs