## Import lbraries
---
This cell calls the libraries needed for computation, for downloading the data, and for plotting it.

In [1]:
# Numerical libraries:
import numpy as np
import pandas as pd
from scipy import interpolate
from scipy.optimize import curve_fit

# File libraries:
import csv

#Pyspedas libraries:
import pyspedas
import pytplot
from pytplot import del_data, get_data, get_timespan, store_data, tplot_options, tplot_names, tplot

#For handling timestamps:
import datetime as dt
from datetime import timedelta

#Plotting libraries:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib import ticker

## Import the list of events
---
This cell first opens the CSV file that contains the event list. It then creates a variable with the start times, stop times, and the probe to be used for each measurement.

In [2]:
starts = []
stops = []
mms_probe = []
themis_probe = []
artemis_probe = []
time_ranges = []

with open('../eventlist/eventlist.csv', newline='') as events:
    rows = csv.reader(events)
    next(rows)
    for r in rows:
        starts.append(dt.datetime.strptime(r[0], '%Y-%m-%dT%H:%M:%S.%fZ'))
        stops.append(dt.datetime.strptime(r[1], '%Y-%m-%dT%H:%M:%S.%fZ'))
        #mms_probe.append(r[2])
        themis_probe.append(str(r[3]))
        artemis_probe.append(r[4])

## Single-event analysis
---
The following cells perform the analysis for a single period of time. Please refer to the list of events for the correct index corresponding to the event you wish to analyze

`omni_data_import` is the raw tplot object that gets loaded for the given time interval
- `omni_data` is a DataFrame that compiles different variables about the given time interval. It is interpolated to remove NaN values and is strictured like this:
| timestamp | Bx | By | Bz | Vx | Vy | Vz | n | T |

`themis_fgm_data_import` and `themis_esa_data_import` are the raw tplot objects that get loaded for the given time interval
- `themis_fgm` and `themis_esa` are DataFrames that contain different variables about the interval, structured as follows:
| timestamp | Bx | By | Bz | (for the FGM)
| timestamp | Vx | Vy | Vz | n | T | (for the ESA)
- It is important to note that the timestamps are on different cadences for each.
---

In [3]:
#-Choose input time range and spacecraft ID here:-----
event = 13
ID_t = themis_probe[event]
ID_a = artemis_probe[event]
#-----------------------------------------------------

What this cell does:
- Import the raw tplot variable between the specified start and stop times. Converts them to a string in the correct format to search. Get 1-minute, l2 data.
- Create a DataFrame stacking the columns together for all the specified variables above
- Interpolate any NaN values
- Iterate over the times column and convert the strings to datetime objects
- Name the column indices what they represent. Can call them by these names later.

In [5]:
omni_data_import = pyspedas.omni.data(trange=[(starts[event]+timedelta(minutes=30)).strftime('%Y-%m-%d/%H:%M:%S'),stops[event].strftime('%Y-%m-%d/%H:%M:%S')], datatype='1min', level='hro2', time_clip=True)
omni_data = pd.DataFrame(np.column_stack((get_data('IMF')[0], get_data('BX_GSE')[1], get_data('BY_GSE')[1], get_data('BZ_GSE')[1], get_data('Vx')[1], get_data('Vy')[1], get_data('Vz')[1], get_data('proton_density')[1], get_data('T')[1])));
omni_data[[1,2,3,4,5,6,7,8]].interpolate(method='linear');

for i in range(len(omni_data[0])):
    omni_data[0][i] = dt.datetime.utcfromtimestamp(omni_data[0][i])

omni_data.columns = ['time', 'bx', 'by', 'bz', 'vx', 'vy', 'vz', 'n', 'T']

24-Feb-23 14:36:03: Downloading remote index: https://spdf.gsfc.nasa.gov/pub/data/omni/omni_cdaweb/hro2_1min/2019/
24-Feb-23 14:36:03: File is current: omni_data/hro2_1min/2019/omni_hro2_1min_20191101_v01.cdf
24-Feb-23 14:36:04: Time clip was applied to: IMF
24-Feb-23 14:36:04: Time clip was applied to: PLS
24-Feb-23 14:36:04: Time clip was applied to: IMF_PTS
24-Feb-23 14:36:04: Time clip was applied to: PLS_PTS
24-Feb-23 14:36:04: Time clip was applied to: percent_interp
24-Feb-23 14:36:04: Time clip was applied to: Timeshift
24-Feb-23 14:36:04: Time clip was applied to: RMS_Timeshift
24-Feb-23 14:36:04: Time clip was applied to: RMS_phase
24-Feb-23 14:36:04: Time clip was applied to: Time_btwn_obs
24-Feb-23 14:36:04: Time clip was applied to: F
24-Feb-23 14:36:04: Time clip was applied to: BX_GSE
24-Feb-23 14:36:04: Time clip was applied to: BY_GSE
24-Feb-23 14:36:04: Time clip was applied to: BZ_GSE
24-Feb-23 14:36:04: Time clip was applied to: BY_GSM
24-Feb-23 14:36:04: Time clip 

What this cell does:
- Import the raw FGM data between the start and stop times, but make the start time 30 minutes later than that of OMNI. This lets us slide our data around a bit.
- Import the raw ESA data just like above.

In [6]:
themis_fgm_data_import = pyspedas.themis.fgm(trange=[(starts[event]+timedelta(minutes=30)).strftime('%Y-%m-%d/%H:%M:%S'),stops[event].strftime('%Y-%m-%d/%H:%M:%S')], probe=themis_probe[event], varnames='th'+ID_t+'_fgs_gse', time_clip=True)
themis_esa_data_import = pyspedas.themis.esa(trange=[(starts[event]+timedelta(minutes=30)).strftime('%Y-%m-%d/%H:%M:%S'),stops[event].strftime('%Y-%m-%d/%H:%M:%S')], probe=themis_probe[event], varnames=['th'+ID_t+'_peif_density', 'th'+ID_t+'_peif_avgtemp', 'th'+ID_t+'_peif_velocity_gse'], time_clip=True)

artemis_fgm_data_import = pyspedas.themis.fgm(trange=[starts[event].strftime('%Y-%m-%d/%H:%M:%S'),stops[event].strftime('%Y-%m-%d/%H:%M:%S')], probe=artemis_probe[event], varnames='th'+ID_a+'_fgs_gse', time_clip=True)
artemis_esa_data_import = pyspedas.themis.esa(trange=[(starts[event]+timedelta(minutes=30)).strftime('%Y-%m-%d/%H:%M:%S'),stops[event].strftime('%Y-%m-%d/%H:%M:%S')], probe=artemis_probe[event], varnames=['th'+ID_a+'_peif_density', 'th'+ID_a+'_peif_avgtemp', 'th'+ID_a+'_peif_velocity_gse'], time_clip=True)

24-Feb-23 14:36:08: Downloading remote index: http://themis.ssl.berkeley.edu/data/themis/thd/l2/fgm/2019/
24-Feb-23 14:36:08: File is current: themis_data/thd/l2/fgm/2019/thd_l2_fgm_20191127_v01.cdf
24-Feb-23 14:36:08: Time clip was applied to: thd_fgs_gse
24-Feb-23 14:36:08: Downloading remote index: http://themis.ssl.berkeley.edu/data/themis/thd/l2/esa/2019/
24-Feb-23 14:36:09: File is current: themis_data/thd/l2/esa/2019/thd_l2_esa_20191127_v01.cdf
24-Feb-23 14:36:09: Time clip was applied to: thd_peif_density
24-Feb-23 14:36:09: Time clip was applied to: thd_peif_avgtemp
24-Feb-23 14:36:09: Time clip was applied to: thd_peif_velocity_gse
24-Feb-23 14:36:09: Downloading remote index: http://themis.ssl.berkeley.edu/data/themis/thb/l2/fgm/2019/
24-Feb-23 14:36:09: File is current: themis_data/thb/l2/fgm/2019/thb_l2_fgm_20191127_v01.cdf
24-Feb-23 14:36:09: Time clip was applied to: thb_fgs_gse
24-Feb-23 14:36:09: Downloading remote index: http://themis.ssl.berkeley.edu/data/themis/thb/

What this cell does:
- Create a DataFrame by stacking the FGM columns together, first time and then the magnetometer readings
- Create a DataFrame by stacking the ESA columns together, first time and then velocity, density, and temperature
- Iterate iver the time strings and create datetime objects out of them

In [7]:
themis_fgm = pd.DataFrame(np.column_stack((get_data('th'+ID_t+'_fgs_gse')[0], get_data('th'+ID_t+'_fgs_gse')[1])))
themis_esa = pd.DataFrame(np.column_stack((get_data('th'+ID_t+'_peif_velocity_gse')[0], get_data('th'+ID_t+'_peif_velocity_gse')[1], get_data('th'+ID_t+'_peif_density')[1], get_data('th'+ID_t+'_peif_avgtemp')[1])))

for i in range(len(themis_fgm[0])):
    themis_fgm[0][i] = dt.datetime.utcfromtimestamp(themis_fgm[0][i])
for j in range(len(themis_esa[0])):
    themis_esa[0][j] = dt.datetime.utcfromtimestamp(themis_esa[0][j])

artemis_fgm = pd.DataFrame(np.column_stack((get_data('th'+ID_a+'_fgs_gse')[0], get_data('th'+ID_a+'_fgs_gse')[1])))
artemis_esa = pd.DataFrame(np.column_stack((get_data('th'+ID_a+'_peif_velocity_gse')[0], get_data('th'+ID_a+'_peif_velocity_gse')[1], get_data('th'+ID_a+'_peif_density')[1], get_data('th'+ID_a+'_peif_avgtemp')[1])))

for i in range(len(artemis_fgm[0])):
    artemis_fgm[0][i] = dt.datetime.utcfromtimestamp(artemis_fgm[0][i])
for j in range(len(artemis_esa[0])):
    artemis_esa[0][j] = dt.datetime.utcfromtimestamp(artemis_esa[0][j])

What this cell does:
- Defines the function for averaging the THEMIS data

In [19]:
def average(times, data):
    minute = times[0].minute # Set the current first minute of the data set
    timeAvgs = [] # Create empty array to store the time-averaged values
    avgArr = [] # Create empty storage array
    timeStep = [] # Create empty time step array
    for i in range(len(times)): # Index the values
        if times[i].minute == minute: # If the time of the next value equals the one set for the minute
            avgArr.append(data[i]) # Append this to the storage array
        elif times[i].minute == minute + 1: # If the time of the next value equals the next minute
            #print(avgArr)
            timeAvgs.append(np.average(avgArr)) # Average the storage array and append it to the time-averaged value array
            #print(np.average(avgArr))
            timeStep.append(dt.datetime(times[i-1].year, times[i-1].month, times[i-1].day, times[i-1].hour, times[i-1].minute, 00)) # Create a timestamp for the previous minute centered at 0s to line up with the orbit timestamps
            minute= times[i].minute # Set the new current minute to start averaging over
            avgArr = [] # Clear the storage array
        elif times[i].minute == minute - 59: # This is for rollover: when the next minute is 0
            #print(avgArr)
            timeAvgs.append(np.average(avgArr))
            #print(np.average(avgArr))
            timeStep.append(dt.datetime(times[i-1].year, times[i-1].month, times[i-1].day, times[i-1].hour, times[i-1].minute, 00))
            minute = times[i].minute
            avgArr = []
    return [timeStep, timeAvgs] # Return the time-averaged array and the timestamp array

What this cell does:
- Concatenates the averaged 1-minute time, Bx, By, and Bz data into one DataFrame
- Renames the columns to their respective representations, like with the OMNI columns

In [40]:
themis_fgm_1min = pd.DataFrame(np.column_stack((average(themis_fgm[0], themis_fgm[1])[0], average(themis_fgm[0], themis_fgm[1])[1], average(themis_fgm[0], themis_fgm[2])[1], average(themis_fgm[0], themis_fgm[3])[1]))).astype('object')
themis_fgm_1min.columns = ['time', 'bx', 'by', 'bz']

artemis_fgm_1min = pd.DataFrame(np.column_stack((average(artemis_fgm[0], artemis_fgm[1])[0], average(artemis_fgm[0], artemis_fgm[1])[1], average(artemis_fgm[0], artemis_fgm[2])[1], average(artemis_fgm[0], artemis_fgm[3])[1]))).astype('object')
artemis_fgm_1min.columns = ['time', 'bx', 'by', 'bz']

#themis_fgm_1min['time'] = themis_fgm_1min['time'].dt.to_pydatetime()
for i in range(len(themis_fgm_1min['time'])):
    themis_fgm_1min['time'][i] = themis_fgm_1min['time'][i].to_pydatetime()
for j in range(len(artemis_fgm_1min['time'])):
    artemis_fgm_1min['time'][j] = artemis_fgm_1min['time'][j].to_pydatetime()

## Calculate correlation coefficients
---

In [43]:
def calculate_corrs(series_fixed, series_fixed_t, series2, series2T): # Series 1 is the "fixed" series close to Earth, Series 2 is the one we slide
    c=[] # Define a matrix to store correlation coefficient
    for j in range(len(series_fixed_t) - 59): # Range for the length of the fixed series minus one hour
        a = [] # Temporary storage array
        n2_start = series2T.index(series_fixed_t[j])
        n2_stop = series2T.index(series_fixed_t[j+59])
        for i in range(30):
            coef = np.corrcoef(series_fixed[j:j+59], series2[n2_start-i:n2_stop-i], 1)[0, 1]
            if coef < 0:
                coef = 0
            a.append(coef)
        c.append(a)
    return np.column_stack(([j], [j+59], c))

In [31]:
omni_data['time'][1]

datetime.datetime(2019, 11, 27, 13, 31)

In [80]:
artemis_fgm_1min['time'][1]

Timestamp('2019-11-27 13:01:00')

In [44]:
a = calculate_corrs(omni_data['bx'], omni_data['time'], artemis_fgm_1min['bx'], artemis_fgm_1min['time'])

TypeError: 'RangeIndex' object is not callable

In [None]:
corrs = {
    'artemis-omni': {
        for i in range
        'bx': calculate_corrs(omni_data['bx'], omni_data['time'], artemis_fgm_1min['bx'], artemis_fgm_1min['time'])

    }
    'artemis-themis': {}
    'artemis-mms': {}
    'themis-omni': {}
    'mms-omni': {}

}

## Multi-even analysis
---
Stuff

In [None]:
themis_fgm_data = {}
keys = range(len(time_ranges))
for i in keys:
    themis_fgm_data[i] = get_data(pyspedas.themis.fgm(trange=time_ranges[i], probe=themis_probe[i])[2])

In [92]:
themis_fgm_times = {}
themis_fgm_fields = {}
keys = range(len(themis_fgm_data[0][0]))

for i in keys:
    themis_fgm_times[i] = [themis_timestamps.append(dt.datetime.utcfromtimestamp(themis_fgm_data[0][j])) for j in range(len(themis_fgm_data[0]))]

array([1.31941472e+09, 1.31941473e+09, 1.31941473e+09, ...,
       1.31950083e+09, 1.31950084e+09, 1.31950084e+09])

In [82]:

themis_timestamps=[]


themis_fields = themis_fgm_data[1]

21-Feb-23 11:01:02: Downloading remote index: http://themis.ssl.berkeley.edu/data/themis/thd/l2/fgm/2011/
21-Feb-23 11:01:03: File is current: themis_data/thd/l2/fgm/2011/thd_l2_fgm_20111024_v01.cdf
