In [1]:
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta

import h5py
import os
import sys

## 10-minute PSD on our stations ::::::::::::::::::::::::::::::::::::::::::::::::

In [None]:
# Do without interpolation

In [None]:
# Identify Minutes to Analyze (from previous analysis)
# Pull records for those minutes (10 records)
# flatten records (60*2597 datapoints for each record)
# Check for outliers
# I did this by looking at the individual minutes
# Add timestamps to all records
# Run psd with calibration

In [None]:
def pull_record(filename):
    '''
    Pulls data and timestamps from a SNIPE Magnetometer .h5 file.  Cleans the timestamps and puts them into a list.
    Puts the magnetometer reading data into a 2D array (the transpose of the stored 'data' dataset); the rest of
    this notebook iterates over its rows as one second of samples each.
    The function also captures the timestamp encoded in the file name for possible use.
    
    Parameters:
    - filename (string): .h5 data file.  The base name is expected to look like
      'snipe_hunt_YYYY-MM-DD_HH-MM-SS-ffffff.h5'; any directory prefix is allowed.
    
    Returns:
    - file_timestamp (datetime): Timestamp parsed from the file name (minute worth of data)
    - timestamps (list of datetime): Timestamps associated with data 'seconds'
    - data (2D array): Data associated with timestamps
    
    Raises:
    - ValueError: if the file name or a stored timestamp cannot be parsed.
    
    Note: The function Transposes the data array to make it easier for manipulation.
    '''
    # Open the file once and close it deterministically; the datasets are copied
    # into memory before the handle is released.
    with h5py.File(filename, 'r') as h5_file:
        index = np.array(h5_file['timestamps'])
        data = np.array(h5_file['data']).T # See note above
    
    # Parse from the base name so the result does not depend on how long the
    # directory part of the path happens to be ('snipe_hunt_' is 11 characters).
    base_name = os.path.basename(filename)
    file_timestamp = datetime.strptime(base_name[11:-3], '%Y-%m-%d_%H-%M-%S-%f')
    
    timestamps = []
    
    for i in index:
        # Decode the byte string to a regular string
        decoded_string = i.decode('utf-8')

        # Convert the string to datetime format
        try:
            d = datetime.strptime(decoded_string, '%Y-%m-%d %H:%M:%S.%f')
        except ValueError:
            # Some records are missing the '.' before the fractional seconds
            # (e.g. '2024-07-27 20:13:44000000'); re-insert it and retry.
            decoded = decoded_string[:19] + '.' + decoded_string[19:]
            d = datetime.strptime(decoded, '%Y-%m-%d %H:%M:%S.%f')
        timestamps.append(d)

    return file_timestamp, timestamps, data



def flatten_data(data):
    '''
    Collapse a 2D array of time data (seconds, samples) into one long 1D array of samples.
    
    Rows are concatenated in order, so the array must already be transposed so that
    each row holds one second of samples.
    '''
    samples_1d = data.flatten()
    return samples_1d

In [None]:
# Reference start time; the analysis window is expressed as an offset from it
start_time = datetime(2024, 7, 26, 17, 30, 0)

# Ten-minute window beginning 130000 seconds after the reference start
analysis_start_time = start_time + timedelta(seconds = 130000)
analysis_end_time = analysis_start_time + timedelta(minutes = 10)

# Show the window bounds as the cell output
analysis_start_time, analysis_end_time

In [None]:
# Directories whose .h5 files are scanned below for records inside the analysis window
# (one directory per channel: EW and NS)
directoryEW = './july_data/Ha_EW'
directoryNS = './july_data/Ha_NS'

In [None]:
def ten_minute_ts(directory, analysis_start_time, minutes=10, samples_per_second=2597):
    '''
    Builds one continuous time series from the one-minute .h5 records in `directory`
    whose file-name timestamp falls inside the analysis window.
    
    Parameters:
    - directory (string): folder containing files named 'snipe_hunt_YYYY-MM-DD_HH-MM-SS-ffffff.h5'
    - analysis_start_time (datetime): start of the window
    - minutes (int): window length in minutes, and the number of records the output can hold (default 10)
    - samples_per_second (int): samples stored per second in each record (default 2597)
    
    Returns:
    - output (1D float array): length minutes*60*samples_per_second.  Records are written
      back to back in file-name order; slots for which no record was found stay zero.
    
    Raises:
    - ValueError: if a file name cannot be parsed or a record does not hold exactly
      60*samples_per_second samples.
    '''
    samples_per_record = 60 * samples_per_second

    # To ensure proper ordering of data, first need to run a file type check and sort
    file_list = sorted(f for f in os.listdir(directory) if f.endswith(".h5"))

    # Initilize the data holder array
    output = np.zeros(minutes * samples_per_record)

    # Number of records written so far (also used for progress tracking)
    counter = 0
    
    analysis_end_time = analysis_start_time + timedelta(minutes = minutes)

    # Iterate over each file in the directory
    for filename in file_list:
        # First, extract the file time from the file's name
        file_time = datetime.strptime(filename[11:-3], '%Y-%m-%d_%H-%M-%S-%f')
        # Files are sorted, so nothing after the window end can qualify
        if file_time > analysis_end_time:
            break
        # If the file time is before the analysis starts, continue (skip) over it
        if file_time < analysis_start_time:
            continue
        # Records can be spaced slightly under 60 s apart, so one more file than
        # the buffer holds may fall inside the window; stop instead of overflowing.
        if counter >= minutes:
            break

        # If the the file is in the analysis time, print the file name for situational awareness
        print(f'Includes file: {filename}')
        file_path = os.path.join(directory, filename)

        # pull_record is defined more than once in this notebook with different
        # return lengths; the data array is the last element in every version.
        data = flatten_data(pull_record(file_path)[-1])

        if data.size != samples_per_record:
            raise ValueError(f'{filename}: expected {samples_per_record} samples, found {data.size}')

        # Place this record in its slot of the output array
        low_index = counter * samples_per_record
        high_index = low_index + samples_per_record
        output[low_index:high_index] = data

        # Update counter for progress tracking
        counter += 1
        print(f'Completed {counter} of {len(file_list)} records', end = '\r')
        
    return output

In [None]:
# Samples per second, used by the PSD cells below
sample_rate = 2597
start_time = datetime(2024, 7, 26, 17, 30, 0)

# Ten-minute window beginning 132000 seconds after the reference start
analysis_start_time = start_time + timedelta(seconds = 132000)
analysis_end_time = analysis_start_time + timedelta(minutes = 10)

print('Start Time:',analysis_start_time,'| End Time:',analysis_end_time)
# Overlay the raw ten-minute series of both channels (x axis is the sample index)
plt.plot(ten_minute_ts(directoryEW, analysis_start_time),alpha=.5)
plt.plot(ten_minute_ts(directoryNS, analysis_start_time), alpha=.5)

In [None]:
# Keep the ten-minute series of each channel for the FFT below
# (this re-reads the same files that were loaded for the plot above)
tmts_outputEW = ten_minute_ts(directoryEW, analysis_start_time)
tmts_outputNS = ten_minute_ts(directoryNS, analysis_start_time)

In [None]:
# Two-sided FFT of each ten-minute series
fft_EW = np.fft.fft(tmts_outputEW)
fft_NS = np.fft.fft(tmts_outputNS)

n_EW = len(fft_EW)
n_NS = len(fft_NS)

# Keep the first half of the bins of each transform.  Each channel is sliced with
# its own length (the original used an undefined name `n` here).
psd_EW = (1 / (sample_rate * n_EW)) * np.abs(fft_EW[:n_EW//2])**2
# Square root of twice the PSD
psd_EW = np.sqrt(2*psd_EW)

psd_NS = (1 / (sample_rate * n_NS)) * np.abs(fft_NS[:n_NS//2])**2
psd_NS = np.sqrt(2*psd_NS)

In [None]:
# Log-log overlay of the two uncalibrated spectra.
# No x values are passed, so the horizontal axis is the bin index, not frequency.
plt.plot(psd_EW, alpha = .5)
plt.plot(psd_NS, alpha = .5)
plt.yscale('log')
plt.xscale('log')
plt.show()

In [None]:
# Get the fft frequencies needed for this analysis.
# The bins must describe the same record that produced psd_EW / psd_NS, i.e. the
# full ten-minute series, not a single 60-second record; otherwise the frequency
# array is shorter than the spectrum and has the wrong bin spacing.
n_samples = len(tmts_outputEW)
freqs = np.fft.fftfreq(n_samples, 1/sample_rate)[:n_samples//2]
calibration_data_file_EW = 'calibrationN749.csv'
calibration_data_file_NS = 'calibrationN761.csv'

In [None]:
from snipe_analysis import calibrate_data

In [None]:
# Apply each channel's calibration file to its spectrum
# (calibrate_data is imported from snipe_analysis in the cell above)
cal_psd_EW = calibrate_data(freqs, psd_EW, calibration_data_file_EW)
cal_psd_NS = calibrate_data(freqs, psd_NS, calibration_data_file_NS)

In [None]:
# Log-log overlay of the two calibrated spectra (horizontal axis is the bin index)
plt.plot(cal_psd_EW, alpha = .5)
plt.plot(cal_psd_NS, alpha = .5)
plt.yscale('log')
plt.xscale('log')
plt.show()

In [None]:
# Sanity check: number of entries in the calibrated EW spectrum
len(cal_psd_EW)

In [None]:
# Plot the ten-minute window for every directory on one figure.
# NOTE(review): `directories` is not assigned in any cell above this one; the visible
# assignment is further down the notebook, so this cell fails on a fresh top-to-bottom run.
# NOTE(review): the loop body repeats the logic of ten_minute_ts() defined above.
for directory in directories:
    # To ensure proper ordering of data, first need to run a file type check and sort
    file_list = [f for f in os.listdir(directory) if f.endswith(".h5")]
    file_list = sorted(file_list)

    # Initilize the data holder array
    output = np.zeros(10*60*2597) # 10 minutes, 60 seconds, 2597 samples/second

    # Number of records written so far; also selects the slot in `output`
    counter = 0

    # Iterate over each file in the directory
    for filename in file_list:
        # File time is parsed from the characters after the 11-character name prefix
        file_time = datetime.strptime(filename[11:-3], '%Y-%m-%d_%H-%M-%S-%f')
        # Skip files outside the [analysis_start_time, analysis_end_time] window
        if file_time > (analysis_end_time):
            continue
        if file_time < (analysis_start_time):
            continue
        else:
            print(filename)

            file_path = os.path.join(directory, filename)

            # Expects the three-value pull_record defined near the top of the notebook
            file_timestamp, timestamps, data = pull_record(file_path)  

            data = flatten_data(data)

            # Slot for this record inside the fixed-size output array
            low_index = counter * 60 * 2597
            high_index = low_index + 60 * 2597

            # NOTE(review): an 11th in-window file would give an empty target slice here
            output[low_index:high_index] = data

            # Update counter for progress tracking
            counter += 1
            print(f'Completed {counter} of {len(file_list)} records', end = '\r')

    plt.plot(output, alpha = .5)

In [None]:
# Identify Minutes to Analyze (from previous analysis)
# Pull records for those minutes (10 records)
# flatten records (60*2597 datapoints for each record)
# Add timestamps to all records
# Interpolate timestamps
# Run psd with calibration

## CALIBRATION WORK :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

In [2]:
# Two calibration files in different formats: a comma-separated .csv and a .dat
# that is read with np.loadtxt's default delimiter below
cal_data_file1 = 'calibrationN691.csv'
cal_data_file2 = 'Coil1Calibration.dat'

In [None]:
# Load the .csv calibration table and split it into its three columns
calibration_data = np.loadtxt(cal_data_file1, delimiter = ",")

frequency_calibration = calibration_data[:, 0]  # Frequency values in Hz
voltage_calibration = 10**-3 * calibration_data[:, 1]    # Volts per nano-Tesla (original calibration data is in mV/nT)
phase_calibration = calibration_data[:, 2]  # Third column is used as the phase; units are not stated here - TODO confirm

In [None]:
# Raw tables of both calibration files, compared in the plots below
a = np.loadtxt(cal_data_file1, delimiter = ",")  # comma-separated file
b = np.loadtxt(cal_data_file2)                   # default (whitespace) delimiter

In [3]:
from snipe_analysis import get_calibration_data

In [5]:
# Inspect what the snipe_analysis helper returns for the .dat file (output shown below)
get_calibration_data(cal_data_file2)

(array([2.45276066e+03, 1.36295890e+03, 7.57373918e+02, 4.20860272e+02,
        2.33865154e+02, 1.29955033e+02, 7.22138816e+01, 4.01280703e+01,
        2.22985109e+01, 1.23909170e+01, 6.88542948e+00, 3.82612030e+00,
        2.12611233e+00, 1.18144577e+00, 6.56510051e-01, 3.64811874e-01,
        2.02719979e-01, 1.12648169e-01, 6.25967412e-02, 3.47839830e-02,
        1.93288892e-02, 1.07407469e-02, 5.96845700e-03, 3.31657372e-03,
        1.84296565e-03, 1.02410580e-03, 5.69078809e-04]),
 array([1.12287191e-04, 1.61830973e-04, 1.72531087e-04, 1.78879447e-04,
        1.82661065e-04, 1.84718472e-04, 1.85721981e-04, 1.86865820e-04,
        1.87840177e-04, 1.88606486e-04, 1.88007419e-04, 1.87769617e-04,
        1.88239406e-04, 1.87768249e-04, 1.85937531e-04, 1.81875648e-04,
        1.71471363e-04, 1.65786353e-04, 1.53167249e-04, 1.46496839e-04,
        1.41268730e-04, 1.31100715e-04, 1.16144547e-04, 9.21417451e-05,
        6.53369643e-05, 5.52598503e-05, 2.75744396e-05]),
 array([ 6.05502442e

In [None]:
# First figure: each column of the .csv table plotted against row number
plt.plot(a[:, 0], label = 'a0')
plt.plot(a[:, 1], label = 'a1')
plt.plot(a[:, 2], label = 'a2')
plt.legend()
plt.show()
# Second figure: columns 1 and 2 of the .dat table against column 0, log x axis
plt.plot(b[:,0],b[:, 1], label = 'b1')
plt.plot(b[:,0],b[:, 2], label = 'b2')
plt.xscale('log')
plt.legend()
plt.show()

1. After the entire dataset is finished determine the mean, std and average difference from mean for the minute.
2. For any data point that looks anomalous (an outlier), record its index.
3. Replace the value at that index with the interpolation of the values on either side.
4. Give data on outliers.

In [None]:
# Summary statistics of the series being filtered.
# NOTE(review): the only visible assignment of `truncated_data` is further down the notebook.
truncated_data.describe()

In [None]:
# Visual check of the series before outlier removal
plt.plot(truncated_data)

In [None]:
# Number of standard deviations from the mean beyond which a point is treated as an outlier
stddev_cutoff = 2

avg = truncated_data.mean()
mx = max(truncated_data)
mn = min(truncated_data)
stddev = np.std(truncated_data)
# Largest absolute distance of any point from the mean (not used further in this cell)
max_diff = max(abs(mx-avg),abs(mn-avg))

# Acceptance band around the mean
std_cutoff_low = avg - (stddev_cutoff * stddev)
std_cutoff_high = avg + (stddev_cutoff * stddev)

# Index labels of every point outside the band
outlier_indices = truncated_data[(truncated_data < std_cutoff_low) | (truncated_data > std_cutoff_high)].index

# Blank the outliers in place; re-running this cell therefore starts from the already-blanked series
truncated_data.loc[outlier_indices] = np.nan

print(truncated_data[25:35])

# Interpolate the NaN values
truncated_interpolated = truncated_data.interpolate()

# Fill in any leading or trailing NaNs (for now simply use the first or last good value)
truncated_interpolated = truncated_interpolated.ffill()
truncated_interpolated = truncated_interpolated.bfill()

In [None]:
def outlier_filter(series, stddev_cutoff):
    '''
    Replaces outliers in `series` with values interpolated from their neighbours.
    
    A point is an outlier when it lies more than `stddev_cutoff` standard deviations
    from the mean of `series`.
    
    Parameters:
    - series (pandas Series): the data to filter; it is not modified
    - stddev_cutoff (number): the number of standard deviations from the mean we consider an outlier
    
    Returns:
    - pandas Series with the same index as `series`: outliers replaced by interpolated
      values, and any leading/trailing gaps filled with the nearest good value
    '''
    avg = series.mean()
    stddev = np.std(series)

    std_cutoff_low = avg - (stddev_cutoff * stddev)
    std_cutoff_high = avg + (stddev_cutoff * stddev)

    # mask() returns a new object with NaN wherever the condition holds, so the
    # caller's series (and any notebook-level variable) is left untouched.
    is_outlier = (series < std_cutoff_low) | (series > std_cutoff_high)
    without_outliers = series.mask(is_outlier)

    # Interpolate the NaN values, then fill any leading or trailing NaNs that
    # interpolation leaves behind with the first / last good value
    return without_outliers.interpolate().ffill().bfill()

## Establish global start and stop times.
- Reviewed start and stop times for all experiments; this data is summarized in a spreadsheet created for all the runs.
- Also checked the overall data integrity and identified a few gaps and other issues in the data.
- The determined boundaries are set below, and then a check run for the expected number of records for that period.

In [None]:
# Global analysis bounds; the last included second is one second before 1530
start_time = datetime(2024, 7, 26, 17, 30, 0) # Jul 26 @ 1730
end_time = datetime(2024, 7, 28, 15, 29, 59) # Jul 28 @ 1530, not inclusive
# Display the span between the bounds as the cell output
end_time-start_time

In [None]:
# One day of seconds plus 79199 more, plus 1 because both end points are counted
print(f'Total length of records should be {24*60*60 + 79199 + 1}.')

In [None]:
def pull_record(filename):
    '''
    Pulls data and timestamps from .h5 file.  Cleans the timestamps and returns them as a list
    of datetimes, together with the transposed 'data' dataset as a 2D array.  The rest of this
    notebook iterates over the rows of that array as one second of samples each.
    
    Parameters:
    - filename (string): path to the .h5 data file
    
    Returns:
    - timestamps (list of datetime): parsed entries of the 'timestamps' dataset
    - data (2D array): transposed 'data' dataset
    
    Raises:
    - ValueError: if a stored timestamp cannot be parsed.
    
    Note: The function Transposes the data array to make it easier for manipulation.
    NOTE(review): this definition replaces the earlier three-value pull_record once the cell runs.
    '''
    # Open the file once and close it deterministically; the datasets are copied
    # into memory before the handle is released.
    with h5py.File(filename, 'r') as h5_file:
        index = np.array(h5_file['timestamps'])
        data = np.array(h5_file['data']).T # See note above
    
    timestamps = []
    
    for i in index:
        # Decode the byte string to a regular string
        decoded_string = i.decode('utf-8')

        # Convert the string to datetime format
        try:
            d = datetime.strptime(decoded_string, '%Y-%m-%d %H:%M:%S.%f')
        except ValueError:
            # Some records are missing the '.' before the fractional seconds
            # (e.g. '2024-07-27 20:13:44000000'); re-insert it and retry.
            decoded = decoded_string[:19] + '.' + decoded_string[19:]
            d = datetime.strptime(decoded, '%Y-%m-%d %H:%M:%S.%f')
        timestamps.append(d)

    return timestamps, data

In [None]:
def get_interpolated_seconds_data(timestamps, data):
    '''
    Downsamples a record to one mean value per row of `data` and re-grids those
    means onto whole seconds.
    
    Each row mean is treated as a sample taken at the matching entry of `timestamps`;
    the value at each timestamp floored to the second is then obtained by linear
    interpolation between those samples (values outside the sampled range take the
    nearest end value, as np.interp does).
    
    Parameters:
    - timestamps (sequence of datetime): one timestamp per row of `data`, in increasing order
    - data (2D array): one row of samples per timestamp
    
    Returns:
    - timestamps_floor (list of datetime): `timestamps` with microseconds set to 0
    - avg_voltage_interp (1D float array): interpolated row means at those floored times
    
    Raises:
    - ValueError: if `timestamps` and `data` have different lengths.
    '''
    # Nothing to interpolate for an empty record
    if len(timestamps) == 0:
        return [], np.array([])

    # Create the array of floored timestamps (to the second)
    timestamps_floor = [x.replace(microsecond=0) for x in timestamps]   
    
    # Convert both sets of timestamps to seconds since the epoch
    timestamps_floor_int = np.array([dt.timestamp() for dt in timestamps_floor])
    sample_times = np.array([dt.timestamp() for dt in timestamps])
        
    # One mean per row; sized from the data instead of assuming exactly 60 rows
    avg_voltage = np.array([second.mean() for second in data], dtype=float)
        
    # Now interpolate!
    avg_voltage_interp = np.interp(timestamps_floor_int, sample_times, avg_voltage)
        
    return timestamps_floor, avg_voltage_interp

In [None]:
def get_NON_interpolated_seconds_data(timestamps, data):
    '''
    Downsamples a record to the mean of each row of `data`, without any interpolation.
    
    Parameters:
    - timestamps: accepted for symmetry with get_interpolated_seconds_data; not used
    - data (2D array): one row of samples per second (at most 60 rows)
    
    Returns:
    - 1D float array of length 60 holding the row means in order; entries beyond
      the number of rows in `data` stay zero
    '''
    # Fixed-size holder: one slot per second of a one-minute record
    per_second_mean = np.zeros(60)

    row_number = 0
    for samples in data:
        per_second_mean[row_number] = samples.mean()
        row_number += 1

    return per_second_mean

In [None]:
# Quick test: run the two-value pull_record and the interpolating downsampler on one file
# (the path is relative to the notebook's working directory)
test_file = 'test_file.h5'
timestamps, data = pull_record(test_file)
get_interpolated_seconds_data(timestamps, data)

In [None]:
# Every data directory processed by the multi-directory cells in this notebook
directories = [
'./july_data/Bu_1',
 './july_data/Bu_2',
 './july_data/Bu_3',
 './july_data/Bu_4',
 './july_data/Ha_EW',
 './july_data/Ha_NS',
 './july_data/Ms_EW',
 './july_data/Ms_NS',
 './july_data/Ob_EW',
 './july_data/Ob_NS']

Directories to scan:
1. [X] Bucknell Mag 1
2. [x] Bucknell Mag 2
3. [x] Bucknell Mag 3
4. [x] Bucknell Mag 4
5. [x] Hayward N/S
6. [x] Hayward E/W
7. [x] Messiah N/S
8. [x] Messiah E/W
9. [x] Oberlin N/S
10. [x] Oberlin E/W

#### I used the following cell to investigate the directories to identify problematic data artifacts

In [None]:
# Report every file whose two-digit field at [-15:-13] of the name is not exactly one more
# than the previous file's (wrapping from 59 to 0).
# Presumably that field is the minute of 'snipe_hunt_YYYY-MM-DD_HH-MM-SS-ffffff.h5' - TODO confirm.
for directory in directories:
    # To ensure proper ordering of data, first need to run a file type check
    file_list = [f for f in os.listdir(directory) if f.endswith(".h5")]
    file_list = sorted(file_list)

    # Field value of the previous file
    a = int(file_list[0][-15:-13])
    for file in file_list[1:]:
        # Wrap-around: after 59 the next expected value is 0
        if a == 59:
            a = -1

        b = int(file[-15:-13])
        if b != (a + 1):
            print(f"File fail at {directory+'/'+file}")
        a = b

# add more detail:

The reported failures for Ha_NS, Ms_EW, Ms_NS, Bu_2[0,2] and Bu_4[2] are not true failures, as the error crossed over the millisecond.

There is a ~5 minute gap in the data for Bu_1 and a ~10 minute gap for Bu_2[1].  There are multiple small gaps for Bu_3 and Bu_4, which coincide in time.

### Errors and actions:
1. Bucknell Mag2 - ValueError: time data '2024-07-27 20:13:44000000' does not match format '%Y-%m-%d %H:%M:%S.%f'; at file 1603
- Resolved!  Deleted bad data files and adjusted pull record to account for single corrupted file.
2. Oberlin EW - OSError: Unable to open file (bad object header version number); at file 1458
- deleted snipe_hunt_2024-07-27_15-44-32-250463
3. Oberlin NS - OSError: Unable to open file (bad object header version number); at file 1460
- deleted snipe_hunt_2024-07-27_15-47-18-730810

## Single Directory, Interpolated Time Series

In [None]:
## For the below, we combine indexed series instead of adding the index at the end

directory = './july_data/Ms_EW'

# To ensure proper ordering of data, first need to run a file type check
file_list = [f for f in os.listdir(directory) if f.endswith(".h5")]
file_list = sorted(file_list)

# Data holder: one NaN slot per second between the global bounds.  The dtype is
# given explicitly because a Series built from an index alone is not guaranteed
# to be float, and interpolation below needs a numeric dtype.
full_index = pd.date_range(start=start_time, end=end_time, freq='s')
minutes = pd.Series(np.nan, index = full_index, dtype = float)

# For tracking progress
counter = 0

# Iterate over each file in the directory (file_list is already filtered to .h5)
for filename in file_list:
    file_path = os.path.join(directory, filename)

    timestamps, data = pull_record(file_path)  

    timestamp_floors, interpolated_data = get_interpolated_seconds_data(timestamps, data)
    
    for i, ts in enumerate(timestamp_floors):
        # Seconds outside the global bounds are dropped by the .loc slice below
        # anyway; skipping them here avoids growing the Series one label at a time.
        if ts in full_index:
            minutes[ts] = interpolated_data[i]

    # Update counter for progress tracking
    counter += 1
    print(f'Completed {counter} of {len(file_list)} records', end = '\r')

# Now index the data to the new timestamps
truncated_data = minutes.loc[start_time:end_time]

# Fill the seconds for which no record supplied a value
truncated_data = truncated_data.interpolate(method='linear')

In [None]:
# Per-second series for the single directory after gap interpolation
plt.plot(truncated_data)

## All Directories, for plotting

In [None]:
# One subplot per entry of `directories` (ten rows are allocated)
fig, axes = plt.subplots(nrows=10, ncols=1, figsize=(10, 30))  # Adjust figsize as needed #x#

for i, directory in enumerate(directories):
    # To ensure proper ordering of data, first need to run a file type check
    file_list = [f for f in os.listdir(directory) if f.endswith(".h5")]
    file_list = sorted(file_list)
    
    # Data holder: one NaN slot per second between the global bounds.  The dtype
    # is given explicitly because a Series built from an index alone is not
    # guaranteed to be float, and interpolation below needs a numeric dtype.
    full_index = pd.date_range(start=start_time, end=end_time, freq='s')
    minutes = pd.Series(np.nan, index = full_index, dtype = float)

    # For tracking progress
    counter = 0

    # Iterate over each file in the directory (file_list is already filtered to .h5)
    for filename in file_list:
        file_path = os.path.join(directory, filename)

        timestamps, data = pull_record(file_path)  

        timestamp_floors, interpolated_data = get_interpolated_seconds_data(timestamps, data)
    
        for j, ts in enumerate(timestamp_floors):
            # Seconds outside the global bounds are dropped by the .loc slice
            # below anyway; skipping them avoids growing the Series label by label.
            if ts in full_index:
                minutes[ts] = interpolated_data[j]

        # Update counter for progress tracking
        counter += 1
        print(f'Completed {counter} of {len(file_list)} records', end = '\r')
    
    truncated_data = minutes.loc[start_time:end_time]

    # Fill the seconds for which no record supplied a value
    truncated_data = truncated_data.interpolate(method='linear')
    
    # Plot the data on the corresponding subplot
    axes[i].plot(truncated_data.values)
    # Keep only the last path component for the title
    directory = directory.split('/')[2]
    axes[i].set_title(f'Data from {directory}')
    axes[i].set_xlabel('Seconds since 1730:00 on 26 July')
    axes[i].set_ylabel('Reading')
    axes[i].set_ylim(-5, 5)

# Adjust layout to prevent overlap
plt.tight_layout()

# Display the plot
plt.show()

Note: this ran really slowly for the first half of the last few records, then sped up.

In [None]:


# Stand-alone demo: quiver plot of the unit radial vector field.
# It does not use any of the magnetometer data in this notebook.

# Define the grid: 20 x 20 points spanning [-10, 10] on both axes
x, y = np.meshgrid(np.linspace(-10, 10, 20), np.linspace(-10, 10, 20))

# Define the vector components: position divided by its distance from the origin.
# With 20 evenly spaced points the grid has no sample at 0, so the divisor is never zero.
Fx = x / np.sqrt(x**2 + y**2)
Fy = y / np.sqrt(x**2 + y**2)

# Plot the vector field
plt.figure(figsize=(8, 8))
plt.quiver(x, y, Fx, Fy, color='blue')
plt.xlim(-10, 10)
plt.ylim(-10, 10)
plt.title('Vector Plot of F(x, y) = (x/√(x²+y²), y/√(x²+y²))')
plt.xlabel('x')
plt.ylabel('y')
plt.grid(True)
plt.show()


In [None]:
# Variant that concatenates the per-second values of every record and then
# attaches a one-second-spaced index starting at the first record's timestamp.
# NOTE(review): this assumes the records are contiguous; any gap between files
# shifts every later value relative to its true time.
fig, axes = plt.subplots(nrows=10, ncols=1, figsize=(10, 30))  # Adjust figsize as needed

for i, directory in enumerate(directories):
    # To ensure proper ordering of data, first need to run a file type check
    file_list = [f for f in os.listdir(directory) if f.endswith(".h5")]
    file_list = sorted(file_list)
    
    # Data holder: flat list of per-second values across all records
    minutes = []

    # For tracking progress
    counter = 0

    # Iterate over each file in the directory (file_list is already filtered to .h5)
    for filename in file_list:
        file_path = os.path.join(directory, filename)

        timestamps, data = pull_record(file_path)  

        # For first minute get timestamp
        if counter == 0:
            first_timestamp = timestamps[0].replace(microsecond=0)

        # get_interpolated_seconds_data returns (floored timestamps, values).
        # Only the values belong in the flat list; extending with the whole
        # tuple would append two objects instead of the per-second numbers.
        floored_times, minute_interpolated = get_interpolated_seconds_data(timestamps, data)

        # Append this record's per-second values to the list
        minutes.extend(minute_interpolated)

        # Update counter for progress tracking
        counter += 1
        print(f'Completed {counter} of {len(file_list)} records', end = '\r')

    # Number of timestamps needed
    num_timestamps = len(minutes)

    # Create a list of timestamps, each one second apart
    timestamps = [first_timestamp + timedelta(seconds=i) for i in range(num_timestamps)]

    # Now index the data to the new timestamps
    truncated_data = pd.Series(minutes, index = timestamps).loc[start_time:end_time]
    
    # Plot the data on the corresponding subplot
    axes[i].plot(truncated_data.values)
    # Keep only the last path component for the title
    directory = directory.split('/')[2]
    axes[i].set_title(f'Data from {directory}')
    axes[i].set_xlabel('Seconds since 1730:00 on 26 July')
    axes[i].set_ylabel('Reading')
    axes[i].set_ylim(-5, 5)

# Adjust layout to prevent overlap
plt.tight_layout()

# Display the plot
plt.show()

# =-=-=-=-=-=-=-=

In [None]:
# Two records whose file-name timestamps are about one minute apart
test_file1 = 'snipe_hunt_2024-07-26_15-29-26-782667.h5'
test_file2 = 'snipe_hunt_2024-07-26_15-30-26-768969.h5'

In [None]:
# Load both records with the two-value pull_record (paths are relative to the working directory)
test_timestamps1, test_data1 = pull_record(test_file1)
test_timestamps2, test_data2 = pull_record(test_file2)

In [None]:
# Inspect the value left in `first_timestamp` by the plotting loop above
first_timestamp

# ========================================================

### Testing for Jeopardy - By second analysis

In [None]:
def get_seconds_data(timestamps, data):
    '''
    Summarises each row of `data` (one second's worth of samples) with a few
    descriptive statistics.
    
    Parameters:
    - timestamps (sequence): one entry per row of `data`
    - data (2D array): one row of samples per second
    
    Returns:
    - list of dicts, one per row, with keys 'time', 'mean', 'std' and 'max_diff'
      ('max_diff' is the largest absolute distance of any sample from the row
      mean, which is useful for outlier identification)
    '''
    def summarize(stamp, samples):
        # Statistics for a single row
        center = samples.mean()
        spread = samples.std()
        largest_excursion = max(abs(max(samples) - center), abs(min(samples) - center))
        return {'time':stamp, 'mean':center, 'std':spread, 'max_diff':largest_excursion}

    return [summarize(timestamps[row], samples) for row, samples in enumerate(data)]

In [None]:
# Data directory for the by-second analysis below
directory = './June24_mini_expedition/SNIPE Mini Expedition Jun 26-28_EDDIE'

In [None]:
# To ensure proper ordering of data, first need to run a file type check
# Files are ordered by modification time here (other cells sort by file name instead)
file_list = [f for f in os.listdir(directory) if f.endswith(".h5")]
file_list.sort(key=lambda x: os.path.getmtime(os.path.join(directory, x)))

# Data holder: flat list of per-second stat dicts across all records
minutes = []

# For tracking progress
counter = 0

# Iterate over each file in the directory
for filename in file_list:
    # file_list is already filtered to .h5, so this check is always true
    if filename.endswith(".h5"):

        file_path = os.path.join(directory, filename)

        # Expects the two-value pull_record
        timestamps, data = pull_record(file_path)

        minute = get_seconds_data(timestamps, data)

        # Append this record's per-second dicts to the list
        minutes.extend(minute)
        
        # Delete the file after reading its content
        #os.remove(file_path)

        # Update counter for progress tracking
        counter += 1
        print(f'Completed {counter} of {len(file_list)} records', end = '\r')

#save to a single csv
#data_frames.to_csv('combined_data.csv', index=False)

In [None]:
# One row per second, indexed by the 'time' entry of each stat dict
df = pd.DataFrame(minutes).set_index('time')

In [None]:
# First two rows
df[0:2]

In [None]:
# Rows 55-64; with 60 rows per record this spans the seam between the first two records
df[55:65]

In [None]:
# Figure out alignment times
# Do our three sites
# Add on those two additional graphs showing the distributions to the right
# Align to times

In [None]:
# NOTE(review): the star import can silently replace functions defined earlier in this
# notebook if snipe_analysis exports the same names - confirm which names it provides.
from snipe_analysis import *
sample_rate = 2597
# directory = './June24_mini_expedition/SNIPE Mini Expedition Jun 26-28_EDDIE' # Test data from June 2024
# calibration_data_file = 'calibrationN691.csv'

# Test 2
directory = './Aux_East&Down_Jul24' # NOTE(review): the directory name suggests July 2024 data - confirm
calibration_data_file = 'calibrationN761.csv'

stddev_cutoff = 2 # <-- Change this if desired, the lower the number the less potential noise.

In [None]:
# by_second_descriptive_stats is not defined in this notebook; presumably it comes from the star import above
df_s = by_second_descriptive_stats(directory)

In [None]:
# by_minute_descriptive_stats is not defined in this notebook; presumably it comes from the star import above
df_m = by_minute_descriptive_stats(directory)
# Plot the result against its row number
plt.plot(np.arange(0, len(df_m)),df_m)
plt.show()

In [None]:
# Keep 24*60 rows starting at row 900 (presumably one day of per-minute rows - TODO confirm)
start_cut = 900 
end_cut = start_cut + (24 * 60)
df_m_cut = df_m[start_cut:end_cut]

# NOTE(review): despite the name, this holds whatever outlier_filter returns;
# later cells only test membership against it with `in`.
df_m_clean_indices = outlier_filter(df_m_cut, stddev_cutoff)

In [None]:
# Get the frequency bins using the sample rate
# (bins for a 60-second record; only the first half of the fftfreq output is kept)
freqs = np.fft.fftfreq(60*sample_rate, 1/sample_rate)[:(60*sample_rate)//2] 

# To ensure proper ordering of data (ordered by file modification time)
file_list = [f for f in os.listdir(directory) if f.endswith(".h5")]
file_list.sort(key=lambda x: os.path.getmtime(os.path.join(directory, x)))

In [None]:
# NOTE(review): despite the `_avg` names, no division by the number of records is
# visible in this cell - both arrays hold running sums.

# Initialize data holder as an array of zero values (full complex FFT of a 60 s record)
fft_avg1 = np.zeros(60*sample_rate, dtype = 'complex128')
# Initialize data holder as an array of zero values (first half of the bins only)
psd_avg1 = np.zeros(30*sample_rate)


# Counts the records that passed the filter
counter = 0

# Iterate over each file in the directory
for filename in file_list:
    # file_list is already filtered to .h5, so this check is always true
    if filename.endswith(".h5"):
        file_path = os.path.join(directory, filename)
        timestamps, data = pull_record(file_path)

        # Check to see if it is in the filter
        # NOTE(review): for a pandas object `in` tests the index labels, not the values - confirm intent
        if timestamps[0] in df_m_clean_indices:   

            data = flatten_data(data)

            n = len(data)

            # Compute the Fast Fourier Transform (FFT)
            fft_result = np.fft.fft(data)
            
            fft_avg1 += fft_result

            # Calculate the one-sided power spectral density
            psd = (1 / (sample_rate * n)) * np.abs(fft_result[:n//2])**2
            
            # Square root of twice the PSD
            psd = np.sqrt(2*psd)
            
            # This line of code aggregates
            psd_avg1 += psd     #calibrated_psd

            # Update counter for progress tracking (only records inside the filter are counted)
            counter += 1
            print(f'Completed {counter} files', end = '\r')

In [None]:
# Accumulated spectrum against bin index, linear axes
plt.plot(psd_avg1)

In [None]:
# Bin range for the zoomed plot; only used by the commented-out line below
pstart = 25
pstop = 275
plt.yscale('log')
# Full accumulated spectrum against frequency
plt.plot(freqs,psd_avg1)
#plt.plot(freqs[pstart:pstop],psd_avg1[pstart:pstop])
plt.xlabel('Frequency, Hz')
plt.ylabel('PSD (log)')
plt.show()

In [None]:
# Load the calibration table selected in the configuration cell and split its columns
calibration_data = np.loadtxt(calibration_data_file, delimiter = ",")

frequency_calibration = calibration_data[:, 0]  # Frequency values in Hz
voltage_calibration = 10**-3 * calibration_data[:, 1]    # Volts per nano-Tesla (original calibration data is in mV/nT)
phase_calibration = calibration_data[:, 2]  # Third column is used as the phase; units are not stated here - TODO confirm

In [None]:
# Resample the calibration curve onto the FFT frequency bins (linear interpolation)
voltage_calibration_interpolated = np.interp(np.abs(freqs), frequency_calibration, voltage_calibration)
#phase_correction_interpolated = np.interp(np.abs(freqs), frequency_calibration, phase_calibration)

In [None]:
# Divide the accumulated spectrum by the interpolated V/nT response, bin by bin
calibrated_freq_domain = psd_avg1 / voltage_calibration_interpolated

In [None]:
# Sanity check: number of bins in the calibrated spectrum
len(calibrated_freq_domain)

In [None]:
# Zoom on the first ten frequency bins of the calibrated spectrum
pstart = 0
pstop = 10
plt.yscale('log')
#plt.plot(freqs,calibrated_freq_domain)
plt.plot(freqs[pstart:pstop],calibrated_freq_domain[pstart:pstop])
plt.xlabel('Frequency, Hz')
plt.ylabel('PSD (log)')
plt.show()

We get a spike every 120 Hz interval, beginning with 60Hz.  Although by 1020Hz it seems to fade into the background.

We also have one at 50 Hz.

Below 50Hz, we have two small peaks around 35 and 36 Hz each; bump around 8.5.  Also some rolling increase around 14 and 10 Hz. 

Small bump at 1.75Hz.  The energy of 1.75 Hz is approximately 7.24x10^-15 eV.

In [None]:
# Accumulator for the calibrated spectra, one slot per frequency bin.  It is
# created here so the cell does not depend on a leftover variable and gives the
# same result every time it is run.
psd_avg = np.zeros(len(freqs))

# Counts the records that passed the filter
counter = 0

# Iterate over each file in the directory
for filename in file_list:
    if filename.endswith(".h5"):
        file_path = os.path.join(directory, filename)
        timestamps, data = pull_record(file_path)

        # Check to see if it is in the filter
        if timestamps[0] in df_m_clean_indices:   

            data = flatten_data(data)

            n = len(data)

            # Compute the Fast Fourier Transform (FFT)
            fft_result = np.fft.fft(data)

            # Calculate the one-sided power spectral density
            psd = (1 / (sample_rate * n)) * np.abs(fft_result[:n//2])**2

            # Now Calibrate Here!
            calibrated_psd = calibrate_data(freqs, psd, calibration_data_file)

            # This line of code aggregates (running sum over the accepted records)
            psd_avg += calibrated_psd

            # Update counter for progress tracking (only accepted records are counted)
            counter += 1
            print(f'Completed {counter} files', end = '\r')

###### Finding the Spike

In [None]:
# Zoom on frequency bins 100-109 of the calibrated spectrum
pstart = 100
pstop = 110
plt.yscale('log')
#plt.plot(freqs,calibrated_freq_domain)
plt.plot(freqs[pstart:pstop],calibrated_freq_domain[pstart:pstop])
plt.xlabel('Frequency, Hz')
plt.ylabel('PSD (log)')
plt.show()

In [None]:
def calibrate_data(freqs, psd_agg, calibration_data_file):
    '''
    Applies a magnetometer calibration table to spectral data.
    
    Parameters:
    - freqs (1D array): frequency of each bin of `psd_agg`, in Hz
    - psd_agg (1D array): aggregated spectral values to calibrate
    - calibration_data_file (string): comma-separated file whose columns are read as
      frequency (Hz), response (mV/nT) and phase
    
    Returns:
    - list with one real value per entry of `psd_agg`
    
    NOTE(review): the phase term is applied before the real part is taken, so each
    value is scaled by the cosine of the interpolated phase - confirm this is
    intended for power-like input.
    '''
    table = np.loadtxt(calibration_data_file, delimiter = ",")
    cal_freqs, cal_response, cal_phase = table[:, 0], table[:, 1], table[:, 2]

    # Resample response (converted from mV/nT to V/nT) and phase onto the requested bins
    bin_freqs = np.abs(freqs)
    response_at_bins = np.interp(bin_freqs, cal_freqs, 10**-3 * cal_response)
    phase_at_bins = np.interp(bin_freqs, cal_freqs, cal_phase)

    calibrated = calibrate_frequency_domain(psd_agg, freqs, response_at_bins, phase_at_bins)
    
    # Keep only the real part of every bin
    return [value.real for value in calibrated]


def calibrate_frequency_domain(freq_domain, freqs, voltage_calibration, phase_correction):
    '''
    Divides each non-negative-frequency bin by its calibration factor, doubles it and
    rotates it by the phase correction.  Bins at negative frequencies are left at zero.

    Parameters:
    - freq_domain (array): Frequency-domain values, indexed like freqs.
    - freqs (array): Frequency of each bin.
    - voltage_calibration (array): Calibration factor for each bin.
    - phase_correction (array): Phase applied to each bin through exp(1j * phase).

    Returns:
    - complex array shaped like freq_domain.
    '''
    result = np.zeros_like(freq_domain, dtype=complex)

    for idx, freq in enumerate(freqs):
        if not freq >= 0:
            # Negative-frequency bins stay zero
            continue

        gain = voltage_calibration[idx]
        rotation = np.exp(1j * phase_correction[idx])

        # the 2* is for one sided freq>0
        result[idx] = 2 * freq_domain[idx] / gain * rotation

    return result

## Parameters

In [None]:
# Seconds from the Unix epoch (1970/01/01 00:00:00.0) to the GPS epoch
# (1980/01/06 00:00:19.0), ignoring leap-seconds
gpsEpoch = 315964819.
# Constants shared by the analysis cells below
sample_rate = 2597 # samples per second recorded by the magnetometer
rho = 6.04e7 # in nT^2 dark matter density in magnetic field units
R = 0.0212751 # in Hz^-1 Radius of earth divided by c
fd = 1 / 86164 # in Hz, rotation frequency of the Earth (1/day)
dT = 1/sample_rate # sampling period (in s)

## Needed Functions:

In [None]:
# 1. Pull a file (record), each of which represents 1 minute of data at 2597 samples for each of the 60 seconds:
def pull_record(filename):
    '''
    Pulls data and timestamps from .h5 file.  Sets timestamps as the column labels (in datetime
    format) of a Pandas DataFrame where each column represents a second of data and the rows are
    the 2597 individual samples for each of those seconds in chronological order.

    Parameters:
    - filename (string): .h5 data file containing 'timestamps' and 'data' datasets.

    Returns:
    - df (DataFrame): One column per timestamp, labelled with the parsed datetime.

    Raises:
    - ValueError: If a timestamp does not match '%Y-%m-%d %H:%M:%S.%f'.
    '''
    # Read both datasets through a single handle and close it as soon as the
    # arrays are in memory (the file used to be opened twice and never closed).
    with h5py.File(filename, 'r') as h5_file:
        raw_stamps = np.array(h5_file['timestamps'])
        df = pd.DataFrame(np.array(h5_file['data']))

    # Timestamps are stored as byte strings: decode, then parse to datetime
    df.columns = [
        datetime.strptime(stamp.decode('utf-8'), '%Y-%m-%d %H:%M:%S.%f')
        for stamp in raw_stamps
    ]
    return df

# 2. Get descriptive statistics for each minute (record) of data:
def get_minute_data(timestamps, data):
    '''
    Downsamples a record all the way to the minute, returning the mean, std and max difference
    from the mean within that minute of data.

    Parameters:
    - timestamps (sequence): Timestamps of the record; only the first one is used as the label.
    - data (array-like): The samples of the record.

    Returns:
    - dict with keys 'time', 'mean', 'std' and 'max_diff'.

    Raises:
    - ValueError: If data contains no samples.
    '''
    samples = np.asarray(data)
    if samples.size == 0:
        raise ValueError('get_minute_data() requires at least one sample')

    avg = samples.mean()
    stddev = np.std(samples)

    # Vectorized extrema: the builtin max()/min() walk the array one Python object at a time
    max_diff = max(abs(samples.max() - avg), abs(samples.min() - avg))

    return {'time': timestamps[0], 'mean': avg, 'std': stddev, 'max_diff': max_diff}

# 3. Create dataframe of each minute to help identify outliers (noise):
def by_minute_descriptive_stats(directory, store = False):
    '''
    Builds a DataFrame with one row of descriptive statistics per record (.h5 file) in a
    directory, to help identify outlier (noisy) minutes.

    Parameters:
    - directory (string): Directory holding the .h5 record files.
    - store (bool): When True, also writes the result to 'minute_data_<suffix>.csv'.

    Returns:
    - df_m (DataFrame): Indexed by the first timestamp of each record ('time'), with
      columns 'mean', 'std' and 'max_diff'.
    '''
    # To ensure proper ordering of data (files are processed by modification time)
    file_list = [f for f in os.listdir(directory) if f.endswith(".h5")]
    file_list.sort(key=lambda x: os.path.getmtime(os.path.join(directory, x)))

    # Collect one dict per record and build the DataFrame once at the end;
    # concatenating inside the loop re-copies every earlier row on each file.
    minute_rows = []

    for counter, filename in enumerate(file_list, start=1):
        record = pull_record(os.path.join(directory, filename))

        # pull_record returns one column per second, while get_minute_data takes the
        # timestamps and the samples separately: flatten column by column (order='F')
        # so the samples stay in chronological order.
        samples = record.to_numpy().flatten(order='F')
        minute_rows.append(get_minute_data(record.columns, samples))

        # Progress tracking
        print(f'Completed {counter} of {len(file_list)} files', end = '\r')

    if minute_rows:
        df_m = pd.DataFrame(minute_rows).set_index('time')
    else:
        # No records found: keep the expected columns so callers can still filter
        df_m = pd.DataFrame(columns=['mean', 'std', 'max_diff'])

    # OPTIONAL: save to a single csv (the index is written so the timestamps are kept)
    if store == True:
        df_m.to_csv(f'minute_data_{directory[-6:-1]}.csv', index=True)

    return df_m

# 4. Flatten each minute into one continuous series of data, with timestamps optional:
# ! - Note: sample_rate defaults to 2597 but can be changed; it is only used when time_stamp = True
def flatten_record(df, sample_rate = 2597, time_stamp = False):
    '''
    Flattens a record (one column per second) into a single chronological Pandas Series.

    Parameters:
    - df (DataFrame): Record whose column labels are the timestamps of each second.
    - sample_rate (int): Number of timestamps generated per column; only used when
      time_stamp is True and must then equal the number of rows in df.
    - time_stamp (bool): When True the Series is indexed by an interpolated timestamp for
      every sample, otherwise by a plain integer range.

    Returns:
    - Series of all samples, column after column.
    '''
    flattened_list = df.to_numpy().flatten(order='F').tolist()

    if time_stamp != True:
        return pd.Series(flattened_list)

    second_starts = list(df.columns)
    time_stamps = []

    # Spacing used when the record holds a single column and there is no neighbour
    # to measure against (this case used to fail on an unbound `interval`).
    interval = pd.Timedelta(seconds=1) / sample_rate

    for i, start in enumerate(second_starts):
        if i + 1 < len(second_starts):
            interval = (second_starts[i + 1] - start) / sample_rate
        # The last column has no successor, so it reuses the previous interval
        time_stamps.extend(start + (interval * j) for j in range(sample_rate))

    return pd.Series(flattened_list, index = time_stamps)

# 5.  Pulls only those records deemed non-outliers
def time_domain_clean(directory, good_records, store = False):
    '''
    Pulls only those records deemed non-outliers and flattens each into one Series.

    Parameters:
    - directory (string): Directory holding the .h5 record files.
    - good_records (collection): Timestamps of the records to keep; a record is kept when
      its first column label is in this collection.
    - store (bool): When True, also writes all kept samples to 'fft_data_<suffix>.csv'.

    Returns:
    - time_series (list of Series): One flattened Series per kept record, in file order.
    '''
    # To ensure proper ordering of data (files are processed by modification time)
    file_list = [f for f in os.listdir(directory) if f.endswith(".h5")]
    file_list.sort(key=lambda x: os.path.getmtime(os.path.join(directory, x)))

    time_series = []

    for counter, filename in enumerate(file_list, start=1):
        minute = pull_record(os.path.join(directory, filename))

        # Check if minute is in good data
        if minute.columns[0] in good_records:
            time_series.append(flatten_record(minute)) # Can change sample rate and if timestamps are used

        # Progress tracking
        print(f'Completed {counter} files', end = '\r')

    # OPTIONAL: save to a single csv.  The kept minutes are joined end to end first;
    # wrapping the list in pd.Series would write one text repr per minute instead of the samples.
    if store == True and time_series:
        pd.concat(time_series, ignore_index=True).to_csv(f'fft_data_{directory[-6:-1]}.csv', index=False)

    return time_series

# 5. Calibrates the frequency domain based on magnetometer calibration data
def calibrate_frequency_domain(freq_domain, freqs, calibration_factors, phase_data):
    '''
    Divides each non-negative-frequency bin by its calibration factor, doubles it and
    rotates it by exp(1j * phase).  Bins at negative frequencies are left at zero.

    Returns a complex array shaped like freq_domain.
    '''
    result = np.zeros_like(freq_domain, dtype=complex)

    for idx, freq in enumerate(freqs):
        if not freq >= 0:
            # Negative-frequency bins stay zero
            continue

        gain = calibration_factors[idx]
        rotation = np.exp(1j * phase_data[idx])

        # the 2* is for one sided freq>0
        result[idx] = 2 * freq_domain[idx] / gain * rotation

    return result

# 6. Computes the noise (amplitude) spectral density of a time series via the FFT
def compute_noise_spectral_density(time_series, sampling_rate):
    '''
    Returns the first half of the FFT frequency axis together with the square root of the
    power spectral density of a time series.

    Parameters:
    - time_series (sequence): Samples to transform.
    - sampling_rate (number): Samples per second.

    Returns:
    - freqs (array): The first n//2 FFT bin frequencies.
    - array: sqrt(|FFT|^2 / (sampling_rate * n)) for those bins.
    '''
    n = len(time_series)
    half = n // 2

    # Keep only the first half of the spectrum and its matching frequency axis
    spectrum = np.fft.fft(time_series)[:half]
    freqs = np.fft.fftfreq(n, 1/sampling_rate)[:half]

    psd = (1 / (sampling_rate * n)) * np.abs(spectrum)**2

    return freqs, np.sqrt(psd)

## Determining Data to Analyze

##### >>> LEMMY/EDDIE

#### 1. Load Minute Data

In [None]:
directory1 = './June24_mini_expedition/SNIPE Mini Expedition Jun 26-28_EDDIE'
df_m1 = by_minute_descriptive_stats(directory1)
df_m1.info(), df_m1

In [None]:
#21:40 = 20 hours 100 minutes
#1:55 = 1 hours 55 minutes
#19 hours, 45 minutes
19*60+45

In [None]:
df_m1[1170:1220]

#### 2. Use 'start' and 'stop' below to cut off bad data at the beginning and end (iterative process)

In [None]:
# Window of minutes to inspect; narrow it to cut bad data off either end
start, stop = 0, len(df_m1)
#start, stop = 300, 400
for i in df_m1:
    plt.plot(list(df_m1[i][start:stop]), label = i)
# Build the legend once, after every column has been drawn
plt.legend()
plt.show()

#### 3. Make the cut and get descriptive statistics for outlier identification

In [None]:
df_m1_cut = df_m1[start:stop]
df_m1_cut_ds = df_m1_cut.describe()
df_m1_cut_ds

#### 4. Using the descriptive statistics to set outlier filters

In [None]:
# std_limit is how many standard deviations from the mean to set the cutoff at
std_limit = 2

# Mean and spread of the per-minute 'std' column
std_mean = df_m1_cut_ds.loc['mean','std']
std_std = df_m1_cut_ds.loc['std','std']
std_cutoff = std_mean + (std_limit * std_std)

# Mean and spread of the per-minute 'max_diff' column
max_diff_mean = df_m1_cut_ds.loc['mean','max_diff']
max_diff_std = df_m1_cut_ds.loc['std','max_diff']
max_diff_cutoff = max_diff_mean + (std_limit * max_diff_std)

print(f'Max Difference Cutoff = {max_diff_cutoff},\nStandard Deviation Cutoff = {std_cutoff}')

# Keep only the minutes that fall below both cutoffs
keep_minutes = (df_m1_cut['std'] < std_cutoff) & (df_m1_cut['max_diff'] < max_diff_cutoff)
df_m1_clean = df_m1_cut[keep_minutes]

In [None]:
good_records1 = df_m1_clean.index
len(good_records1)
# del df_m # This deletes the minute data to free up RAM

In [None]:
time_series_N = time_domain_clean(directory1, good_records1)

##### >>> PHIL

#### 1. Load Minute Data

In [None]:
directory2 = './June24_mini_expedition/SNIPE Mini Expedition JUN 26-28_PHILL'
# Same loader as the first station; `minute_data_analysis` is not defined in this section
df_m2 = by_minute_descriptive_stats(directory2)
df_m2.info(), df_m2

#### 2. Use 'start' and 'stop' below to cut off bad data at the beginning and end (iterative process)

In [None]:
# start, stop = 0, len(df_m)
start, stop = 300, 2100
for i in df_m2:
    plt.plot(list(df_m2[i][start:stop]))
plt.show()

#### 3. Make the cut and get descriptive statistics for outlier identification

In [None]:
df_m2_cut = df_m2[start:stop]
df_m2_cut.describe()

#### 4. Using the descriptive statistics to set outlier filters

In [None]:
# Cutoffs for this station must come from its own statistics; they were previously
# read from the first station's summary (df_m1_cut_ds).
df_m2_cut_ds = df_m2_cut.describe()

std_mean = df_m2_cut_ds.loc['mean','std']
std_std = df_m2_cut_ds.loc['std','std']

max_diff_mean = df_m2_cut_ds.loc['mean','max_diff']
max_diff_std = df_m2_cut_ds.loc['std','max_diff']

# std_limit is how many standard deviations from the mean to set the cutoff at
std_limit = 2

std_cutoff = std_mean + (std_limit * std_std)
max_diff_cutoff = max_diff_mean + (std_limit * max_diff_std)

print(f'Max Difference Cutoff = {max_diff_cutoff},\nStandard Deviation Cutoff = {std_cutoff}')

df_m2_clean = df_m2_cut[df_m2_cut['std'] < std_cutoff]
df_m2_clean = df_m2_clean[df_m2_clean['max_diff'] < max_diff_cutoff]

In [None]:
good_records2 = df_m2_clean.index
len(good_records2)
# del df_m # This deletes the minute data to free up RAM

In [None]:
time_series_E = time_domain_clean(directory2, good_records2)

#### 5. Transform time to frequency domain and calibrate:

In [None]:
# time_domain_clean returns a list with one Series per kept minute, while the FFT and
# noise-spectral-density code below measures len(Bdata) as a number of samples.
# Join the minutes end to end into a single 1-D array per station.
# NOTE(review): removed (outlier) minutes leave gaps, so the joined samples are not
# contiguous in time - confirm this is acceptable for the analysis.
BdataN = np.concatenate([np.asarray(minute_series) for minute_series in time_series_N])
BdataE = np.concatenate([np.asarray(minute_series) for minute_series in time_series_E])

In [None]:
# load lemmy and phil data, get coords for our station, calibrate data after FFT

#convert coordinates into radians
[latL, longL] = (39 + 6.024/60)*np.pi/180, (120 + 55.386/60)*np.pi/180
[latP, longP] = (39 + 6.101/60)*np.pi/180, (120 + 55.426/60)*np.pi/180

#convert to spherical coordinates for theta (colatitude = pi/2 - latitude)
latL = np.pi/2 - latL
latP = np.pi/2 - latP  # previously written to a new name `LatP`, leaving latP unconverted

# Station coordinates used for the signal model: take both from the same sensor
# (the longitude used to come from the other sensor).
lat1 = latL
long1 = longL

##BELOW IS NOT NECESSARY FOR OUR ANALYSIS METHOD

# #Range of frequencies analyzed
# lowFreq,highFreq = [0.1,5.0];

# N = len(BdataN)
# fp = int(lowFreq*N*dT) # this picks out the data point in our list corresponding to lowFreq = Ntot LowFreq/(frequency span)
# fq = int(highFreq*N*dT)+1;


# # Construct data vector
# fdhat = int(np.round(fd * N * dT)) # fdhat index. fdhat is closest discrete frequency interval to fd (fd = 1/day)

##

# NOTE(review): the FFT and the spectral-density helper below treat BdataN / BdataE
# as 1-D arrays of samples - confirm that is what the previous cell provides.
FFT1 = -dT*np.fft.fft(BdataN)
FFT2 = dT*np.fft.fft(BdataE)

calibrationL = 'LEMMYCalibration691text.csv'
calibrationP = 'PHILCalibration748text.csv'

freqsN, noise_spectral_densityN = compute_noise_spectral_density(BdataN, sample_rate)
freqsE, noise_spectral_densityE = compute_noise_spectral_density(BdataE, sample_rate)

calibration_dataL = np.loadtxt(calibrationL, delimiter = ",")
calibration_dataP = np.loadtxt(calibrationP, delimiter = ",")

frequency_calibrationL = calibration_dataL[:, 0]  # Frequency values in Hz
voltage_calibrationL = 10**-3 * calibration_dataL[:, 1]    # Volts per nano-Tesla (original calibration data is in mV/nT)
phase_calibrationL = calibration_dataL[:,2]

if calibration_dataP.shape[1] >= 3:
    # Same layout as the LEMMY table: frequency, sensitivity, phase.  Reading column 0
    # as the sensitivity (as before) would have used the frequencies as calibration factors.
    frequency_calibrationP = calibration_dataP[:, 0]  # Frequency values in Hz
    voltage_calibrationP = 10**-3 * calibration_dataP[:, 1]    # Volts per nano-Tesla (original calibration data is in mV/nT)
    phase_calibrationP = calibration_dataP[:,2]
else:
    # NOTE(review): two-column table - presumably sensitivity and phase sampled on the
    # LEMMY frequency grid, which is how these columns were read before; confirm.
    frequency_calibrationP = calibration_dataL[:, 0]
    voltage_calibrationP = 10**-3 * calibration_dataP[:, 0]
    phase_calibrationP = calibration_dataP[:,1]

# calculates the calibration factor at every frequency in FFT of our data
voltage_calibration_interpolatedN = np.interp(np.abs(freqsN), frequency_calibrationL, voltage_calibrationL)
phase_correction_interpolatedN = np.interp(np.abs(freqsN), frequency_calibrationL, phase_calibrationL)
voltage_calibration_interpolatedE = np.interp(np.abs(freqsE), frequency_calibrationP, voltage_calibrationP)
phase_correction_interpolatedE = np.interp(np.abs(freqsE), frequency_calibrationP, phase_calibrationP)

calFFT1 = calibrate_frequency_domain(FFT1, freqsN, voltage_calibration_interpolatedN, phase_correction_interpolatedN)
calFFT2 = calibrate_frequency_domain(FFT2, freqsE, voltage_calibration_interpolatedE, phase_correction_interpolatedE)

In [None]:
### STOP HERE ###

In [None]:
# NOTE(review): fp, fq, fdhat and N are read below but have no live assignment nearby;
# the only visible definitions are in the commented-out block of the calibration
# cell above.  They must be defined before this cell can run.

# Vector of complete FT data at Compton frequency + sidebands
# Rows alternate calFFT1 / calFFT2 for the band shifted down by fdhat, the central
# band [fp, fq) and the band shifted up by fdhat.
X = np.stack([calFFT1[fp - fdhat : fq - fdhat], calFFT2[fp - fdhat : fq - fdhat],
              calFFT1[fp : fq], calFFT2[fp : fq],
              calFFT1[fp + fdhat : fq + fdhat], calFFT2[fp + fdhat : fq + fdhat]])

# Compute expectation value
# Discrete sampling correction for sidebands
Q = lambda f: (1 - np.exp(-2 * np.pi * 1j * f * N * dT)) / (1 - np.exp(-2 * np.pi * 1j * f * dT))

# basis vectors for dark matter signal
# Each vector has six entries, in the same row order as X.
mu0 = -N * dT * np.sqrt(rho / 2) * np.array([
    0, 0, 0, np.sin(lat1), 0, 0])
muplus = -dT * np.sqrt(rho) / 2 * np.array([
    1j * np.exp(-1j * long1) * Q(fd - fdhat / N / dT), np.cos(lat1) * np.exp(-1j * long1) * Q(fd - fdhat / N / dT),
    1j * np.exp(-1j * long1) * Q(fd), np.cos(lat1) * np.exp(-1j * long1) * Q(fd),
    1j * np.exp(-1j * long1) * Q(fd + fdhat / N / dT), np.cos(lat1) * np.exp(-1j * long1) * Q(fd + fdhat / N / dT)])
muminus = -dT * np.sqrt(rho) / 2 * np.array([
    -1j * np.exp(1j * long1) * Q(fd - fdhat / N / dT), np.cos(lat1) * np.exp(1j * long1) * Q(fd - fdhat / N / dT),
    -1j * np.exp(1j * long1) * Q(fd), np.cos(lat1) * np.exp(1j * long1) * Q(fd),
    -1j * np.exp(1j * long1) * Q(fd + fdhat / N / dT), np.cos(lat1) * np.exp(1j * long1) * Q(fd + fdhat / N / dT)])
# Note that mu, nu, and S are all missing a factor of frequency, so that they can be frequency independent.  This factor will be added back when computing the posterior.

In [None]:
# Compute covariance matrix from the two central-band rows of X
central = X[2:4]
Sigma = np.sum(central[:, None] * np.conj(central), axis = -1) / (fq - fp)

# Inverse of the Cholesky factor of Sigma
inva = np.linalg.inv(np.linalg.cholesky(Sigma))

# Block-diagonal 6x6 matrix carrying the same 2x2 block for each of the three bands
invA = np.zeros((6, 6), dtype = np.result_type(inva.dtype, float))
for k in range(0, 6, 2):
    invA[k:k + 2, k:k + 2] = inva

# ============================================================= 

# SCRAP WORK/APPENDICES BELOW

### 1. Pull the timestamps as well as the data from a single record file

In [None]:
# Note: H5PY file has two subtypes: 'data' and 'timestamps'
# Given a record file = 60 seconds of 2597 samples of data
# Returns np.array for timestamps (60) and data (60x2597).  Note data is Transposed from original format (2597x60)
file = 'a1.h5'

# Read both datasets through one handle so the file is closed afterwards
with h5py.File(file, 'r') as h5_file:
    index = np.array(h5_file['timestamps'])
    data = np.array(h5_file['data']).T

### 2. Change form of index from S26 to string (or datetime later if it makes sense)

In [None]:
# Each entry is a byte string: decode it to text, then parse it into a datetime
re_index = [
    datetime.strptime(stamp.decode('utf-8'), '%Y-%m-%d %H:%M:%S.%f')
    for stamp in index
]

### 3. Add the time as the column header for each second in the dataset (itself containing 2597 samples)

In [None]:
# `data` was transposed to (seconds x samples) when it was read, so transpose it back
# to get one column per second before labelling the columns with their timestamps.
df = pd.DataFrame(data.T)
df.columns = re_index
df.info()

In [None]:
# Now the data is represented as 60 seconds in the columns, with the row data representing the nth-indexed sample of that second
df.describe()

##### Showing all samples on a single timeline, along with means for each second:

In [None]:
mean = df.mean()

# Column-major flattening strings the per-second columns end to end
minute = df.to_numpy().flatten(order='F').tolist()
plt.plot(minute)

# Need to rescale x axis for the mean to show it on same plot
x_vals = np.linspace(0,60*2597,60)
plt.plot(x_vals, mean,color='black')
plt.xlabel('Sample #')
plt.ylabel('V')
plt.show()

### 4. Combine the above to pull data from a single file =======

In [None]:
def pull_record(filename):
    '''
    Pulls data and timestamps from .h5 file.  Cleans the timestamps and sets them as the
    column labels of a Pandas DataFrame where each column represents a second of data and
    the rows are the 2597 individual samples for each of those seconds in chronological order.

    Note: The data array is used as stored in the file; it is not transposed here.

    Parameters:
    - filename (string): .h5 data file containing 'timestamps' and 'data' datasets.

    Returns:
    - df (DataFrame): One column per timestamp, labelled with the parsed datetime.
    '''
    # One handle for both datasets, closed as soon as the arrays are in memory
    with h5py.File(filename, 'r') as h5_file:
        raw_stamps = np.array(h5_file['timestamps'])
        df = pd.DataFrame(np.array(h5_file['data']))

    # Timestamps are stored as byte strings: decode, then parse to datetime
    df.columns = [
        datetime.strptime(stamp.decode('utf-8'), '%Y-%m-%d %H:%M:%S.%f')
        for stamp in raw_stamps
    ]
    return df

In [None]:
def test_pull_record_3(filename):
    '''
    Pulls data and timestamps from .h5 file and returns the whole record as one Pandas
    Series: every sample in chronological order, indexed by an interpolated timestamp.

    Note: The function Transposes the data array to make it easier for manipulation.

    Parameters:
    - filename (string): .h5 data file containing 'timestamps' and 'data' datasets.

    Returns:
    - Series: Flattened samples indexed by per-sample timestamps.
    '''
    # One handle for both datasets, closed as soon as the arrays are in memory
    with h5py.File(filename, 'r') as h5_file:
        index = np.array(h5_file['timestamps'])
        data = np.array(h5_file['data']).T # See note above

    # Decode each byte string and convert it to datetime format
    timestamps = [
        datetime.strptime(stamp.decode('utf-8'), '%Y-%m-%d %H:%M:%S.%f')
        for stamp in index
    ]

    # The per-sample timestamp helper in this notebook is interpolate_timestamps
    timestamps = interpolate_timestamps(timestamps, sample_rate = 2597)

    data = flatten_data(data)

    Series = pd.Series(data, index = timestamps)

    return Series

In [None]:
def pull_record(filename):
    '''
    Pulls data and timestamps from .h5 file.  Cleans the timestamps and returns them
    together with the data as a 2D array.

    Note: The function Transposes the data array to make it easier for manipulation.

    Parameters:
    - filename (string): .h5 data file containing 'timestamps' and 'data' datasets.

    Returns:
    - timestamps (list of datetime): One parsed timestamp per stored entry.
    - data (2D array): The 'data' dataset, transposed.
    '''
    # One handle for both datasets, closed as soon as the arrays are in memory
    with h5py.File(filename, 'r') as h5_file:
        index = np.array(h5_file['timestamps'])
        data = np.array(h5_file['data']).T # See note above

    # Decode each byte string and convert it to datetime format
    timestamps = [
        datetime.strptime(stamp.decode('utf-8'), '%Y-%m-%d %H:%M:%S.%f')
        for stamp in index
    ]

    return timestamps, data

2597 samples per second, 60 seconds per minute = 155,820 samples per minute (also datapoints per record)

In [None]:
def interpolate_timestamps(timestamps, sample_rate = 2597):
    '''
    Create an np array with every interpolated timestamp for each sample within the minute.

    Parameters:
    - timestamps (sequence of datetime): One timestamp per second of data.
    - sample_rate (int): Number of samples to generate for each timestamp.

    Returns:
    - array of len(timestamps) * sample_rate timestamps, evenly spaced between consecutive
      entries.  The last entry reuses the spacing of the one before it; a single entry is
      assumed to span one second.
    '''
    if len(timestamps) == 0:
        return np.array([], dtype=object)

    # Spacing used when there is no following timestamp to measure against
    interval = timedelta(seconds=1) / sample_rate

    time_stamps = []
    last = len(timestamps) - 1
    for i, start in enumerate(timestamps):
        if i < last:
            interval = (timestamps[i+1] - start) / sample_rate
        # For the final second this advances with its own counter; it used to reuse the
        # stale loop variable of the previous second, repeating one timestamp.
        time_stamps.extend(start + (interval * j) for j in range(sample_rate))

    return np.array(time_stamps)

In [None]:
def flatten_data(data):
    '''
    Collapses a 2D array of time data (seconds, samples) into one long 1D array (samples),
    reading it row by row.

    Ensure the data is properly transposed!
    '''
    flat = data.flatten()  # row-major copy: all of row 0, then row 1, ...
    return flat

In [None]:
df = pull_record('a1.h5')

In [None]:
Series = flatten_record(df, sample_rate = 2597, time_stamp = True)
Series

In [None]:
Series = test_pull_record_3('sample_record.h5')
Series.nbytes, Series

In [None]:
# pull_record (timestamps, data) and interpolate_timestamps are the helpers defined in
# this section; `test_pull_record_2` and `extrapolate_timestamps` are not defined here.
timestamps2, data2 = pull_record('sample_record.h5')
timestamps2 = interpolate_timestamps(timestamps2, sample_rate = 2597)
data2 = flatten_data(data2)
Series2 = pd.Series(data2, index = timestamps2)
Series2

In [None]:
Series2.nbytes

In [None]:
pd.DataFrame(Series2, columns = ['V'])

In [None]:
def full_minute_data(filename):
    '''
    Returns a Pandas Series with every data reading of a record, indexed by the
    interpolated timestamp of each reading.

    Parameters:
    - filename (string): .h5 data file to read with pull_record.
    '''
    timestamps, data = pull_record(filename)

    # The per-sample timestamp helper in this notebook is interpolate_timestamps
    timestamps = interpolate_timestamps(timestamps, sample_rate = 2597)
    data = flatten_data(data)

    return pd.Series(data, index = timestamps)

In [None]:
t,d = pull_record(file)
d = flatten_data(d)
get_minute_data(t,d)

==========

In [None]:
file = 'sample_record.h5'
pull_record(file)

In [None]:
A = [1,2,3]
B = [3,4,5]
C = [5,6,7]

pd.Series(A, index = B)


### 5. Flatten records to timestamp each measurement

In [None]:
# Example DataFrame
data = {
    'A': [1, 2, 3],
    'B': [4, 5, 6],
    'C': [7, 8, 9]
}
df = pd.DataFrame(data)

# Flatten the DataFrame column by column
flattened_list = df.to_numpy().flatten(order='F').tolist()

print(flattened_list)

In [None]:
df

In [None]:
def flatten_record(df, sample_rate = 2597, time_stamp = False):
    '''
    Flattens a record (one column per second) into a single chronological Pandas Series.

    Parameters:
    - df (DataFrame): Record whose column labels are the timestamps of each second.
    - sample_rate (int): Number of timestamps generated per column; only used when
      time_stamp is True and must then equal the number of rows in df.
    - time_stamp (bool): When True the Series is indexed by an interpolated timestamp for
      every sample, otherwise by a plain integer range.

    Returns:
    - Series of all samples, column after column.
    '''
    flattened_list = df.to_numpy().flatten(order='F').tolist()

    if time_stamp != True:
        return pd.Series(flattened_list)

    second_starts = list(df.columns)
    time_stamps = []

    # Spacing used when the record holds a single column and there is no neighbour
    # to measure against (this case used to fail on an unbound `interval`).
    interval = pd.Timedelta(seconds=1) / sample_rate

    for i, start in enumerate(second_starts):
        if i + 1 < len(second_starts):
            interval = (second_starts[i + 1] - start) / sample_rate
        # The last column has no successor, so it reuses the previous interval
        time_stamps.extend(start + (interval * j) for j in range(sample_rate))

    return pd.Series(flattened_list, index = time_stamps)

In [None]:
# NOTE(review): `pull_record` is assigned here without being called, so `df` is the
# function object rather than a record - confirm which file was meant to be loaded.
df = pull_record
# Per-second descriptive statistics of the record
df_s = get_seconds_data(df)

In [None]:
sample_rate = 2597

time_stamps = []
flat_data = []

# Column labels are the timestamps at which each second starts
second_marks = df.columns

for i in range(len(second_marks) - 1):
    # Spread sample_rate evenly spaced stamps between this second and the next
    interval = (second_marks[i+1] - second_marks[i])/sample_rate
    time_stamps.extend(second_marks[i] + (interval * j) for j in range(sample_rate))

# Add in the last second using the interval from the previous
time_stamps.extend(second_marks[-1] + (interval * j) for j in range(sample_rate))

# Column-major flattening keeps the samples in chronological order
flattened_list = df.to_numpy().flatten(order='F').tolist()

output = pd.Series(flattened_list, index = time_stamps)

In [None]:
plt.plot(output)

In [None]:
pd.Series(time_stamps)

In [None]:
data_frames_s

In [None]:
flatten_record(df)

### 5. Getting descriptive data for each second on a record
AKA Downsampling via the mean value for each second.

In [None]:
def get_seconds_data(df):
    '''
    Downsamples a record to one set of descriptive statistics per column (second).

    Returns a DataFrame with rows 'mean', 'std' and 'max_diff' and one column per column of
    df, where 'max_diff' is the larger of the distances from the mean to the minimum and to
    the maximum - useful for outlier identification.
    '''
    summary = df.describe().loc[['mean','std','min','max']].T

    # Distance from the mean to each extreme, per column
    above = (summary['max'] - summary['mean']).abs()
    below = (summary['mean'] - summary['min']).abs()

    # Keep whichever extreme lies further from the mean
    summary['max_diff'] = [max(hi, lo) for hi, lo in zip(above, below)]

    return summary[['mean','std','max_diff']].T

In [None]:
file1 = 'snipe_hunt_2024-07-27_16-01-27-834200.h5'
file2 = 'test_file.h5'

In [None]:
def pull_record(filename):
    '''
    Pulls data and timestamps from a SNIPE Magnetometer .h5 file.  Cleans the timestamps and puts them into a list.
    Puts the magnetometer reading data into a 2D array (transposed from the stored layout).
    The function also captures the timestamp associated with the filename for possible use.

    Parameters:
    - filename (string): .h5 data file.  Ensure the full path is correct!

    Returns:
    - file_timestamp (datetime): Timestamp for file (minute worth of data), parsed from the
      file name; when the name carries no 'YYYY-MM-DD_HH-MM-SS-ffffff' stamp, the first
      timestamp stored in the file is used instead.
    - timestamps (list of datetime): Timestamps associated with data 'seconds'
    - data (2D float array): Data associated with timestamps

    Note: The function Transposes the data array to make it easier for manipulation.
    '''
    import re  # local import: only needed to locate the timestamp inside the file name

    stamp_format = '%Y-%m-%d %H:%M:%S.%f'

    def parse_stored_stamp(raw):
        # Decode the byte string to a regular string, then convert it to datetime format
        decoded_string = raw.decode('utf-8')
        try:
            return datetime.strptime(decoded_string, stamp_format)
        except ValueError:
            # Bad data in file: the '.' before the fractional seconds is missing, so insert it
            repaired = decoded_string[:19] + '.' + decoded_string[19:]
            return datetime.strptime(repaired, stamp_format)

    # Extract the raw data into numpy arrays; the handle is closed once they are in memory
    with h5py.File(filename, 'r') as h5_file:
        index = np.array(h5_file['timestamps'])
        data = np.array(h5_file['data']).T # See note above

    timestamps = [parse_stored_stamp(raw) for raw in index]

    # Look for the stamp in the file name itself (any year, and regardless of what the
    # directory part of the path contains) to accommodate different file/directory combinations.
    stamp_in_name = re.search(r'\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}-\d{1,6}', os.path.basename(filename))
    try:
        if stamp_in_name is None:
            raise ValueError('no timestamp in file name')
        file_timestamp = datetime.strptime(stamp_in_name.group(0), '%Y-%m-%d_%H-%M-%S-%f')
    except ValueError:
        # If the filename is non-standard then use the first timestamp from the GPS for the file timestamp
        print('Filename not in proper format')
        file_timestamp = timestamps[0]

    return file_timestamp, timestamps, data

In [None]:
file_stamp, timestamps, data = pull_record(file1)
data

In [None]:
def get_second_stats(timestamps, data):
    '''
    Downsamples a record to the second, returning for every row of data the mean, std and
    max difference from the mean magnetometer reading within that second.

    Parameters:
    - timestamps (sequence): One timestamp per row of data.
    - data (2D array-like): One row of samples per second.

    Returns:
    - list of dicts with keys 'time', 'mean', 'std' and 'max_diff', one per row.
    '''
    output = []

    for i, second in enumerate(data):
        samples = np.asarray(second)
        avg = samples.mean()

        # Vectorized extrema instead of the builtin max()/min() over every sample
        max_diff = max(abs(samples.max() - avg), abs(samples.min() - avg))

        output.append({'time':timestamps[i], 'mean':avg, 'std':np.std(samples), 'max_diff':max_diff})

    return output

In [None]:
a = 'calibrationN761.csv'
b = 'Coil1Calibration.dat'

In [None]:



b[-3:]

In [None]:
def by_second_descriptive_stats(directory, store = False):
    '''
    Takes in a directory of records and returns the descriptive statistics for the aggregated seconds.

    Parameters:
    - directory (string): Directory holding the .h5 record files.
    - store (bool): When True, also writes the result to 'second_data_<suffix>.csv'.

    Returns:
    - DataFrame indexed by 'time' with columns 'mean', 'std' and 'max_diff', one row per second.
    '''
    # To ensure proper ordering of data (files are processed by modification time)
    file_list = [f for f in os.listdir(directory) if f.endswith(".h5")]
    file_list.sort(key=lambda x: os.path.getmtime(os.path.join(directory, x)))

    # One dict of statistics per second, across all records
    seconds = []

    for counter, filename in enumerate(file_list, start=1):
        file_timestamp, timestamps, data = pull_record(os.path.join(directory, filename))

        seconds.extend(get_second_stats(timestamps, data))

        # Progress tracking
        print(f'Completed {counter} of {len(file_list)} files', end = '\r')

    if seconds:
        df_s = pd.DataFrame(seconds).set_index('time')
    else:
        # No records found: keep the expected columns so callers can still filter
        df_s = pd.DataFrame(columns=['mean', 'std', 'max_diff'])

    # OPTIONAL: save to a single csv.  The list itself has no to_csv, so the DataFrame is
    # written (with its time index), under a name that does not clash with the per-minute file.
    if store == True:
        df_s.to_csv(f'second_data_{directory[-6:-1]}.csv')

    return df_s

In [None]:
by_second_descriptive_stats(directoryEW, store = False)

In [None]:
directoryEW

In [None]:
df_s = get_seconds_data(df)
df_s

Each block of 7x53 (= 371) samples gets wrapped up into 1 timestamped record.  That gives 7 samples per second (2597 / 371) and therefore 60x7 (420) samples per minute.

In [None]:
def get_minute_data(df):
    '''
    Downsamples records all the way to the minute, returning the mean, std and max difference
    from the mean within that minute of data.
    '''
    indx = df.columns[0]
    
    minute_data = df.T.stack()
    avg = minute_data.mean()
    mx = max(minute_data)
    mn = min(minute_data)
    stddev = np.std(minute_data)
    max_diff = max(abs(mx-avg),abs(mn-avg))
    
    output = pd.DataFrame({indx:{'mean':avg, 'std':stddev, 'max_diff':max_diff}}).T
    
    return output

In [None]:
get_minute_data(df)

In [None]:
for i in range(len(df.columns)):
        time = float(df.columns[i][-8:-1])
        print(time)

### 6. Code to extract all data from .h5 files in DataFrame

##### All Data - Note: Without substantial compute this is not recommended

In [None]:
total_records = sample_rate * 60 * len(file_list)

timestamps = np.empty(total_records, dtype=np.float32)

In [None]:
t = [1,2,3,4,5]
i = 5

timestamps[i:(i+len(t))] = t

In [None]:
timestamps[:20]

In [None]:
###!!! - This works but is TOO BIG for the kernel

# Step 1: 
directory = './East-N149'

# To ensure proper ordering of data
file_list = [f for f in os.listdir(directory) if f.endswith(".h5")]
file_list.sort(key=lambda x: os.path.getmtime(os.path.join(directory, x)))

# For tracking progress
counter = 1
# Write offset into the preallocated arrays
i = 0

total_records = sample_rate * 60 * len(file_list)

timestamps = np.empty(total_records, dtype=object)
data = np.empty(total_records, dtype=np.float32)

# Iterate over each file in the directory (file_list already holds only .h5 files)
for filename in file_list:
    file_path = os.path.join(directory, filename)

    # full_minute_data returns one Series per record (values indexed by timestamp);
    # `full_minute` is not defined in this notebook section.
    minute_series = full_minute_data(file_path)
    t = minute_series.index
    d = minute_series.to_numpy()

    timestamps[i:(i+len(t))] = t
    data[i:(i+len(d))] = d

    # Delete the file after reading its content
    #os.remove(file_path)

    # Update counter for progress tracking
    print(f'Completed {counter} of {len(file_list)} files', end = '\r')
    counter += 1
    i += (sample_rate * 60)

#save to a single csv
#data_frames.to_csv('combined_data.csv', index=False)

##### Downsample every N:

In [None]:
# Step 1: 
directory = './SNIPE Mini Expedition Jun 26-28_EDDIE'

# To ensure proper ordering of data
file_list = [f for f in os.listdir(directory) if f.endswith(".h5")]
file_list.sort(key=lambda x: os.path.getmtime(os.path.join(directory, x)))

# One single-row DataFrame of statistics per record, joined once after the loop
minute_frames = []

# Iterate over each file in the directory (file_list already holds only .h5 files)
for counter, filename in enumerate(file_list, start=1):
    file_path = os.path.join(directory, filename)

    minute = pull_record(file_path)

    # Summarise the record that was just read (the empty `df` used to be passed here)
    df_m = get_minute_data(minute)
    minute_frames.append(df_m)

    # Delete the file after reading its content
    #os.remove(file_path)

    # Update counter for progress tracking
    print(f'Completed {counter} of {len(file_list)} files', end = '\r')

data_frames_m = pd.concat(minute_frames, axis = 0) if minute_frames else pd.DataFrame()

#save to a single csv
#data_frames.to_csv('combined_data.csv', index=False)

##### By the Minute:

In [None]:
# Specify the directory containing the data files
directory = './SNIPE Mini Expedition Jun 26-28_EDDIE'

# To ensure proper ordering of data
file_list = [f for f in os.listdir(directory) if f.endswith(".h5")]
file_list.sort(key=lambda x: os.path.getmtime(os.path.join(directory, x)))

# Per-file frames are gathered in a list and concatenated once after the loop;
# concatenating inside the loop re-copies the accumulated frame for every file.
minute_frames = []

# Iterate over each file (file_list already contains only .h5 files)
for counter, filename in enumerate(file_list, start=1):
    file_path = os.path.join(directory, filename)

    df = pull_record(file_path)

    df_m = get_minute_data(df)
    minute_frames.append(df_m)

    # Delete the file after reading its content
    #os.remove(file_path)

    # Progress tracking: report files finished out of the real file count
    # (previously started at 2 and used a hard-coded total of 2370).
    print(f'Completed {counter} of {len(file_list)} files', end = '\r')

# Combined table of all files; empty frame when the directory has no .h5 files
data_frames_m = pd.concat(minute_frames, axis = 0) if minute_frames else pd.DataFrame()

#save to a single csv
#data_frames.to_csv('combined_data.csv', index=False)

In [None]:
# Summary statistics for every numeric column of the combined per-file table
data_frames_m.describe()

In [None]:
# Overlay every column on one figure, restricted to rows 200 through 1999
for column_name in data_frames_m.columns:
    window = data_frames_m[column_name][200:2000]
    plt.plot(list(window))
plt.show()

In [None]:
# Keep only rows 200-1999, the same window that was plotted for inspection.
# NOTE(review): the bounds are hand-picked magic numbers - confirm they still
# match the data whenever the input directory changes.
df_m_cut = data_frames_m[200:2000]

In [None]:
# Summary statistics of the trimmed table
df_m_cut.describe()

In [None]:
# Number of standard deviations above the mean that is still accepted
std_limit = 2

# Mean and standard deviation of the 'std' and 'max_diff' columns.
# NOTE(review): hard-coded values, presumably read off a describe() output -
# confirm they are still current for the data being cleaned.
std_mean, std_std = .005015, .006107
max_diff_mean, max_diff_std = .046910, .057105

# Cutoff = mean + std_limit * standard deviation
std_cutoffs = std_mean + (std_limit * std_std)
max_diff_cutoffs = max_diff_mean + (std_limit * max_diff_std)

std_cutoffs, max_diff_cutoffs

In [None]:
# Keep the rows whose 'std' is strictly below the std cutoff
std_ok = df_m_cut['std'] < std_cutoffs
cleaned = df_m_cut.loc[std_ok]

In [None]:
# Additionally require 'max_diff' to be strictly below its cutoff
max_diff_ok = cleaned['max_diff'] < max_diff_cutoffs
cleaned = cleaned.loc[max_diff_ok]

In [None]:
# Overlay every column of the filtered table on one figure
for column_name in cleaned.columns:
    column_values = list(cleaned[column_name])
    plt.plot(column_values)
plt.show()

In [None]:
# Index labels of the rows that survived both filters
list(cleaned.index)

##### By the second

In [None]:
# Specify the directory containing the data files
directory = './East-N149'

# To ensure proper ordering of data, first need to run a file type check
file_list = [f for f in os.listdir(directory) if f.endswith(".h5")]
file_list.sort(key=lambda x: os.path.getmtime(os.path.join(directory, x)))

# Per-file frames are gathered in a list and concatenated once after the loop;
# concatenating inside the loop re-copies the accumulated frame for every file.
second_frames = []

# Iterate over each file (file_list already contains only .h5 files)
for counter, filename in enumerate(file_list, start=1):

    file_path = os.path.join(directory, filename)

    df = pull_record(file_path)

    # Transposed before stacking, as in the original cell
    df_s = get_seconds_data(df).T
    second_frames.append(df_s)

    # Delete the file after reading its content
    #os.remove(file_path)

    # Progress tracking: files finished out of the real file count (the old
    # counter started at 2 and compensated by printing len(file_list)+1).
    print(f'Completed {counter} of {len(file_list)} records', end = '\r')

# Combined table of all files; empty frame when the directory has no .h5 files
data_frames_s = pd.concat(second_frames) if second_frames else pd.DataFrame()

#save to a single csv
#data_frames.to_csv('combined_data.csv', index=False)

In [None]:
# Summary statistics for every numeric column of the combined table
data_frames_s.describe()

In [None]:
# One labelled line per column so the legend identifies each statistic
for column_name in data_frames_s.columns:
    column_values = list(data_frames_s[column_name])
    plt.plot(column_values, label = column_name)
plt.legend()
plt.show()

In [None]:
# Look at a 60-row window (rows 60320-60379) of the combined table
data_frames_s[60320:60380]

### 7. Outlier Analysis Step 1 - Identify best chunk of continuous data

We would ultimately like 48 hours of coordinated data across the (4?) SNIPE hunting grounds, but we will be happy with 24. For this example we used 30 hours of data, identified manually from the graph in part 6 above.

In [None]:
# Use dfs as a modified data_frames_s
# Rows 12000-119999 are taken as an independent copy, so adding columns to dfs
# later does not touch data_frames_s.
# NOTE(review): the bounds are hand-picked magic numbers - confirm they still
# bracket the intended stretch whenever the input data changes.
dfs = data_frames_s[12000:120000].copy()

In [None]:
# Overlay every column of the selected stretch on one figure
for column_name in dfs.columns:
    column_values = list(dfs[column_name])
    plt.plot(column_values)
plt.show()

In [None]:
# Summary statistics of the selected stretch
dfs.describe()

### 8. Outlier Analysis Part 2 - Identify and replace outlier "spikes".

**! - Need to lock in the method**

A row is treated as an outlier when its max_diff is greater than 0.057 (3 std above normal); this represents around 1626 records to replace.
Loop through the values; when one fails the check, replace it with the interpolation between the last good value and the next good value.

outliers_free = []

for value in records:
    if value is good:
        if counter = 0:
            append to p
         if counter > 0:
             check the next index to make 
    if value is bad:
        mark index of last good value
        turn on counter
    


In [None]:
# 7 samples per second
# 420 samples per minute
# Check each minute for outliers
# Throw out each outlier minute

In [None]:
# Flag rows whose 'std' / 'max_diff' exceed the hard-coded trigger levels.
# NOTE(review): 0.023612 and 0.057848 are magic numbers - confirm their origin.
std_trigger_level = 0.023612
max_diff_trigger_level = 0.057848
dfs['std_trig'] = dfs['std'].gt(std_trigger_level)
dfs['max_diff_trig'] = dfs['max_diff'].gt(max_diff_trigger_level)

In [None]:
# True where both triggers agree (both fired or neither fired)
dfs['compo'] = dfs['std_trig'].eq(dfs['max_diff_trig'])

In [None]:
# Print the frequency of each distinct value, column by column
for column_name in dfs.columns:
    counts = dfs[column_name].value_counts()
    print(counts)

In [None]:
import pandas as pd
import numpy as np

# Toy table for trying out the interpolation: three columns of ten values,
# several of which lie above the threshold chosen below.
data = dict(
    A=[1, 2, 15, 4, 15, 6, 7, 20, 9, 2],
    B=[2, 3, 20, 5, 25, 7, 8, 30, 10, 3],
    C=[3, 4, 25, 6, 30, 8, 9, 35, 11, 4],
)
df = pd.DataFrame(data)

# Values above this number are the ones to be replaced
threshold = 10

# Function to perform the interpolation
def interpolate_values(series, threshold):
    """Replace values above ``threshold`` by linear interpolation.

    Values less than or equal to ``threshold`` are kept. Every other value
    (including NaN, which never compares ``<=``) is replaced by the straight
    line between the nearest kept neighbours on either side, using the
    position in the sequence as the x-axis. Values before the first kept value
    take that first value; values after the last kept value take that last
    value (previously trailing values were left untouched).

    Parameters:
    - series (pandas Series or 1D array-like): values to repair.
    - threshold (number): largest value that is considered good.

    Returns:
    - A new float pandas Series with the same index and name when ``series`` is
      a Series, otherwise a float numpy array. The input is not modified.
      When no value is at or below the threshold there is nothing to
      interpolate from and ``series`` itself is returned unchanged.
    """
    # Work on a float array: interpolated values are generally fractional, and
    # purely positional access avoids label-vs-position ambiguity for Series
    # whose index is not 0..n-1.
    values = np.asarray(series, dtype=float)
    keep = values <= threshold

    if not keep.any():
        return series

    positions = np.arange(values.size)
    # np.interp holds the edge values constant outside the kept range, which
    # covers both the leading and the trailing stretch.
    repaired = np.interp(positions, positions[keep], values[keep])

    if isinstance(series, pd.Series):
        return pd.Series(repaired, index=series.index, name=series.name)
    return repaired

# Repair each column in turn and store the result back under the same label
for column in df:
    repaired_column = interpolate_values(df[column], threshold)
    df[column] = repaired_column

# Show the table after interpolation
print(df)

### 9.  Run Frequency Analysis on Data

In [None]:
# Series fed to the frequency analysis: the 'mean' column of dfs, with the
# matching index labels kept alongside as a plain list.
# NOTE(review): `timestamps` is also the name of the sample-level array built
# by the East-N149 loading cell; this assignment replaces it.
measurements = list(dfs['mean'])
timestamps = dfs.index.to_list()

In [None]:
# seconds from (1970/01/01 00:00:00.0) to gps epoch (1980/01/06 00:00:19.0), ignoring leap-seconds
gpsEpoch = 315964819.

# --- Acquisition constants ---
# Sample rate per second of the raw recording.
# NOTE(review): the series analysed below is a reduced one (`measurements` is
# the 'mean' column of dfs), so its effective sample rate may not be 2597 Hz.
# The open question from the original author stands: how does this affect dT
# and every frequency axis derived from SampleRate? Confirm before trusting
# the spectra.
SampleRate = 2597
dT = 1/SampleRate # sampling period (in s)

# --- Physical constants ---
rho = 6.04e7 # in nT^2 dark matter density in magnetic field units
R = 0.0212751 # in Hz^-1 Radius of earth divided by c
fd = 1 / 86164 # in Hz, rotation frequency of the Earth (1/day)

In [None]:
def compute_noise_spectral_density(time_series, sampling_rate):
    """Amplitude spectral density of a time series on the non-negative FFT bins.

    Parameters:
    - time_series (1D array-like): samples to transform.
    - sampling_rate (number): samples per unit time; sets the frequency axis
      and the normalisation.

    Returns:
    - freqs (array): the first n//2 FFT bin frequencies, starting at 0.
    - asd (array): sqrt(|FFT|^2 / (sampling_rate * n)) for those bins.

    Note: the power is normalised by 1 / (sampling_rate * n) only; no factor
    of 2 is applied for folding in the negative-frequency half, and no window
    or detrending is applied to the input.
    """
    n_samples = len(time_series)
    half = n_samples // 2

    # Full complex spectrum; only the first half of the bins is kept
    spectrum = np.fft.fft(time_series)
    normalisation = 1 / (sampling_rate * n_samples)
    power = normalisation * np.abs(spectrum[:half])**2

    # Frequencies belonging to the kept bins
    sample_spacing = 1/sampling_rate
    frequencies = np.fft.fftfreq(n_samples, sample_spacing)[:half]

    return frequencies, np.sqrt(power)


def calibrate_frequency_domain(freq_domain, freqs, calibration_factors, phase_data):
    """Apply a per-bin gain and phase calibration to FFT coefficients.

    Parameters:
    - freq_domain (array): FFT coefficients to calibrate.
    - freqs (sequence): frequency of each bin, index-aligned with freq_domain.
    - calibration_factors (sequence): divisor applied to each bin.
    - phase_data (sequence): phase applied to each bin as exp(1j * phase).

    Returns:
    - Complex array shaped like freq_domain. Bin i holds
      2 * freq_domain[i] / calibration_factors[i] * exp(1j * phase_data[i])
      when freqs[i] >= 0. Bins with a negative frequency, and bins beyond
      len(freqs), stay zero.
    """
    calibrated = np.zeros_like(freq_domain, dtype=complex)

    for idx, freq in enumerate(freqs):
        # Only bins at non-negative frequencies are filled in
        if not freq >= 0:
            continue

        gain = calibration_factors[idx]
        rotation = np.exp(1j * phase_data[idx])

        # the 2* is for one sided freq>0
        calibrated[idx] = 2 * freq_domain[idx] / gain * rotation

    return calibrated

In [None]:
# load lemmy and phil data, get coords for our station, calibrate data after FFT

# Station coordinates, converted from degrees + decimal minutes to radians
# (L = Lemmy, P = Phil)
latL, longL = (39 + 6.024/60)*np.pi/180, (120 + 55.386/60)*np.pi/180
latP, longP = (39 + 6.101/60)*np.pi/180, (120 + 55.426/60)*np.pi/180

# Convert latitude to the spherical polar angle theta = pi/2 - latitude
latL = np.pi/2 - latL
# Previously assigned to a new name `LatP`, which left latP as a plain
# latitude while latL was a polar angle.
latP = np.pi/2 - latP
LatP = latP  # old spelling kept as an alias for any cell that still reads it

# Coordinates of the station being analysed.
# NOTE(review): this used to pair latL with longP (two different stations).
# Station L is assumed here because lat1 already pointed at it - confirm.
lat1 = latL
long1 = longL

# Range of frequencies analyzed
lowFreq, highFreq = 0.1, 5.0

# Number of samples in the series
T = len(measurements)

# FFT bin index of a frequency f is f * T * dT (T * dT is the record duration)
fp = int(lowFreq*T*dT)      # bin corresponding to lowFreq
fq = int(highFreq*T*dT)+1   # one past the bin corresponding to highFreq

# Construct data vector
# fdhat index. fdhat is closest discrete frequency interval to fd (fd = 1/day)
fdhat = int(np.round(fd * T * dT))
# NOTE(review): the spectrum is negated here; the reason for the sign is not
# recorded in this cell - confirm it is intentional.
FFT1 = -np.fft.fft(measurements)
#FFT2 = np.fft.fft(BdataE)
#FFT2 = np.fft.fft(BdataE)

# os.chdir('/Users/gc2138/Desktop/SNIPE')
# calibrationL = 'LEMMYCalibration691text.csv'
# calibrationP = 'PHILCalibration748text.csv'

# Frequency axis and amplitude spectral density of `measurements`, computed
# with SampleRate as the sampling rate
freqsN, noise_spectral_densityN = compute_noise_spectral_density(measurements, SampleRate)
#freqsE, noise_spectral_densityE = compute_noise_spectral_density(BdataE, SampleRate)

# calibration_dataL = np.loadtxt(calibrationL, delimiter = ",")
# calibration_dataP = np.loadtxt(calibrationP, delimiter = ",")

# frequency_calibrationL = calibration_dataL[:, 0]  # Frequency values in Hz
# voltage_calibrationL = 10**-3 * calibration_dataL[:, 1]    # Volts per nano-Tesla (original calibration data is in mV/nT)
# phase_calibrationL = calibration_dataL[:,2]

# frequency_calibrationP = calibration_dataP[:, 0]  # Frequency values in Hz
# voltage_calibrationP = 10**-3 * calibration_dataP[:, 1]    # Volts per nano-Tesla (original calibration data is in mV/nT)
# phase_calibrationP = calibration_dataP[:,2]

# # calculates the calibration factor at every frequency in FFT of our data
# voltage_calibration_interpolatedN = np.interp(np.abs(freqsN), frequency_calibrationL, voltage_calibrationL)
# phase_correction_interpolatedN = np.interp(np.abs(freqsN), frequency_calibrationL, phase_calibrationL)
# voltage_calibration_interpolatedE = np.interp(np.abs(freqsE), frequency_calibrationP, voltage_calibrationP)
# phase_correction_interpolatedE = np.interp(np.abs(freqsE), frequency_calibrationP, phase_calibrationP)

#calFFT1 = calibrate_frequency_domain(FFT1, freqsN, voltage_calibration_interpolatedN, phase_correction_interpolatedN)
#calFFT2 = calibrate_frequency_domain(FFT2, freqsE, voltage_calibration_interpolatedE, phase_correction_interpolatedE)

Loading all fast