In [98]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
from datetime import datetime, timedelta
import csv

## Objective:
- Import the datasets. The extra dataset is still needed. Includes triggers up to Dec. 21.

- Create an algorithm that searches for matches that are within +/- 10 seconds of each other and removes the rest (maybe use two seconds instead).

-First match found at 2018, 7, 11, 17, 2, 4

-Do spectral analysis on the data (counts/bin)

## Importing data

In [99]:
# Importing and redefining the dataframes
# Not loading the trigger list for 2021 yet

# Column separator of the ASIM text files: any run of whitespace and/or tabs.
# Kept as a raw string so "\s" reaches the regex engine as-is instead of being
# an invalid string escape (and "\t" is not silently turned into a literal tab).
ASIM_SEP = r"\s+|\t+|\s+\t+|\t+\s+"

# Header names found in the ASIM files -> names used in the rest of the notebook.
# rename() ignores keys that are missing, so files without a "Corr" column are fine.
ASIM_RENAMES = {"yyyy-MMM-dd": "date", "HH:mm:ss.SSSSSS": "time", "Corr": "corr"}


def tidy_asim_triggers(raw):
    """Return a cleaned copy of a raw ASIM trigger table.

    Parameters
    ----------
    raw : pandas.DataFrame
        Table as read from an ASIM trigger file with ``sep=ASIM_SEP``.

    Returns
    -------
    pandas.DataFrame
        New frame without the "######" column and with the date, time and
        correction columns renamed to "date", "time" and "corr".
        ``raw`` itself is left untouched.
    """
    return raw.drop(columns=["######"]).rename(columns=ASIM_RENAMES)


# IPN triggers: "|"-separated file; drop the unwanted columns and call the
# first remaining column "time". New frames are built instead of mutating in
# place so that re-running the cell always gives the same result.
ipn_data = pd.read_csv("trigIPN.csv", sep="|")
ipn = ipn_data.drop(columns=["Unnamed: 0", "Unnamed: 2"])
ipn = ipn.rename(columns={ipn.columns[0]: "time"})

# ASIM trigger list B
trigB_data = pd.read_csv("./ASIM/trigB.txt", sep=ASIM_SEP, engine="python")
trigB = tidy_asim_triggers(trigB_data)

# Remaining ASIM trigger lists are not loaded yet; uncomment when needed.
# NOTE(review): the earlier commented-out snippets for the *_2021 files did not
# rename a "Corr" column - confirm whether those files contain one, since
# corr_dt() needs a "corr" column.

#trigB_21_data = pd.read_csv("./ASIM/trigB_2021.txt", sep=ASIM_SEP, engine="python")
#trigB_21 = tidy_asim_triggers(trigB_21_data)

#trigC_data = pd.read_csv("./ASIM/trigC.txt", sep=ASIM_SEP, engine="python")
#trigC = tidy_asim_triggers(trigC_data)

#trigC_21_data = pd.read_csv("./ASIM/trigC_2021.txt", sep=ASIM_SEP, engine="python")
#trigC_21 = tidy_asim_triggers(trigC_21_data)

#trigM_data = pd.read_csv("./ASIM/trigM.txt", sep=ASIM_SEP, engine="python")
#trigM = tidy_asim_triggers(trigM_data)

#trigM_21_data = pd.read_csv("./ASIM/trigM_2021.txt", sep=ASIM_SEP, engine="python")
#trigM_21 = tidy_asim_triggers(trigM_21_data)

### Importing supplementary IPN triggers 
Latest ASIM trigger: 2021, 3, 20, 22, 51, 59

Latest IPN trigger: 2021, 6, 27, 19, 31, 37 --> Extending this until the end of the year

In [100]:
# Build the array of IPN trigger times as datetime objects.
# Each record's "time" string is parsed with the format YYYY-MM-DD HH:MM:SS.
ipn_dict = ipn.to_dict("records")
ipn_dt_temp = [
    datetime.strptime(record["time"], "%Y-%m-%d %H:%M:%S")
    for record in ipn_dict
]

ipn_dt = np.asarray(ipn_dt_temp)

## Vectorization and ASIM datetime correction

In [147]:
# Function for correcting the date and time in ASIM data. Returns a matrix that contains datetime objects.
# Method for correcting the time:
# 1. Retrieve the time from the time column
# 2. Isolate the microsecond part of that time and cast it to an int
# 3. Retrieve the correction from the corr column and cast it to an int
# 4. Subtract the correction from the time and cast the result to a string
# 5. Insert the corrected time

# PROBLEM: ONLY ONE DATETIME IS ADDED

def corr_dt(dfs):  # Correcting times from ASIM data
    """Return the corrected trigger datetimes for each ASIM dataframe.

    Every row's "date" and "time" strings are parsed into one datetime
    (format ``%Y-%b-%d %H:%M:%S.%f``) and the row's own correction, read as a
    signed integer number of microseconds, is subtracted from it. A negative
    correction such as "-0000010" therefore moves the time forward and a
    positive one such as "+0000002" moves it backward. Rows whose correction
    is the placeholder "--------" are kept unchanged.

    Parameters
    ----------
    dfs : list of dataframes
        The dataframes containing the data. Each needs the string columns
        "date", "time" and "corr".

    Returns
    -------
    trig_dt : numpy.ndarray (dtype=object)
        One entry per dataframe holding its corrected datetime objects. When
        all dataframes have the same number of rows this is a 2-D array of
        shape (len(dfs), n_rows); otherwise it is a 1-D array of lists.

    Raises
    ------
    ValueError
        If the columns are not the same length, or if a date/time or
        correction string cannot be parsed.
    """
    no_correction = "--------"
    dt_format = "%Y-%b-%d %H:%M:%S.%f"
    trig_dt = []

    for df in dfs:
        # Vectorization of columns
        date = df["date"].values  # date given as string
        time = df["time"].values  # time given as string
        corr = df["corr"].values  # correction given as string

        # Compare all three lengths explicitly
        if not (len(date) == len(time) == len(corr)):
            raise ValueError("Lists are not the same length")

        temp_dt = []
        for date_str, time_str, corr_str in zip(date, time, corr):
            org_dt = datetime.strptime(date_str + " " + time_str, dt_format)

            if corr_str == no_correction:  # No correction needed
                temp_dt.append(org_dt)
                continue

            # Use this row's correction. int() understands a leading "+" or
            # "-", so subtracting the signed value covers both directions.
            # timedelta also takes care of roll-over into seconds and beyond.
            micro_corr = int(corr_str)
            temp_dt.append(org_dt - timedelta(microseconds=micro_corr))

        trig_dt.append(temp_dt)

    # dtype=object keeps the datetime objects as they are and also allows
    # dataframes of different lengths (ragged input).
    return np.array(trig_dt, dtype=object)

In [148]:
# Correct the ASIM trigger times. corr_dt takes a list of dataframes, so the
# single trigB frame is wrapped in a list before the call.
asim_frames = [trigB]
trig_dt = corr_dt(asim_frames)

## Algorithm for match between ASIM and IPN

In [231]:
# Compact np.where()-based search for ASIM triggers close to each IPN trigger.
# Hits are collected in `matches` as tuples of (IPN datetime, index into trig_B).

# TODO: wrap a function around it so it can take in several triggers (trigB,trigC etc..)
search_window = timedelta(seconds=10)
matches = []

for ipn_time in ipn_dt:
    # Flag the ASIM triggers that lie within +/- 10 seconds of this IPN trigger
    in_window = (ipn_time - search_window <= trig_dt) & (trig_dt <= ipn_time + search_window)
    hits = np.where(in_window)
    if hits[1].size != 0:
        # Only the first matching column index is stored for each IPN trigger
        matches.append((ipn_time, hits[1][0]))

### Graveyard

In [None]:
# Crude way of searching for a match:
# reduce a datetime to minute resolution so the search can be narrowed down.
def extract_datetime(dt_object):
    """Return (year, month, day, hour, minute) of ``dt_object`` as a tuple."""
    return (
        dt_object.year,
        dt_object.month,
        dt_object.day,
        dt_object.hour,
        dt_object.minute,
    )

# Collect the ASIM datetime objects that share year, month, day, hour and
# minute with an IPN trigger.
# Make a function out of this one
#def narrowing_search(ipn,trigger)
temp_list = []
for ipn_time in ipn_dt:
    minute_key = extract_datetime(ipn_time)
    # Keep every trigger from the first ASIM list that falls in the same minute
    temp_list.extend(
        candidate
        for candidate in trig_dt[0]
        if (candidate.year, candidate.month, candidate.day,
            candidate.hour, candidate.minute) == minute_key
    )

minute_match = np.array(temp_list)