In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
from datetime import datetime, timedelta
import csv

## Objective:
-Import the datasets. Still need the extra dataset. Includes triggers up to dec. 21

-datetime helpful docs: https://www.w3schools.com/python/python_datetime.asp

-Create algorithm that searches for matches that are +/- 10 second apart and remove the rest (maybe two seconds)

-First match found at 2018, 7, 11, 17, 2, 4

-Do spectral analysis on the data (counts/bin)
### Notes from stackoverflow:
np.where() implementation: https://stackoverflow.com/questions/35714902/find-location-of-pair-of-elements-in-two-arrays-in-numpy

## Importing data

In [2]:
#Importing and redefining the dataframes
#Not loading the trigger list for 2021 yet

ipn_data = pd.read_csv("./IPN/trigIPN.csv", sep="|")
ipn = pd.DataFrame(ipn_data)
ipn.drop(columns= ['Unnamed: 0', 'Unnamed: 2'], axis=1, inplace=True) #dropping unwanted columns
ipn.rename(columns={ipn.columns[0]:"time"}, inplace = True)

trigB_data = pd.read_csv("./ASIM/trigB.txt", sep = "\s+|\t+|\s+\t+|\t+\s+", engine="python")
trigB = pd.DataFrame(trigB_data)
trigB.drop(columns=["######"], inplace=True) #dropping unwanted columns
trigB.rename(columns={"yyyy-MMM-dd":"date","HH:mm:ss.SSSSSS": "time", "Corr":"corr"}, inplace=True) #renaming columns

#trigB_21_data = pd.read_csv("./ASIM/trigB_2021.txt", sep = "\s+|\t+|\s+\t+|\t+\s+", engine="python")
#trigB_21 = pd.DataFrame(trigB_21_data)
#trigB_21.drop(columns=["######"], inplace=True)
#trigB_21.rename(columns={"yyyy-MMM-dd":"date","HH:mm:ss.SSSSSS": "time" }, inplace=True)

#trigC_data = pd.read_csv("./ASIM/trigC.txt", sep = "\s+|\t+|\s+\t+|\t+\s+", engine="python")
#trigC = pd.DataFrame(trigC_data)
#trigC.drop(columns=["######"], inplace=True)
#trigC.rename(columns={"yyyy-MMM-dd":"date","HH:mm:ss.SSSSSS": "time" ,"Corr":"corr"}, inplace=True)

#trigC_21_data = pd.read_csv("./ASIM/trigC_2021.txt", sep = "\s+|\t+|\s+\t+|\t+\s+", engine="python")
#trigC_21 = pd.DataFrame(trigC_21_data)
#trigC_21.drop(columns=["######"], inplace=True)
#trigC_21.rename(columns={"yyyy-MMM-dd":"date","HH:mm:ss.SSSSSS": "time" }, inplace=True)

#trigM_data = pd.read_csv("./ASIM/trigM.txt", sep = "\s+|\t+|\s+\t+|\t+\s+", engine="python")
#trigM = pd.DataFrame(trigM_data)
#trigM.drop(columns=["######"], inplace=True)
#trigM.rename(columns={"yyyy-MMM-dd":"date","HH:mm:ss.SSSSSS": "time", "Corr":"corr" }, inplace=True)

#trigM_21_data = pd.read_csv("./ASIM/trigM_2021.txt", sep = "\s+|\t+|\s+\t+|\t+\s+", engine="python")
#trigM_21 = pd.DataFrame(trigM_21_data)
#trigM_21.drop(columns=["######"], inplace=True)
#trigM_21.rename(columns={"yyyy-MMM-dd":"date","HH:mm:ss.SSSSSS": "time" }, inplace=True)

### Importing supplementary IPN triggers 
Latest ASIM trigger: 2021, 3, 20, 22, 51, 59

Latest IPN trigger: 2021, 6, 27, 19, 31, 37 --> Extending this until the end of the year

In [3]:
ipn_supp_data = pd.read_csv("./IPN/ipn_supp.txt",sep = "\s+|\t+|\s+\t+|\t+\s+", engine="python")
ipn_supp = pd.DataFrame(ipn_supp_data)
#Dropping unwanted columns
ipn_supp = ipn_supp.drop(["GRB_name_Fermi",'ra(°J2000)', 'decl(°J2000)',
       'pos_error(°1-sigma)', 'T90(s)', 'T90_error(s)', 'T90_start(UTC)',
       'fluence(erg/cm^2)', 'fluence_error(erg/cm^2)', 'redshift', 'T100(s)',
       'GBM_located', 'mjd(T0day)'], 1)

In [4]:
# Vectorization of names and times from dataframe. Zero-padding hour in times
names = ipn_supp["GRB_name"].values
times = ipn_supp["T0(UTC)"].values  # Hour is not zero padded.

# Zero padding times
def zero_padding(non_zero_padd):
    new_times = []

    for time in non_zero_padd:
        if time.index(":") < 2:
            time = "0" + time
            new_times.append(time)
        else:
            new_times.append(time)
    return new_times

new_times = zero_padding(times)

In [5]:
# Defining datetime objects.
# Problem: some of the times have microsecond resolution while other dont
new_ipn = []
if len(names) == len(new_times):
    for i in np.arange(0, len(names)):
        date = names[i][3:9]
        time = new_times[i]

        # Managing the microsecond resolution
        if len(time) > 8:
            dt_obj = datetime.strptime(date + " " + time, "%y%m%d %H:%M:%S.%f")
        else:
            dt_obj = datetime.strptime(date + " " + time, "%y%m%d %H:%M:%S")
            
        new_ipn.append(dt_obj)
else:
    print("Unequal length of arrays")
    
#Earliest entry in the ASIM data 2018,6,1,12,46,8
# Including only entries up to 1142. That means entries in IPN from 2018, 5, 29, 8, 29, 14

new_ipn = np.asarray(new_ipn[:1142][::-1]) #Slicing the list. Returning in reverse order

In [6]:
#Old IPN datetime list
ipn_dt_temp = []
ipn_dict = ipn.to_dict("records")
for row in ipn_dict:
    datetime_str = row["time"]
    datetime_obj = datetime.strptime(datetime_str, "%Y-%m-%d %H:%M:%S")
    ipn_dt_temp.append(datetime_obj)
    
old_ipn = np.asarray(ipn_dt_temp)

## Vectorization and ASIM datetime correction

In [7]:
# Function for correcting date and time in ASIM data. Returning matrix that contains datetime objects
# Method for correcting time
# 1. Retrive the time from time column
# 2. Isolate the microsecond time from that and cast it to an int
# 3. Retrive the correction time from Corr column and cast it to an int
# 4. Subtract correction time from time and cast it to an string
# 5. Insert the corrected time

# PROBLEM: ONLY ONE DATETIME IS ADDED

def corr_dt(dfs):  # Correcting times from ASIM data

    """   This function corrects the time from ASIM data.
    The correction is done by subtracting the correction from the original time.
    The correction is given as a string.
    The function takes a list of dataframes as input.
    The function returns a list of arrays containing the corrected datetime objects.
    
    Parameters
    ----------
    dfs : list of dataframes
        The dataframes containing the data.
    
    Returns
    -------
    trig_dt : list of arrays
        The corrected datetime objects.
    
    Raises
    ------
    ValueError
        If the lists are not the same length.
    """
    trig_dt = []
    
    for df in dfs:
        temp_dt = []
        # Vectorization of columns
        date = df["date"].values  # date given as string.
        time = df["time"].values  # time given as string
        corr = df["corr"].values  # correction given as string
        try:
            if len(date) and len(time) != len(corr):
                raise ValueError
        except:
            raise ValueError("Lists are not the same length")
        else:
            for i in np.arange(0, len(corr)):  # Iterating over the vectors
                if corr[i] == "--------":  # No correction needed. Appending the datetime object
                    date_str = date[i]
                    time_str = time[i]
                    org_dt = datetime.strptime(
                        date_str + " " + time_str, "%Y-%b-%d %H:%M:%S.%f")
                    temp_dt.append(org_dt)
                    
                elif corr[i][0] == "-":  # If it's a "-" in front; correction is added
                    # formatting the datetime object
                    date_str = date[i]
                    time_str = time[i]
                    org_dt = datetime.strptime(
                        date_str + " " + time_str, "%Y-%b-%d %H:%M:%S.%f")  # Original datetime

                    micro_corr = int(corr[0][1:])

                    # new corrected datetime. Timedelta ccounts for changes in seconds also
                    new_dt = org_dt + timedelta(microseconds=micro_corr)
                    temp_dt.append(new_dt)
                else:
                    date_str = date[i]
                    time_str = time[i]
                    org_dt = datetime.strptime(
                        date_str + " " + time_str, "%Y-%b-%d %H:%M:%S.%f")  # Original datetime

                    micro_corr = int(corr[0][1:])

                    # new corrected datetime. Timedelta ccounts for changes in seconds also
                    new_dt = org_dt - timedelta(microseconds=micro_corr)
                    temp_dt.append(new_dt)
                    
            trig_dt.append(temp_dt)

    trig_dt = np.array(trig_dt)
    return trig_dt

In [8]:
#Callig corr_dt with a list containing the dataframes from ASIM
trig_dt = corr_dt([trigB])

## Algorithm for match between ASIM and IPN

In [37]:
#Most compact algorithm for searching using np.where()
#Storing matches in the match list. Stored as a tuple containing (datetime IPN, index trig_B)

# TODO: wrap a function around it so it can take in several triggers (trigB,trigC etc..)
matches_old = []

for i in old_ipn:
    #Searching for matches that are +/- 10 seconds from the IPN trigger
    mask = np.where((i-timedelta(seconds=10) <= trig_dt) & (trig_dt <= i + timedelta(seconds=10)))
    if mask[1].size == 0:
        continue
    else:
        matches_old.append((i,mask[1][0]))

matches_new = []
for i in new_ipn:
    #Searching for matches that are +/- 10 seconds from the IPN trigger
    mask = np.where((i-timedelta(seconds=10) <= trig_dt) & (trig_dt <= i + timedelta(seconds=10)))
    if mask[1].size == 0:
        continue
    else:
        matches_new.append((i,mask[1][0]))

In [11]:
len(matches_old),len(matches_new)

(30, 22)

In [38]:
for tup in matches_old:
    print(tup[0])

2018-07-11 17:02:02
2018-07-20 14:21:44
2018-08-09 13:12:04
2018-08-09 14:37:03
2018-09-10 03:44:16
2018-10-17 20:54:15
2018-12-22 20:11:34
2019-02-06 03:49:23
2019-02-16 11:52:26
2019-02-18 19:27:44
2019-03-05 13:05:15
2019-03-23 21:05:21
2019-04-11 09:45:56
2019-05-12 14:40:09
2019-06-06 01:55:07
2019-07-20 14:42:09
2019-08-29 19:56:44
2019-12-27 17:21:44
2020-02-12 10:49:49
2020-03-31 17:39:28
2020-04-12 06:57:11
2020-05-21 12:16:41
2020-06-19 11:48:43
2020-07-16 22:57:41
2020-09-03 02:34:27
2020-09-25 21:50:37
2020-10-13 18:06:58
2020-11-14 00:39:25
2020-12-27 15:14:07
2021-02-22 15:57:23


In [49]:
matches_new_update = []
for tup in matches_new:
    dt = tup[0].strftime("%Y-%m-%d %H:%M:%S")
    matches_new_update.append((dt,tup[1]))
    
matches_old_update = []
for tup in matches_old:
    dt = tup[0].strftime("%Y-%m-%d %H:%M:%S")
    matches_old_update.append((dt,tup[1]))

In [42]:
for index, row in enumerate(matches_old_update):
    if row[0] == 

true 0
true 1
true 2
true 3
true 4
true 5
true 6
true 7
true 8
true 9
true 10
true 11
true 12
true 13
true 14
true 15
true 16
true 17
true 18
true 19
true 20
true 21
true 22
true 23
true 24
true 25
true 26
true 27
true 28
true 29


In [32]:
check = []
for tup in matches_new_update:
    if tup[0] not in matches_old:
        check.append(tup)
len(check)

22

In [47]:
matches_old_update

['2018-07-11 17:02:02',
 '2018-07-20 14:21:44',
 '2018-08-09 13:12:04',
 '2018-08-09 14:37:03',
 '2018-09-10 03:44:16',
 '2018-10-17 20:54:15',
 '2018-12-22 20:11:34',
 '2019-02-06 03:49:23',
 '2019-02-16 11:52:26',
 '2019-02-18 19:27:44',
 '2019-03-05 13:05:15',
 '2019-03-23 21:05:21',
 '2019-04-11 09:45:56',
 '2019-05-12 14:40:09',
 '2019-06-06 01:55:07',
 '2019-07-20 14:42:09',
 '2019-08-29 19:56:44',
 '2019-12-27 17:21:44',
 '2020-02-12 10:49:49',
 '2020-03-31 17:39:28',
 '2020-04-12 06:57:11',
 '2020-05-21 12:16:41',
 '2020-06-19 11:48:43',
 '2020-07-16 22:57:41',
 '2020-09-03 02:34:27',
 '2020-09-25 21:50:37',
 '2020-10-13 18:06:58',
 '2020-11-14 00:39:25',
 '2020-12-27 15:14:07',
 '2021-02-22 15:57:23']

In [None]:
match_old = matches_old
match_new = matches_new

#Finding the dt which are in the new but not in the old
add_match = []

for dt in match_new:
    if dt not in match_old:
        add_match.append(dt)

In [None]:
print(add_match)

In [None]:
matches_old = matches
matches_old

In [None]:
matches_new = matches
matches_new

In [None]:
matches_not_in_old = []
for dt in matches_new:
    if dt[0] not in matches_old:
        matches_not_in_old.append(dt)
    else:
        continue

In [None]:
matches_not_in_old

In [None]:
matches_old

### Graveyard

In [None]:
# Crude way of searching for a match
#Extracting year,month,day,hour,minute from the ipn and trig array. Narrowing the search!
def extract_datetime(dt_object):
    year = dt_object.year
    month = dt_object.month
    day = dt_object.day
    hour = dt_object.hour
    minute = dt_object.minute
    return year,month,day,hour,minute

# Retriving datetimeobjects that fits the criteria; same year,month,day,hour,minute
#Make a function out of this one
temp_list = []
#def narrowing_search(ipn,trigger)
for i in ipn_dt:
    year,month,day,hour,minute = extract_datetime(i)
    for row in trig_dt[0]:
        if row.year == year and row.month == month and row.day == day and row.hour == hour and row.minute == minute:
            temp_list.append(row)
        else:
            continue
            
minute_match = np.array(temp_list)