In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import argparse
import re
import os 
import glob

In [2]:
# Raises FileNotFoundError early if the hit directory is missing; the listing
# itself is discarded (it is not the last expression of the cell).
os.listdir('Hit-Details')

ie_datfiles = sorted(glob.glob('Hit-Details/*IE.0000.dat'))
se_datfiles = sorted(glob.glob('Hit-Details/*SE.0000.dat'))

# glob.glob returns matches in arbitrary order, so indexing an unsorted
# 'B*' listing with [0]/[1] could swap the two stations between runs.
# Select each station's pulsar file by its own pattern instead.
pulsar_data_ie = sorted(glob.glob('Hit-Details/B*.IE.*'))[0]
pulsar_data_swe = sorted(glob.glob('Hit-Details/B*.SE.*'))[0]

In [3]:
# Signal-to-noise threshold setting for this notebook.
snr_thresh = 10

# Echo the configured threshold together with its Python type.
print(
    'Inputted SNR threshold:',
    np.array(snr_thresh),
    'Type:',
    type(snr_thresh),
)

def read_dat(filename):
    r"""
    Read a turboseti .dat file.

    Lines 1, 3, 4 and 5 (0-indexed) of the file are parsed as header fields,
    and every non-blank line from line 9 onwards is parsed as one hit made of
    whitespace-separated fields.

    Parameters
    ----------
    filename : str
        Name of .dat file to open. Surrounding whitespace is stripped.

    Returns
    -------
    df_data : pandas.DataFrame
        One row per hit with the numeric columns TopHitNum, DriftRate, SNR,
        Freq, ChanIndx, FreqStart, FreqEnd, CoarseChanNum and
        FullNumHitsInRange, plus the header values FileID, Source
        (upper-cased), MJD, RA, DEC, DELTAT and DELTAF repeated on every row
        as strings, and the empty placeholder columns Hit_ID, status,
        in_n_ons and RFI_in_range. A file without hits yields a frame with
        zero rows.
    """
    # The context manager closes the handle even if parsing below fails.
    with open(filename.strip()) as file_dat:
        hits = file_dat.readlines()

    # Get info from the .dat file header
    FileID = hits[1].strip().split(':')[-1].strip()
    Source = hits[3].strip().split(':')[-1].strip()

    coord_fields = hits[4].strip().split('\t')
    MJD = coord_fields[0].split(':')[-1].strip()
    RA = coord_fields[1].split(':')[-1].strip()
    DEC = coord_fields[2].split(':')[-1].strip()

    resolution_fields = hits[5].strip().split('\t')
    DELTAT = resolution_fields[0].split(':')[-1].strip()  # s
    DELTAF = resolution_fields[1].split(':')[-1].strip()  # Hz

    # Get info from individual hits (the body of the .dat file).
    # str.split() with no argument splits on any run of whitespace (tabs
    # included) and returns [] for a blank line, which is skipped: a blank
    # line would otherwise truncate the column transpose below to one field.
    all_hits = []
    for hit_line in hits[9:]:
        hit_fields = hit_line.split()
        if hit_fields:
            all_hits.append(hit_fields)

    # Now reorganize that info to be grouped by column (parameter)
    # not row (individual hit)
    if all_hits:
        columns = list(zip(*all_hits))  # transpose once, reuse for every field

        data = {'TopHitNum': columns[0],
                'DriftRate': [float(value) for value in columns[1]],
                'SNR': [float(value) for value in columns[2]],
                'Freq': [float(value) for value in columns[3]],
                'ChanIndx': columns[5],
                'FreqStart': columns[6],
                'FreqEnd': columns[7],
                'CoarseChanNum': columns[10],
                'FullNumHitsInRange': columns[11]
                }

        # Creating pandas dataframe from data we just read in; the remaining
        # string columns are converted to numbers here.
        df_data = pd.DataFrame(data)
        df_data = df_data.apply(pd.to_numeric)

    else:
        df_data = pd.DataFrame()

    # Matching column information from before to the .dat data we read in
    df_data['FileID'] = FileID
    df_data['Source'] = Source.upper()
    df_data['MJD'] = MJD
    df_data['RA'] = RA
    df_data['DEC'] = DEC
    df_data['DELTAT'] = DELTAT
    df_data['DELTAF'] = DELTAF

    # Adding extra columns that will be filled out by this program
    df_data['Hit_ID'] = ''
    df_data['status'] = ''
    df_data['in_n_ons'] = ''
    df_data['RFI_in_range'] = ''

    return df_data

Inputted SNR threshold: 10 Type: <class 'int'>


In [16]:
def TIC_comparison_script(ie_filenames, se_filenames):
    """Cross-match hits between paired Irish (IE) and Swedish (SE) .dat files.

    The two lists are paired by position. For each pair the target name is
    taken from the file name (text before the first '.'); pairs whose names
    differ are reported and skipped. Every IE hit is compared against every
    SE hit, and a pair counts as a mutual hit when the SE frequency lies
    within 4 Hz and the SE drift rate within 0.1 Hz/s of the IE values
    (bounds inclusive). No SNR cut is applied; the notebook-level
    ``snr_thresh`` is not used here.

    Side effects
    ------------
    * Prints per-target hit counts and every matching pair.
    * For each target with at least one match, writes the matching IE/SE row
      indexes to ``Hit-Indexes/<target>.matching.idxs.dat`` (the directory is
      created if needed).
    * Writes the per-target summary to ``hits_overview.csv``.

    Parameters
    ----------
    ie_filenames : list of str
        Paths of the Irish-station .dat files.
    se_filenames : list of str
        Paths of the Swedish-station .dat files, same order as ``ie_filenames``.

    Returns
    -------
    pandas.DataFrame
        One row per processed target with columns TIC_ID (target name without
        its first three characters), SE_Hits, IE_Hits and Mutual_Hits.
    """
    # - Parameter limits for accepting a hit as mutual -
    drift_tol = 0.1  # Hz/s
    freq_tol = 4  # Hz

    target_list = []
    irish_hits = []
    swedish_hits = []
    mhits_array = []

    for file_idx, ie_dat in enumerate(ie_filenames):  # cycling through each target
        se_dat = se_filenames[file_idx]

        # Extract the target name from the file name itself, so the result
        # does not depend on how many directories precede it.
        ie_TIC_name = os.path.basename(ie_dat).split('.')[0]
        se_TIC_name = os.path.basename(se_dat).split('.')[0]

        # Ensure the files from both stations describe the same target. A
        # mismatched pair is skipped: carrying on would either fail on
        # undefined frames or silently reuse the previous target's data.
        if ie_TIC_name != se_TIC_name:
            print('TIC target is not the same.')
            continue

        df_ie = read_dat(ie_dat)
        df_se = read_dat(se_dat)

        ie_hit_n = len(df_ie)
        se_hit_n = len(df_se)
        irish_hits.append(ie_hit_n)
        swedish_hits.append(se_hit_n)
        target_list.append(ie_TIC_name[3:])  # drop the 'TIC' prefix
        print('--- \n Target: %s \n Irish Hits: %s \n Swedish Hits: %s' % (ie_TIC_name, ie_hit_n, se_hit_n))

        ie_matches_idx = []
        se_matches_idx = []

        # A frame without hits has no 'Freq'/'DriftRate' columns, so only
        # compare when both stations actually recorded hits.
        if ie_hit_n and se_hit_n:
            # Pull the columns out once instead of indexing the DataFrame for
            # every pair. Frequencies are converted from MHz to Hz.
            ie_freqs = df_ie['Freq'].to_numpy() * 1e6
            ie_drifts = df_ie['DriftRate'].to_numpy()
            se_freqs = df_se['Freq'].to_numpy() * 1e6
            se_drifts = df_se['DriftRate'].to_numpy()

            for ind, frequency, drift_rate in zip(df_ie.index, ie_freqs, ie_drifts):
                # - compare each Swedish hit to the Irish hit
                for ind_s, frequency_se, drift_rate_se in zip(df_se.index, se_freqs, se_drifts):
                    freq_ok = frequency - freq_tol <= frequency_se <= frequency + freq_tol
                    drift_ok = drift_rate - drift_tol <= drift_rate_se <= drift_rate + drift_tol
                    if freq_ok and drift_ok:
                        print('Matching Signal!')
                        ie_matches_idx.append(ind)
                        se_matches_idx.append(ind_s)
                        print('IE Freq: ', frequency, 'SE Freq: ', frequency_se)
                        print('IE D_rate:', drift_rate, 'SE D_rate: ', drift_rate_se)

        mutual_hits = len(ie_matches_idx)
        if mutual_hits > 0:
            os.makedirs('Hit-Indexes', exist_ok=True)
            np.savetxt(('Hit-Indexes/' + str(ie_TIC_name) + '.matching.idxs.dat'),
                       np.column_stack([ie_matches_idx, se_matches_idx]), fmt='%i')
        mhits_array.append(mutual_hits)

    body_data = {'TIC_ID': target_list, 'SE_Hits': swedish_hits, 'IE_Hits': irish_hits, 'Mutual_Hits': mhits_array}
    # Pin the count columns to integers so the means below are well defined
    # even when no target was processed.
    summary = pd.DataFrame(data=body_data).astype(
        {'SE_Hits': 'int64', 'IE_Hits': 'int64', 'Mutual_Hits': 'int64'})

    summary.to_csv('hits_overview.csv')
    print('Mean Irish Hits: %s \nMean Swedish Hits: %s' % (summary['IE_Hits'].mean(), summary['SE_Hits'].mean()))

    # Return the table so the calling cell can display or reuse it.
    return summary

# TIC Target Comparison Test

In [17]:
# Gather the TIC hit files of each station. Both lists are sorted so that
# entries at the same position are compared against each other.
ie_tic_names = sorted(glob.glob('Hit-Details/TIC*IE*.dat'))
se_tic_names = sorted(glob.glob('Hit-Details/TIC*SE*.dat'))

TIC_comparison_script(ie_tic_names, se_tic_names)

--- 
 Target: TIC121966220 
 Irish Hits: 171 
 Swedish Hits: 376
--- 
 Target: TIC142090065 
 Irish Hits: 282 
 Swedish Hits: 290
--- 
 Target: TIC158002130 
 Irish Hits: 361 
 Swedish Hits: 369
Matching Signal!
IE Freq:  110097583.0 SE Freq:  110097584.0
IE D_rate: 0.0 SE D_rate:  0.002169
Matching Signal!
IE Freq:  112597581.0 SE Freq:  112597582.0
IE D_rate: 0.0 SE D_rate:  0.002169
Matching Signal!
IE Freq:  120097577.0 SE Freq:  120097578.0
IE D_rate: 0.0 SE D_rate:  0.002169
Matching Signal!
IE Freq:  130097573.00000001 SE Freq:  130097574.00000001
IE D_rate: 0.0 SE D_rate:  -0.002169
Matching Signal!
IE Freq:  134472569.0 SE Freq:  134472570.0
IE D_rate: 0.0 SE D_rate:  -0.002169
Matching Signal!
IE Freq:  137597566.0 SE Freq:  137597567.0
IE D_rate: 0.0 SE D_rate:  0.002169
Matching Signal!
IE Freq:  140097564.0 SE Freq:  140097565.0
IE D_rate: 0.0 SE D_rate:  -0.002169
Matching Signal!
IE Freq:  143847563.0 SE Freq:  143847564.0
IE D_rate: 0.0 SE D_rate:  -0.004339
Matching Si

# Pulsar Comparison Test

In [33]:
def pulsar_comparison_script(ie_filenames, se_filenames):
    """Cross-match hits between paired Irish (IE) and Swedish (SE) pulsar .dat files.

    The two lists are paired by position. For each pair the target name is
    taken from the file name (text before the first '.'); pairs whose names
    differ are reported and skipped. Every IE hit is compared against every
    SE hit, and a pair counts as a mutual hit when the SE frequency lies
    within 5 Hz and the SE drift rate within 0.2 Hz/s of the IE values
    (bounds inclusive). No SNR cut is applied; the notebook-level
    ``snr_thresh`` is not used here.

    Side effects
    ------------
    * Prints per-target hit counts and every matching pair.
    * Writes the per-target summary to ``hits_overview.csv``. This is the same
      file name TIC_comparison_script writes, so running both overwrites it.

    Parameters
    ----------
    ie_filenames : list of str
        Paths of the Irish-station .dat files.
    se_filenames : list of str
        Paths of the Swedish-station .dat files, same order as ``ie_filenames``.

    Returns
    -------
    pandas.DataFrame
        One row per processed target with columns TIC_ID (the full target
        name), SE_Hits, IE_Hits and Mutual_Hits.
    """
    # - Parameter limits for accepting a hit as mutual -
    drift_tol = 0.2  # Hz/s
    freq_tol = 5  # Hz

    target_list = []
    irish_hits = []
    swedish_hits = []
    mhits_array = []

    for file_idx, ie_dat in enumerate(ie_filenames):  # cycling through each target
        se_dat = se_filenames[file_idx]

        # Extract the target name from the file name itself, so the result
        # does not depend on how many directories precede it.
        ie_TIC_name = os.path.basename(ie_dat).split('.')[0]
        se_TIC_name = os.path.basename(se_dat).split('.')[0]

        # Ensure the files from both stations describe the same target. A
        # mismatched pair is skipped: carrying on would either fail on
        # undefined frames or silently reuse the previous target's data.
        if ie_TIC_name != se_TIC_name:
            print('TIC target is not the same.')
            continue

        df_ie = read_dat(ie_dat)
        df_se = read_dat(se_dat)

        ie_hit_n = len(df_ie)
        se_hit_n = len(df_se)
        irish_hits.append(ie_hit_n)
        swedish_hits.append(se_hit_n)
        # Keep the whole name: dropping the first three characters only makes
        # sense for a 'TIC' prefix and would mangle a name such as 'B0329+54'.
        target_list.append(ie_TIC_name)
        print('--- \n Target: %s \n Irish Hits: %s \n Swedish Hits: %s' % (ie_TIC_name, ie_hit_n, se_hit_n))

        hit_matches = []

        # A frame without hits has no 'Freq'/'DriftRate' columns, so only
        # compare when both stations actually recorded hits.
        if ie_hit_n and se_hit_n:
            # Pull the columns out once instead of indexing the DataFrame for
            # every pair. Frequencies are converted from MHz to Hz.
            ie_hit_nums = df_ie['TopHitNum'].to_numpy()
            ie_freqs = df_ie['Freq'].to_numpy() * 1e6
            ie_drifts = df_ie['DriftRate'].to_numpy()
            se_hit_nums = df_se['TopHitNum'].to_numpy()
            se_freqs = df_se['Freq'].to_numpy() * 1e6
            se_drifts = df_se['DriftRate'].to_numpy()

            for ind, hit_num, frequency, drift_rate in zip(df_ie.index, ie_hit_nums, ie_freqs, ie_drifts):
                # - compare each Swedish hit to the Irish hit
                for ind_s, hit_num_se, frequency_se, drift_rate_se in zip(
                        df_se.index, se_hit_nums, se_freqs, se_drifts):
                    freq_ok = frequency - freq_tol <= frequency_se <= frequency + freq_tol
                    drift_ok = drift_rate - drift_tol <= drift_rate_se <= drift_rate + drift_tol
                    if freq_ok and drift_ok:
                        hit_matches.append('IE' + str(hit_num) + '_SE' + str(hit_num_se))
                        print('Hit Numbers: IR ', ind, 'SE ', ind_s)
                        print('IE Freq: ', frequency, 'SE Freq: ', frequency_se)
                        print('IE D_rate:', drift_rate, 'SE D_rate: ', drift_rate_se)

        # Record this target's match count (one entry per processed target).
        mhits_array.append(len(hit_matches))

    body_data = {'TIC_ID': target_list, 'SE_Hits': swedish_hits, 'IE_Hits': irish_hits, 'Mutual_Hits': mhits_array}
    # Pin the count columns to integers so the means below are well defined
    # even when no target was processed.
    summary = pd.DataFrame(data=body_data).astype(
        {'SE_Hits': 'int64', 'IE_Hits': 'int64', 'Mutual_Hits': 'int64'})

    summary.to_csv('hits_overview.csv')
    print('Mean Irish Hits: %s \nMean Swedish Hits: %s' % (summary['IE_Hits'].mean(), summary['SE_Hits'].mean()))

    # Return the table so the calling cell can display or reuse it.
    return summary

In [34]:
# Single-target pulsar check: one barycentred B0329+54 file per station.
ie = ['Hit-Details/B0329+54.bary.IE.210707.0000.dat']
se = ['Hit-Details/B0329+54.bary.SE.210707.0000.dat']

pulsar_comparison_script(ie, se)

--- 
 Target: B0329+54 
 Irish Hits: 222 
 Swedish Hits: 113
Mean Irish Hits: 222.0 
Mean Swedish Hits: 113.0


'B0329+54'