In [1]:
import pandas as pd
import numpy as np
from collections import Counter

pd.set_option("display.max_columns", None)

In [2]:
df21 = pd.read_csv("./data/raw_data/chime_frb_catalog_2021.csv")
df23 = pd.read_csv("./data/raw_data/chime_frb_catalog_2023.csv")

df21["is_repeater"] = (df21["repeater_name"] != "-9999").astype(int)
df23["is_repeater"] = (df23["repeater_name"] != "-9999").astype(int)

In [3]:
df23["ra"] = df23["ra_1"]
df23["dec"] = df23["dec_1"]
df23["dm_exc_ne2001"] = df23["dm_exc_1_ne2001"]
df23["dm_exc_ymw16"] = df23["dm_exc_1_ymw16"]

try:
    df21["width_fitb"] = (df21["width_fitb"].str.replace(r"<", "")).astype(float)
    df23["width_fitb"] = (df23["width_fitb"].str.replace(r"<", "")).astype(float)
except Exception as e:
    print(e)

try:
    df21["scat_time"] = (df21["scat_time"].str.replace(r"<", "")).astype(float)
    df23["scat_time"] = (df23["scat_time"].str.replace(r"<", "")).astype(float)
except Exception as e:
    print(e)

DROP_COLUMNS = [
    "ra_1",
    "ra_2",
    "dec_1",
    "dec_2",
    "dm_exc_1_ne2001",
    "dm_exc_2_ne2001",
    "dm_exc_1_ymw16",
    "dm_exc_2_ymw16",
    "excluded_flag",
    "previous_name",
    "ra_notes",
    "dec_notes",
    "exp_up_notes",
    "exp_low_notes",
    "flux_notes",
    "fluence_notes",
    "ra_err",
    "dec_err",
    "exp_up_err",
    "exp_low_err",
    "dm_fitb_err",
    "scat_time_err",
    "flux_err",
    "fluence_err",
    "mjd_400",
    "mjd_400_err",
    "mjd_inf",
    "mjd_inf_err",
    "width_fitb_err",
    "sp_idx_err",
    "sp_run_err",
    "ra_1_err_low",
    "ra_1_err_up",
    "ra_2_err_low",
    "ra_2_err_up",
    "dec_1_err_up",
    "dec_1_err_low",
    "dec_2_err_low",
    "dec_2_err_up",
    "low_ft_68",
    "up_ft_68",
    "low_ft_90",
    "up_ft_90",
    "low_ft_95",
    "up_ft_95",
]
for col in DROP_COLUMNS:
    if col in df21.columns:
        df21.drop(col, axis=1, inplace=True)
    if col in df23.columns:
        df23.drop(col, axis=1, inplace=True)

Can only use .str accessor with string values!


## Drop flux = 0

In [4]:
# join list as string
", ".join(list((df21[df21["flux"] == 0])["tns_name"]))

'FRB20190307A, FRB20190307B, FRB20190329B, FRB20190329C, FRB20190531A, FRB20190531B'

In [5]:
print("2021:")
print(df21["tns_name"].nunique())
print(Counter(df21["is_repeater"]))
df21.drop(df21[df21["flux"] == 0].index, inplace=True)
print(df21["tns_name"].nunique())
print(Counter(df21["is_repeater"]))

print("\n2023:")
print(df23["tns_name"].nunique())
print(Counter(df23["is_repeater"]))
df23.drop(df23[df23["flux"] == 0].index, inplace=True)
print(df23["tns_name"].nunique())
print(Counter(df23["is_repeater"]))

2021:
536
Counter({0: 506, 1: 94})
530
Counter({0: 500, 1: 94})

2023:
127
Counter({1: 151})
127
Counter({1: 151})


## Process 2023 DF

The paper which published the FRBs from 2023 identified 14 repeater candidates: bursts in a cluster which appear to originate from the same repeating source, but for which the probability of chance coincidence ($P_{cc}$) > 0.5 and < 5.

These have been flaged as likely to be repeaters by the CHIME/FRB collaboration. We move these to the unlabeled flag during training, with the hope that our models will identify them many of them as repeater candidates.

In [6]:
from utils import PCC_REPEATER_CANDIDATES

# Save the separate candidates to a file
pcc = df23[
    (df23["tns_name"].isin(PCC_REPEATER_CANDIDATES))
    | (df23["repeater_name"].isin(PCC_REPEATER_CANDIDATES))
]
print(pcc.shape)
pcc.to_csv("data/candidates/2023_chimefrb_silver.csv", index=False)

# Note for each burst which sample of repeaters it is from
df23["is_pcc_candidate"] = df23.apply(
    lambda row: 1
    if row["tns_name"] in PCC_REPEATER_CANDIDATES
    else 1
    if row["repeater_name"] in PCC_REPEATER_CANDIDATES
    else 0,
    axis=1,
)

(32, 30)


In [7]:
df21.sort_values("tns_name")

Unnamed: 0,tns_name,repeater_name,ra,dec,gl,gb,exp_up,exp_low,bonsai_snr,bonsai_dm,snr_fitb,dm_fitb,dm_exc_ne2001,dm_exc_ymw16,bc_width,scat_time,flux,fluence,sub_num,width_fitb,sp_idx,sp_run,high_freq,low_freq,peak_freq,chi_sq,dof,flag_frac,is_repeater
0,FRB20180725A,-9999,93.42,67.07,147.29,21.29,30.0,-9999.0,19.2,716.6,33.2,715.80930,644.2,635.4,0.00295,0.001100,1.70,4.10,0,0.000296,38.20,-45.80,760.1,485.3,607.4,371857.954,371481,0.403,0
1,FRB20180727A,-9999,197.72,26.42,24.76,85.60,10.4,-9999.0,10.4,642.1,12.2,642.13400,620.9,622.4,0.00295,0.001700,0.58,2.31,0,0.001390,3.80,-9.20,800.2,400.2,493.3,382969.318,381818,0.387,0
2,FRB20180729A,-9999,199.40,55.58,115.26,61.16,21.0,-9999.0,32.0,108.4,206.6,109.59418,78.8,86.8,0.00098,0.000157,11.70,17.00,0,0.000100,16.46,-30.21,692.7,400.2,525.6,264732.041,186953,0.399,0
3,FRB20180729B,-9999,89.93,56.50,156.90,15.68,21.0,-9999.0,12.4,318.6,22.0,317.22350,223.2,198.8,0.00197,0.000660,0.92,1.20,0,0.000314,14.50,-14.60,800.2,441.8,657.5,425139.488,421337,0.323,0
4,FRB20180730A,-9999,57.39,87.19,125.11,25.11,270.0,214.0,69.5,849.2,89.8,848.90410,789.7,790.5,0.00492,0.002073,5.20,27.00,0,0.000468,4.27,-11.31,759.2,400.2,483.5,429165.844,417689,0.329,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
595,FRB20190701A,-9999,277.47,59.04,88.29,25.72,23.0,-9999.0,12.1,635.7,14.6,637.09340,582.8,587.8,0.00197,0.000720,1.26,1.70,0,0.000608,-1.10,3.30,800.2,400.2,800.2,341779.300,341690,0.451,0
596,FRB20190701B,-9999,302.93,80.18,112.88,23.40,69.0,70.0,15.0,748.9,17.5,749.11400,687.6,688.1,0.00295,0.000340,1.10,1.90,0,0.000630,3.90,-11.80,732.8,400.2,471.5,329229.311,330137,0.470,0
597,FRB20190701C,-9999,96.36,81.63,132.18,25.88,82.0,82.0,11.5,972.1,16.8,974.19500,915.8,916.6,0.00197,0.001800,0.88,2.50,0,0.001440,46.20,-211.00,495.5,402.2,446.4,285697.192,286362,0.540,0
598,FRB20190701D,-9999,112.10,66.70,149.28,28.38,34.0,-9999.0,34.4,934.9,44.8,933.36290,877.4,879.4,0.00885,0.001530,1.33,8.60,0,0.001400,6.49,-20.90,651.8,400.2,467.6,358566.724,354457,0.431,0


In [8]:
df23.sort_values("tns_name")

Unnamed: 0,tns_name,repeater_name,gl,gb,exp_up,exp_low,bonsai_snr,bonsai_dm,snr_fitb,dm_fitb,bc_width,scat_time,flux,fluence,sub_num,width_fitb,sp_idx,sp_run,high_freq,low_freq,peak_freq,chi_sq,dof,flag_frac,R_cc,is_repeater,ra,dec,dm_exc_ne2001,dm_exc_ymw16,is_pcc_candidate
43,FRB20180909A,FRB20180909A,160.63,31.71,75.0,-9999.0,11.03,407.6,13.86,408.6470,0.01573,0.013000,0.43,0.90,0,0.006310,-0.30,-1.30,800.2,400.2,400.2,256401.455,256250,0.441,3.930000e+00,1,120.040,57.004,356.2,360.1,1
30,FRB20180910A,FRB20180910A,122.59,26.19,3910.0,4335.0,36.65,684.2,50.38,684.4081,0.00098,0.000244,6.50,5.60,0,0.000205,0.05,-0.53,800.2,400.2,417.6,135823.872,134076,56.927,4.300000e-01,1,354.830,89.014,628.3,629.7,0
32,FRB20181201D,FRB20181201D,121.98,27.37,3590.0,4359.0,16.20,448.0,16.80,448.2660,0.00197,0.000168,0.56,1.40,0,0.000220,5.60,-56.00,515.3,400.2,420.7,147096.312,146673,52.881,6.400000e-01,1,266.900,89.125,394.5,397.4,1
23,FRB20181226F,FRB20181226F,129.79,26.21,390.0,380.0,24.81,237.8,38.22,236.4200,0.00492,0.003430,3.00,11.30,0,0.000444,3.29,-5.73,800.2,400.2,533.4,463029.409,461351,50.598,4.900000e-02,1,95.870,83.800,178.7,179.9,0
35,FRB20190107B,FRB20190107B,0.00,0.00,360.0,330.0,20.62,166.6,26.84,166.0939,0.00098,0.000900,2.80,4.30,0,0.000451,-1.30,-3.10,784.4,400.2,400.2,126924.469,122525,60.638,7.800000e-01,1,49.310,83.400,98.2,93.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
147,FRB20210331F,FRB20210323C,142.57,31.55,135.0,108.0,24.28,288.3,50.27,288.4200,0.00885,0.002500,1.02,6.10,0,0.005000,58.00,-88.00,662.7,480.9,564.5,497640.634,492355,47.278,2.990000e+00,1,122.070,72.350,237.9,243.1,1
148,FRB20210331F,FRB20210323C,142.57,31.55,135.0,108.0,24.28,288.3,50.27,288.4200,0.00885,0.002500,1.02,6.10,1,0.001254,47.50,-119.20,578.4,428.0,497.6,497640.634,492355,47.278,2.990000e+00,1,122.070,72.350,237.9,243.1,1
139,FRB20210406A,FRB20201130A,185.42,-29.04,15.0,-9999.0,10.16,287.5,10.23,288.5700,0.00393,0.006600,0.45,1.31,0,0.003320,41.00,-87.00,587.4,430.9,503.1,460103.225,459585,50.787,5.300000e-18,1,64.388,7.941,232.1,219.9,0
149,FRB20210426B,FRB20210323C,142.57,31.55,135.0,108.0,12.49,292.8,31.13,288.9200,0.00786,0.010000,1.60,7.60,0,0.005000,-8.20,15.00,800.2,613.2,800.2,1872864.206,1866568,45.490,2.990000e+00,1,122.070,72.350,238.4,243.6,1


In [9]:
# Change is_repeater to 0 for rows in df23 with tns_name in PCC_REPEATER_CANDIDATES, so that we treat it as unlabeled
print(df23["is_repeater"].value_counts())
df23["is_repeater"] = df23.apply(
    lambda row: 0 if row["is_pcc_candidate"] == 1 else row["is_repeater"], axis=1
)
# Remove repeater_name for pcc
df23["repeater_name"] = df23.apply(
    lambda row: "-9999" if row["is_pcc_candidate"] == 1 else row["repeater_name"],
    axis=1,
)
print(df23["is_repeater"].value_counts())

is_repeater
1    151
Name: count, dtype: int64
is_repeater
1    119
0     32
Name: count, dtype: int64


In [10]:
# We confirm, as per the 2023 paper, that there are 25 repeating sources left in the gold sample
df23[df23["is_repeater"] == 1]["repeater_name"].nunique()

25

In [11]:
df23.drop(columns=["R_cc"], inplace=True)

## Compare 2021 and 2023 catalogs

In [12]:
# Reorder columns to be consistent across the 2021 and 2023 dataframes
df21["is_pcc_candidate"] = 0
df21_cols = df21.columns
df23 = df23[df21_cols]

In [13]:
print("2021 catalog:")
print(f"{df21.shape[0]} sub-bursts with {df21.shape[1]} features each")
print(f"{len(df21[df21['is_repeater'] == 1])} repeat sub-bursts")
print(f"{len(df21[df21['is_repeater'] == 0])} non-repeat sub-bursts")
print("\n")

print("2023 catalog:")
print(f"{df23.shape[0]} sub-bursts with {df23.shape[1]} features each")
print(f"{len(df23[df23['is_repeater'] == 1])} repeat sub-bursts")
print(f"{len(df23[df23['is_repeater'] == 0])} non-repeat sub-bursts")
print("\n")
print(
    "Different columns: ",
    df21.columns.difference(df23.columns)
    + "\n"
    + df23.columns.difference(df21.columns),
)

2021 catalog:
594 sub-bursts with 30 features each
94 repeat sub-bursts
500 non-repeat sub-bursts


2023 catalog:
151 sub-bursts with 30 features each
119 repeat sub-bursts
32 non-repeat sub-bursts


Different columns:  Index([], dtype='object')


In [14]:
df21_bursts = list(set(df21["tns_name"]))
df21_repeat_bursts = list(set(df21[df21["is_repeater"] == 1]["tns_name"]))
df21_nonrepeat_bursts = list(set(df21[df21["is_repeater"] == 0]["tns_name"]))

df23_bursts = list(set(df23["tns_name"]))
df23_repeat_bursts = list(set(df23[df23["is_repeater"] == 1]["tns_name"]))
df23_nonrepeat_bursts = list(set(df23[df23["is_repeater"] == 0]["tns_name"]))

print("2021:")
print("Bursts:", len(df21_bursts))
print("Repeat bursts:", len(df21_repeat_bursts))
print("Non-repeat bursts:", len(df21_nonrepeat_bursts))

print("\n2023:")
print("Bursts:", len(df23_bursts))
print("Repeat bursts:", len(df23_repeat_bursts))
print("Non-repeat bursts:", len(df23_nonrepeat_bursts))

2021:
Bursts: 530
Repeat bursts: 62
Non-repeat bursts: 468

2023:
Bursts: 127
Repeat bursts: 98
Non-repeat bursts: 29


In [15]:
repeaters21 = list(df21["repeater_name"].unique())
repeaters21.remove("-9999")
repeaters23 = list(df23["repeater_name"].unique())
repeaters23.remove("-9999")

print("Number of repeating sources in 2021:", len(repeaters21))
print("Number of repeating sources in 2023:", len(repeaters23))

Number of repeating sources in 2021: 18
Number of repeating sources in 2023: 25


In [16]:
len(df23_bursts)

127

In [17]:
len(df21_bursts)

530

In [18]:
overlapping_bursts = set(df23_bursts).intersection(df21_bursts)
print("Number of overlapping bursts:", len(overlapping_bursts))

overlapping_repeater_bursts = set(df23_repeat_bursts).intersection(df21_repeat_bursts)
print("Number of overlapping repeater bursts:", len(overlapping_repeater_bursts))

overlapping_nonrepeater_bursts = set(df23_nonrepeat_bursts).intersection(
    df21_nonrepeat_bursts
)
print(
    "Number of non-repeater bursts from 2021 in the silver sample in the 2023 catalog:",
    len(overlapping_nonrepeater_bursts),
)

new_21_repeaters = set(df21_nonrepeat_bursts).intersection(set(df23_repeat_bursts))
print(
    "Number of apparent non-repeaters from the 2021 catalog discovered as repeaters in 2023:",
    len(new_21_repeaters),
)

Number of overlapping bursts: 14
Number of overlapping repeater bursts: 0
Number of non-repeater bursts from 2021 in the silver sample in the 2023 catalog: 7
Number of apparent non-repeaters from the 2021 catalog discovered as repeaters in 2023: 7


The 2023 catalog is described by CHIME as only containing bursts detected between 30 September 2019 and 1 May 2021. 
However, it includes some bursts that were identified as non-repeaters in 2021. This is for two possible reasons:
1. New bursts were observed from a close enough sky position that the CHIME clustering algorithm resolved them as originating from the same source with contamination ratio $R_{cc}$ < 0.5. There are exactly 7 of these old bursts. CHIME updates their records in the 2023 table such that they are now flagged as repeaters. Therefore, we drop these rows from the old table.
2. New bursts were observed from a close sky position, but the CHIME algorithm only calculated an $R_{cc}$ value > 0.5 and < 5. There are exactly 7 of these bursts. Since we treat these as unlabeled values in our experiments, we have already updated their rows to non-repeaters in the 2023 table. Therefore, the rows in the original 2021 table are an exact duplicate and we drop them.

In [19]:
# FRB20190308B contains two sub-bursts. All other 13 overlapping bursts just contain one sub-burst.
# Therefore, we drop the 15 sub-bursts which comprise the 14 overlapping bursts
df21_overlapping = df21[df21["tns_name"].isin(overlapping_bursts)]
print("Sub-burst rows to be dropped:", df21_overlapping.shape)
df21_overlapping

Sub-burst rows to be dropped: (15, 30)


Unnamed: 0,tns_name,repeater_name,ra,dec,gl,gb,exp_up,exp_low,bonsai_snr,bonsai_dm,snr_fitb,dm_fitb,dm_exc_ne2001,dm_exc_ymw16,bc_width,scat_time,flux,fluence,sub_num,width_fitb,sp_idx,sp_run,high_freq,low_freq,peak_freq,chi_sq,dof,flag_frac,is_repeater,is_pcc_candidate
23,FRB20180909A,-9999,123.63,56.76,160.94,33.67,21.0,-9999.0,11.0,407.6,13.9,408.647,358.8,364.5,0.01966,0.0082,0.33,1.02,0,0.00631,-0.3,-1.3,800.2,400.2,400.2,256401.455,256250,0.441,0,0
24,FRB20180910A,-9999,352.77,88.21,122.25,25.45,420.0,350.0,36.6,684.2,50.4,684.4081,626.3,627.7,0.00098,0.000244,6.5,5.6,0,0.000205,0.05,-0.53,800.2,400.2,417.6,149900.203,147737,0.525,0,0
221,FRB20190107B,-9999,33.45,83.4,125.41,20.93,108.0,95.0,20.6,166.6,26.8,166.0939,96.1,89.6,0.00098,0.0005,2.8,4.3,0,0.000451,-1.3,-3.1,785.3,400.2,400.2,166068.158,160202,0.485,0,0
227,FRB20190110C,-9999,246.98,41.42,65.52,43.85,12.4,-9999.0,15.6,221.6,28.3,221.961,186.3,193.0,0.00295,0.000217,0.64,1.4,0,0.00039,24.5,-186.0,477.7,400.2,427.4,433108.955,432281,0.306,0,0
233,FRB20190113A,-9999,108.14,-2.99,218.01,3.33,11.3,-9999.0,10.9,430.3,12.6,428.924,250.0,176.4,0.00688,0.0022,1.3,5.6,0,0.00182,7.3,-2.8,800.2,491.9,800.2,386505.498,385466,0.381,0,0
267,FRB20190127B,-9999,150.92,83.56,127.99,31.83,100.0,101.0,11.5,666.4,18.8,663.028,614.6,620.1,0.06783,0.0428,0.63,11.4,0,0.0025,5.3,-7.7,800.2,400.2,562.2,3982765.219,3979097,0.329,0,0
281,FRB20190201A,-9999,64.03,84.84,127.32,23.83,134.0,133.0,11.0,241.0,15.4,242.0038,179.6,178.5,0.00098,0.00072,2.6,3.1,0,0.000608,0.1,-0.2,800.2,400.2,476.6,401017.55,401882,0.354,0,0
301,FRB20190210C,-9999,295.75,89.1,121.95,26.92,950.0,860.0,19.7,642.1,27.3,643.3669,588.1,590.7,0.00197,0.00025,2.37,3.6,0,0.000286,0.35,-1.5,800.2,400.2,448.5,399026.111,397625,0.361,0,0
345,FRB20190226B,-9999,273.57,61.81,91.05,28.02,24.0,-9999.0,9.2,630.8,13.2,631.603,580.9,586.2,0.01868,0.0049,0.38,2.38,0,0.004,29.9,-39.3,745.3,459.4,585.2,679608.542,680346,0.271,0,0
359,FRB20190303D,-9999,179.57,70.84,129.13,45.64,34.0,35.1,9.8,710.1,11.2,711.151,674.1,682.3,0.00295,0.001,0.59,1.17,0,0.00081,-0.9,-5.6,704.3,400.2,400.2,468396.617,469978,0.245,0,0


In [20]:
df23[~df23["tns_name"].isin(overlapping_bursts)]

Unnamed: 0,tns_name,repeater_name,ra,dec,gl,gb,exp_up,exp_low,bonsai_snr,bonsai_dm,snr_fitb,dm_fitb,dm_exc_ne2001,dm_exc_ymw16,bc_width,scat_time,flux,fluence,sub_num,width_fitb,sp_idx,sp_run,high_freq,low_freq,peak_freq,chi_sq,dof,flag_frac,is_repeater,is_pcc_candidate
0,FRB20201130A,FRB20201130A,64.388,7.9410,185.42,-29.04,15.0,-9999.0,9.30,287.1,12.83,288.270,231.8,219.6,0.00688,0.00600,0.700,2.900,0,0.002980,16.6,-17.9,800.2,462.9,627.5,337877.662,336066,46.021,1,0
1,FRB20191106C,FRB20191106C,199.577,43.0025,105.70,73.22,65.6,-9999.0,9.36,331.6,19.83,333.057,308.0,312.5,0.01081,0.01000,0.177,1.480,0,0.005120,10.5,-10.9,800.2,435.6,618.5,493027.006,500853,46.368,1,0
2,FRB20200619A,FRB20200619A,272.570,55.5580,83.96,27.77,109.7,-9999.0,11.48,438.3,18.36,439.440,388.5,394.6,0.00492,0.00066,0.290,0.678,0,0.000720,30.9,-158.0,498.1,400.2,441.4,352956.034,353963,43.146,1,0
3,FRB20190804E,FRB20190804E,261.343,55.0689,82.89,34.11,93.7,-9999.0,33.61,360.7,90.49,362.680,319.3,326.1,0.02949,0.00290,1.020,4.870,0,0.005000,42.8,-156.0,534.1,410.8,468.4,463513.055,460378,50.702,1,0
4,FRB20190804E,FRB20190804E,261.343,55.0689,82.89,34.11,93.7,-9999.0,33.61,360.7,90.49,362.680,319.3,326.1,0.02949,0.00290,1.020,4.870,1,0.001433,50.7,-288.0,477.5,400.2,436.3,463513.055,460378,50.702,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
146,FRB20210313E,FRB20201221B,124.199,48.7810,170.55,33.81,90.3,-9999.0,10.00,510.3,15.43,510.354,459.5,464.4,0.02064,0.00173,0.250,1.130,0,0.001260,34.3,-183.0,491.8,400.2,439.6,352048.133,351911,43.475,1,0
147,FRB20210331F,-9999,122.070,72.3500,142.57,31.55,135.0,108.0,24.28,288.3,50.27,288.420,237.9,243.1,0.00885,0.00250,1.020,6.100,0,0.005000,58.0,-88.0,662.7,480.9,564.5,497640.634,492355,47.278,0,1
148,FRB20210331F,-9999,122.070,72.3500,142.57,31.55,135.0,108.0,24.28,288.3,50.27,288.420,237.9,243.1,0.00885,0.00250,1.020,6.100,1,0.001254,47.5,-119.2,578.4,428.0,497.6,497640.634,492355,47.278,0,1
149,FRB20210426B,-9999,122.070,72.3500,142.57,31.55,135.0,108.0,12.49,292.8,31.13,288.920,238.4,243.6,0.00786,0.01000,1.600,7.600,0,0.005000,-8.2,15.0,800.2,613.2,800.2,1872864.206,1866568,45.490,0,1


In [21]:
df23_overlapping = df23[df23["tns_name"].isin(overlapping_bursts)]
print(df23_overlapping["repeater_name"].nunique())
print(df23_overlapping["tns_name"].nunique())
print(df23_overlapping.shape)
df23_overlapping

8
14
(15, 30)


Unnamed: 0,tns_name,repeater_name,ra,dec,gl,gb,exp_up,exp_low,bonsai_snr,bonsai_dm,snr_fitb,dm_fitb,dm_exc_ne2001,dm_exc_ymw16,bc_width,scat_time,flux,fluence,sub_num,width_fitb,sp_idx,sp_run,high_freq,low_freq,peak_freq,chi_sq,dof,flag_frac,is_repeater,is_pcc_candidate
10,FRB20190609C,FRB20190609C,73.324,24.0678,177.36,-12.39,71.95,-9999.0,10.96,480.4,17.1,480.282,366.9,326.7,0.00393,0.0041,0.64,1.91,0,0.00207,15.2,-138.0,481.3,400.2,422.9,323294.49,324438,47.888,1,0
11,FRB20190226B,FRB20190226B,273.62,61.67,0.0,0.0,85.0,-9999.0,9.17,630.8,13.17,631.603,580.9,586.1,0.01081,0.008,0.38,2.38,0,0.004,29.9,-39.3,706.4,474.3,578.8,467683.775,468078,49.878,1,0
14,FRB20190430C,FRB20190430C,277.2097,24.7699,53.11,15.7,29.4,-9999.0,22.54,399.5,32.21,400.561,301.0,316.9,0.00295,0.0018,5.8,15.2,0,0.000893,48.7,-48.8,800.2,527.8,655.2,297460.588,295102,52.6,1,0
21,FRB20190110C,FRB20190110C,249.325,41.445,65.58,42.09,45.0,-9999.0,15.58,221.6,27.76,221.921,184.9,192.0,0.00295,0.0015,0.64,1.4,0,0.000752,28.2,-202.0,477.7,400.2,427.4,283435.026,283018,54.541,1,0
26,FRB20190113A,FRB20190113A,108.26,-2.98,0.0,0.0,40.0,-9999.0,10.93,430.3,12.55,428.924,251.0,178.1,0.00688,0.0036,1.1,5.7,0,0.00182,7.3,-2.8,800.2,491.9,800.2,288553.329,287540,53.815,1,0
30,FRB20180910A,FRB20180910A,354.83,89.014,122.59,26.19,3910.0,4335.0,36.65,684.2,50.38,684.4081,628.3,629.7,0.00098,0.000244,6.5,5.6,0,0.000205,0.05,-0.53,800.2,400.2,417.6,135823.872,134076,56.927,1,0
31,FRB20190303D,-9999,185.339,70.691,126.52,46.23,55.0,141.7,9.78,710.1,11.16,711.151,674.5,682.5,0.00295,0.0016,0.59,1.17,0,0.00081,-0.9,-5.6,564.8,400.2,440.8,328339.172,328732,47.198,0,1
33,FRB20190328C,-9999,75.649,82.11,130.57,23.32,310.0,290.0,18.25,470.7,21.65,472.8607,408.8,406.4,0.00393,0.0023,4.7,14.9,0,0.00077,0.7,-2.7,800.2,400.2,459.0,500293.147,499256,46.539,0,1
35,FRB20190107B,-9999,49.31,83.4,0.0,0.0,360.0,330.0,20.62,166.6,26.84,166.0939,98.2,93.0,0.00098,0.0009,2.8,4.3,0,0.000451,-1.3,-3.1,784.4,400.2,400.2,126924.469,122525,60.638,0,1
37,FRB20190210C,-9999,313.9,89.19,122.16,26.71,3400.0,3090.0,19.69,642.1,27.31,643.3669,589.5,592.2,0.00197,0.00025,2.37,3.6,0,0.000286,0.35,-1.5,800.2,400.2,448.5,310287.093,310719,50.092,0,1


In [22]:
df21[df21["tns_name"].isin(overlapping_nonrepeater_bursts)]

Unnamed: 0,tns_name,repeater_name,ra,dec,gl,gb,exp_up,exp_low,bonsai_snr,bonsai_dm,snr_fitb,dm_fitb,dm_exc_ne2001,dm_exc_ymw16,bc_width,scat_time,flux,fluence,sub_num,width_fitb,sp_idx,sp_run,high_freq,low_freq,peak_freq,chi_sq,dof,flag_frac,is_repeater,is_pcc_candidate
23,FRB20180909A,-9999,123.63,56.76,160.94,33.67,21.0,-9999.0,11.0,407.6,13.9,408.647,358.8,364.5,0.01966,0.0082,0.33,1.02,0,0.00631,-0.3,-1.3,800.2,400.2,400.2,256401.455,256250,0.441,0,0
221,FRB20190107B,-9999,33.45,83.4,125.41,20.93,108.0,95.0,20.6,166.6,26.8,166.0939,96.1,89.6,0.00098,0.0005,2.8,4.3,0,0.000451,-1.3,-3.1,785.3,400.2,400.2,166068.158,160202,0.485,0,0
267,FRB20190127B,-9999,150.92,83.56,127.99,31.83,100.0,101.0,11.5,666.4,18.8,663.028,614.6,620.1,0.06783,0.0428,0.63,11.4,0,0.0025,5.3,-7.7,800.2,400.2,562.2,3982765.219,3979097,0.329,0,0
301,FRB20190210C,-9999,295.75,89.1,121.95,26.92,950.0,860.0,19.7,642.1,27.3,643.3669,588.1,590.7,0.00197,0.00025,2.37,3.6,0,0.000286,0.35,-1.5,800.2,400.2,448.5,399026.111,397625,0.361,0,0
359,FRB20190303D,-9999,179.57,70.84,129.13,45.64,34.0,35.1,9.8,710.1,11.2,711.151,674.1,682.3,0.00295,0.001,0.59,1.17,0,0.00081,-0.9,-5.6,704.3,400.2,400.2,468396.617,469978,0.245,0,0
368,FRB20190308B,-9999,38.59,83.62,125.9,21.35,107.0,103.0,10.3,179.5,21.8,180.18,111.5,106.0,0.00197,0.000134,1.11,1.39,0,0.000186,18.6,-52.9,587.9,400.2,477.2,392158.517,391540,0.371,0,0
369,FRB20190308B,-9999,38.59,83.62,125.9,21.35,107.0,103.0,10.3,179.5,21.8,180.18,111.5,106.0,0.00197,0.000134,1.11,1.39,1,0.00052,9.5,-37.0,585.3,400.2,455.5,392158.517,391540,0.371,0,0
400,FRB20190328C,-9999,73.83,81.96,130.57,23.02,86.0,88.0,18.2,470.7,21.6,472.8607,408.2,405.3,0.00492,0.0023,4.7,14.9,0,0.00077,0.7,-2.7,800.2,400.2,459.0,520858.438,518921,0.444,0,0


In [23]:
print(df21.shape)
df21.drop(df21_overlapping.index, axis=0, inplace=True)
print(df21.shape)
df21

(594, 30)
(579, 30)


Unnamed: 0,tns_name,repeater_name,ra,dec,gl,gb,exp_up,exp_low,bonsai_snr,bonsai_dm,snr_fitb,dm_fitb,dm_exc_ne2001,dm_exc_ymw16,bc_width,scat_time,flux,fluence,sub_num,width_fitb,sp_idx,sp_run,high_freq,low_freq,peak_freq,chi_sq,dof,flag_frac,is_repeater,is_pcc_candidate
0,FRB20180725A,-9999,93.42,67.07,147.29,21.29,30.0,-9999.0,19.2,716.6,33.2,715.80930,644.2,635.4,0.00295,0.001100,1.70,4.10,0,0.000296,38.20,-45.80,760.1,485.3,607.4,371857.954,371481,0.403,0,0
1,FRB20180727A,-9999,197.72,26.42,24.76,85.60,10.4,-9999.0,10.4,642.1,12.2,642.13400,620.9,622.4,0.00295,0.001700,0.58,2.31,0,0.001390,3.80,-9.20,800.2,400.2,493.3,382969.318,381818,0.387,0,0
2,FRB20180729A,-9999,199.40,55.58,115.26,61.16,21.0,-9999.0,32.0,108.4,206.6,109.59418,78.8,86.8,0.00098,0.000157,11.70,17.00,0,0.000100,16.46,-30.21,692.7,400.2,525.6,264732.041,186953,0.399,0,0
3,FRB20180729B,-9999,89.93,56.50,156.90,15.68,21.0,-9999.0,12.4,318.6,22.0,317.22350,223.2,198.8,0.00197,0.000660,0.92,1.20,0,0.000314,14.50,-14.60,800.2,441.8,657.5,425139.488,421337,0.323,0,0
4,FRB20180730A,-9999,57.39,87.19,125.11,25.11,270.0,214.0,69.5,849.2,89.8,848.90410,789.7,790.5,0.00492,0.002073,5.20,27.00,0,0.000468,4.27,-11.31,759.2,400.2,483.5,429165.844,417689,0.329,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
595,FRB20190701A,-9999,277.47,59.04,88.29,25.72,23.0,-9999.0,12.1,635.7,14.6,637.09340,582.8,587.8,0.00197,0.000720,1.26,1.70,0,0.000608,-1.10,3.30,800.2,400.2,800.2,341779.300,341690,0.451,0,0
596,FRB20190701B,-9999,302.93,80.18,112.88,23.40,69.0,70.0,15.0,748.9,17.5,749.11400,687.6,688.1,0.00295,0.000340,1.10,1.90,0,0.000630,3.90,-11.80,732.8,400.2,471.5,329229.311,330137,0.470,0,0
597,FRB20190701C,-9999,96.36,81.63,132.18,25.88,82.0,82.0,11.5,972.1,16.8,974.19500,915.8,916.6,0.00197,0.001800,0.88,2.50,0,0.001440,46.20,-211.00,495.5,402.2,446.4,285697.192,286362,0.540,0,0
598,FRB20190701D,-9999,112.10,66.70,149.28,28.38,34.0,-9999.0,34.4,934.9,44.8,933.36290,877.4,879.4,0.00885,0.001530,1.33,8.60,0,0.001400,6.49,-20.90,651.8,400.2,467.6,358566.724,354457,0.431,0,0


## Merge catalogs

In [24]:
df23["catalog"] = "2023"
df21["catalog"] = "2021"
df = pd.concat([df21, df23])
df.reset_index(drop=True, inplace=True)
df

Unnamed: 0,tns_name,repeater_name,ra,dec,gl,gb,exp_up,exp_low,bonsai_snr,bonsai_dm,snr_fitb,dm_fitb,dm_exc_ne2001,dm_exc_ymw16,bc_width,scat_time,flux,fluence,sub_num,width_fitb,sp_idx,sp_run,high_freq,low_freq,peak_freq,chi_sq,dof,flag_frac,is_repeater,is_pcc_candidate,catalog
0,FRB20180725A,-9999,93.420,67.070,147.29,21.29,30.0,-9999.0,19.20,716.6,33.20,715.80930,644.2,635.4,0.00295,0.001100,1.70,4.10,0,0.000296,38.20,-45.80,760.1,485.3,607.4,371857.954,371481,0.403,0,0,2021
1,FRB20180727A,-9999,197.720,26.420,24.76,85.60,10.4,-9999.0,10.40,642.1,12.20,642.13400,620.9,622.4,0.00295,0.001700,0.58,2.31,0,0.001390,3.80,-9.20,800.2,400.2,493.3,382969.318,381818,0.387,0,0,2021
2,FRB20180729A,-9999,199.400,55.580,115.26,61.16,21.0,-9999.0,32.00,108.4,206.60,109.59418,78.8,86.8,0.00098,0.000157,11.70,17.00,0,0.000100,16.46,-30.21,692.7,400.2,525.6,264732.041,186953,0.399,0,0,2021
3,FRB20180729B,-9999,89.930,56.500,156.90,15.68,21.0,-9999.0,12.40,318.6,22.00,317.22350,223.2,198.8,0.00197,0.000660,0.92,1.20,0,0.000314,14.50,-14.60,800.2,441.8,657.5,425139.488,421337,0.323,0,0,2021
4,FRB20180730A,-9999,57.390,87.190,125.11,25.11,270.0,214.0,69.50,849.2,89.80,848.90410,789.7,790.5,0.00492,0.002073,5.20,27.00,0,0.000468,4.27,-11.31,759.2,400.2,483.5,429165.844,417689,0.329,0,0,2021
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
725,FRB20210313E,FRB20201221B,124.199,48.781,170.55,33.81,90.3,-9999.0,10.00,510.3,15.43,510.35400,459.5,464.4,0.02064,0.001730,0.25,1.13,0,0.001260,34.30,-183.00,491.8,400.2,439.6,352048.133,351911,43.475,1,0,2023
726,FRB20210331F,-9999,122.070,72.350,142.57,31.55,135.0,108.0,24.28,288.3,50.27,288.42000,237.9,243.1,0.00885,0.002500,1.02,6.10,0,0.005000,58.00,-88.00,662.7,480.9,564.5,497640.634,492355,47.278,0,1,2023
727,FRB20210331F,-9999,122.070,72.350,142.57,31.55,135.0,108.0,24.28,288.3,50.27,288.42000,237.9,243.1,0.00885,0.002500,1.02,6.10,1,0.001254,47.50,-119.20,578.4,428.0,497.6,497640.634,492355,47.278,0,1,2023
728,FRB20210426B,-9999,122.070,72.350,142.57,31.55,135.0,108.0,12.49,292.8,31.13,288.92000,238.4,243.6,0.00786,0.010000,1.60,7.60,0,0.005000,-8.20,15.00,800.2,613.2,800.2,1872864.206,1866568,45.490,0,1,2023


In [25]:
df21[df21["is_repeater"] == 0].shape

(485, 31)

In [26]:
df[df["is_repeater"] == 0].shape

(517, 31)

In [27]:
print(df[df["tns_name"].isin(overlapping_bursts)].shape)
df[df["tns_name"].isin(overlapping_bursts)]

(15, 31)


Unnamed: 0,tns_name,repeater_name,ra,dec,gl,gb,exp_up,exp_low,bonsai_snr,bonsai_dm,snr_fitb,dm_fitb,dm_exc_ne2001,dm_exc_ymw16,bc_width,scat_time,flux,fluence,sub_num,width_fitb,sp_idx,sp_run,high_freq,low_freq,peak_freq,chi_sq,dof,flag_frac,is_repeater,is_pcc_candidate,catalog
589,FRB20190609C,FRB20190609C,73.324,24.0678,177.36,-12.39,71.95,-9999.0,10.96,480.4,17.1,480.282,366.9,326.7,0.00393,0.0041,0.64,1.91,0,0.00207,15.2,-138.0,481.3,400.2,422.9,323294.49,324438,47.888,1,0,2023
590,FRB20190226B,FRB20190226B,273.62,61.67,0.0,0.0,85.0,-9999.0,9.17,630.8,13.17,631.603,580.9,586.1,0.01081,0.008,0.38,2.38,0,0.004,29.9,-39.3,706.4,474.3,578.8,467683.775,468078,49.878,1,0,2023
593,FRB20190430C,FRB20190430C,277.2097,24.7699,53.11,15.7,29.4,-9999.0,22.54,399.5,32.21,400.561,301.0,316.9,0.00295,0.0018,5.8,15.2,0,0.000893,48.7,-48.8,800.2,527.8,655.2,297460.588,295102,52.6,1,0,2023
600,FRB20190110C,FRB20190110C,249.325,41.445,65.58,42.09,45.0,-9999.0,15.58,221.6,27.76,221.921,184.9,192.0,0.00295,0.0015,0.64,1.4,0,0.000752,28.2,-202.0,477.7,400.2,427.4,283435.026,283018,54.541,1,0,2023
605,FRB20190113A,FRB20190113A,108.26,-2.98,0.0,0.0,40.0,-9999.0,10.93,430.3,12.55,428.924,251.0,178.1,0.00688,0.0036,1.1,5.7,0,0.00182,7.3,-2.8,800.2,491.9,800.2,288553.329,287540,53.815,1,0,2023
609,FRB20180910A,FRB20180910A,354.83,89.014,122.59,26.19,3910.0,4335.0,36.65,684.2,50.38,684.4081,628.3,629.7,0.00098,0.000244,6.5,5.6,0,0.000205,0.05,-0.53,800.2,400.2,417.6,135823.872,134076,56.927,1,0,2023
610,FRB20190303D,-9999,185.339,70.691,126.52,46.23,55.0,141.7,9.78,710.1,11.16,711.151,674.5,682.5,0.00295,0.0016,0.59,1.17,0,0.00081,-0.9,-5.6,564.8,400.2,440.8,328339.172,328732,47.198,0,1,2023
612,FRB20190328C,-9999,75.649,82.11,130.57,23.32,310.0,290.0,18.25,470.7,21.65,472.8607,408.8,406.4,0.00393,0.0023,4.7,14.9,0,0.00077,0.7,-2.7,800.2,400.2,459.0,500293.147,499256,46.539,0,1,2023
614,FRB20190107B,-9999,49.31,83.4,0.0,0.0,360.0,330.0,20.62,166.6,26.84,166.0939,98.2,93.0,0.00098,0.0009,2.8,4.3,0,0.000451,-1.3,-3.1,784.4,400.2,400.2,126924.469,122525,60.638,0,1,2023
616,FRB20190210C,-9999,313.9,89.19,122.16,26.71,3400.0,3090.0,19.69,642.1,27.31,643.3669,589.5,592.2,0.00197,0.00025,2.37,3.6,0,0.000286,0.35,-1.5,800.2,400.2,448.5,310287.093,310719,50.092,0,1,2023


In [28]:
print("Combined 2021 and 2023 catalog:")
print("Total:", df.shape)
print("Sub-bursts of repeat bursts:", df[df["is_repeater"] == 1].shape[0])
print(
    "Sub-bursts of apparently non-repeating bursts:",
    df[df["is_repeater"] == 0].shape[0],
)
print("Bursts of repeat bursts:", df[df["is_repeater"] == 1]["tns_name"].nunique())
print(
    "Bursts of apparently non-repeating bursts:",
    df[df["is_repeater"] == 0]["tns_name"].nunique(),
)

Combined 2021 and 2023 catalog:
Total: (730, 31)
Sub-bursts of repeat bursts: 213
Sub-bursts of apparently non-repeating bursts: 517
Bursts of repeat bursts: 160
Bursts of apparently non-repeating bursts: 483


In [29]:
df.to_csv("data/raw_data/combined_2021_23_catalog.csv", index=False)