In [1]:
import ast
import time
import math
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
from numba import njit, prange
from concurrent.futures import ProcessPoolExecutor, as_completed
import ast
import numpy as np
import pandas as pd
from typing import Dict, Tuple, List
from collections import Counter

In [2]:
# Load the bandpass table, copy the stored index into a regular 'uid' column,
# then replace the index with a fresh 0..n-1 range so that positional and
# label-based row access coincide in the cells below.
df = (
    pd.read_parquet('Data/bandpass_qa0_no_partitions.parquet')
    .assign(uid=lambda frame: frame.index)
    .reset_index(drop=True)
)

In [3]:
# Turn every row of the two array-valued columns into a float ndarray.
specs = [np.array(row, dtype=float) for row in df['amplitude_corr_tsys'].tolist()]
freqs = [np.array(row, dtype=float) for row in df['frequency_array'].tolist()]

# One flag per row of df: True when the spectrum is not identically zero.
keep = [bool(np.any(spectrum != 0.0)) for spectrum in specs]

# Drop the all-zero spectra together with their matching frequency axes.
specs = [spectrum for spectrum, flag in zip(specs, keep) if flag]
freqs = [axis for axis, flag in zip(freqs, keep) if flag]

# Per-row metadata restricted to the same surviving rows, so that position k
# in uid/ref/ant/pol describes specs[k] and freqs[k].
uid, ref, ant, pol = (
    df[column].values[keep]
    for column in ('uid', 'ref_antenna_name', 'antenna', 'polarization')
)

In [7]:
# Map each spectrum length to the positions (within `specs`) of the spectra
# that have that length, preserving the original order inside every group.
length_groups: Dict[int, List[int]] = {}
for position, spectrum in enumerate(specs):
    n_channels = spectrum.shape[0]
    if n_channels not in length_groups:
        length_groups[n_channels] = []
    length_groups[n_channels].append(position)

In [8]:
# For every spectrum length, stack the equally long spectra and frequency axes
# into 2-D arrays (one row per spectrum) and pick the matching metadata rows.
# Tuple layout: (spectra, uid, ref antenna, antenna, polarization, frequencies).
result: Dict[int, Tuple[np.ndarray, ...]] = {}
for n_channels, members in length_groups.items():
    stacked_specs = np.vstack([specs[j] for j in members])
    stacked_freqs = np.vstack([freqs[j] for j in members])
    result[n_channels] = (
        stacked_specs,
        uid[members],
        ref[members],
        ant[members],
        pol[members],
        stacked_freqs,
    )

In [10]:
# Work on the group of spectra that have exactly L channels.
L = 960
specs_L, uid_L, ref_L, ant_L, pol_L, freqs_L = result[L]

# freqs_L is a 2-D stack (one row per spectrum), so the per-row extremes can be
# taken along axis 1; each entry is a (lowest, highest) frequency pair.
endpoints = list(zip(freqs_L.min(axis=1), freqs_L.max(axis=1)))

# Count how many rows share each exact (min, max) frequency pair.
cnt = Counter(endpoints)

# `freq` here is the number of rows with that range, not a frequency value.
for (fmin, fmax), freq in cnt.items():
    print(f"Range = ({fmin:.3f}, {fmax:.3f})  →  {freq} rows")


Range = (330099133810.759, 331972180685.759)  →  86 rows
Range = (331976637594.269, 333849684469.269)  →  86 rows
Range = (342214294270.508, 344087341145.508)  →  86 rows
Range = (344080161031.769, 345953207906.769)  →  86 rows
Range = (218613194755.014, 220486241630.014)  →  86 rows
Range = (219926740165.171, 220863263602.671)  →  86 rows
Range = (230065320487.436, 231001843924.936)  →  86 rows
Range = (230966718192.514, 232839765067.514)  →  86 rows
Range = (287448256141.012, 289321303016.012)  →  86 rows
Range = (137084964843.650, 138958011718.650)  →  88 rows
Range = (297773077447.438, 299646124322.438)  →  86 rows
Range = (232071975997.235, 233945022872.235)  →  86 rows
Range = (299448206883.834, 301321253758.834)  →  86 rows
Range = (285773390294.511, 287646437169.511)  →  86 rows
Range = (287448521270.960, 289321568145.960)  →  86 rows
Range = (297773352070.546, 299646398945.546)  →  86 rows
Range = (299448483046.994, 301321529921.994)  →  86 rows
Range = (138980464843.650, 1408

In [17]:
# The most frequent (min, max) frequency pair becomes the target range.
target_range, target_count = cnt.most_common(1)[0]
fmin_target, fmax_target = target_range

# Boolean row mask over freqs_L: True where both extremes match the target
# exactly (same float-equality criterion the Counter keys were built with).
mask = (freqs_L.min(axis=1) == fmin_target) & (freqs_L.max(axis=1) == fmax_target)

# Map the selected rows back to the full table through their uid values.
uids_to_keep = uid_L[mask]
df_filtered = df.loc[df["uid"].isin(uids_to_keep)].reset_index(drop=True)

In [18]:
# Rows of the full table whose uid is in uids_to_keep, re-indexed from 0.
# NOTE(review): this repeats the df_filtered assignment that closes the
# previous cell; one of the two can probably be dropped.
uid_selected = df["uid"].isin(uids_to_keep)
df_filtered = df[uid_selected].reset_index(drop=True)

In [20]:
# Print a summary of the filtered frame (row count, columns, non-null counts, dtypes).
df_filtered.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1326 entries, 0 to 1325
Data columns (total 15 columns):
 #   Column               Non-Null Count  Dtype              
---  ------               --------------  -----              
 0   eb_uid               1326 non-null   object             
 1   cal_data_id          1326 non-null   object             
 2   cal_reduction_id     1326 non-null   object             
 3   start_valid_time     1326 non-null   datetime64[ns, UTC]
 4   receiver_band        1326 non-null   object             
 5   ref_antenna_name     1326 non-null   object             
 6   antenna              1326 non-null   object             
 7   polarization         1326 non-null   object             
 8   sideband             1326 non-null   object             
 9   baseband_name        1326 non-null   object             
 10  spw_name             1326 non-null   object             
 11  frequency_array      1326 non-null   object             
 12  amplitude_corr_tsys 

In [22]:
# Write the filtered table to CSV without the row index.
# `index` is documented as a bool; the previous `index=None` only suppressed the
# index because None is falsy, so the explicit False states the intent.
# NOTE(review): object columns such as 'frequency_array' appear to hold one
# array per row (cell 3 converts them with np.array); CSV stores such values as
# text - confirm downstream readers can parse it, or prefer parquet.
df_filtered.to_csv('bandpass_filtered_same_freq.csv', index=False)