In [None]:
import gzip
import io
import os
import tarfile
import warnings
from collections import defaultdict

import fastavro
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from avro.datafile import DataFileReader, DataFileWriter
from avro.io import DatumReader, DatumWriter

#from astropy.time import Time
#from astropy.io import fits
#import astropy.units as u
#import aplpy
%matplotlib inline

In [None]:
# Archive this notebook works on.
tar_archive = 'ztf_public_20231215.tar.gz'
# Directory name derived from the archive: drop any leading path, then take
# the third-from-last dot-separated piece (the part before '.tar.gz').
output_dir = tar_archive.rpartition('/')[2].split('.')[-3]
# Extraction step, currently disabled; uncomment to unpack into output_dir.
#archive = tarfile.open(tar_archive,'r:gz')
#archive.extractall(path=output_dir)
#archive.close()

In [None]:
def find_files(root_dir):
    """Yield the path of every ``.avro`` file found beneath ``root_dir``.

    The tree is walked recursively with ``os.walk``, following symbolic
    links to directories.  Each yielded path is the directory reported by
    ``os.walk`` and the file name, joined with a forward slash.
    """
    for folder, _subfolders, filenames in os.walk(root_dir, followlinks=True):
        yield from (
            '/'.join((folder, name))
            for name in filenames
            if name.endswith('.avro')
        )
# Report how many .avro files sit under the extraction directory.
print('{} has {} avro files'.format(output_dir, sum(1 for _ in find_files(output_dir))))

In [None]:
def open_avro(fname):
    """Lazily yield each record that ``fastavro.reader`` reads from ``fname``.

    The file is opened in binary mode and stays open until the generator is
    exhausted or closed.
    """
    with open(fname, 'rb') as handle:
        # A single file may in principle hold more than one packet.
        yield from fastavro.reader(handle)

In [None]:
def generate_dictionaries(root_dir):
    """Yield every packet from every ``.avro`` file under ``root_dir``.

    Files are visited in the order ``find_files`` produces them, and the
    packets of each file are yielded in the order ``open_avro`` reads them.
    """
    for avro_path in find_files(root_dir):
        yield from open_avro(avro_path)

In [None]:
%%time
# Count how many packets carry each `programid`, over every packet under
# output_dir (no quality cuts applied).
# `defaultdict` is imported once in the notebook's top import cell.
programs = defaultdict(int)
for packet in generate_dictionaries(output_dir):
    programs[packet['candidate']['programid']] += 1
print(programs)

In [None]:
def is_alert_pure(packet):
    """Return whether ``packet['candidate']`` passes all five purity cuts.

    The cuts are: ``rb >= 0.65``, ``nbad == 0``, ``fwhm <= 5``,
    ``elong <= 1.2`` and ``|magdiff| <= 0.1``.  Every cut is evaluated (there
    is no short-circuiting) and the results are combined with ``&``.
    """
    cand = packet['candidate']
    cuts = (
        cand['rb'] >= 0.65,
        cand['nbad'] == 0,
        cand['fwhm'] <= 5,
        cand['elong'] <= 1.2,
        np.abs(cand['magdiff']) <= 0.1,
    )
    verdict = True
    for passed in cuts:
        verdict = verdict & passed
    return verdict

In [None]:
%%time
# Same per-`programid` count as before, but restricted to the packets that
# pass is_alert_pure.
# `defaultdict` is imported once in the notebook's top import cell.
programs = defaultdict(int)
for packet in filter(is_alert_pure, generate_dictionaries(output_dir)):
    programs[packet['candidate']['programid']] += 1
print(programs)

In [None]:
def make_dataframe(packet):
    """Build one light-curve table from an alert packet.

    Parameters
    ----------
    packet : dict
        Must provide ``'candidate'`` (a dict of scalars), ``'prv_candidates'``
        (anything ``pd.DataFrame`` accepts, e.g. a list of dicts),
        ``'objectId'`` and ``'candid'``.

    Returns
    -------
    pandas.DataFrame
        Row ``0`` holds ``packet['candidate']``; the rows after it hold
        ``packet['prv_candidates']`` in their original order.  The packet's
        ``objectId`` and ``candid`` are attached as plain instance attributes
        of the same names.
    """
    dfc = pd.DataFrame(packet['candidate'], index=[0])
    df_prv = pd.DataFrame(packet['prv_candidates'])
    dflc = pd.concat([dfc, df_prv], ignore_index=True)
    # We attach some metadata -- note this may not be preserved after all operations:
    # https://stackoverflow.com/questions/14688306/adding-meta-information-metadata-to-pandas-dataframe
    #
    # object.__setattr__ is used on purpose.  A plain ``dflc.candid = value``
    # goes through pandas attribute access, which assigns to an existing
    # column of that name; if the candidate records carry a ``candid`` field,
    # that would overwrite every row's own value with the packet-level one.
    object.__setattr__(dflc, 'objectId', packet['objectId'])
    object.__setattr__(dflc, 'candid', packet['candid'])
    return dflc

In [None]:
def is_transient(dflc):
    """Decide whether an alert light curve passes all four transient cuts.

    Parameters
    ----------
    dflc : pandas.DataFrame
        Light-curve table as built by ``make_dataframe``: the row labelled
        ``0`` is the alert's own candidate, the other rows are its previous
        candidates.  Columns read: ``isdiffpos``, ``distpsnr1``,
        ``sgscore1``, ``jd`` and ``ssdistnr``.

    Returns
    -------
    bool
        ``True`` only if all of the following hold:

        * row 0 has ``isdiffpos == 't'``;
        * row 0 has no point-source counterpart: ``distpsnr1`` is missing or
          ``> 1.5``, or else ``sgscore1 < 0.5``;
        * at least two rows have ``isdiffpos == 't'`` and some pair of
          time-adjacent ones is separated by at least 30 minutes;
        * row 0 has ``ssdistnr`` missing, negative, or ``> 5``.
    """
    # 30 minutes expressed in days (the `jd` differences below are in days).
    # Written as a plain number so the function does not need astropy.units.
    min_separation_days = 30.0 / (24.0 * 60.0)

    candidate = dflc.loc[0]

    is_positive_sub = candidate['isdiffpos'] == 't'

    # pd.isna covers both None and NaN: a missing value may surface as either
    # depending on the column dtype after the DataFrame is built.
    distpsnr1 = candidate['distpsnr1']
    if pd.isna(distpsnr1) or distpsnr1 > 1.5:
        no_pointsource_counterpart = True
    else:
        sgscore1 = candidate['sgscore1']
        # A missing score is treated like a failing one.
        no_pointsource_counterpart = (not pd.isna(sgscore1)) and sgscore1 < 0.5

    where_detected = (dflc['isdiffpos'] == 't')  # nondetections will be None
    if np.sum(where_detected) >= 2:
        # Sort first: the alert's own candidate is row 0, ahead of the
        # previous candidates, so the rows are not in time order and an
        # unsorted diff would produce negative gaps.
        detection_times = sorted(dflc.loc[where_detected, 'jd'].values)
        dt = np.diff(detection_times)
        not_moving = np.max(dt) >= min_separation_days
    else:
        not_moving = False

    ssdistnr = candidate['ssdistnr']
    no_ssobject = pd.isna(ssdistnr) or ssdistnr < 0 or ssdistnr > 5

    return bool(
        is_positive_sub
        and no_pointsource_counterpart
        and not_moving
        and no_ssobject
    )

In [None]:
%%time
# Collect every packet that passes the purity cuts and whose light curve
# passes is_transient, printing its objectId as it is found.
transient_alerts = []
for packet in filter(is_alert_pure, generate_dictionaries(output_dir)):
    dflc = make_dataframe(packet)
    if not is_transient(dflc):
        continue
    print(packet['objectId'])
    transient_alerts.append(packet)


In [None]:
# Destination: one CSV per archive, named after the extraction directory.
csv_folder_path = r'TA_Azzidny'
csv_file_path = os.path.join(csv_folder_path, f'{output_dir}.csv')

# One row per collected packet, one column per top-level packet key.
df_transients = pd.DataFrame(transient_alerts)

# Convert the DataFrame to a CSV file, creating the folder if it does not exist yet.
os.makedirs(csv_folder_path, exist_ok=True)
df_transients.to_csv(csv_file_path, index=False)

print(f'Transient alerts DataFrame saved to {csv_file_path}')