In [1]:
import os
import pandas as pd
from pathlib import Path
import psycopg2
from astropy.io import fits
from matplotlib import pyplot as plt
from tqdm import tqdm
import copy
import torch
from torch.utils.data import DataLoader, TensorDataset, random_split
from torch import nn, optim
from photutils.aperture import aperture_photometry, CircularAperture
from photutils import CircularAnnulus, EllipticalAperture
from astropy.stats import sigma_clip
import math
from scipy.stats import norm
from dask.distributed import Client

import nayo
import numpy as np
import sqlalchemy as sqla

# Presumably opens a database connection through the project helper
# `nayo.connect_db()` -- its behaviour is not visible from this notebook.
# NOTE(review): `conn` is not referenced by any of the cells shown here (the
# query cell builds its own SQLAlchemy engine); confirm it is still needed.
conn = nayo.connect_db()

  from photutils import CircularAnnulus, EllipticalAperture
  from photutils import CircularAnnulus, EllipticalAperture


In [2]:
def background_annulus(data, mask, aperture_x, aperture_y, r_in=25, r_out=45):#r_in=30
    """Measure the sigma-clipped background inside a circular annulus.

    Parameters
    ----------
    data : 2-D array
        Image pixel values.
    mask : 2-D array
        Pixel flags with the same shape as ``data``; every non-zero entry
        marks a pixel that must be ignored.
    aperture_x, aperture_y : float
        Centre of the annulus, handed to photutils as ``(x, y)``.
    r_in, r_out : float
        Inner and outer radius of the annulus.

    Returns
    -------
    pandas.Series
        ``annulus_mean``, ``annulus_median``, ``annulus_std`` and
        ``annulus_samples`` of the clipped annulus pixels.  All statistics
        are NaN (and ``annulus_samples`` is 0) when the annulus does not
        overlap the image or contains no usable pixel.
    """
    # Flagged pixels are set to exactly 0 so that the ``!= 0`` selection
    # below removes them (genuine zero-valued pixels are dropped as well).
    masked_data = np.ma.array(data=data, mask=mask != 0).filled(fill_value=0)

    annulus = CircularAnnulus((aperture_x, aperture_y), r_in=r_in, r_out=r_out)
    annulus_mask = annulus.to_mask(method='center')

    # ``multiply`` applies the annulus footprint to the cutout.  The previous
    # ``cutout`` call returned the whole bounding box, so ``r_in`` had no
    # effect and the central source was included in the "background".
    annulus_data = annulus_mask.multiply(masked_data)

    # ``multiply`` returns None when the annulus lies completely off-image.
    if annulus_data is None:
        values = np.array([])
    else:
        values = annulus_data[annulus_data != 0]

    if values.size == 0:
        return pd.Series({
            'annulus_mean': np.nan,
            'annulus_median': np.nan,
            'annulus_std': np.nan,
            'annulus_samples': 0,
        })

    clip_annulus_array = sigma_clip(values, sigma=3, maxiters=2)

    # Build the Series in one go: an empty ``pd.Series()`` without dtype
    # triggers a deprecation warning / object dtype depending on the pandas
    # version.
    return pd.Series({
        'annulus_mean': np.ma.mean(clip_annulus_array),
        'annulus_median': np.ma.median(clip_annulus_array),
        'annulus_std': np.ma.std(clip_annulus_array),
        'annulus_samples': np.ma.count(clip_annulus_array),
    })

def flux_elliptical(image, mask, aperture_x, aperture_y, aperture_theta, aperture_a, aperture_b,
                    pixel_scale=0.263):
    """Measure the flux within an elliptical aperture.

    Parameters
    ----------
    image : 2-D array
        Image pixel values.
    mask : 2-D array
        Pixel flags with the same shape as ``image``; non-zero entries are
        excluded from the aperture sum.
    aperture_x, aperture_y : float
        Centre of the aperture, handed to photutils as ``(x, y)``.
    aperture_theta : float
        Position angle in degrees; its sign is flipped before it is passed
        to photutils.
    aperture_a, aperture_b : float
        Semi-axes of the ellipse; they are divided by ``pixel_scale`` before
        use (presumably arcsec -> pixels -- confirm against the catalogue).
    pixel_scale : float, optional
        Divisor applied to the semi-axes.  Defaults to the value that used
        to be hard-coded in this function.

    Returns
    -------
    pandas.Series
        ``raw_flux`` (aperture sum, no background subtraction) and ``area``
        (area of the aperture as reported by photutils).
    """
    theta = -aperture_theta * np.pi / 180.
    a = aperture_a / pixel_scale
    b = aperture_b / pixel_scale

    source_aperture = EllipticalAperture((aperture_x, aperture_y), a, b, theta)

    phot_table = aperture_photometry(image, source_aperture, mask=(mask != 0))

    # Build the Series in one go: an empty ``pd.Series()`` without dtype
    # triggers a deprecation warning / object dtype depending on the pandas
    # version.
    return pd.Series({
        'raw_flux': float(phot_table['aperture_sum'][0]),
        'area': source_aperture.area,
    })

def cal_calerror(sig_src,sig_zp,zp,f_src):
    """Return sqrt(sig_src^2*sig_zp^2 + sig_src^2*zp^2 + sig_zp^2*f_src^2).

    This is the standard deviation of the product ``f_src * zp`` of two
    independent quantities with standard deviations ``sig_src`` and
    ``sig_zp``.  Works element-wise on arrays.
    """
    var_src = sig_src**2
    var_zp = sig_zp**2
    variance = var_src * var_zp + var_src * zp**2 + var_zp * f_src**2
    return np.sqrt(variance)

def cal_fcal(f_src,zp):
    """Return the product ``f_src * zp`` (works element-wise on arrays)."""
    return f_src * zp

def creat_stamps(image, sources, half_size=None):
    """Cut a square stamp out of ``image`` around the first source.

    Parameters
    ----------
    image : 2-D array
        Image indexed as ``image[row, column]``.
    sources : pandas.DataFrame
        Table with ``aperture_x`` and ``aperture_y`` columns; only the first
        row is used.  ``aperture_x`` selects the column and ``aperture_y``
        the row of the stamp centre.
    half_size : int, optional
        Half of the stamp side in pixels.  When omitted, the notebook-level
        global ``cutout_size`` is used, as in the original implementation.

    Returns
    -------
    2-D array
        ``image[row-half_size:row+half_size, col-half_size:col+half_size]``
        clipped to the image borders, so stamps near an edge are smaller
        than ``2*half_size``.  For NumPy input this is a view, not a copy.
    """
    if half_size is None:
        # Backward-compatible fallback to the global used by existing callers.
        half_size = cutout_size

    col = math.floor(sources['aperture_x'].values[0])
    row = math.floor(sources['aperture_y'].values[0])

    row_start = max(row - half_size, 0)
    row_end = min(row + half_size, image.shape[0])
    col_start = max(col - half_size, 0)
    col_end = min(col + half_size, image.shape[1])

    return image[row_start:row_end, col_start:col_end]

def photometry_oneimage(image, mask, aperture_x, aperture_y, aperture_theta, aperture_a, aperture_b):
    """Background-subtracted aperture flux of one source in one image.

    The background is the ``annulus_mean`` returned by ``background_annulus``
    (called with its default radii); it is scaled by the aperture ``area``
    and subtracted from the ``raw_flux`` returned by ``flux_elliptical``.
    """
    background = background_annulus(image, mask, aperture_x, aperture_y)
    source = flux_elliptical(image, mask, aperture_x, aperture_y, aperture_theta, aperture_a, aperture_b)

    sky_in_aperture = source['area'] * background['annulus_mean']
    return source['raw_flux'] - sky_in_aperture

def generate_mask(size_data, image, keep_ratio=None, window=None):
    """Replace randomly chosen pixels by a random neighbour and flag them.

    Parameters
    ----------
    size_data : tuple of int
        ``(height, width, channels)`` of ``image``.
    image : array
        Array indexed as ``image[y, x, channel]``.  It is NOT modified; the
        previous implementation aliased it (``output = image``) and therefore
        overwrote the caller's data in place.
    keep_ratio : float, optional
        ``int(height * width * (1 - keep_ratio))`` pixel positions are drawn
        (with replacement) per channel.  Defaults to the notebook-level
        global ``ratio``.
    window : sequence of two ints, optional
        ``(height, width)`` of the neighbourhood the replacement pixel is
        drawn from.  Defaults to the notebook-level global ``size_window``.

    Returns
    -------
    output : array
        Copy of ``image`` with the drawn pixels replaced.
    mask : numpy.ndarray
        Array of shape ``size_data``: 0.0 at replaced positions, 1.0
        elsewhere.
    """
    # Backward-compatible fallbacks to the globals used by existing callers.
    if keep_ratio is None:
        keep_ratio = ratio
    if window is None:
        window = size_window

    height, width = size_data[0], size_data[1]
    num_sample = int(height * width * (1 - keep_ratio))

    mask = np.ones(size_data)
    # Work on a copy so the caller's image stays intact.
    output = copy.deepcopy(image)

    for ich in range(size_data[2]):
        idy_msk = np.random.randint(0, height, num_sample)
        idx_msk = np.random.randint(0, width, num_sample)

        # Offsets inside the window; note that an offset of (0, 0) is
        # possible, i.e. a pixel may be "replaced" by itself.
        idy_neigh = np.random.randint(-window[0] // 2 + window[0] % 2, window[0] // 2 + window[0] % 2, num_sample)
        idx_neigh = np.random.randint(-window[1] // 2 + window[1] % 2, window[1] // 2 + window[1] % 2, num_sample)

        idy_msk_neigh = idy_msk + idy_neigh
        idx_msk_neigh = idx_msk + idx_neigh

        # Wrap neighbours that fall outside the image to the opposite side.
        idy_msk_neigh = idy_msk_neigh + (idy_msk_neigh < 0) * height - (idy_msk_neigh >= height) * height
        idx_msk_neigh = idx_msk_neigh + (idx_msk_neigh < 0) * width - (idx_msk_neigh >= width) * width

        id_msk = (idy_msk, idx_msk, ich)
        id_msk_neigh = (idy_msk_neigh, idx_msk_neigh, ich)

        # Replacement values are always read from the untouched input.
        output[id_msk] = image[id_msk_neigh]
        mask[id_msk] = 0.0

    return output, mask

In [3]:
# Hand-written versions of the background and flux measurements.

def background_annulus_jiefeng(data, mask, aperture_x, aperture_y, r_in=30, r_out=45):
    """Return the sigma-clipped mean background inside a circular annulus.

    Parameters
    ----------
    data : 2-D array
        Image pixel values.
    mask : 2-D array
        Pixel flags with the same shape as ``data``; every non-zero entry
        marks a pixel that must be ignored.
    aperture_x, aperture_y : float
        Centre of the annulus, handed to photutils as ``(x, y)``.
    r_in, r_out : float
        Inner and outer radius of the annulus.

    Returns
    -------
    float
        Mean of the annulus pixels after sigma clipping (3 sigma, at most 2
        iterations).  NaN when the annulus does not overlap the image or
        contains no usable pixel.
    """
    # Flagged pixels are set to exactly 0 so that the ``!= 0`` selection
    # below removes them (genuine zero-valued pixels are dropped as well).
    masked_data = np.ma.array(data=data, mask=mask != 0).filled(fill_value=0)

    annulus = CircularAnnulus((aperture_x, aperture_y), r_in=r_in, r_out=r_out)
    annulus_mask = annulus.to_mask(method='center')

    # ``multiply`` applies the annulus footprint to the cutout.  The previous
    # ``cutout`` call returned the whole bounding box, so ``r_in`` had no
    # effect and the central source was included in the "background".
    annulus_data = annulus_mask.multiply(masked_data)

    # ``multiply`` returns None when the annulus lies completely off-image.
    if annulus_data is None:
        return np.nan

    values = annulus_data[annulus_data != 0]
    if values.size == 0:
        return np.nan

    clip_annulus_array = sigma_clip(values, sigma=3, maxiters=2)

    # The mean is used here.  An earlier comment noted that the reference
    # dataset uses the median; switch to ``np.ma.median`` to compare with it.
    background_annulus = np.ma.mean(clip_annulus_array)
    return background_annulus

def flux_elliptical_jiefeng(image, mask, aperture_x, aperture_y, aperture_theta, aperture_a, aperture_b):
    """Sum the flux inside an elliptical aperture using an explicit weight map.

    Parameters
    ----------
    image : 2-D array
        Image (or stamp) pixel values.
    mask : 2-D array
        Pixel flags with the same shape as ``image``; non-zero entries are
        excluded from the sum.
    aperture_x, aperture_y : float
        Centre of the aperture in the coordinates of ``image``, handed to
        photutils as ``(x, y)``.
    aperture_theta : float
        Position angle in degrees; its sign is flipped before it is passed
        to photutils.
    aperture_a, aperture_b : float
        Semi-axes of the ellipse; divided by ``PIXEL_SCALE`` before use
        (presumably arcsec -> pixels -- confirm against the catalogue).

    Returns
    -------
    raw_flux : float
        Sum of ``image`` weighted by the exact fractional pixel overlap with
        the aperture, flagged pixels excluded.  NaN when the aperture does
        not overlap the image.
    area : float
        Area of the aperture as reported by photutils.
    """
    PIXEL_SCALE = 0.263
    theta = -aperture_theta * np.pi / 180.
    a = aperture_a / PIXEL_SCALE
    b = aperture_b / PIXEL_SCALE

    source_aperture = EllipticalAperture((aperture_x, aperture_y), a, b, theta)
    aperture_mask = source_aperture.to_mask(method='exact')

    # Render the weights on the grid of the image actually passed in.  The
    # previous code assumed a (2*cutout_size, 2*cutout_size) stamp taken from
    # a notebook global, which fails for stamps clipped at an image border.
    weights = aperture_mask.to_image(shape=image.shape)
    if weights is None:
        # ``to_image`` returns None when the aperture is entirely off-image.
        return np.nan, source_aperture.area

    # Zero the flagged pixels with ``where`` rather than a multiplication so
    # that NaN/inf stored in flagged pixels cannot contaminate the sum.
    image_good = np.where(mask != 0, 0.0, image)

    raw_flux = np.sum(image_good * weights)

    return raw_flux, source_aperture.area

In [None]:
# Load the forced-aperture catalogue from the PAUdm database.
#
# The database password must not be stored in the notebook: it is read from
# the PAUDM_DB_PASSWORD environment variable, with an interactive prompt as
# fallback.  The password that used to be written here should be considered
# exposed and rotated.
import getpass
from urllib.parse import quote_plus

db_password = os.environ.get('PAUDM_DB_PASSWORD') or getpass.getpass('Password for database user "readonly": ')
# quote_plus keeps the URL valid if the password contains special characters.
dsn = f'postgresql://readonly:{quote_plus(db_password)}@db.pau.pic.es/dm'
engine = sqla.create_engine(dsn)

sql = """SELECT fa.image_id, fa.ref_id, image.ccd_num, image.filter, cosmos."I_auto", zp.zp,
mosaic.filename, mosaic.archivepath, fa.aperture_x, fa.aperture_y, fa.aperture_theta, fa.aperture_a, fa.aperture_b
FROM forced_aperture AS fa 
JOIN image ON image_id = image.id 
JOIN mosaic ON image.mosaic_id = mosaic.id 
JOIN image_zp AS zp ON zp.image_id=fa.image_id
JOIN cosmos ON fa.ref_id = cosmos.paudm_id WHERE fa.production_id=821
AND 18<"I_auto" AND "I_auto"<23
AND fa.flag=0 AND zp.phot_method_id = 2 AND zp.calib_method = 'MBE2.1_xsl'"""
#AND 18<"I_auto" AND "I_auto"<19. 20,22

df1 = pd.read_sql(sql, engine)

In [None]:
# Rewrite the archive paths / file names stored in the database so that they
# point at the files read later in this notebook.
# ``regex=False`` is explicit because the patterns contain '.' and the default
# of ``Series.str.replace`` differs between pandas versions.
df1['archivepath'] = df1['archivepath'].str.replace('NightlyR10', 'NightlyR11', regex=False)
df1['filename'] = df1['filename'].str.replace('red_paucam.', 'red_NightlyR11.paucam.', regex=False)
df1['archivepath'] = df1['archivepath'].str.replace('tape', 'disk', regex=False)
def replace_partial(row):
    """Insert the CCD tag after '.std.' in ``row['filename']``.

    The tag is '0' followed by ``row.ccd_num``.  A file name that already
    carries the tag is returned unchanged, so re-running this cell does not
    insert the tag a second time.

    NOTE(review): for ccd_num >= 10 the tag has three characters ('010', ...);
    confirm this matches the naming of the files on disk.
    """
    ccd_tag = f'.std.0{row.ccd_num}.'
    if ccd_tag in row['filename']:
        return row['filename']
    return row['filename'].replace('.std.', ccd_tag)

df1['filename'] = df1.apply(replace_partial, axis=1)
df1['path'] = df1['archivepath'] + '/' + df1['filename']
#df1.drop(columns=['filename','archivepath','ccd_num'], inplace=True)

In [17]:
# Select the rows of one narrow band (an earlier version of this cell used
# 'NB705'), leave out image 3974236 and keep 19 < I_auto < 22.
in_band = df1['filter'] == 'NB645'
not_excluded_image = df1['image_id'] != 3974236
in_mag_range = (df1['I_auto'] < 22) & (df1['I_auto'] > 19)

filtered_df = df1.loc[in_band & not_excluded_image & in_mag_range].reset_index(drop=True)

# Number of selected measurements per reference object.
element_counts = filtered_df['ref_id'].value_counts()
print(element_counts)

ref_id
33874    13
33873    13
83359    12
36218    12
55935    11
         ..
55340     1
55463     1
38912     1
21343     1
72264     1
Name: count, Length: 27632, dtype: int64


In [18]:
# Keep only the reference objects with more than 4 measurements.
# The counts are computed in this cell and used directly; the previous
# version computed ``value_counts`` but then filtered on ``element_counts``
# left over from the preceding cell.
value_counts = filtered_df['ref_id'].value_counts()
frequent_ref_ids = value_counts[value_counts > 4].index
filtered_df = filtered_df[filtered_df['ref_id'].isin(frequent_ref_ids)]
filtered_df = filtered_df.reset_index(drop=True)
element_counts = filtered_df['ref_id'].value_counts()
print(element_counts)

ref_id
33874    13
33873    13
36218    12
83359    12
55935    11
         ..
21446     5
21428     5
21153     5
21028     5
21265     5
Name: count, Length: 15305, dtype: int64


In [19]:
# Distinct images in the selection (in order of first appearance) and the
# number of selected sources on each of them.
image_id_column = filtered_df['image_id']
element_counts = image_id_column.value_counts()
image_id_elements = image_id_column.unique()
print(image_id_elements.shape)
print(element_counts)

(240,)
image_id
3996553    753
3989218    747
4007389    745
3981193    744
3984916    744
          ... 
3988474     10
3979879     10
4013044      9
3976618      7
4003831      2
Name: count, Length: 240, dtype: int64


In [20]:
import time
start_time = time.perf_counter()

model_dir = Path('/home/eriksen/data/bkgnet/models')
# One photometry table per image, keyed by 'image_id_<id>'.
# NOTE(review): the name says NB705, but the selection above uses 'NB645'.
df_image_dict_NB705 = {}

#L = []
for image_id in tqdm(image_id_elements):
    # All selected sources that fall on this image.
    selected_image_id = filtered_df[filtered_df.image_id == image_id]

    # The pixel mask is stored next to the image as '<name>.mask.fits'.
    image_path = selected_image_id['path'].iloc[0]
    image = fits.getdata(image_path)
    mask = fits.getdata(image_path.replace('.fits', '.mask.fits'))

    # The third dot-separated field of the file name is parsed as the
    # exposure number.
    fname_img = selected_image_id['filename'].iloc[0]
    exp_num = int(fname_img.split('.')[2])
    # TODO: fix this number (13).
    interv = 'after' if 13 < exp_num else 'before'
    band = selected_image_id['filter'].iloc[0]

    flux = nayo.photometry(image, mask, selected_image_id, model_dir, interv, band)
    df_image_dict_NB705[f'image_id_{image_id}'] = flux

end_time = time.perf_counter()
elapsed_time = end_time - start_time
print(f"运行时间: {elapsed_time:.5f} 秒")

100%|██████████| 240/240 [46:31<00:00, 11.63s/it] 

运行时间: 2791.70381 秒





In [21]:
# Add the background-subtracted flux to every per-image photometry table and
# attach the catalogue columns of the same sources.
addflux_df_image_dict_NB705 = {}

for image_id in tqdm(image_id_elements):
    # Look the table up by its key instead of by position in the dict: this
    # does not depend on insertion order and avoids rebuilding the key list
    # on every iteration.
    key = f'image_id_{image_id}'
    photometry_table = df_image_dict_NB705[key]
    photometry_table['flux'] = (
        photometry_table['raw_flux']
        - photometry_table['area'] * photometry_table['annulus_mean']
    )

    catalogue_rows = filtered_df[filtered_df['image_id'] == image_id]

    # ``concat(axis=1)`` aligns on the index; with different labels it would
    # silently produce extra rows filled with NaN, so fail loudly instead.
    if not photometry_table.index.sort_values().equals(catalogue_rows.index.sort_values()):
        raise ValueError(
            f'photometry and catalogue rows of image {image_id} do not share the same index'
        )

    df_combined = pd.concat([photometry_table, catalogue_rows], axis=1)
    addflux_df_image_dict_NB705[key] = df_combined

100%|██████████| 240/240 [00:00<00:00, 425.44it/s]


In [22]:
# Stack the per-image tables into one frame with a fresh 0..N-1 index.
per_image_tables = list(addflux_df_image_dict_NB705.values())
filtered_df = pd.concat(per_image_tables, ignore_index=True)
filtered_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 84996 entries, 0 to 84995
Data columns (total 21 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   annulus_mean     84996 non-null  float64
 1   annulus_median   84996 non-null  float64
 2   annulus_std      84996 non-null  float64
 3   annulus_samples  84996 non-null  float64
 4   raw_flux         84996 non-null  float64
 5   area             84996 non-null  float64
 6   flux             84996 non-null  float64
 7   image_id         84996 non-null  int64  
 8   ref_id           84996 non-null  int64  
 9   ccd_num          84996 non-null  int64  
 10  filter           84996 non-null  object 
 11  I_auto           84996 non-null  float64
 12  zp               84996 non-null  float64
 13  filename         84996 non-null  object 
 14  archivepath      84996 non-null  object 
 15  aperture_x       84996 non-null  float64
 16  aperture_y       84996 non-null  float64
 17  aperture_the

In [23]:
# Drop the columns that are not needed in the saved table.  The result is
# rebound to the name instead of using ``inplace=True``, which pandas
# discourages and which brings no performance benefit.
columns_to_drop = ['annulus_mean', 'annulus_median', 'annulus_std', 'annulus_samples',
                   'filter', 'image_id', 'ccd_num', 'filename', 'archivepath']
filtered_df = filtered_df.drop(columns=columns_to_drop)
filtered_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 84996 entries, 0 to 84995
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   raw_flux        84996 non-null  float64
 1   area            84996 non-null  float64
 2   flux            84996 non-null  float64
 3   ref_id          84996 non-null  int64  
 4   I_auto          84996 non-null  float64
 5   zp              84996 non-null  float64
 6   aperture_x      84996 non-null  float64
 7   aperture_y      84996 non-null  float64
 8   aperture_theta  84996 non-null  float64
 9   aperture_a      84996 non-null  float64
 10  aperture_b      84996 non-null  float64
 11  path            84996 non-null  object 
dtypes: float64(10), int64(1), object(1)
memory usage: 7.8+ MB


In [24]:
# Save the selection.  The DataFrame index is written as the first, unnamed
# column (pandas default).
output_csv = '/data/aai/scratch/jchan/denoise/PAUS/output_save/modify_pn2v/selectdata.csv'
filtered_df.to_csv(output_csv)
