In [None]:
#import boto3
from astropy.io import fits
from io import BytesIO
import sys
import pandas as pd 
import numpy as np 
import os
import matplotlib.pyplot as plt
sys.path.append('/home/bekah/gPhoton2')
from gPhoton.coadd import zero_flag_and_edge
import pyarrow.parquet as parquet
import fast_histogram as fh
import time



In [None]:
def centile_clip(image, centiles=(0, 90)):
    """
    Clamp an image to the values found at two of its own percentiles.

    Non-finite entries (NaN / inf) are masked out before the percentiles
    are computed, so they do not influence the clipping bounds.

    Parameters
    ----------
    image : array-like or numpy.ma.MaskedArray
        Values to clip.
    centiles : sequence of two numbers
        Lower and upper percentile (0-100) used as the clipping bounds.

    Returns
    -------
    numpy.ma.MaskedArray if `image` was a masked array, otherwise the
    underlying plain ndarray of the clipped result.
    """
    masked = np.ma.masked_invalid(image)
    # compressed() yields only the unmasked (finite) values as a 1-D ndarray
    limits = np.percentile(masked.compressed(), centiles)
    clipped = np.ma.clip(masked, *limits)
    return clipped if isinstance(image, np.ma.MaskedArray) else clipped.data

In [None]:
# Load the primary-HDU image of the NUV spot mask (presumably a hand-drawn
# hotspot mask, judging by the path -- confirm).
# The context manager closes the FITS file instead of leaving the handle open
# for the lifetime of the kernel.
with fits.open("/media/bekah/BekahA/hotspot/drawn/nuv_spots.fits") as nuv_drawn:
    # copy() detaches the array from the (possibly memory-mapped) file so it
    # stays valid after the file is closed.
    mask = nuv_drawn[0].data.copy()

In [None]:
# Read the parquet table into a pandas DataFrame (presumably a gPhoton2
# photon list for eclipse 23456 -- inferred from the path, confirm).
nf = parquet.read_table("/home/bekah/gPhoton2/test_data/e23456/e23456-nd-b00.parquet").to_pandas()

In [None]:
# Keep only rows whose row*4 and col*4 both fall inside [-200, 3400],
# i.e. raw row/col inside [-50, 850].
nf = nf[(nf['row']*4 >= -200) & (nf['row']*4 <= 3400) & (nf['col']*4 >= -200) & (nf['col']*4 <= 3400)]

In [None]:
# Linear rescale x -> x*16/3 + 800/3. This sends the filter limits
# (-50, 850) to (0, 4800); presumably the pixel grid of the drawn mask
# image -- TODO confirm.
nf['col_adj'] = nf['col']*(16/3)+200*(4/3)
nf['row_adj'] = nf['row']*(16/3)+200*(4/3)

In [None]:
# Round the rescaled coordinates to integer pixel indices into the mask image.
nf['row_rnd'] = nf['row_adj'].round().astype(int)
nf['col_rnd'] = nf['col_adj'].round().astype(int)

# Drop rows that round to index 4800, which would be out of bounds for a
# 4800-wide image (assumes the mask is 4800 x 4800 -- TODO confirm).
nf = nf[(nf['row_rnd'] <= 4799) & (nf['col_rnd'] <= 4799)]

# Look up the mask value under every row and store it directly as a column.
# The 2-D `mask` image is deliberately NOT overwritten with the per-row
# result: rebinding it made this cell fail when run a second time.
# NOTE(review): the image is indexed [col, row]; confirm that matches the
# axis order of the FITS array.
nf['flagged'] = mask[nf['col_rnd'].to_numpy(), nf['row_rnd'].to_numpy()] == 1



In [None]:
# List the column names of the table.
nf.keys()

In [None]:
# Flagged rows in raw detector coordinates (col on x, row on y).
plt.scatter( nf[nf['flagged']]['col'],nf[nf['flagged']]['row'],s=.25)

In [None]:
# The same flagged rows in sky coordinates (ra on x, dec on y), zoomed to a
# fixed ra/dec window.
plt.scatter( nf[nf['flagged']]['ra'],nf[nf['flagged']]['dec'],s=.00001)
plt.xlim(322.75,324.25)
plt.ylim(-2.8,-1.4)

In [None]:
# Select every row that landed on one (col_rnd, row_rnd) pixel.
# NOTE(review): `pixel` is sliced here, before the row_rnd2 / col_rnd2
# columns are added to `nf` in the next cell, so `pixel` does not carry
# those columns.
#pixel = nf[(nf['col_rnd2'] == 620) &(nf['row_rnd2'] == 223) ] # &(nf['row_rnd2'] == 400)
pixel = nf[(nf['col_rnd'] == 260) &(nf['row_rnd'] == 420) ]

In [None]:
# Integer pixel indices from the raw (unscaled) row/col values.
nf['row_rnd2'] = nf['row'].round().astype(int)
nf['col_rnd2'] = nf['col'].round().astype(int)

In [None]:
# Sky positions (ra, dec) of the rows in the selected pixel.
plt.scatter(pixel['ra'],pixel['dec'],s=.15)

In [None]:
# Raw detector positions of the same rows (col on x, row on y).
plt.scatter(pixel['col'],pixel['row'],s=.15)

In [None]:
# Same data with the axes swapped (row on x, col on y), default marker size.
plt.scatter(pixel['row'],pixel['col'])

In [None]:
# Histogram of the rounded raw row index for the selected pixel.
# Computed from pixel['row'] directly: `pixel` was sliced from `nf` before the
# 'row_rnd2' column was added, so pixel['row_rnd2'] raises KeyError on a
# top-to-bottom run. The values are identical to nf['row_rnd2'] for these rows.
plt.hist(pixel['row'].round().astype(int),bins=100)

In [None]:
# this is computationally very expensive 

import numpy as np
from scipy.spatial.distance import pdist, squareform

def average_dispersion(group):
    """
    Mean pairwise distance between the (ra, dec) points of one group.

    The distance is plain Euclidean distance on the raw 'ra' and 'dec'
    column values (no spherical / cos(dec) correction is applied).

    Parameters
    ----------
    group : pandas.DataFrame
        Rows with 'ra' and 'dec' columns.

    Returns
    -------
    float
        Mean of all pairwise distances, or NaN when the group has fewer
        than two rows (no pair exists). The number of pairs grows
        quadratically with the group size.
    """
    if len(group) >= 2:
        sky_positions = group[['ra', 'dec']].values
        return np.mean(pdist(sky_positions, metric='euclidean'))
    return np.nan

# Call average_dispersion once per (col_rnd2, row_rnd2) group and collect the
# results into a frame with an 'average_dispersion' column.
dispersion = nf.groupby(['col_rnd2', 'row_rnd2']).apply(average_dispersion).reset_index(name='average_dispersion')

# Attach the per-pixel value to every row of nf (pd.merge defaults to an inner
# join on the two key columns).
# NOTE(review): this rebinds `nf`, so re-running the cell merges a second time
# and pandas will suffix the duplicated column names.
nf = pd.merge(nf, dispersion, on=['col_rnd2', 'row_rnd2'])

In [None]:
# Standard deviation of ra and dec for every (col_rnd2, row_rnd2) pixel,
# flattened back into ordinary columns.
dispersion = (
    nf.groupby(['col_rnd2', 'row_rnd2'])[['ra', 'dec']]
    .std()
    .reset_index()
)

In [None]:
# Pixels whose ra standard deviation exceeds 50, coloured by the sum of the
# dec and ra standard deviations.
disp_gt50 = dispersion[dispersion['ra'] > 50]
plt.scatter(
    disp_gt50['col_rnd2'],
    disp_gt50['row_rnd2'],
    c=disp_gt50['dec'] + disp_gt50['ra'],
    s=.5,
)

In [None]:
# Display the per-pixel standard-deviation table.
dispersion

In [None]:
# Distribution of the per-pixel ra standard deviation.
plt.hist(dispersion['ra'],bins=50)

In [None]:
# Rename the std columns so they cannot collide with nf's own 'ra' / 'dec'.
# NOTE(review): after this cell `dispersion` no longer has 'ra' / 'dec'
# columns, so cells that read dispersion['ra'] cannot be re-run without
# recomputing `dispersion` first.
dispersion.columns = ['col_rnd2', 'row_rnd2', 'ra_std', 'dec_std']


In [None]:
# WARNING (author's note): this merge crashes from excessive memory use -- do
# not run it.
# It would attach the per-pixel ra_std / dec_std values to every row of nf
# (inner join on the two pixel-index columns).
nf = pd.merge(nf, dispersion, on=['col_rnd2', 'row_rnd2'])

In [None]:
# Plan: build a 2D histogram / dosemap from the points and use it to identify hotspots and dark spots.
# Then measure the "dispersion" (i.e. the standard deviation of ra and dec) of each bright spot to determine
# whether it is a hotspot or a real star.
# For cold spots the dispersion obviously cannot be measured when there are no points, so some kind of cutoff
# based on the background rate is needed instead.

# Pre-filtering with the 2D histogram should significantly cut down the time needed to calculate the standard deviation.

In [None]:
def dosemap_ranges(radius):
    """
    Build matching [low, high] ranges for the two axes of a 2D histogram.

    The 0..3200 span is widened on both ends by (radius - 400); a radius
    below 400 therefore shrinks it instead.

    Returns
    -------
    tuple of two separate two-element lists, one per axis.
    """
    margin = 400 - radius
    lower, upper = margin, 3200 - margin
    return [lower, upper], [lower, upper]

def is_point_in_list(row, points_list):
    """
    Tell whether the (col, row) pair of `row` occurs in `points_list`.

    `row` is anything indexable by the keys 'col' and 'row'; `points_list`
    is any container of (col, row) tuples that supports `in`.
    """
    point = (row['col'], row['row'])
    return point in points_list

In [None]:
# Load the table again from parquet, keep only rows that fall inside the
# dosemap frame, and derive integer pixel coordinates.

nf = parquet.read_table("/home/bekah/gPhoton2/test_data/e23456/e23456-nd-b00.parquet").to_pandas()


In [None]:
# Keep rows whose row*4 and col*4 both fall inside [-200, 3400]
# (raw row/col inside [-50, 850]).
nf = nf[(nf['row']*4 >= -200) & (nf['row']*4 <= 3400) & (nf['col']*4 >= -200) & (nf['col']*4 <= 3400)]

# Rescaled coordinates (x*16/3 + 800/3).
# NOTE(review): these two columns are not read by any later cell in the
# visible notebook.
nf['col_adj'] = nf['col']*(16/3)+200*(4/3)
nf['row_adj'] = nf['row']*(16/3)+200*(4/3)

# Unlike the earlier pass, the rounded indices here come from the RAW row/col,
# not from the rescaled *_adj columns.
nf['row_rnd'] = nf['row'].round().astype(int)
nf['col_rnd'] = nf['col'].round().astype(int)

# NOTE(review): with raw row/col already limited to [-50, 850] this filter
# cannot remove anything; it looks like a leftover from the rescaled version.
nf = nf[(nf['row_rnd'] <= 4799) & (nf['col_rnd'] <= 4799)]

In [None]:
# Time the histogram + per-pixel standard-deviation pass.
start_time = time.time()

print("finished filtering data, making histogram now")

# Make a 2D histogram and filter for dark spots and hotspots. "Hotspots" will
# inevitably contain bright stars.
# NOTE(review): dosemap_ranges(600) gives [-200, 3400] on both axes, while the
# raw col/row passed here were limited to [-50, 850] by the filter (which
# compared col*4 / row*4). Each bin is therefore 4.5 units wide and the data
# spans only about 200 of the 800 bins per axis. Possibly col*4 / row*4 was
# intended -- confirm.
hist = fh.histogram2d(
        nf['col'], nf['row'], bins=800, range=dosemap_ranges(600)
    ).astype('float32')

print("done with histogram, selecting points")

# True for bins with more than 30 counts or with no counts at all.
# NOTE(review): `mask` is not read again in the visible notebook.
mask = (hist > 30) | (hist < 1)

print("calculating dispersion")

# Standard deviation of ra and dec for every (col_rnd, row_rnd) pixel.
dispersion = nf.groupby(['col_rnd', 'row_rnd']).agg({
    'ra': 'std',
    'dec': 'std'
}).reset_index()

end_time = time.time()

duration = end_time - start_time
print(f"it took {duration:.4f} seconds")

In [None]:
# Display the per-pixel standard-deviation table.
dispersion

In [None]:
# Shape of the 2D histogram array.
hist.shape

In [None]:
# Pixels whose ra standard deviation exceeds 100, coloured by |ra std| + |dec std|.
disp_gt100 = dispersion[dispersion['ra'] > 100]
plt.scatter(
    disp_gt100['col_rnd'],
    disp_gt100['row_rnd'],
    c=abs(disp_gt100['ra']) + abs(disp_gt100['dec']),
    s=.05,
)

In [None]:
# Distribution of |ra std| + |dec std| over pixels whose ra standard deviation exceeds 20.
disp_gt20 = dispersion[dispersion['ra'] > 20]
plt.hist(abs(disp_gt20['ra']) + abs(disp_gt20['dec']), bins=100)

In [None]:
# Rebuild the histogram on the raw col/row values with 800 unit-wide bins
# covering [0, 800) on each axis. This replaces the earlier `hist`.
# NOTE(review): bin k covers [k, k+1), whereas col_rnd / row_rnd come from
# round(), which groups roughly [k-0.5, k+0.5). The histogram and the per-pixel
# dispersion are therefore offset by half a pixel -- confirm this is acceptable.
hist = fh.histogram2d(
        nf['col'], nf['row'], bins=800, range=([0,800],[0,800])
    ).astype('float32')

In [None]:
# Pixel positions that have a dispersion entry, limited to the 0..800 window.
plt.scatter(dispersion['col_rnd'],dispersion['row_rnd'],s=.00002)
plt.xlim(0,800)
plt.ylim(0,800)

In [None]:
# Rasterise the per-pixel dispersion (ra std + dec std) onto a fixed
# 800 x 800 grid, indexed [col_rnd, row_rnd].
grid_size = 800  # same extent as the bins=800 / range [0, 800] histogram

# Keep only pixels inside the grid. The lower bound matters: col_rnd / row_rnd
# can be negative, and negative values used as numpy indices would silently
# wrap around to the far edge of the array.
in_grid = (
    dispersion['col_rnd'].between(0, grid_size - 1)
    & dispersion['row_rnd'].between(0, grid_size - 1)
)
dispersion = dispersion[in_grid]
max_col = dispersion['col_rnd'].max()
max_row = dispersion['row_rnd'].max()

# A fixed shape (instead of max index + 1) keeps the array aligned with the
# 800 x 800 histogram even when the data does not reach the last row/column.
# Pixels with no entry stay 0; pixels with a single point carry NaN, because
# the standard deviation of one value is NaN.
high_disp = np.zeros((grid_size, grid_size))

high_disp[dispersion['col_rnd'].to_numpy(), dispersion['row_rnd'].to_numpy()] = (
    abs(dispersion['ra']) + abs(dispersion['dec'])
).to_numpy()

In [None]:
# Dispersion raster. high_disp is indexed [col, row], so imshow draws the col
# index along the vertical axis.
# NOTE(review): interpolation=None selects matplotlib's default from rcParams,
# not "no interpolation"; the string 'none' disables it -- confirm which was
# intended.
plt.imshow(high_disp,origin='lower',interpolation=None)
plt.colorbar()

In [None]:
# Difference between photon counts and dispersion per pixel (requires both
# arrays to have the same shape).
plt.imshow(hist-high_disp,origin='lower')

In [None]:
# Where the dispersion is NaN. This call omits origin='lower', so it is
# flipped vertically relative to the two plots above.
plt.imshow(np.isnan(high_disp))

In [None]:
# Density mask: bins with unusually many (> 85) or few (< 20) counts.
mask1 = (hist < 20) | (hist > 85)

# Dispersion mask: pixels with large (> 5) or undefined (NaN) dispersion.
mask2 = np.isnan(high_disp) | (high_disp > 5)

# Combined classification per pixel:
#    2 -> both masks set
#    1 -> density mask only
#   -1 -> dispersion mask only
#    0 -> neither
# np.select takes the first matching condition, so the "both" case is listed first.
result_array = np.select(
    [mask1 & mask2, mask1, mask2],
    [2, 1, -1],
    default=0,
).astype(int)


In [None]:
# Combined classification map (2 / 1 / 0 / -1).
plt.figure(figsize=(10,10))
plt.imshow(result_array, interpolation=None, origin='lower')
plt.colorbar()

In [None]:
# Count histogram, clipped to its 0th-90th percentile range for display.
plt.imshow(centile_clip(hist),origin='lower',interpolation=None)
plt.colorbar()

In [None]:
# Dispersion mask on its own.
# NOTE(review): no origin='lower' here, so this is flipped vertically relative
# to the plots above.
plt.figure(figsize=(10,10))

plt.imshow(mask2,interpolation=None)
plt.colorbar()

In [None]:
# Density mask on its own (also drawn without origin='lower').
plt.figure(figsize=(10,10))

plt.imshow(mask1,interpolation=None)
plt.colorbar()

In [None]:
# Dispersion entry for a single pixel. Both conditions must come from
# `dispersion`: the original compared the row against nf['row_rnd'], a
# different frame with a different index, so the boolean mask did not line up
# with the frame being filtered.
pixel = dispersion[(dispersion['col_rnd'] == 700) & (dispersion['row_rnd'] == 380)]

In [None]:
# List the column names of the table.
nf.keys()


In [None]:
# Sky positions of all rows, coloured by the 'response' column.
plt.scatter(nf['ra'],nf['dec'],c=nf['response'],s=.0005)