In [1]:
#just to check python version - should be 3.7.4
from platform import python_version
print(python_version())

#importing libraries
from astropy.io import fits
from astropy.convolution import convolve, Gaussian2DKernel, Box2DKernel
from astropy.nddata import Cutout2D
from astropy.wcs import WCS

import glob
import itertools
import matplotlib 
matplotlib.use('Agg') #invokved b/c just plain matplotlib was insufficient
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sys

%matplotlib inline

3.8.13




In [2]:
# #finding the path to every fits images in a directory
def im_name_finder(path, file_type):
    '''
    Return the paths matched by a glob pattern whose file extension contains file_type.

    path is a glob pattern; it is expanded with recursive=True, so '**' descends into
        subdirectories. WARNING: with many images/subfolders a recursive search can be very slow.
    file_type is the text to look for (case-insensitively) in the file's extension(s).
        Passing 'fit' is recommended because it is inclusive of '.fit', '.fits', '.FIT'
        and compressed names such as '.fits.gz'. A leading '*' or '.' is ignored, and an
        empty value falls back to 'fit'.

    Returns a list of the matching paths, exactly as glob produced them (unsorted).
    '''
    wanted = (file_type or 'fit').lower().lstrip('*.')

    #Using glob (similar to ls on unix) to expand the pattern
    all_names = glob.glob(path, recursive=True)

    im_names = []
    for candidate in all_names:
        #only the last path component matters: a folder called e.g. "fit_results" must not
        #cause every file inside it to be treated as an image
        base_name = candidate.replace('\\', '/').rsplit('/', 1)[-1]

        #every dot-separated piece after the first is treated as an extension,
        #so "img.fits.gz" is still recognized through its ".fits" part
        extensions = base_name.lower().split('.')[1:]

        if any(wanted in ext for ext in extensions):
            im_names.append(candidate)

    return im_names


'''Now convolve my image with the PSF of the image we're projecting ONTO.
An approximate PSF can be found by assuming a 2D Gaussian function with a width (a FWHM) of the diffraction limit,
that is, the st dev of the Gaussian is about lambda/D.
A list of PSF kernels can be found at https://docs.astropy.org/en/stable/convolution/kernels.html

Notes:
FIRST: always convert hdu1_pixtorad to radians! It's inconsistent otherwise, and lambda/D is generally in radians.

What we're using for the Gaussian width is the FWHM, not the radius of the first ring of the diffraction pattern,
so it's 1.2, not 1.22, times lambda/D.

D is 85 cm for Spitzer
D is 2.4 m for Hubble
'''

def im_conv(D, hdu_pix_torad, hdu_dat, lam, kern, box_width=16.):
    '''
    Convolve an image with either a Gaussian or a box kernel.

    D is the telescope aperture diameter; must be in the same length unit as lam
    hdu_pix_torad is the pixel scale used to turn the angular resolution into pixels
        (presumably radians per pixel - confirm against the caller)
    hdu_dat is the image array to convolve
    lam is the wavelength; must be in the same length unit as D
    kern selects the kernel: 'gauss' or 'box'
    box_width is the width passed to Box2DKernel when kern == 'box' (default 16, as before)

    Returns the convolved array from astropy's convolve.
    Raises ValueError if kern is not one of the supported names.
    '''
    if kern == 'gauss':
        #angular resolution 1.2 * lambda / D (in radians), then divided by the pixel scale to get pixels
        res = 1.2 * lam / D
        res = res / hdu_pix_torad

        #NOTE(review): Gaussian2DKernel's first argument is a standard deviation, while the notes
        #describe this value as a FWHM - confirm whether a FWHM -> sigma conversion is intended
        kernel = Gaussian2DKernel(res)
    elif kern == 'box':
        kernel = Box2DKernel(box_width)
    else:
        #fail early with a clear message instead of hitting an unbound `kernel` below
        raise ValueError("Unknown kernel type %r: expected 'gauss' or 'box'" % (kern,))

    hdu_conv = convolve(hdu_dat, kernel)
    return hdu_conv

# In[27]:

#setting up a new fits file to be saved and viewed in DS9
#primarily to save the image we reprojected, but can also be used to save the convolved images
def fits_saver(array, wcs_header, name, save_path):
    '''
    Write a 2d array to a new single-HDU FITS file and return the path that was written.

    array is a 2d array of data - could be from reprojecting one image onto another or from convolution
    wcs_header is a header containing the wcs coords of the image that we projected onto or of the orig image (if from the convolution)
    name is the path to some image you're using. It gets split at the / character and only the last element (the file name) is kept.
        If that file name ends in '.FIT' the ending is replaced by '.fits'; any other name is used unchanged.
    save_path is prepended as-is to the file name, so it can be a folder plus a prefix for the image names (e.g. 'Regridded/regrid_')

    An existing file at the output path is overwritten.
    '''

    #creating a new file holding the array together with the given header
    hdu_new = fits.PrimaryHDU(array, header=wcs_header)
    hdul = fits.HDUList([hdu_new])

    #grabs the file name we were using from before
    new_filename = name.split('/')[-1]

    #only fix the ending when the file name itself ends in '.FIT'; looking for 'FIT' anywhere
    #in the path would corrupt names like 'FITTED_img.fits' or 'img.FITS'
    if new_filename.endswith('.FIT'):
        new_filename = new_filename[:-3] + 'fits'

    #saving the file
    out_path = save_path + new_filename
    hdul.writeto(out_path, overwrite=True)

    return out_path

#our plotting function
def implot(data, w, wcscond, vmax_p):
    '''
    Show a 2d array as an image in a new matplotlib figure.

    data is the 2d array to display
    w is the projection handed to the subplot; only used when wcscond is True
    wcscond chooses between a subplot with projection=w (True) and a plain subplot
    vmax_p is the upper limit of the color scale (the lower limit is fixed at 0)

    Nothing is returned; the figure is left as the current matplotlib figure.
    '''
    subplot_kwargs = {'projection': w} if wcscond == True else {}

    figure = plt.figure()
    figure.add_subplot(111, **subplot_kwargs)

    #colormap notes: GnRd for christmas, plt.cm.get_cmap('Blues', 6) or RdBu are other options,
    #viridis is kept as the default b/c it works
    plt.imshow(
        data,
        origin='lower',
        cmap=plt.cm.viridis,
        vmin=0,
        vmax=vmax_p,
    )
    plt.xlabel('RA')
    plt.ylabel('Dec')


In [3]:
#glob pattern for the epoch images (using ** would also grab files in subdirectories, WARNING: takes longer)
path = '../scaling_for_motions/160_epoch*.fits'

#im_name_finder is basically glob.glob; sort the matches and normalize windows-style separators
im_names_n2071 = sorted(im_name_finder(path, 'fit'))
im_names_n2071 = [i.replace('\\', '/') for i in im_names_n2071]
print(im_names_n2071)

#keep only the first and third matches
im_names_n2071 = [im_names_n2071[0], im_names_n2071[2]]
print(im_names_n2071)

hdu_list = [fits.open(i) for i in im_names_n2071]

#lists filled in the loop below: image data (for general use) and headers (for wcs)
hdu_data_list = []
hdu_header_list = []

count = 0
for hdu_data in hdu_list:
    primary_hdu = hdu_data[0]

    #the second image in this list is negative, so its sign gets flipped
    sign = -1 if count == 1 else 1

    #optional unit conversion from e-/sec (should give erg/cm^2/sec), currently switched off:
    # * hdu_list[0].header['PHOTFLAM'] * hdu_list[0].header['PHOTBW']
    hdu_data_list.append(sign * primary_hdu.data)
    hdu_header_list.append(primary_hdu.header)

    count += 1

['../scaling_for_motions/160_epoch1.fits', '../scaling_for_motions/160_epoch1_scaled.fits', '../scaling_for_motions/160_epoch2_synth.fits', '../scaling_for_motions/160_epoch2_synth_scaled.fits']
['../scaling_for_motions/160_epoch1.fits', '../scaling_for_motions/160_epoch2_synth.fits']


# tobac

from https://github.com/tobac-project/tobac/tree/main/tobac

"tobac v1.0: towards a flexible framework for tracking and analysis of clouds in diverse datasets"
by Heikenfeld et al 2019


In [122]:
# import iris
# iris.load_cube(hdu_data_list)

#instead we should base this off of their functions directly...
#based on https://github.com/tobac-project/tobac/blob/main/tobac/analysis.py
def calculate_nearestneighbordistance(features, method_distance=None):
    '''
    Add a "min_distance" column holding each feature's distance to its nearest neighbor at the same time.

    features is a DataFrame with a "time" column; rows are grouped by "time" and, within each
        group, every pair of rows is compared. It is modified in place (the "min_distance"
        column is added/overwritten) and also returned.
    method_distance is forwarded unchanged to calculate_distance.

    Features that are alone in their time step keep min_distance = NaN.

    NOTE: calculate_distance is not defined in this notebook; it must be provided
    (e.g. taken from tobac) before this function is called on time steps with more than one feature.
    '''
    #logging is not imported at the top of the notebook, so import it here alongside combinations
    import logging
    from itertools import combinations

    features["min_distance"] = np.nan
    for time_i, features_i in features.groupby("time"):
        logging.debug(str(time_i))

        #one record per unordered pair of features in this time step
        pair_records = []
        for index_1, index_2 in combinations(features_i.index.values, 2):
            #compare labels by value; an identity check (`is not`) never filters numpy scalars
            if index_1 != index_2:
                distance = calculate_distance(
                    features_i.loc[index_1],
                    features_i.loc[index_2],
                    method_distance=method_distance,
                )
                pair_records.append(
                    {"index_1": index_1, "index_2": index_2, "distance": distance}
                )

        if pair_records:
            distances = pd.DataFrame(pair_records)
            for i in features_i.index:
                #smallest distance over all pairs that involve feature i
                min_distance = distances.loc[
                    (distances["index_1"] == i) | (distances["index_2"] == i),
                    "distance",
                ].min()
                features.at[i, "min_distance"] = min_distance
    return features

def calculate_overlap(
    track_1, track_2, min_sum_inv_distance=None, min_mean_inv_distance=None
):
    '''
    Compare every cell of track_1 with every cell of track_2 over the times they share.

    track_1, track_2 are DataFrames with at least "cell" and "time" columns.
    min_sum_inv_distance, if given (and non-zero), drops pairs whose "sum_inv_distance" is below it.
    min_mean_inv_distance, if given (and non-zero), drops pairs whose "mean_inv_distance" is below it.

    Returns a DataFrame with one row per cell pair that shares at least one time, with columns
    "cell_1", "cell_2", "n_overlap" (number of shared rows), "mean_inv_distance" and
    "sum_inv_distance", where the inverse distance of one time step is 1 / (1 + distance / 1000).
    The columns are present even when no pair overlaps.

    Rows of the two cells are paired by position after filtering to the shared times, so this
    assumes both tracks list their times in the same order - TODO confirm for your data.

    NOTE: calculate_distance is not defined in this notebook; it must be provided
    (e.g. taken from tobac) before this function is called.
    '''
    columns = ["cell_1", "cell_2", "n_overlap", "mean_inv_distance", "sum_inv_distance"]

    cells_1 = track_1["cell"].unique()
    cells_2 = track_2["cell"].unique()

    #collect plain records and build the DataFrame once at the end
    #(DataFrame.append no longer exists in pandas >= 2.0)
    records = []
    for cell_1 in cells_1:
        #rows of this cell do not depend on cell_2, so select them once
        track_1_cell = track_1[track_1["cell"] == cell_1]
        for cell_2 in cells_2:
            track_2_i = track_2[track_2["cell"] == cell_2]
            #restrict both cells to the time steps they have in common
            track_1_i = track_1_cell[track_1_cell["time"].isin(track_2_i["time"])]
            track_2_i = track_2_i[track_2_i["time"].isin(track_1_i["time"])]
            if track_1_i.empty:
                continue

            n_overlap = len(track_1_i)
            distances = np.array(
                [
                    calculate_distance(
                        track_1_i.iloc[[i]], track_2_i.iloc[[i]], method_distance="xy"
                    )
                    for i in range(n_overlap)
                ]
            )
            inv_distances = 1 / (1 + distances / 1000)
            records.append(
                {
                    "cell_1": cell_1,
                    "cell_2": cell_2,
                    "n_overlap": n_overlap,
                    "mean_inv_distance": np.mean(inv_distances),
                    "sum_inv_distance": np.sum(inv_distances),
                }
            )

    #explicit columns keep the threshold filters below valid for an empty result
    overlap = pd.DataFrame(records, columns=columns)

    if min_sum_inv_distance:
        overlap = overlap[(overlap["sum_inv_distance"] >= min_sum_inv_distance)]
    if min_mean_inv_distance:
        overlap = overlap[(overlap["mean_inv_distance"] >= min_mean_inv_distance)]

    return overlap


'''
Otherwise, the methods use:
- watershed segmentation
- trackpy
- scikit-image's morphology.reconstruction, splitting the image around either dilation (max) or erosion (min)
...reconstruction might not be a bad idea...never mind, don't do it...
'''

# from skimage.morphology import reconstruction
# image = hdu_data_list[0]

# seed = np.copy(image)
# seed[1:-1, 1:-1] = 1e-16 # image.max()
# mask = image
# filled = reconstruction(seed, mask, method='erosion')

# seed = np.copy(image)
# seed[1:-1, 1:-1] = image.min()
# rec = reconstruction(seed, mask, method='dilation')


# #their main goal is thresholding, see: https://github.com/tobac-project/tobac/blob/main/tobac/feature_detection.py

ValueError: Intensity of seed image must be greater than that of the mask image for reconstruction by erosion.

**Note:** tobac seems difficult to use here because it requires Iris and data cubes.

# Testing TINT ("TINT Is Not TITAN")

See https://github.com/openradar/TINT/tree/master/tint \
"An Adaptive Tracking Algorithm for Convection in Simulated and Remote Sensing Data" by Bhupendra et al 2021

Here, too, the data set doesn't work well for us, but we could perhaps either base our approach on TITAN or reuse their functions.

In [None]:
https://github.com/openradar/TINT/blob/master/tint/phase_correlation.py
    
https://github.com/openradar/TINT/blob/master/tint/matching.py
        
https://github.com/tobac-project/tobac/blob/main/tobac/feature_detection.py