In [None]:
from __future__ import print_function
from astropy.io import fits
import numpy as np
import math
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
from numpy.polynomial.polynomial import polyval, polyder
import time
import csv
from dotenv import load_dotenv, find_dotenv
import os
import configparser
import logging
import pandas as pd
import sys

# sys.path.append('/Users/cwang/Documents/KPF/KPF-Pipeline/modules')
%matplotlib inline

class DotDict(dict):
    """Plain ``dict`` subclass; it adds no attributes or methods of its own."""


In [None]:
from modules.optimal_extraction.src.alg import OptimalExtractionAlg
# Relative path to the optimal_extraction module directory; later cells build
# the config-file path and the results paths by appending to it.
MODULE_DIR = '../../modules/optimal_extraction/'

In [None]:
from tests.regression import test_optimal_extraction
# Run the optimal-extraction regression tests directly from the notebook.
# Per the original note, an __init__.py has to be added (temporarily) so that
# `tests.regression` can be imported as a package for these calls.
test_optimal_extraction.test_init_exceptions()
test_optimal_extraction.test_get_flux_from_order_exceptions()
test_optimal_extraction.test_optimal_extraction_exceptions()
test_optimal_extraction.test_get_flux_from_order_norect()
test_optimal_extraction.test_get_flux_from_order_vertical()
test_optimal_extraction.test_get_flux_from_order_normal()
test_optimal_extraction.test_optimal_extraction_norect()
test_optimal_extraction.test_optimal_extraction_vertical()
test_optimal_extraction.test_optimal_extraction_normal()

In [None]:
# Load environment variables from a .env file, if one is found.
load_dotenv()
# Root directory of the test data. os.getenv returns None when
# KPFPIPE_TEST_DATA is not set, in which case the `TEST_DIR + '...'`
# concatenations in later cells raise TypeError.
TEST_DIR = os.getenv('KPFPIPE_TEST_DATA')
print(TEST_DIR)
# NOTE(review): hard-coded absolute path on the author's machine; it is used
# below to locate the PARAS flat file. Adjust before running elsewhere.
KPF_PIPELINE = '/Users/cwang/documents/KPF/KPF-Pipeline/'

In [None]:
# copy of start_logger from logger.py
def get_level(lvl:str) -> int:
    """Translate a lowercase level name into the matching ``logging`` constant.

    Recognised names are 'debug', 'info', 'warning', 'error' and 'critical'.
    Anything else (including differently-cased names and non-string values)
    yields ``logging.NOTSET``.
    """
    known_levels = (
        ('debug', logging.DEBUG),
        ('info', logging.INFO),
        ('warning', logging.WARNING),
        ('error', logging.ERROR),
        ('critical', logging.CRITICAL),
    )
    # Compare with == rather than hashing so any input type is accepted.
    for level_name, level_value in known_levels:
        if lvl == level_name:
            return level_value
    return logging.NOTSET
    
def start_logger(logger_name: str, config: str):
    """Create (or reconfigure) a named console logger from a config file.

    Only the ``log_level`` option of the ``[LOGGER]`` section is used; the
    logger writes to a single ``StreamHandler`` and does not propagate to
    its parent loggers.

    Args:
        logger_name: name passed to ``logging.getLogger``.
        config: path of an INI-style configuration file, or None.

    Returns:
        The configured ``logging.Logger``, or None when ``config`` is None or
        the file could not be read.

    Raises:
        KeyError: if the file was read but has no ``LOGGER`` section.
    """
    if config is None:
        # a config file is not provided, so don't start logger
        print('[{}] missing log configuration...not starting a new logger'.format(
            logger_name))
        return None

    config_obj = configparser.ConfigParser()
    # ConfigParser.read() returns the list of files it managed to parse.
    if not config_obj.read(config):
        return None

    log_cfg = config_obj['LOGGER']
    # The fallback must be a level *name*: get_level() only maps names, so a
    # numeric fallback such as logging.WARNING would be turned into NOTSET.
    log_lvl = get_level(log_cfg.get('log_level', 'warning'))

    logger = logging.getLogger(logger_name)
    logger.setLevel(log_lvl)
    logger.propagate = False

    # getLogger() hands back the same object for the same name, so calling
    # this function again (e.g. re-running a cell) must not stack handlers,
    # otherwise every message is printed once per call.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)

    formatter = logging.Formatter('[%(name)s][%(levelname)s]:%(message)s')
    s_handle = logging.StreamHandler()
    s_handle.setLevel(log_lvl)
    s_handle.setFormatter(formatter)
    logger.addHandler(s_handle)
    return logger

In [None]:
def plot_two_fits_trace(spectral1, spectral2, total_rows, coeffs_rows, range_rows = None):
    """Show two images side by side and overlay polynomial traces on the first.

    Args:
        spectral1: dict with keys 'data' (image), 'xdim' and 'ydim'; drawn in
            the left panel together with the traces.
        spectral2: dict with keys 'data' (image) and 'ydim'; drawn in the
            right panel without traces.
        total_rows: number of traces to draw (rows 0..total_rows-1 of
            ``coeffs_rows``).
        coeffs_rows: per-trace polynomial coefficients, each row passed to
            ``np.polyval`` as is.
        range_rows: optional array indexed as ``range_rows[y, 0]`` (start) and
            ``range_rows[y, 1]`` (stop, exclusive) giving the x range of each
            trace. When None every trace spans 0..spectral1['xdim'].
    """
    plt.figure(figsize=(20,20))
    plt.subplot(1, 2, 1)
    im1 = plt.imshow(spectral1['data'], cmap='gray', norm=LogNorm())

    for y in range(0, total_rows):
        if range_rows is not None:
            x_val = np.arange(range_rows[y, 0], range_rows[y, 1])
        else:
            # full image width of the image the traces are drawn on
            x_val = np.arange(0, spectral1['xdim'])
        y_val = np.polyval(coeffs_rows[y], x_val)
        plt.plot(x_val, y_val, 'r--')

    plt.ylim(0, spectral1['ydim'])
    plt.colorbar(im1, fraction=0.046, pad=0.04)

    plt.subplot(1, 2, 2)
    im2 = plt.imshow(spectral2['data'], cmap='gray', norm=LogNorm())

    plt.ylim(0, spectral2['ydim'])
    plt.colorbar(im2, fraction=0.046, pad=0.04)
    plt.show()

In [None]:
def plot_output(out_data, total_rows):
    """Display ``out_data`` as a grayscale image with y limited to [0, total_rows]."""
    figure_size = (12, 12)
    y_bottom, y_top = 0, total_rows

    plt.figure(figsize=figure_size)
    plt.subplot(1, 1, 1)
    plt.imshow(out_data, cmap='gray')
    plt.ylim(y_bottom, y_top)
    plt.show()

In [None]:
def load_spectral_sample(fits_file, order_trace_csv, flatlamp_file, config, logger, power):
    """Load a spectrum and its flat, and build an OptimalExtractionAlg for them.

    Args:
        fits_file: path of the spectrum FITS file (read with ``fits.getdata``).
        order_trace_csv: path of a comma-separated order-trace file, or None.
            When given, the flat image is taken from HDU 0 of ``flatlamp_file``
            and the trace header is ``{'POLY_DEG': power}``. When None, the
            flat image is taken from HDU 1 and the order trace (data and
            header) from HDU 3 of ``flatlamp_file``.
        flatlamp_file: path of the flat-lamp FITS file.
        config: configuration object handed to ``OptimalExtractionAlg``.
        logger: logger handed to ``OptimalExtractionAlg``.
        power: polynomial degree recorded in the trace header for the csv case
            and echoed back in the result.

    Returns:
        dict with keys 'spectral' and 'flatlamp_spectral' (each a dict with
        'data', 'xdim', 'ydim'), 'coeffs', 'op_handle' (the
        OptimalExtractionAlg instance), 'widths', 'xrange' and 'power'.
    """
    spectrum_flux, spectrum_header = fits.getdata(fits_file, header=True)
    print(flatlamp_file)

    # Use a context manager so the flat-lamp file handle is released; this
    # function is called once per spectrum, so a bare fits.open() leaks one
    # open file per call. Everything that reads from the HDU list happens
    # inside the block.
    with fits.open(flatlamp_file) as flat_hdus:
        if order_trace_csv is not None:
            order_trace_result = np.genfromtxt(order_trace_csv, delimiter=',')
            order_trace_header = {'POLY_DEG': power}
            flat_idx = 0
        else:
            order_trace_result = pd.DataFrame(flat_hdus[3].data)
            order_trace_header = flat_hdus[3].header
            flat_idx = 1

        flat_hdu = flat_hdus[flat_idx]
        flat_data = flat_hdu.data
        flat_xdim = int(flat_hdu.header['NAXIS1'])
        flat_ydim = int(flat_hdu.header['NAXIS2'])

        print(type(spectrum_flux), type(spectrum_header))
        print(type(order_trace_result), type(order_trace_header))
        opt_extract = OptimalExtractionAlg(flat_data, spectrum_flux, spectrum_header,
                                           order_trace_result, order_trace_header, config, logger)

    coeffs_rows = opt_extract.order_coeffs
    widths = opt_extract.order_edges
    xrange = opt_extract.order_xrange

    spectral = {'data': spectrum_flux, 'xdim': int(spectrum_header['NAXIS1']),
                                       'ydim': int(spectrum_header['NAXIS2'])}
    flatlamp_spectral = {'data': flat_data, 'xdim': flat_xdim, 'ydim': flat_ydim}

    return {'spectral': spectral, 'flatlamp_spectral': flatlamp_spectral, 'coeffs': coeffs_rows,
            'op_handle': opt_extract, 'widths': widths, 'xrange': xrange, 'power':power}

In [None]:
def make_fits(data, output_fits, metadata):
    """Write ``data`` as a FITS primary HDU, copying ``metadata`` into its header.

    Each key/value pair of ``metadata`` becomes a header entry. An existing
    file at ``output_fits`` is overwritten.
    """
    primary = fits.PrimaryHDU(data)
    for card_name, card_value in metadata.items():
        primary.header[card_name] = card_value

    primary.writeto(output_fits, overwrite=True)

In [None]:
def extract_optimal_trace(in_data, selected_order=None):
    """Return the rows of 2-D ``in_data`` listed in ``selected_order``.

    When ``selected_order`` is None every row is selected, in order.
    """
    if selected_order is not None:
        return in_data[selected_order, :]

    n_rows, _n_cols = np.shape(in_data)
    every_row = np.arange(0, n_rows, dtype=int)
    return in_data[every_row, :]

## 1.1 PARAS: define and load files: spectrum file, flat file, cure file, coeffs/width file

In [None]:
# input for PARAS data, from KPF-Pipeline-TestData/polygon_clipping_test/
mission = 'PARAS'    # NEID or PARAS
# NOTE: this value is overridden by `power = 3` further down in this cell.
power = 4            # power = 4 if using csv from PARAS

# Spectrum files are addressed as fits_base + <entry of fits_list> + '.fits'.
fits_base = TEST_DIR + '/polygon_clipping_test/paras_data/14feb2015/a00'
fiber_list = ['A']
f_idx = 0
# flatlamp_file = TEST_DIR + '/polygon_clipping_test/paras_data/paras.flat'+fiber_list[f_idx]+'.fits'
flatlamp_file = KPF_PIPELINE + 'test_results/paras/paras.flat'+fiber_list[f_idx]+'_L0.fits'
fits_list=['18', '19']

# csv from paras
# csv_file =  TEST_DIR+'/polygon_clipping_test/paras_data/order_trace_'+fiber_list[f_idx]+'.csv'

# csv from order trace module, paired with 'for_width_3' or 'for_fixed_width'
# csv_file = TEST_DIR + '/order_trace_test/for_optimal_extraction/paras_poly_3sigma_gaussian_pixel_3_width_3.csv'
# csv_file = TEST_DIR + '/order_trace_test/for_optimal_extraction/paras_poly_3sigma_gaussian_pixel_3.csv'
power = 3
width_type = 'for_width_3'                     # for using .csv from order trace, 'for_fixed_width', 'for_width_3'

# optimal extraction method
# method = OptimalExtractionAlg.NoRECT       # optimal extraction method: no rectified, 
# method = OptimalExtractionAlg.VERTICAL     # rectified using fractional summation in vertical direction
method = OptimalExtractionAlg.NORMAL         # rectified using fractional summation in normal direction

# File-name suffixes, looked up as rectification_method[method] in later cells.
# Presumably the list order matches the integer values of NORMAL, VERTICAL and
# NoRECT -- TODO confirm against OptimalExtractionAlg.
rectification_method = [ 'optimal_norm_fraction','optimal_vertical_fraction', 'optimal_not_rectified']

output_base =  MODULE_DIR + 'results/PARAS_3sigma/' + width_type + '/PARAS_'    # temporarily output to a local directory
# output for paras
print('fits_base:', fits_base, '\nflat file:', flatlamp_file, '\noutput base:', output_base)

# `config` is the parsed configuration handed to the extraction algorithm;
# start_logger re-reads the same file to configure logging.
config = configparser.ConfigParser()
config_file = MODULE_DIR + 'configs/default.cfg'
config.read(config_file)
logger = start_logger("OptimalExtractionAlg", config_file)
csv_file = None  # order trace result is included in flatlamp_file


### The following creates the FITS files that hold the flux-collection and optimal-extraction results for one order of one PARAS FITS file, for use by the optimal extraction unit tests

In [None]:
def make_flux_fits(order_flux, to_fits):
    """Stack one order's spectrum and flat flux into one image and write it to FITS.

    Args:
        order_flux: mapping providing 'data_width', 'data_height',
            'order_data' and 'order_flat'.
        to_fits: output path; an existing file is overwritten.

    The output image has ``2 * data_height`` rows: rows [0, h) hold
    'order_data' and rows [h, 2h) hold 'order_flat'.
    """
    w = order_flux.get('data_width')
    h = order_flux.get('data_height')
    out_flux = np.zeros((h*2, w))
    out_flux[0:h, :] = order_flux.get('order_data')
    out_flux[h:, :] = order_flux.get('order_flat')

    hdu = fits.PrimaryHDU(out_flux)
    hdu.writeto(to_fits, overwrite=True)
    
def get_flux_fits(flux_fits):
    """Read a stacked flux FITS image and split it into its two halves.

    Returns ``(order_data, order_flat, h, w)`` where ``w`` is NAXIS1,
    ``h`` is NAXIS2 // 2, ``order_data`` is rows [0, h) and ``order_flat``
    is the remaining rows.
    """
    stacked, hdr = fits.getdata(flux_fits, header=True)
    n_cols = hdr['NAXIS1']
    half_rows = hdr['NAXIS2']//2
    top_half = stacked[:half_rows, :]
    bottom_half = stacked[half_rows:, :]
    return top_half, bottom_half, half_rows, n_cols

def get_flux_info_csv(file_path: str):
    """Read order data and flat data stored side by side in a header-less csv.

    The csv has one row per x position. The first ``ncols // 2`` columns are
    the order data and the remaining columns are the flat data; both are
    returned transposed.

    Args:
        file_path: path of the csv file.

    Returns:
        ``(order_data, order_flat, data_h, data_w)`` with
        ``data_h = ncols // 2`` and ``data_w`` the number of csv rows.

    Raises:
        FileNotFoundError: if ``file_path`` is not an existing file.
            Previously this case fell through to the ``return`` statement and
            failed with an UnboundLocalError.
    """
    if not os.path.isfile(file_path):
        raise FileNotFoundError('flux csv file not found: {}'.format(file_path))

    values = pd.read_csv(file_path, header=None, index_col=None).values
    data_w, n_cols = np.shape(values)
    data_h = n_cols//2
    order_data = np.transpose(values[:, 0:data_h])
    order_flat = np.transpose(values[:, data_h:])
    return order_data, order_flat, data_h, data_w

In [None]:
# Extract a single order (test_row) from the first PARAS spectrum, write the
# collected flux and the extraction result to FITS, and compare the extraction
# with the same row of a previously produced full-spectrum result.
fits_file = fits_base + fits_list[0]+'.fits'
# Overrides the `csv_file = None` set in the configuration cell, so the order
# trace is read from this csv rather than from the flat file.
csv_file = TEST_DIR + '/order_trace_test/for_optimal_extraction/paras_poly_3sigma_gaussian_pixel_3_width_3.csv'
sample_info = load_spectral_sample(fits_file, csv_file, flatlamp_file, config, logger, power)
test_row = 75
c_set = np.array([test_row], dtype=int)
poly_c = sample_info.get('op_handle')
print(fits_file)

# Step 1: collect the flux of the selected order using the chosen
# rectification method.
c_order = c_set[0]
order_flux = poly_c.get_flux_from_order(poly_c.order_coeffs[c_order], poly_c.get_order_edges(c_order),
                                        poly_c.get_order_xrange(c_order), poly_c.spectrum_flux, poly_c.flat_flux,
                                        norm_direction = method)

print(np.shape(order_flux.get('order_data')))
out_file = MODULE_DIR + 'results/PARAS_3sigma/paras_flux_'+rectification_method[method]+'.fits'
print("flux file: ", out_file)
make_flux_fits(order_flux, out_file)

# Step 2: read the flux back from the file just written and run the optimal
# extraction on it.
order_data, order_flat, data_height, data_width = get_flux_fits(out_file)

optimal_output = poly_c.optimal_extraction(order_data, order_flat, data_height, data_width)
optimal_result = optimal_output['extraction'].flatten()  # flattened to 1-D for this single order

hdu = fits.PrimaryHDU(optimal_result)
out_file = MODULE_DIR+  'results/PARAS_3sigma/paras_extraction_'+rectification_method[method]+'.fits'
hdu.writeto(out_file, overwrite=True)    
print("optimal extraction file: ", out_file)

# Step 3: load the reference result; this file must already exist on disk.
target_file = MODULE_DIR + 'results/'+mission+'_3sigma/' + \
                  width_type +'/'+mission+'_' + fits_list[0] + '_extraction_'+ rectification_method[method] + '.fits'
# NOTE(review): the HDU list opened here is never closed.
target_data = fits.open(target_file)
target_optimal_result = target_data[0].data[test_row]

print('target_file: ', target_file)

# Compare only the non-NaN entries. The positions are not checked to coincide,
# only their counts; nothing is printed when both results are entirely NaN.
out_not_nan = np.argwhere(~np.isnan(optimal_result))
target_not_nan = np.argwhere(~np.isnan(target_optimal_result))

if np.size(out_not_nan) == np.size(target_not_nan):
    if np.size(out_not_nan) != 0:
        red_diff = target_optimal_result[target_not_nan] - optimal_result[out_not_nan]
        non_zero_diff = np.where(red_diff != 0.0)
        if (np.size(non_zero_diff) != 0):
            # indices (into the non-NaN entries) where the two results differ
            print(non_zero_diff)
        else:
            print("same data")
else:
    print("not the same NaN data")

## 1.2 NEID: define and load files: spectrum file, flat file, coeffs/width file

In [None]:
# input for NEID data
# NOTE: this cell rebinds the names set by the PARAS configuration cell
# (mission, power, fits_base, flatlamp_file, fits_list, csv_file, method, ...).
mission = 'NEID'
power = 3
# Spectrum files are addressed as fits_base + <entry of fits_list> + '.fits'.
fits_base = TEST_DIR+'/NEIDdata/TAUCETI_20191217/L0/neidTemp_2D20191217T'
flatlamp_file = TEST_DIR+'/NEIDdata/FLAT/stacked_2fiber_flat.fits'
fits_list = ['023129', '023815','024240','024704', '025129', '025613', '030057','030724','031210','031636']

csv_base = TEST_DIR+'/order_trace_test/for_optimal_extraction/'   
# csv_file = csv_base + 'neid_poly_3sigma_gaussian_pixel_3.csv'        # paired with 'for_fixed_width'
csv_file = csv_base + 'neid_poly_3sigma_gaussian_pixel_3_width_3.csv'  # paired with 'for_width_3'

width_type = 'for_width_3'     #'for_fixed_width', 'for_width_3'

# optimal extraction method
method = OptimalExtractionAlg.NoRECT      # optimal extraction method: no rectified, 
# method = OptimalExtractionAlg.VERTICAL    # rectified using fractional summation in vertical direction
# method = OptimalExtractionAlg.NORMAL      # rectified using fractional summation in normal direction

# File-name suffixes, looked up as rectification_method[method] in later cells.
rectification_method = [ 'optimal_norm_fraction','optimal_vertical_fraction', 'optimal_not_rectified']

output_base = MODULE_DIR + 'results/NEID_3sigma/' + width_type + '/NEID_'      # temporarily output to a local directory 

# output for neid
print('fits_base:', fits_base, '\ncsv_file:', csv_file, '\nflat file:', flatlamp_file, '\noutput base:', output_base)

# `config` is the parsed configuration handed to the extraction algorithm;
# start_logger re-reads the same file to configure logging.
config = configparser.ConfigParser()
config_file = MODULE_DIR + 'configs/default.cfg'
config.read(config_file)
logger = start_logger("OptimalExtractionAlg", config_file)

## Optimal extraction (or sum fraction)  on a list of NEID/PARAS fits, create L1 output on original spectrum

In [None]:
# Run the extraction on every spectrum in fits_list, using whichever
# configuration cell (PARAS or NEID) was executed last.
for f in range(0, len(fits_list)):
#for f in range(1, 10):
    fits_file = fits_base + fits_list[f]+'.fits'
    
    sample_info = load_spectral_sample(fits_file, csv_file, flatlamp_file, config, logger, power)
    poly_c = sample_info.get('op_handle')

    total_order = poly_c.get_spectrum_order()
    if poly_c.get_instrument().upper() == 'NEID':
        # For NEID take every get_total_orderlettes()-th trace, starting at 0.
        c_set = np.arange(0, total_order, poly_c.get_total_orderlettes(), dtype=int)
    else:
        # None is passed through as order_set; presumably it selects all
        # orders -- TODO confirm against extract_spectrum.
        c_set = None
    
    print(fits_file)
    optimal_output = poly_c.extract_spectrum(rectification_method=method, extraction_method='optimal',
                                             order_set=c_set, show_time=True, print_debug='')

    optimal_result = optimal_output['optimal_extraction_result']    # result in Pandas Dataframe format
    out_order_data = optimal_result.values
    
    plot_output(out_order_data, optimal_result.attrs['TOTALORD'])   # optimal extraction 
    
    # Output path for this spectrum; the write itself is currently disabled
    # (make_fits call commented out below), so no file is produced here.
    output_order_file = MODULE_DIR + 'results/'+mission+'_3sigma/'+width_type+'/'+mission+ '_' \
                        + fits_list[f] + '_extraction_' + rectification_method[method] + '.fits'
    # make_fits(out_order_data, output_order_file, optimal_result.attrs )
    
    
    # Report the (row, column) positions of any NaN values in the result.
    nan_data = np.argwhere(np.isnan(out_order_data))
    if (np.size(nan_data)>0):
        print('there is pixel with nan data', nan_data)
    else:
        print('no pixel with nan data')
    # compare the result to that before the porting
    # (the comparison code that follows is disabled: it is wrapped in bare string literals)
    
    """
    target_base = '/Users/cwang/documents/KPF/KPF-Pipeline/AlgorithmDev_07122020/test_data_04032020/'
    target_file = target_base + 'order_trace_test/for_optimal_extraction/output/rv_'+mission+'_3sigma/' + \
                  width_type +'/'+mission+'_' + fits_list[f] + '_extraction_'+ rectification_method[method] + '.fits'
    print('target_file: ', target_file)
    target_data = fits.getdata(target_file)
    """
    """
    # for norect only
    if mission == 'NEID':
        # the following directory contain level 1 data from recipe running stored locally
        target_file= '/Users/cwang/documents/KPF/KPF-Pipeline/test_results/neid/tmp/'+ \
             'stacked_2fiber_flat_L0_neidTemp_2D20191217T' + fits_list[f] + '_norect_L1.fits'
       
    else:
        target_file = '/Users/cwang/documents/KPF/KPF-Pipeline/test_results/paras/tmp/' + \
            'paras.flatA_L0_a00' + fits_list[f] + '_norect_L1.fits'
    
    print('target_file: ', target_file)
    target_hdu = fits.open(target_file)
    target_data = target_hdu[1].data
            
    compare_result = poly_c.result_test(target_data, out_order_data)
    if compare_result['result'] != 'ok':
        print(compare_result['result'], compare_result['msg'])
    else:
        print(compare_result['result'])
    """


## Comparison between the NEID L1 data and the result from the optimal extraction module

In [None]:
# Settings that identify which previously written extraction result to compare
# against the NEID L1 file.
mission = 'NEID'
method = OptimalExtractionAlg.VERTICAL
width_type = 'for_width_3'
rectification_method = [ 'optimal_norm_fraction','optimal_vertical_fraction', 'optimal_not_rectified']

neid_L1_file = TEST_DIR + '/NEIDdata/TAUCETI_20191217/L1/neidL1_20191217T023129.fits'

# my_L1_file must already exist on disk; this cell only reads it.
output_base =  MODULE_DIR + 'results/NEID_3sigma/' + width_type + '/'+mission+'_'  
my_L1_file = output_base+'023129_extraction_'+ rectification_method[method] +'.fits'

neid_L1_fits, neid_header = fits.getdata(neid_L1_file, header=True)
my_L1_fits, my_header = fits.getdata(my_L1_file, header=True)
                                   
# Row offset between the two products: row i of my_L1_fits is compared with
# row i+d of neid_L1_fits in the plotting loop below.
d = 7 
neid_size = np.shape(neid_L1_fits)
my_size = np.shape(my_L1_fits)
# Number of rows that can be paired up given the offset d.
total_avail = min(neid_size[0]-d, my_size[0])
print('neid: ',np.shape(neid_L1_fits))
print('my: ', np.shape(my_L1_fits))
print('size_y: ', total_avail)

# Lowest column index that will be shown; the requested window is clamped to it.
x0 = 450

# NOTE: interactive -- this cell blocks until the user types the window centre
# and half-width (both must parse as integers).
center_x = input('center_x: ')
c_x = center_x.strip()
width = input('extension to the center: ')
w_x = width.strip()

c_x = int(c_x)
w_x = int(w_x)
# Column window [s_x, e_x), clamped to x0 on the left and the image width on the right.
s_x = max(x0, c_x - w_x)
e_x = min(c_x+w_x, my_size[1])

print('show x from ', s_x, ' to ', e_x)
# For every pair of rows, plot both spectra over the selected column window
# and then their relative difference.
for i in np.arange(0, total_avail, dtype=int):
    neid_order = neid_L1_fits[i+d, s_x:e_x]
    my_order = my_L1_fits[i, s_x:e_x]

    plt.figure(figsize=(18,8))
    plt.subplot(1, 1, 1)
    plt.plot(neid_order, 'b--', label='neid order: '+str(i+d))
    plt.plot(my_order, 'r--', alpha=0.5, label = 'my order: ' + str(i))

    plt.title( '['+str(s_x)+','+str(e_x)+']')
    plt.legend(loc="upper right", prop={'size': 12})
    plt.show()

    plt.figure(figsize=(18,8))
    plt.subplot(1, 1, 1)
    # Absolute difference normalised by the larger of the two magnitudes.
    # Computed once, with vectorised np.abs instead of list comprehensions
    # that reused the loop variable name `i`.
    larger_magnitude = np.maximum(np.abs(my_order), np.abs(neid_order))
    ratio = np.absolute((my_order - neid_order)/larger_magnitude)
    plt.plot(ratio, 'g--', label='difference: ')
    plt.show()

        