In [1]:
# This notebook trains on Balrog injections to relate detection rates to the principal components of the survey condition maps.

In [2]:
# The magnitude bins I used are:

# less than 20

# 20 - 22

# 22 - 23

# 23 - 24

# 24 - 25

# 25 - 26

# greater than 26

In [3]:
import numpy as np
from numpy import savetxt
from numpy import loadtxt
import fitsio
import healpy as hp
import matplotlib.pyplot as plt
import Config
from scipy import interpolate as inter
from astropy.table import Table

In [4]:
res = 4096 # Resolution of the heal pixels
sigma = 0.5 # Sigma used for gaussian weighting
numBins = 100 # Number of bins to use
perVar = 0.98 # Percent of the variance to be captured
perMap = 0.625 # Percent of the PC maps to use, adjust this later

In [5]:
condMean = [] # Mean value of the conditions (to be used later in normalization)
condStds = [] # Standard deviation of the conditions (to be used later in normalization)

In [6]:
validPix = fitsio.read("/hdfs/bechtol/balrog/y3/y3a2_survey_conditions_maps/Kyle_Stuff/training/Valid_"+str(res)+"_Pixels.fits")['PIXEL']
# Boolean alternative to validPix allows for some things to be easier.
pixCheck = np.full(12*(res**2), False, dtype = bool)
pixCheck[validPix] = True

In [7]:
# This is the actual file containing all of the balrog data
balrFile = '/afs/hep.wisc.edu/bechtol-group/MegansThings/balrog_detection_catalog_sof_run2_stars_v1.4_avg_added_match_flags.fits'
# Only the columns needed below are read. Most of them are flags used for filtering; the pieces
# that survive the filtering are detected, true_ra and true_dec (converted to healpixels below).
balrData = fitsio.read(balrFile, columns = ['detected', 'true_ra', 'true_dec',
                                            'flags_foreground', 'flags_badregions', 'flags_footprint',
                                            'match_flag_1.5_asec', 'true_g_Corr', 'true_gr_Corr'])

# These are in degrees which is why lonlat is set to True in the ang2pix call below.
balrRA = balrData['true_ra']
balrDEC = balrData['true_dec']
# This is used for detection rates, each point is either a 0 (no detection) or a 1 (detection)
balrDETRepeats = balrData['detected']
# Everything from here on out is simply used in order to filter the data
FOREGROUND = balrData['flags_foreground']
BADREGIONS = balrData['flags_badregions']
FOOTPRINT = balrData['flags_footprint']
ARCSECONDS = balrData['match_flag_1.5_asec']
# Magnitudes are used for both color and brightness cuts.
# r is recovered from g and the (g - r) color: r = g - (g - r).
GMAG = balrData['true_g_Corr']
RMAG = balrData['true_g_Corr'] - balrData['true_gr_Corr']

# Keep only injections that pass the flag, match, color and magnitude cuts.
# Detection status is deliberately NOT part of the cut: undetected injections
# are needed to measure the detection rate per pixel.
cutIndices = np.where((FOREGROUND == 0) &
                      (BADREGIONS < 2) &
                      (FOOTPRINT == 1) &
                      (ARCSECONDS < 2) &
                      # Color cuts
                      (GMAG - RMAG >= -0.3) &
                      (GMAG - RMAG <= 1) &
                      # Magnitude cut: this run trains the "greater than 26" bin
                      (GMAG > 26))[0]

# This reduces the data down to the injections that passed the cuts.
balrDETRepeats = balrDETRepeats[cutIndices]
balrRA = balrRA[cutIndices]
balrDEC = balrDEC[cutIndices]

# This converts the RA and DEC values from above to healpixels so we can compare to the sky condition.
balrPIXRepeats = hp.ang2pix(res, balrRA, balrDEC, lonlat = True, nest = True)

# Keep the per-injection arrays sorted by pixel, as before. The aggregation
# below no longer depends on this ordering.
sortInds = balrPIXRepeats.argsort()
balrPIXRepeats = balrPIXRepeats[sortInds]
balrDETRepeats = balrDETRepeats[sortInds]

# Collapse the per-injection arrays to one entry per unique pixel.
# pixInverse[k] is the position in balrPIX of the pixel of injection k, so
# bincount over it gives the number of injections per pixel, and bincount
# weighted by the 0/1 detection flag gives the number of detections per pixel.
# This replaces an element-by-element Python loop over every injection.
balrPIX, pixInverse = np.unique(balrPIXRepeats, return_inverse = True)
balrINJ = np.bincount(pixInverse, minlength = len(balrPIX)).astype(balrPIX.dtype)
balrDET = np.bincount(pixInverse, weights = balrDETRepeats,
                      minlength = len(balrPIX)).astype(balrPIX.dtype)

# Drop any pixel that is not in the valid-pixel list. The mask is computed once,
# before balrPIX itself is filtered.
validMask = pixCheck[balrPIX]
balrDET = balrDET[validMask]
balrINJ = balrINJ[validMask]
balrPIX = balrPIX[validMask]

In [8]:
# Common path prefix of the survey condition files at this resolution.
directory = '/hdfs/bechtol/balrog/y3/y3a2_survey_conditions_maps/Kyle_Stuff/training/'+str(res)+'_'
conditions = Config.conditions

nPixFullSky = 12*(res**2)       # Length of a full-sky map at this resolution
placeholder = -1.6375e+30       # Default value marking pixels with no signal

balrCondMaps = []
for cond in conditions:
    # Read one condition file and scatter its signal onto a full-sky array.
    condData = fitsio.read(directory + cond + '.fits')
    condSigExt = np.full(nPixFullSky, placeholder)
    condSigExt[validPix] = condData['SIGNAL']
    # Anything still holding the placeholder is masked.
    condSigExt[condSigExt == placeholder] = hp.UNSEEN
    # Only the pixels that received injections are kept.
    balrCondMaps.append(condSigExt[balrPIX])

# One row per condition, one column per injected pixel.
balrCondMaps = np.array(balrCondMaps, dtype = object)

# Keep references to the uncorrected counts for later comparisons, and record
# the overall detection efficiency (total detections / total injections).
originalBalrDET, originalBalrINJ = balrDET, balrINJ
aveEff = originalBalrDET.sum() / originalBalrINJ.sum()

In [9]:
# Standardize every condition map (zero mean, unit standard deviation) as the
# first step of PCA, recording the mean and standard deviation that were used.
#
# condMean / condStds are created in an earlier cell and filled here. They are
# emptied first so that re-running this cell does not append a second copy of
# every entry (which would corrupt the saved normalization files).
condMean.clear()
condStds.clear()

balrStanMaps = []
for condMap in balrCondMaps:
    # Compute each statistic once and reuse it for both storage and scaling.
    mapMean = np.average(condMap)
    mapStd = np.std(condMap)
    condMean.append(mapMean)
    condStds.append(mapStd)
    balrStanMaps.append((condMap - mapMean) / mapStd)

balrStanMaps = np.array(balrStanMaps)

In [10]:
# This gives the covariance matrix of the standardized maps
# Bias is true since the variance of each individual standardized map should be 1
cov = np.cov(balrStanMaps.astype(float), bias = True)

# Eigen-decomposition of the covariance matrix. The matrix is symmetric, so
# eigh is used: it returns real eigenvalues and orthonormal eigenvectors.
# Column evectors[:, k] is the eigenvector belonging to evalues[k].
evalues, evectors = np.linalg.eigh(cov)

# eigh returns eigenvalues in ascending order; reorder so the component that
# explains the most variance comes first, keeping values and vectors paired.
descending = np.argsort(evalues)[::-1]
evalues = evalues[descending]
evectors = evectors[:, descending]

# This cuts after the specified fraction of the variance has been achieved.
# If rounding keeps the cumulative fraction just below perVar, keep everything.
explainedFrac = np.cumsum(evalues) / np.sum(evalues)
reached = np.nonzero(explainedFrac >= perVar)[0]
cutoff = int(reached[0]) + 1 if len(reached) > 0 else len(evalues)

# One retained eigenvector per ROW, so that featVec times the standardized maps
# projects the maps onto the leading principal components.
featVec = evectors[:, 0:cutoff].T

In [11]:
# Project the standardized condition maps onto the feature vectors, giving one
# reduced (PCA) map per row of featVec.
balrRedMaps = featVec @ balrStanMaps

In [12]:
# Record the standard deviation of each PCA map, then rescale that map in place
# to unit standard deviation so every map shares the same x range later on.
# Note: this overwrites balrRedMaps, so re-running this cell alone would record
# the standard deviations of the already-rescaled maps.
redStds = []
for rowIdx, pcMap in enumerate(balrRedMaps):
    pcStd = np.std(pcMap)
    redStds.append(pcStd)
    balrRedMaps[rowIdx] = pcMap / pcStd

In [13]:
# Goal of this method is to find the index of the map that has the largest impact on detection rates.
def mostSigPCMap(redMaps, detBALR, balrINJ = balrINJ, sigma = sigma, numBins = 100):
    """Return the index of the PC map along which detection rate varies the most.

    For every map in redMaps, the relative detection rate is evaluated on
    numBins evenly spaced points from -3 to 3. At each point xi every pixel is
    weighted by exp(-((value - xi) / sigma)**2), and the weighted detections
    divided by the weighted injections are normalized by the module-level
    aveEff. A map's score is the sum of squared deviations of that curve from 1.

    Parameters
    ----------
    redMaps : sequence of 1-D arrays
        One PC map per entry, each with one value per pixel.
    detBALR : array
        Detections per pixel, aligned with the entries of each map.
    balrINJ : array
        Injections per pixel. The default is the module-level balrINJ as it was
        when this function was defined.
    sigma : float
        Width of the gaussian weighting. The default is the module-level sigma
        as it was when this function was defined.
    numBins : int
        Number of points on the -3..3 grid.

    Returns
    -------
    Index into redMaps of the map with the largest score (first one on ties).

    Raises
    ------
    ValueError
        If any score is NaN (for example when the weighted injections sum to
        zero at some grid point), since no meaningful maximum exists then.
    """
    # xValues for the curve, goes out to 3 standard deviations. The grid is the
    # same for every map, so it is built once.
    x = np.linspace(-3, 3, numBins)

    maxAdjustment = []

    for onePC in redMaps:

        pcValues = onePC.astype(float)
        y = []

        for xi in x:
            # Gaussian weighting the values close by to each x value. The same
            # weights apply to detections and injections, so compute them once.
            weights = np.exp(-1*(((pcValues - xi) / sigma)**2))
            totDet = np.sum(detBALR * weights)
            totInj = np.sum(balrINJ * weights)
            y.append((totDet / totInj) / aveEff)

        y = np.array(y)

        # Make the error the sum of the squared difference between the binned values and 1.
        maxAdjustment.append(np.sum((y - 1)**2))

    maxAdjustment = np.array(maxAdjustment)

    if np.isnan(maxAdjustment).any():
        raise ValueError("mostSigPCMap: at least one PC map produced a NaN score")

    # Return wherever the error is the largest (first occurrence on ties).
    return np.argmax(maxAdjustment)

In [14]:
# Start from the uncorrected detection counts, with empty records of the
# correction curves and of the PC map indices chosen for them.
balrDET = originalBalrDET
yValues, corrIndices = [], []

In [15]:
# Working copy of the PC maps; one map is removed from it per iteration.
trimBalrRedMaps = np.copy(balrRedMaps)

# Iterate however many times is called for.
iterations = int(perMap * len(balrRedMaps))

# Sample points for every correction curve, out to 3 standard deviations.
# The number of points comes from the configured numBins.
x = np.linspace(-3, 3, numBins)

for _ in np.arange(iterations):

    # Figure out the most significant map. The current balrINJ, sigma and
    # numBins are passed explicitly rather than relying on the defaults that
    # were captured when mostSigPCMap was defined.
    index = mostSigPCMap(trimBalrRedMaps, balrDET, balrINJ = balrINJ, sigma = sigma, numBins = numBins)

    # Store this index for later use. It indexes the trimmed list of maps.
    corrIndices.append(index)

    # Use this map to generate values.
    onePC = trimBalrRedMaps[index]
    pcValues = onePC.astype(float)

    y = []

    for xi in x:
        # Gaussian weight the values when determining y Values. The weights
        # are shared by detections and injections, so compute them once.
        weights = np.exp(-1*(((pcValues - xi) / sigma)**2))
        totDet = np.sum(balrDET * weights)
        totInj = np.sum(balrINJ * weights)
        y.append((totDet / totInj) / aveEff)

    y = np.array(y)

    yValues.append(y)

    # Generate an interpolation function with constant extrapolation around the ends.
    f = inter.interp1d(x, y, bounds_error = False, fill_value = (y[0], y[-1]))

    # Relative detection rate predicted for each pixel; dividing by it flattens
    # the dependence of the detections on this principal component.
    correction = f(pcValues)

    correction = 1 / correction

    # Apply correction and remove whichever principal component was used.
    balrDET = balrDET * correction

    trimBalrRedMaps = np.delete(trimBalrRedMaps, index, axis = 0)

In [16]:
# Translate the stored indices back to positions in the full list of PC maps.
# Each stored index refers to the list as it stood after the earlier removals,
# so the same removals are replayed on a list of the original positions.
originalIndices = np.arange(len(balrRedMaps))
pickedOriginals = []

for index in corrIndices:
    pickedOriginals.append(originalIndices[index])
    # Drop the position just used, exactly as the map itself was dropped.
    stillAvailable = np.ones(len(originalIndices), dtype = bool)
    stillAvailable[index] = False
    originalIndices = originalIndices[stillAvailable]

actualCorrIndices = np.array(pickedOriginals)

In [17]:
# Directory prefix shared by every output file written below.
file = '/hdfs/bechtol/balrog/y3/y3a2_survey_conditions_maps/Kyle_Stuff/training/'

In [18]:
# The following cells write the training products to disk, with different
# formatting when the data size is small.
# Per-condition means used for standardization.
np.savetxt(
    fname=file + str(res) + '_Inter_Blue_>26_Means.csv',
    X=condMean,
    delimiter=',',
)

In [19]:
# Per-condition standard deviations used for standardization.
np.savetxt(
    fname=file + str(res) + '_Inter_Blue_>26_Standard_Deviations.csv',
    X=condStds,
    delimiter=',',
)

In [20]:
# Standard deviations of the PCA maps before they were rescaled.
np.savetxt(
    fname=file + str(res) + '_Inter_Blue_>26_Red_Standard_Deviations.csv',
    X=redStds,
    delimiter=',',
)

In [21]:
# Original indices of the PC maps used for corrections, in order of use.
np.savetxt(
    fname=file + str(res) + '_Inter_Blue_>26_Indices.csv',
    X=actualCorrIndices,
    delimiter=',',
)

In [22]:
# Feature vectors, tab separated, each entry written with its string form.
np.savetxt(
    fname=file + str(res) + '_Inter_Blue_>26_Feature_Vectors.csv',
    X=featVec,
    fmt='%s',
    delimiter='\t',
)

In [23]:
# Store every correction curve as a table column named after the original
# index of the PC map it was derived from, then write the table to a FITS file.
my_table = Table()
for pcIndex, curve in zip(actualCorrIndices, yValues):
    my_table[str(pcIndex)] = curve
yValuesPath = file + str(res) + '_Inter_Blue_>26_y_Values' + '.fits'
my_table.write(yValuesPath, overwrite = True)