# Balrog Galaxies Classified as Stars

## ALWAYS USE TEST1A

In [1]:
import sys
sys.path.insert(1, '/afs/hep.wisc.edu/home/kkboone/software/StarWeights/StellarStreams')

import fitsio
import numpy as np
import healpy as hp
import scipy as sc
import matplotlib.pyplot as plt
import matplotlib.style
import matplotlib
from scipy import interpolate as inter
from astropy.table import Table
import BalrogTestConfig as balrConfig
import Config
from TrainAndFullMap import *
from timeit import default_timer as timer
import StellarConfig as strConfig
from matplotlib.path import Path
matplotlib.style.use('des_dr1')

In [2]:
# Analysis settings shared by the cells below.
res = 4096              # HEALPix resolution handed to hp.ang2pix
binNum = 10             # number of bins used for every survey-condition map
cutOffPercent = 0.01    # tolerance on |binned ratio - 1|; despite the name it is a fraction
classCut = 1.5          # largest measured EXTENDED_CLASS_SOF value that is kept
rMagCut = [0, 22.9]     # magnitude window applied to matRmag: (lower, upper]

# Inputs of the colour-magnitude selection, taken from the stellar configuration.
path = strConfig.path   # polygon tested with Path.contains_points
mu = strConfig.mu       # subtracted from the g magnitude (presumably a distance modulus - confirm in StellarConfig)

In [3]:
# Input catalogues, all resolved through the project configuration modules.
deepFiles = balrConfig.deepFiles                # deep-field catalogues (ID and KNN_CLASS are read from them)
matBalrGalaFile = strConfig.matBalrFile         # matched Balrog catalogue
validPixFile = balrConfig.test1aValidPixFile    # table whose PIXEL column lists the usable pixels

In [4]:
# Only the ID numbers and the original KNN classification are needed from the deep fields.
deepCols = ['KNN_CLASS', 'ID']

# Read each deep-field file once and join the columns as arrays. This avoids
# boxing every element into a Python list before converting back to numpy.
deepTables = [fitsio.read(deepFile, columns = deepCols) for deepFile in deepFiles]
deepID = np.concatenate([deepTable['ID'] for deepTable in deepTables])
deepClass = np.concatenate([deepTable['KNN_CLASS'] for deepTable in deepTables])

# Lookup table from ID to class: the class of the object with ID `i` sits at
# index `i - minID`. Offsetting by the smallest ID keeps the table no longer
# than the ID range while still allowing a plain array index instead of a
# search. Slots whose ID never occurs in the deep fields keep the fill value 0.
minID = np.min(deepID)
deepGalID = np.zeros(np.max(deepID) - minID + 1)
deepGalID[deepID - minID] = deepClass

In [5]:
# Pixels accepted for the analysis: the PIXEL column of the valid-pixel file.
validPixTable = fitsio.read(validPixFile)
validPix = validPixTable['PIXEL']

In [6]:
# Columns read from the matched Balrog catalogue: the true position and ID of
# each object plus its measured classification and PSF magnitudes.
galaCols = ['true_ra', 'true_dec', 'meas_EXTENDED_CLASS_SOF', 'meas_psf_mag', 'true_id']
galaData = fitsio.read(matBalrGalaFile, columns = galaCols)

In [7]:
# Pull the individual columns out of the catalogue.
ID = galaData['true_id']
matRa, matDec = galaData['true_ra'], galaData['true_dec']
matClass = galaData['meas_EXTENDED_CLASS_SOF']

# meas_psf_mag is indexed per band; columns 0 and 1 are used as g and r here
# (band order assumed from the variable names - confirm against the catalogue).
psfMags = galaData['meas_psf_mag']
matGmag = psfMags[:, 0]
matRmag = psfMags[:, 1]

In [8]:
# Pixel index of every object from its true position (lon/lat input, NESTED ordering).
matPix = hp.ang2pix(res, matRa, matDec, lonlat = True, nest = True)

In [9]:
# Keep only objects that fall on a valid pixel. matRa and matDec are left
# uncut on purpose: the fraction cells use len(matRa) as the original count.
pixCut = np.isin(matPix, validPix)
ID, matPix, matRmag, matGmag, matClass = (
    column[pixCut] for column in (ID, matPix, matRmag, matGmag, matClass)
)

In [10]:
# Fraction of the full catalogue left after the valid-pixel cut
# (matRa is never cut, so it still has the original length).
matPix.size / matRa.size

0.9860116284599068

In [11]:
# Keep objects whose deep-field KNN class is 1, looked up through the
# ID-offset table (presumably class 1 marks galaxies, given the notebook title - confirm).
origClass = deepGalID[ID - minID]
idCut = np.where(origClass == 1)[0]
matPix, matRmag, matGmag, matClass = (
    column[idCut] for column in (matPix, matRmag, matGmag, matClass)
)

In [12]:
# Fraction of the full catalogue left after also requiring deep-field class 1.
matPix.size / matRa.size

0.6844482347770323

In [13]:
# Keep objects with rMagCut[0] < matRmag <= rMagCut[1].
magLow, magHigh = rMagCut
inMagRange = (matRmag > magLow) & (matRmag <= magHigh)
magCut = np.where(inMagRange)[0]

matPix, matRmag, matGmag, matClass = (
    column[magCut] for column in (matPix, matRmag, matGmag, matClass)
)

In [14]:
# Fraction of the full catalogue left after the magnitude cut.
matPix.size / matRa.size

0.15052407174749985

In [15]:
# Colour-magnitude coordinates of each object: (g - r) against (g - mu).
GR = matGmag - matRmag
MG = matGmag - mu

# Keep only objects whose (GR, MG) point lies inside the configured polygon.
cmdPoints = np.vstack((GR, MG)).T
filterSelection = Path.contains_points(path, cmdPoints)

# Only the pixel index and the measured class are needed from here on.
matPix = matPix[filterSelection]
matClass = matClass[filterSelection]

In [16]:
# Fraction of the full catalogue left after the colour-magnitude selection.
matPix.size / matRa.size

0.02711894084399883

In [17]:
# Indices of the objects whose measured class lies in [0, classCut].
inClassRange = (matClass <= classCut) & (matClass >= 0)
classCuts = np.where(inClassRange)[0]

In [18]:
# Apply the class cut, then keep a sorted copy of the surviving pixel indices.
# np.sort returns a new array, so matPix itself stays in catalogue order.
matPix = matPix[classCuts]
origDetPix = np.sort(matPix)

In [19]:
# Fraction of the full catalogue left after the measured-class cut.
matPix.size / matRa.size

0.001173334091794883

In [20]:
# Count the selected objects in every valid pixel. Appending validPix adds each
# valid pixel once, so np.unique reports every valid pixel (even empty ones);
# subtracting 1 removes that extra entry again.
# This assumes validPix itself contains no repeated pixels.
paddedDetPix = np.append(validPix, origDetPix)
origAllDetPixCounts = np.unique(paddedDetPix, return_counts = True)[1] - 1

In [21]:
origCondFiles = balrConfig.test1aCondFiles

# Read the whole SIGNAL column of every survey-condition file (no cropping
# happens here); the result has one row per condition file.
origCondMaps = np.array([fitsio.read(condFile)['SIGNAL'] for condFile in origCondFiles])

In [22]:
# Percentages of the injections to keep: 20 log-spaced values from 10 to 100.
persToUse = np.logspace(1, 2, num = 20)

In [23]:
def mostSigInd(y, cutoff = None):
    """Pick the row of ``y`` that deviates most from 1.

    Parameters
    ----------
    y : numpy.ndarray
        One row per survey-condition map; each row holds the binned ratios
        for that map.
    cutoff : float, optional
        Tolerance on the largest single-entry deviation ``|y - 1|``. When
        omitted, the module-level ``cutOffPercent`` is used, as before.

    Returns
    -------
    index : int
        Index of the row with the largest summed squared deviation from 1,
        or -1 when every entry is already within ``cutoff`` of 1 (also -1
        when no row has a deviation that compares greater than zero).
    maxSingError : float
        Largest absolute single-entry deviation from 1 anywhere in ``y``.
    """
    if cutoff is None:
        cutoff = cutOffPercent

    maxSingError = np.max(np.abs(y - 1))
    if maxSingError <= cutoff:
        return -1, maxSingError

    index = -1
    maxSquaredDiff = 0
    for i, row in enumerate(y):
        diff = np.sum((row - 1)**2)
        # The strict comparison keeps the first row on ties and skips NaN rows.
        if diff > maxSquaredDiff:
            maxSquaredDiff = diff
            index = i

    return index, maxSingError

In [None]:
# Injection catalogue.
allPixFile = balrConfig.test1aAllPixFile
origInjData = fitsio.read(allPixFile)

# Sorted pixel index of every injection, plus the distinct pixels and how many
# injections each of them holds.
origInjPix = np.sort(hp.ang2pix(res, origInjData['RA'], origInjData['DEC'], nest = True, lonlat = True))
origValidPix = np.unique(origInjPix)
origInjPixUnique, origInjPixCounts = np.unique(origInjPix, return_counts = True)

# Everything from here until the main loop is to generate matchInds

# Position of the first injection / first detection of each pixel inside the
# sorted arrays (running totals of the per-pixel counts, shifted by one pixel).
startInjInds = np.append(0, np.cumsum(origInjPixCounts)[:-1])
startDetInds = np.append(0, np.cumsum(origAllDetPixCounts)[:-1])

# matchInds[k] is the injection paired with the k-th entry of origDetPix: the
# detections of a pixel are paired, in order, with the first injections of that pixel.
# NOTE(review): pixel i of origAllDetPixCounts is assumed to be pixel i of
# origInjPixUnique, i.e. the valid-pixel list and the injection pixels line up - confirm.
matchInds = np.zeros(len(origDetPix), dtype = int)
for i, nDet in enumerate(origAllDetPixCounts):
    if nDet == 0:
        continue
    firstDet = startDetInds[i]
    matchInds[firstDet: firstDet + nDet] = np.arange(nDet) + startInjInds[i]
    
def _binLimits(count, nBins):
    """Return nBins + 1 slice limits that split `count` sorted items into near-equal bins."""
    limits = [0]
    for j in range(nBins):
        # Share the items that are left evenly among the bins still to be filled.
        limits.append(int((count - limits[-1]) / (nBins - j)) + limits[-1])
    return limits


def _binnedRatio(sortedVals, limits, norm = 1):
    """Return sum(bin) / (norm * len(bin)) for each consecutive slice of `sortedVals`."""
    return np.array([np.sum(sortedVals[lo:hi]) / (norm * len(sortedVals[lo:hi]))
                     for lo, hi in zip(limits[:-1], limits[1:])])


# For every percentage: subsample the injections, iteratively divide out the
# strongest condition-map trend on the training pixels, then record how far the
# held-out pixels still deviate from a flat ratio for each condition map.
for perObjectsToUse in persToUse:

    # Randomly keep perObjectsToUse percent of the injections.
    includeInds = np.full(len(origInjPix), False, dtype = bool)
    includeInds[0:int((float(perObjectsToUse)*len(includeInds)) / 100)] = True
    np.random.shuffle(includeInds)

    # A detection is kept exactly when the injection it is paired with is kept.
    detPix = origDetPix[includeInds[matchInds]]
    injPix = origInjPix[includeInds]

    # Pixels that still hold at least one injection. A separate name is used so
    # the notebook-level validPix read from the valid-pixel file is not overwritten.
    subValidPix = np.unique(injPix)

    # Entries of the condition maps that belong to those pixels.
    # NOTE(review): assumes the condition maps are ordered like origValidPix - confirm.
    condCrop = np.isin(origValidPix, subValidPix)

    # Detections per retained pixel: every retained pixel is appended once so
    # that empty pixels are reported too, and that extra entry is subtracted again.
    constantTrainPixIndicator, origDetPixCounts = np.unique(np.append(subValidPix, detPix), return_counts = True)
    origDetPixCounts = origDetPixCounts - 1

    condMaps = np.array([origCondMap[condCrop] for origCondMap in origCondMaps])

    # Random 80/20 split of the retained pixels into training and test pixels.
    trainInds = np.full(len(condMaps[0]), False, dtype = bool)
    trainInds[0:int(0.8*len(trainInds))] = True
    np.random.shuffle(trainInds)

    aveDetTrain = np.sum(origDetPixCounts[trainInds]) / len(origDetPixCounts[trainInds])

    # Per condition map: ordering of the training pixels by condition value.
    sortInds = np.array([condMap[trainInds].argsort() for condMap in condMaps])

    binIndLims = _binLimits(np.sum(trainInds), binNum)

    # Mean condition value in each bin, one row per condition map.
    xBins = np.array([_binnedRatio(condMap[trainInds][order], binIndLims)
                      for condMap, order in zip(condMaps, sortInds)])

    # Binned detection ratio (relative to the training mean) before any correction.
    yBinsOrig = np.array([_binnedRatio(origDetPixCounts[trainInds][order], binIndLims, aveDetTrain)
                          for order in sortInds])

    detPixCounts = np.copy(origDetPixCounts)

    allErrors = []

    # NOTE(review): the loop ends only when mostSigInd returns -1; there is no iteration cap.
    while True:

        yBins = np.array([_binnedRatio(detPixCounts[trainInds][order], binIndLims, aveDetTrain)
                          for order in sortInds])

        index, maxErr = mostSigInd(yBins)
        if index == -1:
            break

        allErrors.append(maxErr)

        # Interpolate the binned ratio of the selected map; outside the binned
        # range the first / last bin value is used.
        corrFunc = inter.interp1d(xBins[index], yBins[index], bounds_error = False, fill_value = (yBins[index][0], yBins[index][-1]))

        # Divide the trend out of every pixel, then rescale so the training mean is unchanged.
        detPixCounts = detPixCounts / (corrFunc(condMaps[index]))
        detPixCounts = detPixCounts * aveDetTrain / (np.sum(detPixCounts[trainInds]) / len(detPixCounts[trainInds]))

    # Evaluate on the held-out pixels. The bins come from binNum here as well
    # (the previous literal 10 only matched while binNum was 10).
    binIndLims = _binLimits(np.sum(~trainInds), binNum)

    aveDetTest = np.sum(detPixCounts[~trainInds]) / len(detPixCounts[~trainInds])

    condMaxErrors = []
    for condMap in condMaps:
        condSortInds = condMap[~trainInds].argsort()
        detStarTemp = detPixCounts[~trainInds][condSortInds]

        yBinCond = _binnedRatio(detStarTemp, binIndLims, aveDetTest)

        condMaxErrors.append(np.max(np.abs(yBinCond - 1)))

    # One file per percentage, named after the fraction used and the cutoff.
    perLabel = str(round(perObjectsToUse / 100, 3))
    condErrorsFile = balrConfig.test1aDir + 'MagCutTests/22.9MagCut/' + perLabel + '_Cond_Errors_' + str(cutOffPercent) + '.fits'
    my_table = Table()
    my_table['Errors'] = condMaxErrors
    my_table.write(condErrorsFile, overwrite = True)
    print(perLabel + ': ' + str(np.average(condMaxErrors)))

In [None]:
# Load the per-condition maximum deviations written for every percentage.
# The file name uses cutOffPercent, exactly as when the files are written, so a
# changed cutoff cannot leave this cell reading results from another run.
maxDevs = []
for per in persToUse:
    errorsFile = balrConfig.test1aDir + 'MagCutTests/22.9MagCut/' + str(round(per / 100, 3)) + '_Cond_Errors_' + str(cutOffPercent) + '.fits'
    maxDevs.append(fitsio.read(errorsFile)['Errors'])

In [None]:
# Average, over the condition maps, of the maximum deviation at each percentage.
aveMaxDev = [np.average(devs) for devs in maxDevs]

In [None]:
def func(x, C, b, k):
    """Power law with a constant offset: ``C * x**(-k) + b``."""
    return b + C * x**(-k)

In [None]:
# Import the subpackage explicitly: `import scipy as sc` alone does not
# guarantee that scipy.optimize has been loaded.
from scipy import optimize

# Fit C * x**(-k) + b with the bounds 0 <= C <= 20, 0 <= b <= 0.1, 0 <= k <= 2.
popt, pcov = optimize.curve_fit(func, persToUse, aveMaxDev, bounds=([0., 0., 0], [20, 0.1, 2]))

In [None]:
# Best-fit parameters, in the order (C, b, k) of func's arguments.
popt

In [None]:
# Parameter uncertainties: square roots of the diagonal of the covariance matrix.
paramVariances = np.diag(pcov)
perr = np.sqrt(paramVariances)
perr

In [None]:
# Grid on which the fitted curve is drawn.
fitGrid = np.linspace(0.01, 100, 10000)

plt.figure()

# Measured points and fitted curve; zorder 3 keeps them above the grid lines.
plt.scatter(persToUse, aveMaxDev, zorder = 3, label = 'Calculated')
plt.plot(fitGrid, func(fitGrid, *popt), color = 'r', zorder = 3, label = 'Fit')
plt.text(20, 0.3, r'$y = Cx^{-k} + b$', bbox={'facecolor': 'white', 'alpha': 1, 'pad': 10}, zorder = 3)

# Axes, labels and decorations.
plt.title('Varying Test Data Size')
plt.xlabel('Percentage of Balrog Galaxies Used')
plt.ylabel('Average Maximum Deviation')
plt.xlim([0, 100])
plt.ylim([0, 0.5])
plt.grid(zorder = 0)
plt.legend()
plt.show()

In [None]:
# Percentages from 100 to 500 at which the fit is extrapolated.
x_test = np.linspace(100, 500, num = 10000)

In [None]:
# Fitted curve evaluated on the extrapolation grid.
fitC, fitB, fitK = popt
y_test = func(x_test, fitC, fitB, fitK)

In [None]:
# First extrapolated percentage at which the fitted deviation drops below 0.03,
# or a message when it never does on the x_test grid.
belowTarget = np.where(y_test < 0.03)[0]
if len(belowTarget) == 0:
    print('Too High Precision')
else:
    print(x_test[belowTarget[0]])

In [None]:
# Fitted deviation at the last point of x_test (x = 500).
y_test[-1]