In [1]:
# Build feature/label arrays from the cleaned master catalog and save them as .npz files under each cluster's "Files\Galaxy Data" folder

import numpy as np
import pandas as pd
import sys
import math

import astropy
from astropy.stats import sigma_clip

from sklearn import preprocessing

# Project folder; it is prepended to sys.path below.
root = 'C:\\Users\\Jackson\\Jupyter projects\\'
sys.path.insert(0, root)

# Per-cluster lookup tables, all keyed by the short cluster id.
# zclus: redshift the membership windows are centred on.
zclus = dict(a2744=0.308, m0416=0.396)
# pgals: three numbers per cluster; not used in this part of the notebook,
# meaning not evident from here -- TODO document.
pgals = dict(
    a2744=[1.689791e-01, 1.965115e+00, 2.0],
    m0416=[3.737113e-01, 1.322081e+00, 2.0],
)
# nclus: display name, also used as the per-cluster folder name when saving.
nclus = dict(a2744="Abell 2744", m0416="MACS 0416")



In [2]:
# Load the cleaned master catalogue for one cluster and split the selected
# galaxies into spectroscopic members, spectroscopic non-members and
# galaxies without a redshift.
cluster = 'a2744'
# Same folder as `root` from the setup cell; reuse it so the path is defined once.
base = root
catalog = base + "Files/" + cluster + "_master_cleaned.csv"

#Set up clusters, not clusters, and unknowns
# dzcut     : half-width of the first redshift window around zclus[cluster]
# sigclip   : sigma threshold handed to sigma_clip
# sigcut    : number of clipped standard deviations used for the final window
# radialcut : upper limit on master_d (units not stated here -- TODO confirm)
# maglim    : faintest master_mag814 that is kept
# colorcut  : not used in this cell
dzcut=0.03; sigclip=3; sigcut=3; radialcut=120; maglim=23.5; colorcut=1.0
master = pd.read_csv(catalog)
zdat = master['master_z']
valid = master['master_valid']
zc = zclus[cluster]

# First pass: valid objects within +/- dzcut of the nominal cluster redshift.
memberindx = np.where((zdat>zc-dzcut)&(zdat<zc+dzcut)&(valid==1))[0]
print(len(memberindx))
# np.where returns row positions, so select positionally; plain [] would
# treat them as index labels and only agrees on a default RangeIndex.
z1 = zdat.iloc[memberindx]
z2 = sigma_clip(z1,sigma=sigclip)
zsig = np.std(z2)
print(np.mean(z2),zsig)

# Final membership half-width: sigcut clipped standard deviations.
dz = sigcut*zsig
print(dz)

# Sample selection: inside the radial cut, both magnitudes present,
# not fainter than maglim, and flagged valid.
ddat = master['master_d']
mag814 = master['master_mag814']
mag606 = master['master_mag606']
keep = (ddat<radialcut)&(~np.isnan(mag606))&(~np.isnan(mag814))&(mag814<=maglim)&(valid == 1)
master_cut = master.iloc[np.where(keep)[0]]

# From here on zdat refers to the selected sample, not the full catalogue.
zdat = master_cut['master_z']
has_z = ~np.isnan(zdat)
is_member = (zdat>zc-dz)&(zdat<zc+dz)
memindx = np.where(is_member)[0]
# Non-members are every object with a redshift that is not a member, so a
# redshift exactly on the window edge is no longer dropped from all groups.
nmemindx = np.where(has_z&~is_member)[0]
unknownindx = np.where(~has_z)[0]
# The three groups must partition the selected sample.
assert len(memindx)+len(nmemindx)+len(unknownindx) == len(master_cut)

#Arrays of Spectroscopic cluster members, Spectroscopic non-members, and photometric galaxies
# Kept as lists of row Series because later cells iterate them row by row.
clusterarr = [master_cut.iloc[i] for i in memindx]
ncluster = [master_cut.iloc[i] for i in nmemindx]
unknown = [master_cut.iloc[i] for i in unknownindx]

print(len(clusterarr))
print(len(ncluster))
print(len(unknown))


486
0.3069860850098326 0.007357462290748165
0.022072386872244495
92
46
235


In [10]:
# Colour-magnitude features and labels for the spectroscopic sample, plus the
# same features for galaxies without a redshift.

def _cm_pair(gal):
    """Return [mag814, mag606 - mag814] for one catalogue row."""
    return [gal['master_mag814'], gal['master_mag606'] - gal['master_mag814']]


def _xy_mag(gal):
    """Return [x, y, mag814] for one catalogue row."""
    return [gal['master_x'], gal['master_y'], gal['master_mag814']]


XclustCM = list(map(_cm_pair, clusterarr))
XnclustCM = list(map(_cm_pair, ncluster))

# Members are stacked first, then non-members; the labels below rely on that order.
Xcm = np.array(XclustCM + XnclustCM)

unknownXcm = np.array(list(map(_cm_pair, unknown)))

# Labels: 1 for each row of clusterarr, 0 for each row of ncluster.
y = np.concatenate((np.ones(len(clusterarr)), np.zeros(len(ncluster))))

# Position/magnitude triples, kept as plain lists.
XclustCMPos = list(map(_xy_mag, clusterarr))
XnclustCMPos = list(map(_xy_mag, ncluster))
unknownXCMPos = list(map(_xy_mag, unknown))

In [11]:
# Store the position/magnitude triples of the spectroscopic members and
# non-members in one archive.
filename = "".join([base, "Master\\", nclus[cluster], "\\Files\\Galaxy Data\\SpecCMP"])
spec_arrays = {
    "specMembers": XclustCMPos,
    "specNonMembers": XnclustCMPos,
}
np.savez(filename, **spec_arrays)

In [7]:
# Store the colour-magnitude features, labels and position/magnitude
# triples in one archive.
filename = "".join([base, "Master\\", nclus[cluster], "\\Files\\Galaxy Data\\ColorMagnitude"])
cm_arrays = {
    "knownXData": Xcm,
    "unknownXData": unknownXcm,
    "yData": y,
    "clusterPosMag": XclustCMPos,
    "unknownPosMag": unknownXCMPos,
}
np.savez(filename, **cm_arrays)

In [7]:
# Four-column features per galaxy: [mag814, mag606 - mag814, x, y].

def _cm_xy_row(gal):
    """Return [mag814, mag606 - mag814, x, y] for one catalogue row."""
    return [
        gal['master_mag814'],
        gal['master_mag606'] - gal['master_mag814'],
        gal['master_x'],
        gal['master_y'],
    ]


Xclustall = [_cm_xy_row(gal) for gal in clusterarr]
Xnclustall = [_cm_xy_row(gal) for gal in ncluster]
# Members first, then non-members.
Xall = np.array(Xclustall + Xnclustall)

unknownXall = np.array([_cm_xy_row(gal) for gal in unknown])

print(unknownXall.shape)

(235, 4)


In [10]:
# Store the colour-magnitude-position features together with the labels and
# position/magnitude triples in one archive.
filename = "".join([base, "Master\\", nclus[cluster], "\\Files\\Galaxy Data\\ColorMagnitudePosition"])
cmp_arrays = {
    "knownXData": Xall,
    "unknownXData": unknownXall,
    "yData": y,
    "clusterPosMag": XclustCMPos,
    "unknownPosMag": unknownXCMPos,
}
np.savez(filename, **cmp_arrays)

In [13]:
# Split each group by whether the row has a coe_RA value, then build the
# label vector for the rows that do.

def _has_coe(gal):
    """True when the row's coe_RA is not NaN."""
    return not np.isnan(gal['coe_RA'])


clustercoe = list(filter(_has_coe, clusterarr))
nclustercoe = list(filter(_has_coe, ncluster))
unknowncoe = list(filter(_has_coe, unknown))

# Galaxies without a redshift that also lack a coe_RA value.
unknownNoCoe = [gal for gal in unknown if not _has_coe(gal)]

# Members first, then non-members; the labels follow the same order.
allknowncoe = clustercoe + nclustercoe
knownYcoe = np.concatenate((np.ones(len(clustercoe)), np.zeros(len(nclustercoe))))

coePos = [(gal['coe_RA'], gal['coe_Dec']) for gal in allknowncoe]

# Report the size of each subset.
for _msg, _rows in (
    ("Number of unknown members with coe data: {}", unknowncoe),
    ("Number of unknown members without coe data: {}", unknownNoCoe),
    ("Number of spectroscopic cluster members with coe data: {}", clustercoe),
    ("Number of spectroscopic noncluster members with coe data: {}", nclustercoe),
):
    print(_msg.format(len(_rows)))

Number of unknown members with coe data: 217
Number of unknown members without coe data: 18
Number of spectroscopic cluster members with coe data: 89
Number of spectroscopic noncluster members with coe data: 42


In [15]:
# Feature matrices from the coe columns, plus fallback colour-magnitude
# features for galaxies that have no coe entry.

# coe columns that are left out of the feature rows.
forbidden = ["coe_mag160", "coe_err160", "coe_lab", "coe_sn160"]


def _coe_feature_row(gal):
    """Return every non-forbidden column whose name contains "coe", followed by coe_mag606 - coe_mag814."""
    feats = [gal[col] for col in gal.keys() if "coe" in col and col not in forbidden]
    feats.append(gal["coe_mag606"] - gal["coe_mag814"])
    return feats


# Spectroscopic sample (members first, then non-members).
knownXcoe = np.array([_coe_feature_row(gal) for gal in allknowncoe])

# Galaxies without a redshift that have coe data.
unknownXcoe = np.array([_coe_feature_row(gal) for gal in unknowncoe])

# Galaxies without a redshift and without coe data: [mag814, mag606 - mag814].
unknownXNoCoe = np.array(
    [[gal['master_mag814'], gal['master_mag606'] - gal['master_mag814']] for gal in unknownNoCoe]
)


def _xy_mag_row(gal):
    """Return [x, y, mag814] for one catalogue row."""
    return [gal['master_x'], gal['master_y'], gal['master_mag814']]


# Position/magnitude triples for both groups without a redshift, kept as lists.
unknownCoePosMag = [_xy_mag_row(gal) for gal in unknowncoe]
unknownNoCoePosMag = [_xy_mag_row(gal) for gal in unknownNoCoe]

In [16]:
# Store the coe feature matrices, labels, fallback colour-magnitude features
# and position/magnitude triples in one archive.
filename = "".join([base, "Master\\", nclus[cluster], "\\Files\\Galaxy Data\\Coe"])
coe_arrays = {
    "knownXData": knownXcoe,
    "unknownXData": unknownXcoe,
    "unknownXNoCoe": unknownXNoCoe,
    "yData": knownYcoe,
    "unknownPosMagCoe": unknownCoePosMag,
    "unknownPosMagNoCoe": unknownNoCoePosMag,
}
np.savez(filename, **coe_arrays)