In [1]:
from __future__ import print_function
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from astropy import units as u
from astropy.coordinates import SkyCoord

#comment out the next two lines if you do not have astroquery installed
#from astroquery.sdss import SDSS
#from astroquery.vizier import Vizier

from astropy.table import Table, Column, join
import pandas as pd

In [2]:
# GOODS_TRANSLATE: the GOODS translation file. It is supposed to contain an
# Index generated by Rebecca Larson's code, which scans the field and looks for
# emission-line spikes. That Index is unique to the date her code was run and
# may be specific to the field it analyzed, so it is NOT a universal Index.
# The file also has a "Steves_ID" column, which is supposed to be the ID under
# which Dr. Finkelstein stores each source in his own, more comprehensive
# catalogue.
#
# Note: the second row of the displayed table states what kind of object each
# column holds. Useful, but not needed right now (probably a relic of whatever
# tool produced the file).
GOODS_TRANSLATE = Table.read(
    'GOODSN_plus_translation_all.txt',
    format='ascii',
)

# CGS1_LINES: a best guess at which file is actually needed. It gathers a lot
# of data in one place, whereas many of the other .txt files on the drive
# provided by Rebecca are scattered across hundreds of folders. If each folder
# is its own emission source, the scope of this task needs to be re-checked.
CGS1_LINES = Table.read(
    'CGS1_lines.txt',
    format='ascii',
)

In [3]:
# Convert the translation table to a pandas DataFrame so it can be merged
# easily later on.
TransPD = GOODS_TRANSLATE.to_pandas()

# Index the rows by 'Steves_ID' instead of letting pandas number them from
# zero (this re-indexing may be removed later).
Translate = TransPD.set_index(keys='Steves_ID')
Translate

Unnamed: 0_level_0,ID,ra,dec,x,y
Steves_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
z4_GND_22332,20029,189.179694,62.240789,11707.786000,10374.013000
z4_GND_19891,21567,189.265962,62.247546,9297.237000,10779.229000
z4_GND_14785,24413,189.164208,62.261807,12139.289000,11635.503000
z4_GND_41377,33402,189.208982,62.311845,10888.025000,14636.981000
z4_GND_819,3777,189.297333,62.154039,8414.783000,5169.538000
z4_GND_39165,9207,189.184421,62.188214,11577.835000,7219.417000
z4_GND_44202,30073,189.232273,62.290575,10238.617000,13360.706000
z4_GND_12864,25624,189.134321,62.267528,12973.424000,11979.763000
z4_GND_41287,33383,189.195998,62.312141,11249.994000,14654.882000
z4_GND_29320,15884,189.152002,62.220837,12483.067000,9177.656000


In [4]:
# Convert Rebecca's line-finding results to pandas so they can be inspected.
# (This cell used to open with a bare `CGS1_LINES` expression, but only the
# LAST expression of a cell is rendered, so that line displayed nothing and
# has been dropped; the table is shown via `CGS1` at the end of the cell.)
CGS1PD = CGS1_LINES.to_pandas()

# The columns are not labelled in the original file, so they carry the
# placeholder names col1..col12; what each one holds still has to be worked
# out by educated guessing.
#
# Index by col1 instead of the default zero-based index (this may be removed
# later). NOTE: col1 values repeat (e.g. CGS1_43415_91 appears on several
# displayed rows), so this index is NOT unique and `CGS1.loc[label]` can
# return more than one row.
CGS1 = CGS1PD.set_index('col1')
CGS1

Unnamed: 0_level_0,col2,col3,col4,col5,col6,col7,col8,col9,col10,col11,col12
col1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
CGS1_43335_91,80,4.520,9998.03,13.05,8.241390e-20,1.823140e-20,50.336,24.999,-3.892180e-20,4.797380e-19,2.912
CGS1_42854_113,42,4.188,9053.32,3.95,2.189270e-17,5.227390e-18,29.051,3.892,-3.477560e-20,2.183150e-20,0.096
CGS1_43205_113,34,4.519,8853.64,5.24,3.859230e-17,8.539730e-18,47.927,8.037,-2.719170e-20,2.727290e-20,0.486
CGS1_43246_91,26,6.485,8640.84,2.81,1.147630e-16,1.769740e-17,55.081,2.914,4.776030e-18,5.121160e-20,0.115
CGS1_43386_113,36,4.462,8900.73,3.77,3.598330e-17,8.064450e-18,28.481,3.428,2.167430e-18,3.654160e-20,0.145
CGS1_43415_91,14,7.681,8339.00,1.70,2.016510e-16,2.625420e-17,54.707,1.097,1.030400e-17,8.301240e-20,2.998
CGS1_43415_91,18,8.817,8438.33,0.98,2.028620e-16,2.300810e-17,54.919,1.499,9.338460e-18,7.374060e-20,0.850
CGS1_43415_91,24,4.655,8593.89,5.53,9.607250e-17,2.063670e-17,53.275,4.955,7.894200e-18,6.251870e-20,0.251
CGS1_44009_113,8,10.537,8188.39,0.98,3.522900e-16,3.343340e-17,53.977,0.789,9.779850e-18,1.010000e-19,4.535
CGS1_44009_113,11,8.244,8264.99,2.54,2.450760e-16,2.972670e-17,53.845,1.366,9.244830e-18,9.267940e-20,0.631


In [5]:
# Dr. Finkelstein's catalogue. There must be at least one item of information
# in common between this and the translate file, so it needs to end up in a
# form that is readable and can be manipulated in pandas.
#
# The first major problem: the top of the catalogue holds about 15 rows that
# describe the columns instead of a row of column names, which pandas does not
# interpret well. Reading the .cat file directly with pandas did not work: every
# row was collapsed into a single entry, i.e. the result had only one column
# containing all the information for each source. The column names therefore
# have to be supplied by hand.
GOODS_CAT_AT = Table.read(
    '3DHST_GOODS.cat',
    format='ascii',
)

# astropy reads the file correctly, so convert the astropy Table to pandas
# rather than reading with pandas directly. (That worked.)
GOODS_CAT_from_at = GOODS_CAT_AT.to_pandas()

# Replace the placeholder column names with descriptive ones: identifiers and
# redshift first, then one (flux, flux error) pair per filter.
GOODS_CAT_from_at.columns = [
    '3DHST ID',
    'RA',
    'DEC',
    'Best Redshift',
    'Redshift Type (1=spec,2=grism,3=phot)',
    'Flux F606W (nJy)', 'Flux Error F606W (nJy)',
    'Flux F814W (nJy)', 'Flux Error F814W (nJy)',
    'Flux F125W (nJy)', 'Flux Error F125W (nJy)',
    'Flux F140W (nJy)', 'Flux Error F140W (nJy)',
    'Flux F160W (nJy)', 'Flux Error F160W (nJy)',
]

# Index by '3DHST ID' so that pandas does not auto-generate a new zero-based
# index for this catalogue.
Catalogue = GOODS_CAT_from_at.set_index(keys='3DHST ID')

In [6]:
Catalogue  # Display Dr. Finkelstein's catalogue after it has been reworked
# into a pandas DataFrame indexed by '3DHST ID'.

Unnamed: 0_level_0,RA,DEC,Best Redshift,"Redshift Type (1=spec,2=grism,3=phot)",Flux F606W (nJy),Flux Error F606W (nJy),Flux F814W (nJy),Flux Error F814W (nJy),Flux F125W (nJy),Flux Error F125W (nJy),Flux F140W (nJy),Flux Error F140W (nJy),Flux F160W (nJy),Flux Error F160W (nJy)
3DHST ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,189.145203,62.094013,-99.0000,3,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73
2,189.154861,62.094128,-99.0000,3,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73
3,189.150406,62.094280,-99.0000,3,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73
4,189.148468,62.095020,-99.0000,3,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73
5,189.154419,62.094681,-99.0000,3,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73
6,189.150589,62.095577,-99.0000,3,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73
7,189.147156,62.095276,-99.0000,3,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73
8,189.154892,62.095711,-99.0000,3,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73
9,189.141296,62.095791,-99.0000,3,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73,-35944.73
10,189.137115,62.096096,1.6385,3,174.74,10.78,-35944.73,-35944.73,270.18,23.22,-35944.73,-35944.73,400.98,55.41


In [7]:
#Dr. Finkelstein seems to believe that there will be no way to identify these sources other than by their
#Right Ascension (RA) and Declination (DEC). He has asked me to write code that takes the RA and DEC of each
#entry in the Translate file and finds the entry in the Catalogue at the least distance from it. If that least
#distance is within a tolerance of 0.5 arcseconds, we should consider the pair a match. I have been told that
#this can be optimized so that we do not check against sources that are obviously far away.

#What we are trying to accomplish here is very difficult. My code must take the RA & DEC from an entry in Translate,
#check its distance against each entry of Catalogue, and determine which one is the closest. Once it has
#determined the closest entry, it must take the entry from Translate and merge it with that entry of Catalogue in a
#new DataFrame. Then it must move on to the next entry of Translate and repeat this process, adding each
#result as the next row of our new DataFrame.

#distance = math.sqrt(((p1[0]-p2[0])**2)+((p1[1]-p2[1])**2))

#def distance(RA1, DEC1, RA2, DEC2): #okay now what am i supposed to write for the rest of this...
    #lat1, lon1, lat2, lon2 = map(np.deg2rad, [lat1, lon1, lat2, lon2]) ?????
   ##dlon = lon2 - lon1 ?????
    #a = np.sin(dlat/2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2)**2 ?????
    #c = 2 * np.arcsin(np.sqrt(a)) ?????
    #total_miles = MILES * c ?????
    #return total_miles ?????




#Catalogue['distance'] = Catalogue.apply(lambda row: distance(189.179694, 62.240789, row['RA'], row['DEC']), axis=1)
#https://github.com/s-heisler/pycon2017-optimizing-pandas
#https://engineering.upside.com/a-beginners-guide-to-optimizing-pandas-code-for-speed-c09ef2c6a4d6
#df['distance'] = haversine(40.671, -73.985, df['latitude'], df['longitude'])

#http://docs.astropy.org/en/stable/coordinates/matchsep.html#astropy-coordinates-matching

In [8]:
#from astropy.coordinates import SkyCoord as sky
#from astropy import units as u
#c = SkyCoord(ra=ra1*u.degree, dec=dec1*u.degree)
#catalog = SkyCoord(ra=ra2*u.degree, dec=dec2*u.degree)
#idx, d2d, d3d = c.match_to_catalog_sky(catalog)

#idx, sep, _ = target_c.match_to_catalog_sky(catalog_c) 

#http://docs.astropy.org/en/stable/coordinates/matchsep.html#astropy-coordinates-matching
#http://docs.astropy.org/en/stable/coordinates/index.html#convenience-methods

#Between this box and the one above it, I should be able to figure out a way to compare these coordinates
#GOODS_TRANSLATE
#GOODS_CAT_AT

#if i want to do this, i'll need to strip both of these tables down to just their RA and DEC columns

In [9]:
#idx, sep, _ = TRANSRA.match_coordinates_sky(GOODSRA) 
#This did not work. It would only work if TRANSRA and GOODSRA were SkyCoord objects; they are not, they are table columns.

In [10]:
#Roadblocks everywhere, and no one available to help me solve these problems (Friday at 5 pm).

#Met with Andreia Carrillo, a grad student in the Astronomy Dept here, and we talked through building a "For-Loop"
#with a function to help me find my distances

# Cross-match every Translate source against the catalogue by sky position.
#
# The requested tolerance is 0.5 arcseconds. The coordinates used below appear
# to be decimal degrees (e.g. ra ~ 189.18, dec ~ 62.24), so the separation
# computed in the loop is in degrees and the tolerance must be converted to
# degrees before comparing (1 degree = 3600 arcseconds).
MATCH_TOLERANCE_ARCSEC = 0.5
MATCH_TOLERANCE_DEG = MATCH_TOLERANCE_ARCSEC / 3600.0

# Cut both tables down to just their RA and DEC columns. The smaller table
# (Translate) drives the loop; each of its entries is compared against the
# whole catalogue at once.
TRANSRA = GOODS_TRANSLATE['ra']
TRANSDEC = GOODS_TRANSLATE['dec']
GOODSRA = GOODS_CAT_AT['col2']   # catalogue RA  (second column of the .cat file)
GOODSDEC = GOODS_CAT_AT['col3']  # catalogue DEC (third column of the .cat file)

# cos(DEC) scales an RA difference into a true angular distance on the sky.
# np.cos expects RADIANS, so the declination (degrees) is converted first.
# The factor does not depend on the loop variable, so compute it once.
COS_GOODSDEC = np.cos(np.deg2rad(GOODSDEC))

Matches = []  # one entry per Translate row, in the same order

for i in range(len(TRANSRA)):
    # Flat-sky (small-angle) separation, in degrees, between Translate entry i
    # and every catalogue entry.
    DISTANCE = np.sqrt(((GOODSRA - TRANSRA[i]) * COS_GOODSDEC)**2
                       + (GOODSDEC - TRANSDEC[i])**2)

    NEAREST = np.min(DISTANCE)

    # np.where returns a tuple holding an array of the zero-based catalogue
    # row position(s) at the minimum distance.
    CLOSEST = np.where(DISTANCE == NEAREST)

    if NEAREST < MATCH_TOLERANCE_DEG:
        Matches.append(CLOSEST)
    else:
        # NOTE: this is the *string* 'False' (which is truthy), kept unchanged
        # so that any later cell comparing against 'False' keeps working.
        Matches.append('False')

In [11]:
Matches  # Each entry is the raw np.where result (a tuple holding an index array) or the string 'False'; not directly usable in this format

[(array([20028]),),
 (array([21566]),),
 (array([24412]),),
 (array([33401]),),
 (array([3776]),),
 (array([9206]),),
 (array([30072]),),
 (array([25623]),),
 (array([33382]),),
 (array([15883]),),
 (array([25863]),),
 (array([21995]),),
 (array([6625]),),
 (array([28938]),),
 (array([5283]),),
 (array([20061]),),
 (array([3653]),),
 (array([18708]),),
 (array([22045]),),
 (array([29629]),),
 (array([4740]),),
 (array([28327]),),
 (array([29849]),),
 (array([32274]),),
 (array([28269]),),
 (array([31606]),),
 (array([14421]),),
 (array([16798]),),
 (array([7979]),),
 (array([16396]),),
 (array([30745]),),
 (array([18664]),),
 (array([19579]),),
 (array([11039]),),
 (array([34821]),),
 (array([9746]),),
 (array([17393]),),
 (array([14644]),),
 (array([6974]),),
 (array([12423]),),
 (array([15167]),),
 (array([15697]),),
 (array([16623]),),
 (array([28428]),),
 (array([21472]),),
 (array([20977]),),
 (array([19164]),),
 (array([32406]),),
 (array([25700]),),
 (array([18090]),),
 (array([