In this notebook I extract a small catalog from GLADE+ to be used for the stellar mass estimation.

The code was written by Mária Pálfi (marika97@caesar.elte.hu).

In [1]:
# importing useful packages
import pandas as pd
import numpy as np
from tqdm import tqdm

## Reading the GLADE catalog

Here I used the redshift in the heliocentric frame.

In [3]:
# Load only the needed GLADE+ columns into the 'data' pandas dataframe.
# Keys are the zero-based positions of the columns in the text file and
# values are the names assigned to them (the file has no header row).
glade_columns = {
    2: "GWGC",
    3: "Hyp",
    4: "2MASS",
    5: "wiseX",
    7: "type",
    8: "ra",
    9: "dec",
    20: "W1",
    21: "W1_err",
    27: "z",
    29: "flag",
    31: "z_err",
    32: "lumdis",
}
data = pd.read_csv(
    'GLADE+.txt',
    sep=' ',
    header=None,
    usecols=list( glade_columns.keys() ),
    names=list( glade_columns.values() ),
    low_memory=False,
)
print( 'dataframe with the necessary columns:\n' )
data

dataframe with the necessary columns:



Unnamed: 0,GWGC,Hyp,2MASS,wiseX,type,ra,dec,W1,W1_err,z,flag,z_err,lumdis
0,NGC4736,NGC4736,12505314+4107125,J125053.14+410712.7,G,192.721451,41.120152,5.611000,,0.000991,0.0,0.000029,4.392418
1,NGC4548,NGC4548,12352642+1429467,J123526.45+142946.9,G,188.860123,14.496320,9.416000,,0.004120,1.0,0.000119,15.876007
2,NGC6503,NGC6503,17492651+7008396,J174926.45+700840.8,G,267.360474,70.144341,10.180000,,0.001000,1.0,0.000029,12.446600
3,NGC4442,NGC4442,12280389+0948130,J122803.90+094813.3,G,187.016220,9.803620,8.476000,,0.003557,1.0,0.000103,11.461371
4,NGC4469,NGC4469,,J122928.05+084500.8,G,187.367000,8.749890,9.784000,,0.004139,1.0,0.000120,15.180920
...,...,...,...,...,...,...,...,...,...,...,...,...,...
23181753,,,,,Q,359.999118,28.954734,17.728901,,2.452000,0.0,0.113916,20403.464630
23181754,,,,,Q,359.999303,34.720842,16.933657,,3.109000,0.0,0.135597,27158.368406
23181755,,,,,Q,359.999615,3.268586,14.833991,,1.232962,0.0,0.073688,8792.439985
23181756,,,,,Q,359.999759,20.721079,16.979166,,2.009865,0.0,0.099325,16022.500633


In [4]:
# number of rows read from the catalog
data.shape[0]

23181758

## Adding luminosity distance error

Calculated by Gergely Dálya.

In [5]:
# Read the luminosity distance errors: space-separated text without a header,
# holding the distance and its error in two columns.
lum_dis_err = pd.read_csv(
    'dist_errs.txt',
    sep=' ',
    header=None,
    names=[ 'lumdis', 'lumdis_err' ],
)
# Attach the error column to the catalog; pandas aligns the values on the row index.
# NOTE(review): this presumes dist_errs.txt lists its rows in the same order
# as GLADE+.txt - confirm.
data['lumdis_err'] = lum_dis_err['lumdis_err']
print( 'The dataframe:')
data.head()

The dataframe:


Unnamed: 0,GWGC,Hyp,2MASS,wiseX,type,ra,dec,W1,W1_err,z,flag,z_err,lumdis,lumdis_err
0,NGC4736,NGC4736,12505314+4107125,J125053.14+410712.7,G,192.721451,41.120152,5.611,,0.000991,0.0,2.9e-05,4.392418,0.127479
1,NGC4548,NGC4548,12352642+1429467,J123526.45+142946.9,G,188.860123,14.49632,9.416,,0.00412,1.0,0.000119,15.876007,0.532731
2,NGC6503,NGC6503,17492651+7008396,J174926.45+700840.8,G,267.360474,70.144341,10.18,,0.001,1.0,2.9e-05,12.4466,0.12867
3,NGC4442,NGC4442,12280389+0948130,J122803.90+094813.3,G,187.01622,9.80362,8.476,,0.003557,1.0,0.000103,11.461371,0.45955
4,NGC4469,NGC4469,,J122928.05+084500.8,G,187.367,8.74989,9.784,,0.004139,1.0,0.00012,15.18092,0.535217


## Adding galaxy type

$W2 - W3 \leq 1.5 \Rightarrow \text{passive}$

In [7]:
# Load the galaxy type table: tab-separated text without a header,
# holding the WISE ID and the galaxy type in two columns.
gal_type = pd.read_csv(
    'gal_type.txt',
    sep='\t',
    header=None,
    names=[ 'wiseX', 'gal_type' ],
)

#### Filtering out the galaxies that are not in the WISE catalog and encoding the WISE IDs as integers:

In [8]:
# Boolean mask of the GLADE+ rows that have no WISE ID.
# isna() tests for missing values directly, so the whole column does not
# have to be copied into a string array just to compare it with 'nan'.
nan_filt = data.wiseX.isna().values
# WISE IDs of the rows that do have one
data_wisex = data.wiseX.values[ ~nan_filt ]
# Map every WISE ID to an integer code, namely its position in data_wisex;
# if an ID occurs more than once, the dictionary keeps the last position.
id_to_num = dict( zip( data_wisex, np.arange(data_wisex.shape[0]) ) ) # dictionary
# Integer code of every entry of data_wisex, in the original row order
data_wisex_num = np.array( [  id_to_num[d] for d in data_wisex ] ) # actual coding of the IDs

#### Encoding the IDs of the gal_type dataframe with the same mapping:

In [9]:
# Translate every WISE ID of gal_type into its integer code from id_to_num
# (an ID that is missing from the dictionary raises KeyError).
galtype_num = np.array( list( map( id_to_num.__getitem__, gal_type.wiseX ) ) )
# Store the codes next to the IDs and preview the table
gal_type['num'] = galtype_num
gal_type.head()

Unnamed: 0,wiseX,gal_type,num
0,J111604.66-761258.2,1.0,1002108
1,J214416.13-750640.7,0.0,1002149
2,J133240.62-775040.5,1.0,1002187
3,J154815.92-754022.2,1.0,1002394
4,J010701.67-801828.3,1.0,1002413


#### Sorting the gal_type dataframe in ascending 'num' order:

In [10]:
# Row order that puts the integer codes into ascending order
sort_idx = gal_type.num.values.argsort()
# Reorder the whole table accordingly (the original row labels are kept)
gal_type_sorted = gal_type.iloc[ sort_idx ]
gal_type_sorted.head( 10 )

Unnamed: 0,wiseX,gal_type,num
14288657,J125053.14+410712.7,1.0,0
10235372,J123526.45+142946.9,1.0,1
17083205,J174926.45+700840.8,1.0,2
9014299,J122803.90+094813.3,0.0,3
9014300,J122928.05+084500.8,1.0,4
9481731,J122753.56+121735.8,0.0,6
9481732,J122541.67+124838.1,0.0,7
9014301,J122711.57+092513.9,1.0,8
9014302,J121530.38+093505.9,1.0,9
9481733,J121516.80+130126.3,1.0,10


#### Finding where 'num' changes in the sorted dataframe:

In [11]:
sorted_nums = gal_type_sorted.num.values
# Positions where the code differs from the one in the previous row,
# i.e. where the rows of a new ID start
change_positions = np.where( np.diff( sorted_nums ) )[0] + 1
# Add the two outer boundaries that the difference cannot detect:
# 0 in front (start of the first group) and the array size at the end
# (end of the last group)
diff_idx = np.append( np.insert( change_positions, 0, 0, axis=0 ), sorted_nums.shape[0] )

#### Collecting the gal_type data with the same ID:

In [12]:
# Collect, for every distinct ID, the gal_type values of all of its rows.
# Group i covers the rows diff_idx[i] .. diff_idx[i+1]-1 of the sorted table.
group_count = diff_idx.shape[0] - 1
sorted_gal_type_values = gal_type_sorted.gal_type.values

# The groups have different lengths. Building the array with
# np.array( list_of_arrays ) only warned about the ragged input on older
# NumPy versions and raises ValueError on recent ones, so an object array
# is allocated explicitly and filled element by element.
container_gal_type = np.empty( group_count, dtype=object )
for i in tqdm( range( group_count ) ):
    container_gal_type[i] = sorted_gal_type_values[ diff_idx[i]:diff_idx[i+1] ]

100%|███████████████████████████| 18380495/18380495 [01:09<00:00, 264928.31it/s]
  container_gal_type = np.array( container_gal_type ) # converting to array


#### See the IDs with more than one row ('duplicated'):

In [13]:
# Number of gal_type entries collected for each ID
duplicate_num = np.array( list( map( len, container_gal_type ) ) )
# Mask of the IDs that occur in more than one row
duplicate_filt = duplicate_num > 1
# Value groups of those IDs only
container_gal_type_duplicate = container_gal_type[ duplicate_filt ] # array of arrays

How many times can an ID be duplicated?

In [14]:
# Distinct numbers of rows that a single ID has in gal_type
distinct_multiplicities = np.unique( duplicate_num )
distinct_multiplicities

array([1, 2, 3, 4, 5])

Checking whether the gal_type values belonging to the same ID are identical:

In [15]:
# For every ID that occurs more than once, check that all of its gal_type
# values agree with the first one. 'False' is printed for each ID whose
# values differ; diff_bool holds one boolean per checked ID.
diff_bool = []
for arr in tqdm( container_gal_type_duplicate ):
    # evaluate the comparison once and reuse the result
    is_consistent = np.allclose( arr, arr[:1] )
    diff_bool.append( is_consistent )
    if not is_consistent:
        print('False')
diff_bool = np.array( diff_bool )

100%|██████████████████████████████| 1142376/1142376 [00:40<00:00, 28244.75it/s]


Checking the result:

In [16]:
# Cross-check: the number of IDs that occur more than once must equal the
# number of duplicated IDs whose gal_type values were found identical.
# A result of 0 means that no duplicated ID has conflicting values.
uqs, counts = np.unique( galtype_num, return_counts=True )
n_repeated_ids = ( counts > 1 ).sum()
n_consistent_ids = diff_bool.sum()
n_repeated_ids - n_consistent_ids

0

#### Pairing the galaxy type to the galaxy in GLADE+:

In [17]:
# Keep one row per ID: the first row of every group in the sorted table
gal_type_first_rows = gal_type_sorted.iloc[ diff_idx[:-1] ]
# Left join on the WISE ID: every row of 'data' is kept, and rows without
# a match get NaN in the columns coming from gal_type
merged_df = data.merge( gal_type_first_rows, how='left', on='wiseX' )
merged_df.head()

Unnamed: 0,GWGC,Hyp,2MASS,wiseX,type,ra,dec,W1,W1_err,z,flag,z_err,lumdis,lumdis_err,gal_type,num
0,NGC4736,NGC4736,12505314+4107125,J125053.14+410712.7,G,192.721451,41.120152,5.611,,0.000991,0.0,2.9e-05,4.392418,0.127479,1.0,0.0
1,NGC4548,NGC4548,12352642+1429467,J123526.45+142946.9,G,188.860123,14.49632,9.416,,0.00412,1.0,0.000119,15.876007,0.532731,1.0,1.0
2,NGC6503,NGC6503,17492651+7008396,J174926.45+700840.8,G,267.360474,70.144341,10.18,,0.001,1.0,2.9e-05,12.4466,0.12867,1.0,2.0
3,NGC4442,NGC4442,12280389+0948130,J122803.90+094813.3,G,187.01622,9.80362,8.476,,0.003557,1.0,0.000103,11.461371,0.45955,0.0,3.0
4,NGC4469,NGC4469,,J122928.05+084500.8,G,187.367,8.74989,9.784,,0.004139,1.0,0.00012,15.18092,0.535217,1.0,4.0


In [17]:
# Remove the helper column 'num' (the integer ID code) from the merged table.
# Rebinding the result instead of using inplace=True, together with
# errors='ignore', keeps this cell re-runnable: running it a second time
# no longer raises KeyError because the column is already gone.
merged_df = merged_df.drop( columns='num', errors='ignore' )

In [18]:
# preview the first five rows of the merged table
merged_df.head( 5 )

Unnamed: 0,GWGC,Hyp,2MASS,wiseX,type,ra,dec,W1,W1_err,z,flag,z_err,lumdis,lumdis_err,gal_type,num
0,NGC4736,NGC4736,12505314+4107125,J125053.14+410712.7,G,192.721451,41.120152,5.611,,0.000991,0.0,2.9e-05,4.392418,0.127479,1.0,0.0
1,NGC4548,NGC4548,12352642+1429467,J123526.45+142946.9,G,188.860123,14.49632,9.416,,0.00412,1.0,0.000119,15.876007,0.532731,1.0,1.0
2,NGC6503,NGC6503,17492651+7008396,J174926.45+700840.8,G,267.360474,70.144341,10.18,,0.001,1.0,2.9e-05,12.4466,0.12867,1.0,2.0
3,NGC4442,NGC4442,12280389+0948130,J122803.90+094813.3,G,187.01622,9.80362,8.476,,0.003557,1.0,0.000103,11.461371,0.45955,0.0,3.0
4,NGC4469,NGC4469,,J122928.05+084500.8,G,187.367,8.74989,9.784,,0.004139,1.0,0.00012,15.18092,0.535217,1.0,4.0


In [19]:
# (rows, columns) of the merged table next to those of the input catalog
( merged_df.shape, data.shape )

((23181758, 16), (23181758, 14))

## Writing out the dataframe

In [20]:
# Save the merged table as tab-separated text with a header row and without the row index
merged_df.to_csv(
    'data_for_work.txt',
    sep='\t',
    header=True,
    index=False,
)