# Classification of Emission Line Galaxies with SDSS & WISE

## Imports

In [14]:
import pandas as pd
import numpy as np
from collections import Counter

%matplotlib
import matplotlib.pyplot as plt
plt.style.use('bmh')

import seaborn as sns
sns.set_theme()
sns.set_context('paper')


Using matplotlib backend: MacOSX


In [2]:
# Load the galaxy catalogue. header=1 tells pandas to take the column names
# from the second line of the file; the line before it is skipped.
# NOTE(review): presumably the first line is a non-data banner — confirm against the CSV.
galaxies = pd.read_csv('all_galaxies.csv', header=1)
# Bare last expression so the notebook renders the frame.
galaxies

Unnamed: 0,specobjid,plate,mjd,fiberID,subclass,z,oiii_5007_flux,oiii_5007_flux_err,h_alpha_flux,h_alpha_flux_err,...,neiii_3869_flux,neiii_3869_flux_err,oii_3726_flux,oii_3726_flux_err,oii_3729_flux,oii_3729_flux_err,psfMag_g,psfMagErr_g,psfMag_z,psfMagErr_z
0,305144744869128192,271,51883,94,STARBURST,0.049476,76.971610,2.971411,1002.40600,8.275866,...,12.437810,4.509258,178.42940,8.317794,194.339600,8.494857,18.67184,0.026452,17.71873,0.018955
1,305131275851687936,271,51883,45,STARBURST,0.094921,56.613700,3.481388,669.67180,6.949439,...,10.556750,4.273049,89.65728,7.260168,97.169020,6.971335,18.89609,0.031938,17.54852,0.022769
2,306322322795554816,272,51941,282,STARBURST,0.043465,60.383060,3.433703,1448.79300,11.199520,...,9.628464,5.808692,117.33540,10.122940,133.957100,10.596660,18.52655,0.020641,17.54534,0.019796
3,305124403904014336,271,51883,20,,0.155542,8.764762,3.122734,120.04840,3.581961,...,6.877559,3.193254,21.90707,4.156563,12.307080,4.209908,20.13520,0.029316,18.16506,0.026429
4,306326995719972864,272,51941,299,STARBURST,0.084116,46.364630,2.394605,359.65660,3.920645,...,5.442680,3.382548,99.06234,5.877769,112.963200,6.155694,19.54950,0.026726,18.50371,0.031891
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29995,999830217721669632,888,52339,113,STARBURST,0.081071,119.839300,3.048197,557.76840,5.296431,...,13.269840,3.382542,235.61720,6.616136,250.644700,6.883291,18.94547,0.013026,18.16731,0.031358
29996,993122372678608896,882,52370,286,STARFORMING,0.029666,77.741970,2.678211,74.84992,2.040110,...,10.969900,3.287364,37.68483,5.297025,25.738370,5.093920,19.38779,0.022290,18.06018,0.026818
29997,993114950975121408,882,52370,259,STARBURST,0.055820,211.477500,4.594350,1636.70900,13.513470,...,13.644070,4.229692,310.04100,9.232558,362.386700,9.636190,18.05287,0.016326,17.20895,0.021170
29998,993113026829772800,882,52370,252,AGN,0.127547,35.811710,2.501422,75.65469,3.082047,...,4.521542,2.435211,13.48376,2.746356,5.187642,2.577044,19.34168,0.053363,17.63388,0.160981


# Task 1

## Distribution of the data

In [3]:
# Tally how many rows carry each value of the `subclass` column.
Counter(galaxies['subclass'])

Counter({'STARBURST': 5858,
         nan: 5622,
         'AGN': 2044,
         'STARFORMING': 15221,
         'STARFORMING BROADLINE': 115,
         'BROADLINE': 568,
         'STARBURST BROADLINE': 222,
         'AGN BROADLINE': 350})

In [57]:
# Columns exported to the report table: identifiers first, then each measured
# quantity immediately followed by its `_err` column, then the PSF magnitudes
# (value, error) for the g and z bands.
measured_quantities = ['oiii_5007_flux', 'h_alpha_flux', 'h_beta_flux', 'nii_6584_flux',
                       'sii_6717_flux', 'sii_6731_flux', 'oi_6300_flux', 'h_alpha_reqw',
                       'neiii_3869_flux', 'oii_3726_flux', 'oii_3729_flux']

table_cols = (
    ['specobjid', 'subclass', 'z']
    + [name for quantity in measured_quantities for name in (quantity, quantity + '_err')]
    + [prefix + band for band in ('g', 'z') for prefix in ('psfMag_', 'psfMagErr_')]
)

# Write the selected columns of the catalogue as a LaTeX table.
galaxies[table_cols].to_latex('./final_project_report/tab_all_galaxies.tex')

In [8]:
# Flux columns whose distributions are plotted below.
cols = ['oiii_5007_flux', 'h_beta_flux', 'nii_6584_flux', 'sii_6717_flux', 
        'sii_6731_flux','oi_6300_flux', 'h_alpha_flux']

# One histogram (with KDE overlay) per flux column, on a logarithmic x axis.
# Note: `col` stays defined at notebook level after the loop ends.
for col in cols:
    sns.displot(data=galaxies, x=col, log_scale=True, kde=True)

    # Axis label = column name plus the flux unit.
    plt.xlabel(col + r' [$10^{-17}$ erg s$^{-1}$ cm$^{-2}$]')

    # plt.savefig('./data_histograms/distr_' + col + '.pdf')

    plt.show()

oiii_5007_flux
h_beta_flux
nii_6584_flux


  fig = plt.figure(figsize=figsize)


sii_6717_flux
sii_6731_flux
oi_6300_flux
h_alpha_flux


  result = getattr(ufunc, method)(*inputs, **kwargs)


h_alpha_reqw


In [56]:
# Histogram (with KDE overlay) of the `h_alpha_reqw` column, binned on a linear scale.
sns.displot(data=galaxies, x='h_alpha_reqw', log_scale=False, kde=True)

plt.xlabel(r'h_alpha_reqw [Ang]')

# 'symlog' gives a log-like axis that also accepts zero and negative values.
plt.xscale('symlog')

# plt.savefig('./data_histograms/distr_h_alpha_reqw.pdf')

plt.show()

In [9]:
# Histogram (with KDE overlay) of the redshift column `z`.
sns.displot(data=galaxies, x='z', log_scale=False, kde=True)

# Label the axis with the plotted quantity itself. The previous `plt.xlabel(col)`
# reused the loop variable left over from the flux-histogram cell, so the axis
# was labelled with a flux column name (or raised NameError on a fresh kernel).
plt.xlabel('Redshift z')
# plt.savefig('distr_redshift.pdf')

plt.show()

## BPT Diagram

In [10]:
# List the available column names before deriving the line ratios.
galaxies.columns

Index(['specobjid', 'plate', 'mjd', 'fiberID', 'subclass', 'z',
       'oiii_5007_flux', 'oiii_5007_flux_err', 'h_alpha_flux',
       'h_alpha_flux_err', 'h_beta_flux', 'h_beta_flux_err', 'nii_6584_flux',
       'nii_6584_flux_err', 'sii_6717_flux', 'sii_6717_flux_err',
       'sii_6731_flux', 'sii_6731_flux_err', 'oi_6300_flux',
       'oi_6300_flux_err', 'h_alpha_reqw', 'h_alpha_reqw_err',
       'neiii_3869_flux', 'neiii_3869_flux_err', 'oii_3726_flux',
       'oii_3726_flux_err', 'oii_3729_flux', 'oii_3729_flux_err', 'psfMag_g',
       'psfMagErr_g', 'psfMag_z', 'psfMagErr_z'],
      dtype='object')

In [11]:
# Emission-line flux ratios and their base-10 logarithms.
# The ratios are computed column-wise (vectorised) instead of row by row:
# this is much faster and a zero denominator yields inf/NaN rather than raising.
# In the *_log columns a negative ratio becomes NaN and a zero ratio becomes -inf.

# [NII] / H alpha
galaxies['nii_h_alpha'] = galaxies['nii_6584_flux'] / galaxies['h_alpha_flux']
galaxies['nii_h_alpha_log'] = np.log10(galaxies['nii_h_alpha'])

# [OIII] / H beta
galaxies['oiii_h_beta'] = galaxies['oiii_5007_flux'] / galaxies['h_beta_flux']
galaxies['oiii_h_beta_log'] = np.log10(galaxies['oiii_h_beta'])

# [SII] / H alpha, separately for the 6717 and 6731 lines
galaxies['sii6717_h_alpha'] = galaxies['sii_6717_flux'] / galaxies['h_alpha_flux']
galaxies['sii6717_h_alpha_log'] = np.log10(galaxies['sii6717_h_alpha'])

galaxies['sii6731_h_alpha'] = galaxies['sii_6731_flux'] / galaxies['h_alpha_flux']
galaxies['sii6731_h_alpha_log'] = np.log10(galaxies['sii6731_h_alpha'])

# [OI] / H alpha
galaxies['oi_h_alpha'] = galaxies['oi_6300_flux'] / galaxies['h_alpha_flux']
galaxies['oi_h_alpha_log'] = np.log10(galaxies['oi_h_alpha'])

### BPT Diagram with SDSS Spectral Subclasses

In [15]:
# [NII]/H-alpha versus [OIII]/H-beta, with colour, marker and size all keyed
# on the `subclass` column.
fig, ax = plt.subplots(figsize=(10, 8))

sns.scatterplot(data=galaxies, x='nii_h_alpha', y='oiii_h_beta', palette='colorblind',
                hue='subclass', style='subclass', size='subclass', sizes=(80, 20), ax=ax)

ax.set_xlabel(r'[NII] / H$\alpha$', fontsize=13)
ax.set_ylabel(r'[OIII] / H$\beta$', fontsize=13)

# The raw (non-log) ratios are plotted, so both axes are switched to log scaling.
ax.set_xscale('log')
ax.set_yscale('log')

ax.legend(loc='lower left', fontsize=10)

# plt.savefig('./BPT_Diagrams/BPT_diagram_sdss_subclasses.pdf')

plt.show()

### [NII]/H $\alpha$ vs. [OIII]/H $\beta$

#### Kauffmann et al. 2003

In [18]:
def BPT_class_Kauff(oiii_h_beta, nii_h_alpha):
    """Label a point 'Star Forming' or 'AGN' using the curve 0.61 / (x - 0.05) + 1.3.

    With x = log10(nii_h_alpha) and y = log10(oiii_h_beta), 'Star Forming' is
    returned only when y lies below the curve and x < 0.0. Every other case
    returns 'AGN' — including NaN logarithms, since comparisons with NaN are False.
    """
    log_oiii = np.log10(oiii_h_beta)
    log_nii = np.log10(nii_h_alpha)

    under_curve = log_oiii < 0.61 / (log_nii - 0.05) + 1.3
    # The x < 0 cut is there to avoid double lines (original author's note).
    left_of_cut = log_nii < 0.0

    return 'Star Forming' if (under_curve and left_of_cut) else 'AGN'

#### Kewley et al. 2001

In [16]:
def BPT_class_Kew(oiii_h_beta, nii_h_alpha):
    """Label a point 'Star Forming' or 'AGN' using the curve 0.61 / (x - 0.47) + 1.19.

    With x = log10(nii_h_alpha) and y = log10(oiii_h_beta), 'Star Forming' is
    returned only when y lies below the curve and x < 0.4. Every other case
    returns 'AGN' — including NaN logarithms, since comparisons with NaN are False.
    """
    log_oiii = np.log10(oiii_h_beta)
    log_nii = np.log10(nii_h_alpha)

    under_curve = log_oiii < 0.61 / (log_nii - 0.47) + 1.19
    left_of_cut = log_nii < 0.4

    if under_curve and left_of_cut:
        return 'Star Forming'
    return 'AGN'


In [19]:
# Label every row with both demarcation functions (row-wise apply).
# Both functions return 'AGN' whenever their comparisons are False, so rows
# whose ratios have NaN logarithms are labelled 'AGN' as well.
galaxies['BPT Kauffman'] = galaxies.apply(lambda x: BPT_class_Kauff(x['oiii_h_beta'], x['nii_h_alpha']), axis=1)
galaxies['BPT Kewley'] = galaxies.apply(lambda x: BPT_class_Kew(x['oiii_h_beta'], x['nii_h_alpha']), axis=1)

In [20]:
def BPT_final_class(row):
    """Merge the two per-row labels into one.

    `row` must support lookup by the keys 'BPT Kauffman' and 'BPT Kewley'.
    When both labels agree that label is returned; otherwise 'Composite'.
    """
    kauffman_label = row['BPT Kauffman']
    kewley_label = row['BPT Kewley']

    if kauffman_label == kewley_label:
        return kewley_label
    return 'Composite'

In [21]:
# Combine the two per-row labels into a single 'BPT final' column ...
galaxies['BPT final'] = galaxies.apply(lambda x: BPT_final_class(x), axis=1)
# ... and show how many rows received each final label.
Counter(galaxies['BPT final'])

Counter({'Star Forming': 18120, 'Composite': 6023, 'AGN': 5857})

In [22]:
def plot_BPT(data, x, y, hue, xlabel, ylabel, Kauffman=True, Kewley=True, ylim=(-2.0, 2.0), save=False, save_name=None):
    """Scatter-plot a diagnostic diagram and optionally overlay two demarcation curves.

    Parameters
    ----------
    data : DataFrame
        Must contain the columns named by `x`, `y` and `hue`, plus a
        'BPT final' column, which is always used for the marker style.
    x, y : str
        Column names for the axes. The curves are evaluated directly on the
        x values, so `x` is expected to hold log10 ratios.
    hue : str
        Column used for both colour and marker size.
    xlabel, ylabel : str
        Axis labels.
    Kauffman : bool
        Draw 0.61 / (x - 0.05) + 1.3 from the smallest finite x up to 0.
    Kewley : bool
        Draw 0.61 / (x - 0.47) + 1.19 from the smallest finite x up to 0.4.
    ylim : tuple
        y-axis limits.
    save : bool
        When True, write the figure to './BPT_Diagrams/<save_name>.pdf'.
    save_name : str or None
        File stem used when `save` is True.

    Raises
    ------
    ValueError
        If `save` is True but `save_name` is None.

    Notes
    -----
    The curve coefficients are fixed and do not depend on which column `x`
    names. NOTE(review): confirm they are appropriate for every x axis this
    function is called with.
    """
    # Fail before drawing anything, rather than with a TypeError when the path is built.
    if save and save_name is None:
        raise ValueError("save_name must be provided when save=True")

    plt.figure(figsize=(12, 8))

    plt.ylim(ylim)

    sns.scatterplot(data=data, x=x, y=y, palette='colorblind',
                    hue=hue, style='BPT final', size=hue, sizes=(30, 15))

    # Left end of the curves: taken from the frame that was passed in (not from
    # a notebook-level variable). Infinities are ignored so that a -inf
    # logarithm cannot turn the whole linspace into NaN.
    x_min = data[x].replace([np.inf, -np.inf], np.nan).min()

    # ------------------------------------------------------------------------------------------------
    # Kauffman
    if Kauffman:
        x_to_plot = np.linspace(x_min, 0)
        plt.plot(x_to_plot, 0.61 / (x_to_plot - 0.05) + 1.3, label='Kauffman et al. 2003',
                 linewidth=1.5, alpha=0.8, c='k', linestyle='dashdot')

    # ------------------------------------------------------------------------------------------------
    # Kewley
    if Kewley:
        x_to_plot_1 = np.linspace(x_min, 0.4)
        plt.plot(x_to_plot_1, 0.61 / (x_to_plot_1 - 0.47) + 1.19, label='Kewley et al. 2001',
                 linewidth=1.5, alpha=0.8, c='k', linestyle='dashed')

    # ------------------------------------------------------------------------------------------------

    plt.xlabel(xlabel, fontsize=13)
    plt.ylabel(ylabel, fontsize=13)

    plt.legend(loc='lower left', fontsize=12)

    if save:
        plt.savefig('./BPT_Diagrams/' + save_name + '.pdf')

    plt.show()

In [23]:
# log [NII]/H-alpha vs log [OIII]/H-beta, coloured by the combined 'BPT final'
# label, with both demarcation curves drawn.
plot_BPT(
    data=galaxies,
    x='nii_h_alpha_log',
    y='oiii_h_beta_log',
    hue='BPT final',
    xlabel=r'log [NII] / H$\alpha$',
    ylabel=r'log [OIII] / H$\beta$',
    Kauffman=True,
    Kewley=True,
)  # to export the figure add: save=True, save_name='BPT_niiHa_oiiiHb'

### [SII]/H $\alpha$ vs. [OIII]/H $\beta$

In [24]:
# log [SII] 6717/H-alpha vs log [OIII]/H-beta, coloured by 'BPT final'.
# NOTE(review): plot_BPT draws its curves with fixed coefficients whatever the
# x column is — confirm they are meant to be shown on this [SII] axis.
plot_BPT(
    data=galaxies,
    x='sii6717_h_alpha_log',
    y='oiii_h_beta_log',
    hue='BPT final',
    xlabel=r'log [SII] 6717 / H$\alpha$',
    ylabel=r'log [OIII] / H$\beta$',
    Kauffman=True,
    Kewley=True,
)  # to export the figure add: save=True, save_name='BPT_sii6717Ha_oiiiHb'

In [26]:
# log [SII] 6731/H-alpha vs log [OIII]/H-beta, coloured by 'BPT final'.
# NOTE(review): plot_BPT draws its curves with fixed coefficients whatever the
# x column is — confirm they are meant to be shown on this [SII] axis.
plot_BPT(
    data=galaxies,
    x='sii6731_h_alpha_log',
    y='oiii_h_beta_log',
    hue='BPT final',
    xlabel=r'log [SII] 6731 / H$\alpha$',
    ylabel=r'log [OIII] / H$\beta$',
    Kauffman=True,
    Kewley=True,
)  # to export the figure add: save=True, save_name='BPT_sii6731Ha_oiiiHb'

In [27]:
# Combined [SII]/H-alpha: arithmetic mean of the 6717 and 6731 ratios, then its log10.
# NOTE(review): this is the mean of the two ratios, i.e. half of their sum —
# confirm that the mean (rather than the summed doublet) is what is intended.
sii_ratio_mean = (galaxies['sii6717_h_alpha'] + galaxies['sii6731_h_alpha']) / 2
galaxies['sii_h_alpha'] = sii_ratio_mean
galaxies['sii_h_alpha_log'] = np.log10(galaxies['sii_h_alpha'])

In [28]:
# Combined log [SII]/H-alpha vs log [OIII]/H-beta, coloured by 'BPT final'.
# NOTE(review): plot_BPT draws its curves with fixed coefficients whatever the
# x column is — confirm they are meant to be shown on this [SII] axis.
plot_BPT(
    data=galaxies,
    x='sii_h_alpha_log',
    y='oiii_h_beta_log',
    hue='BPT final',
    xlabel=r'log [SII] 6717, 6731 / H$\alpha$',
    ylabel=r'log [OIII] / H$\beta$',
    Kauffman=True,
    Kewley=True,
)  # to export the figure add: save=True, save_name='BPT_sii67176731Ha_oiiiHb'

### [OIII]/H $\beta$ vs [OI]/H $\alpha$

In [29]:
# log [OI]/H-alpha vs log [OIII]/H-beta, coloured by 'BPT final'.
# NOTE(review): plot_BPT draws its curves with fixed coefficients whatever the
# x column is — confirm they are meant to be shown on this [OI] axis.
plot_BPT(
    data=galaxies,
    x='oi_h_alpha_log',
    y='oiii_h_beta_log',
    hue='BPT final',
    xlabel=r'log [OI] / H$\alpha$',
    ylabel=r'log [OIII] / H$\beta$',
    Kauffman=True,
    Kewley=True,
)  # to export the figure add: save=True, save_name='BPT_oiHa_oiiiHb'

## WHAN Diagram

In [35]:
# Two derived views of the `h_alpha_reqw` column.
h_alpha_ew = galaxies['h_alpha_reqw']
# log10 of the signed value: NaN wherever the value is negative.
galaxies['h_alpha_reqw_log'] = np.log10(h_alpha_ew)
# Magnitude of the value, usable on a logarithmic axis.
galaxies['h_alpha_reqw_abs'] = np.abs(h_alpha_ew)

  """Entry point for launching an IPython kernel.


In [36]:
# Summary statistics of the raw, absolute and log10 columns side by side;
# the `count` row shows how many values of each are non-missing.
galaxies[['h_alpha_reqw', 'h_alpha_reqw_abs', 'h_alpha_reqw_log']].describe()

Unnamed: 0,h_alpha_reqw,h_alpha_reqw_abs,h_alpha_reqw_log
count,30000.0,30000.0,1331.0
mean,-25.592419,25.632868,-0.500455
std,33.598975,33.568126,0.461082
min,-950.6796,0.000136,-3.694147
25%,-33.065627,6.971645,-0.69358
50%,-17.791285,17.791285,-0.398905
75%,-6.971645,33.065627,-0.185971
max,1.942299,950.6796,0.288316


In [48]:
# WHAN diagram: [NII]/H-alpha ratio against the absolute H-alpha equivalent
# width, with colour, marker and size all keyed on the 'BPT final' label.
fig, ax = plt.subplots(figsize=(12, 8))

sns.scatterplot(data=galaxies, x='nii_h_alpha', y='h_alpha_reqw_abs', palette='colorblind',
                hue='BPT final', style='BPT final', size='BPT final', sizes=(30, 15), ax=ax)

# Plot window and positions of the reference lines (in data units).
y_bottom, y_top = 10**(-4.2), 10**(3.2)
x_divide, x_right = 10**(-0.4), 10

ax.set_ylim(y_bottom, y_top)
ax.set_xlim(10**(-3), x_right)

# Vertical divider spanning the full height of the plot window.
ax.vlines(x_divide, y_bottom, y_top, linewidth=1.2, alpha=0.8, colors='k', linestyle='solid',
          label=r'[NII] / H$\alpha = 0.4$')

# Horizontal reference lines, drawn only to the right of the vertical divider.
ax.hlines(6, x_divide, x_right, linewidth=1.5, alpha=0.8, colors='k', linestyle='dashdot',
          label=r'$W_{H_{\alpha}} = 6$ [Ang], Kewley+2006')
ax.hlines(3, x_divide, x_right, linewidth=1.5, alpha=0.8, colors='k', linestyle='dashed',
          label=r'$W_{H_{\alpha}} = 3$ [Ang]')

ax.set_xlabel(r'log [NII] / H$\alpha$', fontsize=13)
ax.set_ylabel(r'log $W_{H_{\alpha}}$ [Ang]', fontsize=13)

# Raw values are plotted; the logarithm comes from the axis scaling.
ax.set_xscale('log')
ax.set_yscale('log')

ax.legend(loc='lower left', fontsize=10)

# plt.savefig('WHAN_diagram.pdf')

plt.show()

# Task 2

## Defining Sub-Sample

In [61]:
# (rows, columns) of the full frame, including the derived columns added above.
galaxies.shape

(30000, 49)

In [67]:
# Sub-sample: keep only galaxies whose neiii_3869_flux, oii_3729_flux and
# oii_3726_flux each have a signal-to-noise ratio (flux / flux_err) >= 5.
# Rows where a ratio is NaN fail the comparison and are dropped.
snr_min = 5
snr_lines = ['oii_3729_flux', 'oii_3726_flux', 'neiii_3869_flux']

high_snr = pd.Series(True, index=galaxies.index)
for line in snr_lines:
    high_snr &= (galaxies[line] / galaxies[line + '_err']) >= snr_min

# .copy() makes the sub-sample an independent frame, so columns can be added
# to it later without pandas raising SettingWithCopyWarning.
sub_gals = galaxies.loc[high_snr].copy()
sub_gals

Unnamed: 0,specobjid,plate,mjd,fiberID,subclass,z,oiii_5007_flux,oiii_5007_flux_err,h_alpha_flux,h_alpha_flux_err,...,sii6731_h_alpha_log,oi_h_alpha,oi_h_alpha_log,BPT Kauffman,BPT Kewley,BPT final,sii_h_alpha,sii_h_alpha_log,h_alpha_reqw_log,h_alpha_reqw_abs
6,305120005857503232,271,51883,4,AGN,0.126605,327.43820,5.687428,705.0918,7.408197,...,-0.898831,0.036151,-1.441881,AGN,AGN,AGN,0.138145,-0.859665,,30.68871
17,307429256395253760,273,51957,213,STARBURST,0.029190,1060.56300,12.604480,1001.4290,10.360010,...,-1.064346,0.024137,-1.617313,Star Forming,Star Forming,Star Forming,0.103402,-0.985470,,72.30124
24,308576046284826624,274,51913,289,STARBURST,0.114152,245.67390,8.575709,677.8744,5.318053,...,-0.915800,0.035004,-1.455888,Star Forming,Star Forming,Star Forming,0.146932,-0.832882,,91.37591
39,309656041530877952,275,51910,122,STARBURST,0.067449,560.65180,5.536908,539.7371,4.685007,...,-1.030080,0.029464,-1.530704,Star Forming,Star Forming,Star Forming,0.106524,-0.972551,,62.84650
49,310804481409312768,276,51909,204,STARFORMING,0.048573,78.79752,2.291931,225.5869,2.767411,...,-0.877706,0.031267,-1.504919,Star Forming,Star Forming,Star Forming,0.160592,-0.794277,,37.98656
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29932,1352222875410524160,1201,52674,62,STARFORMING,0.238385,66.92245,2.532567,171.6186,3.587941,...,-0.924400,0.032596,-1.486835,AGN,Star Forming,Composite,0.125833,-0.900205,,20.35410
29939,1348887781681686528,1198,52669,217,STARBURST,0.125609,272.57330,4.926528,1204.5940,11.103860,...,-0.934943,0.028847,-1.539901,Star Forming,Star Forming,Star Forming,0.136547,-0.864718,,84.84679
29944,1349991966804305920,1199,52703,138,AGN BROADLINE,0.088367,1104.59600,9.757991,1085.1270,10.672690,...,-0.457517,0.142714,-0.845533,AGN,AGN,AGN,0.333590,-0.476787,,63.14660
29957,1581967481851897856,1405,52826,284,STARBURST,0.049196,1087.17700,12.000620,2786.4480,22.237410,...,-1.015682,0.020934,-1.679155,Star Forming,Star Forming,Star Forming,0.109314,-0.961322,,111.11170


## Redshift k-Correction

In [69]:
# g - z colour: difference between the g-band and z-band PSF magnitude columns.
sub_gals['g_z_color'] = sub_gals['psfMag_g'] - sub_gals['psfMag_z']

In [70]:
# Summary statistics of the colour, to check its range before it is used further.
sub_gals['g_z_color'].describe()

count    3445.000000
mean        0.944964
std         0.578212
min        -2.364340
25%         0.491480
50%         0.861770
75%         1.445640
max         7.332670
Name: g_z_color, dtype: float64

In [68]:
import calc_kcor

In [None]:
# Sanity check of the K-correction routine with a single hand-picked input.
# `import calc_kcor` binds the *module*, so the function has to be reached as an
# attribute; calling the module object itself raises TypeError.
# NOTE(review): assumes the module exposes
# calc_kcor(filter_name, redshift, colour_name, colour_value) — confirm against calc_kcor.py.
calc_kcor.calc_kcor('g', 0.2, 'g - r', 1.1)

In [None]:
# g-band K-correction for every galaxy in the sub-sample, from its redshift and
# its g - z colour. Three fixes relative to the previous version:
#   * the function is called through the module (`import calc_kcor` binds the module);
#   * the second argument is the redshift column `z`, not the magnitude `psfMag_g`;
#   * the result is stored under a descriptive column name instead of ''.
# NOTE(review): assumes calc_kcor.calc_kcor(filter_name, redshift, colour_name, colour_value)
# and that the 'g' / 'g - z' combination is supported — confirm against calc_kcor.py.
sub_gals['kcor_g'] = sub_gals.apply(
    lambda x: calc_kcor.calc_kcor('g', x['z'], 'g - z', x['g_z_color']), axis=1)