In [32]:
# region Imports

#* --------------------------------------------------------------------------------
#* General purpose imports
#* --------------------------------------------------------------------------------
import pandas as pd
import numpy as np

from scipy.stats import fisher_exact, barnard_exact
import pickle as pkl


#* --------------------------------------------------------------------------------
#* Personal librairies imports
#* --------------------------------------------------------------------------------
import sys, os
src_path = os.path.abspath(os.path.join("..", "src"))
if src_path not in sys.path:
    sys.path.insert(0, src_path)
from utils import astro_utils as au
from utils import maths_utils  as mu
from utils import stats_utils  as su
from utils import graphics_utils  as gu
from utils import labels_utils  as lu
from utils import pandas_utils  as pu


#* --------------------------------------------------------------------------------
#* Project modules imports
#* --------------------------------------------------------------------------------
import sSFR
import generate_report as report


#* --------------------------------------------------------------------------------
#* Global variables
#* --------------------------------------------------------------------------------
import config as co

#* --------------------------------------------------------------------------------
#* Project data
#* --------------------------------------------------------------------------------

with open(co.DATA_PATH + co.PROCESS_SAMPLES, "rb") as file:
            sample = pkl.load(file)



# endregion

In [4]:
sample['CG4_Gals'].keys()

Index(['objid', 'specobjid', 'Group', 'RA', 'Dec', 'M_r', 'Lum', 'z',
       'dist2BGG', 'lgm', 'sfr', 'sSFR', 'rank_dist', 'rank_M', 'RA_BGG',
       'Dec_BGG', 'M_BGG', 'sSFR_status', 'p_E', 'p_S', 'morphology',
       'sSFR_raw'],
      dtype='object')

In [42]:
for cat in co.SAMPLE.keys():
    df = sample[cat+co.GASUFF] if 'df' not in locals() else pd.concat([df, sample[cat+co.GASUFF]], ignore_index=True)
    BGGs = df[df['rank_M'] == 1]
    print(cat)
    for status in co.sSFR_status:
        n_BGGs = len(BGGs[BGGs['sSFR_status'] == status])
        total = len(BGGs)
        print(f".  {status}: {n_BGGs} / {total} = {n_BGGs/total:.3f}")

CG4
.  Quenched: 3 / 186 = 0.016
.  Passive: 33 / 186 = 0.177
.  Starforming: 150 / 186 = 0.806
Control4B
.  Quenched: 13 / 885 = 0.015
.  Passive: 140 / 885 = 0.158
.  Starforming: 732 / 885 = 0.827
Control4C
.  Quenched: 23 / 1637 = 0.014
.  Passive: 258 / 1637 = 0.158
.  Starforming: 1356 / 1637 = 0.828
RG4
.  Quenched: 23 / 1693 = 0.014
.  Passive: 273 / 1693 = 0.161
.  Starforming: 1397 / 1693 = 0.825


sSFR_status
Starforming    50
Passive        11
Quenched        1
Name: count, dtype: int64

In [33]:
df['Group'].value_counts()

Group
333    24
165    13
341    12
154    11
188    10
       ..
276     4
131     4
142     4
147     4
175     4
Name: count, Length: 62, dtype: int64

In [13]:
def morph_agg(x):    
    frac_E = len(x['morphology']==co.Morphologies[0]) / len(x)
    frac_S = len(x['morphology']==co.Morphologies[1]) / len(x)
    y = x.loc[x['morphology']!=co.Morphologies[2]]
    frac_E_noU = len(y['morphology']==co.Morphologies[0]) / len(y) if len(y) > 0 else np.nan
    frac_S_noU = len(y['morphology']==co.Morphologies[1]) / len(y) if len(y) > 0 else np.nan

    return pd.Series({
        'frac_E': frac_E,
        'frac_S': frac_S,
        'frac_E_noU': frac_E_noU,
        'frac_S_noU': frac_S_noU
    }, index=['Fraction_Elliptical', 'Fraction_Spiral', 'Fraction_Elliptical_noU', 'Fraction_Spiral_noU'])      
     

df.groupby('Group').apply(morph_agg).reset_index     

  df.groupby('Group').apply(morph_agg).reset_index


<bound method DataFrame.reset_index of        Fraction_Elliptical  Fraction_Spiral  Fraction_Elliptical_noU  \
Group                                                                  
25                     NaN              NaN                      NaN   
27                     NaN              NaN                      NaN   
28                     NaN              NaN                      NaN   
33                     NaN              NaN                      NaN   
42                     NaN              NaN                      NaN   
...                    ...              ...                      ...   
374                    NaN              NaN                      NaN   
386                    NaN              NaN                      NaN   
393                    NaN              NaN                      NaN   
394                    NaN              NaN                      NaN   
405                    NaN              NaN                      NaN   

       Fraction_Spiral_n

In [36]:
df

Unnamed: 0,objid,specobjid,Group,RA,Dec,M_r,Lum,z,dist2BGG,lgm,...,rank_dist,rank_M,RA_BGG,Dec_BGG,M_BGG,sSFR_status,p_E,p_S,morphology,sSFR_raw
0,1237655108898979844,858040220065490944,25,128.717114,44.637127,-21.823291,3.993157e+10,0.038103,0.000000,11.136960,...,1,1,128.717114,44.637127,-21.823291,Passive,,,Uncertain,-10.754226
1,1237655108898914500,858035272263165952,25,128.655752,44.619226,-20.729999,1.458813e+10,0.037865,2.965431,10.407860,...,3,2,128.717114,44.637127,-21.823291,Starforming,,,Uncertain,-11.769536
2,1237655108898914492,858039945187584000,25,128.674476,44.593518,-19.424341,4.382646e+09,0.038568,3.338345,9.313793,...,4,3,128.717114,44.637127,-21.823291,Passive,0.158,0.757,Spiral,-9.516857
3,1237655108898980064,858036371774793728,25,128.751166,44.671599,-19.032422,3.054702e+09,0.037564,2.647204,9.284696,...,2,4,128.717114,44.637127,-21.823291,Passive,0.007,0.993,Spiral,-9.835082
4,1237651533872496853,500074962334279680,27,129.932110,53.013805,-21.884657,4.225351e+10,0.044701,0.000000,10.942540,...,1,1,129.932110,53.013805,-21.884657,Starforming,,,Uncertain,-12.309706
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
243,1237655375185117425,921148341943822336,394,251.209915,38.960246,-19.929320,6.977952e+09,0.038523,4.161014,9.950853,...,4,4,251.141803,38.920509,-22.536017,Passive,0.467,0.400,Uncertain,-10.728016
244,1237662302453039177,1899407216310511616,405,256.367088,23.153194,-21.876334,4.193084e+10,0.031035,0.000000,11.157200,...,1,1,256.367088,23.153194,-21.876334,Passive,,,Uncertain,-11.237338
245,1237662302452974056,1901734881034004480,405,256.249607,23.169049,-21.020003,1.905466e+10,0.030044,6.859332,10.185560,...,4,2,256.367088,23.153194,-21.876334,Passive,0.036,0.964,Spiral,-9.766182
246,1237662302452974098,1899406666554697728,405,256.256739,23.157344,-20.585584,1.277126e+10,0.030752,6.380235,9.701640,...,3,3,256.367088,23.153194,-21.876334,Passive,0.000,0.947,Spiral,-9.677601


In [18]:
orig_df = pd.read_csv(co.DATA_PATH + "CG4_Gals.csv")

In [29]:
orig_df['Group'].value_counts().value_counts()

count
4    78
Name: count, dtype: int64

In [31]:
sample['CG4_Gals']['Group'].value_counts().value_counts()

count
4     27
10     8
8      7
7      6
6      6
5      4
24     1
13     1
12     1
11     1
Name: count, dtype: int64

In [25]:
sample['CG4_Gals']['Group'].drop_duplicates()

0       25
6       27
10      28
14      33
19      42
      ... 
380    374
384    386
388    393
392    394
396    405
Name: Group, Length: 62, dtype: int64