In [None]:
import numpy as np
import pandas as pd
import SAGA

from SAGA import ObjectCuts as C

In [None]:
# Restore variables that were saved with `%store` by the SAGA-halo-ellipticities
# notebook (already filtered, with cartesian coordinates).
# NOTE(review): these names must already exist in IPython's store, i.e. that
# notebook presumably has to be run before this one -- confirm.
%store -r saga_sats 
%store -r VSMDPL_subs
%store -r ellipticity_df_VSMDPL
%store -r ellipticity_df_SAGA

In [None]:
# Give both simulation tables the same host-id column name ('HOSTID') that the
# rest of this notebook groups and merges on.
for _sim_frame in (ellipticity_df_VSMDPL, VSMDPL_subs):
    _sim_frame.rename(columns={'upid': 'HOSTID'}, inplace=True)


In [None]:
def get_expected_ellipticities(data_set, n_realizations=10000, rng=None):
    """Monte-Carlo estimate of the ellipticity expected from random angles.

    For every host, the satellites keep their host-centric radii but are each
    given a random direction in the plane. The ellipticity of every such mock
    system is computed from its second moments (Qxx, Qyy, Qxy), and the mean
    and standard deviation over all mock systems are reported.

    Parameters
    ----------
    data_set : mapping or DataFrame
        Must provide ``data_set['HOSTID']`` (one id per host) and
        ``data_set['radii']`` (one sequence of satellite radii per host).
    n_realizations : int, optional
        Number of mock systems generated per host (default 10000).
    rng : numpy.random.Generator, optional
        Source of random numbers. When omitted, the global ``np.random``
        state is used, so ``np.random.seed`` still controls the result.

    Returns
    -------
    list of list
        One row per host:
        ``[host id, mean ellipticity, ellipticity std, number of satellites,
        std of the radii]``.
    """
    # Both the legacy module and a Generator expose standard_normal(size).
    draw_normal = (np.random if rng is None else rng).standard_normal

    expected_ellipticity_for_radial_dist = []  # rows to be made into a data frame

    for host_id, radii in zip(data_set['HOSTID'], data_set['radii']):
        radii = np.asarray(radii, dtype=float)
        num_sats = len(radii)
        radial_stdev = np.std(radii)

        # Random directions: normalise 2D Gaussian draws to unit length.
        # Shape is (realization, satellite, spatial dimension).
        coords = draw_normal((n_realizations, num_sats, 2))
        calc_radii = np.sqrt((coords**2).sum(axis=2))
        unit_vectors = coords / calc_radii[:, :, np.newaxis]

        # Move each satellite out or in radially to its original radius.
        scaled_coords = unit_vectors * radii[np.newaxis, :, np.newaxis]
        x = scaled_coords[:, :, 0]
        y = scaled_coords[:, :, 1]

        # Second (quadrupole) moments of each mock system, averaged over satellites.
        q_xx = (x**2).mean(axis=1)
        q_yy = (y**2).mean(axis=1)
        q_xy = (x*y).mean(axis=1)

        # Qxx*Qyy - Qxy^2 is mathematically non-negative, but rounding can push
        # it slightly below zero (always degenerate for a single satellite);
        # clamp it so the square root does not turn the whole average into NaN.
        radicand = np.maximum(q_xx*q_yy - q_xy**2, 0.0)
        e_denominator = q_xx + q_yy + 2*np.sqrt(radicand)

        e_1 = (q_xx - q_yy) / e_denominator
        e_2 = 2*q_xy / e_denominator

        ellipticities = np.sqrt(e_1**2 + e_2**2)
        ellipticity_avg = np.mean(ellipticities)
        ellipticity_std = np.std(ellipticities)

        expected_ellipticity_for_radial_dist.append(
            [host_id, ellipticity_avg, ellipticity_std, num_sats, radial_stdev]
        )
    return expected_ellipticity_for_radial_dist

In [None]:
def get_prominence(actual, randomized):
    """Compare measured ellipticities with the randomized expectation.

    Parameters
    ----------
    actual : DataFrame
        Must contain 'HOSTID' and the measured ellipticity column 'e_3'.
    randomized : DataFrame
        Must contain 'HOSTID', 'ellipticity_exp', 'ellipticty_stdev',
        'num_sats' and 'radial_stdev'.

    Returns
    -------
    DataFrame
        One row per host present in both inputs, with the columns above plus
        'z_score' = (e_3 - ellipticity_exp) / ellipticty_stdev and the boolean
        'outlier' flag (|z_score| >= 2), sorted by ascending 'z_score'.
        A zero 'ellipticty_stdev' yields an infinite or NaN z-score.
    """
    merged = pd.merge(actual, randomized, on='HOSTID')

    # Explicit copy so the new columns are added to an independent frame
    # rather than to a slice of the merge result.
    prominence_table = merged[
        ['HOSTID', 'e_3', 'ellipticity_exp', 'ellipticty_stdev', 'num_sats', 'radial_stdev']
    ].copy()

    prominence_table['z_score'] = (
        (prominence_table['e_3'] - prominence_table['ellipticity_exp'])
        / prominence_table['ellipticty_stdev']
    )
    prominence_table['outlier'] = prominence_table['z_score'].abs() >= 2

    # sort_values returns a new frame; the original discarded it, so the
    # table was returned unsorted.
    prominence_table = prominence_table.sort_values(by=['z_score'])
    return prominence_table

<h2>SAGA</h2>

In [None]:
# Projected (x, y) offset of every satellite from its host, then the distance 'R'.
saga_dx = saga_sats['X'] - saga_sats['HOST_X']
saga_dy = saga_sats['Y'] - saga_sats['HOST_Y']
saga_sats['R'] = np.sqrt(saga_dx**2 + saga_dy**2)
display(saga_sats)

In [None]:
# One row per host: 'HOSTID' plus the list of its satellites' radii.
SAGA_prominence = (
    saga_sats
    .groupby('HOSTID')['R']
    .apply(list)
    .rename('radii')
    .reset_index()
)
SAGA_prominence


In [None]:
# Randomized-angle expectation for every SAGA host, tabulated per host.
expected_ellipticity_for_radial_dist = get_expected_ellipticities(SAGA_prominence)
expected_ellipticities_SAGA = pd.DataFrame(
    expected_ellipticity_for_radial_dist,
    columns=[
        'HOSTID',
        'ellipticity_exp',
        'ellipticty_stdev',
        'num_sats',
        'radial_stdev',
    ],
)

In [None]:
# Measured vs. expected ellipticity for the SAGA hosts, shown in z-score order.
prominence_table_SAGA = get_prominence(
    actual=ellipticity_df_SAGA,
    randomized=expected_ellipticities_SAGA,
)
prominence_table_SAGA.sort_values(by='z_score')


<h2>SIM</h2>

<h3>2D</h3>

In [None]:
# Inspect the simulation subhalo table (its 'upid' column was renamed to 'HOSTID' above).
display(VSMDPL_subs)

In [None]:
# Offset of every subhalo from its host along each axis.
sub_dx = VSMDPL_subs['x_adj'] - VSMDPL_subs['x_host']
sub_dy = VSMDPL_subs['y_adj'] - VSMDPL_subs['y_host']
sub_dz = VSMDPL_subs['z_adj'] - VSMDPL_subs['z_host']

# Full 3D distance and its projection onto the x-y plane, stored per row.
VSMDPL_subs['3D_radius'] = np.sqrt(sub_dx**2 + sub_dy**2 + sub_dz**2)
VSMDPL_subs['xy_radius'] = np.sqrt(sub_dx**2 + sub_dy**2)

# One row per host: 'HOSTID' plus the list of its subhalos' projected radii.
VSMDPL_prominence = (
    VSMDPL_subs
    .groupby('HOSTID')['xy_radius']
    .apply(list)
    .rename('radii')
    .reset_index()
)
VSMDPL_prominence

In [None]:
# Randomized-angle expectation for every simulated host, tabulated per host.
expected_ellipticity_for_radial_dist = get_expected_ellipticities(VSMDPL_prominence)
expected_ellipticities_VSMDPL = pd.DataFrame(
    expected_ellipticity_for_radial_dist,
    columns=[
        'HOSTID',
        'ellipticity_exp',
        'ellipticty_stdev',
        'num_sats',
        'radial_stdev',
    ],
)

In [None]:
# Measured vs. expected ellipticity for the simulated hosts, shown in z-score order.
prominence_table_VSMDPL = get_prominence(
    actual=ellipticity_df_VSMDPL,
    randomized=expected_ellipticities_VSMDPL,
)
prominence_table_VSMDPL.sort_values(by='z_score')

In [None]:
# Persist both prominence tables with IPython's %store so that another
# notebook/kernel can load them again with `%store -r`.
%store prominence_table_VSMDPL
%store prominence_table_SAGA