In [None]:
import pandas as pd
import seaborn as sns
import numpy as np
import os
from easyquery import Query
from fast3tree import fast3tree
from dotenv import load_dotenv

# Read a local .env file (if any) into the process environment so that the
# os.getenv() lookup for the data path can find its value.
load_dotenv()

In [None]:
# Location of the VSMDPL parquet catalogue, read from the environment
# (None when the variable is not set).
VSMDPL_DATA = os.environ.get('VSMDPL_DATA')

In [None]:
# Full halo catalogue, loaded from the parquet file named by VSMDPL_DATA.
hosts_and_subs = pd.read_parquet(path=VSMDPL_DATA)

In [None]:
# Host halos are the rows with upid == -1. Take an explicit copy so that adding
# columns to `hosts` later does not operate on a slice of `hosts_and_subs`
# (which is what raises pandas' SettingWithCopyWarning).
hosts = Query("upid == -1").filter(hosts_and_subs).copy()

# The candidate pool for the interloper search is the *entire* catalogue, hosts
# included; the upid != -1 remainder of the split is not used.
# NOTE(review): presumably intentional for an interloper sample - confirm.
subs = hosts_and_subs.copy()

In [None]:
# Side length of the periodic simulation box, in the same units as the
# x / y / z position columns (presumably Mpc/h - TODO confirm).
box_len = 160

## Grab Interlopers

In [None]:
# Line-of-sight threshold: a halo is kept for a host when its redshift-space
# coordinate (z + vz / 100, wrapped into the box) lies within dz_thres of the
# host's, in box-length units.
dz_thres = 2.75

In [None]:
# Redshift-space line-of-sight coordinate: position plus vz / 100, wrapped back
# into [0, box_len). Computed identically for the hosts and the candidate pool.
# (The 100.0 is presumably H0 in units of h km/s/Mpc - TODO confirm.)
for _frame in (hosts, subs):
    _frame["z_vz"] = np.remainder(_frame["z"] + _frame["vz"] / 100.0, box_len)

In [None]:
# Build one frame per host and concatenate once at the end. DataFrame.append was
# removed in pandas 2.0, and appending inside the loop re-copied the accumulated
# frame on every iteration.
interloper_frames = []

# 2-D tree over the projected (x, y) positions of every candidate halo.
with fast3tree(np.vstack([subs["x"].values, subs["y"].values]).T) as tree:
    tree.set_boundaries(0, box_len)
    sub_z_vz = subs["z_vz"].values  # loop-invariant, so extracted once
    for host in hosts.itertuples():
        center = (host.x, host.y)
        # Projected search radius is rvir * 0.001
        # (presumably converting kpc/h to Mpc/h - TODO confirm).
        idx = tree.query_radius(center, host.rvir * 0.001, periodic=True)
        # Line-of-sight offset wrapped into [0, box_len); values just below
        # box_len are small negative offsets across the periodic boundary.
        dz = np.remainder(sub_z_vz[idx] - host.z_vz, box_len)
        idx = idx[(dz < dz_thres) | (dz > box_len - dz_thres)]
        # NOTE(review): `subs` is the full catalogue, so the host itself
        # (zero projected distance, dz == 0) appears to land in its own group -
        # confirm this is wanted for the satellite counts downstream.
        interloper_frames.append(
            subs.iloc[idx].assign(
                interloper_host_id=host.id,
                interloper_host_x=host.x,
                interloper_host_y=host.y,
                interloper_host_vmax_mpeak=host.vmax_mpeak,
            )
        )

# The original index of `subs` is kept (a halo matched to several hosts appears
# once per host), exactly as the append-based version did.
interloper_groups = pd.concat(interloper_frames) if interloper_frames else pd.DataFrame()

In [None]:
# Unwrap positions across the periodic boundary: when a halo sits more than half
# a box away from its host along an axis, shift it by one box length so that
# (adjusted position - host position) is the true short separation.
# Whole-column assignment replaces the previous `df[col].mask(..., inplace=True)`
# calls, which mutate a temporary under pandas Copy-on-Write, and the box size
# now comes from `box_len` instead of a hard-coded 160.
half_box = box_len / 2
for axis in ("x", "y"):
    offset = interloper_groups[axis] - interloper_groups["interloper_host_" + axis]
    interloper_groups[axis + "_adj"] = (
        interloper_groups[axis]
        .mask(offset > half_box, interloper_groups[axis] - box_len)
        .mask(offset < -half_box, interloper_groups[axis] + box_len)
    )

In [None]:
# Snapshot the interloper table under a dataset-specific name and persist it with
# IPython's %store so another notebook can restore it with `%store -r`.
VSMDPL_subs_interlopers = interloper_groups.copy()
%store VSMDPL_subs_interlopers

In [None]:
# Keep members with vmax_mpeak >= 46.77, then keep only the systems that still
# have more than two members after that cut.
groups_cut = (
    interloper_groups.loc[interloper_groups['vmax_mpeak'] >= 46.77]
    .copy()
    .groupby(['interloper_host_id'])
    .filter(lambda members: len(members) > 2)
)

In [None]:
# Mean group size: total member rows divided by the number of distinct hosts.
n_member_rows = len(groups_cut)
n_systems = groups_cut['interloper_host_id'].unique().size
average_num_subs = n_member_rows / n_systems
print("Average # subhalos per system: {}".format(average_num_subs))

<h2>Ellipticity calculations</h2>

<h4>Quadrupole moments</h4>

In [None]:
# Per-system second moments of the projected member positions about the host:
#   Q_xx = <dx^2>, Q_yy = <dy^2>, Q_xy = <dx * dy>
# The offsets use x_adj / y_adj (the periodic-unwrapped positions) rather than
# the raw x / y: members found across the box boundary would otherwise add
# offsets of roughly box_len and swamp the moments of that system.
host_dx = groups_cut['x_adj'] - groups_cut['interloper_host_x']
host_dy = groups_cut['y_adj'] - groups_cut['interloper_host_y']

ellipticity_df = (
    groups_cut.assign(
        x_diff_sq=host_dx**2,
        y_diff_sq=host_dy**2,
        x_diff_y_diff=host_dy * host_dx,
    )
    .groupby('interloper_host_id', as_index=False)[['x_diff_sq', 'y_diff_sq', 'x_diff_y_diff']]
    .mean()
    .rename(columns={'x_diff_sq': 'Q_xx', 'y_diff_sq': 'Q_yy', 'x_diff_y_diff': 'Q_xy'})
)
ellipticity_df


<h4>Ellipticity components</h4>

In [None]:
def _ellipticity_denominator(Q_xx, Q_yy, Q_xy):
    """Shared denominator of e_1 and e_2: Q_xx + Q_yy + 2 * sqrt(Q_xx*Q_yy - Q_xy^2)."""
    root_det = (Q_xx * Q_yy - Q_xy**2) ** (1 / 2)
    return Q_xx + Q_yy + 2 * root_det


def e_1(Q_xx, Q_yy, Q_xy):
    """First ellipticity component, (Q_xx - Q_yy) over the shared denominator.

    Works element-wise on scalars or on array-likes such as pandas Series.
    """
    numerator = Q_xx - Q_yy
    return numerator / _ellipticity_denominator(Q_xx, Q_yy, Q_xy)


def e_2(Q_xx, Q_yy, Q_xy):
    """Second ellipticity component, 2 * Q_xy over the shared denominator."""
    numerator = 2 * Q_xy
    return numerator / _ellipticity_denominator(Q_xx, Q_yy, Q_xy)


def e_3(e_1, e_2):
    """Magnitude of the ellipticity, sqrt(e_1^2 + e_2^2)."""
    squared_norm = e_1**2 + e_2**2
    return squared_norm ** (1 / 2)


# Evaluate the ellipticity components column-wise from the quadrupole moments.
_moments = (ellipticity_df['Q_xx'], ellipticity_df['Q_yy'], ellipticity_df['Q_xy'])
ellipticity_df['e_1'] = e_1(*_moments)
ellipticity_df['e_2'] = e_2(*_moments)
# e_3 is the magnitude built from the two components just stored.
ellipticity_df['e_3'] = e_3(ellipticity_df['e_1'], ellipticity_df['e_2'])

In [None]:
# Dark plotting theme: one background colour shared by figure / axes / saved
# output, and one light foreground shade shared by all text and tick colours.
_BACKGROUND = "212946"
_FOREGROUND = "0.9"

custom_style = {
    **{key: _BACKGROUND for key in ("figure.facecolor", "axes.facecolor", "savefig.facecolor")},
    "grid.color": "2A3459",
    **{key: _FOREGROUND for key in ("text.color", "axes.labelcolor", "xtick.color", "ytick.color")},
    "grid.linestyle": "-",
    "lines.solid_capstyle": "round",
}

# Apply seaborn's "darkgrid" preset with the custom_style overrides on top.
sns.set_style(style="darkgrid", rc=custom_style)

In [None]:
# Density histogram of the ellipticity magnitude, drawn as an unfilled polygon.
ax = sns.histplot(ellipticity_df["e_3"], bins=7, element='poly', stat="density", fill=False)
# Raw string: "\l" is not a valid Python escape sequence, so the non-raw form
# emits a SyntaxWarning on recent Python versions. The rendered text is unchanged.
# NOTE(review): e_3 evaluates to 0 when Q_xx == Q_yy and Q_xy == 0 and grows with
# anisotropy, so the arrow directions and the "(b/a)" tag look inverted for this
# quantity - confirm before publishing the figure.
ax.set(xlabel=r"$\longleftarrow$ more elliptical         (b/a)           more spherical $\longrightarrow$")

<h4>Store the ellipticity DataFrame for use in other notebooks</h4>

In [None]:
# Copy the per-system ellipticity table under a dataset-specific name and persist
# it with IPython's %store so another notebook can restore it with `%store -r`.
ellipticity_df_VSMDPL_interlopers = ellipticity_df.copy()
%store ellipticity_df_VSMDPL_interlopers

<h2>Highest luminosity analysis</h2>

In [None]:
# add num satellites
groups_cut['num_satellites'] = groups_cut.groupby('interloper_host_id')['interloper_host_id'].transform('count')
highest_luminosity_subs = groups_cut.groupby('interloper_host_id', as_index = False)[['Mr', 'num_satellites']].min()

In [None]:
# Left-join the per-system ellipticity onto the per-system minimum Mr and keep
# only the columns used downstream.
_lum_joined = highest_luminosity_subs.merge(
    ellipticity_df,
    how="left",
    left_on="interloper_host_id",
    right_on="interloper_host_id",
)
highest_luminosity_ellipticity = _lum_joined[['interloper_host_id', 'Mr', 'e_3']]

In [None]:
highest_luminosity_ellipticity_VSMDPL_interlopers = highest_luminosity_ellipticity[~highest_luminosity_ellipticity['e_3'].isna()]
%store highest_luminosity_ellipticity_VSMDPL_interlopers
highest_luminosity_ellipticity_VSMDPL_interlopers

<h2>Highest vmax_mpeak per system vs. ellipticity, with log(vmax_mpeak)</h2>

In [None]:
# Largest member vmax_mpeak in each system.
_vmax_by_host = groups_cut.groupby('interloper_host_id', as_index=False)['vmax_mpeak']
highest_vmax_subs = _vmax_by_host.agg('max')

In [None]:
# Left-join the per-system ellipticity onto the per-system maximum vmax_mpeak and
# keep only the columns used downstream.
_vmax_joined = highest_vmax_subs.merge(
    ellipticity_df,
    how="left",
    left_on="interloper_host_id",
    right_on="interloper_host_id",
)
highest_vmax_ellipticity = _vmax_joined[['interloper_host_id', 'vmax_mpeak', 'e_3']]
highest_vmax_ellipticity

In [None]:
# Natural logarithm (np.log) of the per-system maximum vmax_mpeak.
highest_vmax_ellipticity['log_vmax_mpeak'] = highest_vmax_ellipticity['vmax_mpeak'].pipe(np.log)

In [None]:
highest_vmax_ellipticity_VSMDPL_interlopers = highest_vmax_ellipticity[~highest_vmax_ellipticity['e_3'].isna()]
%store highest_vmax_ellipticity_VSMDPL_interlopers
highest_vmax_ellipticity_VSMDPL_interlopers

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Scatter of per-system ellipticity magnitude against log(vmax_mpeak).
fig, ax1 = plt.subplots()
# The label used to carry literal double quotes inside the string (they were
# rendered on the axis) and a non-raw "\l" escape; both are fixed here.
# NOTE(review): e_3 is 0 for Q_xx == Q_yy, Q_xy == 0 and grows with anisotropy,
# so the "more elliptical" / "more spherical" arrow directions look inverted -
# confirm before publishing the figure.
ax1.set_xlabel(r'$\longleftarrow$ more elliptical         ellipticity           more spherical $\longrightarrow$')
ax1.set_ylabel('log(vmax_mpeak)')
sp1 = sns.scatterplot(data=highest_vmax_ellipticity_VSMDPL_interlopers, x='e_3', y='log_vmax_mpeak', ax=ax1, s=2)

In [None]:
# savefig does not create missing directories, so make sure the output folder
# exists before writing (a fresh checkout may not have it).
os.makedirs("figures", exist_ok=True)

# sp1 is the Axes returned by seaborn; its .figure attribute is the Figure.
fig = sp1.figure
fig.set_size_inches([6, 4])
fig.savefig("figures/SimVmax_mpeakVsEllipticity.png", dpi=600)

<h2>Average # satellites per vmax_mpeak</h2>

In [None]:
# One row per host: the host's vmax_mpeak and its member count. Both columns are
# constant within a system, so the first entry of each group is taken.
_per_host_cols = ['interloper_host_vmax_mpeak', 'num_satellites']
hosts_num_vmax = groups_cut.groupby('interloper_host_id')[_per_host_cols].first()
hosts_num_vmax

Bin hosts by vmax_mpeak

In [None]:
# Bin hosts into 19 equal-width vmax_mpeak intervals between 135 and 340 and
# average both columns within each bin.
vmax_bin_edges = np.linspace(135, 340, 20)
vmax_bins = pd.cut(hosts_num_vmax['interloper_host_vmax_mpeak'], vmax_bin_edges)

# observed=False is spelled out: grouping by a categorical without it emits a
# FutureWarning on recent pandas, and the default is scheduled to change, which
# would silently drop empty bins from the result.
hosts_num_vmax_averages = hosts_num_vmax.groupby(vmax_bins, observed=False).mean()
hosts_num_vmax_averages

In [None]:
# Mean member count against mean host vmax_mpeak, one point per vmax bin.
fig2, ax2 = plt.subplots()
ax2.set_xlabel('vmax_mpeak')
ax2.set_ylabel('average # satellites')
# ax2 is the current axes here, so drawing through it matches plt.plot.
sp2 = ax2.plot(
    hosts_num_vmax_averages['interloper_host_vmax_mpeak'],
    hosts_num_vmax_averages['num_satellites'],
)

In [None]:
# Member count against host vmax_mpeak, one point per host.
fig3, ax3 = plt.subplots()
ax3.set_xlabel('vmax_mpeak')
ax3.set_ylabel('# satellites')
# ax3 is the current axes here, so drawing through it matches plt.scatter.
sp3 = ax3.scatter(
    hosts_num_vmax['interloper_host_vmax_mpeak'],
    hosts_num_vmax['num_satellites'],
    s=2,
)

In [None]:
import scipy as sp
# Import the submodule explicitly: `import scipy` alone is not guaranteed to make
# `scipy.stats` available as an attribute on every SciPy version.
from scipy import stats

# Spearman rank correlation between host vmax_mpeak and member count.
spearmanr_results = stats.spearmanr(
    hosts_num_vmax['interloper_host_vmax_mpeak'],
    hosts_num_vmax['num_satellites'],
    axis=0,
    nan_policy='propagate',
    alternative='two-sided',
)
spearmanr_results

In [None]:
# savefig does not create missing directories, so make sure the output folder
# exists before writing (a fresh checkout may not have it).
os.makedirs("figures", exist_ok=True)

# sp3 is the artist returned by the scatter call; .figure is the Figure it is drawn on.
fig3 = sp3.figure
fig3.set_size_inches([6, 4])
fig3.savefig("figures/NumsubsVsVmax_mpeak_scatter.png", dpi=600)