In [None]:
import pandas as pd
import seaborn as sns
import numpy as np
import os
from easyquery import Query
from dotenv import load_dotenv

# Populate the process environment from a .env file so that the os.getenv
# lookup of the simulation data path in the following cell can succeed.
load_dotenv()

In [None]:
# Path to the simulation catalogue. Change it by editing VSMDPL_DATA in your .env file.
VSMDPL_DATA = os.getenv('VSMDPL_DATA')
if not VSMDPL_DATA:
    # Fail here with an actionable message instead of letting the parquet
    # reader in the next cell choke on a None path.
    raise RuntimeError(
        "Environment variable VSMDPL_DATA is not set; define it in your .env file "
        "so that it points at the simulation parquet file."
    )

In [None]:
# Read the combined host + subhalo catalogue and tag every row with the number
# of rows that share its `upid`. For subhalos this is the satellite count of
# their host; rows with upid == -1 all share a single count, which is not a
# satellite number.
hosts_and_subs = pd.read_parquet(VSMDPL_DATA).assign(
    num_satellites=lambda catalogue: catalogue.groupby('upid')['upid'].transform('count')
)

<h2>Add corrected positions</h2>

- if x - x_host > box_length/2, shift by -box_length

- if x - x_host < -box_length/2 shift by +box_length

<h4>Add host x, y, z</h4>

In [None]:
# Rows with upid == -1 go to `hosts`, all remaining rows to `subs`.
hosts, subs = Query("upid == -1").split(hosts_and_subs)

# Attach the host row to every subhalo (host columns receive a "_host" suffix)
# and order the result by host id.
subs = subs.merge(
    hosts,
    how="left",
    left_on="upid",
    right_on="id",
    suffixes=("", "_host"),
).sort_values(by='upid')

# Number of distinct host ids referenced by the subhalos.
subs['upid'].nunique(dropna=False)

<h4>Add adjusted positions x_adj, y_adj, z_adj</h4>

In [None]:
# Undo periodic wrapping: when a satellite sits more than half a box length
# away from its host along an axis, shift it by one full box length towards
# the host. Satellites within half a box keep their original coordinate.
box_len = 160  # box side length in the units of x/y/z — presumably Mpc/h, TODO confirm
half_box = box_len / 2

for axis in ('x', 'y', 'z'):
    adjusted = f'{axis}_adj'
    offset = subs[axis] - subs[f'{axis}_host']

    subs[adjusted] = subs[axis]
    # Satellite wrapped past the upper edge: bring it back down by one box.
    subs.loc[offset > half_box, adjusted] = subs[axis] - box_len
    # Satellite wrapped past the lower edge: push it up by one box.
    subs.loc[offset < -half_box, adjusted] = subs[axis] + box_len

In [None]:
# Snapshot the position-corrected subhalo table and persist it with IPython's
# %store magic so that other notebooks can load it with `%store -r`.
# NOTE(review): a later cell rebinds VSMDPL_subs_raw to a filtered frame
# without calling %store again, so the stored copy remains this unfiltered
# one — confirm that this is intended.
VSMDPL_subs_raw = subs.copy()
%store VSMDPL_subs_raw
VSMDPL_subs_raw

<h2>Make cuts to get SAGA-like data</h2>
<h4>Current cuts</h4>

- vmax_mpeak >= 44.09 (≈ 45; chosen from the vmax_mpeak vs. number-of-satellites relation so that the average number of satellites per host matches the SAGA average of 4.855)

- 3 or more satellites

<h4>Unused cuts</h4>

- mvir >= 1e10

In [None]:
# Unused alternative cut: keep only the most massive subhalos.
# subs = df[df['mvir'] >= 1e10]

# vmax_mpeak serves as a proxy for luminosity. Cutoffs that give a target
# average number of satellites per host:
#     3.5  -> 61
#     4    -> 51.6
#     4.5  -> 46.4
#     5    -> 43.5
#     5.5  -> 40.98
#     saga -> 44.09   (4.855 sats per host on average)

# Keep subhalos at or above the SAGA-matching cutoff.
subs_cut = subs.loc[subs['vmax_mpeak'].ge(44.09)].copy()

# Number of distinct hosts that still own at least one subhalo.
subs_cut['upid'].nunique(dropna=False)

In [None]:
def _systems_with_min_members(frame, min_members=3):
    """Return a copy of ``frame`` limited to hosts (``upid``) owning at least ``min_members`` rows."""
    # Vectorised per-row group size; avoids calling a Python lambda once per host.
    members_per_host = frame.groupby('upid')['upid'].transform('count')
    return frame[members_per_host >= min_members].copy()


def _mean_members_per_host(frame):
    """Return the average number of rows per distinct ``upid`` in ``frame``."""
    return len(frame) / frame['upid'].nunique(dropna=False)


# SAGA-like sample: satellite systems with 3 or more members after the vmax_mpeak cut.
subs_cut = _systems_with_min_members(subs_cut)
average_num_subs = _mean_members_per_host(subs_cut)
display(average_num_subs)
display(subs_cut['upid'].nunique(dropna=False))
VSMDPL_subs_saga_avg = subs_cut.copy()

# Same membership requirement applied to the uncut subhalo table.
subs_raw_grouped = _systems_with_min_members(subs)
average_num_subs = _mean_members_per_host(subs_raw_grouped)
display(average_num_subs)
# NOTE(review): VSMDPL_subs_raw is rebound here but not re-persisted with
# %store, so other notebooks still see the earlier, unfiltered copy.
VSMDPL_subs_raw = subs_raw_grouped.copy()
VSMDPL_subs_raw

<h2>Ellipticity calculations</h2>

<h4>Quadrupole moments</h4>

In [None]:
# Per-host quadrupole moments of the satellite distribution, projected onto the
# x-y plane and measured relative to the host position:
#     Q_xx = <dx^2>,  Q_yy = <dy^2>,  Q_xy = <dx * dy>
# where dx, dy use the periodic-box-corrected satellite coordinates.
ellipticity_df = (
    subs_cut
    .assign(
        x_diff=lambda sats: sats['x_adj'] - sats['x_host'],
        y_diff=lambda sats: sats['y_adj'] - sats['y_host'],
        Q_xx=lambda sats: sats['x_diff']**2,
        Q_yy=lambda sats: sats['y_diff']**2,
        Q_xy=lambda sats: sats['x_diff'] * sats['y_diff'],
    )
    # Select columns with a list: tuple-style selection on a groupby
    # (groupby(...)['a', 'b']) is rejected by current pandas.
    .groupby('upid', as_index=False)[['Q_xx', 'Q_yy', 'Q_xy']]
    .mean()
)
ellipticity_df


<h4>Ellipticity components</h4>

In [None]:
def _ellipticity_denominator(Q_xx, Q_yy, Q_xy):
    """Return Q_xx + Q_yy + 2*sqrt(Q_xx*Q_yy - Q_xy**2), the denominator shared by e_1 and e_2."""
    determinant = Q_xx * Q_yy - Q_xy**2
    # For moments built as averages of dx^2, dy^2 and dx*dy the determinant is
    # non-negative, but floating-point round-off can push it slightly below
    # zero, which would turn the square root into NaN (or a complex number for
    # plain floats). Clamp at zero instead.
    return Q_xx + Q_yy + 2 * np.sqrt(np.maximum(determinant, 0))


def e_1(Q_xx, Q_yy, Q_xy):
    """First ellipticity component: (Q_xx - Q_yy) / (Q_xx + Q_yy + 2*sqrt(det Q)).

    Accepts scalars or element-wise aligned arrays / pandas Series.
    """
    return (Q_xx - Q_yy) / _ellipticity_denominator(Q_xx, Q_yy, Q_xy)


def e_2(Q_xx, Q_yy, Q_xy):
    """Second ellipticity component: 2*Q_xy / (Q_xx + Q_yy + 2*sqrt(det Q)).

    Accepts scalars or element-wise aligned arrays / pandas Series.
    """
    return 2 * Q_xy / _ellipticity_denominator(Q_xx, Q_yy, Q_xy)


def e_3(e_1, e_2):
    """Ellipticity magnitude sqrt(e_1**2 + e_2**2).

    With e_1 and e_2 computed as above this equals (a - b) / (a + b) for
    principal axes a >= b: 0 for a circular distribution, approaching 1 for a
    highly elongated one.
    """
    return np.sqrt(e_1**2 + e_2**2)


# Evaluate both ellipticity components from the per-host quadrupole moments,
# then combine them into the magnitude e_3.
ellipticity_df['e_1'] = e_1(
    Q_xx=ellipticity_df['Q_xx'],
    Q_yy=ellipticity_df['Q_yy'],
    Q_xy=ellipticity_df['Q_xy'],
)
ellipticity_df['e_2'] = e_2(
    Q_xx=ellipticity_df['Q_xx'],
    Q_yy=ellipticity_df['Q_yy'],
    Q_xy=ellipticity_df['Q_xy'],
)
ellipticity_df['e_3'] = e_3(
    e_1=ellipticity_df['e_1'],
    e_2=ellipticity_df['e_2'],
)

In [None]:
# Dark plotting theme: one background colour for figure, axes and saved files,
# a slightly lighter grid, and light grey for every text element.
custom_style = {
    **dict.fromkeys(
        ("figure.facecolor", "axes.facecolor", "savefig.facecolor"),
        "212946",
    ),
    "grid.color": "2A3459",
    **dict.fromkeys(
        ("text.color", "axes.labelcolor", "xtick.color", "ytick.color"),
        "0.9",
    ),
    "grid.linestyle": "-",
    "lines.solid_capstyle": "round",
}

sns.set_style(style="darkgrid", rc=custom_style)

In [None]:
# Distribution of the ellipticity magnitude e_3 over hosts.
# e_3 = sqrt(e_1^2 + e_2^2) equals (a - b) / (a + b) for principal axes a >= b,
# so 0 is a circular satellite distribution and larger values are more
# elongated. The axis therefore runs from "more spherical" (left) to "more
# elliptical" (right), and the plotted quantity is the ellipticity, not b/a.
ax = sns.histplot(ellipticity_df["e_3"], bins=7, element='poly', stat="density", fill=False)
# Raw string so the LaTeX backslashes are not parsed as Python escape sequences.
ax.set(xlabel=r"$\longleftarrow$ more spherical         ellipticity           more elliptical $\longrightarrow$")

In [None]:
# Persist the per-host ellipticity table computed with the SAGA-average
# vmax_mpeak cut (44.09) so other notebooks can load it via `%store -r`.
ellipticity_df_VSMDPL_saga_avg = ellipticity_df.copy()
%store ellipticity_df_VSMDPL_saga_avg

<h4>Store ellipticity DataFrame to use in other notebooks</h4>

In [None]:
# Persist the same ellipticity table under a second name for other notebooks.
# NOTE(review): the "_5_5" suffix suggests the 5.5-satellite cutoff (40.98),
# but as the notebook stands ellipticity_df was built with the 44.09 cut, so
# this stores the same data as ellipticity_df_VSMDPL_saga_avg — re-run the
# cut with 40.98 before this cell if the 5.5 sample is what you want.
ellipticity_df_VSMDPL_5_5 = ellipticity_df.copy()
%store ellipticity_df_VSMDPL_5_5

<h2>Highest luminosity analysis</h2>

In [None]:
# Per upid, the smallest Mr and smallest num_satellites among the rows sharing it.
# Presumably Mr is an absolute magnitude, so the minimum is the most luminous
# member — TODO confirm.
# Columns are selected with a list: tuple-style selection on a groupby
# (groupby(...)['a', 'b']) is rejected by current pandas.
highest_luminosity_subs = hosts_and_subs.groupby('upid', as_index=False)[['Mr', 'num_satellites']].min()

In [None]:
# Left-join the per-host ellipticities onto the per-host Mr table; hosts
# without an ellipticity keep NaN in e_3.
highest_luminosity_ellipticity = highest_luminosity_subs.merge(
    ellipticity_df,
    how="left",
    left_on="upid",
    right_on="upid",
)[['upid', 'Mr', 'e_3']]

In [None]:
# Drop hosts that have no ellipticity: they are absent from ellipticity_df
# (removed by the earlier sample cuts), so the left merge left e_3 as NaN.
# The result is persisted with %store for use in other notebooks.
highest_luminosity_ellipticity_VSMDPL = highest_luminosity_ellipticity[~highest_luminosity_ellipticity['e_3'].isna()]
%store highest_luminosity_ellipticity_VSMDPL

<h2>Concat highest luminosity w/ log(vmax_mpeak)</h2>

In [None]:
# Largest vmax_mpeak among the rows sharing each upid.
highest_vmax_subs = (
    hosts_and_subs
    .groupby(by='upid', as_index=False)['vmax_mpeak']
    .max()
)

In [None]:
# Left-join the per-host ellipticities onto the per-host maximum vmax_mpeak;
# hosts without an ellipticity keep NaN in e_3.
highest_vmax_ellipticity = highest_vmax_subs.merge(
    ellipticity_df,
    how="left",
    left_on="upid",
    right_on="upid",
)[['upid', 'vmax_mpeak', 'e_3']]
highest_vmax_ellipticity

In [None]:
# Natural logarithm of each system's maximum vmax_mpeak, stored as a new column.
highest_vmax_ellipticity['log_vmax_mpeak'] = highest_vmax_ellipticity['vmax_mpeak'].pipe(np.log)

In [None]:
# Keep only hosts that have an ellipticity (e_3 is NaN for hosts missing from
# ellipticity_df after the left merge), persist the result with %store for use
# in other notebooks, and display it.
highest_vmax_ellipticity_VSMDPL = highest_vmax_ellipticity[~highest_vmax_ellipticity['e_3'].isna()]
%store highest_vmax_ellipticity_VSMDPL
highest_vmax_ellipticity_VSMDPL

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Scatter of log(vmax_mpeak) against the ellipticity magnitude e_3, one point per host.
fig, ax1 = plt.subplots()
sp1 = sns.scatterplot(data=highest_vmax_ellipticity_VSMDPL, x='e_3', y='log_vmax_mpeak', ax=ax1, s=2)
# Labels are set after plotting so they cannot be replaced by seaborn's defaults.
# e_3 = (a - b) / (a + b) grows with elongation, so "more spherical" belongs on
# the left and "more elliptical" on the right. Raw string keeps the LaTeX
# backslashes intact; the stray literal double quotes were removed from the label.
ax1.set_xlabel(r'$\longleftarrow$ more spherical         ellipticity           more elliptical $\longrightarrow$')
ax1.set_ylabel('log(vmax_mpeak)')

In [None]:
# Resize the scatter plot's figure to 6 x 4 inches and write it out as a 600 dpi PNG.
fig = sp1.figure
fig.set_size_inches(6, 4)
fig.savefig("SimVmax_mpeakVsEllipticity.png", dpi=600)

<h2>Average # satellites per vmax_mpeak</h2>

In [None]:
# One row per host: the host's vmax_mpeak and its satellite count, taken from
# the first non-null value of each column within the host's subhalo rows.
hosts_num_vmax = subs.groupby('upid')[['vmax_mpeak_host', 'num_satellites']].first()
hosts_num_vmax

Bin hosts by vmax_mpeak

In [None]:
# 20 evenly spaced edges between 135 and 340 -> 19 equal-width vmax_mpeak bins.
# Hosts outside (135, 340] fall in no bin and are left out of the averages.
vmax_bin_edges = np.linspace(135, 340, 20)
vmax_bins = pd.cut(hosts_num_vmax['vmax_mpeak_host'], vmax_bin_edges)

# observed=False keeps empty bins as NaN rows. It is passed explicitly because
# pandas is changing the default for categorical groupers, which would
# otherwise silently drop empty bins in future versions.
hosts_num_vmax_averages = hosts_num_vmax.groupby(vmax_bins, observed=False).mean()
hosts_num_vmax_averages

In [None]:
# Mean satellite count against mean host vmax_mpeak, one point per vmax_mpeak bin.
fig2, ax2 = plt.subplots()
sp2 = ax2.plot(
    hosts_num_vmax_averages['vmax_mpeak_host'],
    hosts_num_vmax_averages['num_satellites'],
)
ax2.set_xlabel('vmax_mpeak')
ax2.set_ylabel('average # satellites')

In [None]:
# Satellite count against host vmax_mpeak, one point per host.
fig3, ax3 = plt.subplots()
sp3 = ax3.scatter(
    hosts_num_vmax['vmax_mpeak_host'],
    hosts_num_vmax['num_satellites'],
    s=2,
)
ax3.set_xlabel('vmax_mpeak')
ax3.set_ylabel('# satellites')

In [None]:
import scipy as sp
# Import the submodule explicitly: a bare `import scipy` does not guarantee
# that `scipy.stats` is loaded (on older SciPy releases `sp.stats` only works
# if some other package happened to import it first).
import scipy.stats

# Spearman rank correlation between host vmax_mpeak and satellite count.
# NaNs propagate to the result; the p-value is two-sided.
spearmanr_results = sp.stats.spearmanr(
    hosts_num_vmax['vmax_mpeak_host'],
    hosts_num_vmax['num_satellites'],
    axis=0,
    nan_policy='propagate',
    alternative='two-sided',
)
spearmanr_results