In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd  
import os
# Notebook-wide Matplotlib defaults used for the APJ figures.
plt.style.use("seaborn-v0_8-whitegrid")

# Text: serif family, with one base size and explicit sizes per element.
plt.rcParams["font.size"] = 12
plt.rcParams["font.family"] = "serif"
plt.rcParams["axes.labelsize"] = 13
plt.rcParams["axes.titlesize"] = 13
plt.rcParams["xtick.labelsize"] = 12
plt.rcParams["ytick.labelsize"] = 12
plt.rcParams["legend.fontsize"] = 11

# Font type 42 ensures TrueType fonts are embedded in PDF output (for APJ).
plt.rcParams["pdf.fonttype"] = 42


def save_figure(fig, filename, dpi=300, save_png=False):
    """Save ``fig`` under ``figures/`` as a PDF and, optionally, as a PNG.

    Parameters
    ----------
    fig : object with a ``savefig`` method (e.g. a matplotlib Figure)
        The figure to write.
    filename : str
        Base name without extension; output goes to ``figures/<filename>.<ext>``.
    dpi : int, default 300
        Resolution passed to ``savefig`` for the PNG copy; not used for the PDF.
    save_png : bool, default False
        Also write a PNG copy. Off by default, so only the PDF is written
        unless the caller asks for the PNG.

    Returns
    -------
    None
        The paths that were written are printed.
    """
    base = os.path.join("figures", filename)
    # savefig does not create missing directories, so make sure the target exists.
    os.makedirs(os.path.dirname(base), exist_ok=True)

    written = [f"{base}.pdf"]
    fig.savefig(written[0], bbox_inches='tight')  # APJ-compatible vector format
    if save_png:
        written.append(f"{base}.png")
        fig.savefig(written[-1], dpi=dpi, bbox_inches='tight')  # For checking / internal use

    # Report only the files that were actually written.
    print(f"Saved: {' and '.join(written)}")

# Plot test R^2 against time-window length for the three tau scans.
base_path = '/users_path/merger_trace/notebooks/plot/plot_data/phase-space/'

pre_path = base_path + "tau_scan_rotation_main_pre.csv"
post_path = base_path + "tau_scan_rotation_main_post.csv"
allm_path = base_path + "tau_scan_rotation_main_all.csv"

# Read each scan once.
pre = pd.read_csv(pre_path)
post = pd.read_csv(post_path)
allm = pd.read_csv(allm_path)

tau_prepost = pre['tau']
r2_pre = pre['R2_test']
r2_post = post['R2_test']

# Plot the future-merger curve against its own tau column when the file has one,
# so the two scans are not required to share a grid; otherwise fall back to the
# past-merger grid.
tau_post = post['tau'] if 'tau' in post.columns else tau_prepost

# The full-merger tau is doubled before plotting.
# NOTE(review): presumably tau is a half-width in that scan, so 2*tau is the
# total window length — confirm against the scan script.
tau_all = allm['tau'] * 2
r2_all = allm['R2_test']

# Only the leading rows of the full-merger scan are drawn.
# NOTE(review): 20 presumably keeps the curve inside the x-range of the other
# two scans — confirm.
n_full_points = 20

# plot
fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(tau_prepost, r2_pre, label='past-merger', color='#1f77b4', linewidth=2)
ax.plot(tau_post, r2_post, label='future-merger', color='#ff7f0e', linewidth=2)
ax.plot(tau_all.iloc[:n_full_points], r2_all.iloc[:n_full_points],
        label='full-merger', color='#2ca02c', linewidth=2)

# Dashed marker at a window length of 2.0, with its annotation just to the right.
ax.axvline(2.0, linestyle='--', color='gray', alpha=0.7)
ax.text(2.03, 0.1, r"$t_{\rm window}=2.0$", fontsize=11, color='gray')

ax.set_xlabel('Time window length (Gyr)', fontsize=12)
ax.set_ylabel('Test $R^2$', fontsize=12)
ax.legend(frameon=False, fontsize=10)
ax.grid(alpha=0.3)
fig.tight_layout()
save_figure(fig, "tau_window_compare")
plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import r2_score

# Notebook-wide Matplotlib defaults used for the APJ figures.
plt.style.use("seaborn-v0_8-whitegrid")

# Text: serif family, with one base size and explicit sizes per element.
plt.rcParams["font.size"] = 12
plt.rcParams["font.family"] = "serif"
plt.rcParams["axes.labelsize"] = 13
plt.rcParams["axes.titlesize"] = 13
plt.rcParams["xtick.labelsize"] = 12
plt.rcParams["ytick.labelsize"] = 12
plt.rcParams["legend.fontsize"] = 11

# Font type 42 ensures TrueType fonts are embedded in PDF output (for APJ).
plt.rcParams["pdf.fonttype"] = 42



def save_figure(fig, filename, dpi=300, save_png=False):
    """Save ``fig`` under ``figures/`` as a PDF and, optionally, as a PNG.

    Parameters
    ----------
    fig : object with a ``savefig`` method (e.g. a matplotlib Figure)
        The figure to write.
    filename : str
        Base name without extension; output goes to ``figures/<filename>.<ext>``.
    dpi : int, default 300
        Resolution passed to ``savefig`` for the PNG copy; not used for the PDF.
    save_png : bool, default False
        Also write a PNG copy. Off by default, so only the PDF is written
        unless the caller asks for the PNG.

    Returns
    -------
    None
        The paths that were written are printed.
    """
    import os  # imported locally because this cell's import list does not include os

    base = os.path.join("figures", filename)
    # savefig does not create missing directories, so make sure the target exists.
    os.makedirs(os.path.dirname(base), exist_ok=True)

    written = [f"{base}.pdf"]
    fig.savefig(written[0], bbox_inches='tight')  # APJ-compatible vector format
    if save_png:
        written.append(f"{base}.png")
        fig.savefig(written[-1], dpi=dpi, bbox_inches='tight')  # For checking / internal use

    # Report only the files that were actually written.
    print(f"Saved: {' and '.join(written)}")


# Three-panel scatter of predicted vs. true score (pruned models), one panel per sample.
base_path ='/users_path/merger_trace/notebooks/plot/plot_data/phase-space/'


def _load_scatter(npz_path):
    """Return the ``y_test`` and ``y_pred`` arrays stored in ``npz_path``.

    The archive is opened in a ``with`` block so its file handle is closed
    once both arrays have been read.
    """
    with np.load(npz_path) as archive:
        return archive['y_test'], archive['y_pred']


# pre-merger
y_test_pre, y_pred_pre = _load_scatter(base_path + "scatter_rotate_pruned_tau2.0_premerger.npz")

# post-merger
y_test_post, y_pred_post = _load_scatter(base_path + "scatter_rotate_pruned_tau2.0_postmerger.npz")

# all-merger
y_test_all, y_pred_all = _load_scatter(base_path + "scatter_rotate_pruned_tau1.0_allmerger.npz")

fig, axes = plt.subplots(1, 3, figsize=(12, 4), sharex=True, sharey=True)
titles = ['on past-merger', 'on future-merger', 'on full-merger']
colors = ['#1f77b4', '#ff7f0e', '#2ca02c']
panels = [
    (y_test_pre, y_pred_pre),
    (y_test_post, y_pred_post),
    (y_test_all, y_pred_all),
]

# One square axis range shared by all panels. The lower bound is 0 unless some
# value is negative, in which case it is lowered so no point is clipped.
all_y = np.concatenate([y_test_pre, y_pred_pre, y_test_post, y_pred_post, y_test_all, y_pred_all])
lims = [min(0, np.floor(all_y.min())), np.ceil(all_y.max())]

for ax, title, color, (y_test, y_pred) in zip(axes, titles, colors, panels):
    r2 = r2_score(y_test, y_pred)
    ax.scatter(y_test, y_pred, s=10, alpha=0.6, color=color, label='Prediction')
    # R^2 annotation in the lower-right corner (axes-fraction coordinates).
    ax.text(0.95, 0.05, f'$R^2$ = {r2:.2f}', transform=ax.transAxes,
            fontsize=11, verticalalignment='bottom', horizontalalignment='right')

    # y = x reference line.
    ax.plot(lims, lims, 'r--', label='Perfect Prediction', linewidth=1.2)
    ax.set_title(title, fontsize=13)
    ax.set_xlim(lims)
    ax.set_ylim(lims)
    ax.grid(True, alpha=0.3)
    if ax is axes[0]:
        # The y axis is shared, so only the left-most panel carries the label.
        ax.set_ylabel('Predicted Score', fontsize=12)
    ax.set_xlabel('True Score', fontsize=12)
    ax.legend(fontsize=9)

fig.tight_layout()
save_figure(fig, "predicted_vs_true_score")
plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import r2_score

# Notebook-wide Matplotlib defaults used for the APJ figures.
plt.style.use("seaborn-v0_8-whitegrid")

# Text: serif family, with one base size and explicit sizes per element.
plt.rcParams["font.size"] = 12
plt.rcParams["font.family"] = "serif"
plt.rcParams["axes.labelsize"] = 13
plt.rcParams["axes.titlesize"] = 13
plt.rcParams["xtick.labelsize"] = 12
plt.rcParams["ytick.labelsize"] = 12
plt.rcParams["legend.fontsize"] = 11

# Font type 42 ensures TrueType fonts are embedded in PDF output (for APJ).
plt.rcParams["pdf.fonttype"] = 42



def save_figure(fig, filename, dpi=300, save_png=False):
    """Save ``fig`` under ``figures/`` as a PDF and, optionally, as a PNG.

    Parameters
    ----------
    fig : object with a ``savefig`` method (e.g. a matplotlib Figure)
        The figure to write.
    filename : str
        Base name without extension; output goes to ``figures/<filename>.<ext>``.
    dpi : int, default 300
        Resolution passed to ``savefig`` for the PNG copy; not used for the PDF.
    save_png : bool, default False
        Also write a PNG copy. Off by default, so only the PDF is written
        unless the caller asks for the PNG.

    Returns
    -------
    None
        The paths that were written are printed.
    """
    import os  # imported locally because this cell's import list does not include os

    base = os.path.join("figures", filename)
    # savefig does not create missing directories, so make sure the target exists.
    os.makedirs(os.path.dirname(base), exist_ok=True)

    written = [f"{base}.pdf"]
    fig.savefig(written[0], bbox_inches='tight')  # APJ-compatible vector format
    if save_png:
        written.append(f"{base}.png")
        fig.savefig(written[-1], dpi=dpi, bbox_inches='tight')  # For checking / internal use

    # Report only the files that were actually written.
    print(f"Saved: {' and '.join(written)}")


# Three-panel scatter of predicted vs. true score ("discussion" models), one panel per sample.
base_path ='/users_path/merger_trace/notebooks/plot/plot_data/phase-space/'


def _load_scatter(npz_path):
    """Return the ``y_test`` and ``y_pred`` arrays stored in ``npz_path``.

    The archive is opened in a ``with`` block so its file handle is closed
    once both arrays have been read.
    """
    with np.load(npz_path) as archive:
        return archive['y_test'], archive['y_pred']


# pre-merger
y_test_pre, y_pred_pre = _load_scatter(base_path + "scatter_rotate_discussion_tau2.0_premerger.npz")

# post-merger
y_test_post, y_pred_post = _load_scatter(base_path + "scatter_rotate_discussion_tau2.0_postmerger.npz")

# all-merger
y_test_all, y_pred_all = _load_scatter(base_path + "scatter_rotate_discussion_tau1.0_allmerger.npz")

fig, axes = plt.subplots(1, 3, figsize=(12, 4), sharex=True, sharey=True)
titles = ['on past-merger', 'on future-merger', 'on full-merger']
colors = ['#1f77b4', '#ff7f0e', '#2ca02c']
panels = [
    (y_test_pre, y_pred_pre),
    (y_test_post, y_pred_post),
    (y_test_all, y_pred_all),
]

# One square axis range shared by all panels. The lower bound is 0 unless some
# value is negative, in which case it is lowered so no point is clipped.
all_y = np.concatenate([y_test_pre, y_pred_pre, y_test_post, y_pred_post, y_test_all, y_pred_all])
lims = [min(0, np.floor(all_y.min())), np.ceil(all_y.max())]

for ax, title, color, (y_test, y_pred) in zip(axes, titles, colors, panels):
    r2 = r2_score(y_test, y_pred)
    ax.scatter(y_test, y_pred, s=10, alpha=0.6, color=color, label='Prediction')
    # R^2 annotation in the lower-right corner (axes-fraction coordinates).
    ax.text(0.95, 0.05, f'$R^2$ = {r2:.2f}', transform=ax.transAxes,
            fontsize=11, verticalalignment='bottom', horizontalalignment='right')

    # y = x reference line.
    ax.plot(lims, lims, 'r--', label='Perfect Prediction', linewidth=1.2)
    ax.set_title(title, fontsize=13)
    ax.set_xlim(lims)
    ax.set_ylim(lims)
    ax.grid(True, alpha=0.3)
    if ax is axes[0]:
        # The y axis is shared, so only the left-most panel carries the label.
        ax.set_ylabel('Predicted Score', fontsize=12)
    ax.set_xlabel('True Score', fontsize=12)
    ax.legend(fontsize=9)

fig.tight_layout()
save_figure(fig, "predicted_vs_true_score_discussion")
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Notebook-wide Matplotlib defaults used for the APJ figures.
plt.style.use("seaborn-v0_8-whitegrid")

# Text: serif family, with one base size and explicit sizes per element.
plt.rcParams["font.size"] = 12
plt.rcParams["font.family"] = "serif"
plt.rcParams["axes.labelsize"] = 13
plt.rcParams["axes.titlesize"] = 13
plt.rcParams["xtick.labelsize"] = 12
plt.rcParams["ytick.labelsize"] = 12
plt.rcParams["legend.fontsize"] = 11

# Font type 42 ensures TrueType fonts are embedded in PDF output (for APJ).
plt.rcParams["pdf.fonttype"] = 42



def save_figure(fig, filename, dpi=300, save_png=False):
    """Save ``fig`` under ``figures/`` as a PDF and, optionally, as a PNG.

    Parameters
    ----------
    fig : object with a ``savefig`` method (e.g. a matplotlib Figure)
        The figure to write.
    filename : str
        Base name without extension; output goes to ``figures/<filename>.<ext>``.
    dpi : int, default 300
        Resolution passed to ``savefig`` for the PNG copy; not used for the PDF.
    save_png : bool, default False
        Also write a PNG copy. Off by default, so only the PDF is written
        unless the caller asks for the PNG.

    Returns
    -------
    None
        The paths that were written are printed.
    """
    import os  # imported locally because this cell's import list does not include os

    base = os.path.join("figures", filename)
    # savefig does not create missing directories, so make sure the target exists.
    os.makedirs(os.path.dirname(base), exist_ok=True)

    written = [f"{base}.pdf"]
    fig.savefig(written[0], bbox_inches='tight')  # APJ-compatible vector format
    if save_png:
        written.append(f"{base}.png")
        fig.savefig(written[-1], dpi=dpi, bbox_inches='tight')  # For checking / internal use

    # Report only the files that were actually written.
    print(f"Saved: {' and '.join(written)}")


# Histogram of merger scores: the "scores" array of each score-distribution file
# vs. the "y_test" array of the matching scatter file, one panel per sample.

# Data directory, set here so the cell does not rely on an earlier cell having defined it.
base_path = '/users_path/merger_trace/notebooks/plot/plot_data/phase-space/'


def _load_array(npz_path, key):
    """Read one array from an ``.npz`` archive and close the file afterwards."""
    with np.load(npz_path) as archive:
        return archive[key]


# read data
scores_pre = _load_array(base_path + 'score_distribution_rotate_main_tau2.0_premerger.npz', 'scores')
scores_post = _load_array(base_path + 'score_distribution_rotate_main_tau2.0_postmerger.npz', 'scores')
scores_all = _load_array(base_path + 'score_distribution_rotate_main_tau1.0_allmerger.npz', 'scores')

y_test_pre = _load_array(base_path + "scatter_rotate_pruned_tau2.0_premerger.npz", 'y_test')
y_test_post = _load_array(base_path + "scatter_rotate_pruned_tau2.0_postmerger.npz", 'y_test')
y_test_all = _load_array(base_path + "scatter_rotate_pruned_tau1.0_allmerger.npz", 'y_test')

score_list_full = [scores_pre, scores_post, scores_all]
score_list_test = [y_test_pre, y_test_post, y_test_all]

titles = ['past-merger', 'future-merger', 'full-merger']
colors = ['#1f77b4', '#ff7f0e', '#2ca02c']

# Common bin edges from 0 to the largest value in the three "scores" arrays.
bins = np.linspace(0, max(scores_pre.max(), scores_post.max(), scores_all.max()), 40)

fig, axes = plt.subplots(1, 3, figsize=(15, 4), sharex=True, sharey=True)

for ax, full_scores, test_scores, title, color in zip(
        axes, score_list_full, score_list_test, titles, colors):
    ax.hist(full_scores, bins=bins, color=color, alpha=0.5, label='Full sample')
    ax.hist(test_scores, bins=bins, edgecolor=color, histtype='step',
            linewidth=2, linestyle='--', label='Test sample')

    ax.set_title(title, fontsize=12)
    if ax is axes[0]:
        # The y axis is shared, so only the left-most panel carries the label.
        ax.set_ylabel('Sample Count', fontsize=11)
    ax.set_xlabel('Merger Score', fontsize=11)
    ax.grid(alpha=0.3)
    ax.legend(fontsize=9)

# Apply the layout BEFORE saving so the PDF matches what is shown on screen.
fig.tight_layout(rect=[0, 0, 1, 0.95])
save_figure(fig, "score_distribution_test_vs_full")
plt.show()



In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Bar charts of permutation importance (mean with std error bars), one panel per model.

# Data directory, set here so the cell does not rely on an earlier cell having defined it.
base_path = '/users_path/merger_trace/notebooks/plot/plot_data/phase-space/'

# Define file paths (base_path already ends with '/', so file names carry no leading slash)
file_paths = {
    'Pre-merger': base_path + 'permutation_importance_rotate_main_tau2.0_premerger.csv',
    'Post-merger': base_path + 'permutation_importance_rotate_main_tau2.0_postmerger.csv',
    'All-merger': base_path + 'permutation_importance_rotate_main_tau1.0_allmerger.csv'
}

# Define color mapping
colors = {
    'Pre-merger': '#1f77b4',
    'Post-merger': '#ff7f0e',
    'All-merger': '#2ca02c'
}

fig, axes = plt.subplots(1, 3, figsize=(15, 4.5), sharey=True)

for ax, (label, csv_path) in zip(axes, file_paths.items()):
    df = pd.read_csv(csv_path)
    features = df['Feature']
    importances = df['Importance Mean']
    stds = df['Importance Std']

    ax.bar(features, importances, yerr=stds, color=colors[label], capsize=4)
    ax.set_title(label, fontsize=12)
    # Fix the tick positions first so the rotated labels line up with the bars.
    ax.set_xticks(range(len(features)))
    ax.set_xticklabels(features, rotation=45, ha='right', fontsize=10)
    ax.grid(alpha=0.3)
    if ax is axes[0]:
        # The y axis is shared, so only the left-most panel carries the label.
        ax.set_ylabel('Permutation Importance', fontsize=11)

fig.suptitle('Feature Importance Comparison across Merger Categories', fontsize=14)
# Leave the top 5% of the figure free for the suptitle.
fig.tight_layout(rect=[0, 0, 1, 0.95])
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Notebook-wide Matplotlib defaults used for the APJ figures.
plt.style.use("seaborn-v0_8-whitegrid")

# Text: serif family, with one base size and explicit sizes per element.
plt.rcParams["font.size"] = 12
plt.rcParams["font.family"] = "serif"
plt.rcParams["axes.labelsize"] = 13
plt.rcParams["axes.titlesize"] = 13
plt.rcParams["xtick.labelsize"] = 12
plt.rcParams["ytick.labelsize"] = 12
plt.rcParams["legend.fontsize"] = 11

# Font type 42 ensures TrueType fonts are embedded in PDF output (for APJ).
plt.rcParams["pdf.fonttype"] = 42



def save_figure(fig, filename, dpi=300, save_png=False):
    """Save ``fig`` under ``figures/`` as a PDF and, optionally, as a PNG.

    Parameters
    ----------
    fig : object with a ``savefig`` method (e.g. a matplotlib Figure)
        The figure to write.
    filename : str
        Base name without extension; output goes to ``figures/<filename>.<ext>``.
    dpi : int, default 300
        Resolution passed to ``savefig`` for the PNG copy; not used for the PDF.
    save_png : bool, default False
        Also write a PNG copy. Off by default, so only the PDF is written
        unless the caller asks for the PNG.

    Returns
    -------
    None
        The paths that were written are printed.
    """
    import os  # imported locally because this cell's import list does not include os

    base = os.path.join("figures", filename)
    # savefig does not create missing directories, so make sure the target exists.
    os.makedirs(os.path.dirname(base), exist_ok=True)

    written = [f"{base}.pdf"]
    fig.savefig(written[0], bbox_inches='tight')  # APJ-compatible vector format
    if save_png:
        written.append(f"{base}.png")
        fig.savefig(written[-1], dpi=dpi, bbox_inches='tight')  # For checking / internal use

    # Report only the files that were actually written.
    print(f"Saved: {' and '.join(written)}")
# Sorted permutation-importance bars ("main" models) with shared F-labels, one panel per model.

# Define file paths.
# The directory uses the same root as every other cell in this notebook and has
# no trailing slash, because each file name below starts with '/'.
base_path ='/users_path/merger_trace/notebooks/plot/plot_data/phase-space'
file_paths = {
    'past-merger':  base_path + '/permutation_importance_rotate_main_tau2.0_premerger.csv',
    'future-merger': base_path + '/permutation_importance_rotate_main_tau2.0_postmerger.csv',
    'full-merger':  base_path + '/permutation_importance_rotate_main_tau1.0_allmerger.csv'
}

# Define color mapping
colors = {
    'past-merger': '#1f77b4',
    'future-merger': '#ff7f0e',
    'full-merger': '#2ca02c'
}

# Height of the dashed red reference line drawn in every panel.
importance_threshold = 0.01

# Create unified feature list and F-labels: F1, F2, ... follow the row order of the first file.
all_features = pd.read_csv(list(file_paths.values())[0])['Feature'].values
f_label_dict = {feat: f'F{i+1}' for i, feat in enumerate(all_features)}

# F-label -> feature name, i.e. the direction announced by the printed header below.
feature_map_for_table = {f_label: feat for feat, f_label in f_label_dict.items()}

# Initialize figure
fig, axes = plt.subplots(1, 3, figsize=(15, 4.5), sharey=True)

# Plot
for ax, label in zip(axes, file_paths):
    df = pd.read_csv(file_paths[label])

    # Sort by importance, largest first, independently for each panel.
    df_sorted = df.sort_values(by='Importance Mean', ascending=False).reset_index(drop=True)

    # Get values
    importances = df_sorted['Importance Mean'].values
    stds = df_sorted['Importance Std'].values
    features_sorted = df_sorted['Feature'].values
    # Raises KeyError if a file lists a feature that is absent from the first file.
    f_labels_sorted = [f_label_dict[f] for f in features_sorted]

    ax.bar(f_labels_sorted, importances, yerr=stds, color=colors[label], capsize=4)
    ax.axhline(importance_threshold, linestyle='--', color='red', linewidth=1.5)
    ax.set_title(label, fontsize=12)
    ax.set_xticks(range(len(f_labels_sorted)))
    ax.set_xticklabels(f_labels_sorted, rotation=45, ha='right', fontsize=10)
    ax.grid(alpha=0.3)

    if ax is axes[0]:
        # The y axis is shared, so only the left-most panel carries the label.
        ax.set_ylabel('Permutation Importance', fontsize=11)

# Lay out, then save
fig.tight_layout(rect=[0, 0, 1, 0.95])
save_figure(fig, "feature_importance_sorted_common_Flabels")
plt.show()

# Print F-label mapping for caption/table
print("\nUnified F-label to Feature mapping:")
for f_label, name in feature_map_for_table.items():
    print(f"{f_label}: {name}")



In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Notebook-wide Matplotlib defaults used for the APJ figures.
plt.style.use("seaborn-v0_8-whitegrid")

# Text: serif family, with one base size and explicit sizes per element.
plt.rcParams["font.size"] = 12
plt.rcParams["font.family"] = "serif"
plt.rcParams["axes.labelsize"] = 13
plt.rcParams["axes.titlesize"] = 13
plt.rcParams["xtick.labelsize"] = 12
plt.rcParams["ytick.labelsize"] = 12
plt.rcParams["legend.fontsize"] = 11

# Font type 42 ensures TrueType fonts are embedded in PDF output (for APJ).
plt.rcParams["pdf.fonttype"] = 42



def save_figure(fig, filename, dpi=300, save_png=False):
    """Save ``fig`` under ``figures/`` as a PDF and, optionally, as a PNG.

    Parameters
    ----------
    fig : object with a ``savefig`` method (e.g. a matplotlib Figure)
        The figure to write.
    filename : str
        Base name without extension; output goes to ``figures/<filename>.<ext>``.
    dpi : int, default 300
        Resolution passed to ``savefig`` for the PNG copy; not used for the PDF.
    save_png : bool, default False
        Also write a PNG copy. Off by default, so only the PDF is written
        unless the caller asks for the PNG.

    Returns
    -------
    None
        The paths that were written are printed.
    """
    import os  # imported locally because this cell's import list does not include os

    base = os.path.join("figures", filename)
    # savefig does not create missing directories, so make sure the target exists.
    os.makedirs(os.path.dirname(base), exist_ok=True)

    written = [f"{base}.pdf"]
    fig.savefig(written[0], bbox_inches='tight')  # APJ-compatible vector format
    if save_png:
        written.append(f"{base}.png")
        fig.savefig(written[-1], dpi=dpi, bbox_inches='tight')  # For checking / internal use

    # Report only the files that were actually written.
    print(f"Saved: {' and '.join(written)}")
# Sorted permutation-importance bars ("discussion" models) with shared F-labels, one panel per model.

# Define file paths (base_path has no trailing slash; each file name starts with '/').
base_path ='/users_path/merger_trace/notebooks/plot/plot_data/phase-space'
file_paths = {
    'past-merger':  base_path + '/permutation_importance_rotate_discussion_tau2.0_premerger.csv',
    'future-merger': base_path + '/permutation_importance_rotate_discussion_tau2.0_postmerger.csv',
    'full-merger':  base_path + '/permutation_importance_rotate_discussion_tau1.0_allmerger.csv'
}

# Define color mapping
colors = {
    'past-merger': '#1f77b4',
    'future-merger': '#ff7f0e',
    'full-merger': '#2ca02c'
}

# Height of the dashed red reference line drawn in every panel.
importance_threshold = 0.01

# Create unified feature list and F-labels: F1, F2, ... follow the row order of the first file.
all_features = pd.read_csv(list(file_paths.values())[0])['Feature'].values
f_label_dict = {feat: f'F{i+1}' for i, feat in enumerate(all_features)}

# F-label -> feature name, i.e. the direction announced by the printed header below.
feature_map_for_table = {f_label: feat for feat, f_label in f_label_dict.items()}

# Initialize figure
fig, axes = plt.subplots(1, 3, figsize=(15, 4.5), sharey=True)

# Plot
for ax, label in zip(axes, file_paths):
    df = pd.read_csv(file_paths[label])

    # Sort by importance, largest first, independently for each panel.
    df_sorted = df.sort_values(by='Importance Mean', ascending=False).reset_index(drop=True)

    # Get values
    importances = df_sorted['Importance Mean'].values
    stds = df_sorted['Importance Std'].values
    features_sorted = df_sorted['Feature'].values
    # Raises KeyError if a file lists a feature that is absent from the first file.
    f_labels_sorted = [f_label_dict[f] for f in features_sorted]

    ax.bar(f_labels_sorted, importances, yerr=stds, color=colors[label], capsize=4)
    ax.axhline(importance_threshold, linestyle='--', color='red', linewidth=1.5)
    ax.set_title(label, fontsize=12)
    ax.set_xticks(range(len(f_labels_sorted)))
    ax.set_xticklabels(f_labels_sorted, rotation=45, ha='right', fontsize=10)
    ax.grid(alpha=0.3)

    if ax is axes[0]:
        # The y axis is shared, so only the left-most panel carries the label.
        ax.set_ylabel('Permutation Importance', fontsize=11)

# Lay out, then save
fig.tight_layout(rect=[0, 0, 1, 0.95])
save_figure(fig, "feature_importance_sorted_common_Flabels_discussion")
plt.show()

# Print F-label mapping for caption/table
print("\nUnified F-label to Feature mapping:")
for f_label, name in feature_map_for_table.items():
    print(f"{f_label}: {name}")


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Matplotlib defaults for this cell: whitegrid style, serif text, wide default figure.
plt.style.use('seaborn-v0_8-whitegrid')

# Figure geometry
plt.rcParams['figure.dpi'] = 150
plt.rcParams['figure.figsize'] = [15, 4.5]

# Fonts and text sizes
plt.rcParams['font.family'] = 'serif'
plt.rcParams['legend.fontsize'] = 11
plt.rcParams['legend.title_fontsize'] = 11
plt.rcParams['xtick.labelsize'] = 10
plt.rcParams['ytick.labelsize'] = 11
plt.rcParams['axes.labelsize'] = 11

# List the features whose mean permutation importance reaches the threshold in at
# least one of the three models. Nothing is plotted in this cell.

# Define file paths
base_path ='/users_path/merger_trace/notebooks/plot/plot_data/phase-space/'
file_paths = {
    'past-merger': base_path + 'permutation_importance_rotate_main_tau2.0_premerger.csv',
    'future-merger': base_path + 'permutation_importance_rotate_main_tau2.0_postmerger.csv',
    'full-merger': base_path + 'permutation_importance_rotate_main_tau1.0_allmerger.csv',
}

# A feature is reported when its mean importance is >= this value in any model.
importance_threshold = 0.01

# Unified F-labels: F1, F2, ... follow the row order of the first file.
all_features = pd.read_csv(list(file_paths.values())[0])['Feature'].values
f_label_dict = {feat: f'F{i+1}' for i, feat in enumerate(all_features)}

# F-label -> feature name, for the caption/table.
feature_map_for_table = {f_label: feat for feat, f_label in f_label_dict.items()}

# Collect one row of mean importances per model. Rows are aligned by feature
# NAME (not by row position in the CSV), so column n always refers to F{n+1}
# even if the files list their features in different orders.
importance_matrix = []
for label, path in file_paths.items():
    df = pd.read_csv(path)
    if set(df['Feature']) != set(all_features):
        raise ValueError(f"{label}: feature names in {path} differ from the first file")
    by_feature = df.set_index('Feature')['Importance Mean']
    importance_matrix.append(by_feature.reindex(all_features).values)

importance_matrix = np.array(importance_matrix)  # shape = (3, n_features)

# select features which have feature importance over the threshold in at least one model
for n in range(importance_matrix.shape[1]):
    current_im = importance_matrix[:, n]
    print(current_im)
    if np.any(current_im >= importance_threshold):
        # n is 0-based; the printed number is the index used in the F-label.
        print(n + 1)

In [None]:
# Display the F-label -> feature-name dictionary as this cell's output.
feature_map_for_table