In [None]:
import h5py
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import ttest_ind, mannwhitneyu
import os

In [None]:
# Input datasets (CSV). Earlier HDF5 file names, kept for reference only:
#   file_celestial  = "celestial_data.hdf5"  /  "ephemeris_data.hdf5"
#   file_simulation = "simulation_data.hdf5"
file_celestial, file_simulation = "ephemeris_data.csv", "simulation_data.csv"

In [None]:
# Disabled HDF5 loader, kept for reference:
# with h5py.File(file_celestial, 'r') as f1, h5py.File(file_simulation, 'r') as f2:
#     celestial_data = {key: np.array(f1[key]) for key in f1.keys()}
#     simulation_data = {key: np.array(f2[key]) for key in f2.keys()}
# celestial_df = pd.DataFrame(celestial_data)
# simulation_df = pd.DataFrame(simulation_data)

# Read both CSV inputs (celestial first, then simulation) into DataFrames
# using pandas' default parsing options.
celestial_df, simulation_df = map(pd.read_csv, (file_celestial, file_simulation))

In [None]:
# Preview the simulation table: the bare expression below is the last
# expression of the cell, so the notebook renders it as the cell output.
# The commented-out lines are alternative quick looks used while exploring.
# celestial_df
simulation_df
# simulation_df.columns
# simulation_df.head(3)


In [None]:
# Position components of the three entities, taken from the simulation table.
# NOTE: the lookups use the mixed-case column names ('r_A_x', ...), i.e. the
# names as they are before any column-name normalization is applied.
r_A_x, r_A_y, r_A_z = (simulation_df[f'r_A_{axis}'] for axis in 'xyz')
r_B_x, r_B_y, r_B_z = (simulation_df[f'r_B_{axis}'] for axis in 'xyz')
r_C_x, r_C_y, r_C_z = (simulation_df[f'r_C_{axis}'] for axis in 'xyz')

# A single 3D axes on a 10x8 inch figure
fig, ax = plt.subplots(figsize=(10, 8), subplot_kw={'projection': '3d'})

# One scatter series per entity, drawn with size-1 markers in its own colour
for pos_x, pos_y, pos_z, point_color, entity_label in (
    (r_A_x, r_A_y, r_A_z, 'r', 'Entity A'),
    (r_B_x, r_B_y, r_B_z, 'g', 'Entity B'),
    (r_C_x, r_C_y, r_C_z, 'b', 'Entity C'),
):
    ax.scatter(pos_x, pos_y, pos_z, color=point_color, label=entity_label, s=1)

# Axis labels, title and legend, then render the figure
ax.set(
    xlabel='X',
    ylabel='Y',
    zlabel='Z',
    title='3D Positions of Entities A, B, and C',
)
ax.legend()
plt.show()


In [None]:
# Display basic information about both tables.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would add a stray "None" line
# after each report.
print("Celestial Data Overview:")
celestial_df.info()
print("\nSimulation Data Overview:")
simulation_df.info()

In [None]:
# Standardize column names: strip surrounding whitespace and lower-case them.
# NOTE: this relabels the columns of both frames in place, so from this cell
# onward columns must be addressed by their lower-case names
# (e.g. 'r_a_x' rather than 'r_A_x').
celestial_df.columns = celestial_df.columns.str.strip().str.lower()
simulation_df.columns = simulation_df.columns.str.strip().str.lower()

# Display basic information after the rename.
# DataFrame.info() prints its own report and returns None, so it is called
# directly instead of being wrapped in print() (which would also print "None").
print("Celestial Data Overview:")
celestial_df.info()
print("\nSimulation Data Overview:")
simulation_df.info()

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import ttest_ind, mannwhitneyu

# Restrict both tables to the columns they share so they can be compared
common_columns = celestial_df.columns.intersection(simulation_df.columns)
if len(common_columns) == 0:
    print("No common columns found between celestial and simulation data.")
else:
    # Keep only the shared columns, in the same order, in both frames
    celestial_df = celestial_df.loc[:, common_columns]
    simulation_df = simulation_df.loc[:, common_columns]

    # Summary statistics as produced by DataFrame.describe()
    celestial_stats, simulation_stats = celestial_df.describe(), simulation_df.describe()

    for stats_heading, stats_table in (
        ("\nCelestial Data Statistics:", celestial_stats),
        ("\nSimulation Data Statistics:", simulation_stats),
    ):
        print(stats_heading)
        print(stats_table)

    # Statistical comparison functions
    def perform_statistical_tests(df1, df2, alpha=0.05):
        """Compare two DataFrames column by column with two-sample tests.

        Each column of ``df1`` that also exists in ``df2`` is tested twice:
        Welch's t-test (``ttest_ind`` with ``equal_var=False``) and a
        two-sided Mann-Whitney U test. Missing values are dropped from both
        samples before either test runs, so the two tests always see the same
        observations.

        Parameters
        ----------
        df1, df2 : pandas.DataFrame
            Tables to compare. Columns are visited in ``df1``'s order; columns
            of ``df1`` that are absent from ``df2`` are skipped.
        alpha : float, default 0.05
            Significance threshold applied to both p-values.

        Returns
        -------
        pandas.DataFrame
            One row per tested column with the columns ``Feature``,
            ``T-test p-value``, ``Mann-Whitney U p-value``,
            ``T-test Significant`` and ``Mann-Whitney U Significant``.
            When a column has no non-missing values in either frame, both
            p-values are NaN and neither test is flagged as significant.
        """
        results = []
        for column in df1.columns:
            if column not in df2.columns:
                # Nothing to compare against
                continue

            # Drop NaNs once, up front: previously only the t-test omitted
            # them while Mann-Whitney received the raw columns.
            sample1 = df1[column].dropna()
            sample2 = df2[column].dropna()

            if sample1.empty or sample2.empty:
                # Neither test is defined for an empty sample
                p_ttest = p_mwu = float('nan')
            else:
                _, p_ttest = ttest_ind(sample1, sample2, equal_var=False)
                _, p_mwu = mannwhitneyu(sample1, sample2, alternative='two-sided')
            results.append((column, p_ttest, p_mwu))

        results_df = pd.DataFrame(results, columns=['Feature', 'T-test p-value', 'Mann-Whitney U p-value'])
        # NaN p-values compare as False, i.e. "not significant"
        results_df['T-test Significant'] = results_df['T-test p-value'] < alpha
        results_df['Mann-Whitney U Significant'] = results_df['Mann-Whitney U p-value'] < alpha
        return results_df

    # Perform statistical tests
    comparison_results = perform_statistical_tests(celestial_df, simulation_df)
    print("\nStatistical Test Results:")
    print(comparison_results)

    # Visualization setup (set_theme is seaborn's preferred name for sns.set)
    sns.set_theme(style="whitegrid")
    output_plots = 'three_bodies/'

    # Ensure the output directory exists; exist_ok keeps this safe if the
    # directory appears between the check and the creation.
    if not os.path.isdir(output_plots):
        os.makedirs(output_plots, exist_ok=True)
        print(f"Created directory: {output_plots}")

    # Create pairwise comparison plots: one density plot and one box plot per
    # shared column, each written to a PNG file in output_plots.
    for column in common_columns:
        # Give the two samples distinct names. Both Series otherwise carry the
        # same name (the column label); seaborn may key wide-form list entries
        # by Series name, in which case identically named entries could
        # collapse into a single box.
        celestial_values = celestial_df[column].rename('Celestial Data')
        simulation_values = simulation_df[column].rename('Simulation Data')

        # Density plot (fill=True replaces the deprecated shade=True)
        density_fig, density_ax = plt.subplots(figsize=(10, 6))
        sns.kdeplot(celestial_values, label='Celestial Data', fill=True, ax=density_ax)
        sns.kdeplot(simulation_values, label='Simulation Data', fill=True, ax=density_ax)
        density_ax.set(title=f"Density Plot: {column}", xlabel=column, ylabel="Density")
        density_ax.legend()
        density_plot_path = os.path.join(output_plots, f"{column}_density.png")
        density_fig.savefig(density_plot_path)
        # Close explicitly so figures do not accumulate across iterations
        plt.close(density_fig)

        # Box plot
        box_fig, box_ax = plt.subplots(figsize=(10, 6))
        sns.boxplot(data=[celestial_values, simulation_values], orient='h', ax=box_ax)
        box_ax.set_yticks([0, 1])
        box_ax.set_yticklabels(['Celestial Data', 'Simulation Data'])
        box_ax.set(title=f"Box Plot: {column}", xlabel=column)
        box_plot_path = os.path.join(output_plots, f"{column}_boxplot.png")
        box_fig.savefig(box_plot_path)
        plt.close(box_fig)

    print(f"Plots saved to {output_plots}")


In [None]:
# Show the current working directory: the relative input file names and the
# relative plot output folder used in this notebook are resolved against it.
print(os.getcwd())