# In [None]:
# Import necessary libraries
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from scipy.stats import rankdata

# Load the dataset. The real path is withheld for anonymity: replace
# 'your_file_path.csv' with the actual location of the CSV file.
data = pd.read_csv("your_file_path.csv")

# Apply seaborn's white-grid theme to every figure created below
sns.set(style="whitegrid")

# BMI values at which a quantile line is drawn on each plot
bmi_values = [
    14.5,
    18.5, 24.9,
    25.0, 29.9,
    30.0, 39.9,
    40.0,
    55.2,
]

# One distinct "Set1" colour per BMI value, used for that value's vertical line
colors = sns.color_palette("Set1", len(bmi_values))

# Function to plot and save overall BMI distribution
def plot_and_save_overall_bmi_distribution(data):
    """Plot the BMI histogram of the whole dataset and save it as a PDF.

    A dashed vertical line is drawn at every value of the module-level
    ``bmi_values`` list (coloured with the matching entry of the module-level
    ``colors`` palette) and labelled with the empirical quantile of that
    value. The figure is written to ``Overall_BMI_Distribution.pdf`` in the
    current working directory and then displayed.

    Args:
        data: DataFrame with a numeric ``BMI`` column.
    """
    # Missing BMI values cannot be ranked: with SciPy's default
    # nan_policy='propagate' (SciPy >= 1.10) a single NaN turns every rank
    # into NaN, and counting NaN rows would inflate the ECDF denominator.
    bmi = data['BMI'].dropna()

    # method='max' assigns each observation the number of observations <= it,
    # so dividing by the sample size gives the ECDF at that observation.
    ecdf = pd.Series(rankdata(bmi, method='max') / len(bmi), index=bmi.index)

    # Quantile of a reference BMI = ECDF at the smallest observed BMI that is
    # >= the reference. References above the observed maximum get no entry
    # and therefore no line.
    bmi_quantiles = {}
    for threshold in bmi_values:
        at_or_above = ecdf[bmi >= threshold]
        if not at_or_above.empty:
            bmi_quantiles[threshold] = at_or_above.min()

    # Plot the overall BMI distribution
    plt.figure(figsize=(14, 8))
    sns.histplot(bmi, kde=True, color='#404080', bins=20)
    plt.title("Overall Distribution of BMI with Specified Quantile Lines")
    plt.xlabel("BMI")
    plt.ylabel("Count")

    # Add a vertical line for each reference BMI that has a quantile
    for threshold, color in zip(bmi_values, colors):
        if threshold not in bmi_quantiles:
            continue
        quantile = bmi_quantiles[threshold]
        plt.axvline(x=threshold, color=color, linestyle='--',
                    label=f"BMI {threshold} (Quantile: {quantile:.2f})")

    # Anchor the legend outside the axes so it does not cover the histogram
    plt.legend(loc='upper right', bbox_to_anchor=(1.3, 1))

    # Remove grid lines
    plt.grid(False)

    # Save before showing: the figure may no longer be available afterwards
    plot_filename = "Overall_BMI_Distribution.pdf"
    plt.savefig(plot_filename, format='pdf', bbox_inches='tight')
    print(f"Saved overall BMI distribution plot to {plot_filename}")

    plt.show()

# Function to plot and save overall whole body fat mass distribution
def plot_and_save_overall_fat_mass_distribution(data):
    """Plot the whole-body fat mass histogram of the dataset and save it as a PDF.

    For every value of the module-level ``bmi_values`` list, the empirical
    quantile of that BMI is computed and the fat mass at the same quantile of
    ``Whole_body_fat_mass`` is marked with a dashed vertical line. The figure
    is written to ``Overall_Fat_Mass_Distribution.pdf`` in the current working
    directory and then displayed.

    Args:
        data: DataFrame with numeric ``BMI`` and ``Whole_body_fat_mass``
            columns.
    """
    # Missing BMI values cannot be ranked: with SciPy's default
    # nan_policy='propagate' (SciPy >= 1.10) a single NaN turns every rank
    # into NaN, and counting NaN rows would inflate the ECDF denominator.
    bmi = data['BMI'].dropna()

    # method='max' assigns each observation the number of observations <= it,
    # so dividing by the sample size gives the ECDF at that observation.
    ecdf = pd.Series(rankdata(bmi, method='max') / len(bmi), index=bmi.index)

    # Quantile of a reference BMI = ECDF at the smallest observed BMI that is
    # >= the reference. References above the observed maximum get no entry
    # and therefore no line.
    bmi_quantiles = {}
    for threshold in bmi_values:
        at_or_above = ecdf[bmi >= threshold]
        if not at_or_above.empty:
            bmi_quantiles[threshold] = at_or_above.min()

    # Fat mass found at the same quantile position as each reference BMI
    fat_mass = data['Whole_body_fat_mass']
    whole_body_fat_mass_values = {
        threshold: fat_mass.quantile(quantile)
        for threshold, quantile in bmi_quantiles.items()
    }

    # Plot the overall whole body fat mass distribution
    plt.figure(figsize=(14, 8))
    sns.histplot(fat_mass, kde=True, color='#00798c', bins=20)
    plt.title("Overall Distribution of Whole Body Fat Mass with BMI Quantile Lines")
    plt.xlabel("Whole Body Fat Mass")
    plt.ylabel("Count")

    # Key colours by BMI value so a given BMI keeps the same colour as on the
    # BMI plots even when some reference values have no quantile.
    line_colors = dict(zip(bmi_values, colors))
    for threshold, mass_value in whole_body_fat_mass_values.items():
        plt.axvline(x=mass_value, color=line_colors[threshold], linestyle='--',
                    label=f"BMI {threshold}: Whole Body Fat Mass value {mass_value:.2f}")

    # Anchor the legend outside the axes so it does not cover the histogram
    plt.legend(loc='upper right', bbox_to_anchor=(1.35, 1), title="Corresponding Whole Body Fat Mass values for BMI Quantile Positions")

    # Remove grid lines
    plt.grid(False)

    # Save before showing: the figure may no longer be available afterwards
    plot_filename = "Overall_Fat_Mass_Distribution.pdf"
    plt.savefig(plot_filename, format='pdf', bbox_inches='tight')
    print(f"Saved overall Whole Body Fat Mass distribution plot to {plot_filename}")

    plt.show()

# Function to plot and save BMI distribution by sex
def plot_and_save_bmi_distribution_by_sex(data, sex_label):
    """Plot the BMI histogram for one sex and save it as a PDF.

    Rows are selected with ``data['sex'] == sex_label``. A dashed vertical
    line is drawn at every value of the module-level ``bmi_values`` list and
    labelled with its empirical quantile within the selected rows. The figure
    is written to ``BMI_Distribution_<Males|Females>.pdf`` in the current
    working directory and then displayed.

    Args:
        data: DataFrame with ``sex`` and numeric ``BMI`` columns.
        sex_label: Value of the ``sex`` column to keep. ``0`` is titled
            "Males"; any other value is titled "Females".
    """
    data_sex = data[data['sex'] == sex_label]
    sex_name = 'Males' if sex_label == 0 else 'Females'
    fill_color = "#00798c" if sex_label == 0 else "#404080"

    # Missing BMI values cannot be ranked: with SciPy's default
    # nan_policy='propagate' (SciPy >= 1.10) a single NaN turns every rank
    # into NaN, and counting NaN rows would inflate the ECDF denominator.
    bmi = data_sex['BMI'].dropna()

    # method='max' assigns each observation the number of observations <= it,
    # so dividing by the sample size gives the ECDF at that observation.
    ecdf = pd.Series(rankdata(bmi, method='max') / len(bmi), index=bmi.index)

    # Quantile of a reference BMI = ECDF at the smallest observed BMI that is
    # >= the reference. References above the observed maximum get no entry
    # and therefore no line.
    bmi_quantiles = {}
    for threshold in bmi_values:
        at_or_above = ecdf[bmi >= threshold]
        if not at_or_above.empty:
            bmi_quantiles[threshold] = at_or_above.min()

    # Plot the BMI distribution by sex
    plt.figure(figsize=(14, 8))
    sns.histplot(bmi, kde=True, color=fill_color, bins=20)
    plt.title(f"Distribution of BMI with Specified Quantile Lines ({sex_name})")
    plt.xlabel("BMI")
    plt.ylabel("Count")

    # Add a vertical line for each reference BMI that has a quantile
    for threshold, color in zip(bmi_values, colors):
        if threshold not in bmi_quantiles:
            continue
        quantile = bmi_quantiles[threshold]
        plt.axvline(x=threshold, color=color, linestyle='--',
                    label=f"BMI {threshold} (Quantile: {quantile:.2f})")

    # Anchor the legend outside the axes so it does not cover the histogram
    plt.legend(loc='upper right', bbox_to_anchor=(1.3, 1))

    # Remove grid lines
    plt.grid(False)

    # Save before showing: the figure may no longer be available afterwards
    plot_filename = f"BMI_Distribution_{sex_name}.pdf"
    plt.savefig(plot_filename, format='pdf', bbox_inches='tight')
    print(f"Saved BMI distribution plot for {sex_name} to {plot_filename}")

    plt.show()

# Function to plot and save whole body fat mass distribution by sex
def plot_and_save_fat_mass_distribution_by_sex(data, sex_label):
    """Plot the whole-body fat mass histogram for one sex and save it as a PDF.

    Rows are selected with ``data['sex'] == sex_label``. For every value of
    the module-level ``bmi_values`` list, the empirical quantile of that BMI
    within the selected rows is computed and the fat mass at the same quantile
    of ``Whole_body_fat_mass`` is marked with a dashed vertical line. The
    figure is written to ``Fat_Mass_Distribution_<Males|Females>.pdf`` in the
    current working directory and then displayed.

    Args:
        data: DataFrame with ``sex`` and numeric ``BMI`` and
            ``Whole_body_fat_mass`` columns.
        sex_label: Value of the ``sex`` column to keep. ``0`` is titled
            "Males"; any other value is titled "Females".
    """
    data_sex = data[data['sex'] == sex_label]
    sex_name = 'Males' if sex_label == 0 else 'Females'
    fill_color = "#00798c" if sex_label == 0 else "#404080"

    # Missing BMI values cannot be ranked: with SciPy's default
    # nan_policy='propagate' (SciPy >= 1.10) a single NaN turns every rank
    # into NaN, and counting NaN rows would inflate the ECDF denominator.
    bmi = data_sex['BMI'].dropna()

    # method='max' assigns each observation the number of observations <= it,
    # so dividing by the sample size gives the ECDF at that observation.
    ecdf = pd.Series(rankdata(bmi, method='max') / len(bmi), index=bmi.index)

    # Quantile of a reference BMI = ECDF at the smallest observed BMI that is
    # >= the reference. References above the observed maximum get no entry
    # and therefore no line.
    bmi_quantiles = {}
    for threshold in bmi_values:
        at_or_above = ecdf[bmi >= threshold]
        if not at_or_above.empty:
            bmi_quantiles[threshold] = at_or_above.min()

    # Fat mass found at the same quantile position as each reference BMI
    fat_mass = data_sex['Whole_body_fat_mass']
    whole_body_fat_mass_values = {
        threshold: fat_mass.quantile(quantile)
        for threshold, quantile in bmi_quantiles.items()
    }

    # Plot the whole body fat mass distribution by sex
    plt.figure(figsize=(14, 8))
    sns.histplot(fat_mass, kde=True, color=fill_color, bins=20)
    plt.title(f"Distribution of Whole Body Fat Mass with BMI Quantile Lines ({sex_name})")
    plt.xlabel("Whole Body Fat Mass")
    plt.ylabel("Count")

    # Key colours by BMI value so a given BMI keeps the same colour as on the
    # BMI plots even when some reference values have no quantile.
    line_colors = dict(zip(bmi_values, colors))
    for threshold, mass_value in whole_body_fat_mass_values.items():
        plt.axvline(x=mass_value, color=line_colors[threshold], linestyle='--',
                    label=f"BMI {threshold}: Whole Body Fat Mass value {mass_value:.2f}")

    # Anchor the legend outside the axes so it does not cover the histogram
    plt.legend(loc='upper right', bbox_to_anchor=(1.35, 1), title="Corresponding Whole Body Fat Mass values for BMI Quantile Positions")

    # Remove grid lines
    plt.grid(False)

    # Save before showing: the figure may no longer be available afterwards
    plot_filename = f"Fat_Mass_Distribution_{sex_name}.pdf"
    plt.savefig(plot_filename, format='pdf', bbox_inches='tight')
    print(f"Saved Whole Body Fat Mass distribution plot for {sex_name} to {plot_filename}")

    plt.show()

# Produce every figure: the two whole-dataset plots first, then the BMI plots
# and the fat-mass plots for males (0) and females (1), in that order.
plot_and_save_overall_bmi_distribution(data)
plot_and_save_overall_fat_mass_distribution(data)
for plot_by_sex in (
    plot_and_save_bmi_distribution_by_sex,
    plot_and_save_fat_mass_distribution_by_sex,
):
    for sex_label in (0, 1):
        plot_by_sex(data, sex_label)
