# Script: Beta Diversity Null Model Analysis with Group Color Plotting

## Purpose

This script performs null model-based beta diversity analysis and visualizes standardized effect size (SES) for predefined sample groups using color-coded bar plots.

## Workflow

1. Load relative abundance OTU table.
2. Extract sample group prefixes from column names.
3. Calculate:
   - Observed beta diversity (Bray-Curtis)
   - Null distribution via permutation
   - SES values and p-values
4. Save:
   - Distance matrices
   - SES results
   - Color-coded bar plots of SES

## Output Files

- `*_Observed_BetaDiversity.csv`: Distance matrix for each group
- `SES_Results.csv`: Table of SES and p-values
- `SES_Distribution.png`: Bar plot of SES values, color-coded per group

## Notes

- You can define a manual group order by uncommenting both `group_order` lines (the list definition and the `results_df.loc[group_order]` reindexing).
- Colors can be customized in the `colors` list.

## Dependencies

- `numpy`, `pandas`, `matplotlib`
- `scikit-bio` for beta diversity metrics
- `scipy` for t-tests


## 🔬 Script: Beta Diversity Null Model with SES Visualization

This script analyzes microbial community beta diversity using null models and generates standardized effect size (SES) plots per group.

### Features

- Bray-Curtis dissimilarity calculation
- Null model simulation via permutation
- Statistical testing (one-sample t-test of the null-model means against the observed mean)
- SES plots grouped and color-coded by sample prefix

### Usage

```bash
python scripts/beta_diversity_null_model_plot.py
```

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from skbio.diversity import beta_diversity
from scipy.stats import ttest_1samp

# File paths
input_file = r'..grelative.xls.csv'
output_folder = r'..BetaDiversityResults/'
os.makedirs(output_folder, exist_ok=True)

# Load the OTU table; its first column becomes the row index,
# leaving one column per sample.
otu_df = pd.read_csv(input_file)
otu_df = otu_df.set_index(otu_df.columns[0])

# Extract group labels from sample names (e.g., A1, A2 -> A).
# Names that do not start with a letter yield NaN here.
samples = otu_df.columns
groups = pd.Series(samples.str.extract(r'^([A-Za-z]+)')[0].values, index=samples)

# Samples without a letter prefix cannot be assigned to a group. Report and
# skip them instead of creating a NaN group that selects no columns.
unmatched_samples = groups.index[groups.isna()].tolist()
if unmatched_samples:
    print(f"Warning: skipping samples without a letter prefix: {unmatched_samples}")

# Group data by sample prefix (groups keep their order of first appearance)
grouped_data = {
    group: otu_df.loc[:, groups.index[groups == group]]
    for group in groups.dropna().unique()
}
    
# Function to calculate beta diversity and SES with a null model
def calculate_beta_diversity_with_stats(group_data, group_name, metric="braycurtis", num_iterations=100):
    """
    Compare a group's observed beta diversity against a permutation null model.

    The test statistic is the mean dissimilarity over all unique sample pairs.
    Each null iteration shuffles the values of every sample (column)
    independently across rows and recomputes that statistic.

    Parameters
    ----------
    group_data : pandas.DataFrame
        Abundance table with one column per sample of the group.
    group_name : str
        Label of the group. Not used in the computation; kept so existing
        callers keep working.
    metric : str, default "braycurtis"
        Metric name forwarded to ``skbio.diversity.beta_diversity``.
    num_iterations : int, default 100
        Number of randomized tables used to build the null distribution.

    Returns
    -------
    observed_df : pandas.DataFrame
        Square observed dissimilarity matrix between the group's samples.
    ses : float
        ``(observed mean - mean of null means) / std of null means``.
        NaN when undefined (fewer than two samples, fewer than two
        iterations, or a null distribution with zero spread).
    p_value : float
        p-value of a one-sample t-test of the null means against the
        observed mean.
    """
    sample_ids = group_data.columns

    # Observed beta diversity matrix
    observed_dist = beta_diversity(metric, counts=group_data.values.T, ids=sample_ids)
    observed_df = observed_dist.to_data_frame()

    # With fewer than two samples there are no pairs to compare.
    if len(sample_ids) < 2:
        return observed_df, np.nan, np.nan

    # condensed_form() holds every sample pair exactly once. Flattening the
    # square matrix would also count the zero diagonal and each pair twice,
    # which drags the mean towards zero and inflates the spread.
    observed_mean = observed_dist.condensed_form().mean()

    # Null model distribution of the same statistic (mean pairwise distance)
    null_means = np.empty(num_iterations)
    for iteration in range(num_iterations):
        randomized_data = group_data.apply(np.random.permutation, axis=0)
        null_dist = beta_diversity(metric, counts=randomized_data.values.T, ids=sample_ids)
        null_means[iteration] = null_dist.condensed_form().mean()

    # SES standardizes by the spread of the null *statistic*, so the standard
    # deviation is taken over the per-iteration means, not over the pooled
    # individual pairwise distances.
    null_mean = null_means.mean()
    null_std = null_means.std(ddof=1) if num_iterations > 1 else np.nan

    if np.isfinite(null_std) and null_std > 0:
        ses = (observed_mean - null_mean) / null_std
    else:
        ses = np.nan

    # One-sample t-test of the null means against the observed mean.
    # NOTE(review): this p-value shrinks as num_iterations grows; consider a
    # rank-based permutation p-value if that is not intended.
    t_stat, p_value = ttest_1samp(null_means, observed_mean)
    return observed_df, ses, p_value

# Main analysis loop: one null-model analysis per sample group
results = {}

for group_name, group_data in grouped_data.items():
    print(f"Processing group: {group_name}")

    # Calculate beta diversity, SES, and p-value
    observed_df, ses, p_value = calculate_beta_diversity_with_stats(group_data, group_name)
    print(f"Group {group_name}: SES = {ses}, p-value = {p_value}")

    # Save observed beta diversity matrix
    observed_file = os.path.join(output_folder, f"{group_name}_Observed_BetaDiversity.csv")
    observed_df.to_csv(observed_file)

    # Store SES and p-value results
    results[group_name] = {"SES": ses, "p-value": p_value}

# Save all group results to CSV
results_df = pd.DataFrame.from_dict(results, orient='index')
# The index already holds the group labels. Re-extracting them with an
# uppercase-only pattern would truncate mixed-case labels and turn lowercase
# labels into NaN, so the label is copied as-is.
results_df['Group'] = results_df.index
results_file = os.path.join(output_folder, 'SES_Results.csv')
results_df.to_csv(results_file)

# Optional: Sort groups manually
# group_order = ["MNG", "MCG", "HNG", "HCG", "LNG", "LCG"]
# results_df = results_df.loc[group_order]

# Plot SES summary
plt.figure()
# Bar colors for the SES plot; edit this list to customize them.
# (A list literal must not be followed by "()": calling a list raises TypeError.)
colors = ["lightblue", "blue", "lightgreen"]
