In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.colors import ListedColormap

# 1. Read the percentage table; the first spreadsheet column becomes the row index
prop_df = pd.read_excel('Figure_3E_data.xlsx', index_col=0)

# 2. Use the text inside the trailing "(...)" of each column header as its clan;
#    headers without such a suffix fall back to 'Unknown_Clan'
domain_names = prop_df.columns
clan_info = domain_names.str.extract(r'\((.*?)\)$')[0]
clan_info = clan_info.fillna('Unknown_Clan')
prop_df.columns = pd.MultiIndex.from_arrays(
    [clan_info, domain_names],
    names=['Clan', 'Domain'],
)

# 3. Collapse the domain columns into one summed column per clan, then rescale
#    each row (community) so its clan values are percentages of the row total
clan_pct = prop_df.T.groupby(level='Clan').sum().T
community_totals = clan_pct.sum(axis=1)
clan_pct_normalized = clan_pct.div(community_totals, axis=0)
# Dividing by a zero row total can yield +/-inf; map those to NaN so that rows
# left without any usable value are removed by the dropna below
clan_pct_normalized = clan_pct_normalized.replace([np.inf, -np.inf], np.nan) * 100
clan_pct_normalized = clan_pct_normalized.dropna(how='all')

# 4. Define community labels and plotting order
#    community_labels maps a row-index value of clan_pct_normalized to the
#    letter shown on the x-axis; desired_order fixes the left-to-right order.
community_labels = {3: 'A', 2: 'B', 8: 'C', 7: 'D', 4: 'E'}
desired_order = ['A', 'B', 'C', 'D', 'E']

# 5. Extract top 5 clans per community (excluding Unknown_Clan) for plotting
# Column labels that mean "no clan assigned"; these are never ranked.
excluded_clans = ['Unknown_Clan', 'nan', np.nan]

# One single-row frame per plotted community. They are concatenated once after
# the loop instead of growing a DataFrame on every iteration.
top_clan_frames = []

# iterrows() always yields one Series per row, even if an index label repeats
# (a .loc lookup would return a DataFrame in that case).
for community, community_row in clan_pct_normalized.iterrows():
    # Index values may be ints, floats or numeric strings; coerce when possible
    # so they match the integer keys of community_labels.
    try:
        comm_int = int(community)
    except (ValueError, TypeError):
        comm_int = community
    comm_label = community_labels.get(comm_int, community)

    # Communities that are not plotted must not contribute columns: otherwise
    # their top clans survive as all-NaN columns and show up as legend entries
    # (and use up palette colors) without any bar segment.
    if comm_label not in desired_order:
        continue

    community_data = community_row.dropna()
    known_clans = community_data[~community_data.index.isin(excluded_clans)]
    top5 = known_clans.nlargest(5)

    if not top5.empty:
        top_clan_frames.append(top5.to_frame(comm_label).T)

# Columns end up in order of first appearance; clans missing from a community's
# top 5 are NaN in that community's row.
plot_data = pd.concat(top_clan_frames) if top_clan_frames else pd.DataFrame()

# 6. Put the communities in the desired order; a community listed in
#    desired_order but absent from the data becomes an all-NaN row
plot_data = plot_data.reindex(desired_order)

# 7. Define custom color palette
your_colors = [
    '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
    '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf',
    '#aec7e8', '#ffbb78', '#98df8a', '#ff9896', '#c5b0d5',
    '#c49c94', '#f7b6d2'
]
# One color per clan column. The palette is cycled when there are more clans
# than colors; truncating it instead would make the colormap shorter than the
# number of columns, so neighbouring clans could be drawn in the same color.
n_clans = len(plot_data.columns)
palette = [your_colors[i % len(your_colors)] for i in range(n_clans)]
custom_cmap = ListedColormap(palette)

# 8. Plot stacked bar chart with custom colors
# DataFrame.plot opens a figure of its own unless it is given an Axes, which
# would leave an empty extra figure and ignore the requested size. Create the
# figure/axes pair explicitly and draw into it.
fig, ax = plt.subplots(figsize=(6, 6))
plot_data.plot(
    kind='bar',
    stacked=True,
    colormap=custom_cmap,
    width=0.8,
    edgecolor='white',
    linewidth=0.5,
    ax=ax,
)

# 9. Customize axes and legend
ax.set_xlabel('Community', fontsize=12)
ax.set_ylabel('Domain Percentage (%)', fontsize=12)
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
# Anchor the legend outside the plotting area, to the right of the bars
ax.legend(title='Clan', bbox_to_anchor=(1.15, 1), loc='upper left')

# 10. Add percentage labels for values > 5%
# Missing entries add no height to a stack, so treat them as 0 while tracking
# the running bottom of each bar.
segment_heights = plot_data.fillna(0)
for bar_pos, (_, heights) in enumerate(segment_heights.iterrows()):
    bottom = 0.0
    for value in heights:
        if value > 5:
            ax.text(
                bar_pos,
                bottom + value / 2,  # vertical centre of this segment
                f'{value:.1f}%',
                ha='center',
                va='center',
                fontsize=8,
                color='white',
                weight='bold'
            )
        bottom += value

fig.tight_layout()
# bbox_inches='tight' keeps the legend placed outside the axes in the saved file
fig.savefig('Figure_3E.pdf', dpi=300, bbox_inches='tight')
plt.show()
