In [None]:
import re
import os
import pandas as pd
from collections import defaultdict

bg_file = "/home2/s439906/project/CASP16/oligomer/stoich_bg_distribution.csv"


In [None]:
# Load the background stoichiometry distribution into a DataFrame.
df_corrected = pd.read_csv(bg_file)

# Number of comma-separated fields in each stoichiometry string
# (a string with k commas has k + 1 fields).
# NOTE(review): this counts comma-separated fields, whereas count_unique_chains
# (used for the per-target truth) counts distinct letters; the two only agree if
# the background file lists one chain type per comma-separated field -- confirm.
df_corrected['chain_count'] = df_corrected['stoichiometry'].map(lambda entry: entry.count(',') + 1)

# Extract the numeric parts of a stoichiometry string, largest first.
def extract_and_sort_numbers(stoichiometry):
    """Return every run of digits in ``stoichiometry`` as a descending tuple of ints.

    Letters and separators are ignored, so the result does not depend on which
    chain carries which copy number. A string without digits gives ``()``.
    """
    copy_numbers = list(map(int, re.findall(r'\d+', stoichiometry)))
    copy_numbers.sort(reverse=True)
    return tuple(copy_numbers)

# Attach the order-independent copy-number signature of every background entry
# as a new 'sorted_numbers' column.
df_corrected['sorted_numbers'] = df_corrected['stoichiometry'].map(extract_and_sort_numbers)

# Show the annotated background table.
df_corrected

In [None]:
# Split the background table by chain_count: one sub-frame per distinct value,
# keyed in order of first appearance.
unique_chain_counts = {}
for n_chains in df_corrected['chain_count'].unique():
    has_n_chains = df_corrected['chain_count'] == n_chains
    unique_chain_counts[n_chains] = df_corrected[has_n_chains]
unique_chain_counts

In [None]:
# Raw background counts: chain_count -> {sorted_numbers: summed 'count'}.
chain_count_analysis = defaultdict(dict)

# Group by chain_count first, then total the 'count' column for every distinct
# sorted_numbers signature inside that group.
for chain_count, group in df_corrected.groupby('chain_count'):
    chain_count_analysis[chain_count] = (
        group.groupby('sorted_numbers')['count'].sum().to_dict()
    )
chain_count_analysis

In [None]:
# Normalised background distribution:
# chain_count -> {sorted_numbers: fraction of that chain_count's total count}.
# This replaces the raw-count version of chain_count_analysis.
chain_count_analysis = defaultdict(dict)
for chain_count, group in df_corrected.groupby('chain_count'):
    # Summed 'count' per distinct sorted_numbers signature.
    sorted_number_counts = group.groupby('sorted_numbers')['count'].sum()

    # Divide by the group total so the fractions within one chain_count sum to 1.
    total_count = sorted_number_counts.sum()
    chain_count_analysis[chain_count] = sorted_number_counts.div(total_count).to_dict()

chain_count_analysis

In [None]:
# Directory holding one '<target>.stoichiometry' file per target.
stoichiometry_dir = "/data/data1/conglab/jzhan6/CASP16/CASP16_scores/oligo_20240910/firstmodels/"

# os.listdir returns entries in arbitrary order; sort them so the order in which
# targets are processed (and therefore the row order of the combined score
# table and any tie-breaking in later sorts) is reproducible between runs.
stoichiometry_files = sorted(
    file for file in os.listdir(stoichiometry_dir) if file.endswith('.stoichiometry')
)

# Number of stoichiometry files found.
len(stoichiometry_files)

In [None]:
def count_unique_chains(stoichiometry):
    """Return how many distinct alphabetic characters ``stoichiometry`` contains.

    Every character for which ``str.isalpha()`` is true counts as a chain label,
    upper and lower case being distinct (so "An" yields 2).
    """
    chain_labels = {symbol for symbol in stoichiometry if symbol.isalpha()}
    return len(chain_labels)

In [None]:
def _extract_group_number(model_name):
    """Return the first 'TS' + three-digit id found in ``model_name``, or None."""
    match = re.search(r'TS\d{3}', model_name)
    return match.group(0) if match else None


# Build one single-column score frame per target (indexed by group number) and
# join them side by side once at the end, instead of re-concatenating the
# growing table on every iteration.
per_target_scores = []
for file in stoichiometry_files:
    # Target name is everything before the first dot of the file name.
    target = file.split('.')[0]
    df = pd.read_csv(os.path.join(stoichiometry_dir, file), sep='\t')
    if df.empty:
        # No model rows: there is no truth value to look up and nothing to score.
        print(f"Skipping {file}: file contains no model rows")
        continue

    df['group_number'] = df['model'].apply(_extract_group_number)
    df['sorted_numbers'] = df['truth'].apply(extract_and_sort_numbers)
    df['chain_count'] = df['truth'].apply(count_unique_chains)

    # The first row's truth is used for the whole target.
    # NOTE(review): assumes every row of a file carries the same 'truth' -- confirm.
    first_sorted_numbers = df['sorted_numbers'].iloc[0]
    first_chain_count = df['chain_count'].iloc[0]

    # Look the background probability up without touching the defaultdict
    # (plain indexing would silently insert an empty dict for an unseen
    # chain count) and fail with a message that names the offending target.
    bg_for_chain_count = chain_count_analysis.get(first_chain_count, {})
    if first_sorted_numbers not in bg_for_chain_count:
        raise KeyError(
            f"{target}: no background probability for chain_count="
            f"{first_chain_count}, sorted_numbers={first_sorted_numbers}"
        )
    bg_prob = bg_for_chain_count[first_sorted_numbers]
    score = 1 - bg_prob  # if the probability is low, then the score awarded should be high

    # A model earns the target's score only when its match_status is "yes".
    df['score'] = [score if status == "yes" else 0 for status in df['match_status']]

    # New frame (not a view of df): index by group, name the column after the target.
    score_df = (
        df.set_index('group_number')[['score']]
        .rename(columns={'score': target})
    )
    per_target_scores.append(score_df)

# Rows = groups, columns = targets; NaN where a group has no row for a target.
# Stays None when no file produced any scores.
score_df_all = pd.concat(per_target_scores, axis=1) if per_target_scores else None

score_df_all

In [None]:
# (target id, stoichiometry) pairs; every display label has the form
# "<target>(<stoichiometry>)".
_TARGET_STOICHIOMETRY = (
    ("T0201o", "A2"),
    ("H0202", "A2B2"),
    ("H0204", "A2B2C2"),
    ("T0206o", "A2"),
    ("H0208", "A1B1"),
    ("H0215", "A1B1"),
    ("H0213", "A1B1C1D1E1"),
    ("T0249o", "A3"),
    ("H0217", "A2B2C2D2E2F2"),
    ("T0218o", "A2"),
    ("H0220", "A1B4"),
    ("H0222", "A1B1C1"),
    ("H0223", "A1B1C1"),
    ("H0225", "A1B1C1"),
    ("H0227", "A1B6"),
    ("H0229", "A1B1"),
    ("H0230", "A1B1"),
    ("H0232", "A2B2"),
    ("H0233", "A2B2C2"),
    ("H0236", "A3B6"),
    ("T0234o", "A3"),
    ("T0235o", "A6"),
    ("T0237o", "A4"),
    ("T0238o", "A2"),
    ("T0240o", "A3"),
    ("H0244", "A2B2C2"),
    ("H0245", "A1B1"),
    ("H0258", "A1B2"),
    ("T0257o", "A3"),
    ("T0259o", "A3"),
    ("H0265", "A9B18"),
    ("H0267", "A2B2"),
    ("T0270o", "A6"),
    ("T0272o", "A1B1C1D1E1F1G1H1I1"),
    ("H0272", "A1B1C1D1E1F1G1H1I1"),
    ("T0292o", "A2"),
    ("T0294v1o", "A2"),
    ("T0295o", "A8"),
    ("T0269v1o", "An"),
    ("T0298o", "A2"),
    ("T0219v1o", "An"),
)

# Maps a target id to its display label.
stoich_dict = {
    target_id: f"{target_id}({stoichiometry})"
    for target_id, stoichiometry in _TARGET_STOICHIOMETRY
}

# Maps a group id of the form 'TS' + three digits (the same form the scoring
# loop extracts from model names) to a group name. Used further down to rename
# the row index of the binary score table for display.
# Two ids ('TS044', 'TS225') map to the placeholder 'N/A'.
# NOTE(review): presumably the CASP16 group registry -- confirm the source.
number2group={'TS002': 'JFK-THG-AMBER',
 'TS003': 'JFK-THG-AMBERstable',
 'TS004': 'JFK-THG-CHARMM',
 'TS005': 'JFK-THG-CHARMMstable',
 'TS006': 'RNA_Dojo',
 'TS008': 'HADDOCK',
 'TS014': 'Cool-PSP',
 'TS015': 'PEZYFoldings',
 'TS016': 'haiping',
 'TS017': 'Seder2024hard',
 'TS018': 'AttStructureScorer',
 'TS019': 'Zheng-Server',
 'TS020': 'comppharmunibas',
 'TS022': 'Yang',
 'TS023': 'FTBiot0119',
 'TS026': 'SwRI',
 'TS027': 'ModFOLDdock2R',
 'TS028': 'NKRNA-s',
 'TS029': 'zyh_mae_try1',
 'TS030': 'SNU-CHEM_aff',
 'TS031': 'MassiveFold',
 'TS032': 'Bryant',
 'TS033': 'Diff',
 'TS039': 'arosko',
 'TS040': 'DELCLAB',
    'TS044': 'N/A',
 'TS049': 'UTMB',
 'TS050': 'SHORTLE',
 'TS051': 'MULTICOM',
 'TS052': 'Yang-Server',
 'TS055': 'LCDD-team',
 'TS059': 'DeepFold',
 'TS063': 'RNApolis',
 'TS074': 'ModFOLDdock2S',
 'TS075': 'GHZ-ISM',
 'TS077': 'coogs2',
 'TS079': 'MRAFold',
 'TS080': 'pDockQ',
 'TS082': 'VnsDock',
 'TS084': 'Vendruscolo',
 'TS085': 'Bates',
 'TS088': 'orangeballs',
 'TS091': 'Huang-HUST',
 'TS092': 'Seamount',
 'TS094': 'SimRNA-server',
 'TS097': 'JFK-THG-IDPCONFGEN',
 'TS100': 'zurite_lab',
 'TS102': 'Psi-Phi',
 'TS105': 'PFSC-PFVM',
 'TS110': 'MIEnsembles-Server',
 'TS112': 'Seder2024easy',
 'TS114': 'COAST',
 'TS117': 'Vakser',
 'TS120': 'Cerebra',
 'TS121': 'Pascal_Auffinger',
 'TS122': 'MQA_server',
 'TS128': 'TheMeilerMethod',
 'TS132': 'profold2',
 'TS135': 'Lindorff-LarsenCLVDS',
 'TS136': 'Lindorff-LarsenM3PPS',
 'TS137': 'Lindorff-LarsenM3PWS',
 'TS138': 'Shengyi',
 'TS139': 'DeepFold-refine',
 'TS143': 'dMNAfold',
 'TS145': 'colabfold_baseline',
 'TS147': 'Zheng-Multimer',
 'TS148': 'Guijunlab-Complex',
 'TS156': 'SoutheRNA',
 'TS159': '406',
 'TS163': 'MultiFOLD2',
 'TS164': 'McGuffin',
 'TS165': 'dfr',
 'TS167': 'OpenComplex',
 'TS169': 'thermomaps',
 'TS171': 'ChaePred',
 'TS172': 'VoroAffinity',
 'TS174': 'colabfold_foldseek',
 'TS177': 'aicb',
 'TS183': 'GuangzhouRNA-human',
 'TS187': 'Ayush',
 'TS188': 'VifChartreuseJaune',
 'TS189': 'LCBio',
 'TS191': 'Schneidman',
 'TS196': 'HYU_MLLAB',
 'TS197': 'D3D',
 'TS198': 'colabfold',
 'TS201': 'Drugit',
 'TS202': 'test001',
 'TS204': 'Zou',
 'TS207': 'MULTICOM_ligand',
 'TS208': 'falcon2',
 'TS209': 'colabfold_human',
 'TS212': 'PIEFold_human',
 'TS217': 'zyh_mae_try1E',
 'TS218': 'HIT-LinYang',
 'TS219': 'XGroup-server',
 'TS221': 'CSSB_FAKER',
    'TS225': 'N/A',
 'TS226': 'Pfaender',
 'TS227': 'KUMC',
 'TS231': 'B-LAB',
 'TS235': 'isyslab-hust',
 'TS237': 'Convex-PL-R',
 'TS238': 'BRIQX',
 'TS241': 'elofsson',
 'TS261': 'UNRES',
 'TS262': 'CoDock',
 'TS264': 'GuijunLab-Human',
 'TS267': 'kiharalab_server',
 'TS269': 'CSSB_server',
 'TS271': 'mialab_prediction2',
 'TS272': 'GromihaLab',
 'TS273': 'MQA_base',
 'TS274': 'kozakovvajda',
 'TS275': 'Seminoles',
 'TS276': 'FrederickFolding',
 'TS281': 'T2DUCC',
 'TS284': 'Unicorn',
 'TS286': 'CSSB_experimental',
 'TS287': 'plmfold',
 'TS290': 'Pierce',
 'TS293': 'MRAH',
 'TS294': 'KiharaLab',
 'TS295': 'VoroAffinityB',
 'TS298': 'ShanghaiTech-human',
 'TS300': 'ARC',
 'TS301': 'GHZ-MAN',
 'TS304': 'AF3-server',
 'TS306': 'GeneSilicoRNA-server',
 'TS307': 'nfRNA',
 'TS308': 'MoMAteam1',
 'TS309': 'Koes',
 'TS311': 'RAGfold_Prot1',
 'TS312': 'GuijunLab-Assembly',
 'TS314': 'GuijunLab-PAthreader',
 'TS317': 'GuangzhouRNA_AI',
 'TS319': 'MULTICOM_LLM',
 'TS322': 'XGroup',
 'TS323': 'Yan',
 'TS325': '405',
 'TS331': 'MULTICOM_AI',
 'TS337': 'APOLLO',
 'TS338': 'GeneSilico',
 'TS345': 'MULTICOM_human',
 'TS349': 'cheatham-lab',
 'TS351': 'digiwiser-ensemble',
 'TS353': 'KORP-PL-W',
 'TS355': 'CMOD',
 'TS357': 'UTAustin',
 'TS358': 'PerezLab_Gators',
 'TS361': 'Cerebra_server',
 'TS363': '2Vinardo',
 'TS367': 'AIR',
 'TS369': 'Bhattacharya',
 'TS370': 'DrAshokAndFriends',
 'TS375': 'milliseconds',
 'TS376': 'OFsingleseq',
 'TS380': 'mialab_prediction',
 'TS384': 'pert-plddt',
 'TS386': 'ShanghaiTech-Ligand',
 'TS388': 'DeepFold-server',
 'TS391': 'bussilab_replex',
 'TS393': 'GuijunLab-QA',
 'TS397': 'smg_ulaval',
 'TS400': 'OmniFold',
 'TS403': 'mmagnus',
 'TS408': 'SNU-CHEM-lig',
 'TS412': 'cheatham-lab_villa',
 'TS416': 'GPLAffinity',
 'TS417': 'GuangzhouRNA-meta',
 'TS418': 'Lee-Shin',
 'TS419': 'CSSB-Human',
 'TS420': 'Zou_aff2',
 'TS423': 'ShanghaiTech-server',
 'TS425': 'MULTICOM_GATE',
 'TS432': 'DIMAIO',
 'TS435': 'RNAFOLDX',
 'TS436': 'Yoshiaki',
 'TS439': 'Dokholyan',
 'TS441': 'ModFOLDdock2',
 'TS443': 'MQA',
 'TS446': 'pDockQ2',
 'TS447': 'UDMod',
 'TS448': 'dNAfold',
 'TS450': 'OpenComplex_Server',
 'TS456': 'Yang-Multimer',
 'TS461': 'forlilab',
 'TS462': 'Zheng',
 'TS464': 'PocketTracer',
 'TS465': 'Wallner',
 'TS466': 'coogs3',
 'TS468': 'MIALAB_gong',
 'TS469': 'GruLab',
 'TS471': 'Pcons',
 'TS474': 'CCB-AlGDock',
 'TS475': 'ptq',
 'TS476': 'VifChartreuse',
 'TS481': 'Vfold',
 'TS485': 'bussilab_plain_md',
 'TS489': 'Fernandez-Recio',
 'TS494': 'ClusPro',
 'TS496': 'AF_unmasked'}


In [None]:
import pandas as pd
import numpy as np

# Build a NaN-preserving 0/1 version of score_df_all and a relabelled,
# transposed copy of it (mapped_df) for plotting.

# 1. Binarise with vectorised comparisons (DataFrame.applymap is deprecated in
#    recent pandas): score > 0 -> 1.0, score == 0 -> 0.0. Comparisons with NaN
#    are False, so missing scores fail ``ge(0)`` and stay NaN; a negative score
#    would also become NaN.
has_valid_score = score_df_all.ge(0)
binary_score_df = score_df_all.gt(0).astype(float).where(has_valid_score)

# 2. Order rows by their row sum, largest first. DataFrame.sum skips NaN by
#    default, so missing targets do not change which cells are NaN.
binary_score_df = (
    binary_score_df
    .assign(row_sum=binary_score_df.sum(axis=1))
    .sort_values(by='row_sum', ascending=False)
    .drop(columns=['row_sum'])
)

# 3. Order columns alphabetically.
binary_score_df = binary_score_df[sorted(binary_score_df.columns)]

# Replace group ids / target ids by their display labels, then transpose so
# that targets are rows and groups are columns.
mapped_df = binary_score_df.rename(index=number2group, columns=stoich_dict).T
mapped_df


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Heatmap of the relabelled binary table; NaN cells are masked out (not drawn).
fig, ax = plt.subplots(figsize=(10, 8))  # figure size in inches
sns.heatmap(
    mapped_df,
    mask=mapped_df.isna(),    # hide cells with no score
    cmap="coolwarm",          # contrasting colours for 0 and 1
    cbar=True,                # show the colour bar
    linewidths=0.5,           # draw cell borders
    linecolor='black',        # border colour
    annot=False,              # do not print the value inside each cell
    ax=ax,
)

ax.set_title("Binary Score Heatmap", fontsize=16)
plt.show()


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

# Discrete two-colour map: 0 -> dark blue, 1 -> dark red (hex colour codes).
# NaN cells are not coloured at all: they are hidden through the mask below.
cmap = ListedColormap(['#00008B', '#8B0000'])

# Draw the heatmap on an explicit axes.
fig, ax = plt.subplots(figsize=(20, 8), dpi=300)
sns.heatmap(
    mapped_df,
    mask=mapped_df.isna(),       # hide cells with no score
    cmap=cmap,
    cbar_kws={'ticks': [0, 1]},  # colour bar shows only the two discrete values
    linewidths=0.5,              # cell borders
    linecolor='black',           # border colour
    square=True,                 # square cells
    annot=False,                 # do not print the value inside each cell
    ax=ax,
)

# Title.
ax.set_title("Binary Score Heatmap", fontsize=16)
plt.show()


In [None]:
# Display the current binary score table (when cells are run top to bottom,
# this is the NaN-preserving version built above).
binary_score_df

In [None]:
# Remember which (group, target) cells had no score before imputation.
mask = score_df_all.isna()

# Order the target columns alphabetically and treat every missing score as 0.
score_df_all = score_df_all.reindex(sorted(score_df_all.columns), axis=1).fillna(0)

# Per-group total over all targets, stored as an extra column ...
score_df_all['total_score'] = score_df_all.sum(axis=1)

# ... and used to rank the groups, best first.
score_df_all = score_df_all.sort_values('total_score', ascending=False)
score_df_all

In [None]:
# plot the total scores as bar plot
import matplotlib.pyplot as plt
# Bar chart of the per-group total score, in the current row order of score_df_all.
fig, ax = plt.subplots(figsize=(16, 6), dpi=300)
ax.bar(score_df_all.index, score_df_all['total_score'])
ax.set_xlabel('Group Number', fontsize=16)
ax.set_ylabel('Total Score', fontsize=16)
# Tilt the x tick labels by 45 degrees and right-align them.
plt.setp(ax.get_xticklabels(), rotation=45, fontsize=10, ha='right')
plt.setp(ax.get_yticklabels(), fontsize=10)
ax.set_title('Total Scores for stoichiometry for each Group', fontsize=20)
plt.show()



In [None]:
import seaborn as sns
from matplotlib.gridspec import GridSpec
from matplotlib.colors import ListedColormap
import numpy as np
# Remove the helper 'total_score' column so that only per-target score columns remain.
score_df_all = score_df_all.drop(columns='total_score')


In [None]:
# Heatmap of the per-target scores (left) next to each group's total (right).

# Per-group total score. Named row_sum rather than `sum` so the builtin sum()
# is not shadowed for the rest of the notebook.
row_sum = score_df_all.sum(axis=1)
sorted_indices = row_sum.sort_values(ascending=True).index
sorted_heatmap_data = score_df_all.loc[sorted_indices].reset_index(
    drop=True)
sorted_sum = row_sum.loc[sorted_indices].reset_index(drop=True)
sorted_mask = pd.DataFrame(
    mask, index=score_df_all.index).loc[sorted_indices].reset_index(drop=True)
# Put NaN back where the score was missing before imputation, so those cells
# are drawn in the colormap's "bad" colour.
masked_data = sorted_heatmap_data.copy()
masked_data[sorted_mask] = np.nan
# One label per row, in the same order as the sorted data.
row_labels = [f'{i}' for i in sorted_indices]
# set up the colormap
cmap = plt.cm.YlGn
cmap = ListedColormap(cmap(np.linspace(0, 1, 256)))
cmap.set_bad(color='gray')  # set the masked area to gray
# set up the figure and gridspec
fig = plt.figure(figsize=(24, 18), dpi=300)
gs = GridSpec(1, 2, width_ratios=[4, 1], wspace=0.3)
# plot the heatmap; xticklabels/yticklabels=True force one tick per column/row
# so the explicit label lists below always match the number of ticks
ax0 = fig.add_subplot(gs[0])
sns.heatmap(masked_data, cmap=cmap, cbar=True, ax=ax0,
            xticklabels=True, yticklabels=True)
ax0.set_yticklabels(row_labels, rotation=0)  # use the same order as the row sum
ax0.set_xticklabels(sorted_heatmap_data.columns, rotation=90)
# set x tick font size
ax0.tick_params(axis='x', labelsize=16)
# set y tick font size
ax0.tick_params(axis='y', labelsize=16)
# set the font size of the colorbar
cbar = ax0.collections[0].colorbar
cbar.ax.tick_params(labelsize=16)

ax0.set_title(
    "Heatmap for scores of stoichiometry", fontsize=20)
# plot the row sum
ax1 = fig.add_subplot(gs[1], sharey=ax0)
# Heatmap row i spans [i, i + 1], so bars and ticks are centred at i + 0.5.
y_pos = [i + 0.5 for i in range(len(sorted_sum))]
ax1.barh(y_pos, sorted_sum, color='tan')
# Ticks go at the bar centres (not at the integers) so every label sits next
# to its own bar and heatmap row.
ax1.set_yticks(y_pos)
ax1.set_yticklabels(row_labels, rotation=0)  # use the same order as the heatmap
# set x tick font size
ax1.tick_params(axis='x', labelsize=16)
# set y tick font size
ax1.tick_params(axis='y', labelsize=16)
# NOTE(review): the y axis is shared with the heatmap, so this presumably flips
# both panels together -- confirm the resulting row order is the intended one.
ax1.invert_yaxis()  # flip the y-axis
ax1.set_xlabel("Sum", fontsize=16)
ax1.set_title("Group sum scores", fontsize=20)

In [None]:
# Binary (0/1) heatmap of the scores (left) next to each group's count of
# non-zero scores (right).

# convert score_df_all to a binary dataframe:
# if the score is greater than 0, then set it to 1
binary_score_df = score_df_all.copy()
binary_score_df[binary_score_df > 0] = 1

# Rank the groups by their binary row sum, largest first. Selecting with .loc
# keeps the group labels as the index, so no reset_index/set_index round trip
# (which depended on the index being unnamed) is needed.
binary_sum = binary_score_df.sum(axis=1)
sorted_binary_indices = binary_sum.sort_values(ascending=False).index
sorted_binary_score_df = binary_score_df.loc[sorted_binary_indices]
# Sums in the SAME order as the heatmap rows; plotting the unsorted binary_sum
# would pair bars with the wrong groups.
sorted_binary_sum = binary_sum.loc[sorted_binary_indices]
# One label per row, in plotting order.
row_labels = [f'{i}' for i in sorted_binary_indices]

# plot the binary heatmap
fig = plt.figure(figsize=(24, 18), dpi=300)
gs = GridSpec(1, 2, width_ratios=[4, 1], wspace=0.3)
# plot the heatmap; the colormap is named explicitly so this cell does not
# depend on whichever `cmap` an earlier cell last defined, and
# xticklabels/yticklabels=True force one tick per column/row so the explicit
# label lists below always match the number of ticks
ax0 = fig.add_subplot(gs[0])
sns.heatmap(sorted_binary_score_df, cmap="YlGn", cbar=False, ax=ax0,
            xticklabels=True, yticklabels=True)
ax0.set_yticklabels(row_labels, rotation=0)  # use the same order as the row sum
ax0.set_xticklabels(sorted_binary_score_df.columns, rotation=90)
# set x tick font size
ax0.tick_params(axis='x', labelsize=16)
# set y tick font size
ax0.tick_params(axis='y', labelsize=16)

ax0.set_title(
    "Binary Heatmap for scores of stoichiometry", fontsize=20)
# plot the row sum
ax1 = fig.add_subplot(gs[1], sharey=ax0)
# Heatmap row i spans [i, i + 1], so bars and ticks are centred at i + 0.5.
y_pos = [i + 0.5 for i in range(len(sorted_binary_sum))]
ax1.barh(y_pos, sorted_binary_sum, color='tan')
# Ticks go at the bar centres (not at the integers) so every label sits next
# to its own bar and heatmap row.
ax1.set_yticks(y_pos)
ax1.set_yticklabels(row_labels, rotation=0)  # use the same order as the heatmap
# set x tick font size
ax1.tick_params(axis='x', labelsize=16)
# set y tick font size
ax1.tick_params(axis='y', labelsize=16)
# NOTE(review): the y axis is shared with the heatmap, so this presumably flips
# both panels together -- confirm the resulting row order is the intended one.
ax1.invert_yaxis()  # flip the y-axis
ax1.set_xlabel("Sum", fontsize=16)
ax1.set_title("Group sum binary scores", fontsize=20)
plt.show()



In [None]:
# Binary heatmap with originally-missing cells greyed out (left) next to each
# group's binary row sum (right).

# Per-group binary total. Named row_sum rather than `sum` so the builtin sum()
# is not shadowed for the rest of the notebook.
row_sum = binary_score_df.sum(axis=1)
sorted_indices = row_sum.sort_values(ascending=True).index
sorted_heatmap_data = binary_score_df.loc[sorted_indices].reset_index(
    drop=True)
sorted_sum = row_sum.loc[sorted_indices].reset_index(drop=True)
sorted_mask = pd.DataFrame(
    mask, index=binary_score_df.index).loc[sorted_indices].reset_index(drop=True)
# Put NaN back where the score was missing before imputation, so those cells
# are drawn in the colormap's "bad" colour.
masked_data = sorted_heatmap_data.copy()
masked_data[sorted_mask] = np.nan
# One label per row, in the same order as the sorted data.
row_labels = [f'{i}' for i in sorted_indices]
# set up the colormap
cmap = plt.cm.YlGn
cmap = ListedColormap(cmap(np.linspace(0, 1, 256)))
cmap.set_bad(color='gray')  # set the masked area to gray
# set up the figure and gridspec
fig = plt.figure(figsize=(24, 18), dpi=300)
gs = GridSpec(1, 2, width_ratios=[4, 1], wspace=0.3)
# plot the heatmap; xticklabels/yticklabels=True force one tick per column/row
# so the explicit label lists below always match the number of ticks
ax0 = fig.add_subplot(gs[0])
sns.heatmap(masked_data, cmap=cmap, cbar=True, ax=ax0,
            xticklabels=True, yticklabels=True)
ax0.set_yticklabels(row_labels, rotation=0)  # use the same order as the row sum
ax0.set_xticklabels(sorted_heatmap_data.columns, rotation=90)
# set x tick font size
ax0.tick_params(axis='x', labelsize=16)
# set y tick font size
ax0.tick_params(axis='y', labelsize=16)
# set the font size of the colorbar
cbar = ax0.collections[0].colorbar
cbar.ax.tick_params(labelsize=16)

ax0.set_title(
    "Heatmap for scores of stoichiometry", fontsize=20)
# plot the row sum (this panel has its own, unshared y axis)
ax1 = fig.add_subplot(gs[1])
# Heatmap row i spans [i, i + 1], so bars and ticks are centred at i + 0.5.
y_pos = [i + 0.5 for i in range(len(sorted_sum))]
ax1.barh(y_pos, sorted_sum, color='tan')
# Ticks go at the bar centres (not at the integers) so every label sits next
# to its own bar.
ax1.set_yticks(y_pos)
ax1.set_yticklabels(row_labels, rotation=0)  # use the same order as the heatmap
# set x tick font size
ax1.tick_params(axis='x', labelsize=16)
# set y tick font size
ax1.tick_params(axis='y', labelsize=16)
# Copy the heatmap's y-limits instead of merely inverting the autoscaled axis,
# so both panels use the same row extents and orientation and bar i lines up
# with heatmap row i.
ax1.set_ylim(ax0.get_ylim())
ax1.set_xlabel("Sum", fontsize=16)
ax1.set_title("Group sum scores", fontsize=20)

In [None]:
# Display the current binary score table (when cells are run top to bottom,
# this is the zero-imputed 0/1 copy of score_df_all built above).
binary_score_df

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# binary_score_df is expected to be a pandas DataFrame of 0/1 values.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    binary_score_df,
    cmap="binary",
    cbar=False,
    linewidths=0.5,
    linecolor='black',
    ax=ax,
)
ax.set_title("Binary Score Heatmap")
plt.show()
