In [None]:
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.colors as mcolor
import sys
import os
import pandas as pd
import numpy as np
from Bio import Phylo
import seaborn as sns
from scipy.stats import t, ttest_1samp, wilcoxon, mannwhitneyu, ttest_rel, zscore, spearmanr
import json
from statsmodels.stats.multitest import multipletests
from scipy.stats import gaussian_kde
from sklearn import linear_model
import re
from matplotlib.colors import ListedColormap
import networkx as nx
from statsmodels.stats.multitest import multipletests

In [None]:
# Global matplotlib styling applied to every figure drawn in this notebook.
matplotlib.rcParams.update({
    'font.family':       'Arial',
    'font.sans-serif':   ["Arial","DejaVu Sans","Lucida Grande","Verdana"],
    'figure.figsize':    [4,3],
    'font.size':         10,
    'axes.labelcolor':   "#000000",
    'axes.linewidth':    1.0,
    'xtick.major.width': 1.0,
    'ytick.major.width': 1.0,
})

# Handles to two qualitative colormaps.
cmap1, cmap2 = plt.cm.tab20, plt.cm.Set3
#plt.style.use('default')

In [None]:
# Work from the experiment directory so that the relative paths used below
# ("tables/...", "figures/...") resolve.
# NOTE(review): this absolute path is machine-specific; consider moving it to
# a single configurable constant.
os.chdir("/Users/konnonaoki/Documents/backupped/Research/IwasakiLab/Data/MetabolicNetworkEvolution/experiment/NK_M0151")

# Create the output directories if they do not exist yet.
# exist_ok=True tolerates only "already exists"; real failures (e.g. permission
# errors) are no longer silently swallowed by a bare `except`.
for output_dir in ["figures", "tables", "networks"]:
    os.makedirs(output_dir, exist_ok=True)

#### Classes of KOs

In [None]:
# Classes of KOs

# Two-column mapping tables; column names are supplied here because the files
# are read without a header row.
df_path_ko = pd.read_table("tables/path_ko.txt", names = ['Pathway', 'KO'])
df_rn_ko = pd.read_table("tables/rn_ko.txt", names = ['Reaction','KO'])
df_md_ko = pd.read_table("tables/md_ko.txt", names = ['Module','KO'])
df_path_md = pd.read_table("tables/path_md.txt", names = ['Pathway','Module'])

# Load the nested ontology JSON; the context manager closes the file handle
# instead of leaking it.
with open("/Users/konnonaoki/Documents/backupped/Research/IwasakiLab/Data/MetabolicNetworkEvolution/experiment/NK_M0151/json/ko00001.json") as ontology_file:
    ontology = json.load(ontology_file)

# Convert the nested {'name': ..., 'children': [...]} dictionaries into a
# Bio.Phylo tree. An explicit stack is used instead of recursion.
root_clade = Phylo.BaseTree.Clade(name=ontology['name'])
stack = [(ontology, root_clade)]

while stack:
    term, clade = stack.pop()
    # Terms without a 'children' key are leaves of the ontology.
    for child in term.get('children', []):
        child_clade = Phylo.BaseTree.Clade(name = child['name'])
        clade.clades.append(child_clade)
        stack.append((child, child_clade))

ontology_tree = Phylo.BaseTree.Tree(root_clade)

# Collect (category, KO) pairs: for every child clade of the first top-level
# branch of the ontology, take each terminal whose first whitespace-separated
# token starts with 'K'.
# NOTE(review): presumably the first top-level branch is "Metabolism" and the
# 'K...' tokens are KO identifiers -- confirm against ko00001.json.
list_category_ko = []
for clade in ontology_tree.clade.clades[0].clades:
    for tip in clade.get_terminals():
        KO = tip.name.split()[0]
        if KO.startswith('K'):
            list_category_ko.append([clade.name, KO])

# A KO can be listed several times under one category; keep one row per pair.
# (The original ran the collection loop twice and relied on this
# de-duplication to undo the doubling.)
df_category_ko = pd.DataFrame(list_category_ko, columns = ['category', 'KO']).drop_duplicates()

# Number of categories each KO belongs to. The Series is filtered directly
# because the column name produced by value_counts() differs between pandas
# versions ('KO' before 2.0, 'count' from 2.0 on).
ko_category_counts = df_category_ko['KO'].value_counts()
df_ko_count = ko_category_counts.to_frame()
set_ko_with_unique_category = set(ko_category_counts[ko_category_counts == 1].index)

# Flag KOs that belong to exactly one category and keep them separately.
df_category_ko['unique'] = df_category_ko['KO'].isin(set_ko_with_unique_category)
df_uniquecategory_ko = df_category_ko[df_category_ko['unique']]

# color of function categories

colors = ['#66C2A5', '#FC8D62', '#8DA0CB', '#E78AC3', '#555555', '#FC8D62', '#8DA0CB', '#E78AC3', '#66C2A5', '#FC8D62', '#000000']

cm_name = 'Set3'  # qualitative colormap used for the category colours
cm = plt.get_cmap(cm_name)

# For every module, find the category contributing the most KOs.
# Only the 'Nko' counter is aggregated: summing every column would also try to
# "sum" the string 'KO' column, which is wasteful and pandas-version dependent.
df_category_ko_module = pd.merge(df_category_ko, df_md_ko, on = 'KO')
df_category_ko_module['Nko'] = 1
df_category_module_count = df_category_ko_module.groupby(['category', 'Module'], as_index = False)['Nko'].sum()
df_maxcategory_module = df_category_module_count.loc[df_category_module_count.groupby('Module')['Nko'].idxmax(),:].sort_values('category')
df_maxcategory_module = df_maxcategory_module.reset_index().loc[:, ['category', 'Module']]

# Assign one colormap entry per category, in order of first appearance.
# NOTE(review): cm(i) is called with an integer index; if there are more
# categories than colours in the colormap, later categories presumably share
# the last colour -- confirm the number of categories.
df_category_color = pd.DataFrame([[category, i] for i, category in enumerate(df_maxcategory_module.category.unique())], columns = ["category", 'category_id'])
df_category_color['color'] = [mcolor.rgb2hex(cm(i)) for i in df_category_color['category_id']]
#df_category_color

# Same dominant-category assignment, for pathways instead of modules.
df_category_ko_pathway = pd.merge(df_category_ko, df_path_ko, on = 'KO')
df_category_ko_pathway['Nko'] = 1
df_category_pathway_count = df_category_ko_pathway.groupby(['category', 'Pathway'], as_index = False)['Nko'].sum()
df_maxcategory_pathway = df_category_pathway_count.loc[df_category_pathway_count.groupby('Pathway')['Nko'].idxmax(),:].sort_values('category')
df_maxcategory_pathway = df_maxcategory_pathway.reset_index().loc[:, ['category', 'Pathway']]
df_maxcategory_pathway

#### データセット準備

In [None]:
# Feature-selection scores: one row per (feature set, event type, selection
# method, KO, feature) with a signed importance value.
df_feature_selection = pd.read_table(
    '/Users/konnonaoki/Documents/backupped/Research/IwasakiLab/Data/MetabolicNetworkEvolution/experiment/NK_M0151/result/feature_target_method_ko_selectionscore.txt',
    names = ['feature_set', 'type', 'selec_method', 'KO', 'feature', 'signed_importance'],
)

# Keep only rows whose feature set is 'md' and whose method is 'ANOVA'.
is_md_feature_set = df_feature_selection['feature_set'] == 'md'
is_anova_method = df_feature_selection['selec_method'] == 'ANOVA'
df_feature_selection_ext = df_feature_selection.loc[is_md_feature_set & is_anova_method].reset_index()
df_feature_selection_ext

#### 特徴量ランキング

In [None]:
# Split the signed importance into magnitude and sign, colour-code the sign,
# and rank the features within each (event type, KO) group; rank 1 is the
# largest absolute importance.
signed_importance = df_feature_selection_ext['signed_importance']

df_feature_selection_ext['importance'] = signed_importance.abs()
df_feature_selection_ext['sign'] = np.sign(signed_importance)
# red for positive, blue for negative, grey otherwise
df_feature_selection_ext['sign_color'] = np.where(
    df_feature_selection_ext['sign'] > 0, "#FF0000",
    np.where(df_feature_selection_ext['sign'] < 0, "#0000FF", "#AAAAAA"),
)

importance_by_type_ko = df_feature_selection_ext.groupby(["type", "KO"])["importance"]
df_feature_selection_ext['importance_rank'] = importance_by_type_ko.rank(ascending=False)
df_feature_selection_ext

In [None]:
# Number of distinct predictor features present in the filtered table.
unique_features = set(df_feature_selection_ext['feature'])
len(unique_features)

#### predicted KOに対応するModuleを紐づけて、必要なデータを抜き出した

In [None]:
# Attach to every predicted KO the module(s) it belongs to, keep only the
# columns needed downstream, and rename them to predicted / predictor.
df_feature_selection_merge = (
    df_feature_selection_ext
    .merge(df_md_ko, on = 'KO')
    [['KO', 'type', 'Module', 'feature', 'importance_rank', 'sign']]
    .rename(columns = {'Module':'predicted', 'feature':'predictor'})
)
df_feature_selection_merge

#### 芳香族分解系の隣接するモジュールの抜き出し

In [None]:
# Module pairs analysed in the gain analysis. Each entry is
# [peripheral, central], matching the column names given to df_result below.
list_module_pair = [
    ["M00419", "M00539"],
    ["M00537", "M00551"],
    ["M00538", "M00551"],
    ["M00418", "M00541"],
    ["M00543", "M00551"],
    ["M00547", "M00568"],
    ["M00548", "M00568"],
    ["M00551", "M00568"],
    ["M00637", "M00568"],
    ["M00547", "M00569"],
    ["M00548", "M00569"],
    ["M00551", "M00569"],
    ["M00544", "M00637"],
    ["M00637", "M00569"],
    ["M00534", "M00638"],
    ["M00534", "M00569"], # not actually an adjacent pair, but kept because it has been a focus of attention so far
]
# Tabular view of the same pairs, for display.
df_result = pd.DataFrame(list_module_pair, columns = ["peripheral", "central"])
df_result

#### 芳香族分解経路の獲得に絞ってランク解析

In [None]:
# Rank analysis restricted to 'gain' events.
#
# For every [peripheral, central] pair in list_module_pair:
#   * "1st → 2nd": importance ranks of the peripheral module as a predictor
#     of KOs in the central module;
#   * "2nd → 1st": importance ranks of the central module as a predictor of
#     KOs in the peripheral module.
# The observed mean rank of each direction is compared with a null
# distribution built by drawing, N times, one random predictor row per KO of
# the predicted module and averaging the ranks. One figure per pair is saved
# under figures/, and the tests are accumulated in test_result_list.
df_feature_selection_merge_gain = df_feature_selection_merge[df_feature_selection_merge['type'] == 'gain']

N = 1000                                      # random draws per null distribution
DIRECTION_ORDER = ["1st → 2nd", "2nd → 1st"]  # y-axis order (positions 0 and 1)

# The null distribution depends only on the predicted module (the draws are
# seeded 0..N-1), so it is computed once per module and reused across pairs.
null_mean_rank_cache_gain = {}
test_result_list  = []

for peripheral, central in list_module_pair:

    df_gain = df_feature_selection_merge_gain

    # Observed ranks for both directions.
    df_obs_1st_2nd = df_gain[
        (df_gain['predicted'] == central) &
        (df_gain['predictor'] == peripheral)
    ].reset_index()
    df_obs_1st_2nd['direction'] = "1st → 2nd"

    df_obs_2nd_1st = df_gain[
        (df_gain['predicted'] == peripheral) &
        (df_gain['predictor'] == central)
    ].reset_index()
    df_obs_2nd_1st['direction'] = "2nd → 1st"

    # Without observations in both directions the group mean lookup below
    # would fail; report the pair and skip it, as the loss analysis does.
    if len(df_obs_1st_2nd) == 0 or len(df_obs_2nd_1st) == 0:
        print(central, peripheral)
        continue

    df_obs = pd.concat([df_obs_1st_2nd, df_obs_2nd_1st], ignore_index=True)
    # 'percentage' is kept as the plotting column name; it holds the raw rank.
    df_obs['percentage'] = df_obs['importance_rank']
    # Select the column before aggregating: calling .mean() on the whole frame
    # fails on recent pandas because of the string columns.
    obs_mean_rank = df_obs.groupby("direction")['percentage'].mean()

    # Get the null distribution of average rank
    null_frames = []
    for direction, predicted_module in (("1st → 2nd", central), ("2nd → 1st", peripheral)):
        if predicted_module not in null_mean_rank_cache_gain:
            rows_by_ko = df_gain[df_gain['predicted'] == predicted_module].reset_index().groupby("KO")
            null_mean_rank_cache_gain[predicted_module] = [
                np.average(rows_by_ko.sample(n=1, random_state=seed)['importance_rank'])
                for seed in range(N)
            ]
        null_frames.append(pd.DataFrame({
            "direction": direction,
            "importance_rank_ave": null_mean_rank_cache_gain[predicted_module],
        }))
    df_null = pd.concat(null_frames, ignore_index=True)
    df_null['percentage'] = df_null['importance_rank_ave']

    # Get the p-value (two-sided, from the fraction of null means above the observed mean)
    # NOTE(review): with a finite number of draws this estimate can be exactly
    # 0; consider a (count + 1) / (N + 1) style correction.
    for direction in ["2nd → 1st", "1st → 2nd"]:
        obs_ave_percentage = obs_mean_rank.loc[direction]
        sim_ave_percentage = df_null.loc[df_null["direction"] == direction, "percentage"]
        count = int((sim_ave_percentage > obs_ave_percentage).sum())
        p = min(count/N, (N - count)/N)*2
        test_result_list.append([central, peripheral, direction, obs_ave_percentage, p])

    # Figure: null distribution (violin) with the observed ranks on top.
    fig = plt.figure(figsize=(1.8,0.7))
    ax = fig.add_axes([0.1,0.1,0.8,0.8])

    sns.violinplot(
        y = df_null['direction'], order = DIRECTION_ORDER,
        x = df_null['percentage'],
        color = "#DDDDDD", alpha = 0.1, linewidth = 0, orient = 'h', ax = ax)
    sns.stripplot(
        y = df_obs['direction'], order = DIRECTION_ORDER,
        x = df_obs['percentage'],
        hue = df_obs['sign'], hue_order=[1, 0, -1], palette=["#EE7BA6", "#CCCCCC","#99FCFE"],
        alpha = 0.5, jitter= 0.3, size = 4, orient = 'h', ax = ax)

    ax.set_ylabel("Direction")
    ax.set_xlabel("Feature importance rank")
    # Reversed x axis so that rank 1 is on the right.
    # NOTE(review): 339 / 171 are presumably the number of features and its
    # midpoint -- confirm against the feature count.
    ax.set_xlim(350, -10)
    ax.set_xticks([339, 171, 1])
    ax.set_yticklabels(["Up → Down", "Down → Up"])

    ax.set_title("Up: "+peripheral+"\nDown: "+central, fontsize=10)
    legend = ax.get_legend()
    if legend is not None:
        legend.remove()
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)

    # Short vertical tick marking the observed mean rank of each direction.
    for y_pos, direction in enumerate(DIRECTION_ORDER):
        mean_rank = obs_mean_rank.loc[direction]
        ax.plot([mean_rank, mean_rank], [y_pos - 0.2, y_pos + 0.2], color = "#000000", linewidth=0.75)

    fig.savefig("figures/proxy_importance_gain_"+central+"_"+peripheral+".pdf", bbox_inches = 'tight')
    plt.close(fig)

In [None]:
# Collect the per-pair, per-direction tests of the gain analysis and correct
# the p-values for multiple testing (Benjamini-Hochberg FDR).
df_test_result = pd.DataFrame(test_result_list, columns=["central", "peripheral", "direction", "obs_ave", "p"])
df_test_result["q"] = list(multipletests(list(df_test_result.loc[:,"p"]), method = "fdr_bh")[1])
df_test_result["sig"] = ['*' if q < 0.05 else '' for q in df_test_result["q"]]
# Written to a gain-specific file: the loss analysis later in this notebook
# writes "direction_test.txt", which previously overwrote these gain results.
df_test_result.to_csv("direction_test_gain.txt", sep="\t", index=False)
df_test_result

#### C > PとP > Cの特徴量ランクの比較

In [None]:
# Pair up the observed mean ranks of the two directions for every module pair.
pair_keys = ["central", "peripheral"]
df_test_result_CP = df_test_result[df_test_result["direction"]=="2nd → 1st"].loc[:, pair_keys + ["obs_ave"]]
df_test_result_PC = df_test_result[df_test_result["direction"]=="1st → 2nd"].loc[:, pair_keys + ["obs_ave"]]
# Rename before merging so the result does not depend on pandas' implicit
# "_x"/"_y" suffixes.
df_test_result_CP_PC = pd.merge(
    df_test_result_CP.rename(columns={"obs_ave": "2nd → 1st"}),
    df_test_result_PC.rename(columns={"obs_ave": "1st → 2nd"}),
    on = pair_keys,
)

# Wilcoxon test
print(wilcoxon(df_test_result_CP_PC["2nd → 1st"], df_test_result_CP_PC["1st → 2nd"]))

fig = plt.figure(figsize=(2,2))
ax = fig.add_axes([0.1,0.1,0.8,0.8])
ax.scatter(x=df_test_result_CP_PC["2nd → 1st"], y=df_test_result_CP_PC["1st → 2nd"], color="#009193", s = 8)
# Identity line: points off the diagonal have different mean ranks in the two directions.
ax.plot([-10,400], [-10,400], color = "#AAAAAA", alpha=0.3)
# Reversed axes so that rank 1 is at the top / right.
ax.set_xlim(350, -10)
ax.set_ylim(350, -10)
ax.set_xticks([339, 171, 1])
ax.set_yticks([339, 171, 1])

# The plotted values are raw mean ranks (see the tick values), not normalised
# percentages, so the axes are labelled accordingly.
ax.set_ylabel("Mean feature importance rank\n1st → 2nd")
ax.set_xlabel("Mean feature importance rank\n2nd → 1st")

plt.savefig("figures/proxy_gain_importance_scatter.pdf", bbox_inches = 'tight')


### Loss解析
#### まずは近接モジュールの全列挙。
近接しているペアのうち、機能が一貫しており(=どちらもsynthesis、またはどちらもdegradation)、一続きの反応経路を構成する(片方の生成物がもう片方の原料である)ものを選んだ。

In [None]:
# See NK_M0152.
# Module pairs for the loss analysis, grouped by functional category.
# Each entry is [central, peripheral] (the order used when these lists are
# turned into data frames and unpacked in the loss loop).
list_module_pair_amino = [
    ["M00028", "M00844"],
    ["M00763", "M00844"],
    ["M00845", "M00133"],
    ["M00844", "M00134"],
    ["M00845", "M00134"],
    ["M00134", "M00136"],
    ["M00018", "M00570"],
    ["M00134", "M00135"],
    ["M00017", "M00609"],
    ["M00844", "M00133"],
    ["M00025", "M00042"],
    ["M00022", "M00025"],
    ["M00040", "M00042"],
    ["M00040", "M00043"],
    ["M00022", "M00023"],
    ["M00022", "M00024"],
    ["M00023", "M00370"],
    ["M00023", "M00037"]
]

list_module_pair_vitamin = [
    ["M00846", "M00847"],
    ["M00572", "M00123"],
    ["M00119", "M00120"]
]

list_module_pair_nucleotide = [
    ["M00048", "M00049"],
    ["M00048", "M00050"],
    ["M00052", "M00053"]
]

# NOTE: this rebinds list_module_pair, replacing the pair list that was used
# by the gain analysis earlier in the notebook.
list_module_pair = list_module_pair_amino + list_module_pair_vitamin + list_module_pair_nucleotide

In [None]:
def _labelled_pairs(pairs, category):
    """Return the [central, peripheral] pairs as a frame tagged with `category`."""
    frame = pd.DataFrame(pairs, columns = ["central", "peripheral"])
    frame["category"] = category
    return frame


# One frame per functional category, then a single table with a fresh index.
df_module_pair_amino = _labelled_pairs(list_module_pair_amino, '09105 Amino acid metabolism')
df_module_pair_vitamin = _labelled_pairs(list_module_pair_vitamin, '09108 Metabolism of cofactors and vitamins')
df_module_pair_nucleotide = _labelled_pairs(list_module_pair_nucleotide, '09104 Nucleotide metabolism')
df_module_pair = pd.concat(
    [df_module_pair_amino, df_module_pair_vitamin, df_module_pair_nucleotide],
    ignore_index=True,
)
df_module_pair

#### アミノ酸/ビタミン/ヌクレオチド代謝の欠失に絞ってランク解析

In [None]:
# Rows of the merged feature table whose event type is 'loss'.
is_loss_event = df_feature_selection_merge['type'] == 'loss'
df_feature_selection_merge_loss = df_feature_selection_merge.loc[is_loss_event]

df_feature_selection_merge_loss

In [None]:
# Rank analysis restricted to 'loss' events.
#
# For every [central, peripheral] pair in list_module_pair:
#   * "1st → 2nd": importance ranks of the central module as a predictor of
#     KOs in the peripheral module;
#   * "2nd → 1st": importance ranks of the peripheral module as a predictor
#     of KOs in the central module.
# The observed mean rank of each direction is compared with a null
# distribution built by drawing, N times, one random predictor row per KO of
# the predicted module and averaging the ranks. One figure per pair is saved
# under figures/, and the tests are accumulated in test_result_list.
N = 1000                                      # random draws per null distribution
DIRECTION_ORDER = ["1st → 2nd", "2nd → 1st"]  # y-axis order (positions 0 and 1)

# The null distribution depends only on the predicted module (the draws are
# seeded 0..N-1), so it is computed once per module and reused across pairs.
null_mean_rank_cache_loss = {}
test_result_list  = []

for central, peripheral in list_module_pair:

    df_loss = df_feature_selection_merge_loss

    # Observed ranks; direction reads "<predictor> → <predicted>".
    df_obs_2nd_1st = df_loss[
        (df_loss['predicted'] == central) &
        (df_loss['predictor'] == peripheral)
    ].reset_index()
    df_obs_2nd_1st['direction'] = "2nd → 1st"

    df_obs_1st_2nd = df_loss[
        (df_loss['predicted'] == peripheral) &
        (df_loss['predictor'] == central)
    ].reset_index()
    df_obs_1st_2nd['direction'] = "1st → 2nd"

    # Pairs lacking observations in either direction are reported and skipped.
    if len(df_obs_2nd_1st) == 0 or len(df_obs_1st_2nd) == 0:
        print(central, peripheral)
        continue

    df_obs = pd.concat([df_obs_2nd_1st, df_obs_1st_2nd], ignore_index=True)
    # 'percentage' is kept as the plotting column name; it holds the raw rank.
    df_obs['percentage'] = df_obs['importance_rank']
    # Select the column before aggregating: calling .mean() on the whole frame
    # fails on recent pandas because of the string columns.
    obs_mean_rank = df_obs.groupby("direction")['percentage'].mean()

    # Get the null distribution of average rank
    null_frames = []
    for direction, predicted_module in (("2nd → 1st", central), ("1st → 2nd", peripheral)):
        if predicted_module not in null_mean_rank_cache_loss:
            rows_by_ko = df_loss[df_loss['predicted'] == predicted_module].reset_index().groupby("KO")
            null_mean_rank_cache_loss[predicted_module] = [
                np.average(rows_by_ko.sample(n=1, random_state=seed)['importance_rank'])
                for seed in range(N)
            ]
        null_frames.append(pd.DataFrame({
            "direction": direction,
            "importance_rank_ave": null_mean_rank_cache_loss[predicted_module],
        }))
    df_null = pd.concat(null_frames, ignore_index=True)
    df_null['percentage'] = df_null['importance_rank_ave']

    # Get the p-value (two-sided, from the fraction of null means above the observed mean)
    # NOTE(review): with a finite number of draws this estimate can be exactly
    # 0; consider a (count + 1) / (N + 1) style correction.
    for direction in ["1st → 2nd", "2nd → 1st"]:
        obs_ave_percentage = obs_mean_rank.loc[direction]
        sim_ave_percentage = df_null.loc[df_null["direction"] == direction, "percentage"]
        count = int((sim_ave_percentage > obs_ave_percentage).sum())
        p = min(count/N, (N - count)/N)*2
        test_result_list.append([central, peripheral, direction, obs_ave_percentage, p])

    # Figure: null distribution (violin) with the observed ranks on top.
    fig = plt.figure(figsize=(1.8,0.7))
    ax = fig.add_axes([0.1,0.1,0.8,0.8])

    sns.violinplot(
        y = df_null['direction'], order = DIRECTION_ORDER,
        x = df_null['percentage'],
        color = "#DDDDDD", alpha = 0.1, linewidth = 0, orient = 'h', ax = ax)
    sns.stripplot(
        y = df_obs['direction'], order = DIRECTION_ORDER,
        x = df_obs['percentage'],
        hue = df_obs['sign'], hue_order=[1, 0, -1], palette=["#EE7BA6", "#CCCCCC","#99FCFE"],
        alpha = 0.5, jitter= 0.3, size = 4, orient = 'h', ax = ax)

    ax.set_ylabel("Direction")
    ax.set_xlabel("Feature importance rank")
    # Reversed x axis so that rank 1 is on the right.
    # NOTE(review): 339 / 171 are presumably the number of features and its
    # midpoint -- confirm against the feature count.
    ax.set_xlim(350, -10)
    ax.set_xticks([339, 171, 1])
    ax.set_title("Up: "+central+"\nDown: "+peripheral, fontsize=10)
    legend = ax.get_legend()
    if legend is not None:
        legend.remove()
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)

    # Short vertical tick marking the observed mean rank of each direction
    # ("1st → 2nd" on the upper row, "2nd → 1st" on the lower row).
    for y_pos, direction in enumerate(DIRECTION_ORDER):
        mean_rank = obs_mean_rank.loc[direction]
        ax.plot([mean_rank, mean_rank], [y_pos - 0.2, y_pos + 0.2], color = "#000000", linewidth=0.75)

    fig.savefig("figures/proxy_importance_loss_"+central+"_"+peripheral+".pdf", bbox_inches = 'tight')
    plt.close(fig)

In [None]:
# Tabulate the loss-analysis tests, add Benjamini-Hochberg q-values and a
# significance marker, write the table to disk, and display it ordered by
# central module and then peripheral module.
result_columns = ["central", "peripheral", "direction", "obs_ave", "p"]
df_test_result = pd.DataFrame(test_result_list, columns=result_columns)

q_values = multipletests(df_test_result["p"].tolist(), method = "fdr_bh")[1]
df_test_result["q"] = list(q_values)
df_test_result["sig"] = np.where(df_test_result["q"] < 0.05, '*', '')

df_test_result.to_csv("direction_test.txt", sep="\t", index=False)
# Multi-key sorting is stable, so ties keep their original order.
df_test_result.sort_values(["central", "peripheral"])

#### C > PとP > Cの特徴量ランクの比較

In [None]:
# Pair up the observed mean ranks of the two directions for every module pair.
pair_keys = ["central", "peripheral"]
df_test_result_CP = df_test_result[df_test_result["direction"]=="1st → 2nd"].loc[:, pair_keys + ["obs_ave"]]
df_test_result_PC = df_test_result[df_test_result["direction"]=="2nd → 1st"].loc[:, pair_keys + ["obs_ave"]]
# Rename before merging so the result does not depend on pandas' implicit
# "_x"/"_y" suffixes.
df_test_result_CP_PC = pd.merge(
    df_test_result_CP.rename(columns={"obs_ave": "1st → 2nd"}),
    df_test_result_PC.rename(columns={"obs_ave": "2nd → 1st"}),
    on = pair_keys,
)

# Wilcoxon test
print(wilcoxon(df_test_result_CP_PC["2nd → 1st"], df_test_result_CP_PC["1st → 2nd"]))

fig = plt.figure(figsize=(2,2))
ax = fig.add_axes([0.1,0.1,0.8,0.8])
# Identity line: points off the diagonal have different mean ranks in the two directions.
ax.plot([-10,400], [-10,400], color = "#AAAAAA", alpha=0.3)

ax.scatter(y=df_test_result_CP_PC["1st → 2nd"], x=df_test_result_CP_PC["2nd → 1st"], color="#009193", s = 8)

# The plotted values are raw mean ranks (see the tick values), not normalised
# percentages, so the axes are labelled accordingly.
ax.set_ylabel("Mean feature importance rank\n1st → 2nd")
ax.set_xlabel("Mean feature importance rank\n2nd → 1st")
# Reversed axes so that rank 1 is at the top / right.
ax.set_xlim(350, -10)
ax.set_ylim(350, -10)
ax.set_xticks([339, 171, 1])
ax.set_yticks([339, 171, 1])

plt.savefig("figures/proxy_loss_importance_scatter.pdf", bbox_inches = 'tight')



In [None]:
# Display the paired mean-rank table (one row per module pair, one column per direction).
df_test_result_CP_PC

In [None]:
# Pull out the single pair M00018 (central) / M00570 (peripheral).
is_selected_pair = (
    (df_test_result_CP_PC["central"] == "M00018")
    & (df_test_result_CP_PC["peripheral"] == "M00570")
)
df_test_result_CP_PC_ext = df_test_result_CP_PC.loc[is_selected_pair]
df_test_result_CP_PC_ext