In [1]:
#from pyvis import network as net
import networkx as nx 
from pyvis.network import Network
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
import streamlit as st
from IPython.display import display, HTML
#from IPython.core.display import display, HTML

In [2]:
import csv
import os

def create_graph_from_csv(csv_path, conditions):
    # 读取 CSV 文件
    with open(csv_path, newline='',encoding='utf-8-sig') as csvfile:
        reader = csv.DictReader(csvfile)
        print(reader)
        
        # 为每个条件创建一个空图
        graphs = {condition: nx.Graph() for condition in conditions}
        # 创建一个总的图
        total_graph = nx.Graph()
        
        # 解析 CSV 文件中的每一行
        for row in reader:
            #print(row)
            node1 = row['Bait']
            node2 = row['Prey']
            
            # 按条件添加边到各自的图
            for condition in conditions:
                if row[condition]:
                    graphs[condition].add_edge(node1, node2)
                    total_graph.add_edge(node1, node2)
    
    return graphs, total_graph

def save_graphs_to_graphml(graphs, total_graph, output_dir):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    for condition, graph in graphs.items():
        output_path = os.path.join(output_dir, f"{condition}.graphml")
        nx.write_graphml(graph, output_path)
        print(f"Graph for {condition} saved to {output_path}")
    
    total_output_path = os.path.join(output_dir, "Total.graphml")
    nx.write_graphml(total_graph, total_output_path)
    print(f"Total graph saved to {total_output_path}")

In [3]:
csv_path = 'data/DataS4_SubNetwork_ClusteringData.csv'
conditions = ['Independent', 'HM_Enhanced', 'Neu_Enhanced', 'F_Enhanced', 'S_Enhanced', 'FS_Enhanced',
              'HM_Suppressed','Neu_Suppressed','F_Suppressed','S_Suppressed','FS_Suppressed']
output_dir = 'data/graphml'

graphs, total_graph = create_graph_from_csv(csv_path, conditions)
save_graphs_to_graphml(graphs, total_graph, output_dir)

<csv.DictReader object at 0x153b1c580>
Graph for Independent saved to data/graphml/Independent.graphml
Graph for HM_Enhanced saved to data/graphml/HM_Enhanced.graphml
Graph for Neu_Enhanced saved to data/graphml/Neu_Enhanced.graphml
Graph for F_Enhanced saved to data/graphml/F_Enhanced.graphml
Graph for S_Enhanced saved to data/graphml/S_Enhanced.graphml
Graph for FS_Enhanced saved to data/graphml/FS_Enhanced.graphml
Graph for HM_Suppressed saved to data/graphml/HM_Suppressed.graphml
Graph for Neu_Suppressed saved to data/graphml/Neu_Suppressed.graphml
Graph for F_Suppressed saved to data/graphml/F_Suppressed.graphml
Graph for S_Suppressed saved to data/graphml/S_Suppressed.graphml
Graph for FS_Suppressed saved to data/graphml/FS_Suppressed.graphml
Total graph saved to data/graphml/Total.graphml


In [4]:
def create_html(path):
    # Ensure the file exists
    if not os.path.exists(path):
        print(f"File does not exist: {path}")
        return

    try:
        # Load CYS data from GraphML file
        graph = nx.read_graphml(path)
        # Extracting and displaying the node metadata
        node_metadata = {node: graph.nodes[node] for node in graph.nodes()}
        node_metadata_list = list(node_metadata.items())[:10]  # Displaying metadata for first 10 nodes for brevity
        print("Node metadata (first 10):", node_metadata_list)
        
        # Create a Pyvis network
        net = Network(notebook=False, height="750px", width="100%")

        # Node customization settings
        special_nodes = ['BSG', 'CD44', 'EGFR', 'SLC3A2']
        special_color = '#FE81B8'  # Example color (red)
        font_size = 14  # Adjust font size
        #print(graph.nodes)
        for node, attrs in graph.nodes(data=True):
            if node in special_nodes:
                net.add_node(node, label=attrs.get('id', ''), shape='diamond', size=20, font=dict(size=font_size))
            else:
                net.add_node(node, label=attrs.get('id', ''), size=12, font=dict(size=font_size), color=special_color)

        # Add edges with hover data
        for source, target, attrs in graph.edges(data=True):
            title = f"{source}_{target}"
            net.add_edge(source, target, title=title,color='#929292')

        # Generate the HTML file path
        html_path = path.replace('.graphml', '.html').replace('graphml', 'Total_html')
        print(f"Saving HTML to {html_path}")

        # Save the network to an HTML file
        net.write_html(html_path)
        print(f"HTML file saved to {html_path}")

    except Exception as e:
        print(f"Error processing file {path}: {e}")

In [5]:
# Iterate over all GraphML files in the directory and create HTML files
for file in os.listdir('data/graphml/'):
    if file.endswith('.graphml'):
        create_html(os.path.join('data/graphml', file)) 

Node metadata (first 10): [('BSG', {}), ('ADAM9', {}), ('CPT1A', {}), ('LGALS3', {}), ('CD44', {}), ('EPCAM', {}), ('ITGB1', {}), ('NDUFS1', {}), ('PFKP', {}), ('S100A8', {})]
Saving HTML to data/Total_html/Neu_Enhanced.html
HTML file saved to data/Total_html/Neu_Enhanced.html
Node metadata (first 10): [('BSG', {}), ('ADAM9', {}), ('ARF4', {}), ('ARF5', {}), ('ARF6', {}), ('ATP2A2', {}), ('ATP2B1', {}), ('ATP5F1A', {}), ('ATP5F1B', {}), ('ATP5PO', {})]
Saving HTML to data/Total_html/Total.html
HTML file saved to data/Total_html/Total.html
Node metadata (first 10): [('BSG', {}), ('ARF5', {}), ('ATP5F1A', {}), ('ATP5PO', {}), ('ATP6V1H', {}), ('CCT3', {}), ('ESYT2', {}), ('HAX1', {}), ('LPCAT1', {}), ('PHB1', {})]
Saving HTML to data/Total_html/Independent.html
HTML file saved to data/Total_html/Independent.html
Node metadata (first 10): [('BSG', {}), ('SACM1L', {}), ('CD44', {}), ('ALDH1A3', {}), ('NDUFS1', {}), ('S100A8', {}), ('S100A9', {}), ('SPINT2', {}), ('EGFR', {}), ('AP1M1', {})

## make plot

### topS score(average)

In [6]:
score_df = pd.read_csv(csv_path,header=0)
score_df['Pair'] = score_df.iloc[:, 0].astype(str) + '_' + score_df.iloc[:, 1].astype(str)
score_df = score_df.iloc[:, [18] + list(range(2, 7))]
score_df

Unnamed: 0,Pair,HM,Neu,F,S,FS
0,BSG_ADAM9,-3.558388,24.378590,-14.711610,16.649870,-15.251580
1,BSG_ARF4,63.453350,-16.640660,-6.834476,-9.014121,-4.681746
2,BSG_ARF5,4.855931,1.359417,8.539603,-16.549280,5.314012
3,BSG_ARF6,15.565080,-22.821040,-1.019577,22.421430,-3.877377
4,BSG_ATP2A2,-16.688310,2.473478,-0.159288,30.161220,-10.127520
...,...,...,...,...,...,...
151,SLC3A2_SLC25A4,-16.639500,33.773380,6.207278,1.947548,-17.620890
152,SLC3A2_SLC25A5,-14.419680,-6.014832,6.673834,13.448910,2.704662
153,SLC3A2_SLC7A5,62.809240,-17.414580,7.901422,5.912278,-16.019990
154,SLC3A2_TNFRSF10B,-75.098220,101.689200,-4.615238,-28.718650,73.869310


In [7]:
for index, row in score_df.iterrows():
    title = row[0]  # 第一列作为标题
    data = row[1:]  # 剩余列作为数据
    pair_name = row['Pair']
    # 生成柱状图
    plt.figure(figsize=(8, 6))
    ax=sns.barplot(x=data.index, y=data.values, palette=['#B14743', '#D48640', '#539045','#CC79A7', '#44729D'])
    
    # 设置图表标题和轴标签
    plt.title(title)
    plt.xlabel('')
    plt.ylabel('TopS Score (Average)')
    # 自定义横轴标签
    custom_labels = ['HM', 'Neu', 'Fuc', 'Sia', 'SiaFuc']
    ax.set_xticklabels(custom_labels)
    # 设置背景为白色
    ax.set_facecolor('white')
    plt.gcf().set_facecolor('white')

    # 确保四条边显示
    # 确保四条边显示并设置边框颜色和宽度
    for spine in ax.spines.values():
        spine.set_visible(True)
        spine.set_edgecolor('black')
        spine.set_linewidth(0.8)  # 可以根据需要调整宽度
    # 设置y轴范围和参考线
    #plt.ylim(-20, 80)
    plt.axhline(0, color='black', linewidth=1)
    plt.savefig(f"data/TopS_Score/{pair_name}.png", dpi=800)
    plt.close()  # Close the plot to avoid displaying it in the notebook
    
    
    

    

  title = row[0]  # 第一列作为标题

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  ax=sns.barplot(x=data.index, y=data.values, palette=['#B14743', '#D48640', '#539045','#CC79A7', '#44729D'])
  ax.set_xticklabels(custom_labels)
  title = row[0]  # 第一列作为标题

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  ax=sns.barplot(x=data.index, y=data.values, palette=['#B14743', '#D48640', '#539045','#CC79A7', '#44729D'])
  ax.set_xticklabels(custom_labels)
  title = row[0]  # 第一列作为标题

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  ax=sns.barplot(x=data.index, y=data.values, palette=['#B14743', '#D48640', '#539045','#CC79A7', '#44729D'])
  ax.set_xtic

### normalized by bait (per sample)

In [8]:
# read data with two level column name 
df = pd.read_csv('data/DataS3_GAP-MS_Quant_Processing.csv',header=0, skiprows=1)
df

Unnamed: 0,Pair,Bait,Prey,BaitID,PreyID,HM_1,HM_2,HM_3,Neu_1,Neu_2,...,Neu_3.1,F_1.1,F_2.1,F_3.1,S_1.1,S_2.1,S_3.1,FS_1.1,FS_2.1,FS_3.1
0,EGFR_ACIN1,EGFR,ACIN1,P00533,Q9UKV3,4.052092e+04,5.450712e+04,3.045702e+04,1.310872e+04,1.888494e+04,...,32.26,20.18,38.10,28.21,36.26,31.04,34.72,29.62,25.55,32.16
1,BSG_ADAM9,BSG,ADAM9,P35613,Q13443,1.328394e+04,1.157430e+04,1.478993e+04,1.431780e+04,1.423172e+04,...,117.47,67.87,54.14,67.54,103.45,86.37,96.52,77.55,75.73,52.59
2,EGFR_ADAM9,EGFR,ADAM9,P00533,Q13443,7.384025e+03,1.236787e+04,6.587488e+03,7.218774e+03,1.080979e+04,...,76.19,84.20,59.90,59.02,51.61,55.25,41.59,49.61,52.84,53.85
3,CD44_AGPAT2,CD44,AGPAT2,P16070,O15120,1.933473e+04,2.819623e+04,1.343752e+04,1.074930e+04,1.721734e+04,...,43.19,41.77,33.41,34.09,36.85,28.62,37.63,68.21,71.81,48.74
4,CD44_ALDH1A3,CD44,ALDH1A3,P16070,P47895,9.177027e+03,1.133345e+04,1.007137e+04,8.560941e+03,8.796078e+03,...,100.98,71.77,76.95,71.96,49.15,37.26,40.76,129.09,125.91,84.88
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
151,SLC3A2_VDAC1,SLC3A2,VDAC1,P08195,P21796,9.039628e+03,8.895175e+03,5.496049e+03,1.202920e+04,1.002899e+04,...,90.68,98.14,106.23,89.65,192.79,210.26,144.99,239.33,211.69,254.12
152,BSG_VIM,BSG,VIM,P35613,P08670,9.477566e+06,1.589625e+07,9.372005e+06,3.063164e+06,4.107945e+06,...,21.02,77.93,53.66,56.53,37.42,36.92,29.27,36.23,35.14,24.53
153,BSG_VMP1,BSG,VMP1,P35613,Q96GC9,1.370242e+05,2.178247e+05,1.156233e+05,4.422217e+04,9.195783e+04,...,30.05,23.86,43.64,33.04,24.87,40.88,32.76,21.59,31.64,24.46
154,BSG_WLS,BSG,WLS,P35613,Q5T9L3,2.375596e+04,3.017458e+04,1.953605e+04,1.552511e+04,1.787169e+04,...,65.03,46.34,36.49,36.99,51.38,46.20,50.13,55.33,59.17,52.98


In [9]:
df1 = df.iloc[:, [0] + list(range(5, 20))]
df1

Unnamed: 0,Pair,HM_1,HM_2,HM_3,Neu_1,Neu_2,Neu_3,F_1,F_2,F_3,S_1,S_2,S_3,FS_1,FS_2,FS_3
0,EGFR_ACIN1,4.052092e+04,5.450712e+04,3.045702e+04,1.310872e+04,1.888494e+04,9.826100e+03,8.175462e+03,2.076946e+04,8.590930e+03,1.469245e+04,1.691773e+04,1.057603e+04,1.200248e+04,1.392690e+04,9.794740e+03
1,BSG_ADAM9,1.328394e+04,1.157430e+04,1.478993e+04,1.431780e+04,1.423172e+04,1.737345e+04,9.015777e+03,6.266290e+03,9.989002e+03,1.374196e+04,9.996807e+03,1.427494e+04,1.030113e+04,8.765172e+03,7.778019e+03
2,EGFR_ADAM9,7.384025e+03,1.236787e+04,6.587488e+03,7.218774e+03,1.080979e+04,5.019127e+03,6.217008e+03,7.408455e+03,3.887721e+03,3.810618e+03,6.832887e+03,2.739600e+03,3.663018e+03,6.534784e+03,3.547346e+03
3,CD44_AGPAT2,1.933473e+04,2.819623e+04,1.343752e+04,1.074930e+04,1.721734e+04,5.804212e+03,8.075185e+03,9.420492e+03,4.580906e+03,7.125119e+03,8.070875e+03,5.056103e+03,1.318772e+04,2.024659e+04,6.549858e+03
4,CD44_ALDH1A3,9.177027e+03,1.133345e+04,1.007137e+04,8.560941e+03,8.796078e+03,1.017025e+04,6.586164e+03,8.721528e+03,7.247311e+03,4.510751e+03,4.223138e+03,4.105318e+03,1.184630e+04,1.426947e+04,8.549049e+03
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
151,SLC3A2_VDAC1,9.039628e+03,8.895175e+03,5.496049e+03,1.202920e+04,1.002899e+04,4.984062e+03,8.871913e+03,9.449624e+03,4.927016e+03,1.742747e+04,1.870336e+04,7.968794e+03,2.163426e+04,1.882993e+04,1.396680e+04
152,BSG_VIM,9.477566e+06,1.589625e+07,9.372005e+06,3.063164e+06,4.107945e+06,1.970365e+06,7.385806e+06,8.530392e+06,5.297552e+06,3.546138e+06,5.869309e+06,2.742930e+06,3.433844e+06,5.586633e+06,2.299193e+06
153,BSG_VMP1,1.370242e+05,2.178247e+05,1.156233e+05,4.422217e+04,9.195783e+04,3.474856e+04,3.269046e+04,9.505387e+04,3.820428e+04,3.407249e+04,8.903605e+04,3.787632e+04,2.958919e+04,6.891832e+04,2.828065e+04
154,BSG_WLS,2.375596e+04,3.017458e+04,1.953605e+04,1.552511e+04,1.787169e+04,1.270473e+04,1.100875e+04,1.101177e+04,7.226718e+03,1.220487e+04,1.393970e+04,9.792652e+03,1.314478e+04,1.785576e+04,1.035070e+04


In [10]:
for index, row in df1.iterrows():
    # Extracting the 'Pair' name and data for the row
    pair_name = row['Pair']
    data = row[1:].astype(float)  # Convert data to float
    #print((data))
    #conditions_unique = set([x[0] for x in data.index ])
    conditions_unique = ["HM","Neu","F","S","FS"]
    
    # Prepare data for plotting
    plot_data = pd.DataFrame(columns=conditions_unique)
    for condition in conditions_unique:
        # Collect data for each condition
        # 提取列名并匹配
        condition_data = [data[col] for col in data.index if condition in col]
        plot_data[condition] = pd.Series(condition_data)    
    # Plotting
    plt.figure(figsize=(8, 6))
    ax=sns.boxplot(data=plot_data,palette=['#B14743', '#D48640', '#539045', '#CC79A7', '#44729D'],saturation=1)
    plt.title(pair_name)
    plt.ylabel('Normalized Abundance')
    # 自定义横轴标签
    custom_labels = ['HM', 'Neu', 'Fuc', 'Sia', 'SiaFuc']
    ax.set_xticklabels(custom_labels)
    # 设置背景为白色
    ax.set_facecolor('white')
    plt.gcf().set_facecolor('white')

    # 确保四条边显示
    # 确保四条边显示并设置边框颜色和宽度
    for spine in ax.spines.values():
        spine.set_visible(True)
        spine.set_edgecolor('black')
        spine.set_linewidth(0.8)  # 可以根据需要调整宽度
    
    # Save the plot as a PNG file
    #plt.show()
    # use great resolution for publication
    plt.savefig(f"data/boxplot_normalized/{pair_name}.png", dpi=800)
    plt.close()  # Close the plot to avoid displaying it in the notebook
    



  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_

### relative abundance

In [11]:
df2 = df.iloc[:, [0] + list(range(20, 35))]
df2

Unnamed: 0,Pair,HM_1.1,HM_2.1,HM_3.1,Neu_1.1,Neu_2.1,Neu_3.1,F_1.1,F_2.1,F_3.1,S_1.1,S_2.1,S_3.1,FS_1.1,FS_2.1,FS_3.1
0,EGFR_ACIN1,100,100,100,32.35,34.65,32.26,20.18,38.10,28.21,36.26,31.04,34.72,29.62,25.55,32.16
1,BSG_ADAM9,100,100,100,107.78,122.96,117.47,67.87,54.14,67.54,103.45,86.37,96.52,77.55,75.73,52.59
2,EGFR_ADAM9,100,100,100,97.76,87.40,76.19,84.20,59.90,59.02,51.61,55.25,41.59,49.61,52.84,53.85
3,CD44_AGPAT2,100,100,100,55.60,61.06,43.19,41.77,33.41,34.09,36.85,28.62,37.63,68.21,71.81,48.74
4,CD44_ALDH1A3,100,100,100,93.29,77.61,100.98,71.77,76.95,71.96,49.15,37.26,40.76,129.09,125.91,84.88
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
151,SLC3A2_VDAC1,100,100,100,133.07,112.75,90.68,98.14,106.23,89.65,192.79,210.26,144.99,239.33,211.69,254.12
152,BSG_VIM,100,100,100,32.32,25.84,21.02,77.93,53.66,56.53,37.42,36.92,29.27,36.23,35.14,24.53
153,BSG_VMP1,100,100,100,32.27,42.22,30.05,23.86,43.64,33.04,24.87,40.88,32.76,21.59,31.64,24.46
154,BSG_WLS,100,100,100,65.35,59.23,65.03,46.34,36.49,36.99,51.38,46.20,50.13,55.33,59.17,52.98


In [12]:
for index, row in df2.iterrows():
    # Extracting the 'Pair' name and data for the row
    pair_name = row['Pair']
    data = row[1:].astype(float)  # Convert data to float
    #print((data))
    #conditions_unique = set([x[0] for x in data.index ])
    conditions_unique = ["HM","Neu","F","S","FS"]
    
    # Prepare data for plotting
    plot_data = pd.DataFrame(columns=conditions_unique)
    for condition in conditions_unique:
        # Collect data for each condition
        # 提取列名并匹配
        condition_data = [data[col] for col in data.index if condition in col]
        plot_data[condition] = pd.Series(condition_data)    
    # Plotting
    plt.figure(figsize=(8, 6))
    ax=sns.boxplot(data=plot_data,palette=['#B14743', '#D48640', '#539045', '#CC79A7', '#44729D'],saturation=1)
    plt.title(pair_name)
    plt.ylabel('Relative Abundance')
    # 自定义横轴标签
    custom_labels = ['HM', 'Neu', 'Fuc', 'Sia', 'SiaFuc']
    ax.set_xticklabels(custom_labels)
    # 设置背景为白色
    ax.set_facecolor('white')
    plt.gcf().set_facecolor('white')

    # 确保四条边显示
    # 确保四条边显示并设置边框颜色和宽度
    for spine in ax.spines.values():
        spine.set_visible(True)
        spine.set_edgecolor('black')
        spine.set_linewidth(0.8)  # 可以根据需要调整宽度
    
    # Save the plot as a PNG file
    #plt.show()
    # use great resolution for publication
    plt.savefig(f"data/boxplot_relative/{pair_name}.png", dpi=800)
    plt.close()  # Close the plot to avoid displaying it in the notebook
    



  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_labels)
  ax.set_xticklabels(custom_