In [6]:
#BDM Perturbation Analysis

import pandas as pd
import numpy as np
from pybdm import BDM
from pybdm import PerturbationExperiment
import os

# Set the working directory
os.chdir(r'C:\Users\uabic\Desktop')

# Load the expression data
scEpathK27M = pd.read_csv("selected_scEpathK27M.csv", index_col=0)
scEpathIDHWT = pd.read_csv("selected_scEpathIDHWT.csv", index_col=0)

# Transpose the data frames to have genes as columns
scEpathK27M = scEpathK27M.transpose()
scEpathIDHWT = scEpathIDHWT.transpose()

# Compute Spearman correlation adjacency matrices
K27M_adj_matrix = scEpathK27M.corr(method='spearman')
IDHWT_adj_matrix = scEpathIDHWT.corr(method='spearman')

# Save adjacency matrices
K27M_adj_matrix.to_csv("K27M_adjacency_matrix3.csv")
IDHWT_adj_matrix.to_csv("IDHWT_adjacency_matrix3.csv")

# Perform BDM perturbation analysis
def binarize_matrix(matrix, threshold=0.5):
    binary_matrix = (matrix > threshold).astype(int)
    return binary_matrix

def bdm_perturbation_analysis(adj_matrix):
    bdm = BDM(ndim=2)
    binary_matrix = binarize_matrix(adj_matrix.values)
    perturbation = PerturbationExperiment(bdm, binary_matrix, metric='bdm')
    delta_bdm = perturbation.run()
    
    # Ensure the shape matches the original matrix
    reshaped_delta_bdm = np.reshape(delta_bdm, adj_matrix.shape)
    
    return pd.DataFrame(reshaped_delta_bdm, index=adj_matrix.index, columns=adj_matrix.columns)

K27M_bdm_results = bdm_perturbation_analysis(K27M_adj_matrix)
IDHWT_bdm_results = bdm_perturbation_analysis(IDHWT_adj_matrix)

# Save BDM results
K27M_bdm_results.to_csv("K27M_bdm_results3.csv")
IDHWT_bdm_results.to_csv("IDHWT_bdm_results3.csv")


In [9]:
import matplotlib.pyplot as plt

def plot_top_5_bdm_changes(bdm_results, title, save_path, color):
    # Get the top 5 BDM changes
    top_5 = bdm_results.unstack().nlargest(5)
    
    # Create a plot
    plt.figure(figsize=(12, 8))
    top_5.plot(kind='bar', color=color)
    plt.ylabel('BDM Change (bits)', fontsize=16)
    plt.xlabel('Gene-Gene Interaction', fontsize=16)
    plt.title(title, fontsize=20)
    plt.xticks(rotation=45, ha='right', fontsize=14)
    plt.yticks(fontsize=14)
    plt.tight_layout()
    
    # Save the plot as a high-quality JPEG image
    plt.savefig(save_path, format='jpeg', quality=95)
    plt.close()

# Plot and save the top 5 BDM changes for K27M
plot_top_5_bdm_changes(K27M_bdm_results, 'Top 5 K27M BDM Changes', 'K27M_top_5_bdm_changes.jpeg', 'turquoise')

# Plot and save the top 5 BDM changes for IDHWT
plot_top_5_bdm_changes(IDHWT_bdm_results, 'Top 5 IDHWT BDM Changes', 'IDHWT_top_5_bdm_changes.jpeg', 'pink')



The 'quality' parameter of print_jpg() was deprecated in Matplotlib 3.3 and will be removed two minor releases later. Use pil_kwargs={'quality': ...} instead. If any parameter follows 'quality', they should be passed as keyword, not positionally.
  plt.savefig(save_path, format='jpeg', quality=95)
The 'quality' parameter of print_jpg() was deprecated in Matplotlib 3.3 and will be removed two minor releases later. Use pil_kwargs={'quality': ...} instead. If any parameter follows 'quality', they should be passed as keyword, not positionally.
  plt.savefig(save_path, format='jpeg', quality=95)


In [11]:
import matplotlib.pyplot as plt
import numpy as np

def get_top_5_unique_pairs(bdm_results):
    bdm_unstacked = bdm_results.unstack()
    # Get top pairs with sorted indices
    sorted_bdm = bdm_unstacked.sort_values(ascending=False)
    unique_pairs = {}
    
    for (gene1, gene2), value in sorted_bdm.items():
        sorted_pair = tuple(sorted([gene1, gene2]))
        if sorted_pair not in unique_pairs:
            unique_pairs[sorted_pair] = value
        if len(unique_pairs) == 5:
            break
    
    top_5_pairs = pd.Series(unique_pairs).sort_values(ascending=False)
    return top_5_pairs

def plot_top_5_bdm_changes(bdm_results, title, save_path, color):
    # Get the top 5 unique BDM changes
    top_5 = get_top_5_unique_pairs(bdm_results)
    
    # Calculate mean and standard error
    means = top_5.values
    errors = np.std(top_5.values) / np.sqrt(len(top_5.values))
    
    # Create a plot
    plt.figure(figsize=(12, 8))
    plt.bar(top_5.index.map(str), means, yerr=errors, color=color, capsize=5)
    plt.ylabel('BDM Change (bits)', fontsize=16)
    plt.xlabel('Gene-Gene Interaction', fontsize=16)
    plt.title(title, fontsize=20)
    plt.xticks(rotation=45, ha='right', fontsize=14)
    plt.yticks(fontsize=14)
    plt.tight_layout()
    
    # Save the plot as a high-quality JPEG image
    plt.savefig(save_path, format='jpeg', quality=95)
    plt.close()

# Plot and save the top 5 BDM changes for K27M
plot_top_5_bdm_changes(K27M_bdm_results, 'Top 5 K27M BDM Changes', 'K27M_top_5_bdm_changes.jpeg', 'turquoise')

# Plot and save the top 5 BDM changes for IDHWT
plot_top_5_bdm_changes(IDHWT_bdm_results, 'Top 5 IDHWT BDM Changes', 'IDHWT_top_5_bdm_changes.jpeg', 'pink')



The 'quality' parameter of print_jpg() was deprecated in Matplotlib 3.3 and will be removed two minor releases later. Use pil_kwargs={'quality': ...} instead. If any parameter follows 'quality', they should be passed as keyword, not positionally.
  plt.savefig(save_path, format='jpeg', quality=95)
The 'quality' parameter of print_jpg() was deprecated in Matplotlib 3.3 and will be removed two minor releases later. Use pil_kwargs={'quality': ...} instead. If any parameter follows 'quality', they should be passed as keyword, not positionally.
  plt.savefig(save_path, format='jpeg', quality=95)


In [22]:
import pandas as pd
import numpy as np
from pybdm import BDM
from pybdm import PerturbationExperiment
import os
import matplotlib.pyplot as plt

# Set the working directory
os.chdir(r'C:\Users\uabic\Desktop')

# Load the gene expression data
K27M_expr = pd.read_csv("K27Mreal_log_normalized.csv", index_col=0)

# Define the list of genes
genes_list = [
    'ETV2', 'IKZF1', 'IRF8', 'KLF14', 'MYC', 'OTX1', 'ZNF777', 'BCL3', 'ELF1', 
    'ETV3', 'MYB', 'FEV', 'MYBL1', 'TEAD4', 'ATF3', 'TFAP2E', 'FOXO3', 'EHF', 
    'ZNF230', 'ELK3', 'E2F7', 'CEBPG', 'FOXG1', 'ZNF787', 'GATA6'
]

# Filter the gene expression data based on the provided gene list
K27M_selected_expr = K27M_expr.loc[K27M_expr.index.intersection(genes_list)]

# Transpose to have genes as columns
K27M_selected_expr = K27M_selected_expr.transpose()

# Compute Spearman correlation adjacency matrices
K27M_adj_matrix = K27M_selected_expr.corr(method='spearman')

# Save adjacency matrices
K27M_adj_matrix.to_csv("K27M_adjacency_matrix_regulons.csv")

# Perform BDM perturbation analysis
def binarize_matrix(matrix, threshold=0.5):
    binary_matrix = (matrix > threshold).astype(int)
    return binary_matrix

def bdm_perturbation_analysis(adj_matrix):
    bdm = BDM(ndim=2)
    binary_matrix = binarize_matrix(adj_matrix.values)
    perturbation = PerturbationExperiment(bdm, binary_matrix, metric='bdm')
    delta_bdm = perturbation.run()
    
    # Ensure the shape matches the original matrix
    reshaped_delta_bdm = np.reshape(delta_bdm, adj_matrix.shape)
    
    return pd.DataFrame(reshaped_delta_bdm, index=adj_matrix.index, columns=adj_matrix.columns)

K27M_bdm_results = bdm_perturbation_analysis(K27M_adj_matrix)

# Save BDM results
K27M_bdm_results.to_csv("K27M_bdm_results_regulons.csv")

# Function to get top 5 unique gene pairs
def get_top_5_unique_pairs(bdm_results):
    bdm_unstacked = bdm_results.unstack()
    sorted_bdm = bdm_unstacked.sort_values(ascending=False)
    unique_pairs = {}
    
    for (gene1, gene2), value in sorted_bdm.items():
        sorted_pair = tuple(sorted([gene1, gene2]))
        if sorted_pair not in unique_pairs:
            unique_pairs[sorted_pair] = value
        if len(unique_pairs) == 5:
            break
    
    top_5_pairs = pd.Series(unique_pairs).sort_values(ascending=False)
    return top_5_pairs

# Function to plot the top 5 BDM changes
def plot_top_5_bdm_changes(bdm_results, title, save_path, color):
    top_5 = get_top_5_unique_pairs(bdm_results)
    means = top_5.values
    errors = np.std(top_5.values) / np.sqrt(len(top_5.values))
    
    plt.figure(figsize=(12, 8))
    plt.bar(top_5.index.map(str), means, yerr=errors, color=color, capsize=5)
    plt.ylabel('BDM Change (bits)', fontsize=24)
    plt.xlabel('Gene-Gene Interaction', fontsize=24)
    plt.title(title, fontsize=20)
    plt.xticks(rotation=45, ha='right', fontsize=30)
    plt.yticks(fontsize=16)
    plt.tight_layout()
    
    plt.savefig(save_path, format='jpeg', quality=95)
    plt.close()

# Plot and save the top 5 BDM changes for K27M
plot_top_5_bdm_changes(K27M_bdm_results, 'K27M Regulons BDM Changes', 'K27M_top_5_bdm_changes_regulons.jpeg', 'turquoise')


The 'quality' parameter of print_jpg() was deprecated in Matplotlib 3.3 and will be removed two minor releases later. Use pil_kwargs={'quality': ...} instead. If any parameter follows 'quality', they should be passed as keyword, not positionally.
  plt.savefig(save_path, format='jpeg', quality=95)


In [23]:
# Load the gene expression data
IDHWT_expr = pd.read_csv("IDHWT_log_normalized.csv", index_col=0)

# Load the gene list
IDHWT_regulons = pd.read_csv("IDHWTReg.csv")

# Filter the gene expression data based on the provided gene list
IDHWT_selected_expr = IDHWT_expr.loc[IDHWT_expr.index.intersection(IDHWT_regulons['Gene'])]

# Transpose to have genes as columns
IDHWT_selected_expr = IDHWT_selected_expr.transpose()

# Compute Spearman correlation adjacency matrices
IDHWT_adj_matrix = IDHWT_selected_expr.corr(method='spearman')

# Save adjacency matrices
IDHWT_adj_matrix.to_csv("IDHWT_adjacency_matrix_regulons.csv")

# Perform BDM perturbation analysis
IDHWT_bdm_results = bdm_perturbation_analysis(IDHWT_adj_matrix)

# Save BDM results
IDHWT_bdm_results.to_csv("IDHWT_bdm_results_regulons.csv")

# Plot and save the top 5 BDM changes for IDHWT
plot_top_5_bdm_changes(IDHWT_bdm_results, 'IDHWT Regulons BDM Changes', 'IDHWT_top_5_bdm_changes_regulons.jpeg', 'pink')


The 'quality' parameter of print_jpg() was deprecated in Matplotlib 3.3 and will be removed two minor releases later. Use pil_kwargs={'quality': ...} instead. If any parameter follows 'quality', they should be passed as keyword, not positionally.
  plt.savefig(save_path, format='jpeg', quality=95)


In [25]:
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt

# Function to plot the network
def plot_network(adj_matrix, title, save_path):
    G = nx.from_pandas_adjacency(adj_matrix)
    pos = nx.spring_layout(G)
    plt.figure(figsize=(24, 16))
    nx.draw(G, pos, with_labels=True, node_color='turquoise', edge_color='violet', node_size=500, font_size=18, font_color='black')
    plt.title(title, fontsize=20)
    plt.tight_layout()
    plt.savefig(save_path, format='jpeg', quality=95)
    plt.close()

# Function to compute centralities and save as CSV
def compute_centralities(adj_matrix, prefix):
    G = nx.from_pandas_adjacency(adj_matrix)
    centralities = {
        'betweenness': nx.betweenness_centrality(G),
        'closeness': nx.closeness_centrality(G),
        'eigenvector': nx.eigenvector_centrality(G, max_iter=1000)
    }
    centrality_df = pd.DataFrame(centralities)
    centrality_df.to_csv(f"{prefix}_centrality_measures.csv")

# Load adjacency matrices
K27M_adj_matrix = pd.read_csv("K27M_adjacency_matrix_regulons.csv", index_col=0).abs()
IDHWT_adj_matrix = pd.read_csv("IDHWT_adjacency_matrix_regulons.csv", index_col=0).abs()

# Plot networks
plot_network(K27M_adj_matrix, 'K27M Network', 'K27M_network.jpeg')
plot_network(IDHWT_adj_matrix, 'IDHWT Network', 'IDHWT_network.jpeg')

# Compute and save centrality measures
compute_centralities(K27M_adj_matrix, "K27M")
compute_centralities(IDHWT_adj_matrix, "IDHWT")


  plt.tight_layout()
The 'quality' parameter of print_jpg() was deprecated in Matplotlib 3.3 and will be removed two minor releases later. Use pil_kwargs={'quality': ...} instead. If any parameter follows 'quality', they should be passed as keyword, not positionally.
  plt.savefig(save_path, format='jpeg', quality=95)


In [5]:
#CellRouter Networks
import os
import pandas as pd
import numpy as np
from pybdm import BDM
from pybdm import PerturbationExperiment
import matplotlib.pyplot as plt
import networkx as nx

# Set the working directory
os.chdir(r'C:\Users\uabic\Desktop')

# Load the gene lists
top10_K27M = pd.read_csv("top10_K27M.csv")['Gene']
top10_IDHWT = pd.read_csv("top10_IDHWT.csv")['Gene']

# Load the gene expression data
K27M_expr = pd.read_csv("K27Mreal_log_normalized.csv", index_col=0)
IDHWT_expr = pd.read_csv("IDHWT_log_normalized.csv", index_col=0)

# Filter the gene expression data based on the provided gene lists
K27M_selected_expr = K27M_expr.loc[K27M_expr.index.intersection(top10_K27M)]
IDHWT_selected_expr = IDHWT_expr.loc[IDHWT_expr.index.intersection(top10_IDHWT)]

# Transpose to have genes as columns
K27M_selected_expr = K27M_selected_expr.transpose()
IDHWT_selected_expr = IDHWT_selected_expr.transpose()

# Compute Spearman correlation adjacency matrices
K27M_adj_matrix = K27M_selected_expr.corr(method='spearman')
IDHWT_adj_matrix = IDHWT_selected_expr.corr(method='spearman')

# Save adjacency matrices
K27M_adj_matrix.to_csv("K27M_adjacency_matrix.csv")
IDHWT_adj_matrix.to_csv("IDHWT_adjacency_matrix.csv")

# Perform BDM perturbation analysis
def binarize_matrix(matrix, threshold=0.5):
    binary_matrix = (matrix > threshold).astype(int)
    return binary_matrix

def bdm_perturbation_analysis(adj_matrix):
    bdm = BDM(ndim=2)
    binary_matrix = binarize_matrix(adj_matrix.values)
    perturbation = PerturbationExperiment(bdm, binary_matrix, metric='bdm')
    delta_bdm = perturbation.run()
    
    # Ensure the shape matches the original matrix
    reshaped_delta_bdm = np.reshape(delta_bdm, adj_matrix.shape)
    
    return pd.DataFrame(reshaped_delta_bdm, index=adj_matrix.index, columns=adj_matrix.columns)

K27M_bdm_results = bdm_perturbation_analysis(K27M_adj_matrix)
IDHWT_bdm_results = bdm_perturbation_analysis(IDHWT_adj_matrix)

# Save BDM results
K27M_bdm_results.to_csv("K27M_bdm_results.csv")
IDHWT_bdm_results.to_csv("IDHWT_bdm_results.csv")

# Function to get top 5 unique gene pairs
def get_top_5_unique_pairs(bdm_results):
    bdm_unstacked = bdm_results.unstack()
    sorted_bdm = bdm_unstacked.sort_values(ascending=False)
    unique_pairs = {}
    
    for (gene1, gene2), value in sorted_bdm.items():
        sorted_pair = tuple(sorted([gene1, gene2]))
        if sorted_pair not in unique_pairs:
            unique_pairs[sorted_pair] = value
        if len(unique_pairs) == 5:
            break
    
    top_5_pairs = pd.Series(unique_pairs).sort_values(ascending=False)
    return top_5_pairs

# Function to plot the top 5 BDM changes
def plot_top_5_bdm_changes(bdm_results, title, save_path, color):
    top_5 = get_top_5_unique_pairs(bdm_results)
    means = top_5.values
    errors = np.std(top_5.values) / np.sqrt(len(top_5.values))
    
    plt.figure(figsize=(12, 8))
    plt.bar(top_5.index.map(str), means, yerr=errors, color=color, capsize=5)
    plt.ylabel('BDM Change (bits)', fontsize=16)
    plt.xlabel('Gene-Gene Interaction', fontsize=16)
    plt.title(title, fontsize=20)
    plt.xticks(rotation=45, ha='right', fontsize=14)
    plt.yticks(fontsize=14)
    plt.tight_layout()
    
    plt.savefig(save_path, format='jpeg', quality=95)
    plt.close()

# Plot and save the top 5 BDM changes for K27M
plot_top_5_bdm_changes(K27M_bdm_results, 'K27M Top 5 Gene BDM Changes', 'K27M_top_5_bdm_changes.jpeg', 'lightblue')

# Plot and save the top 5 BDM changes for IDHWT
plot_top_5_bdm_changes(IDHWT_bdm_results, 'IDHWT Top 5 Gene BDM Changes', 'IDHWT_top_5_bdm_changes.jpeg', 'pink')

# Function to plot the network
def plot_network(adj_matrix, title, save_path):
    G = nx.from_pandas_adjacency(adj_matrix)
    pos = nx.spring_layout(G)
    plt.figure(figsize=(24, 16))
    nx.draw(G, pos, with_labels=True, node_color='violet', edge_color='pink', node_size=500, font_size=18, font_color='black')
    plt.title(title, fontsize=20)
    plt.tight_layout()
    plt.savefig(save_path, format='jpeg', quality=95)
    plt.close()

# Function to compute centralities and save as CSV
def compute_centralities(adj_matrix, prefix):
    G = nx.from_pandas_adjacency(adj_matrix)
    centralities = {
        'betweenness': nx.betweenness_centrality(G),
        'closeness': nx.closeness_centrality(G),
        'eigenvector': nx.eigenvector_centrality(G, max_iter=1000)
    }
    centrality_df = pd.DataFrame(centralities)
    centrality_df.to_csv(f"{prefix}_centrality_measures.csv")

# Load and take absolute value of adjacency matrices
K27M_adj_matrix_abs = K27M_adj_matrix.abs()
IDHWT_adj_matrix_abs = IDHWT_adj_matrix.abs()

# Plot networks
plot_network(K27M_adj_matrix_abs, 'K27M Gene Network', 'K27M_network.jpeg')
plot_network(IDHWT_adj_matrix_abs, 'IDHWT Gene Network', 'IDHWT_network.jpeg')

# Compute and save centrality measures
compute_centralities(K27M_adj_matrix_abs, "K27M")
compute_centralities(IDHWT_adj_matrix_abs, "IDHWT")


The 'quality' parameter of print_jpg() was deprecated in Matplotlib 3.3 and will be removed two minor releases later. Use pil_kwargs={'quality': ...} instead. If any parameter follows 'quality', they should be passed as keyword, not positionally.
  plt.savefig(save_path, format='jpeg', quality=95)
The 'quality' parameter of print_jpg() was deprecated in Matplotlib 3.3 and will be removed two minor releases later. Use pil_kwargs={'quality': ...} instead. If any parameter follows 'quality', they should be passed as keyword, not positionally.
  plt.savefig(save_path, format='jpeg', quality=95)
  plt.tight_layout()
The 'quality' parameter of print_jpg() was deprecated in Matplotlib 3.3 and will be removed two minor releases later. Use pil_kwargs={'quality': ...} instead. If any parameter follows 'quality', they should be passed as keyword, not positionally.
  plt.savefig(save_path, format='jpeg', quality=95)
