In [3]:
import pandas as pd
import numpy as np
from pybdm import BDM
from pybdm import PerturbationExperiment
import os
import matplotlib.pyplot as plt
import networkx as nx

# Set the working directory
os.chdir(r'C:\Users\uabic\Desktop')

# Load the datasets
GliomaCCLE = pd.read_csv("GliomaCCLE.csv")
Shema = pd.read_csv("Shema.csv")

# Set the 'Histone Modification' as the index
GliomaCCLE.set_index('Histone Modification', inplace=True)
Shema.set_index('Histone Modification', inplace=True)

# Transpose the datasets to have histone modifications as columns
GliomaCCLE = GliomaCCLE.transpose()
Shema = Shema.transpose()

# Compute Spearman correlation adjacency matrices
GliomaCCLE_adj_matrix = GliomaCCLE.corr(method='spearman')
Shema_adj_matrix = Shema.corr(method='spearman')

# Save adjacency matrices
GliomaCCLE_adj_matrix.to_csv("GliomaCCLE_adjacency_matrix.csv")
Shema_adj_matrix.to_csv("Shema_adjacency_matrix.csv")

# Perform BDM perturbation analysis
def binarize_matrix(matrix, threshold=0.5):
    binary_matrix = (matrix > threshold).astype(int)
    return binary_matrix

def bdm_perturbation_analysis(adj_matrix):
    bdm = BDM(ndim=2)
    binary_matrix = binarize_matrix(adj_matrix.values)
    perturbation = PerturbationExperiment(bdm, binary_matrix, metric='bdm')
    delta_bdm = perturbation.run()
    
    # Ensure the shape matches the original matrix
    reshaped_delta_bdm = np.reshape(delta_bdm, adj_matrix.shape)
    
    return pd.DataFrame(reshaped_delta_bdm, index=adj_matrix.index, columns=adj_matrix.columns)

GliomaCCLE_bdm_results = bdm_perturbation_analysis(GliomaCCLE_adj_matrix)
Shema_bdm_results = bdm_perturbation_analysis(Shema_adj_matrix)

# Save BDM results
GliomaCCLE_bdm_results.to_csv("GliomaCCLE_bdm_results.csv")
Shema_bdm_results.to_csv("Shema_bdm_results.csv")

# Function to get top 5 unique gene pairs
def get_top_5_unique_pairs(bdm_results):
    bdm_unstacked = bdm_results.unstack()
    sorted_bdm = bdm_unstacked.sort_values(ascending=False)
    unique_pairs = {}
    
    for (gene1, gene2), value in sorted_bdm.items():
        sorted_pair = tuple(sorted([gene1, gene2]))
        if sorted_pair not in unique_pairs:
            unique_pairs[sorted_pair] = value
        if len(unique_pairs) == 5:
            break
    
    top_5_pairs = pd.Series(unique_pairs).sort_values(ascending=False)
    return top_5_pairs

# Function to plot the top 5 BDM changes
def plot_top_5_bdm_changes(bdm_results, title, save_path, color):
    top_5 = get_top_5_unique_pairs(bdm_results)
    means = top_5.values
    errors = np.std(top_5.values) / np.sqrt(len(top_5.values))
    
    plt.figure(figsize=(12, 8))
    plt.bar(top_5.index.map(str), means, yerr=errors, color=color, capsize=5)
    plt.ylabel('BDM Change (bits)', fontsize=16)
    plt.xlabel('Histone-Histone Interaction', fontsize=16)
    plt.title(title, fontsize=20)
    plt.xticks(rotation=45, ha='right', fontsize=14)
    plt.yticks(fontsize=14)
    plt.tight_layout()
    
    plt.savefig(save_path, format='jpeg', quality=95)
    plt.close()

# Plot and save the top 5 BDM changes for GliomaCCLE
plot_top_5_bdm_changes(GliomaCCLE_bdm_results, 'GliomaCCLE Histone Modifications BDM Changes', 'GliomaCCLE_top_5_bdm_changes.jpeg', 'turquoise')

# Plot and save the top 5 BDM changes for Shema
plot_top_5_bdm_changes(Shema_bdm_results, 'BT245 Histone Modifications BDM Changes', 'BT245_top_5_bdm_changes.jpeg', 'pink')

# Function to plot the network
def plot_network(adj_matrix, title, save_path):
    G = nx.from_pandas_adjacency(adj_matrix)
    pos = nx.spring_layout(G)
    plt.figure(figsize=(24, 16))
    nx.draw(G, pos, with_labels=True, node_color='turquoise', edge_color='pink', node_size=500, font_size=18, font_color='black')
    plt.title(title, fontsize=20)
    plt.tight_layout()
    plt.savefig(save_path, format='jpeg', quality=95)
    plt.close()

# Function to compute centralities and save as CSV
def compute_centralities(adj_matrix, prefix):
    G = nx.from_pandas_adjacency(adj_matrix)
    centralities = {
        'betweenness': nx.betweenness_centrality(G),
        'closeness': nx.closeness_centrality(G),
        'eigenvector': nx.eigenvector_centrality(G, max_iter=1000)
    }
    centrality_df = pd.DataFrame(centralities)
    centrality_df.to_csv(f"{prefix}_centrality_measures.csv")

# Load and take absolute value of adjacency matrices
GliomaCCLE_adj_matrix_abs = GliomaCCLE_adj_matrix.abs()
Shema_adj_matrix_abs = Shema_adj_matrix.abs()

# Plot networks
plot_network(GliomaCCLE_adj_matrix_abs, 'Glioma CCLE Histone Network', 'GliomaCCLE_network.jpeg')
plot_network(Shema_adj_matrix_abs, 'BT245 Histone Network', 'Shema_network.jpeg')

# Compute and save centrality measures
compute_centralities(GliomaCCLE_adj_matrix_abs, "GliomaCCLE")
compute_centralities(Shema_adj_matrix_abs, "Shema")


The 'quality' parameter of print_jpg() was deprecated in Matplotlib 3.3 and will be removed two minor releases later. Use pil_kwargs={'quality': ...} instead. If any parameter follows 'quality', they should be passed as keyword, not positionally.
  plt.savefig(save_path, format='jpeg', quality=95)
The 'quality' parameter of print_jpg() was deprecated in Matplotlib 3.3 and will be removed two minor releases later. Use pil_kwargs={'quality': ...} instead. If any parameter follows 'quality', they should be passed as keyword, not positionally.
  plt.savefig(save_path, format='jpeg', quality=95)
  plt.tight_layout()
The 'quality' parameter of print_jpg() was deprecated in Matplotlib 3.3 and will be removed two minor releases later. Use pil_kwargs={'quality': ...} instead. If any parameter follows 'quality', they should be passed as keyword, not positionally.
  plt.savefig(save_path, format='jpeg', quality=95)


In [4]:
#BT245_PID BDM

import pandas as pd
import numpy as np
from pybdm import BDM
from pybdm import PerturbationExperiment
import matplotlib.pyplot as plt
import networkx as nx

# Set the working directory
os.chdir(r'C:\Users\uabic\Desktop')

# Load the edge list dataset
bt245_pid = pd.read_csv("BT245_PID.csv")

# Convert the edge list to an adjacency matrix
G = nx.from_pandas_edgelist(bt245_pid, 'from', 'to', edge_attr='weight')
adj_matrix = nx.to_pandas_adjacency(G, nodelist=sorted(G.nodes()))

# Save the adjacency matrix
adj_matrix.to_csv("BT245_adjacency_matrix.csv")

# Perform BDM perturbation analysis
def binarize_matrix(matrix, threshold=0.5):
    binary_matrix = (matrix > threshold).astype(int)
    return binary_matrix

def bdm_perturbation_analysis(adj_matrix):
    bdm = BDM(ndim=2)
    binary_matrix = binarize_matrix(adj_matrix.values)
    perturbation = PerturbationExperiment(bdm, binary_matrix, metric='bdm')
    delta_bdm = perturbation.run()
    
    # Ensure the shape matches the original matrix
    reshaped_delta_bdm = np.reshape(delta_bdm, adj_matrix.shape)
    
    return pd.DataFrame(reshaped_delta_bdm, index=adj_matrix.index, columns=adj_matrix.columns)

bt245_bdm_results = bdm_perturbation_analysis(adj_matrix)

# Save BDM results
bt245_bdm_results.to_csv("BT245_bdm_results.csv")

# Function to get top 5 unique gene pairs
def get_top_5_unique_pairs(bdm_results):
    bdm_unstacked = bdm_results.unstack()
    sorted_bdm = bdm_unstacked.sort_values(ascending=False)
    unique_pairs = {}
    
    for (gene1, gene2), value in sorted_bdm.items():
        sorted_pair = tuple(sorted([gene1, gene2]))
        if sorted_pair not in unique_pairs:
            unique_pairs[sorted_pair] = value
        if len(unique_pairs) == 5:
            break
    
    top_5_pairs = pd.Series(unique_pairs).sort_values(ascending=False)
    return top_5_pairs

# Function to plot the top 5 BDM changes
def plot_top_5_bdm_changes(bdm_results, title, save_path, color):
    top_5 = get_top_5_unique_pairs(bdm_results)
    means = top_5.values
    errors = np.std(top_5.values) / np.sqrt(len(top_5.values))
    
    plt.figure(figsize=(12, 8))
    plt.bar(top_5.index.map(str), means, yerr=errors, color=color, capsize=5)
    plt.ylabel('BDM Change (bits)', fontsize=16)
    plt.xlabel('Histone-Histone Interaction', fontsize=16)
    plt.title(title, fontsize=20)
    plt.xticks(rotation=45, ha='right', fontsize=14)
    plt.yticks(fontsize=14)
    plt.tight_layout()
    
    plt.savefig(save_path, format='jpeg', quality=95)
    plt.close()

# Plot and save the top 5 BDM changes for BT245
plot_top_5_bdm_changes(bt245_bdm_results, 'BT245 Histone Modifications BDM Changes', 'BT245_top_5_bdm_changes.jpeg', 'violet')


The 'quality' parameter of print_jpg() was deprecated in Matplotlib 3.3 and will be removed two minor releases later. Use pil_kwargs={'quality': ...} instead. If any parameter follows 'quality', they should be passed as keyword, not positionally.
  plt.savefig(save_path, format='jpeg', quality=95)


In [6]:
import pandas as pd
import numpy as np
from pybdm import BDM
from pybdm import PerturbationExperiment
import matplotlib.pyplot as plt
import networkx as nx

# Set the working directory
os.chdir(r'C:\Users\uabic\Desktop')

# Load the edge list dataset
pid_glioma = pd.read_csv("PIDGlioma.csv")

# Convert the edge list to an adjacency matrix
G = nx.from_pandas_edgelist(pid_glioma, 'from', 'to', edge_attr='weight')
adj_matrix = nx.to_pandas_adjacency(G, nodelist=sorted(G.nodes()))

# Save the adjacency matrix
adj_matrix.to_csv("PIDGlioma_adjacency_matrix.csv")

# Perform BDM perturbation analysis
def binarize_matrix(matrix, threshold=0.5):
    binary_matrix = (matrix > threshold).astype(int)
    return binary_matrix

def bdm_perturbation_analysis(adj_matrix):
    bdm = BDM(ndim=2)
    binary_matrix = binarize_matrix(adj_matrix.values)
    perturbation = PerturbationExperiment(bdm, binary_matrix, metric='bdm')
    delta_bdm = perturbation.run()
    
    # Ensure the shape matches the original matrix
    reshaped_delta_bdm = np.reshape(delta_bdm, adj_matrix.shape)
    
    return pd.DataFrame(reshaped_delta_bdm, index=adj_matrix.index, columns=adj_matrix.columns)

glioma_bdm_results = bdm_perturbation_analysis(adj_matrix)

# Save BDM results
glioma_bdm_results.to_csv("PIDGlioma_bdm_results.csv")

# Function to get top 5 unique gene pairs
def get_top_5_unique_pairs(bdm_results):
    bdm_unstacked = bdm_results.unstack()
    sorted_bdm = bdm_unstacked.sort_values(ascending=False)
    unique_pairs = {}
    
    for (gene1, gene2), value in sorted_bdm.items():
        sorted_pair = tuple(sorted([gene1, gene2]))
        if sorted_pair not in unique_pairs:
            unique_pairs[sorted_pair] = value
        if len(unique_pairs) == 5:
            break
    
    top_5_pairs = pd.Series(unique_pairs).sort_values(ascending=False)
    return top_5_pairs

# Function to plot the top 5 BDM changes
def plot_top_5_bdm_changes(bdm_results, title, save_path, color):
    top_5 = get_top_5_unique_pairs(bdm_results)
    means = top_5.values
    errors = np.std(top_5.values) / np.sqrt(len(top_5.values))
    
    plt.figure(figsize=(12, 8))
    plt.bar(top_5.index.map(str), means, yerr=errors, color=color, capsize=5)
    plt.ylabel('BDM Change (bits)', fontsize=16)
    plt.xlabel('Histone-Histone Interaction', fontsize=16)
    plt.title(title, fontsize=20)
    plt.xticks(rotation=45, ha='right', fontsize=14)
    plt.yticks(fontsize=14)
    plt.tight_layout()
    
    plt.savefig(save_path, format='jpeg', quality=95)
    plt.close()

# Plot and save the top 5 BDM changes for PIDGlioma
plot_top_5_bdm_changes(glioma_bdm_results, 'CCLE Glioma_PID Histone Network BDM Changes', 'PIDGlioma_top_5_bdm_changes.jpeg', 'lightcoral')


The 'quality' parameter of print_jpg() was deprecated in Matplotlib 3.3 and will be removed two minor releases later. Use pil_kwargs={'quality': ...} instead. If any parameter follows 'quality', they should be passed as keyword, not positionally.
  plt.savefig(save_path, format='jpeg', quality=95)
