In [None]:
#Hopfield Networks

In [8]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from pybdm import BDM
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Function to normalize the counts
def normalize_counts(df):
    scaler = StandardScaler()
    normalized_data = scaler.fit_transform(df.values[:, 1:])
    normalized_df = pd.DataFrame(normalized_data, columns=df.columns[1:])
    normalized_df.insert(0, df.columns[0], df[df.columns[0]].values)
    return normalized_df

# Function to binarize the data
def binarize_data(df, threshold=0.1):
    binarized_df = df.copy()
    binarized_df.iloc[:, 1:] = (df.iloc[:, 1:] > threshold).astype(int)
    return binarized_df

# Function to calculate BDM shifts
def calculate_bdm_shifts(df):
    bdm = BDM(ndim=1)
    shifts = {}
    for index, row in df.iterrows():
        gene_data = row[1:].values.astype(int)
        bdm_value = bdm.bdm(gene_data)
        shifts[row[0]] = bdm_value
    return shifts

# Function to process a DEGs file and gene expression matrix
def process_degs_and_matrix(degs_file, matrix_file, output_folder):
    # Load gene expression matrix
    gene_matrix = pd.read_csv(matrix_file)
    
    # Extract relevant genes
    genes = degs_file['Gene'].values
    relevant_data = gene_matrix[gene_matrix[gene_matrix.columns[0]].isin(genes)]
    
    # Normalize counts
    normalized_data = normalize_counts(relevant_data)
    
    # Binarize the normalized data for BDM analysis
    binarized_df = binarize_data(normalized_data)
    
    # Calculate BDM shifts
    bdm_shifts = calculate_bdm_shifts(binarized_df)
    
    # Save BDM shifts
    bdm_shifts_df = pd.DataFrame(list(bdm_shifts.items()), columns=['Gene', 'BDM_Shift'])
    bdm_shifts_df.to_csv(f'{output_folder}/BDM_Shifts.csv', index=False)
    
    # Plot top 10 and bottom 10 BDM shifts
    top_10_bdm = bdm_shifts_df.nlargest(10, 'BDM_Shift')
    bottom_10_bdm = bdm_shifts_df.nsmallest(10, 'BDM_Shift')
    
    plt.figure(figsize=(20, 10))
    sns.scatterplot(x='Gene', y='BDM_Shift', data=pd.concat([top_10_bdm, bottom_10_bdm]), color='turquoise')
    plt.xticks(rotation=90, fontsize=20)
    plt.yticks(fontsize=20)
    plt.title('Top 10 and Bottom 10 BDM Shifts', fontsize=20)
    plt.xlabel('Gene', fontsize=20)
    plt.ylabel('BDM Shift', fontsize=20)
    plt.savefig(f'{output_folder}/BDM_Shifts_Scatter.png', bbox_inches='tight')
    plt.close()
    
    plt.figure(figsize=(20, 10))
    sns.barplot(x='Gene', y='BDM_Shift', data=pd.concat([top_10_bdm, bottom_10_bdm]), color='turquoise')
    plt.xticks(rotation=90, fontsize=20)
    plt.yticks(fontsize=20)
    plt.title('Top 10 and Bottom 10 BDM Shifts', fontsize=20)
    plt.xlabel('Gene', fontsize=20)
    plt.ylabel('BDM Shift', fontsize=20)
    plt.savefig(f'{output_folder}/BDM_Shifts_Bar.png', bbox_inches='tight')
    plt.close()

# Example usage
os.makedirs('Hopfield_Results/Cluster_0_IDHWT', exist_ok=True)
os.makedirs('Hopfield_Results/Cluster_0_K27M', exist_ok=True)

# Load DEGs and gene expression matrices
cluster_0_degs = pd.read_csv('cluster_0_degs.csv')  # Update with correct path
cluster_0_degs2 = pd.read_csv('cluster_0_degs2.csv')  # Update with correct path

# Process Cluster_0_DEGs with IDHWT.csv
process_degs_and_matrix(cluster_0_degs, 'IDHWT.csv', 'Hopfield_Results/Cluster_0_IDHWT')

# Process Cluster_0_DEGs2 with K27M.csv
process_degs_and_matrix(cluster_0_degs2, 'K27M.csv', 'Hopfield_Results/Cluster_0_K27M')



  shifts[row[0]] = bdm_value
  shifts[row[0]] = bdm_value


In [None]:
#VAE and GANs- the 1b and 2B are Louvain GAN, while 1 and 2 are kmeans GAN

In [9]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from pybdm import BDM
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Function to normalize the counts
def normalize_counts(df):
    scaler = StandardScaler()
    normalized_data = scaler.fit_transform(df.values[:, 1:])
    normalized_df = pd.DataFrame(normalized_data, columns=df.columns[1:])
    normalized_df.insert(0, df.columns[0], df[df.columns[0]].values)
    return normalized_df

# Function to binarize the data
def binarize_data(df, threshold=0.1):
    binarized_df = df.copy()
    binarized_df.iloc[:, 1:] = (df.iloc[:, 1:] > threshold).astype(int)
    return binarized_df

# Function to calculate BDM shifts
def calculate_bdm_shifts(df):
    bdm = BDM(ndim=1)
    shifts = {}
    for index, row in df.iterrows():
        gene_data = row[1:].values.astype(int)
        bdm_value = bdm.bdm(gene_data)
        shifts[row[0]] = bdm_value
    return shifts

# Function to process a DEGs file and gene expression matrix
def process_degs_and_matrix(degs_file, matrix_file, output_folder, color):
    # Load gene expression matrix
    gene_matrix = pd.read_csv(matrix_file)
    
    # Extract relevant genes
    genes = degs_file['Gene'].values
    relevant_data = gene_matrix[gene_matrix[gene_matrix.columns[0]].isin(genes)]
    
    # Normalize counts
    normalized_data = normalize_counts(relevant_data)
    
    # Binarize the normalized data for BDM analysis
    binarized_df = binarize_data(normalized_data)
    
    # Calculate BDM shifts
    bdm_shifts = calculate_bdm_shifts(binarized_df)
    
    # Save BDM shifts
    bdm_shifts_df = pd.DataFrame(list(bdm_shifts.items()), columns=['Gene', 'BDM_Shift'])
    bdm_shifts_df.to_csv(f'{output_folder}/BDM_Shifts.csv', index=False)
    
    # Plot top 10 and bottom 10 BDM shifts
    top_10_bdm = bdm_shifts_df.nlargest(10, 'BDM_Shift')
    bottom_10_bdm = bdm_shifts_df.nsmallest(10, 'BDM_Shift')
    
    plt.figure(figsize=(20, 10))
    sns.barplot(x='Gene', y='BDM_Shift', data=pd.concat([top_10_bdm, bottom_10_bdm]), color=color)
    plt.xticks(rotation=90, fontsize=20)
    plt.yticks(fontsize=20)
    plt.title('Top 10 and Bottom 10 BDM Shifts', fontsize=20)
    plt.xlabel('Gene', fontsize=20)
    plt.ylabel('BDM Shift', fontsize=20)
    plt.savefig(f'{output_folder}/BDM_Shifts_Bar.png', bbox_inches='tight')
    plt.close()

# Create directories for results
os.makedirs('Networks/VAE1', exist_ok=True)
os.makedirs('Networks/GAN1', exist_ok=True)
os.makedirs('Networks/GAN1B', exist_ok=True)
os.makedirs('Networks/VAE2', exist_ok=True)
os.makedirs('Networks/GAN2', exist_ok=True)
os.makedirs('Networks/GAN2B', exist_ok=True)

# Load DEGs files
vae1 = pd.read_csv('VAE1.csv')
gan1 = pd.read_csv('GAN1.csv')
gan1b = pd.read_csv('GAN1B.csv')
vae2 = pd.read_csv('VAE2.csv')
gan2 = pd.read_csv('GAN2.csv')
gan2b = pd.read_csv('GAN2B.csv')

# Process each DEGs file with the appropriate gene expression matrix and color
process_degs_and_matrix(vae1, 'IDHWT.csv', 'Networks/VAE1', 'violet')
process_degs_and_matrix(gan1, 'IDHWT.csv', 'Networks/GAN1', 'violet')
process_degs_and_matrix(gan1b, 'IDHWT.csv', 'Networks/GAN1B', 'violet')
process_degs_and_matrix(vae2, 'K27M.csv', 'Networks/VAE2', 'pink')
process_degs_and_matrix(gan2, 'K27M.csv', 'Networks/GAN2', 'pink')
process_degs_and_matrix(gan2b, 'K27M.csv', 'Networks/GAN2B', 'pink')


  shifts[row[0]] = bdm_value
  shifts[row[0]] = bdm_value
  shifts[row[0]] = bdm_value
  shifts[row[0]] = bdm_value
  shifts[row[0]] = bdm_value
  shifts[row[0]] = bdm_value
