In [None]:
#PID Networkx Stats

In [7]:
import pandas as pd
import numpy as np
import networkx as nx
from scipy import stats

# Load the adjacency matrices
bt245_adj = pd.read_csv('BT245_adjacency_matrix.csv', index_col=0)
pid_glioma_adj = pd.read_csv('PIDGlioma_adjacency_matrix.csv', index_col=0)

# Ensure indices and columns match for both adjacency matrices
bt245_adj = bt245_adj.loc[bt245_adj.index.intersection(bt245_adj.columns), bt245_adj.index.intersection(bt245_adj.columns)]
pid_glioma_adj = pid_glioma_adj.loc[pid_glioma_adj.index.intersection(pid_glioma_adj.columns), pid_glioma_adj.index.intersection(pid_glioma_adj.columns)]

# Create graphs from the adjacency matrices
G_bt245 = nx.from_pandas_adjacency(bt245_adj)
G_pid_glioma = nx.from_pandas_adjacency(pid_glioma_adj)

# Compute centrality measures for BT245
bt245_closeness = nx.closeness_centrality(G_bt245)
bt245_betweenness = nx.betweenness_centrality(G_bt245)
bt245_eigenvector = nx.eigenvector_centrality(G_bt245, max_iter=1000)
bt245_graph_strength = {n: sum(dict(G_bt245.degree([n])).values()) for n in G_bt245.nodes()}
bt245_transitivity = nx.transitivity(G_bt245)

# Compute centrality measures for PIDGlioma
pid_glioma_closeness = nx.closeness_centrality(G_pid_glioma)
pid_glioma_betweenness = nx.betweenness_centrality(G_pid_glioma)
pid_glioma_eigenvector = nx.eigenvector_centrality(G_pid_glioma, max_iter=1000)
pid_glioma_graph_strength = {n: sum(dict(G_pid_glioma.degree([n])).values()) for n in G_pid_glioma.nodes()}
pid_glioma_transitivity = nx.transitivity(G_pid_glioma)

# Create DataFrames for the computed centralities
bt245_centrality_df = pd.DataFrame({
    'Closeness': pd.Series(bt245_closeness),
    'Betweenness': pd.Series(bt245_betweenness),
    'Eigenvector': pd.Series(bt245_eigenvector),
    'Graph Strength': pd.Series(bt245_graph_strength)
})

pid_glioma_centrality_df = pd.DataFrame({
    'Closeness': pd.Series(pid_glioma_closeness),
    'Betweenness': pd.Series(pid_glioma_betweenness),
    'Eigenvector': pd.Series(pid_glioma_eigenvector),
    'Graph Strength': pd.Series(pid_glioma_graph_strength)
})

# Transitivity is a single value for the whole graph, add as a column
bt245_centrality_df['Transitivity'] = bt245_transitivity
pid_glioma_centrality_df['Transitivity'] = pid_glioma_transitivity

# Encode the provided table into a DataFrame
data = {
    'Measure': ['Closeness', 'Closeness', 'Betweenness', 'Betweenness', 'Eigenvector', 'Eigenvector', 'Graph Strength', 'Graph Strength', 'Transitivity', 'Transitivity'],
    'Modification': ['H3K9me3', 'H3K27me3', 'H3K9me3', 'H3K27me3', 'H3K4me3', 'H4K16ac', 'H3K4me3', 'H4K16ac', 'H4K16ac', 'H2Aub'],
    'Value': [0.18, 0.18, 34, 26, 1, 0.95, 37.86, 35.96, 1, 1],
    'Dataset': ['BT245', 'BT245', 'BT245', 'BT245', 'BT245', 'BT245', 'BT245', 'BT245', 'BT245', 'BT245']
}
bt245_observed_df = pd.DataFrame(data)

data = {
    'Measure': ['Closeness', 'Closeness', 'Betweenness', 'Eigenvector', 'Graph Strength', 'Transitivity'],
    'Modification': ['H3K4me1', 'H3K27me0K36me1', 'H3K4me1', 'H3K27ac1k36me2', 'H3K27ac1k36me2', 'N/A'],
    'Value': [0.02, 0.02, 273, 1, 142, 0.7132157],
    'Dataset': ['PIDGlioma', 'PIDGlioma', 'PIDGlioma', 'PIDGlioma', 'PIDGlioma', 'PIDGlioma']
}
pid_glioma_observed_df = pd.DataFrame(data)

# Combine the observed data
observed_df = pd.concat([bt245_observed_df, pid_glioma_observed_df])

# Perform Mann-Whitney U Test for significance
results = {}
for index, row in observed_df.iterrows():
    measure = row['Measure']
    value = row['Value']
    dataset = row['Dataset']
    
    if dataset == 'BT245':
        all_values = bt245_centrality_df[measure].dropna().values
    else:
        all_values = pid_glioma_centrality_df[measure].dropna().values
    
    # Perform the Mann-Whitney U test
    stat, p_value = stats.mannwhitneyu([value], all_values, alternative='two-sided')
    results[(measure, dataset)] = p_value

results


{('Closeness', 'BT245'): np.float64(0.00017683457040160755),
 ('Betweenness', 'BT245'): np.float64(0.00017683457040160755),
 ('Eigenvector', 'BT245'): np.float64(0.00017683457040160755),
 ('Graph Strength', 'BT245'): np.float64(0.00017683457040160755),
 ('Transitivity', 'BT245'): np.float64(1.0),
 ('Closeness', 'PIDGlioma'): np.float64(0.09735161782074063),
 ('Betweenness', 'PIDGlioma'): np.float64(0.09783514433560599),
 ('Eigenvector', 'PIDGlioma'): np.float64(0.09783514433560599),
 ('Graph Strength', 'PIDGlioma'): np.float64(0.09735161782074063),
 ('Transitivity', 'PIDGlioma'): np.float64(2.508860953006112e-10)}

In [10]:
#KRUSKAL-WALLIS PID Network Histones BDM pvalues


In [12]:
import pandas as pd
from scipy import stats

# Load the BDM results
bdm_results = pd.read_csv('BT245_bdm_results.csv')

# Ensure all BDM values are numeric
for column in bdm_results.columns:
    bdm_results[column] = pd.to_numeric(bdm_results[column], errors='coerce')

# Reshape the DataFrame to have one column for modifications and one for BDM values
bdm_long = pd.melt(bdm_results, var_name='Modification', value_name='BDM_Value')

# Remove rows with NaN values
bdm_long = bdm_long.dropna()

# Perform Kruskal-Wallis H Test
kruskal_results = stats.kruskal(
    *[bdm_long[bdm_long['Modification'] == mod]['BDM_Value'].values for mod in bdm_long['Modification'].unique()]
)

# Get the p-value from the Kruskal-Wallis results
p_value = kruskal_results.pvalue

# Display the p-value
print(f'Kruskal-Wallis p-value: {p_value}')


Kruskal-Wallis p-value: 0.11358158338228853


In [16]:
# Reshape the DataFrame to have one column for modifications and one for BDM values
bdm_long = pd.melt(bdm_results, var_name='Modification', value_name='BDM_Value')

# Remove rows with NaN values
bdm_long = bdm_long.dropna()

# List of all unique modifications
modifications = bdm_long['Modification'].unique()

# Prepare a dictionary to hold the results
results = {}

# Function to perform Mann-Whitney U test and t-test
def perform_tests(data1, data2):
    mannwhitney_stat, mannwhitney_p = stats.mannwhitneyu(data1, data2, alternative='two-sided')
    ttest_stat, ttest_p = stats.ttest_ind(data1, data2, equal_var=False)  # Use Welch's t-test for unequal variances
    return mannwhitney_p, ttest_p

# Perform tests for each pair of modifications
for i in range(len(modifications)):
    for j in range(i+1, len(modifications)):
        mod1 = modifications[i]
        mod2 = modifications[j]
        data1 = bdm_long[bdm_long['Modification'] == mod1]['BDM_Value'].values
        data2 = bdm_long[bdm_long['Modification'] == mod2]['BDM_Value'].values
        
        if len(data1) > 0 and len(data2) > 0:
            mannwhitney_p, ttest_p = perform_tests(data1, data2)
            results[(mod1, mod2)] = {'Mann-Whitney p-value': mannwhitney_p, 't-test p-value': ttest_p}

# Display the results
results


{('H2A.X', 'H2Aub'): {'Mann-Whitney p-value': np.float64(0.07310164197498858),
  't-test p-value': np.float64(0.030050172269799433)},
 ('H2A.X', 'H3.3'): {'Mann-Whitney p-value': np.float64(0.33598629016801607),
  't-test p-value': np.float64(0.12033174646413816)},
 ('H2A.X', 'H3K27M'): {'Mann-Whitney p-value': np.float64(0.06705994799526993),
  't-test p-value': np.float64(0.02780907717922638)},
 ('H2A.X',
  'H3K27ac'): {'Mann-Whitney p-value': np.float64(0.10500279568807346), 't-test p-value': np.float64(0.14536165118989663)},
 ('H2A.X',
  'H3K27me3'): {'Mann-Whitney p-value': np.float64(0.11333717264872825), 't-test p-value': np.float64(0.08009956718373454)},
 ('H2A.X',
  'H3K36me2'): {'Mann-Whitney p-value': np.float64(0.059434671103148276), 't-test p-value': np.float64(0.1796995119490491)},
 ('H2A.X',
  'H3K4me1'): {'Mann-Whitney p-value': np.float64(0.06470875481691006), 't-test p-value': np.float64(0.07410541815724186)},
 ('H2A.X',
  'H3K4me3'): {'Mann-Whitney p-value': np.float

In [22]:
import os
import pandas as pd
from scipy import stats

# Directory containing the CSV files
directory = './Networkx'  # Adjust the path if needed

# List of CSV files in the directory
csv_files = [
    'PID_selected_HVGIDHWT.txt_adjacency_matrix.csv',
    'PID_selected_HVGIDHWT.txt_bdm_results.csv',
    'PID_selected_HVGIDHWT.txt_centrality_measures.csv',
    'PID_selected_HVGK27M.txt_adjacency_matrix.csv',
    'PID_selected_HVGK27M.txt_bdm_results.csv',
    'PID_selected_HVGK27M.txt_centrality_measures.csv',
    'PID_selected_scEpathIDHWT.txt_adjacency_matrix.csv',
    'PID_selected_scEpathIDHWT.txt_bdm_results.csv',
    'PID_selected_scEpathIDHWT.txt_centrality_measures.csv',
    'PID_selected_TopFeatK27M.txt_adjacency_matrix.csv',
    'PID_selected_TopFeatK27M.txt_bdm_results.csv',
    'PID_selected_TopFeatK27M.txt_centrality_measures.csv'
]

# Function to perform statistical tests on centrality measures
def perform_stat_tests_centrality(data):
    results = []
    measures = data.columns[1:]  # Skip the first column which is the gene names
    for measure in measures:
        values = data[measure].dropna().values
        if len(values) > 1:
            # Perform Mann-Whitney U test comparing each measure to the mean
            mannwhitney_p = stats.mannwhitneyu(values, values.mean().repeat(len(values)), alternative='two-sided').pvalue
            # Perform t-test comparing each measure to the mean
            ttest_p = stats.ttest_1samp(values, values.mean()).pvalue
            results.append({
                'Measure': measure,
                'Mann-Whitney p-value': mannwhitney_p,
                't-test p-value': ttest_p
            })
    return results

# Function to perform statistical tests on BDM results
def perform_stat_tests_bdm(data):
    results = []
    genes = data.columns[1:]  # Skip the first column which is the gene names
    for gene in genes:
        values = data[gene].dropna().values
        if len(values) > 1:
            # Perform Mann-Whitney U test comparing each gene to the mean
            mannwhitney_p = stats.mannwhitneyu(values, values.mean().repeat(len(values)), alternative='two-sided').pvalue
            # Perform t-test comparing each gene to the mean
            ttest_p = stats.ttest_1samp(values, values.mean()).pvalue
            results.append({
                'Gene': gene,
                'Mann-Whitney p-value': mannwhitney_p,
                't-test p-value': ttest_p
            })
    return results

# Initialize an empty list to collect all results
all_results = []

# Iterate over the CSV files and perform analyses
for file in csv_files:
    file_path = os.path.join(directory, file)
    
    if 'centrality_measures' in file:
        data = pd.read_csv(file_path)
        results = perform_stat_tests_centrality(data)
    elif 'bdm_results' in file:
        data = pd.read_csv(file_path)
        results = perform_stat_tests_bdm(data)
    else:
        continue  # Skip adjacency matrix files for this analysis
    
    # Prepare results for saving
    for result in results:
        result['Dataset'] = file
    all_results.extend(results)

# Convert the results to a DataFrame and save to CSV
final_results = pd.DataFrame(all_results)
final_results.to_csv('Analysis1.csv', index=False)


  res = hypotest_fun_out(*samples, **kwds)
  res = hypotest_fun_out(*samples, **kwds)


In [26]:
import pandas as pd
from scipy import stats
import os

# Directory containing the CSV files
directory = './Networkx'  # Adjust the path if needed

# List of CSV files in the directory
csv_files = [
    'PID_selected_HVGIDHWT.txt_adjacency_matrix.csv',
    'PID_selected_HVGIDHWT.txt_bdm_results.csv',
    'PID_selected_HVGIDHWT.txt_centrality_measures.csv',
    'PID_selected_HVGK27M.txt_adjacency_matrix.csv',
    'PID_selected_HVGK27M.txt_bdm_results.csv',
    'PID_selected_HVGK27M.txt_centrality_measures.csv',
    'PID_selected_scEpathIDHWT.txt_adjacency_matrix.csv',
    'PID_selected_scEpathIDHWT.txt_bdm_results.csv',
    'PID_selected_scEpathIDHWT.txt_centrality_measures.csv',
    'PID_selected_TopFeatK27M.txt_adjacency_matrix.csv',
    'PID_selected_TopFeatK27M.txt_bdm_results.csv',
    'PID_selected_TopFeatK27M.txt_centrality_measures.csv'
]

# Function to perform Kruskal-Wallis H test
def perform_kruskal_wallis(data):
    results = {}
    grouped_data = [group.dropna().values for name, group in data.items()]
    if all(len(group) > 1 for group in grouped_data):
        kruskal_p = stats.kruskal(*grouped_data).pvalue
        results['Kruskal-Wallis p-value'] = kruskal_p
    return results

# Function to perform Friedman test
def perform_friedman(data):
    results = {}
    grouped_data = [group.dropna().values for name, group in data.items()]
    if all(len(group) > 1 for group in grouped_data):
        try:
            friedman_p = stats.friedmanchisquare(*grouped_data).pvalue
            results['Friedman p-value'] = friedman_p
        except ValueError as e:
            if str(e) == "All numbers are identical":
                results['Friedman p-value'] = float('nan')
            else:
                raise e
    return results

# Initialize an empty list to collect all results
all_results = []

# Iterate over the CSV files and perform analyses
for file in csv_files:
    file_path = os.path.join(directory, file)
    
    if 'centrality_measures' in file or 'bdm_results' in file:
        data = pd.read_csv(file_path, index_col=0)
        kruskal_results = perform_kruskal_wallis(data)
        friedman_results = perform_friedman(data)
        
        # Combine results
        combined_results = []
        combined_results.append({
            'Dataset': file,
            'Kruskal-Wallis p-value': kruskal_results.get('Kruskal-Wallis p-value', float('nan')),
            'Friedman p-value': friedman_results.get('Friedman p-value', float('nan'))
        })
        all_results.extend(combined_results)

# Convert the results to a DataFrame and save to CSV
final_results = pd.DataFrame(all_results)
final_results.to_csv('Analysis2.csv', index=False)


In [5]:
import os
import pandas as pd
from scipy import stats

# Directory containing the CSV files
directory = './SCENIC Regulons'  # Adjust the path if needed

# List of CSV files to be processed
csv_files = [
    'IDHWT_adjacency_matrix_regulons.csv',
    'IDHWT_bdm_results_regulons.csv',
    'IDHWT_centrality_measures.csv',
    'K27M_adjacency_matrix_regulons.csv',
    'K27M_bdm_results_regulons.csv',
    'K27M_centrality_measures.csv'
]

# Function to perform statistical tests on centrality measures
def perform_stat_tests_centrality(data):
    results = []
    measures = data.columns[1:]  # Skip the first column which is the gene names
    for measure in measures:
        values = data[measure].dropna().values
        if len(values) > 1:
            # Perform Mann-Whitney U test comparing each measure to the mean
            mannwhitney_p = stats.mannwhitneyu(values, [values.mean()] * len(values), alternative='two-sided').pvalue
            # Perform t-test comparing each measure to the mean
            ttest_p = stats.ttest_1samp(values, values.mean()).pvalue
            results.append({
                'Measure': measure,
                'Mann-Whitney p-value': mannwhitney_p,
                't-test p-value': ttest_p
            })
    return results

# Function to perform statistical tests on BDM results
def perform_stat_tests_bdm(data):
    results = []
    genes = data.columns[1:]  # Skip the first column which is the gene names
    for gene in genes:
        values = data[gene].dropna().values
        if len(values) > 1:
            # Perform Mann-Whitney U test comparing each gene to the mean
            mannwhitney_p = stats.mannwhitneyu(values, [values.mean()] * len(values), alternative='two-sided').pvalue
            # Perform t-test comparing each gene to the mean
            ttest_p = stats.ttest_1samp(values, values.mean()).pvalue
            results.append({
                'Gene': gene,
                'Mann-Whitney p-value': mannwhitney_p,
                't-test p-value': ttest_p
            })
    return results

# Initialize an empty list to collect all results
all_results = []

# Iterate over the CSV files and perform analyses
for file in csv_files:
    file_path = os.path.join(directory, file)
    
    if 'centrality_measures' in file:
        data = pd.read_csv(file_path)
        results = perform_stat_tests_centrality(data)
    elif 'bdm_results' in file:
        data = pd.read_csv(file_path)
        results = perform_stat_tests_bdm(data)
    else:
        continue  # Skip adjacency matrix files for this analysis
    
    # Prepare results for saving
    for result in results:
        result['Dataset'] = file
    all_results.extend(results)

# Convert the results to a DataFrame and save to CSV
final_results = pd.DataFrame(all_results)
final_results.to_csv('Analysis1.csv', index=False)


  res = hypotest_fun_out(*samples, **kwds)
  res = hypotest_fun_out(*samples, **kwds)


In [6]:
import pandas as pd
from scipy import stats
import os

# Directory containing the CSV files
directory = './SCENIC Regulons'  # Adjust the path if needed

# List of CSV files to be processed
csv_files = [
    'IDHWT_adjacency_matrix_regulons.csv',
    'IDHWT_bdm_results_regulons.csv',
    'IDHWT_centrality_measures.csv',
    'K27M_adjacency_matrix_regulons.csv',
    'K27M_bdm_results_regulons.csv',
    'K27M_centrality_measures.csv'
]

# Function to perform Kruskal-Wallis H test
def perform_kruskal_wallis(data):
    results = {}
    grouped_data = [group.dropna().values for name, group in data.items()]
    if all(len(group) > 1 for group in grouped_data):
        kruskal_p = stats.kruskal(*grouped_data).pvalue
        results['Kruskal-Wallis p-value'] = kruskal_p
    return results

# Function to perform Friedman test
def perform_friedman(data):
    results = {}
    grouped_data = [group.dropna().values for name, group in data.items()]
    if all(len(group) > 1 for group in grouped_data):
        try:
            friedman_p = stats.friedmanchisquare(*grouped_data).pvalue
            results['Friedman p-value'] = friedman_p
        except ValueError as e:
            if str(e) == "All numbers are identical":
                results['Friedman p-value'] = float('nan')
            else:
                raise e
    return results

# Initialize an empty list to collect all results
all_results = []

# Iterate over the CSV files and perform analyses
for file in csv_files:
    file_path = os.path.join(directory, file)
    
    if 'centrality_measures' in file or 'bdm_results' in file:
        data = pd.read_csv(file_path, index_col=0)
        kruskal_results = perform_kruskal_wallis(data)
        friedman_results = perform_friedman(data)
        
        # Combine results
        combined_results = []
        combined_results.append({
            'Dataset': file,
            'Kruskal-Wallis p-value': kruskal_results.get('Kruskal-Wallis p-value', float('nan')),
            'Friedman p-value': friedman_results.get('Friedman p-value', float('nan'))
        })
        all_results.extend(combined_results)

# Convert the results to a DataFrame and save to CSV
final_results = pd.DataFrame(all_results)
final_results.to_csv('Analysis2.csv', index=False)


In [11]:
#SDE analysis
import pandas as pd
from scipy import stats

# List of files and their respective data
files = {
    'K27Mperturbation_genes.csv': pd.read_csv('K27Mperturbation_genes.csv'),
    'IDHWTperturbation_genes.csv': pd.read_csv('IDHWTperturbation_genes.csv'),
    'K27Mimportant_genes.csv': pd.read_csv('K27Mimportant_genes.csv'),
    'IDHWTimportant_genes.csv': pd.read_csv('IDHWTimportant_genes.csv')
}

# Function to perform Mann-Whitney U test
def perform_mann_whitney(data, value_column):
    results = []
    genes = data.columns[1:]  # Skip the first column which is the gene names
    for gene in data[value_column].dropna().values:
        values = data[value_column].dropna().values
        if len(values) > 1:
            mannwhitney_p = stats.mannwhitneyu(values, [values.mean()] * len(values), alternative='two-sided').pvalue
            results.append({
                'Gene': gene,
                'Mann-Whitney p-value': mannwhitney_p
            })
    return results

# Initialize list to collect results
mann_whitney_results = []

# Iterate over the files and perform Mann-Whitney U test
for file_name, data in files.items():
    if 'perturbation' in file_name:
        results = perform_mann_whitney(data, 'BDM Change')
    else:
        results = perform_mann_whitney(data, 'Importance')
        
    # Prepare results for saving
    for result in results:
        result['Dataset'] = file_name
    mann_whitney_results.extend(results)

# Convert the results to DataFrame and save to CSV
mann_whitney_df = pd.DataFrame(mann_whitney_results)
mann_whitney_df.to_csv('Mann_Whitney_Analysis.csv', index=False)


In [13]:
import os
import pandas as pd
from scipy import stats

# Directories containing the CSV files
cellrouter_dir = './CellRouter'
scepath_dir = './scEpath BDM'

# List of CSV files for each directory
cellrouter_files = [
    'IDHWT_adjacency_matrix.csv',
    'IDHWT_bdm_results.csv',
    'IDHWT_centrality_measures.csv',
    'K27M_adjacency_matrix.csv',
    'K27M_bdm_results.csv',
    'K27M_centrality_measures.csv'
]

scepath_files = [
    'IDHWT_adjacency_matrix3.csv',
    'IDHWT_bdm_results3.csv',
    'K27M_adjacency_matrix3.csv',
    'K27M_bdm_results3.csv'
]

# Function to perform statistical tests on centrality measures and BDM results
def perform_stat_tests(data, columns):
    results = []
    for column in columns:
        values = data[column].dropna().values
        if len(values) > 1:
            mannwhitney_p = stats.mannwhitneyu(values, [values.mean()] * len(values), alternative='two-sided').pvalue
            t_test_p = stats.ttest_1samp(values, values.mean()).pvalue
            results.append({
                'Measure': column,
                'Mann-Whitney p-value': mannwhitney_p,
                'T-test p-value': t_test_p
            })
    return results

# Create the stats folder and subfolders
stats_dir = './stats'
cellrouter_stats_dir = os.path.join(stats_dir, 'CellRouter')
scepath_stats_dir = os.path.join(stats_dir, 'scEpath_BDM')

os.makedirs(cellrouter_stats_dir, exist_ok=True)
os.makedirs(scepath_stats_dir, exist_ok=True)

# Initialize lists to collect results
cellrouter_results = []
scepath_results = []

# Iterate over the files in CellRouter and perform analyses
for file_name in cellrouter_files:
    file_path = os.path.join(cellrouter_dir, file_name)
    data = pd.read_csv(file_path)
    
    if 'bdm_results' in file_name:
        results = perform_stat_tests(data, data.columns[1:])
    elif 'centrality_measures' in file_name:
        results = perform_stat_tests(data, ['betweenness', 'closeness', 'eigenvector'])
    
    for result in results:
        result['Dataset'] = file_name
    cellrouter_results.extend(results)

# Save CellRouter results to CSV
cellrouter_df = pd.DataFrame(cellrouter_results)
cellrouter_df.to_csv(os.path.join(cellrouter_stats_dir, 'analysis3.csv'), index=False)

# Reset lists for scEpath BDM analysis
scepath_results = []

# Iterate over the files in scEpath BDM and perform analyses
for file_name in scepath_files:
    file_path = os.path.join(scepath_dir, file_name)
    data = pd.read_csv(file_path)
    
    if 'bdm_results' in file_name:
        results = perform_stat_tests(data, data.columns[1:])
    elif 'centrality_measures' in file_name:
        results = perform_stat_tests(data, ['betweenness', 'closeness', 'eigenvector'])
    
    for result in results:
        result['Dataset'] = file_name
    scepath_results.extend(results)

# Save scEpath BDM results to CSV
scepath_df = pd.DataFrame(scepath_results)
scepath_df.to_csv(os.path.join(scepath_stats_dir, 'analysis5.csv'), index=False)


  res = hypotest_fun_out(*samples, **kwds)
  res = hypotest_fun_out(*samples, **kwds)


In [4]:
import pandas as pd
import matplotlib.pyplot as plt

# Define a function to generate the plot
def generate_bdm_shift_plot(csv_file_path, output_file_name, title_prefix):
    # Load the CSV file
    data = pd.read_csv(csv_file_path)
    
    # Sort the data by the second column in descending order
    data_sorted = data.sort_values(by=data.columns[1], ascending=False)
    
    # Extract the x and y values
    x_values = range(1, len(data_sorted) + 1)
    y_values = data_sorted.iloc[:, 1]
    
    # Create the plot
    plt.figure(figsize=(10, 6))
    plt.plot(x_values, y_values, 'o-', markersize=5, color='blue', linewidth=4)
    plt.xlabel('Gene Index', fontsize=20)
    plt.ylabel('BDM Shift', fontsize=20)
    
    # Set the title based on the CSV name
    title = f"{title_prefix} in {csv_file_path.split('/')[-1].replace('BDM_Shifts.csv', '')}"
    plt.title(title, fontsize=24)
    
    # Save the plot as a JPEG file
    plt.savefig(output_file_name, format='jpeg', dpi=300)
    plt.close()

# Paths to the CSV files
k27m_csv_path = 'K27MBDM_Shifts.csv'
idhwt_csv_path = 'IDHWTBDM_Shifts.csv'

# Generate the plots
generate_bdm_shift_plot(k27m_csv_path, 'K27M_BDM_Shifts.jpeg', 'BDM Shifts in GAN kmeans space for K27M')
generate_bdm_shift_plot(idhwt_csv_path, 'IDHWT_BDM_Shifts.jpeg', 'BDM Shifts in GAN kmeans space for IDHWT')
