In [1]:
import pandas as pd
import numpy as np
import glob

# Residue membership of every domain, keyed by domain name.
#   * tuple -> inclusive (first_residue, last_residue) span
#   * list  -> explicit residue numbers, used when a domain is assembled from
#              segments that are not contiguous
# Insertion order is the order in which domains are later reported.
# NOTE(review): residue 1066 sits between 'Psi' (ends 1065) and 'Hybrid'
# (starts 1067) and belongs to neither -- confirm the gap is intentional.
domains = {
    'beta-propeller': (1, 452),
    'Thigh': (453, 608),
    'Calf-1': (609, 743),
    'Calf-2': (744, 964),
    'Transmembrane_alpha': (965, 1008),
    'Beta I': (1118, 1360),
    # Two Hybrid segments merged into one domain: 1067-1117 and 1361-1440.
    'Hybrid': [*range(1067, 1118), *range(1361, 1441)],
    'Psi': (1009, 1065),
    # E1 through E4 are reported together as a single domain.
    'E1-E4': (1441, 1604),
    'Beta-T': (1605, 1698),
    'Transmembrane_beta': (1699, 1770),
}

# Collect every data file in the current working directory.
# The extension is matched in any letter case so that '*.dat' files are also
# found on case-sensitive filesystems, and the list is sorted so that the
# choice of control file and the reporting order do not depend on the
# arbitrary order in which glob returns directory entries.
file_list = sorted(glob.glob('*.[dD][aA][tT]'))

# Maps file name -> DataFrame with columns 'Residue' and 'BC'.
dataframes = {}

# The control file is the first file whose name contains "control"
# (case-insensitive); its Residue column is reused for every other file.
control_file = next((f for f in file_list if 'control' in f.lower()), None)
if control_file is None:
    raise ValueError("Control file not found in the directory.")

# Read the control file and keep its residue numbers as the shared reference.
# The separator is a regular expression, so it is written as a raw string to
# avoid the invalid '\s' escape sequence in a normal string literal.
control_df = pd.read_csv(control_file, sep=r"\s+", header=None, names=['Residue', 'BC', 'Extra'])
residues_reference = control_df['Residue']

for file in file_list:
    df = pd.read_csv(file, sep=r"\s+", header=None, names=['Residue', 'BC', 'Extra'])
    # Overwrite the file's own first column with the control numbering. The
    # assignment aligns on the row index, so rows beyond the control file's
    # length receive NaN and are removed by dropna() below.
    df['Residue'] = residues_reference
    df = df[['Residue', 'BC']]  # The third column is not used
    dataframes[file] = df.dropna()

# Function to get the average BC value for each domain
def average_bc_for_domains(df, domain_map=None):
    """Return the mean 'BC' value of ``df`` for every domain.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a 'Residue' column and a 'BC' column.
    domain_map : dict, optional
        Maps a domain name to its residues. A tuple value is read as an
        inclusive ``(start, end)`` span; any other value is passed to
        ``Series.isin`` as an explicit collection of residue numbers.
        When omitted, the module-level ``domains`` mapping is used.

    Returns
    -------
    dict
        Domain name -> mean of 'BC' over the rows whose 'Residue' belongs to
        that domain, or NaN when no row matches. Keys follow the iteration
        order of ``domain_map``.
    """
    if domain_map is None:
        domain_map = domains

    averages = {}
    for domain, residues in domain_map.items():
        if isinstance(residues, tuple):
            # Inclusive span given as (start, end).
            mask = (df['Residue'] >= residues[0]) & (df['Residue'] <= residues[1])
        else:
            # Explicit residue numbers.
            mask = df['Residue'].isin(residues)
        bc_values = df.loc[mask, 'BC']
        averages[domain] = np.nan if bc_values.empty else bc_values.mean()
    return averages

# Calculate domain averages for all files
domain_averages = {}
for filename, dataframe in dataframes.items():
    domain_averages[filename] = average_bc_for_domains(dataframe)

# Percentage change of every non-control file relative to the control file:
#   (file_average - control_average) / control_average * 100
# The result is NaN when either average is missing, or when the control
# average is exactly 0 (the relative change is undefined and would otherwise
# produce inf/-inf together with a numpy division warning).
control_averages = domain_averages[control_file]
delta_bc = {}
for filename, averages in domain_averages.items():
    if filename == control_file:
        continue
    delta_bc[filename] = {}
    for domain, value in averages.items():
        baseline = control_averages[domain]
        if np.isnan(baseline) or np.isnan(value) or baseline == 0:
            delta_bc[filename][domain] = np.nan
        else:
            delta_bc[filename][domain] = ((value - baseline) / baseline) * 100

# Print the percentage change for each domain for all conditions
for filename, deltas in delta_bc.items():
    print(f"File: {filename}")
    for domain, delta in deltas.items():
        print(f"  {domain}: {delta if not np.isnan(delta) else 'No data'}")
    print()


File: 0.BC_open_ForceRGD_betweenness_value_filter0.30.dat
  beta-propeller: 0.3104902450452768
  Thigh: -1.4583156472728755
  Calf-1: 1.5873015873015994
  Calf-2: 0.06349407917711868
  Transmembrane_alpha: -1.597251117404666
  Beta I: -1.4111737237301847
  Hybrid: 0.9101201690847457
  Psi: 2.832516366746726
  E1-E4: -1.378996602266072
  Beta-T: 2.744008336227861
  Transmembrane_beta: 0.745652579159017

File: 0.BC_open_Force_betweenness_value_filter0.30.dat
  beta-propeller: -1.0713973090845592
  Thigh: -0.8795662332197547
  Calf-1: 1.6369187306951238
  Calf-2: 0.9897139591733011
  Transmembrane_alpha: -4.7085352267693805
  Beta I: 0.11423218288618801
  Hybrid: 3.907134520307203
  Psi: 0.05263677336578754
  E1-E4: 0.3379963321534013
  Beta-T: -1.8986007046097413
  Transmembrane_beta: 0.6404623045990672

File: 0.BC_open_RGD_betweenness_value_filter0.30.dat
  beta-propeller: -0.006307637648112313
  Thigh: -0.11294471983432915
  Calf-1: 0.8913205759164486
  Calf-2: 0.5497529022085343
  Tra