# In [None]:
#!/usr/bin/env python3
import os
import networkx as nx
import numpy as np
import pandas as pd
from tqdm import tqdm

def compute_vn_entropy(density_matrix, tol=1e-12):
    """
    Return the Von Neumann entropy S(ρ) = -∑ λ_i log(λ_i) of a density matrix.

    Parameters:
      density_matrix (np.ndarray): Symmetric matrix whose eigenvalues λ_i are
        obtained with np.linalg.eigvalsh.
      tol (float): Threshold; only eigenvalues strictly greater than this
        value enter the sum.

    Returns:
      float: The entropy, using the natural logarithm.
    """
    spectrum = np.linalg.eigvalsh(density_matrix)
    # Eigenvalues at or below tol (zeros and round-off negatives) are dropped
    # so that the logarithm is only ever taken of positive numbers.
    kept = spectrum[spectrum > tol]
    return -(kept * np.log(kept)).sum()

def process_network(network_file, network_label):
    """
    Load a GraphML network, compute its combinatorial Laplacian,
    normalize it to form a density matrix, and compute the baseline Von Neumann entropy.

    Parameters:
      network_file (str): Path to the GraphML file.
      network_label (str): Label used for printing/logging.

    Returns:
      nodes (list): Sorted list of node IDs.
      node_to_index (dict): Mapping from node ID to its row/column index.
      L_dense (np.ndarray): Dense Laplacian matrix, ordered like ``nodes``.
      trace_val: Trace of the Laplacian.
      density (np.ndarray): Density matrix (Laplacian divided by its trace).
      baseline_entropy (float): Von Neumann entropy of ``density``.

    Raises:
      ValueError: If the Laplacian has zero trace, so it cannot be
        normalized into a density matrix.
    """
    print(f"\n=== Processing {network_label} ===")
    print(f"Loading network from file: {network_file}")
    G = nx.read_graphml(network_file)
    print(f"Network loaded. Total nodes in {network_label}: {len(G.nodes())}")

    # Sort nodes so that matrix indices are reproducible between runs
    nodes = sorted(G.nodes())
    node_to_index = {node: idx for idx, node in enumerate(nodes)}

    print("Computing the combinatorial Laplacian...")
    # Sparse Laplacian whose rows/columns follow the sorted node order
    L_sparse = nx.laplacian_matrix(G, nodelist=nodes)

    print("Converting Laplacian to a dense matrix (this may take a moment)...")
    L_dense = L_sparse.toarray()

    # The trace is the sum of the Laplacian's diagonal (the node degrees; for
    # an unweighted simple graph this equals 2 * number of edges).
    trace_val = np.trace(L_dense)
    print(f"Trace of the Laplacian for {network_label}: {trace_val}")

    # A zero trace would make the division below produce NaNs (or an empty
    # matrix) and a meaningless entropy, so fail loudly instead.
    if trace_val == 0:
        raise ValueError(
            f"Laplacian of {network_label} ({network_file}) has zero trace; "
            "the network appears to have no edges, so no density matrix can be formed."
        )

    # Form the density matrix: ρ = L / Tr(L)
    density = L_dense / trace_val

    print("Computing baseline eigenvalues and Von Neumann entropy on CPU...")
    baseline_entropy = compute_vn_entropy(density)
    print(f"Baseline Von Neumann Entropy for {network_label}: {baseline_entropy}")

    return nodes, node_to_index, L_dense, trace_val, density, baseline_entropy

def compute_knockout_entropy(L_dense, remove_index, tol=1e-12):
    """
    Simulate a gene knockout by removing the corresponding row and column from the dense Laplacian,
    normalize the resulting submatrix to obtain a new density matrix,
    and compute its Von Neumann entropy.

    Only the row and column are deleted; the remaining diagonal entries are
    left exactly as they were in ``L_dense``.

    Parameters:
      L_dense (np.ndarray): The original dense Laplacian matrix (not modified).
      remove_index (int): The index of the node (gene) to remove.
      tol (float): Eigenvalue tolerance passed on to compute_vn_entropy.

    Returns:
      float: The Von Neumann entropy of the network after the knockout.

    Raises:
      ValueError: If the reduced matrix has zero trace, so it cannot be
        normalized into a density matrix.
    """
    # Drop the row first, then the matching column; np.delete returns copies,
    # so the caller's matrix is untouched.
    L_sub = np.delete(np.delete(L_dense, remove_index, axis=0), remove_index, axis=1)

    trace_sub = np.trace(L_sub)
    # Dividing by a zero trace would yield NaNs (or an empty matrix) and a
    # bogus entropy value, so report the problem explicitly.
    if trace_sub == 0:
        raise ValueError(
            f"Submatrix left after removing index {remove_index} has zero trace; "
            "cannot form a density matrix."
        )

    density_sub = L_sub / trace_sub
    return compute_vn_entropy(density_sub, tol=tol)

def _knockout_fields(gene, label, node_to_index, L_dense, baseline_entropy):
    """Return the knockout/delta entropy result columns for one gene in one network."""
    knockout_key = f"knockout_entropy_{label}"
    delta_key = f"delta_entropy_{label}"
    # Both columns default to None; they are filled only on a successful knockout.
    fields = {knockout_key: None, delta_key: None}

    idx = node_to_index.get(gene)
    if idx is None:
        tqdm.write(f"Gene {gene} not found in Network {label}. Skipping knockout.")
        return fields

    tqdm.write(f"Gene {gene} found in Network {label} at index {idx}. Computing knockout entropy...")
    try:
        knockout_entropy = compute_knockout_entropy(L_dense, idx)
    except Exception as e:
        # Best-effort per gene: report the failure and keep going with the rest.
        tqdm.write(f"Error during Network {label} knockout for gene {gene}: {e}")
        return fields

    delta = baseline_entropy - knockout_entropy
    fields[knockout_key] = knockout_entropy
    fields[delta_key] = delta
    tqdm.write(f"Network {label}: Knockout entropy = {knockout_entropy}, Δ entropy = {delta}")
    return fields


def main(work_dir="/home/bioinfo/Desktop/Pratanu/Network/O3"):
    """
    Run the gene knockout entropy analysis on Network A and Network B.

    Reads the gene list and both GraphML networks from ``work_dir``, computes
    each network's baseline Von Neumann entropy, recomputes the entropy after
    knocking out every listed gene, and writes one row per gene to
    ``knockout_results.csv`` in the same directory.

    Parameters:
      work_dir (str): Directory containing the input files; the process
        changes into it before doing anything else.
    """
    # Set the working directory
    os.chdir(work_dir)
    print(f"Working directory set to: {work_dir}")

    # Filenames
    networkA_file = "Dengue_Human_PPI.graphml"
    networkB_file = "Human_PPI.graphml"
    gene_file = "Gene_set.txt"

    # Load gene list (one gene per line, blank lines ignored)
    print(f"\nLoading gene list from file: {gene_file}")
    with open(gene_file, "r") as f:
        genes = [line.strip() for line in f if line.strip()]
    print(f"Total genes loaded: {len(genes)}")

    # Process both networks, keeping only what the knockout loop needs
    networks = []
    for label, network_file in (("A", networkA_file), ("B", networkB_file)):
        print(f"\nProcessing Network {label}...")
        _, node_to_index, L_dense, _, _, baseline_entropy = process_network(
            network_file, f"Network {label}"
        )
        networks.append((label, node_to_index, L_dense, baseline_entropy))

    # Fixed column order, so the CSV has a header even if no genes were listed
    columns = ["gene"] + [f"baseline_entropy_{label}" for label, *_ in networks]
    for label, *_ in networks:
        columns += [f"knockout_entropy_{label}", f"delta_entropy_{label}"]

    results = []

    print("\n=== Starting Gene Knockout Analysis ===")
    # Messages inside the loop go through tqdm.write so they are printed
    # above the progress bar instead of breaking it up.
    for gene in tqdm(genes, desc="Overall Gene Knockouts", total=len(genes)):
        tqdm.write(f"\n--- Processing gene: {gene} ---")
        row = {"gene": gene}
        for label, _, _, baseline_entropy in networks:
            row[f"baseline_entropy_{label}"] = baseline_entropy
        for label, node_to_index, L_dense, baseline_entropy in networks:
            row.update(_knockout_fields(gene, label, node_to_index, L_dense, baseline_entropy))
        results.append(row)

    # Write results to CSV
    output_csv = "knockout_results.csv"
    print(f"\nWriting results to CSV file: {output_csv}")
    df_results = pd.DataFrame(results, columns=columns)
    df_results.to_csv(output_csv, index=False)
    print("Analysis complete. Results saved successfully.")

# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()