
# Centrality Variation Analysis

This code performs centrality analysis on a network by comparing the baseline centrality values to the centrality values obtained after perturbing the network by removing each node individually. The analysis is performed using the Python programming language and various data analysis libraries such as Pandas and NumPy.

## Functionality

1. The code loads centrality data from a pickle file containing the baseline centrality values and perturbed centrality values.
2. It concatenates the baseline and perturbed centrality data into a single DataFrame with a multi-index.
3. The code defines a function to calculate the log2 ratio of baseline centrality to perturbed centrality for a given reaction.
4. Another function applies the log2 ratio calculation to all reactions in the dataset and consolidates the results into a single DataFrame.
5. The resulting centrality variations are saved as a compressed parquet file.

## Usage

1. Ensure that you have the necessary dependencies installed (Pandas, NumPy, and a Parquet engine such as pyarrow or fastparquet, which Pandas needs to write the output file).
2. Modify the code to specify the correct file path for the input pickle file.
3. Run the code in a Python environment or a Jupyter notebook.
4. The output will be a parquet file containing the log2 ratio centrality variations.




In [None]:

import pickle
import pandas as pd
import numpy as np
from typing import List, Tuple

def load_data(file_path: str) -> Tuple[List[Tuple[str, pd.DataFrame]], pd.DataFrame]:
    """Load the perturbed and baseline centralities from a pickle file.

    The pickle must contain a mapping with the keys
    ``'centralidades_perturbadas'`` and ``'baseline'``.

    Args:
        file_path: Path of the pickle file to read.

    Returns:
        A ``(centralidades_perturbadas, baseline)`` tuple, in that order.
        ``concat_centralities`` consumes the first element as a sequence of
        ``(removed_node_name, centralities)`` entries; the node names are
        presumably strings -- confirm against the code that wrote the pickle.

    Raises:
        KeyError: If either expected key is missing from the pickled mapping.
        OSError: If the file cannot be opened.
    """
    # NOTE(security): unpickling executes arbitrary code embedded in the file,
    # so only point this at pickles produced by a trusted pipeline.
    with open(file_path, 'rb') as handle:
        data = pickle.load(handle)

    return data['centralidades_perturbadas'], data['baseline']

def concat_centralities(
    baseline: pd.DataFrame,
    centralidades_perturbadas: List[Tuple[str, pd.DataFrame]],
) -> pd.DataFrame:
    """Stack the baseline and every perturbed centrality table into one frame.

    Args:
        baseline: Centralities of the unperturbed network.
        centralidades_perturbadas: Sequence of entries whose item ``0`` is the
            name of the removed node and whose item ``1`` is the centrality
            table computed after that removal.

    Returns:
        A DataFrame with a two-level row index named
        ``("removed_node", "metabolite")``. The baseline rows are stored under
        the outer key ``"baseline"``; each perturbed table is stored under the
        name of its removed node. Rows are sorted by index.
    """
    index_keys = ["baseline"]
    all_centralities = [baseline]
    for entry in centralidades_perturbadas:
        # Entries are indexed rather than unpacked so that sequences carrying
        # extra trailing items keep working.
        index_keys.append(entry[0])
        all_centralities.append(entry[1])

    centralidades_df = pd.concat(
        all_centralities,
        axis=0,
        keys=index_keys,
        names=["removed_node", "metabolite"],
    )

    # A sorted MultiIndex makes the later per-node ``.loc`` slices return rows
    # in a consistent order.
    return centralidades_df.sort_index()

def log2RatioCentrality(baseline: pd.DataFrame, centralidades_df: pd.DataFrame, a_rxn: str) -> pd.DataFrame:
    """Compute log2(baseline / perturbed) for one removed node.

    Args:
        baseline: Baseline centralities, one row per metabolite.
        centralidades_df: Frame whose outer index level selects the removed
            node, so that ``centralidades_df.loc[a_rxn]`` yields the
            centralities computed after removing ``a_rxn``.
        a_rxn: Outer-level key of the perturbation to compare against.

    Returns:
        The element-wise ``log2`` of ``baseline`` divided by the perturbed
        centralities. Values are positive where the centrality dropped after
        the removal and negative where it rose.

    Raises:
        AssertionError: If the row index of the perturbed slice does not match
            the baseline's row index (same labels, same order).
        KeyError: If ``a_rxn`` is not present in ``centralidades_df``.
    """
    perturbed = centralidades_df.loc[a_rxn]

    # Raised explicitly instead of using ``assert`` so the check survives
    # ``python -O``. ``Index.equals`` also handles indexes of different length,
    # where an element-wise ``==`` cannot produce a clean mismatch error.
    if not baseline.index.equals(perturbed.index):
        raise AssertionError("Index mismatch")

    return np.log2(baseline / perturbed)

def get_centrality_variation(variation_function, rxns: List[str]) -> pd.DataFrame:
    """Apply a variation function to each reaction and stack the results.

    Args:
        variation_function: Callable taking one reaction name and returning a
            DataFrame whose row index is named ``"metabolite"``.
        rxns: Reaction names to process. ``"baseline"`` entries are skipped.
            The list itself is left untouched.

    Returns:
        The per-reaction frames concatenated row-wise, indexed by
        ``("removed_rxn", "metabolite")``.

    Raises:
        ValueError: If no reaction remains after dropping ``"baseline"``
            (``pd.concat`` rejects an empty list).
        KeyError: If a returned frame has no ``"metabolite"`` index/column.
    """
    # Filter into a new list instead of calling ``rxns.remove`` so the
    # caller's list is not modified as a side effect.
    selected = [rxn for rxn in rxns if rxn != "baseline"]

    centralities_list = []
    for rxn in selected:
        variation = variation_function(rxn)
        # ``assign``/``reset_index``/``set_index`` all return new frames, so
        # the object handed back by ``variation_function`` is never mutated.
        tagged = (
            variation.assign(removed_rxn=rxn)
            .reset_index()
            .set_index(["removed_rxn", "metabolite"])
        )
        centralities_list.append(tagged)

    return pd.concat(centralities_list, axis=0)


# Load the pickled baseline and per-removal centralities. The path is relative
# to the current working directory.
centralidades_perturbadas, baseline = load_data('aws-downloads/centralities.pickle')
print(f"Read centralidades_perturbadas of length: {len(centralidades_perturbadas)}")
print(f"Read baseline of length: {len(baseline)}")

# Stack baseline and perturbed tables under a (removed_node, metabolite) index.
centralidades_df = concat_centralities(baseline, centralidades_perturbadas)
# Re-read the baseline from the sorted frame so its rows are in the same order
# as the per-node slices it is compared against.
baseline = centralidades_df.loc["baseline"]

# Every outer-level key, including "baseline"; get_centrality_variation skips
# the "baseline" entry itself.
all_rxns = list(np.unique(centralidades_df.index.get_level_values("removed_node").values))
log2Ratio_df = get_centrality_variation(lambda rxn: log2RatioCentrality(baseline, centralidades_df, rxn), all_rxns)
# The ".gzip" suffix does not select a codec by itself (to_parquet defaults to
# snappy), so request gzip explicitly to match the file name.
log2Ratio_df.to_parquet("log2Ratio_df.parquet.gzip", compression="gzip")
