In [2]:
import pandas as pd
import numpy as np

In [3]:
import scanpy as sc
# Load the annotated dataset; its `var_names` / `n_vars` are used below to
# select gene columns of the trajectory array.
adata = sc.read_h5ad('../data/rna_figure_ready.h5ad')

In [4]:
# Boolean selector over the variables of `adata`: only the first three
# entries are switched on (quick smoke-test subset).
mask = np.zeros(adata.n_vars, dtype=bool)
mask[[0, 1, 2]] = True


In [5]:
# Load the precomputed trajectory array from disk.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python
# objects; keep it only if this file is trusted and really stores object
# arrays, otherwise drop the flag.
trajs = np.load("../results_final_Dec22/traj_gene_space_T.npy", allow_pickle=True)

In [6]:
# Keep only the entries selected by the boolean `mask` along the last axis
# (presumably the gene axis, since `mask` has one entry per adata variable).
traj_subset = trajs[..., mask]

In [7]:
# Swap the first two axes, then average over the new leading axis so that
# the resulting frame has one column per selected gene.
data = traj_subset.transpose(1, 0, 2)
gene_names = adata.var_names[mask]
traj_df = pd.DataFrame(np.mean(data, axis=0), columns=gene_names)

In [8]:
traj_df

Unnamed: 0,AL627309.1,AL669831.5,FAM87B
0,0.001267,0.025910,0.003336
1,0.001307,0.024862,0.003068
2,0.001345,0.023811,0.002768
3,0.001377,0.022788,0.002435
4,0.001405,0.021787,0.002084
...,...,...,...
95,0.001588,0.031910,0.005951
96,0.001563,0.032121,0.005786
97,0.001525,0.032311,0.005566
98,0.001479,0.032479,0.005262


In [105]:
from tqdm import tqdm
import warnings
from statsmodels.tsa.stattools import grangercausalitytests

# The data are aggregated into 9 bins before testing, so one lag is enough.
lag_order = 1
# Lags handed to grangercausalitytests. `lag_order` was determined earlier and
# is used as-is: we are not supposed to add 1 to it.
maxlag = (lag_order,)
test = "ssr_chi2test"

from joblib import Parallel, delayed

def grangers_causation_matrix(
    data, in_variables, out_variables, test="ssr_chi2test", n_jobs=1, warn=False
):
    """Run pairwise Granger-causality tests for every (predictor, response) pair.

    For each ``r`` in ``in_variables`` and each ``c`` in ``out_variables`` the
    two-column frame ``data[[c, r]]`` is handed to statsmodels'
    ``grangercausalitytests``, which tests whether the *second* column (``r``)
    Granger-causes the *first* column (``c``).

    The lags that are tested come from the module-level ``maxlag`` iterable.
    For every pair, the lag with the smallest p-value is selected and its
    p-value and coefficient are reported.

    Parameters
    ----------
    data : pandas.DataFrame
        Time series, one column per variable and one row per time step.
    in_variables : iterable of str
        Candidate causes (predictors). They become the rows of the result,
        suffixed with ``"_x"``.
    out_variables : iterable of str
        Candidate effects (responses). They become the columns of the result,
        suffixed with ``"_y"``.
    test : str, default "ssr_chi2test"
        Key of the test statistic to read from the statsmodels result.
    n_jobs : int, default 1
        Number of joblib workers.
    warn : bool, default False
        If True, run statsmodels verbosely and do not silence
        ``FutureWarning``; otherwise run quietly.

    Returns
    -------
    (df_p, df_c) : tuple of pandas.DataFrame
        Both frames have shape ``(len(in_variables), len(out_variables))``.
        ``df_p.loc[x + "_x", y + "_y"]`` is the p-value of the null hypothesis
        "x does not Granger-cause y"; values below the significance level
        (e.g. 0.05) reject it. ``df_c`` holds, at the same position, the
        coefficient of the first lag of x in the unrestricted (joint)
        regression of y.
    """
    # Materialise once so generators / pandas Index objects behave alike and
    # can be iterated several times.
    in_variables = list(in_variables)
    out_variables = list(out_variables)

    def get_pval(dd):
        """Return (min p-value, matching predictor coefficient) for one pair."""
        if warn:
            test_result = grangercausalitytests(dd, maxlag=maxlag, verbose=True)
        else:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", category=FutureWarning)
                test_result = grangercausalitytests(dd, maxlag=maxlag, verbose=False)

        # test_result[lag] == (dict of test statistics,
        #                      [restricted fit, unrestricted fit, restriction matrix])
        p_values = [test_result[lag][0][test][1] for lag in maxlag]
        # Regressors of the unrestricted fit are ordered
        # [response lags 1..lag, predictor lags 1..lag, const], so position
        # `lag` is the first predictor lag (this equals params[1] when lag == 1).
        coefs = [test_result[lag][1][1].params[lag] for lag in maxlag]

        best = int(np.argmin(p_values))
        return (p_values[best], coefs[best])

    # Out-variable-major ordering; a single progress bar over all pairs avoids
    # spawning a fresh nested bar for every response variable.
    pairs = [(c, r) for c in out_variables for r in in_variables]
    out = Parallel(n_jobs=n_jobs)(
        # data[[c, r]] tests "r causes c": r is the input, c is the output.
        delayed(get_pval)(data[[c, r]])
        for c, r in tqdm(pairs, desc="Granger pairs")
    )

    # Results arrive in (out, in) order, so reshape to (n_out, n_in) first and
    # transpose afterwards to get predictors on rows and responses on columns.
    grid_shape = (len(out_variables), len(in_variables))
    row_labels = [f"{var}_x" for var in in_variables]
    col_labels = [f"{var}_y" for var in out_variables]

    def _as_frame(values):
        """Lay a flat, out-major result list out as an (in x out) DataFrame."""
        grid = np.array(values).reshape(grid_shape).T
        return pd.DataFrame(grid, index=row_labels, columns=col_labels)

    df_p = _as_frame([p for (p, _) in out])
    df_c = _as_frame([c for (_, c) in out])
    return df_p, df_c

def do_granger(trajs, in_genes, out_genes, n_jobs=1, warn=False, step=10):
    """Granger-test whether ``in_genes`` cause ``out_genes`` on a trajectory.

    The trajectory is transposed so that rows are time steps, subsampled to
    every ``step``-th row, first-differenced, and the leading NaN row created
    by differencing is dropped before the pairwise tests are run.

    Parameters
    ----------
    trajs : pandas.DataFrame
        Trajectory with one row per gene and one column per time step
        (i.e. the transpose of a time-by-gene frame).
    in_genes : iterable of str
        Candidate causes.
    out_genes : iterable of str
        Candidate effects.
    n_jobs : int, default 1
        Number of joblib workers used for the pairwise tests.
    warn : bool, default False
        Forwarded to ``grangers_causation_matrix``.
    step : int, default 10
        Keep every ``step``-th time point before differencing.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(p_values, coefficients)`` as returned by
        ``grangers_causation_matrix`` (rows: ``in_genes``, columns: ``out_genes``).

    Raises
    ------
    ValueError
        If ``step`` is smaller than 1.
    """
    if step < 1:
        raise ValueError(f"step must be a positive integer, got {step!r}")

    # in causes out
    subsampled = trajs.T.iloc[::step]
    # First differences; the first row has no predecessor and becomes NaN.
    differenced = subsampled.diff().dropna()
    return grangers_causation_matrix(
        differenced,
        in_variables=in_genes,
        out_variables=out_genes,
        n_jobs=n_jobs,
        warn=warn,
    )



In [106]:
# from tqdm import tqdm
# import warnings
# ## KPSS Null hypothesis: there is a no unit root, meaning series is stationary
# from statsmodels.tsa.stattools import kpss


# def kpss_test(series, **kw):
#     statistic, p_value, n_lags, critical_values = kpss(series, **kw)
#     # Format Output
#     print(f"KPSS Statistic: {statistic}")
#     print(f"p-value: {p_value}")
#     print(f"num lags: {n_lags}")
#     print("Critial Values:")
#     for key, value in critical_values.items():
#         print(f"   {key} : {value}")
#     print(f'Result: The series is {"not " if p_value < 0.05 else ""}stationary')


# from statsmodels.tsa.stattools import grangercausalitytests

# lag_order = 2
# maxlag = (
#     lag_order,  # becuase we got this value before. We are not suppose to add 1 to it
# )
# test = "ssr_chi2test"

# from joblib import Parallel, delayed

# def grangers_causation_matrix(
#     data, in_variables, out_variables, test="ssr_chi2test", n_jobs=1, warn=False
# ):
#     """Check Granger Causality of all possible combinations of the Time series.
#     The rows are the response variable, columns are predictors. The values in the table 
#     are the P-Values. P-Values lesser than the significance level (0.05), implies 
#     the Null Hypothesis that the coefficients of the corresponding past values is 
#     zero, that is, the X does not cause Y can be rejected.

#     data      : pandas dataframe containing the time series variables
#     variables : list containing names of the time series variables.
#     """

#     def get_pval(dd):
#         if warn:
#             test_result = grangercausalitytests(dd, maxlag=maxlag, verbose=True)
#         else:
#             with warnings.catch_warnings():
#                 warnings.simplefilter("ignore", category=FutureWarning)
#                 # print(dd.columns)
#                 test_result = grangercausalitytests(dd, maxlag=maxlag, verbose=False)
#                 # according to the documentation https://www.statsmodels.org/dev/generated/statsmodels.tsa.stattools.grangercausalitytests.html,
#                 # the dd has 2 columns, second causes the first.
#                 assert len(test_result.keys()) == 1
#                 # print(test_result.keys())
#                 key = next(iter(test_result))
#                 print(test_result[key][1][0].summary())
#                 # assert False
#                 # print(test_result[1][1][0].params)
#                 # print(test_result[1][1][0].summary())
#                 # print(test_result[1])
#                 # print(test_result[1][1][0].params)
#                 # print(test_result[1][1][0].params['constant'])
#                 # print(test_result[1][1][0].model.exog_names)
#         # import pdb; pdb.set_trace()
#         p_values = [test_result[i][0][test][1] for i in maxlag]
#         print(p_values)
#         coefs = [test_result[i][1][0].params[:-1] for i in maxlag]
#         arg_min_p_value = np.argmin(p_values)
#         min_p_value = p_values[arg_min_p_value]
#         # min_coef = coefs[arg_min_p_value]
#         min_coef = coefs[arg_min_p_value]
#         min_coef = (min_coef > 0).mean()
#         print(min_coef)
#         return (min_p_value, min_coef)


#     out = Parallel(n_jobs=n_jobs)(
#         # delayed(get_pval)(data[[r, c]]) # this is incorrect
#         delayed(get_pval)(data[[c, r]]) # this means r causes c, so r is be in and c is out
#         for c in tqdm(out_variables, desc="Processing columns")  # Outer loop progress bar
#         for r in tqdm(in_variables, desc="Processing rows", leave=False)  # Inner loop progress bar
#     )
#     # Note that this is the wrong way and must be corrected
#     # df = pd.DataFrame(
#     #     np.array(out).reshape((len(in_variables), len(out_variables))), # this is incorrect
#     #     columns=out_variables,
#     #     index=in_variables,
#     # )
#     out_p = [p for (p,c) in out]
#     out_c = [c for (p,c) in out]
#     df_p = pd.DataFrame(
#         np.array(out_p).reshape((len(out_variables), len(in_variables))), # should be reshaped to len(out_variables), len(in_variables) according to the for loop.
#         columns=in_variables,
#         index=out_variables,
#     ).T # used the correct reshaping, and then transposed the matrix so the x and y are semantically correct (x causes y).
#     df_c = pd.DataFrame(
#         np.array(out_c).reshape((len(out_variables), len(in_variables))), # should be reshaped to len(out_variables), len(in_variables) according to the for loop.
#         columns=in_variables,
#         index=out_variables,
#     ).T
#     df_p.index = [var + "_x" for var in in_variables]
#     df_p.columns = [var + "_y" for var in out_variables]
#     df_c.index = [var + "_x" for var in in_variables]
#     df_c.columns = [var + "_y" for var in out_variables]
#     return df_p, df_c

# def do_granger(trajs, in_genes, out_genes, n_jobs=1, warn=False):
#     # in causes out
#     trajs = trajs.T[::10]
#     trajs = trajs - trajs.shift(1)
#     trajs = trajs.dropna()
#     out_traj_p, out_traj_c = grangers_causation_matrix(
#         trajs, in_variables=in_genes, out_variables=out_genes, n_jobs=n_jobs, warn=warn
#     )
#     return out_traj_p, out_traj_c



In [107]:
# All-against-all Granger test among the three selected genes; `gm` is the
# (p-values, coefficients) pair returned by do_granger.
gm = do_granger(
    traj_df.T,
    in_genes=gene_names[[0, 1, 2]],
    out_genes=gene_names[[0, 1, 2]],
    n_jobs=-1,
    warn=False,
)



















[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A


















[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A

















[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A

















[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A




















Processing columns: 100%|██████████| 3/3 [00:00<00:00, 33.48it/s]


In [108]:
gm1 = gm
gm1

(              AL627309.1_y  AL669831.5_y  FAM87B_y
 AL627309.1_x      1.000000      0.451341  0.004875
 AL669831.5_x      0.204324      1.000000  0.000024
 FAM87B_x          0.298581      0.012638  1.000000,
               AL627309.1_y  AL669831.5_y   FAM87B_y
 AL627309.1_x      0.262137      8.141888  22.518747
 AL669831.5_x     -0.011747      0.035946  -1.408153
 FAM87B_x         -0.008034      0.901694   0.192779)

In [68]:
gm1 = gm
gm1

(              AL627309.1_y  AL669831.5_y  FAM87B_y
 AL627309.1_x      1.000000      0.451341  0.004875
 AL669831.5_x      0.204324      1.000000  0.000024
 FAM87B_x          0.298581      0.012638  1.000000,
               AL627309.1_y  AL669831.5_y   FAM87B_y
 AL627309.1_x      0.262137      8.141888  22.518747
 AL669831.5_x     -0.011747      0.035946  -1.408153
 FAM87B_x         -0.008034      0.901694   0.192779)

In [13]:
gm

(              AL627309.1_y  AL669831.5_y  FAM87B_y
 AL627309.1_x      1.000000      0.451341  0.004875
 AL669831.5_x      0.204324      1.000000  0.000024
 FAM87B_x          0.298581      0.012638  1.000000,
               AL627309.1_y  AL669831.5_y  FAM87B_y
 AL627309.1_x      0.524273      0.071892  0.385558
 AL669831.5_x      0.524273      0.071892  0.385558
 FAM87B_x          0.524273      0.071892  0.385558)

In [14]:
traj_df

Unnamed: 0,AL627309.1,AL669831.5,FAM87B
0,0.001267,0.025910,0.003336
1,0.001307,0.024862,0.003068
2,0.001345,0.023811,0.002768
3,0.001377,0.022788,0.002435
4,0.001405,0.021787,0.002084
...,...,...,...
95,0.001588,0.031910,0.005951
96,0.001563,0.032121,0.005786
97,0.001525,0.032311,0.005566
98,0.001479,0.032479,0.005262


In [15]:
gene_names

Index(['AL627309.1', 'AL669831.5', 'FAM87B'], dtype='object')

In [16]:
data.shape

(80, 100, 3)

In [17]:
df2 = pd.read_csv('tmp_test.csv', index_col=0)

In [18]:
df2.values.min()

1.2826915507341268e-32

In [40]:
gm1 = do_granger(traj_df.T, in_genes=gene_names[[0]], out_genes=gene_names[[1]], n_jobs=-1)


[A

[A[A

AttributeError: 'OLS' object has no attribute 'params'

In [20]:
gm1

(              AL669831.5_y
 AL627309.1_x      0.451341,
               AL669831.5_y
 AL627309.1_x      0.071892)

In [21]:
df

NameError: name 'df' is not defined

In [None]:
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python
# objects; keep it only if this file is trusted and really stores object arrays.
traj_gene_sp = np.load("../results_final_Dec22/traj_gene_space_T.npy", allow_pickle=True)
# sc.pp.highly_variable_genes(adata, n_top_genes=400)
# Read the two gene lists with context managers so the file handles are closed.
with open('../data/alex_genes.txt') as genes_file:
    alex_genes = genes_file.read().splitlines()
with open('../data/alex_tfs.txt') as tfs_file:
    alex_tfs = tfs_file.read().splitlines()
# Union of both lists, then restrict the trajectories to those genes.
alex_gene_all = list(set(alex_genes) | set(alex_tfs))
alex_gene_all_mask = np.isin(adata.var_names, alex_gene_all)
traj_subset = traj_gene_sp[:, :, alex_gene_all_mask]
gene_names = adata.var_names[alex_gene_all_mask]
# traj_df = pd.DataFrame(np.transpose(traj_subset, (1, 0, 2)).mean(axis=0), columns=gene_names)
data = np.transpose(traj_subset, (1, 0, 2))
# Drop genes whose averaged trajectory is constant (zero variance).
valid_mask = (data.mean(axis=0).var(axis=0) != 0.0)
data = data[:, :, valid_mask]
gene_names = np.array(gene_names)[valid_mask]
traj_df = pd.DataFrame(data.mean(axis=0), columns=gene_names)

In [None]:
# do_granger returns a (p_values, coefficients) tuple. The comparison cells
# that follow index gm1/gm2/gm3 as DataFrames (.loc, .values, .index), so the
# p-value frame is bound to those names and the coefficients kept separately.
# NOTE(review): `df` is not defined in the visible notebook (an earlier cell
# raises NameError on it) — presumably a precomputed predictor-by-response
# matrix; confirm and load it before running this cell.
gm1, gm1_coef = do_granger(traj_df.T, in_genes=df.index[:10], out_genes=df.columns[:10], n_jobs=-1)
gm2, gm2_coef = do_granger(traj_df.T, in_genes=df.index[:5], out_genes=df.columns[:5], n_jobs=-1)
gm3, gm3_coef = do_granger(traj_df.T, in_genes=df.index[:10], out_genes=df.columns[:15], n_jobs=-1)


[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A
[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A
[A

[A[A

[A[A

[A[A

Processing columns: 100%|██████████| 10/10 [00:00<00:00, 29.13it/s]

[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

Processing columns: 100%|██████████| 5/5 [00:00<00:00, 50.45it/s]

[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A
[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A
[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A
[A

[A[A

[A[A

[A[A

[A[A

[A[A

Processing columns: 100%|██████████| 15/15 [00:00<00:00, 31.73it/s]


In [None]:
gm1

Unnamed: 0,NOC2L_y,HES4_y,ISG15_y,ERRFI1_y,ENO1_y,PGD_y,CENPS_y,SRM_y,PLOD1_y,EFHD2_y
HES4_x,0.042306,1.0,0.116742,0.3083272,0.03775,0.452045,2.866243e-08,0.418659,0.899307,0.395684
CENPS_x,0.026502,0.146633,0.857549,0.9133007,0.000961,0.759271,1.0,0.005341,0.541369,0.949224
RUNX3_x,0.698055,0.599462,0.339878,0.0001033588,0.455522,0.377175,0.2070784,0.097731,0.454008,0.243152
AHDC1_x,0.598789,0.049435,0.256724,0.2114117,0.913732,0.221681,0.04690639,0.287425,0.916514,0.123789
ZBTB8A_x,0.938998,4.1e-05,0.623932,0.2649687,0.110319,0.196143,0.04286693,0.373719,0.374236,0.201891
RLF_x,0.140316,0.632341,0.435876,0.6578897,0.199921,0.246837,0.7053279,0.006401,0.156722,0.825791
ZFP69_x,0.575046,0.044491,0.80668,0.8341304,0.000298,0.66152,0.001201931,0.037432,0.641618,0.835952
HIVEP3_x,0.343454,0.078747,0.271302,0.03476633,0.88409,0.018767,0.05692897,0.424072,0.21833,0.00971
YBX1_x,0.169627,0.105368,0.09836,0.4398768,0.050936,0.170498,0.399209,0.000362,0.011135,0.687989
JUN_x,0.880392,0.659954,0.084644,1.172409e-13,0.757666,0.056045,0.4817478,0.494637,0.034118,0.056479


In [None]:
df.loc[df.index[:10], df.columns[:10]]

Unnamed: 0,NOC2L,HES4,ISG15,ERRFI1,ENO1,PGD,CENPS,SRM,PLOD1,EFHD2
HES4,0.386854,0.322516,0.901839,0.895447,0.909349,0.124991,0.560242,0.321999,0.28346,0.72114
CENPS,0.803194,4.3e-05,0.660363,0.650542,0.661946,0.673583,0.123069,0.791316,0.698329,0.691989
RUNX3,0.761496,0.544198,0.72481,0.004724,0.399354,0.746247,0.324311,0.225394,0.788975,0.204553
AHDC1,0.312396,0.039732,0.728161,0.600441,0.655087,0.035616,0.479752,0.699288,0.614211,0.013805
ZBTB8A,0.332794,0.944415,0.50886,0.00032,0.674841,0.165641,0.313251,0.612232,0.231637,0.688988
RLF,0.001757,0.66947,0.678395,0.598549,0.02558,0.313528,0.00012,0.007127,3e-06,0.023471
ZFP69,0.654615,0.037852,0.352345,0.125859,0.21769,0.213694,0.267965,0.123096,0.153962,0.003374
HIVEP3,0.016379,0.888899,0.440582,0.569993,0.009675,0.081771,0.388233,0.063944,0.925166,0.023314
YBX1,0.752377,0.085732,0.174891,0.202512,0.500328,0.885451,2.2e-05,0.658492,0.633695,0.019274
JUN,0.001028,0.238372,0.574191,0.069694,0.857793,0.096114,0.024562,0.679284,0.139009,0.537897


In [None]:
gm2

Unnamed: 0,NOC2L_y,HES4_y,ISG15_y,ERRFI1_y,ENO1_y
HES4_x,0.042306,1.0,0.116742,0.308327,0.03775
CENPS_x,0.026502,0.146633,0.857549,0.913301,0.000961
RUNX3_x,0.698055,0.599462,0.339878,0.000103,0.455522
AHDC1_x,0.598789,0.049435,0.256724,0.211412,0.913732
ZBTB8A_x,0.938998,4.1e-05,0.623932,0.264969,0.110319


In [None]:
gm1.loc[gm1.index[:5], gm1.columns[:5]].values - gm2.values

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [None]:
gm3.loc[gm3.index[:10], gm3.columns[:10]].values - gm1.loc[gm1.index[:10], gm1.columns[:10]].values

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])

# There is a bug when the result matrix is not square, i.e. when the number of in-genes differs from the number of out-genes!

In [None]:
gm3

Unnamed: 0,NOC2L_y,HES4_y,ISG15_y,ERRFI1_y,ENO1_y,PGD_y,CENPS_y,SRM_y,PLOD1_y,EFHD2_y,FBLIM1_y,MFAP2_y,CAMK2N1_y,PINK1_y,DDOST_y
HES4_x,0.042306,1.0,0.116742,0.3083272,0.03775,0.452045,2.866243e-08,0.418659,0.899307,0.395684,0.000801,0.009141,0.032035,0.604871,0.074424
CENPS_x,0.026502,0.146633,0.857549,0.9133007,0.000961,0.759271,1.0,0.005341,0.541369,0.949224,0.426593,0.180578,0.566098,0.669877,0.578755
RUNX3_x,0.698055,0.599462,0.339878,0.0001033588,0.455522,0.377175,0.2070784,0.097731,0.454008,0.243152,0.045444,0.109515,0.703094,0.342721,0.250121
AHDC1_x,0.598789,0.049435,0.256724,0.2114117,0.913732,0.221681,0.04690639,0.287425,0.916514,0.123789,0.3649,0.288002,0.066544,0.48404,0.590042
ZBTB8A_x,0.938998,4.1e-05,0.623932,0.2649687,0.110319,0.196143,0.04286693,0.373719,0.374236,0.201891,0.292149,0.136671,0.027572,0.643264,0.967976
RLF_x,0.140316,0.632341,0.435876,0.6578897,0.199921,0.246837,0.7053279,0.006401,0.156722,0.825791,0.260768,0.24,0.875486,0.498561,0.153204
ZFP69_x,0.575046,0.044491,0.80668,0.8341304,0.000298,0.66152,0.001201931,0.037432,0.641618,0.835952,0.076126,0.057377,0.256388,0.817632,0.907197
HIVEP3_x,0.343454,0.078747,0.271302,0.03476633,0.88409,0.018767,0.05692897,0.424072,0.21833,0.00971,0.402207,0.289847,0.012385,0.137795,0.560028
YBX1_x,0.169627,0.105368,0.09836,0.4398768,0.050936,0.170498,0.399209,0.000362,0.011135,0.687989,0.819536,0.190224,0.736311,0.304524,0.812202
JUN_x,0.880392,0.659954,0.084644,1.172409e-13,0.757666,0.056045,0.4817478,0.494637,0.034118,0.056479,0.460459,0.688721,0.545923,0.527285,0.809413


In [None]:
gm1

Unnamed: 0,NOC2L_y,HES4_y,ISG15_y,ERRFI1_y,ENO1_y,PGD_y,CENPS_y,SRM_y,PLOD1_y,EFHD2_y
HES4_x,0.042306,1.0,0.116742,0.3083272,0.03775,0.452045,2.866243e-08,0.418659,0.899307,0.395684
CENPS_x,0.026502,0.146633,0.857549,0.9133007,0.000961,0.759271,1.0,0.005341,0.541369,0.949224
RUNX3_x,0.698055,0.599462,0.339878,0.0001033588,0.455522,0.377175,0.2070784,0.097731,0.454008,0.243152
AHDC1_x,0.598789,0.049435,0.256724,0.2114117,0.913732,0.221681,0.04690639,0.287425,0.916514,0.123789
ZBTB8A_x,0.938998,4.1e-05,0.623932,0.2649687,0.110319,0.196143,0.04286693,0.373719,0.374236,0.201891
RLF_x,0.140316,0.632341,0.435876,0.6578897,0.199921,0.246837,0.7053279,0.006401,0.156722,0.825791
ZFP69_x,0.575046,0.044491,0.80668,0.8341304,0.000298,0.66152,0.001201931,0.037432,0.641618,0.835952
HIVEP3_x,0.343454,0.078747,0.271302,0.03476633,0.88409,0.018767,0.05692897,0.424072,0.21833,0.00971
YBX1_x,0.169627,0.105368,0.09836,0.4398768,0.050936,0.170498,0.399209,0.000362,0.011135,0.687989
JUN_x,0.880392,0.659954,0.084644,1.172409e-13,0.757666,0.056045,0.4817478,0.494637,0.034118,0.056479


In [None]:
gm3.values.shape

(10, 15)

In [None]:
# Re-read the flat (row-major) values of gm3 on a 15 x 10 grid, with the
# labels swapped. This is a reshape, not a transpose, so cells no longer line
# up with their original labels.
gm3p = pd.DataFrame(
    data=gm3.values.flatten().reshape((15, 10)),
    columns=gm3.index,
    index=gm3.columns,
)

In [None]:
gm3p

Unnamed: 0,HES4_x,CENPS_x,RUNX3_x,AHDC1_x,ZBTB8A_x,RLF_x,ZFP69_x,HIVEP3_x,YBX1_x,JUN_x
NOC2L_y,0.042306,1.0,0.116742,0.308327,0.03775,0.452045,2.866243e-08,0.418659,0.8993072,0.395684
HES4_y,0.000801,0.009141,0.032035,0.604871,0.074424,0.026502,0.1466334,0.857549,0.9133007,0.000961
ISG15_y,0.759271,1.0,0.005341,0.541369,0.949224,0.426593,0.1805783,0.566098,0.6698766,0.578755
ERRFI1_y,0.698055,0.599462,0.339878,0.000103,0.455522,0.377175,0.2070784,0.097731,0.454008,0.243152
ENO1_y,0.045444,0.109515,0.703094,0.342721,0.250121,0.598789,0.04943476,0.256724,0.2114117,0.913732
PGD_y,0.221681,0.046906,0.287425,0.916514,0.123789,0.3649,0.2880024,0.066544,0.48404,0.590042
CENPS_y,0.938998,4.1e-05,0.623932,0.264969,0.110319,0.196143,0.04286693,0.373719,0.3742364,0.201891
SRM_y,0.292149,0.136671,0.027572,0.643264,0.967976,0.140316,0.6323412,0.435876,0.6578897,0.199921
PLOD1_y,0.246837,0.705328,0.006401,0.156722,0.825791,0.260768,0.2400004,0.875486,0.4985609,0.153204
EFHD2_y,0.575046,0.044491,0.80668,0.83413,0.000298,0.66152,0.001201931,0.037432,0.6416185,0.835952


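# The bug is in reshaping the output of the granger function: results are produced out-variable-major, so they must be reshaped to (len(out_variables), len(in_variables)) and only then transposed.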

In [None]:
# Toy sizes to show the order in which (out, in) pairs are generated:
# the out index is the slow (outer) one, the in index the fast (inner) one.
len_in, len_out = 4, 3
[(out_idx, in_idx) for out_idx in range(len_out) for in_idx in range(len_in)]

[(0, 0),
 (0, 1),
 (0, 2),
 (0, 3),
 (1, 0),
 (1, 1),
 (1, 2),
 (1, 3),
 (2, 0),
 (2, 1),
 (2, 2),
 (2, 3)]

In [None]:
# Row-major reshape of the out-major / in-minor pair list: row = out index,
# column = in index. The ranges use len_out / len_in so they always agree
# with the target shape.
np.array(
    [str((c, r)) for c in range(len_out) for r in range(len_in)]
).reshape(len_out, len_in)

array([['(0, 0)', '(0, 1)', '(0, 2)', '(0, 3)'],
       ['(1, 0)', '(1, 1)', '(1, 2)', '(1, 3)'],
       ['(2, 0)', '(2, 1)', '(2, 2)', '(2, 3)']], dtype='<U6')