In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats.stats import pearsonr
from scipy.stats import spearmanr
import time as tm
from collections import OrderedDict
# Custom libraries
import CCM_GAH_lib as ccm
import data_handle_lib as dh

In [3]:
def converter_space(instr):
    """Parse a bracketed, comma-separated string (e.g. "[1, 2, 3]") into a float array.

    The first and last characters of ``instr`` are dropped unconditionally
    (they are taken to be the enclosing brackets) and the remainder is
    handed to ``np.fromstring`` with ``','`` as the separator.

    NOTE(review): despite the name, this parses *comma*-separated values;
    the naming looks swapped with ``converter_comma`` -- confirm against the
    column format in the CSV before renaming anything.
    """
    inner_text = instr[1:-1]
    return np.fromstring(inner_text, sep=',')
def converter_comma(instr):
    """Parse a bracketed, whitespace-separated string (e.g. "[1 2 3]") into a float array.

    The first and last characters of ``instr`` are dropped unconditionally
    (they are taken to be the enclosing brackets) and the remainder is
    handed to ``np.fromstring`` with ``' '`` as the separator.

    NOTE(review): despite the name, this parses *space*-separated values;
    the naming looks swapped with ``converter_space`` -- confirm against the
    column format in the CSV before renaming anything.
    """
    inner_text = instr[1:-1]
    return np.fromstring(inner_text, sep=' ')

#### Generate nodes and edges from a given data set, keeping only the rows that satisfy certain limits (mainly on the correlation coefficients)

In [4]:
# Select which data file to analyze
# The path has no directory component, so it is resolved relative to the
# notebook's current working directory when it is read.
data_path = 'mgp93_F4_Tongue_CCMed_parallel_20190130_104823.csv'

In [5]:
# Load the results table. Two columns hold whole arrays serialized as bracketed
# strings; they are turned back into numpy arrays while the file is read.
df_data_analysis = pd.read_csv(
    data_path,
    converters={
        "pearson_coeff": converter_space,
        "L": converter_comma,
    },
)

In [6]:
# Conditions for causality
# Keep a row only when all three thresholds hold at once. A row with a missing
# value in any of these columns is dropped, since comparisons against NaN are False.
df_data_analysis = df_data_analysis[
    df_data_analysis["spearman_coeff_p"].lt(0.05)
    & df_data_analysis["spearman_coeff"].gt(0.5)
    & df_data_analysis["pearson_coeff_last"].gt(0.5)
]

In [7]:
# Get other info about data set, to include in the output file title
# The causality filter can reject every row. Fail here with an actionable
# message instead of the bare out-of-bounds IndexError that `.iloc[0]` raises
# on an empty frame (same exception type, clearer text).
if df_data_analysis.empty:
    raise IndexError(
        "No rows of " + data_path + " satisfy the causality conditions; "
        "cannot read E or build the node/edge files."
    )
# E is taken from the first remaining row only and cast through int so the
# file name carries an integer string.
# NOTE(review): assumes every row of the file shares the same E -- confirm.
E = str(int(df_data_analysis['E'].iloc[0]))
# Output names
# Both names share one timestamp so the node and edge files of a run pair up.
timestr = tm.strftime("%Y%m%d_%H%M%S")
output_edges_name = "mgp93_F4_Tongue_edges_E" + E + "_" + timestr+ ".csv"
output_nodes_name = "mgp93_F4_Tongue_nodes_E" + E + "_" + timestr+ ".csv"

In [8]:
# Nodes
# Every variable that appears on either side of a retained pair becomes an
# (ID, label) row: x-side rows first, then y-side rows.
df_nodes_temp_1 = df_data_analysis[["x_ID", "x_name"]].rename(
    columns={"x_ID": "ID", "x_name": "label"}
)
df_nodes_temp_2 = df_data_analysis[["y_ID", "y_name"]].rename(
    columns={"y_ID": "ID", "y_name": "label"}
)
df_nodes = pd.concat([df_nodes_temp_1, df_nodes_temp_2])

# One row per distinct (ID, label) pair, renumbered from 0.
df_nodes = df_nodes.drop_duplicates().reset_index(drop=True)
# Replace the column outright rather than assigning through `.loc[:, "ID"]`:
# on pandas >= 2.0 a `.loc[:, col] = values` assignment writes into the
# existing array, so a float ID column would silently stay float and the node
# file would no longer match the integer IDs written to the edge file.
df_nodes["ID"] = df_nodes["ID"].astype(int)

In [9]:
# Edges
# Each retained row yields one edge whose source is the row's y_ID and whose
# target is its x_ID; both are cast to int before being renamed.
df_edges = (
    df_data_analysis[["y_ID", "x_ID"]]
    .astype(int)
    .rename(columns={"y_ID": "source", "x_ID": "target"})
)

In [10]:
# Output to CSV
# NOTE(review): the output file names were already built from an earlier
# timestamp, so refreshing `timestr` here does not change where the files go.
timestr = tm.strftime("%Y%m%d_%H%M%S")
# Nodes are written first, then edges; the row index is left out of both files.
for frame, destination in ((df_nodes, output_nodes_name), (df_edges, output_edges_name)):
    frame.to_csv(destination, index=False)