In [8]:
import Random_Walker_Module as rwm
import os
import networkx as nx # I dont think this package is on google colab, might be better to use NetworkX
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook as tqdm

Load graphs from Data Set

In [9]:
# Load the sample table; `Type` becomes a categorical with a fixed category order.
df = pd.read_csv('../Data/MitoTable.csv')
df['Type'] = df['Type'].astype("category")
# `inplace=True` on the categorical accessor was deprecated in pandas 1.3 and
# removed in 2.0, so assign the returned Series instead.
df['Type'] = df['Type'].cat.reorder_categories(["WT", "WTFIX", 'DD', 'DDFIX'])
print(df.shape)
#df = df.loc[(df.Date.isin([24]))&(df.Type.isin(['WT','DDFIX']))]
# Keep only Date == 24, Type == 'WT'. `.copy()` makes the subset an independent
# frame so the column assignment below does not write into a slice of the
# full table (SettingWithCopy hazard).
df = df.loc[(df.Date.isin([24])) & (df.Type.isin(['WT']))].copy()
df['Type'] = df['Type'].cat.remove_unused_categories()
print(df.shape)

# Build one MultiGraph per selected row from its tab-separated '<Name>.gnet' file.
graphs = []
for index in tqdm(df.index):
    # Load the graph
    fname = os.path.join('../Data/', df.Name[index] + '.gnet')
    # The first line of the file is skipped; the remaining rows are read as
    # (Source, Target, Length) triples.
    graph_list = pd.read_csv(fname, skiprows=1, sep='\t', header=None)
    graph_list.columns = ['Source', 'Target', 'Length']
    # Only the endpoints are used; the Length column is not attached to the edges.
    edges = list(zip(graph_list.Source.values, graph_list.Target.values))
    g = nx.from_edgelist(edges, create_using=nx.MultiGraph)  # this is the variable that stores the mitochondria graph
    graphs.append(g)


(3606, 8)
(351, 8)


  df.Type.cat.reorder_categories(["WT", "WTFIX",'DD', 'DDFIX'], inplace=True)
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for index in tqdm(df.index):


  0%|          | 0/351 [00:00<?, ?it/s]

Compute test statistic for graphs in data set

In [10]:
#get the valid mitochondria graphs from the data set
def is_valid_mitograph(g):
    """Return True when `g` is planar and every node has degree 1 or 3."""
    if not nx.is_planar(g):
        return False
    # Each node appears once in the degree view, so the count of nodes with
    # degree 1 or 3 equals the node count exactly when no other degree occurs.
    allowed_count = sum(1 for _, deg in nx.degree(g) if deg in (1, 3))
    return allowed_count == g.number_of_nodes()

# Keep only the loaded graphs that pass the validity check; S is the sample size.
obs_graphs = list(filter(is_valid_mitograph, graphs))
S = len(obs_graphs)
print('The data set for wild type mitochondria contains', S, 'valid mitographs out of', len(graphs), 'samples')

# Compute T_0
def compute_degree_ratio(g):
    """Return the fraction of nodes in `g` whose degree is exactly 3.

    Raises ZeroDivisionError if `g` has no nodes.
    """
    degree_three_nodes = [node for node, deg in nx.degree(g) if deg == 3]
    return len(degree_three_nodes) / g.number_of_nodes()
# Observed test statistic: mean degree-3 fraction over the valid graphs.
obs_deg_ratios = [compute_degree_ratio(graph) for graph in obs_graphs]
T_0 = np.average(obs_deg_ratios)

# Largest node count among the valid graphs (stays 0 if there are none).
N_max = max((nx.number_of_nodes(graph) for graph in obs_graphs), default=0)

print('Our T_0 value is:', T_0)


The data set for wild type mitochondria contains 55 valid mitographs out of 351 samples
Our T_0 value is: 0.5488619131643031


Simulate data sets

In [11]:
def simulate_data_set(S, initial_walk_length):
    """Simulate one data set of S graphs and return its test statistic.

    A single random walk of `initial_walk_length + S` steps is started from
    the state returned by `rwm.load_i_tubule()`. The first
    `initial_walk_length` entries of the walk are skipped and the next S
    entries form the simulated sample.

    Returns:
        (mean degree ratio over the S sampled states, num_fails), where
        `num_fails` is passed through unchanged from `rwm.random_walk`
        (presumably the number of failed walk attempts; confirm in the module).
    """
    total_steps = initial_walk_length + S
    start_state = rwm.load_i_tubule()
    path, num_fails = rwm.random_walk(start_state, total_steps)

    # Sample positions initial_walk_length .. total_steps - 1 of the walk.
    sim_deg_ratios = [
        rwm.Mitograph.compute_degree_ratio(path[step])
        for step in range(initial_walk_length, total_steps)
    ]

    return np.average(sim_deg_ratios), num_fails

def run_experiment(M, walk_length):
    """Simulate M data sets and return the list of their test statistics.

    Each data set has the notebook-level sample size `S` and is generated by
    `simulate_data_set(S, walk_length)`. One progress line is printed per
    data set, followed by the overall failure rate as a percentage.
    """
    T_values = []
    fail_counts = []
    for _ in range(M):
        T, num_fails = simulate_data_set(S, walk_length)
        # A walk that needed k failed tries took k + 1 attempts in total.
        print('T:', T, 'number of attempts to complete walk:', num_fails+1)
        T_values.append(T)
        fail_counts.append(num_fails)
    total_fails = sum(fail_counts)
    # Failures as a share of all attempts (M successes plus the failures).
    print('failure rate: ',  100*(total_fails/(M+total_fails)),'%')
    return T_values

# Number of simulated data sets.
M = 100
# Cap the simulated graphs at 5% above the largest observed node count and
# reuse that cap as the number of initial walk steps skipped before sampling.
rwm.N_max = walk_length = round(N_max*1.05)
print('Maximum number of nodes:', rwm.N_max)
print('Walk length:', walk_length)
T_values = run_experiment(M, walk_length)


Maximum number of nodes: 55
Walk length: 55
T: 0.8948680393680395 number of attempts to complete walk: 1
T: 0.9149665889665889 number of attempts to complete walk: 1
T: 0.8829921780020594 number of attempts to complete walk: 1
T: 0.8756152738346414 number of attempts to complete walk: 1
T: 0.9240352240352241 number of attempts to complete walk: 1
T: 0.8792499352499352 number of attempts to complete walk: 1
T: 0.906904687904688 number of attempts to complete walk: 1
T: 0.9042230135550294 number of attempts to complete walk: 1
T: 0.8498186998186998 number of attempts to complete walk: 1
T: 0.9024599844599844 number of attempts to complete walk: 1
T: 0.9098159803159802 number of attempts to complete walk: 1
T: 0.9076673141673142 number of attempts to complete walk: 1
T: 0.9069309764309764 number of attempts to complete walk: 1
T: 0.9424182966791662 number of attempts to complete walk: 1
T: 0.9171717171717172 number of attempts to complete walk: 1
T: 0.8903133903133902 number of attempts t

Compute the p-value. This is a right-tailed test, as completely triangulated graphs are unlikely.

In [12]:
# Empirical tail probabilities: the share of the M simulated statistics that
# are at least (right tail) or at most (left tail) the observed T_0.
right_p_value = len([T for T in T_values if T >= T_0]) / M
left_p_value = len([T for T in T_values if T <= T_0]) / M
print('right p_val:', right_p_value, 'left p_val:', left_p_value)

right p_val: 1.0 left p_val: 0.0


Running experiment again but without restricting the total number of nodes:

In [13]:

# An infinite N_max disables the module's limit on the total number of nodes.
rwm.N_max = float('inf') #the module is programmed so that setting N_max to this value removes the restriction
T_values = run_experiment(M, walk_length)
# Right-tail empirical p-value: share of simulated statistics >= observed T_0.
p_value = len([T for T in T_values if T >= T_0]) / M
print('p_val:', p_value)

T: 0.7568565545759421 number of attempts to complete walk: 1
T: 0.6709444819504353 number of attempts to complete walk: 1
T: 0.669982716594551 number of attempts to complete walk: 1
T: 0.7245336343090245 number of attempts to complete walk: 1
T: 0.7286208397932589 number of attempts to complete walk: 1
T: 0.7657013050283813 number of attempts to complete walk: 1
T: 0.6780085932151534 number of attempts to complete walk: 1
T: 0.7601529967536963 number of attempts to complete walk: 1
T: 0.7463996824200742 number of attempts to complete walk: 1
T: 0.7022467075459663 number of attempts to complete walk: 1
T: 0.7149169854696762 number of attempts to complete walk: 1
T: 0.6441084362247621 number of attempts to complete walk: 1
T: 0.7163480480647458 number of attempts to complete walk: 1
T: 0.7231648929921943 number of attempts to complete walk: 1
T: 0.7054974936950374 number of attempts to complete walk: 1
T: 0.6758957499667315 number of attempts to complete walk: 1
T: 0.6614218076379651 num

Running the experiment again, but this time restricting the number of degree-3 nodes

In [14]:
# Lowercase n_max is a separate module setting from N_max (total nodes).
rwm.n_max = 32 #this places a restriction on the number of degree 3 nodes
T_values = run_experiment(M, walk_length)
# Right-tail empirical p-value: share of simulated statistics >= observed T_0.
p_value = len([T for T in T_values if T >= T_0]) / M
print('p_val:', p_value)

T: 0.6259175125708625 number of attempts to complete walk: 1
T: 0.788806289371614 number of attempts to complete walk: 1
T: 0.7440395384362156 number of attempts to complete walk: 1
T: 0.7262321630933869 number of attempts to complete walk: 1
T: 0.7293880550385298 number of attempts to complete walk: 1
T: 0.7418460091495805 number of attempts to complete walk: 1
T: 0.7019130333113375 number of attempts to complete walk: 1
T: 0.767655610228854 number of attempts to complete walk: 1
T: 0.742888596323307 number of attempts to complete walk: 1
T: 0.7195067087697732 number of attempts to complete walk: 1
T: 0.7064066335645816 number of attempts to complete walk: 1
T: 0.7369954403557551 number of attempts to complete walk: 1
T: 0.6739056038856436 number of attempts to complete walk: 1
T: 0.7207222374614927 number of attempts to complete walk: 1
T: 0.7484097582360765 number of attempts to complete walk: 1
T: 0.7050529390681839 number of attempts to complete walk: 1
T: 0.6898388745358303 numbe