In [1]:
import pandas as pd
import numpy as np
import scipy as sp
import miRNET, miRNET_enrichment
Targets = miRNET.Targets(path_to_miRTarBase='./baseData/hsa_miRTarBase.csv')
import warnings
import collections
from matplotlib import pyplot as plt
import random
from scipy.stats import chi2_contingency
import json
import networkx as nx

In [2]:
######################################
##Monte Carlo for up-regulated miRNAs#
######################################

In [3]:
with open('./addData/miR_key_dict.json') as json_file:
    miR_key_dict = json.load(json_file)

with open('./addData/miR_path_dict.json') as json_file:
    dict_miR_to_paths = json.load(json_file)

In [43]:
data_edges = pd.read_csv('../miRNAs_pathes_bigraphs/up_bigraph_edge.csv', sep=';')

bi_g = nx.from_pandas_edgelist(data_edges, source='miR', target='path')
if not nx.is_connected(bi_g):
    bi_g = sorted(nx.connected_component_subgraphs(bi_g), key=len, reverse=True)[0]

degr_cnt = dict(bi_g.degree)
degr_cnt = dict(sorted(degr_cnt.items(), key=lambda item: item[1], reverse=True))

keys = list(degr_cnt.keys())
for key in keys:
    if 'miR-' in key:
        degr_cnt.pop(key)

count = collections.Counter(degr_cnt.values())

#n_miRNA: n_pathway; ex: 1 miRNA target 216 patways
net_count = dict(sorted(count.items(), key=lambda item: item[1], reverse=True))

In [4]:
dict_for_mc = dict()
for miR in miR_key_dict.keys():
    if 1351 > miR_key_dict[miR]['targets'] > 51:
        if miR_key_dict[miR]['LCC'] > 5:
            if len(miR_key_dict[miR]['key_genes']) > 5:
                dict_for_mc[miR] = miR_key_dict[miR]

In [115]:
n_miRNA = 12
n_iter = 100
n_ex_iter = 100

result_df = pd.DataFrame(columns=['count'], data=[i for i in range(n_miRNA+1)])

ex_result = list()
seed = 1
for j in range(n_ex_iter):
    
    result_df = pd.DataFrame(columns=['count'], data=[i for i in range(n_miRNA+1)])

    for i in range(n_iter):
        np.random.seed(seed)
        seed += 1
        miR_set = np.random.choice(list(dict_for_mc.keys()), size=n_miRNA)

        list_paths = list()
        for miR in miR_set:
            list_paths = list_paths + dict_miR_to_paths[miR]

        count_dict = collections.Counter(collections.Counter(list_paths).values())

        add_col = pd.DataFrame.from_dict(count_dict, orient='index', columns=['count'])
        result_df = pd.concat([result_df, add_col], axis=1)
        
    ex_result.append(list(result_df.iloc[:,1:].mean(axis=1)))

In [151]:
result = pd.DataFrame(np.matrix(ex_result))
percentile = (2.5, 50, 97.5)
print('perc:', *percentile, sep='\t')
for i in range(1,8):
    print(i, *np.percentile(result.iloc[:,i], percentile).round(2), sep='\t')

perc:	2.5	50	97.5
1	179.86	194.96	205.44
2	82.52	88.7	99.72
3	35.4	40.48	45.28
4	14.54	16.81	18.94
5	5.74	6.89	8.7
6	2.83	3.62	4.9
7	1.79	2.49	3.81


In [161]:
observed_point = np.array(list(net_count.values()))
expected_point = np.array(list(result.iloc[:,1:].median(axis=0))[0:7])
print(observed_point)
print(sp.stats.chisquare(observed_point, expected_point))

[216  49  36  18  10   6   3]
Power_divergenceResult(statistic=23.69209961091193, pvalue=0.0005949205289592848)
