In [9]:
import pandas as pd
import numpy as np
import scipy as sp
import miRNET, miRNET_enrichment
# Path handed to miRNET.Targets as `path_to_miRTarBase`.
MIRTARBASE_CSV = './baseData/hsa_miRTarBase_old.csv'
Targets = miRNET.Targets(path_to_miRTarBase=MIRTARBASE_CSV)
import warnings
import collections
from matplotlib import pyplot as plt
import random
from scipy.stats import chi2_contingency
import json
import networkx as nx

In [31]:
# miRNA names (without the 'hsa-' prefix) processed by the rest of the notebook.
# NOTE(review): "up" presumably marks the up-regulated set — confirm.
miRnames_up = [
    'miR-25-3p',
    'miR-891a-3p',
    'miR-145-3p',
    'miR-325',
    'miR-223-3p',
    'miR-155-5p',
    'miR-130a-3p',
    'miR-150-5p',
    'miR-214-3p',
    'let-7d-5p',
    'miR-203a-3p',
    'miR-24-3p',
    'miR-29b-3p',
    'miR-30a-5p',
    'miR-21-5p',
]

In [32]:
# Print every miRNA name with the 'hsa-' prefix, one per line.
prefixed_names = ('hsa-' + short_name for short_name in miRnames_up)
for full_name in prefixed_names:
    print(full_name)

hsa-miR-25-3p
hsa-miR-891a-3p
hsa-miR-145-3p
hsa-miR-325
hsa-miR-223-3p
hsa-miR-155-5p
hsa-miR-130a-3p
hsa-miR-150-5p
hsa-miR-214-3p
hsa-let-7d-5p
hsa-miR-203a-3p
hsa-miR-24-3p
hsa-miR-29b-3p
hsa-miR-30a-5p
hsa-miR-21-5p


In [33]:
# Display how many miRNA names are in miRnames_up.
len(miRnames_up)

15

In [34]:
def mir_list_to_target_list(miR_names):
    """Extract key nodes for each miRNA and summarise the network sizes.

    For every name in ``miR_names`` the function:

    1. looks up the miRNA targets via ``Targets.get_targets``;
    2. builds a fresh ``miRNET.MainNet``, takes its largest connected
       component (LCC) and restricts it first to the miRNA targets and then
       to the tissue gene set returned by ``miRNET.tissue_selector`` with
       ``tissue_id=23``;
    3. extracts the key nodes with ``miRNET.KeyNodesExtractor``;
    4. writes the resulting LCC edge list to
       ``../Data/miRNAs_LCC/edge_list_<miR>.csv``.

    miRNAs for which ``Targets.get_targets`` or the key-node extraction
    returns the sentinel value ``1`` are skipped with a warning and appear
    neither in the dictionary nor in the table.

    Parameters
    ----------
    miR_names : iterable of str
        miRNA names to process.

    Returns
    -------
    list
        ``[miR_key_nodes, table]`` where ``miR_key_nodes`` maps each processed
        miRNA name to the list of its key nodes and ``table`` is a
        ``pandas.DataFrame`` with one row per processed miRNA and the columns
        ``miR``, ``n_targets``, ``n_heart_targets``, ``n_LCC`` and
        ``n_key_genes``.
    """
    colnames = ['miR', 'n_targets', 'n_heart_targets', 'n_LCC', 'n_key_genes']
    # Rows are collected in a plain list and turned into a DataFrame once at
    # the end. Growing the frame with pd.concat inside the loop is quadratic,
    # gives every row the same index label (0), forces object dtype on the
    # count columns and triggers pandas' empty-entry concat FutureWarning.
    rows = []
    miR_key_nodes = dict()
    for miR in miR_names:
        print('miR-name:', miR, sep=' ')
        miR_targets = Targets.get_targets(miR)
        if miR_targets == 1:  # sentinel: no targets were returned for this name
            warnings.warn('use another name')
            continue
        tis_gene_set = miRNET.tissue_selector(ans=0, tissue_id=23)
        MirNet = miRNET.MainNet() #Load String db and create gene-gene interaction network
        MirNet.get_LCC() #get the largest connected component from the network
        MirNet.select_nodes(miR_targets) # select the part of LCC containing only the miRNA target genes
        MirNet.select_nodes(tis_gene_set) # select the part of LCC containing only the tissue target genes
        kne = miRNET.KeyNodesExtractor(MirNet) # creating an object for extracting key nodes and storing their meta information
        key_nodes = kne.extraction()

        if key_nodes == 1:  # sentinel: extraction produced no key nodes
            warnings.warn('the key nodes do not exist')
            continue
        miR_key_nodes[miR] = list(key_nodes) #key nodes extraction

        print()

        nx.to_pandas_edgelist(MirNet.LCC).to_csv('../Data/miRNAs_LCC/edge_list_' + miR + '.csv', index=False, index_label=False)
        rows.append([
            miR,
            len(miR_targets),
            len(tis_gene_set.intersection(miR_targets)),
            len(MirNet.LCC.nodes()),
            len(miR_key_nodes[miR]),
        ])

    table = pd.DataFrame(rows, columns=colnames)
    return [miR_key_nodes, table]

In [35]:
# Run the key-node extraction for every miRNA in miRnames_up.
# The result is a two-element list: [key-node dict, summary table].
up_miRs = mir_list_to_target_list(miRnames_up)

miR-name: miR-25-3p
I found a miRNA with name: hsa-miR-25-3p
and  518 unique targets
Gene universe is...
heart muscle was used
your tissue is  heart muscle  number of genes:  7944

miR-name: miR-891a-3p
I found a miRNA with name: hsa-miR-891a-3p
and  155 unique targets
Gene universe is...
heart muscle was used
your tissue is  heart muscle  number of genes:  7944




miR-name: miR-145-3p
I found a miRNA with name: hsa-miR-145-3p
and  60 unique targets
Gene universe is...
heart muscle was used
your tissue is  heart muscle  number of genes:  7944
miR-name: miR-325
I found a miRNA with name: hsa-miR-325
and  32 unique targets
Gene universe is...
heart muscle was used
your tissue is  heart muscle  number of genes:  7944




miR-name: miR-223-3p
I found a miRNA with name: hsa-miR-223-3p
and  98 unique targets
Gene universe is...
heart muscle was used
your tissue is  heart muscle  number of genes:  7944

miR-name: miR-155-5p
I found a miRNA with name: hsa-miR-155-5p
and  904 unique targets
Gene universe is...
heart muscle was used
your tissue is  heart muscle  number of genes:  7944

miR-name: miR-130a-3p
I found a miRNA with name: hsa-miR-130a-3p
and  399 unique targets
Gene universe is...
heart muscle was used
your tissue is  heart muscle  number of genes:  7944

miR-name: miR-150-5p
I found a miRNA with name: hsa-miR-150-5p
and  534 unique targets
Gene universe is...
heart muscle was used
your tissue is  heart muscle  number of genes:  7944

miR-name: miR-214-3p
I found a miRNA with name: hsa-miR-214-3p
and  206 unique targets
Gene universe is...
heart muscle was used
your tissue is  heart muscle  number of genes:  7944

miR-name: let-7d-5p
I found a miRNA with name: hsa-let-7d-5p
and  394 unique targets

In [36]:
# Save the summary table (second element of up_miRs) as a ';'-separated CSV.
up_miRs[1].to_csv('../table_1_latest_v1.csv', sep=';')

In [37]:
# First element of up_miRs: dict mapping each processed miRNA name to its list of key nodes.
up_dict = up_miRs[0]

In [38]:
# Persist the miRNA -> key-nodes mapping as JSON.
key_genes_json = '../Data/miRNAs_key_genes/up_miRNAs_key_genes_dict.json'
with open(key_genes_json, mode='w') as json_file:
    json.dump(up_dict, json_file)

In [39]:
def get_df_edges(enrich_res, miRname):
    """Return a two-column edge table linking one miRNA to its pathways.

    Each key of ``enrich_res.react_dict`` becomes one row whose ``miR``
    column holds ``miRname`` and whose ``path`` column holds that key.
    """
    edges = [(miRname, pathway) for pathway in enrich_res.react_dict.keys()]
    return pd.DataFrame(edges, columns=['miR', 'path'])

In [40]:
# Build the miRNA -> pathway edge table: one Reactome enrichment per miRNA,
# run on that miRNA's key nodes. The per-miRNA frames are collected and
# concatenated once instead of growing data_edges with pd.concat on every
# iteration (quadratic, and concatenating onto an empty frame triggers
# pandas' empty-entry concat FutureWarning).
edge_frames = []
for miR, key_genes in up_dict.items():
    enrich_res = miRNET_enrichment.reactome_enrichment(key_genes, species='Homo sapiens')
    enrich_res = miRNET_enrichment.reac_pars(enrich_res)
    miR_edges = get_df_edges(enrich_res, miR)
    # Frames without rows add nothing to the result, so they are left out.
    if not miR_edges.empty:
        edge_frames.append(miR_edges)

if edge_frames:
    data_edges = pd.concat(edge_frames, axis=0)
else:
    # pd.concat raises on an empty list; keep the same empty two-column table.
    data_edges = pd.DataFrame(columns=['miR', 'path'])

In [41]:
# Write the miRNA -> pathway edge table as a ';'-separated CSV without the index column.
data_edges.to_csv('../Data/miRNAs_pathes_bigraphs/up_bigraph_edge.csv', index=False, index_label=False, sep=';')