In [248]:
import pandas as pd
import numpy as np
import scipy as sp
import miRNET, miRNET_enrichment
# Module-level miRTarBase lookup object built from the bundled CSV;
# mir_list_to_target_list() queries it through Targets.get_targets().
# NOTE(review): this statement sits in the middle of the import list, so the
# names imported below it are not yet bound when it runs.
Targets = miRNET.Targets(path_to_miRTarBase='./baseData/hsa_miRTarBase.csv')
import warnings
import collections
from matplotlib import pyplot as plt
import random
from scipy.stats import chi2_contingency
import json
import networkx as nx

In [53]:
# Name patterns of the miRNAs analysed below. Entries ending in '-' are used
# as prefixes: in the recorded run 'miR-221-' matched both hsa-miR-221-3p and
# hsa-miR-221-5p.
miRnames_up = [
    'miR-221-',
    'miR-451a',
    'miR-451b',
    'miR-222-',
    'miR-29a-',
    'miR-133a-',
    'miR-199a-3p',
    'miR-93-',
    'miR-497-',
]

In [263]:
def mir_list_to_target_list(miR_names):
    """Extract the key nodes of the tissue-restricted target network of each miRNA.

    For every name in ``miR_names`` the function looks up the miRNA targets,
    builds a fresh ``miRNET.MainNet``, takes its largest connected component,
    restricts it first to the miRNA targets and then to the tissue gene set
    (``tissue_id=23``; the recorded run reports it as "heart muscle"),
    extracts the key nodes and writes the resulting edge list to
    ``./Supl/edge_list_<miR>.csv``.

    Parameters
    ----------
    miR_names : iterable of str
        miRNA names (or name prefixes) handed to ``Targets.get_targets``.

    Returns
    -------
    list
        ``[miR_key_nodes, table]``. ``miR_key_nodes`` maps every processed
        miRNA name to the list of its key nodes. ``table`` is a DataFrame with
        one row per processed miRNA, a unique 0..n-1 index, and the columns
        ``miR``, ``n_targets``, ``n_heart_targets``, ``n_LCC`` and
        ``n_key_genes``.

    Notes
    -----
    Names for which ``Targets.get_targets`` returns ``1`` are skipped with a
    warning and appear in neither return value. The function relies on the
    module-level ``Targets`` object and writes files into ``./Supl/``, which
    must already exist.
    """
    colnames = ['miR', 'n_targets', 'n_heart_targets', 'n_LCC', 'n_key_genes']
    # Rows are collected in a plain list and turned into a DataFrame once at
    # the end: concatenating onto an empty frame inside the loop is quadratic,
    # hits pandas' deprecated empty-frame concat path, leaves every row with
    # index label 0 and forces the count columns to object dtype.
    summary_rows = []
    miR_key_nodes = dict()
    for miR in miR_names:
        print('miR-name:', miR, sep=' ')
        miR_targets = Targets.get_targets(miR)
        if miR_targets == 1:
            # get_targets signals "no usable match" with the sentinel value 1.
            warnings.warn('use another name')
            continue
        # NOTE(review): the arguments are loop-invariant, but the call is kept
        # per iteration because tissue_selector prints progress and it is not
        # visible here whether select_nodes mutates the set - confirm before hoisting.
        tis_gene_set = miRNET.tissue_selector(ans=0, tissue_id=23)
        MirNet = miRNET.MainNet()  # load the STRING db and create the gene-gene interaction network
        MirNet.get_LCC()  # get the largest connected component from the network
        MirNet.select_nodes(miR_targets)  # keep only the part of the LCC made of miRNA target genes
        MirNet.select_nodes(tis_gene_set)  # keep only the part of the LCC made of tissue genes
        kne = miRNET.KeyNodesExtractor(MirNet)  # extracts key nodes and stores their meta information
        miR_key_nodes[miR] = list(kne.extraction())  # key nodes extraction

        print()

        nx.to_pandas_edgelist(MirNet.LCC).to_csv('./Supl/edge_list_' + miR + '.csv', index=False, index_label=False)
        summary_rows.append([
            miR,
            len(miR_targets),
            len(tis_gene_set.intersection(miR_targets)),
            len(MirNet.LCC.nodes()),
            len(miR_key_nodes[miR]),
        ])

    table = pd.DataFrame(summary_rows, columns=colnames)
    return [miR_key_nodes, table]

In [265]:
# Run the key-node pipeline for the miRNA names in miRnames_up. The result is
# the two-element list [key-node dict, summary table]; as a side effect one
# edge-list CSV per processed miRNA is written under ./Supl/.
up_miRs = mir_list_to_target_list(miRnames_up)

miR-name: miR-221-
I found a miRNA with name: hsa-miR-221-3p hsa-miR-221-5p
and  504 unique targets
Gene universe is...
heart muscle was used
your tissue is  heart muscle  number of genes:  7944

miR-name: miR-451a
I found a miRNA with name: hsa-miR-451a
and  31 unique targets
Gene universe is...
heart muscle was used
your tissue is  heart muscle  number of genes:  7944

miR-name: miR-451b
I found a miRNA with name: hsa-miR-451b
and  150 unique targets
Gene universe is...
heart muscle was used
your tissue is  heart muscle  number of genes:  7944

miR-name: miR-222-
I found a miRNA with name: hsa-miR-222-3p hsa-miR-222-5p
and  469 unique targets
Gene universe is...
heart muscle was used
your tissue is  heart muscle  number of genes:  7944

miR-name: miR-29a-
I found a miRNA with name: hsa-miR-29a-3p hsa-miR-29a-5p
and  356 unique targets
Gene universe is...
heart muscle was used
your tissue is  heart muscle  number of genes:  7944

miR-name: miR-133a-
I found a miRNA with name: hsa-miR-

In [188]:
# First element of the pipeline result: dict mapping miRNA name -> list of key nodes.
# NOTE(review): this cell's execution count (188) predates the cell that builds
# up_miRs (265); re-run it after that cell so up_dict is not a stale value.
up_dict = up_miRs[0]

In [266]:
# Persist the miRNA -> key-genes mapping as JSON.
with open('./Supl/up_miRNAs_key_genes_dict.json', mode='w') as json_handle:
    json_handle.write(json.dumps(up_dict))

In [267]:
def get_df_edges(enrich_res, miRname):
    """Build the (miRNA, pathway) edge table for one enrichment result.

    Parameters
    ----------
    enrich_res : object
        Enrichment result exposing a ``react_dict`` mapping; only its keys
        are used.
    miRname : str
        Value repeated in the ``miR`` column for every key.

    Returns
    -------
    pandas.DataFrame
        Columns ``miR`` and ``path``, one row per key of ``react_dict`` in
        the mapping's iteration order.
    """
    edge_records = [(miRname, pathway) for pathway in enrich_res.react_dict.keys()]
    return pd.DataFrame(edge_records, columns=['miR', 'path'])

In [268]:
# Run the Reactome enrichment on the key genes of every miRNA in up_dict and
# collect (miRNA, pathway) pairs. list_paths accumulates the pathway keys of
# all miRNAs (duplicates kept); data_edges holds one row per pair.
list_paths = list()
edge_frames = list()
for miR, key_genes in up_dict.items():
    raw_enrichment = miRNET_enrichment.reactome_enrichment(key_genes, species='Homo sapiens')
    enrich_res = miRNET_enrichment.reac_pars(raw_enrichment)
    list_paths.extend(enrich_res.react_dict.keys())
    edge_frames.append(get_df_edges(enrich_res, miR))

# Concatenate once instead of growing the frame inside the loop: this avoids
# quadratic copying and the deprecated concat-with-empty-frame path, and gives
# the result a clean 0..n-1 index. pd.concat([]) raises, hence the guard.
if edge_frames:
    data_edges = pd.concat(edge_frames, axis=0, ignore_index=True)
else:
    data_edges = pd.DataFrame(columns=['miR', 'path'])

In [270]:
# Export the miRNA-pathway edge table as a ';'-separated file without the index.
data_edges.to_csv(
    './Supl/up_bigraph_edge.csv',
    sep=';',
    index=False,
    index_label=False,
)