# **Construction of MicroRNA Networks Based on Inferred Interactions for TCGA-BRCA**

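This notebook produces the AT_MN family of artifacts: for each sample group it builds an interaction network and an association network, each stored as an edge table and a node table.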

- TCGA: The Cancer Genome Atlas  
- BRCA: Breast Invasive Carcinoma

# Import Libraries and Configurations

In [1]:
import os
import sys
from collections import defaultdict
from itertools import combinations

import numpy as np
import pandas as pd

# Add project root to Python's path
sys.path.append(os.path.abspath(os.path.join('..')))

from config import (
    NETWORK_CONSTRUCTION_SETUP,
    NETWORK_DATA_DIRS,
    NETWORK_FILES,
)

# Interaction Networks

## Functions

In [2]:
def flag_inferred_interactions(group):
    """
    Mark the inferred interactions that pass the correlation and q-value
    cut-offs.

    The inferred interactions of the group are read from its interim
    directory, a binary column `is_interaction_of_interest` is appended,
    and the resulting table is written, under the same file name, to the
    processed directory of the group.

    Parameters
    ----------
    group : str
        Key used to look up the group directories in
        `NETWORK_DATA_DIRS['interim']` and
        `NETWORK_DATA_DIRS['processed']`. No normalization is applied
        here, so the key must be given exactly as it appears in the
        configuration (`create_interaction_network` passes the
        lowercase, hyphen-separated form).

    Returns
    -------
    pandas.DataFrame
        Every inferred interaction read for the group, plus the column
        `is_interaction_of_interest`:
        - 1 : `correlation` is strictly below
          `NETWORK_CONSTRUCTION_SETUP['max-correlation']` and `qvalue`
          is strictly below `NETWORK_CONSTRUCTION_SETUP['max-qvalue']`.
        - 0 : otherwise (including rows where either value is missing).

    Notes
    -----
    - The input table must provide the columns `correlation` and
      `qvalue`.
    - The file name `NETWORK_FILES['inferred-interactions']` is used both
      for the interim input and for the processed output.
    - An existing file with that name in the processed directory is
      overwritten.

    Examples
    --------
    >>> df_flagged = flag_inferred_interactions('her2-enriched')
    >>> df_flagged.head()
    mirna_name  gene_name  correlation  qvalue  is_interaction_of_interest
    0     miR-21     GeneA      -0.85      0.001               1
    1     miR-155    GeneB      -0.40      0.050               0
    """
    # Load the inferred interactions of the group from the interim directory
    interim_dir = NETWORK_DATA_DIRS['interim'][group]
    file_name = NETWORK_FILES['inferred-interactions']
    df_inferred_interactions = pd.read_csv(os.path.join(interim_dir, file_name))

    # Both values must lie strictly below their thresholds for an
    # interaction to be of interest
    max_correlation = NETWORK_CONSTRUCTION_SETUP['max-correlation']
    max_qvalue = NETWORK_CONSTRUCTION_SETUP['max-qvalue']
    passes_correlation = df_inferred_interactions['correlation'].lt(
        max_correlation
    )
    passes_qvalue = df_inferred_interactions['qvalue'].lt(max_qvalue)
    df_inferred_interactions['is_interaction_of_interest'] = (
        passes_correlation & passes_qvalue
    ).astype(int)

    # Persist the flagged table in the processed directory of the group
    processed_dir = NETWORK_DATA_DIRS['processed'][group]
    df_inferred_interactions.to_csv(
        os.path.join(processed_dir, file_name), index=False
    )

    return df_inferred_interactions

In [3]:
def create_interaction_network(group):
    """
    Build the interaction network (edges and nodes) of a group.

    The inferred interactions of the group are flagged through
    `flag_inferred_interactions`; the interactions of interest become the
    edges of the network, and the distinct edge endpoints become its
    nodes. Both tables are written as CSV files to the processed
    directory of the group.

    Parameters
    ----------
    group : str
        Display name of the group (e.g. 'Luminal A'). It is lowercased and
        its spaces are replaced with hyphens before being used as a key of
        `NETWORK_DATA_DIRS['processed']` and passed on to
        `flag_inferred_interactions`.

    Returns
    -------
    dict of pandas.DataFrame
        - 'edges' : interactions of interest with `mirna_name` renamed to
          `source` and `gene_name` renamed to `target`, without the
          columns `is_interaction_of_interest`, `accession_id` and
          `pvalue`. Missing `mirtarbase` values are replaced by an empty
          string, rows are sorted by ascending `correlation` and then
          `qvalue`, and numeric columns are rounded to 3 decimals.
        - 'nodes' : columns `id`, `label` (a copy of `id`) and `type`;
          the distinct sources come first with type 'MicroRNA', followed
          by the distinct targets with type 'Messenger RNA'.

    Notes
    -----
    - The edges are stored as `NETWORK_FILES['interaction-edges']` and the
      nodes as `NETWORK_FILES['interaction-nodes']`; existing files are
      overwritten.
    - The flagged table must contain the columns `accession_id`, `pvalue`,
      `mirna_name`, `gene_name`, `correlation` and `qvalue`.

    Examples
    --------
    >>> network = create_interaction_network('HER2-enriched')
    >>> network['edges'].head()
        source   target  correlation  qvalue mirtarbase
    0   miR-21   GeneA     -0.85      0.001    validated
    1   miR-22   GeneC     -0.80      0.002
    >>> network['nodes'].head()
        id    label          type
    0   miR-21  miR-21     MicroRNA
    1   miR-22  miR-22     MicroRNA
    2   GeneA   GeneA  Messenger RNA
    3   GeneC   GeneC  Messenger RNA
    """
    # Normalize the group name into the key used by the configuration
    group_key = group.lower().replace(' ', '-')
    processed_dir = NETWORK_DATA_DIRS['processed'][group_key]

    # Flag the inferred interactions and keep only those of interest
    df_flagged = flag_inferred_interactions(group_key)
    of_interest = df_flagged[df_flagged['is_interaction_of_interest'] == 1]

    # Shape the interactions of interest into the edge table
    df_edges = (
        of_interest
        .drop(columns=['is_interaction_of_interest', 'accession_id', 'pvalue'])
        .rename(columns={'mirna_name': 'source', 'gene_name': 'target'})
        .fillna(value={'mirtarbase': ''})
        .sort_values(by=['correlation', 'qvalue'], ascending=True)
        .reset_index(drop=True)
        .round(3)
    )

    def build_nodes(endpoint, node_type):
        # One row per distinct endpoint, in order of first appearance
        nodes = df_edges[[endpoint]].drop_duplicates(ignore_index=True)
        nodes.columns = ['id']
        nodes['label'] = nodes['id']
        nodes['type'] = node_type
        return nodes

    # MicroRNA nodes first, messenger RNA nodes afterwards
    df_nodes = pd.concat(
        [
            build_nodes('source', 'MicroRNA'),
            build_nodes('target', 'Messenger RNA'),
        ],
        ignore_index=True,
    )

    # Store the edge table and then the node table
    edges_path = os.path.join(processed_dir, NETWORK_FILES['interaction-edges'])
    df_edges.to_csv(edges_path, index=False)
    nodes_path = os.path.join(processed_dir, NETWORK_FILES['interaction-nodes'])
    df_nodes.to_csv(nodes_path, index=False)

    return {'edges': df_edges, 'nodes': df_nodes}

## Basal-like Artifacts

In [4]:
# Create files for the construction of the basal-like network
files = create_interaction_network('Basal-like')

In [5]:
# Print the DataFrame of edges for the group network
files['edges']

Unnamed: 0,source,target,mirtarbase,correlation,qvalue
0,hsa-miR-182-5p,RAB27A,,-0.564,0.000
1,hsa-miR-18a-5p,ORAI3,MIRT050737,-0.545,0.000
2,hsa-miR-18a-5p,ZBTB4,,-0.532,0.001
3,hsa-miR-96-5p,RAB27A,,-0.496,0.004
4,hsa-miR-106b-5p,ATXN1,MIRT441140,-0.495,0.004
...,...,...,...,...,...
291,hsa-miR-125b-5p,DNAL4,,-0.356,0.050
292,hsa-miR-20a-5p,PRKACB,MIRT080178,-0.356,0.050
293,hsa-miR-148a-3p,DYNLL2,MIRT025997,-0.356,0.050
294,hsa-miR-106b-5p,TGFBR2,MIRT091689,-0.356,0.050


In [6]:
# Print the DataFrame of nodes for the group network
files['nodes']

Unnamed: 0,id,label,type
0,hsa-miR-182-5p,hsa-miR-182-5p,MicroRNA
1,hsa-miR-18a-5p,hsa-miR-18a-5p,MicroRNA
2,hsa-miR-96-5p,hsa-miR-96-5p,MicroRNA
3,hsa-miR-106b-5p,hsa-miR-106b-5p,MicroRNA
4,hsa-miR-221-3p,hsa-miR-221-3p,MicroRNA
...,...,...,...
286,B4GALT5,B4GALT5,Messenger RNA
287,DNAL4,DNAL4,Messenger RNA
288,DYNLL2,DYNLL2,Messenger RNA
289,TGFBR2,TGFBR2,Messenger RNA


## HER2-enriched Artifacts

In [7]:
# Create files for the construction of the HER2-enriched network
files = create_interaction_network('HER2-enriched')

In [8]:
# Print the DataFrame of edges for the group network
files['edges']

Unnamed: 0,source,target,mirtarbase,correlation,qvalue
0,hsa-miR-141-3p,IGF2,,-0.587,0.032
1,hsa-let-7d-5p,MEF2D,MIRT735654,-0.562,0.032
2,hsa-miR-378a-3p,ACP6,,-0.562,0.032
3,hsa-miR-18a-5p,ZBTB20,,-0.559,0.032
4,hsa-miR-106b-5p,PLXDC2,,-0.558,0.032
5,hsa-let-7d-5p,SMARCC1,,-0.546,0.042
6,hsa-miR-106b-5p,SALL1,,-0.545,0.042
7,hsa-let-7d-5p,HIF1AN,,-0.539,0.042
8,hsa-let-7d-5p,PRKAR2A,,-0.537,0.042
9,hsa-miR-155-5p,VAV3,,-0.537,0.042


In [9]:
# Print the DataFrame of nodes for the group network
files['nodes']

Unnamed: 0,id,label,type
0,hsa-miR-141-3p,hsa-miR-141-3p,MicroRNA
1,hsa-let-7d-5p,hsa-let-7d-5p,MicroRNA
2,hsa-miR-378a-3p,hsa-miR-378a-3p,MicroRNA
3,hsa-miR-18a-5p,hsa-miR-18a-5p,MicroRNA
4,hsa-miR-106b-5p,hsa-miR-106b-5p,MicroRNA
5,hsa-miR-155-5p,hsa-miR-155-5p,MicroRNA
6,hsa-miR-182-5p,hsa-miR-182-5p,MicroRNA
7,hsa-miR-320b,hsa-miR-320b,MicroRNA
8,hsa-miR-30c-5p,hsa-miR-30c-5p,MicroRNA
9,IGF2,IGF2,Messenger RNA


## Luminal A Artifacts

In [10]:
# Create files for the construction of the luminal A network
files = create_interaction_network('Luminal A')

In [11]:
# Print the DataFrame of edges for the group network
files['edges']

Unnamed: 0,source,target,mirtarbase,correlation,qvalue
0,hsa-miR-200a-3p,WWTR1,,-0.477,0.0
1,hsa-miR-141-3p,PHYHIPL,,-0.452,0.0
2,hsa-miR-96-5p,SGMS2,,-0.438,0.0
3,hsa-miR-29b-3p,PCSK5,,-0.425,0.0
4,hsa-miR-342-3p,FRMD4A,,-0.425,0.0
...,...,...,...,...,...
254,hsa-miR-106b-5p,CYP2U1,,-0.300,0.0
255,hsa-miR-17-5p,SAR1B,,-0.300,0.0
256,hsa-miR-200b-3p,SLC24A4,,-0.300,0.0
257,hsa-miR-96-5p,SH3PXD2B,,-0.300,0.0


In [12]:
# Print the DataFrame of nodes for the group network
files['nodes']

Unnamed: 0,id,label,type
0,hsa-miR-200a-3p,hsa-miR-200a-3p,MicroRNA
1,hsa-miR-141-3p,hsa-miR-141-3p,MicroRNA
2,hsa-miR-96-5p,hsa-miR-96-5p,MicroRNA
3,hsa-miR-29b-3p,hsa-miR-29b-3p,MicroRNA
4,hsa-miR-342-3p,hsa-miR-342-3p,MicroRNA
...,...,...,...
246,LUZP1,LUZP1,Messenger RNA
247,CYBRD1,CYBRD1,Messenger RNA
248,TBX15,TBX15,Messenger RNA
249,SLC24A4,SLC24A4,Messenger RNA


## Luminal B Artifacts

In [13]:
# Create files for the construction of the luminal B network
files = create_interaction_network('Luminal B')

In [14]:
# Print the DataFrame of edges for the group network
files['edges']

Unnamed: 0,source,target,mirtarbase,correlation,qvalue
0,hsa-miR-200c-3p,ZMAT3,MIRT209182,-0.493,0.000
1,hsa-let-7g-5p,KLHL13,,-0.467,0.001
2,hsa-miR-30d-5p,FST,,-0.461,0.001
3,hsa-miR-200c-3p,FAM126A,,-0.458,0.001
4,hsa-miR-151a-3p,FRK,MIRT508167,-0.456,0.001
...,...,...,...,...,...
579,hsa-miR-339-5p,DAAM2,,-0.300,0.030
580,hsa-miR-30b-5p,TMTC3,,-0.300,0.030
581,hsa-miR-205-5p,RAB11FIP1,MIRT505979,-0.300,0.030
582,hsa-miR-222-3p,PANK3,MIRT046819,-0.300,0.030


In [15]:
# Print the DataFrame of nodes for the group network
files['nodes']

Unnamed: 0,id,label,type
0,hsa-miR-200c-3p,hsa-miR-200c-3p,MicroRNA
1,hsa-let-7g-5p,hsa-let-7g-5p,MicroRNA
2,hsa-miR-30d-5p,hsa-miR-30d-5p,MicroRNA
3,hsa-miR-151a-3p,hsa-miR-151a-3p,MicroRNA
4,hsa-miR-27b-3p,hsa-miR-27b-3p,MicroRNA
...,...,...,...
554,AFF2,AFF2,Messenger RNA
555,DAAM2,DAAM2,Messenger RNA
556,RAB11FIP1,RAB11FIP1,Messenger RNA
557,PANK3,PANK3,Messenger RNA


## Paired Normal Artifacts

In [16]:
# Create files for the construction of the paired normal network
files = create_interaction_network('Paired Normal')

In [17]:
# Print the DataFrame of edges for the group network
files['edges']

Unnamed: 0,source,target,mirtarbase,correlation,qvalue
0,hsa-miR-22-3p,RPS3,,-0.816,0.00
1,hsa-miR-452-5p,RPA1,,-0.810,0.00
2,hsa-miR-141-3p,TRHDE,,-0.807,0.00
3,hsa-miR-22-3p,RPL14,,-0.804,0.00
4,hsa-miR-22-3p,LRRC1,MIRT030660,-0.800,0.00
...,...,...,...,...,...
5067,hsa-miR-93-5p,CREBRF,MIRT028082,-0.332,0.05
5068,hsa-miR-93-5p,USP37,,-0.332,0.05
5069,hsa-miR-139-5p,ZC3H12A,,-0.332,0.05
5070,hsa-miR-326,FAM168A,,-0.332,0.05


In [18]:
# Print the DataFrame of nodes for the group network
files['nodes']

Unnamed: 0,id,label,type
0,hsa-miR-22-3p,hsa-miR-22-3p,MicroRNA
1,hsa-miR-452-5p,hsa-miR-452-5p,MicroRNA
2,hsa-miR-141-3p,hsa-miR-141-3p,MicroRNA
3,hsa-miR-200a-3p,hsa-miR-200a-3p,MicroRNA
4,hsa-miR-224-5p,hsa-miR-224-5p,MicroRNA
...,...,...,...
2910,NKAPD1,NKAPD1,Messenger RNA
2911,SLITRK4,SLITRK4,Messenger RNA
2912,PHF21A,PHF21A,Messenger RNA
2913,ZC3H12A,ZC3H12A,Messenger RNA


# Association Networks

## Functions

In [19]:
def infer_associations(group):
    """
    Infer associations between source nodes based on the Jaccard index.

    The interaction edges of the group are read from its processed
    directory. For every `source` node (the microRNAs in the edge table
    written by `create_interaction_network`) the set of its `target`
    neighbors is collected, and the Jaccard index (size of the
    intersection divided by size of the union of the two neighbor sets)
    is computed for every unordered pair of distinct source nodes. The
    index ranges from 0 (no common neighbors) to 1 (identical neighbor
    sets). Target nodes are never compared with each other.

    Parameters
    ----------
    group : str
        Key used to look up the group directories in
        `NETWORK_DATA_DIRS['processed']` and
        `NETWORK_DATA_DIRS['interim']`. No normalization is applied here,
        so the key must be given exactly as it appears in the
        configuration (`create_association_network` passes the
        lowercase, hyphen-separated form).

    Returns
    -------
    pandas.DataFrame
        One row per pair of source nodes, with the columns:
        - `node_a` : First node in the pair.
        - `node_b` : Second node in the pair.
        - `association` : Jaccard index between the two nodes' neighbor
          sets (float).
        Rows are sorted by descending `association`; the index is not
        reset. The table is empty when the edge table has fewer than two
        distinct source nodes.

    Notes
    -----
    - Input: `NETWORK_DATA_DIRS['processed'][group]` /
      `NETWORK_FILES['interaction-edges']`, which must provide the
      columns `source` and `target`.
    - Output: `NETWORK_DATA_DIRS['interim'][group]` /
      `NETWORK_FILES['inferred-associations']` (overwritten if present).
    - All pairs are kept, including those whose index is 0.
    - The relative order of pairs with the same index is not specified.

    Examples
    --------
    >>> df_assoc = infer_associations('basal-like')
    >>> df_assoc.head()
    node_a  node_b  association
    0 miR-21  miR-22     0.666667
    1 miR-21  miR-155    0.500000
    2 miR-22  miR-155    0.333333
    """
    # Create a DataFrame for the interactions of interest
    dir_path = NETWORK_DATA_DIRS['processed'][group]
    file_name = NETWORK_FILES['interaction-edges']
    df_interactions = pd.read_csv(os.path.join(dir_path, file_name))

    # Determine the set of neighbors for each source node. Iterating the two
    # columns directly avoids building one Series per row with `iterrows`.
    neighbors = defaultdict(set)
    edge_endpoints = zip(df_interactions['source'], df_interactions['target'])
    for source, target in edge_endpoints:
        neighbors[source].add(target)

    # Compute the Jaccard index between each combination of different nodes
    results = []
    for node_a, node_b in combinations(neighbors, 2):
        set_a = neighbors[node_a]
        set_b = neighbors[node_b]
        intersection = len(set_a & set_b)
        union = len(set_a | set_b)
        # Every node has at least one neighbor, so the union is never empty;
        # the guard only protects against a division by zero
        jaccard = intersection / union if union else 0.0
        results.append((node_a, node_b, jaccard))

    # Create a DataFrame for the inferred associations. The explicit cast
    # keeps `association` numeric even when there are no pairs at all.
    columns = ['node_a', 'node_b', 'association']
    df_associations = pd.DataFrame(results, columns=columns) \
        .astype({'association': float}) \
        .sort_values(by='association', ascending=False)

    # Store the DataFrame of inferred associations
    dir_path = NETWORK_DATA_DIRS['interim'][group]
    file_name = NETWORK_FILES['inferred-associations']
    df_associations.to_csv(os.path.join(dir_path, file_name), index=False)

    return df_associations

In [20]:
def flag_inferred_associations(group):
    """
    Mark the inferred associations that exceed the association threshold.

    The pairwise Jaccard indices of the group are computed by
    `infer_associations`; a binary column `is_association_of_interest` is
    then appended and the flagged table is written to the processed
    directory of the group.

    Parameters
    ----------
    group : str
        Key used to look up the group directory in
        `NETWORK_DATA_DIRS['processed']`; it is also passed unchanged to
        `infer_associations`. No normalization is applied here.

    Returns
    -------
    pandas.DataFrame
        The table returned by `infer_associations` with the additional
        column `is_association_of_interest`:
        - 1 : `association` is strictly greater than
          `NETWORK_CONSTRUCTION_SETUP['min-association']`.
        - 0 : otherwise.

    Notes
    -----
    - The output is stored as `NETWORK_FILES['inferred-associations']` in
      the processed directory of the group, overwriting an existing file.

    Examples
    --------
    >>> df_flagged = flag_inferred_associations('basal-like')
    >>> df_flagged.head()
    node_a  node_b  association  is_association_of_interest
    0 miR-21  miR-22     0.67                 1
    1 miR-21  miR-155    0.50                 1
    2 miR-22  miR-155    0.33                 0
    """
    # Compute the pairwise associations of the group
    df_associations = infer_associations(group)

    # An association is of interest when it is strictly above the threshold
    threshold = NETWORK_CONSTRUCTION_SETUP['min-association']
    above_threshold = df_associations['association'].gt(threshold)
    df_associations['is_association_of_interest'] = above_threshold.astype(int)

    # Persist the flagged table in the processed directory of the group
    output_path = os.path.join(
        NETWORK_DATA_DIRS['processed'][group],
        NETWORK_FILES['inferred-associations'],
    )
    df_associations.to_csv(output_path, index=False)

    return df_associations

In [21]:
def create_association_network(group):
    """
    Build the association network (edges and nodes) of a group.

    The associations of the group are inferred and flagged through
    `flag_inferred_associations`; the associations of interest become the
    edges of the network, and the distinct edge endpoints become its
    nodes. Both tables are written as CSV files to the processed
    directory of the group.

    Parameters
    ----------
    group : str
        Display name of the group (e.g. 'Paired Normal'). It is lowercased
        and its spaces are replaced with hyphens before being used as a
        key of `NETWORK_DATA_DIRS['processed']` and passed on to
        `flag_inferred_associations`.

    Returns
    -------
    dict of pandas.DataFrame
        - 'edges' : associations of interest with the columns `source`
          (from `node_a`), `target` (from `node_b`) and `association`
          rounded to 3 decimals, in the order delivered by
          `flag_inferred_associations` and re-indexed from 0.
        - 'nodes' : columns `id`, `label` (a copy of `id`) and `type`,
          one row per distinct edge endpoint (all sources first, then the
          targets not already listed). Both tables are empty when no
          association is of interest.

    Notes
    -----
    - Every node is given the type 'MicroRNA'.
    - The edges are stored as `NETWORK_FILES['association-edges']` and the
      nodes as `NETWORK_FILES['association-nodes']`; existing files are
      overwritten.

    Examples
    --------
    >>> network = create_association_network('Basal-like')
    >>> network['edges'].head()
    source   target  association
    0  miR-21   miR-22        0.67
    1  miR-21  miR-155        0.50
    >>> network['nodes'].head()
        id    label     type
    0   miR-21  miR-21  MicroRNA
    1   miR-22  miR-22  MicroRNA
    2  miR-155 miR-155  MicroRNA
    """
    # Normalize the group name into the key used by the configuration
    group_key = group.lower().replace(' ', '-')
    processed_dir = NETWORK_DATA_DIRS['processed'][group_key]

    # Infer the associations and flag the associations of interest
    df_flagged = flag_inferred_associations(group_key)
    of_interest = df_flagged[df_flagged['is_association_of_interest'] == 1]

    # Shape the associations of interest into the edge table
    df_edges = (
        of_interest
        .drop(columns=['is_association_of_interest'])
        .rename(columns={'node_a': 'source', 'node_b': 'target'})
        .reset_index(drop=True)
        .round(3)
    )

    # Collect the ids of both edge endpoints (sources first, then targets)
    # and keep the first occurrence of each id
    endpoint_ids = [
        df_edges[[endpoint]].rename(columns={endpoint: 'id'})
        for endpoint in ('source', 'target')
    ]
    df_nodes = pd.concat(endpoint_ids, ignore_index=True)
    df_nodes = df_nodes.drop_duplicates(ignore_index=True)
    df_nodes['label'] = df_nodes['id']
    df_nodes['type'] = 'MicroRNA'

    # Store the edge table and then the node table
    edges_path = os.path.join(processed_dir, NETWORK_FILES['association-edges'])
    df_edges.to_csv(edges_path, index=False)
    nodes_path = os.path.join(processed_dir, NETWORK_FILES['association-nodes'])
    df_nodes.to_csv(nodes_path, index=False)

    return {'edges': df_edges, 'nodes': df_nodes}

## Basal-like Artifacts

In [22]:
# Create files for the construction of the basal-like network
files = create_association_network('Basal-like')

In [23]:
# Print the DataFrame of edges for the group network
files['edges']

Unnamed: 0,source,target,association
0,hsa-miR-19a-3p,hsa-miR-19b-3p,0.667
1,hsa-miR-17-5p,hsa-miR-20a-5p,0.571
2,hsa-miR-23a-3p,hsa-miR-151a-3p,0.5
3,hsa-miR-27a-3p,hsa-miR-128-3p,0.25
4,hsa-miR-221-3p,hsa-miR-222-3p,0.25
5,hsa-miR-20a-5p,hsa-miR-130b-3p,0.167
6,hsa-miR-29b-3p,hsa-miR-29a-3p,0.167
7,hsa-miR-200b-3p,hsa-miR-200c-3p,0.158
8,hsa-miR-18a-5p,hsa-miR-19b-3p,0.143
9,hsa-miR-106a-5p,hsa-miR-17-5p,0.136


In [24]:
# Print the DataFrame of nodes for the group network
files['nodes']

Unnamed: 0,id,label,type
0,hsa-miR-19a-3p,hsa-miR-19a-3p,MicroRNA
1,hsa-miR-17-5p,hsa-miR-17-5p,MicroRNA
2,hsa-miR-23a-3p,hsa-miR-23a-3p,MicroRNA
3,hsa-miR-27a-3p,hsa-miR-27a-3p,MicroRNA
4,hsa-miR-221-3p,hsa-miR-221-3p,MicroRNA
5,hsa-miR-20a-5p,hsa-miR-20a-5p,MicroRNA
6,hsa-miR-29b-3p,hsa-miR-29b-3p,MicroRNA
7,hsa-miR-200b-3p,hsa-miR-200b-3p,MicroRNA
8,hsa-miR-18a-5p,hsa-miR-18a-5p,MicroRNA
9,hsa-miR-106a-5p,hsa-miR-106a-5p,MicroRNA


## HER2-enriched Artifacts

In [25]:
# Create files for the construction of the HER2-enriched network
files = create_association_network('HER2-enriched')

In [26]:
# Print the DataFrame of edges for the group network
files['edges']

Unnamed: 0,source,target,association


In [27]:
# Print the DataFrame of nodes for the group network
files['nodes']

Unnamed: 0,id,label,type


## Luminal A Artifacts

In [28]:
# Create files for the construction of the luminal A network
files = create_association_network('Luminal A')

In [29]:
# Print the DataFrame of edges for the group network
files['edges']

Unnamed: 0,source,target,association
0,hsa-miR-365b-3p,hsa-miR-365a-3p,1.0
1,hsa-miR-30c-5p,hsa-miR-324-5p,0.5
2,hsa-miR-20a-5p,hsa-miR-17-5p,0.273
3,hsa-miR-20a-5p,hsa-miR-92a-3p,0.25
4,hsa-miR-33b-5p,hsa-miR-33a-5p,0.2
5,hsa-miR-200a-3p,hsa-miR-141-3p,0.2
6,hsa-miR-193b-3p,hsa-miR-148b-3p,0.167
7,hsa-miR-33b-5p,hsa-miR-148b-3p,0.125
8,hsa-miR-29b-3p,hsa-miR-29c-3p,0.114
9,hsa-miR-193b-3p,hsa-miR-33b-5p,0.111


In [30]:
# Print the DataFrame of nodes for the group network
files['nodes']

Unnamed: 0,id,label,type
0,hsa-miR-365b-3p,hsa-miR-365b-3p,MicroRNA
1,hsa-miR-30c-5p,hsa-miR-30c-5p,MicroRNA
2,hsa-miR-20a-5p,hsa-miR-20a-5p,MicroRNA
3,hsa-miR-33b-5p,hsa-miR-33b-5p,MicroRNA
4,hsa-miR-200a-3p,hsa-miR-200a-3p,MicroRNA
5,hsa-miR-193b-3p,hsa-miR-193b-3p,MicroRNA
6,hsa-miR-29b-3p,hsa-miR-29b-3p,MicroRNA
7,hsa-miR-192-5p,hsa-miR-192-5p,MicroRNA
8,hsa-miR-365a-3p,hsa-miR-365a-3p,MicroRNA
9,hsa-miR-324-5p,hsa-miR-324-5p,MicroRNA


## Luminal B Artifacts

In [31]:
# Create files for the construction of the luminal B network
files = create_association_network('Luminal B')

In [32]:
# Print the DataFrame of edges for the group network
files['edges']

Unnamed: 0,source,target,association
0,hsa-miR-15a-5p,hsa-miR-30e-5p,1.0
1,hsa-miR-381-3p,hsa-miR-299-5p,1.0
2,hsa-miR-378a-3p,hsa-miR-378c,0.333
3,hsa-miR-151a-3p,hsa-miR-212-3p,0.333
4,hsa-miR-150-5p,hsa-miR-378c,0.333
5,hsa-miR-26a-5p,hsa-miR-26b-5p,0.333
6,hsa-miR-501-3p,hsa-miR-502-3p,0.286
7,hsa-let-7d-5p,hsa-miR-107,0.25
8,hsa-miR-1271-5p,hsa-miR-378c,0.25
9,hsa-miR-378a-3p,hsa-miR-150-5p,0.2


In [33]:
# Print the DataFrame of nodes for the group network
files['nodes']

Unnamed: 0,id,label,type
0,hsa-miR-15a-5p,hsa-miR-15a-5p,MicroRNA
1,hsa-miR-381-3p,hsa-miR-381-3p,MicroRNA
2,hsa-miR-378a-3p,hsa-miR-378a-3p,MicroRNA
3,hsa-miR-151a-3p,hsa-miR-151a-3p,MicroRNA
4,hsa-miR-150-5p,hsa-miR-150-5p,MicroRNA
5,hsa-miR-26a-5p,hsa-miR-26a-5p,MicroRNA
6,hsa-miR-501-3p,hsa-miR-501-3p,MicroRNA
7,hsa-let-7d-5p,hsa-let-7d-5p,MicroRNA
8,hsa-miR-1271-5p,hsa-miR-1271-5p,MicroRNA
9,hsa-let-7i-5p,hsa-let-7i-5p,MicroRNA


## Paired Normal Artifacts

In [34]:
# Create files for the construction of the paired normal network
files = create_association_network('Paired Normal')

In [35]:
# Print the DataFrame of edges for the group network
files['edges']

Unnamed: 0,source,target,association
0,hsa-miR-199b-3p,hsa-miR-199a-3p,1.0
1,hsa-miR-365a-3p,hsa-miR-365b-3p,1.0
2,hsa-miR-330-5p,hsa-miR-874-3p,0.5
3,hsa-miR-378a-3p,hsa-miR-378c,0.479
4,hsa-miR-141-3p,hsa-miR-200a-3p,0.442
5,hsa-let-7b-5p,hsa-let-7c-5p,0.385
6,hsa-miR-497-5p,hsa-miR-299-5p,0.333
7,hsa-miR-497-5p,hsa-miR-503-5p,0.333
8,hsa-miR-20b-5p,hsa-miR-106a-5p,0.325
9,hsa-miR-181b-5p,hsa-miR-181d-5p,0.312


In [36]:
# Print the DataFrame of nodes for the group network
files['nodes']

Unnamed: 0,id,label,type
0,hsa-miR-199b-3p,hsa-miR-199b-3p,MicroRNA
1,hsa-miR-365a-3p,hsa-miR-365a-3p,MicroRNA
2,hsa-miR-330-5p,hsa-miR-330-5p,MicroRNA
3,hsa-miR-378a-3p,hsa-miR-378a-3p,MicroRNA
4,hsa-miR-141-3p,hsa-miR-141-3p,MicroRNA
...,...,...,...
56,hsa-miR-17-5p,hsa-miR-17-5p,MicroRNA
57,hsa-miR-708-5p,hsa-miR-708-5p,MicroRNA
58,hsa-miR-125b-5p,hsa-miR-125b-5p,MicroRNA
59,hsa-miR-424-5p,hsa-miR-424-5p,MicroRNA
