# **Defining the Expressed Molecules in TCGA-BRCA Samples**

# Importing Libraries and Configurations

In [1]:
import os
import sys

import numpy as np
import pandas as pd

# Add the project root (two levels above the working directory) to sys.path so `config` can be imported
sys.path.append(os.path.abspath(os.path.join('..', '..')))

from config import (
    BRCA_PROCESSED_FILES_DIRS,
)

# Loading Data

In [2]:
# Read the normalized-read tables (miRNA and mRNA) for each BRCA subtype and
# for normal tissue. Every CSV sits in the directory that
# BRCA_PROCESSED_FILES_DIRS maps to the corresponding subtype key.
def _read_norm_reads(subtype_key, csv_name):
    """Load `csv_name` from the processed-files directory of `subtype_key`."""
    csv_path = os.path.join(BRCA_PROCESSED_FILES_DIRS[subtype_key], csv_name)
    return pd.read_csv(csv_path)


# Basal-like
df_basal_mirs = _read_norm_reads('basal', 'basal-like-mir-norm-reads.csv')
df_basal_mrnas = _read_norm_reads('basal', 'basal-like-mrna-norm-reads.csv')

# HER2-enriched
df_her2_mirs = _read_norm_reads('her2', 'her2-enriched-mir-norm-reads.csv')
df_her2_mrnas = _read_norm_reads('her2', 'her2-enriched-mrna-norm-reads.csv')

# Luminal A
df_luma_mirs = _read_norm_reads('lum_a', 'luminal-a-mir-norm-reads.csv')
df_luma_mrnas = _read_norm_reads('lum_a', 'luminal-a-mrna-norm-reads.csv')

# Luminal B
df_lumb_mirs = _read_norm_reads('lum_b', 'luminal-b-mir-norm-reads.csv')
df_lumb_mrnas = _read_norm_reads('lum_b', 'luminal-b-mrna-norm-reads.csv')

# Normal tissue
df_normal_mirs = _read_norm_reads('normal', 'normal-tissue-mir-norm-reads.csv')
df_normal_mrnas = _read_norm_reads('normal', 'normal-tissue-mrna-norm-reads.csv')

# Expressed Molecules

In [3]:
# MicroRNAs
query = df_basal_mirs['%_non_zero_samples'] > 50.0
df_basal_mirs = df_basal_mirs[query]['miRNA_region_id']

file_name = 'basal-like-expressed-mir.csv'
df_basal_mirs.to_csv(
    os.path.join(BRCA_PROCESSED_FILES_DIRS['basal'], file_name), index=False
)

# Messenger RNAs
query = df_basal_mrnas['%_non_zero_samples'] > 50.0
df_basal_mrnas = df_basal_mrnas[query]['gene_name']

file_name = 'basal-like-expressed-mrna.csv'
df_basal_mrnas.to_csv(
    os.path.join(BRCA_PROCESSED_FILES_DIRS['basal'], file_name), index=False
)

In [4]:
# MicroRNAs
query = df_her2_mirs['%_non_zero_samples'] > 50.0
df_her2_mirs = df_her2_mirs[query]['miRNA_region_id']

file_name = 'her2-enriched-expressed-mir.csv'
df_her2_mirs.to_csv(
    os.path.join(BRCA_PROCESSED_FILES_DIRS['her2'], file_name), index=False
)

# Messenger RNAs
query = df_her2_mrnas['%_non_zero_samples'] > 50.0
df_her2_mrnas = df_her2_mrnas[query]['gene_name']

file_name = 'her2-enriched-expressed-mrna.csv'
df_her2_mrnas.to_csv(
    os.path.join(BRCA_PROCESSED_FILES_DIRS['her2'], file_name), index=False
)

In [5]:
# MicroRNAs
query = df_luma_mirs['%_non_zero_samples'] > 50.0
df_luma_mirs = df_luma_mirs[query]['miRNA_region_id']

file_name = 'luminal-a-expressed-mir.csv'
df_luma_mirs.to_csv(
    os.path.join(BRCA_PROCESSED_FILES_DIRS['lum_a'], file_name), index=False
)

# Messenger RNAs
query = df_luma_mrnas['%_non_zero_samples'] > 50.0
df_luma_mrnas = df_luma_mrnas[query]['gene_name']

file_name = 'luminal-a-expressed-mrna.csv'
df_luma_mrnas.to_csv(
    os.path.join(BRCA_PROCESSED_FILES_DIRS['lum_a'], file_name), index=False
)

In [6]:
# MicroRNAs
query = df_lumb_mirs['%_non_zero_samples'] > 50.0
df_lumb_mirs = df_lumb_mirs[query]['miRNA_region_id']

file_name = 'luminal-b-expressed-mir.csv'
df_lumb_mirs.to_csv(
    os.path.join(BRCA_PROCESSED_FILES_DIRS['lum_b'], file_name), index=False
)

# Messenger RNAs
query = df_lumb_mrnas['%_non_zero_samples'] > 50.0
df_lumb_mrnas = df_lumb_mrnas[query]['gene_name']

file_name = 'luminal-b-expressed-mrna.csv'
df_lumb_mrnas.to_csv(
    os.path.join(BRCA_PROCESSED_FILES_DIRS['lum_b'], file_name), index=False
)

In [7]:
# MicroRNAs
query = df_normal_mirs['%_non_zero_samples'] > 50.0
df_normal_mirs = df_normal_mirs[query]['miRNA_region_id']

file_name = 'normal-tissue-expressed-mir.csv'
df_normal_mirs.to_csv(
    os.path.join(BRCA_PROCESSED_FILES_DIRS['normal'], file_name), index=False
)

# Messenger RNAs
query = df_normal_mrnas['%_non_zero_samples'] > 50.0
df_normal_mrnas = df_normal_mrnas[query]['gene_name']

file_name = 'normal-tissue-expressed-mrna.csv'
df_normal_mrnas.to_csv(
    os.path.join(BRCA_PROCESSED_FILES_DIRS['normal'], file_name), index=False
)