## Finding differentially expressed miRNAs between LUAD stages I, II, III, IV and normal tissue using logit regression
### Utilize Group Lasso with MISIM miRNA similarity data

In [1]:
# Necessary imports

import os
import pandas
import numpy as np

from definitions import ROOT_DIR

## Load normal and cancer miRNA expression data

In [2]:
from definitions import ROOT_DIR

# Load files into pandas data frames
mirna_tumor_df = pandas.read_csv(os.path.join(ROOT_DIR, "data/processed/miRNA/tumor_miRNA.csv"))
mirna_normal_df = pandas.read_csv(os.path.join(ROOT_DIR, "data/processed/miRNA/normal_miRNA.csv"))
clinical_df = pandas.read_csv(os.path.join(ROOT_DIR, "data/processed/clinical/clinical.csv"))
validated_miRNA_csv = pandas.read_csv(os.path.join(ROOT_DIR, 'data/external/validated_luad_miRNAs_miRCancer.csv'))

# Print data frame shapes
print "mirna_tumor_df.shape", mirna_tumor_df.shape, ', nulls:', mirna_tumor_df.isnull().sum().sum()
print "mirna_normal_df.shape", mirna_normal_df.shape, ', nulls:', mirna_normal_df.isnull().sum().sum()
print 'validated_miRNAs.shape', validated_miRNA_csv.shape

# Merge normal and tumor miRNA expression profiles with clinical cancer stage data
mirna_normal = pandas.merge(clinical_df[['patient_barcode', 'pathologic_stage']], mirna_normal_df, on='patient_barcode')
mirna_normal['pathologic_stage'] = 'normal'
mirna_tumor = pandas.merge(clinical_df[['patient_barcode', 'pathologic_stage']], mirna_tumor_df, on='patient_barcode')

# Map stage IA to stage I, IB to I, etc. ...
pathologic_stage_map = {'Stage IA': 'Stage I', 'Stage IB': 'Stage I', 
                        'Stage IIA': 'Stage II', 'Stage IIB': 'Stage II', 
                        'Stage IIIA': 'Stage III', 'Stage IIIB': 'Stage III'}
mirna_tumor.replace({'pathologic_stage': pathologic_stage_map}, inplace=True)

# Store list of all miRNA's in miRNA expression data
mirna_list = list(mirna_tumor.columns)[2:]

# Print quick overview of data
print mirna_normal['pathologic_stage'].value_counts().sort_index(axis=0)
print mirna_tumor['pathologic_stage'].value_counts().sort_index(axis=0)

mirna_tumor_df.shape (513, 1882) , nulls: 0
mirna_normal_df.shape (46, 1882) , nulls: 0
validated_miRNAs.shape (34, 2)
normal    46
Name: pathologic_stage, dtype: int64
Stage I      277
Stage II     121
Stage III     84
Stage IV      24
Name: pathologic_stage, dtype: int64


## Load normal & cancer Gene Expression data

In [3]:
gene_exp_tumor_df = pandas.read_table(os.path.join(ROOT_DIR, 'data/processed/gene_expression/tumor/READ__illuminahiseq_rnaseqv2__GeneExp.txt'), 
                                      header=0, delimiter='\t')
gene_exp_normal_df = pandas.read_table(os.path.join(ROOT_DIR, 'data/processed/gene_expression/normal/READ__illuminahiseq_rnaseqv2__GeneExp.txt'), 
                                      header=0, delimiter='\t')

gene_exp_tumor_df.rename(columns=lambda x: x[:12], inplace=True)
gene_exp_normal_df.rename(columns=lambda x: x[:12], inplace=True)

print gene_exp_tumor_df.shape
print gene_exp_normal_df.shape

(20530, 517)
(20530, 61)


## Reshape gene expression data frames to have shape (patients x genes)

In [4]:
print len(list(gene_exp_tumor_df.columns))-2
print len(list(gene_exp_normal_df.columns))-2

# Remove entries with unknown Gene Symbol
gene_exp_tumor_df = gene_exp_tumor_df[gene_exp_tumor_df.GeneSymbol != '?']
gene_exp_normal_df = gene_exp_normal_df[gene_exp_normal_df.GeneSymbol != '?']

# Get list of all gene_symbols
gene_symbols = list(gene_exp_tumor_df['GeneSymbol'])
# Get list of tumor and normal patient_barcode
gene_exp_tumor_patient_barcodes = list(gene_exp_tumor_df.columns)[2:]
gene_exp_normal_patient_barcodes = list(gene_exp_normal_df.columns)[2:]

# Drop EntrezID column
gene_exp_tumor = gene_exp_tumor_df.drop(['EntrezID', 'GeneSymbol'], axis=1)
gene_exp_normal = gene_exp_normal_df.drop(['EntrezID', 'GeneSymbol'], axis=1)

# Reshaping data frame to have columns for GeneSymbols, and rows of patients
gene_exp_tumor = gene_exp_tumor.T
gene_exp_normal = gene_exp_normal.T
gene_exp_tumor.columns = gene_symbols
gene_exp_normal.columns = gene_symbols

# Add column for patients barcode
gene_exp_tumor['patient_barcode'] = gene_exp_tumor.index
gene_exp_normal['patient_barcode'] = gene_exp_normal.index

print "gene_symbols", len(gene_symbols)
print "gene_exp_tumor_patients", len(gene_exp_tumor_patient_barcodes)
print "gene_exp_normal_patients", len(gene_exp_normal_patient_barcodes)

print gene_exp_tumor.shape
print gene_exp_normal.shape

515
59
gene_symbols 20502
gene_exp_tumor_patients 515
gene_exp_normal_patients 59
(515, 20503)
(59, 20503)


## Filter samples with matched Gene Expression data and miRNA data

In [5]:
# Merge normal and tumor miRNA expression profiles with clinical cancer stage data
merged_normal_patients = pandas.merge(gene_exp_normal[['patient_barcode']], mirna_normal, on='patient_barcode')[['patient_barcode', 'pathologic_stage']]
merged_tumor_patients = pandas.merge(gene_exp_tumor[['patient_barcode']], mirna_tumor, on='patient_barcode')[['patient_barcode', 'pathologic_stage']]

# Print quick overview of data
print merged_tumor_patients['pathologic_stage'].value_counts().sort_index(axis=0)
print merged_normal_patients['pathologic_stage'].value_counts().sort_index(axis=0)

# Filter samples
mirna_tumor = mirna_tumor[mirna_tumor['patient_barcode'].isin(merged_tumor_patients['patient_barcode'])]
gene_exp_tumor = gene_exp_tumor[gene_exp_tumor['patient_barcode'].isin(merged_tumor_patients['patient_barcode'])]
mirna_normal = mirna_normal[mirna_normal['patient_barcode'].isin(merged_normal_patients['patient_barcode'])]
gene_exp_normal = gene_exp_normal[gene_exp_normal['patient_barcode'].isin(merged_normal_patients['patient_barcode'])]

# Drop categorical columns
mirna_tumor.drop(['patient_barcode', 'pathologic_stage'], 1, inplace=True)
mirna_normal.drop(['patient_barcode', 'pathologic_stage'], 1, inplace=True)
gene_exp_tumor.drop(['patient_barcode'], 1, inplace=True)
gene_exp_normal.drop(['patient_barcode'], 1, inplace=True)

print "mirna_tumor", mirna_tumor.shape, mirna_tumor.isnull().sum().sum()
print "gene_exp_tumor", gene_exp_tumor.shape, gene_exp_tumor.isnull().sum().sum()
print "mirna_normal", mirna_normal.shape, mirna_normal.isnull().sum().sum()
print "gene_exp_normal", gene_exp_normal.shape, gene_exp_normal.isnull().sum().sum()

Stage I      275
Stage II     120
Stage III     84
Stage IV      24
Name: pathologic_stage, dtype: int64
normal    20
Name: pathologic_stage, dtype: int64
mirna_tumor (510, 1881) 0
gene_exp_tumor (510, 20502)

 0
mirna_normal (20, 1881) 0
gene_exp_normal (20, 20502) 0


## Build miRNA-target relationship network to identify dysregulated miRNA-target pairs
## Method: Xu et al. (xu2011prioritizing)

In [6]:
import networkx as nx
from src.models.miRNA_target_network import miRNATargetNetwork

# Build the network with a cutoff of 0.8 and train it on the two conditions:
# A = tumor samples, B = normal samples.
# NOTE(review): the miRNA and target frames of each condition presumably need
# their rows in the same patient order -- confirm against miRNATargetNetwork.train.
network = miRNATargetNetwork(threshold=0.8)
network.train(
    miRNAs_A=mirna_tumor,
    targets_A=gene_exp_tumor,
    miRNAs_B=mirna_normal,
    targets_B=gene_exp_normal
)
# Debug aid, left disabled:
# print nx.bipartite.sets(network.B)

n_A 510
n_B 20


  ((n_B - 1) * miRNA_B_m_std * np.std(targets_B[t]))
  if np.abs(dys) >= self.threshold:


  ((n_A - 1) * miRNA_A_m_std * np.std(targets_A[t]))


hsa-let-7a-1 : 4


hsa-let-7a-2 : 6


hsa-let-7a-3 : 3


hsa-let-7b : 5


hsa-let-7c : 7


hsa-let-7d : 2


hsa-let-7e : 2


hsa-let-7f-1 : 1


hsa-let-7f-2 : 1


hsa-let-7g : 4


hsa-let-7i : 10


hsa-mir-100 : 2


hsa-mir-101-1 : 1


hsa-mir-101-2 : 1


hsa-mir-103a-1 : 10


hsa-mir-103a-2 : 9


hsa-mir-103b-1 : 1


hsa-mir-103b-2 : 1


hsa-mir-105-1 : 106


hsa-mir-105-2 : 187


hsa-mir-106a : 121


hsa-mir-106b : 1


hsa-mir-107 : 44


hsa-mir-10a : 2


hsa-mir-10b : 36


hsa-mir-1-1 : 1


hsa-mir-1178 : 1


hsa-mir-1179 : 213


hsa-mir-1180 : 2


hsa-mir-1181 : 61


hsa-mir-1182 : 1


hsa-mir-1183 : 1


hsa-mir-1184-1 : 1


hsa-mir-1184-2 : 1


hsa-mir-1184-3 : 1


hsa-mir-1185-1 : 16


hsa-mir-1185-2 : 183


hsa-mir-1193 : 1


hsa-mir-1197 : 50


hsa-mir-1199 : 71


hsa-mir-1-2 : 1


hsa-mir-1200 : 1


hsa-mir-1202 : 1


hsa-mir-1203 : 1


hsa-mir-1204 : 1


hsa-mir-1205 : 1


hsa-mir-1206 : 1


hsa-mir-1207 : 1


hsa-mir-1208 : 1


hsa-mir-122 : 18


hsa-mir-1224 : 146


hsa-mir-1225 : 34


hsa-mir-1226 : 2


hsa-mir-1227 : 2


hsa-mir-1228 : 4


hsa-mir-1229 : 3


hsa-mir-1231 : 1


hsa-mir-1233-1 : 1


hsa-mir-1233-2 : 1


hsa-mir-1234 : 25


hsa-mir-1236 : 1


hsa-mir-1237 : 10


hsa-mir-1238 : 30


hsa-mir-124-1 : 85


hsa-mir-124-2 : 130


hsa-mir-1243 : 512


hsa-mir-124-3 : 55


hsa-mir-1244-1 : 1


hsa-mir-1244-2 : 1


hsa-mir-1244-3 : 1


hsa-mir-1244-4 : 1


hsa-mir-1245a : 4


hsa-mir-1245b : 49


hsa-mir-1246 : 54


hsa-mir-1247 : 1


hsa-mir-1248 : 6


hsa-mir-1249 : 4


hsa-mir-1250 : 71


hsa-mir-1251 : 39


hsa-mir-1252 : 1


hsa-mir-1253 : 1


hsa-mir-1254-1 : 6


hsa-mir-1254-2 : 8


hsa-mir-1255a : 15


hsa-mir-1255b-1 : 1


hsa-mir-1255b-2 : 1


hsa-mir-1256 : 12


hsa-mir-1257 : 1


hsa-mir-1258 : 5


hsa-mir-125a : 2


hsa-mir-125b-1 : 1


hsa-mir-125b-2 : 1


hsa-mir-126 : 34


hsa-mir-1260a : 476


hsa-mir-1260b : 46


hsa-mir-1261 : 1


hsa-mir-1262 : 10


hsa-mir-1263 : 1


hsa-mir-1264 : 1


hsa-mir-1265 : 1


hsa-mir-1266 : 1


hsa-mir-1267 : 1


hsa-mir-1268a : 1


hsa-mir-1268b : 1


hsa-mir-1269a : 189


hsa-mir-1269b : 327


hsa-mir-127 : 12


hsa-mir-1270 : 7


hsa-mir-1271 : 4


hsa-mir-1272 : 1


hsa-mir-1273a : 1


hsa-mir-1273c : 116


hsa-mir-1273d : 1


hsa-mir-1273e : 1


hsa-mir-1273f : 1


hsa-mir-1273g : 1


hsa-mir-1273h : 1


hsa-mir-1275 : 23


hsa-mir-1276 : 67


hsa-mir-1277 : 38


hsa-mir-1278 : 114


hsa-mir-1279 : 1


hsa-mir-1281 : 72


hsa-mir-128-1 : 3


hsa-mir-1282 : 1


hsa-mir-128-2 : 3


hsa-mir-1283-1 : 38


hsa-mir-1283-2 : 15


hsa-mir-1284 : 3


hsa-mir-1285-1 : 1


hsa-mir-1285-2 : 1


hsa-mir-1286 : 53


hsa-mir-1287 : 2


hsa-mir-1288 : 31


hsa-mir-1289-1 : 1


hsa-mir-1289-2 : 38


hsa-mir-1290 : 1


hsa-mir-1291 : 5


hsa-mir-129-1 : 131


hsa-mir-1292 : 23


hsa-mir-129-2 : 101


hsa-mir-1293 : 469


hsa-mir-1294 : 3


hsa-mir-1295a : 4


hsa-mir-1295b : 10


hsa-mir-1296 : 14


hsa-mir-1297 : 1


hsa-mir-1298 : 72


hsa-mir-1299 : 1


hsa-mir-1301 : 17


hsa-mir-1302-1 : 1


hsa-mir-1302-10 : 1


hsa-mir-1302-11 : 1


hsa-mir-1302-2 : 1


hsa-mir-1302-3 : 1


hsa-mir-1302-4 : 1


hsa-mir-1302-5 : 1


hsa-mir-1302-6 : 1


hsa-mir-1302-7 : 1


hsa-mir-1302-8 : 1


hsa-mir-1302-9 : 1


hsa-mir-1303 : 28


hsa-mir-1304 : 121


hsa-mir-1305 : 8


hsa-mir-1306 : 1


hsa-mir-1307 : 1


hsa-mir-130a : 11


hsa-mir-130b : 3


hsa-mir-132 : 22


hsa-mir-1321 : 1


hsa-mir-1322 : 1


hsa-mir-1323 : 51


hsa-mir-1324 : 1


hsa-mir-133a-1 : 7


hsa-mir-133a-2 : 103


hsa-mir-133b : 147


hsa-mir-134 : 1


hsa-mir-1343 : 2


hsa-mir-135a-1 : 2


hsa-mir-135a-2 : 4


hsa-mir-135b : 4


hsa-mir-136 : 29


hsa-mir-137 : 114


hsa-mir-138-1 : 13


hsa-mir-138-2 : 3


hsa-mir-139 : 1


hsa-mir-140 : 1


hsa-mir-141 : 2


hsa-mir-142 : 44


hsa-mir-143 : 1


hsa-mir-144 : 70


hsa-mir-145 : 2


hsa-mir-1468 : 4


hsa-mir-1469 : 1


hsa-mir-146a : 25


hsa-mir-146b : 3


hsa-mir-1470 : 1


hsa-mir-1471 : 1


hsa-mir-147a : 1


hsa-mir-147b : 169


hsa-mir-148a : 1


hsa-mir-148b : 4


hsa-mir-149 : 2


hsa-mir-150 : 119


hsa-mir-151a : 1


hsa-mir-151b : 14


hsa-mir-152 : 1


hsa-mir-153-1 : 1


hsa-mir-153-2 : 3


hsa-mir-1537 : 23


hsa-mir-1538 : 15


hsa-mir-1539 : 836


hsa-mir-154 : 10


hsa-mir-155 : 16


hsa-mir-1587 : 1


hsa-mir-15a : 15


hsa-mir-15b : 4


hsa-mir-16-1 : 33


hsa-mir-16-2 : 38


hsa-mir-17 : 5


hsa-mir-181a-1 : 19


hsa-mir-181a-2 : 16


hsa-mir-181b-1 : 19


hsa-mir-181b-2 : 17


hsa-mir-181c : 3


hsa-mir-181d : 5


hsa-mir-182 : 1


hsa-mir-1825 : 1


hsa-mir-1827 : 64


hsa-mir-183 : 2


hsa-mir-184 : 4


hsa-mir-185 : 64


hsa-mir-186 : 5


hsa-mir-187 : 2


hsa-mir-188 : 18


hsa-mir-18a : 33


hsa-mir-18b : 9


hsa-mir-1908 : 5


hsa-mir-1909 : 200


hsa-mir-190a : 23


hsa-mir-190b : 70


hsa-mir-191 : 4


hsa-mir-1910 : 21


hsa-mir-1911 : 67


hsa-mir-1912 : 70


hsa-mir-1913 : 69


hsa-mir-1914 : 707


hsa-mir-1915 : 69


hsa-mir-192 : 246


hsa-mir-193a : 1


hsa-mir-193b : 4


hsa-mir-194-1 : 244


hsa-mir-194-2 : 246


hsa-mir-195 : 2


hsa-mir-196a-1 : 238


hsa-mir-196a-2 : 229


hsa-mir-196b : 229


hsa-mir-197 : 3


hsa-mir-1972-1 : 1


hsa-mir-1972-2 : 1


hsa-mir-1973 : 1


hsa-mir-1976 : 2


hsa-mir-198 : 71


hsa-mir-199a-1 : 2


hsa-mir-199a-2 : 1


hsa-mir-199b : 2


hsa-mir-19a : 5


hsa-mir-19b-1 : 3


hsa-mir-19b-2 : 6


hsa-mir-200a : 210


hsa-mir-200b : 172


hsa-mir-200c : 71


hsa-mir-202 : 2


hsa-mir-203a : 1


hsa-mir-203b : 2


hsa-mir-204 : 7


hsa-mir-205 : 6


hsa-mir-2052 : 1


hsa-mir-2053 : 1


hsa-mir-2054 : 1


hsa-mir-206 : 22


hsa-mir-208a : 1


hsa-mir-208b : 1


hsa-mir-20a : 11


hsa-mir-20b : 265


hsa-mir-21 : 1


hsa-mir-210 : 70


hsa-mir-211 : 24


hsa-mir-2110 : 4


hsa-mir-2113 : 542


hsa-mir-2114 : 18


hsa-mir-2115 : 4


hsa-mir-2116 : 10


hsa-mir-2117 : 1


hsa-mir-212 : 39


hsa-mir-214 : 23


hsa-mir-215 : 190


hsa-mir-216a : 9


hsa-mir-216b : 145


hsa-mir-217 : 5


hsa-mir-218-1 : 1


hsa-mir-218-2 : 2


hsa-mir-219a-1 : 12


hsa-mir-219a-2 : 36


hsa-mir-219b : 38


hsa-mir-22 : 3


hsa-mir-221 : 4


hsa-mir-222 : 7


hsa-mir-223 : 7


hsa-mir-224 : 24


hsa-mir-2276 : 6


hsa-mir-2277 : 3


hsa-mir-2278 : 126


hsa-mir-2355 : 2


hsa-mir-2392 : 1


hsa-mir-23a : 3


hsa-mir-23b : 2


hsa-mir-23c : 4


hsa-mir-24-1 : 1


hsa-mir-24-2 : 1


hsa-mir-2467 : 65


hsa-mir-25 : 1


hsa-mir-2681 : 82


hsa-mir-2682 : 1


hsa-mir-26a-1 : 1


hsa-mir-26a-2 : 1


hsa-mir-26b : 2


hsa-mir-27a : 4


hsa-mir-27b : 2


hsa-mir-28 : 97


hsa-mir-2861 : 35


hsa-mir-2909 : 1


hsa-mir-296 : 1


hsa-mir-297 : 1


hsa-mir-298 : 1


hsa-mir-299 : 9


hsa-mir-29a : 4


hsa-mir-29b-1 : 1


hsa-mir-29b-2 : 9


hsa-mir-29c : 1


hsa-mir-300 : 1


hsa-mir-301a : 9


hsa-mir-301b : 73


hsa-mir-302a : 1


hsa-mir-302b : 1


hsa-mir-302c : 1


hsa-mir-302d : 1


hsa-mir-302e : 1


hsa-mir-302f : 1


hsa-mir-3064 : 108


hsa-mir-3065 : 12


hsa-mir-3074 : 4


hsa-mir-30a : 6


hsa-mir-30b : 5


hsa-mir-30c-1 : 6


hsa-mir-30c-2 : 1


hsa-mir-30d : 5


hsa-mir-30e : 3


hsa-mir-31 : 10


hsa-mir-3115 : 42


hsa-mir-3116-1 : 72


hsa-mir-3116-2 : 1


hsa-mir-3117 : 4


hsa-mir-3118-1 : 1


hsa-mir-3118-2 : 1


hsa-mir-3118-3 : 1


hsa-mir-3118-4 : 1


hsa-mir-3119-1 : 1


hsa-mir-3119-2 : 1


hsa-mir-3120 : 1


hsa-mir-3121 : 1


hsa-mir-3122 : 1


hsa-mir-3123 : 1


hsa-mir-3124 : 33


hsa-mir-3125 : 66


hsa-mir-3126 : 64


hsa-mir-3127 : 3


hsa-mir-3128 : 58


hsa-mir-3129 : 12


hsa-mir-3130-1 : 5


hsa-mir-3130-2 : 2


hsa-mir-3131 : 185


hsa-mir-3132 : 503


hsa-mir-3133 : 1


hsa-mir-3134 : 1


hsa-mir-3135a : 574


hsa-mir-3135b : 1


hsa-mir-3136 : 53


hsa-mir-3137 : 1


hsa-mir-3138 : 48


hsa-mir-3139 : 42


hsa-mir-3140 : 60


hsa-mir-3141 : 1


hsa-mir-3142 : 1


hsa-mir-3143 : 142


hsa-mir-3144 : 34


hsa-mir-3145 : 21


hsa-mir-3146 : 74


hsa-mir-3147 : 1


hsa-mir-3148 : 1


hsa-mir-3149 : 41


hsa-mir-3150a : 52


hsa-mir-3150b : 109


hsa-mir-3151 : 1


hsa-mir-3152 : 38


hsa-mir-3153 : 1


hsa-mir-3154 : 42


hsa-mir-3155a : 585


hsa-mir-3155b : 1


hsa-mir-3156-1 : 1


hsa-mir-3156-2 : 1


hsa-mir-3156-3 : 1


hsa-mir-3157 : 7


hsa-mir-3158-1 : 6


hsa-mir-3158-2 : 21


hsa-mir-3159 : 1


hsa-mir-3160-1 : 1


hsa-mir-3160-2 : 72


hsa-mir-3161 : 66


hsa-mir-3162 : 46


hsa-mir-3163 : 128


hsa-mir-3164 : 1


hsa-mir-3165 : 1


hsa-mir-3166 : 69


hsa-mir-3167 : 1


hsa-mir-3168 : 1


hsa-mir-3169 : 1


hsa-mir-3170 : 24


hsa-mir-3171 : 1


hsa-mir-3173 : 4


hsa-mir-3174 : 6


hsa-mir-3175 : 1


hsa-mir-3176 : 181


hsa-mir-3177 : 44


hsa-mir-3178 : 40


hsa-mir-3179-1 : 1


hsa-mir-3179-2 : 1


hsa-mir-3179-3 : 1


hsa-mir-3179-4 : 1


hsa-mir-3180-1 : 1


hsa-mir-3180-2 : 1


hsa-mir-3180-3 : 1


hsa-mir-3180-4 : 185


hsa-mir-3180-5 : 1


hsa-mir-3181 : 74


hsa-mir-3182 : 69


hsa-mir-3183 : 33


hsa-mir-3184 : 1


hsa-mir-3185 : 1


hsa-mir-3186 : 1


hsa-mir-3187 : 34


hsa-mir-3188 : 2


hsa-mir-3189 : 94


hsa-mir-3190 : 24


hsa-mir-3191 : 19


hsa-mir-3192 : 13


hsa-mir-3193 : 1


hsa-mir-3194 : 48


hsa-mir-3195 : 75


hsa-mir-3196 : 1


hsa-mir-3197 : 1


hsa-mir-3198-1 : 76


hsa-mir-3198-2 : 476


hsa-mir-3199-1 : 73


hsa-mir-3199-2 : 30


hsa-mir-32 : 19


hsa-mir-3200 : 14


hsa-mir-3201 : 1


hsa-mir-3202-1 : 70


hsa-mir-3202-2 : 45


hsa-mir-320a : 7


hsa-mir-320b-1 : 23


hsa-mir-320b-2 : 19


hsa-mir-320c-1 : 2


hsa-mir-320c-2 : 8


hsa-mir-320d-1 : 19


hsa-mir-320d-2 : 199


hsa-mir-320e : 18


hsa-mir-323a : 4


hsa-mir-323b : 65


hsa-mir-324 : 1


hsa-mir-325 : 1


hsa-mir-326 : 15


hsa-mir-328 : 2


hsa-mir-329-1 : 18


hsa-mir-329-2 : 31


hsa-mir-330 : 2


hsa-mir-331 : 5


hsa-mir-335 : 2


hsa-mir-337 : 1


hsa-mir-338 : 3


hsa-mir-339 : 4


hsa-mir-33a : 1


hsa-mir-33b : 3


hsa-mir-340 : 1


hsa-mir-342 : 7


hsa-mir-345 : 4


hsa-mir-346 : 26


hsa-mir-34a : 38


hsa-mir-34b : 23


hsa-mir-34c : 106


hsa-mir-3529 : 1


hsa-mir-3591 : 1


hsa-mir-3605 : 4


hsa-mir-3606 : 70


hsa-mir-3607 : 12


hsa-mir-3609 : 739


hsa-mir-361 : 7


hsa-mir-3610 : 6


hsa-mir-3611 : 77


hsa-mir-3612 : 62


hsa-mir-3613 : 26


hsa-mir-3614 : 8


hsa-mir-3615 : 3


hsa-mir-3616 : 1


hsa-mir-3617 : 49


hsa-mir-3618 : 5


hsa-mir-3619 : 1


hsa-mir-362 : 101


hsa-mir-3620 : 12


hsa-mir-3621 : 58


hsa-mir-3622a : 2


hsa-mir-3622b : 746


hsa-mir-363 : 174


hsa-mir-3646 : 1


hsa-mir-3648-1 : 1


hsa-mir-3648-2 : 1


hsa-mir-3649 : 1


hsa-mir-3650 : 1


hsa-mir-3651 : 29


hsa-mir-3652 : 2


hsa-mir-3653 : 5


hsa-mir-3654 : 69


hsa-mir-3655 : 3


hsa-mir-3656 : 214


hsa-mir-3657 : 1


hsa-mir-3658 : 1


hsa-mir-3659 : 1


hsa-mir-365a : 20


hsa-mir-365b : 19


hsa-mir-3660 : 38


hsa-mir-3661 : 1


hsa-mir-3662 : 19


hsa-mir-3663 : 1


hsa-mir-3664 : 820


hsa-mir-3665 : 1


hsa-mir-3666 : 1


hsa-mir-3667 : 14


hsa-mir-3668 : 51


hsa-mir-367 : 1


hsa-mir-3670-1 : 1


hsa-mir-3670-2 : 1


hsa-mir-3670-3 : 1


hsa-mir-3670-4 : 1


hsa-mir-3671 : 1


hsa-mir-3672 : 1


hsa-mir-3674 : 1


hsa-mir-3675 : 1


hsa-mir-3677 : 1


hsa-mir-3678 : 34


hsa-mir-3679 : 15


hsa-mir-3680-1 : 63


hsa-mir-3680-2 : 33


hsa-mir-3681 : 3


hsa-mir-3682 : 7


hsa-mir-3683 : 41


hsa-mir-3684 : 13


hsa-mir-3685 : 1


hsa-mir-3686 : 1


hsa-mir-3687-1 : 1


hsa-mir-3687-2 : 1


hsa-mir-3688-1 : 6


hsa-mir-3688-2 : 6


hsa-mir-3689a : 1


hsa-mir-3689b : 1


hsa-mir-3689c : 1


hsa-mir-3689d-1 : 1


hsa-mir-3689d-2 : 1


hsa-mir-3689e : 1


hsa-mir-3689f : 1


hsa-mir-369 : 12


hsa-mir-3690-1 : 57


hsa-mir-3690-2 : 1


hsa-mir-3691 : 6


hsa-mir-3692 : 502


hsa-mir-370 : 1


hsa-mir-3713 : 1


hsa-mir-3714 : 1


hsa-mir-371a : 37


hsa-mir-371b : 23


hsa-mir-372 : 8


hsa-mir-373 : 126


hsa-mir-374a : 1


hsa-mir-374b : 20


hsa-mir-374c : 78


hsa-mir-375 : 188


hsa-mir-376a-1 : 1


hsa-mir-376a-2 : 55


hsa-mir-376b : 46


hsa-mir-376c : 3


hsa-mir-377 : 7


hsa-mir-378a : 176


hsa-mir-378b : 10


hsa-mir-378c : 92


hsa-mir-378d-1 : 2


hsa-mir-378d-2 : 3


hsa-mir-378e : 1


hsa-mir-378f : 5


hsa-mir-378g : 139


hsa-mir-378h : 1


hsa-mir-378i : 1


hsa-mir-378j : 102


hsa-mir-379 : 4


hsa-mir-380 : 130


hsa-mir-381 : 3


hsa-mir-382 : 5


hsa-mir-383 : 27


hsa-mir-384 : 1


hsa-mir-3907 : 1


hsa-mir-3908 : 1


hsa-mir-3909 : 4


hsa-mir-3910-1 : 1


hsa-mir-3910-2 : 1


hsa-mir-3911 : 236


hsa-mir-3912 : 7


hsa-mir-3913-1 : 6


hsa-mir-3913-2 : 2


hsa-mir-3914-1 : 1


hsa-mir-3914-2 : 54


hsa-mir-3915 : 1


hsa-mir-3916 : 36


hsa-mir-3917 : 8


hsa-mir-3918 : 371


hsa-mir-3919 : 1


hsa-mir-3920 : 7


hsa-mir-3921 : 1


hsa-mir-3922 : 33


hsa-mir-3923 : 185


hsa-mir-3924 : 1


hsa-mir-3925 : 78


hsa-mir-3926-1 : 1


hsa-mir-3926-2 : 3


hsa-mir-3927 : 1


hsa-mir-3928 : 7


hsa-mir-3929 : 1


hsa-mir-3934 : 31


hsa-mir-3935 : 1


hsa-mir-3936 : 121


hsa-mir-3937 : 1


hsa-mir-3938 : 48


hsa-mir-3939 : 42


hsa-mir-3940 : 17


hsa-mir-3941 : 42


hsa-mir-3942 : 2


hsa-mir-3943 : 80


hsa-mir-3944 : 7


hsa-mir-3945 : 6


hsa-mir-3960 : 68


hsa-mir-3972 : 1


hsa-mir-3973 : 1


hsa-mir-3974 : 1


hsa-mir-3975 : 1


hsa-mir-3976 : 1


hsa-mir-3977 : 1


hsa-mir-3978 : 1


hsa-mir-409 : 24


hsa-mir-410 : 90


hsa-mir-411 : 7


hsa-mir-412 : 51


hsa-mir-421 : 12


hsa-mir-422a : 5


hsa-mir-423 : 4


hsa-mir-424 : 31


hsa-mir-425 : 15


hsa-mir-4251 : 1


hsa-mir-4252 : 1


hsa-mir-4253 : 1


hsa-mir-4254 : 1


hsa-mir-4255 : 1


hsa-mir-4256 : 1


KeyError: 'AGAP4'

## Loading miRecords experimentally validated miRNA-targets interactions

In [7]:
# Load the miRecords table of miRNA-target interactions (tab-delimited)
miRecords_df = pandas.read_table(os.path.join(ROOT_DIR, 'data/external/miRecords_version4.tsv'), delimiter='\t')

# Select only homo sapiens miRNA-target pairs
miRecords_df = miRecords_df[(miRecords_df["miRNA_species"] == "Homo sapiens") &
                            (miRecords_df["Target gene_species_scientific"] == "Homo sapiens")]
# Keep the two identifier columns. Take an explicit copy so the column
# assignments below write to an independent frame rather than to a selection
# of the original (avoids pandas' SettingWithCopyWarning).
miRecords_df = miRecords_df[["miRNA_mature_ID", "Target gene_name"]].copy()

# Standardize miRNA and gene symbols: lower-case miRNA IDs with any '*'
# removed, upper-case gene names.
# NOTE(review): '*' is intended as a literal asterisk; this relies on pandas
# treating a single-character pattern literally -- pass regex=False where the
# installed pandas supports it.
miRecords_df['miRNA_mature_ID'] = miRecords_df['miRNA_mature_ID'].str.lower().str.replace('*', '')
miRecords_df['Target gene_name'] = miRecords_df['Target gene_name'].str.upper()

# Filter miRNA-target pairs to only miRNA's included in miRNA expression data, same for gene targets
miRecords_df = miRecords_df[miRecords_df["miRNA_mature_ID"].isin(mirna_list) &
                            miRecords_df["Target gene_name"].isin(gene_symbols)]

# miRecords_df