# ARCHS4 (Human Tissue)

Author: Moshe Silverstein <br/>
Date: 8-17 <br/>
Data Source: 

In [1]:
import sys, datetime, os
import numpy as np
import pandas as pd
import importlib
import my_functions as mf
from collections import Counter
import json
import re
import scipy
from sklearn.metrics import jaccard_similarity_score
from sklearn.metrics.pairwise import cosine_similarity
import scipy.stats as stats
%matplotlib inline

In [2]:
importlib.reload(mf)

<module 'my_functions' from '/Users/moshesilverstein/Documents/Harmonizome/ARCHS4/my_functions.py'>

# Download the data using the R scripts provided by ARCHS4

In [3]:
import h5py

# Absolute path to the ARCHS4 human expression matrix (HDF5) on the author's machine.
filename = '/Users/moshesilverstein/Documents/Harmonizome/ARCHS4/Input/Human/Tissue/human_matrix.h5'
# Open read-only. The handle `f` is deliberately left open: later cells read the
# 'data/expression', 'meta/Sample_source_name_ch1' and 'meta/genes' datasets from it.
f = h5py.File(filename, 'r')

# Load Data 

In [4]:
matrix = pd.DataFrame(data = f['data/expression'][:,:])

In [5]:
matrix = matrix.T

In [6]:
matrix.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,65419,65420,65421,65422,65423,65424,65425,65426,65427,65428
0,124,0,0,0,108,0,0,0,0,0,...,82,81,86,133,597,808,458,435,766,525
1,77,0,0,0,56,0,0,0,0,0,...,7,6,5,4,3,11,6,7,14,8
2,10743,189,1518,1873,26007,835,1077,12724,545,1100,...,234,8514,320,13332,41,42,45,30,73,25
3,28,6,0,0,20,0,0,0,0,0,...,13,12,24,15,10,10,5,7,15,9
4,119,0,0,0,69,0,0,0,0,0,...,13,15,23,14,2,1,1,0,0,1


In [7]:
matrix.shape

(35238, 65429)

# Load Sample Meta Data

In [8]:
sample_meta = f['meta/Sample_source_name_ch1'][:].tolist()

In [9]:
# The sample names come out of the HDF5 file as UTF-8 encoded bytes;
# convert every entry to a regular string, keeping the original order.
sample_meta = [raw_name.decode("utf-8") for raw_name in sample_meta]

In [10]:
sample_meta[0:10]

['HeLa ELAVL1/HuR siRNA1 5d',
 'brain',
 'heart',
 'lymph node',
 'HeLa mock knockdown 5d',
 'ovary',
 'kidney',
 'liver',
 'thyroid',
 'breast']

# Get index of Tissue Data

In [11]:
tissues = pd.read_excel('/Users/moshesilverstein/Documents/Harmonizome/ARCHS4/Input/Human/Tissue/tissues_expanded.xlsx')

In [12]:
tissues.head()

Unnamed: 0,cell_type,synonyms,not,exact
0,CARDIAC MUSCLE FIBER,CARDIOMYOCYTE|CARDIAC MUSCLE CELL|CARDIAC MYOCYTE,,
1,PERICARDIUM,PERICARDIAL SAC,,
2,VENTRICLE,,,
3,ATRIUM,ATRIAL TISSUE|CARDIAC ATRIUM,,
4,VALVE,,,


In [13]:
tissues.shape

(112, 4)

# Map Sample Meta Data to Samples

In [14]:
matrix.columns = sample_meta

In [15]:
matrix.head()

Unnamed: 0,HeLa ELAVL1/HuR siRNA1 5d,brain,heart,lymph node,HeLa mock knockdown 5d,ovary,kidney,liver,thyroid,breast,...,astrocytes,astrocytes.1,astrocytes.2,astrocytes.3,"Human embryonic stem cells (VUB01), MSC DM1","Human embryonic stem cells (VUB01), MSC DM1, Digoxin","Human embryonic stem cells (VUB01), MSC DM1.1","Human embryonic stem cells (VUB01), MSC DM1, Digoxin.1","Human embryonic stem cells (VUB01), MSC DM1.2","Human embryonic stem cells (VUB01), MSC DM1, Digoxin.2"
0,124,0,0,0,108,0,0,0,0,0,...,82,81,86,133,597,808,458,435,766,525
1,77,0,0,0,56,0,0,0,0,0,...,7,6,5,4,3,11,6,7,14,8
2,10743,189,1518,1873,26007,835,1077,12724,545,1100,...,234,8514,320,13332,41,42,45,30,73,25
3,28,6,0,0,20,0,0,0,0,0,...,13,12,24,15,10,10,5,7,15,9
4,119,0,0,0,69,0,0,0,0,0,...,13,15,23,14,2,1,1,0,0,1


In [16]:
# Match every sample (column of `matrix`) against every row of the `tissues`
# table and record the hits in two parallel lists:
#   tissue_loc_list -- integer position of the matching column in `matrix`
#   tissue_list     -- the 'cell_type' label of the tissue it matched
#
# A sample matches a tissue when, compared case-insensitively:
#   1. its name equals the tissue's 'exact' value (the 'not' terms are NOT
#      consulted for exact matches), or
#   2. the tissue's 'cell_type' is found in the name by re.search, or
#   3. one of the '|'-separated 'synonyms' is found in the name by re.search,
# and, for cases 2 and 3, none of the '|'-separated 'not' terms is found.
#
# Spreadsheet values are used as regular-expression patterns as-is (no escaping).
tissue_loc_list = []
tissue_list = []


def _cell_text(value):
    """Return a spreadsheet cell lower-cased, or None when the cell is empty.

    Empty Excel cells are loaded as NaN (a float, possibly numpy.float64), so
    anything that is not a string is treated as "no value".
    """
    return value.lower() if isinstance(value, str) else None


def _cell_terms(value):
    """Split a '|'-separated spreadsheet cell into lower-cased terms ([] if empty)."""
    text = _cell_text(value)
    return text.split('|') if text is not None else []


# The tissue table does not change while samples are scanned, so read and
# normalise each row once instead of once per (sample, tissue) pair.
# `.loc` replaces the `.ix` indexer, which no longer exists in current pandas.
tissue_rules = [
    (
        tissues.loc[index, 'cell_type'],           # label stored in tissue_list
        _cell_text(tissues.loc[index, 'exact']),   # exact-match name or None
        tissues.loc[index, 'cell_type'].lower(),   # pattern for case 2
        _cell_terms(tissues.loc[index, 'synonyms']),
        _cell_terms(tissues.loc[index, 'not']),
    )
    for index in tissues.index
]

n_samples = len(matrix.columns)

for i, sample in enumerate(matrix.columns):

    progressPercent = ((i+1)/n_samples)*100

    sys.stdout.write("Progress: %d%%  %d Out of %d   \r" % (progressPercent, (i+1), n_samples))
    sys.stdout.flush()

    sample_lower = sample.lower()

    for cell_type, exact, cell_type_pattern, synonyms, excludes in tissue_rules:

        if exact is not None and sample_lower == exact:
            hits = 1
        else:
            if re.search(cell_type_pattern, sample_lower):
                hits = 1
            else:
                # NOTE(review): one hit is recorded per matching synonym, as in
                # the original loop, so a sample whose name contains several
                # synonyms of the same tissue is listed (and later averaged)
                # more than once -- confirm whether that weighting is intended.
                hits = sum(1 for synonym in synonyms if re.search(synonym, sample_lower))

            # A single matching exclusion term vetoes the sample for this tissue.
            # any() is used instead of comparing each term with excludes[-1],
            # which misfired when the last term also appeared earlier in the list.
            if hits and any(re.search(bad_term, sample_lower) for bad_term in excludes):
                hits = 0

        for _ in range(hits):
            tissue_loc_list.append(i)
            tissue_list.append(cell_type)

Progress: 100%  65429 Out of 65429   

In [17]:
# tissue_loc_list holds integer column positions (from enumerate), while the
# column labels are sample names, many of them repeated. Select by position
# explicitly: label-based matrix[...] raises KeyError for these integers on
# current pandas.
matrix = matrix.iloc[:, tissue_loc_list]

In [18]:
matrix.columns = tissue_list

In [19]:
matrix.head()

Unnamed: 0,BRAIN (BULK),HEART (BULK TISSUE),OVARY (BULK TISSUE),KIDNEY (BULK TISSUE),LIVER (BULK TISSUE),THYROID (BULK TISSUE),SKELETAL MUSCLE (BULK TISSUE),ADRENAL GLAND,LUNG (BULK TISSUE),BLASTOCYST,...,ASTROCYTE,ASTROCYTE.1,ASTROCYTE.2,ASTROCYTE.3,HUMAN EMBRYO,HUMAN EMBRYO.1,HUMAN EMBRYO.2,HUMAN EMBRYO.3,HUMAN EMBRYO.4,HUMAN EMBRYO.5
0,0,0,0,0,0,0,0,0,0,16,...,82,81,86,133,597,808,458,435,766,525
1,0,0,0,0,0,0,0,0,0,1,...,7,6,5,4,3,11,6,7,14,8
2,189,1518,835,1077,12724,545,361,1629,8451,0,...,234,8514,320,13332,41,42,45,30,73,25
3,6,0,0,0,0,0,0,0,0,10,...,13,12,24,15,10,10,5,7,15,9
4,0,0,0,0,0,0,0,0,0,0,...,13,15,23,14,2,1,1,0,0,1


In [20]:
matrix.shape

(35238, 26746)

# Get Gene Symbols

In [21]:
gene_list = f['meta/genes'][:].tolist() 

In [22]:
# Gene symbols are stored in the HDF5 file as UTF-8 encoded bytes;
# turn each one into a regular string, preserving the row order.
gene_list = [raw_symbol.decode("utf-8") for raw_symbol in gene_list]

In [23]:
gene_list[0:10]

['A1BG',
 'A1CF',
 'A2M',
 'A2ML1',
 'A2MP1',
 'A3GALT2',
 'A4GALT',
 'A4GNT',
 'AAAS',
 'AACS']

# Map Gene Symbols to ID's

In [24]:
matrix.index = gene_list

In [25]:
matrix.head()

Unnamed: 0,BRAIN (BULK),HEART (BULK TISSUE),OVARY (BULK TISSUE),KIDNEY (BULK TISSUE),LIVER (BULK TISSUE),THYROID (BULK TISSUE),SKELETAL MUSCLE (BULK TISSUE),ADRENAL GLAND,LUNG (BULK TISSUE),BLASTOCYST,...,ASTROCYTE,ASTROCYTE.1,ASTROCYTE.2,ASTROCYTE.3,HUMAN EMBRYO,HUMAN EMBRYO.1,HUMAN EMBRYO.2,HUMAN EMBRYO.3,HUMAN EMBRYO.4,HUMAN EMBRYO.5
A1BG,0,0,0,0,0,0,0,0,0,16,...,82,81,86,133,597,808,458,435,766,525
A1CF,0,0,0,0,0,0,0,0,0,1,...,7,6,5,4,3,11,6,7,14,8
A2M,189,1518,835,1077,12724,545,361,1629,8451,0,...,234,8514,320,13332,41,42,45,30,73,25
A2ML1,6,0,0,0,0,0,0,0,0,10,...,13,12,24,15,10,10,5,7,15,9
A2MP1,0,0,0,0,0,0,0,0,0,0,...,13,15,23,14,2,1,1,0,0,1


# Save Unfiltered Matrix To File

In [26]:
# Write the unfiltered matrix as a gzip-compressed TSV stamped with the current
# year and month (YYYY_MM). The extension is .gz so it matches the gzip
# compression actually used; a gzip stream named .zip cannot be opened by zip
# tools or by readers that infer the compression from the file name.
filename = '~/./Documents/Harmonizome/ARCHS4/Output/archs4_tissue_matrix_unfilltered_%s.tsv.gz' % datetime.date.today().strftime('%Y_%m')
matrix.to_csv(filename, sep='\t', compression='gzip')

In [27]:
normalized_matrix = matrix.copy()

# Drop Any Genes That Have Zero Expression In More Than 95% Of The Samples

In [28]:
normalized_matrix.replace(0, np.nan, inplace=True)

In [29]:
# `thresh` is the minimum number of non-NaN values a row (axis=0) needs in order
# to be kept: 5% of the number of columns. Zeros were replaced by NaN in the
# previous cell, so this keeps only rows that are non-zero in at least 5% of
# the columns. The matrix is modified in place.
normalized_matrix.dropna(thresh=(0.05*normalized_matrix.shape[1]), axis=0, inplace=True)

In [30]:
normalized_matrix.replace(np.nan, 0, inplace=True)

In [31]:
normalized_matrix.shape

(28260, 26746)

# Normalize Matrix (Quantile Normalize the matrix for the columns)

In [32]:
# Temporarily label the columns 0..n-1; the tissue labels are restored from
# tissue_list after normalization. Presumably needed because the tissue labels
# repeat -- TODO confirm against mf.quantileNormalize.
normalized_matrix.columns = range(0, len(normalized_matrix.columns))

In [33]:
normalized_matrix = mf.quantileNormalize(normalized_matrix)

Step 2/2 progress: 100%  26746 Out of 26746   

In [34]:
normalized_matrix.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,26736,26737,26738,26739,26740,26741,26742,26743,26744,26745
A1BG,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,117.052083,...,71.465602,59.24714,46.393853,101.991588,81.434084,85.404584,67.72766,71.719771,72.508749,79.48912
A1CF,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.604128,...,6.69831,5.727361,3.396022,3.723398,1.331564,4.119008,2.578404,3.752374,4.004449,4.34521
A2M,5486.442459,43119.354446,18806.324572,39943.373402,98073.484895,9706.046287,7124.903911,55117.475024,197372.303709,0.0,...,269.5713,18563.166193,224.05728,27063.252412,14.092874,13.078367,14.694721,13.211022,15.633179,11.465228
A2ML1,604.57044,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,81.572011,...,11.645854,10.535744,14.005534,12.118971,4.408958,3.750654,2.164847,3.752374,4.249346,4.787931
A2MP1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,11.645854,12.728296,13.463845,11.406491,0.839228,0.249121,0.294773,0.0,0.0,0.398265


# Normalize Matrix (z-score the rows)

In [35]:
mf.zscore(normalized_matrix, 'row')

Progress: 100%  28260 Out of 28260   

In [36]:
normalized_matrix.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,26736,26737,26738,26739,26740,26741,26742,26743,26744,26745
A1BG,-0.318826,-0.318826,-0.318826,-0.318826,-0.318826,-0.318826,-0.318826,-0.318826,-0.318826,-0.227924,...,-0.263326,-0.272815,-0.282797,-0.23962,-0.255585,-0.252501,-0.266229,-0.263129,-0.262516,-0.257095
A1CF,-0.215275,-0.215275,-0.215275,-0.215275,-0.215275,-0.215275,-0.215275,-0.215275,-0.215275,-0.20887,...,-0.20762,-0.208729,-0.211394,-0.211019,-0.213753,-0.210567,-0.212328,-0.210986,-0.210698,-0.210309
A2M,0.253755,3.961472,1.566073,3.648564,9.375731,0.669484,0.415182,5.143566,19.158974,-0.286787,...,-0.260228,1.542116,-0.264712,2.379572,-0.285399,-0.285499,-0.285339,-0.285486,-0.285247,-0.285658
A2ML1,0.462461,-0.133311,-0.133311,-0.133311,-0.133311,-0.133311,-0.133311,-0.133311,-0.133311,-0.052926,...,-0.121835,-0.122929,-0.119509,-0.121369,-0.128966,-0.129615,-0.131178,-0.129613,-0.129124,-0.128593
A2MP1,-0.154353,-0.154353,-0.154353,-0.154353,-0.154353,-0.154353,-0.154353,-0.154353,-0.154353,-0.154353,...,-0.091925,-0.086123,-0.08218,-0.093208,-0.149854,-0.153018,-0.152773,-0.154353,-0.154353,-0.152218


# Merge Duplicate Samples By Columns (by taking the mean)

In [37]:
normalized_matrix.columns = tissue_list

In [38]:
normalized_matrix = mf.merge(normalized_matrix, 'column', 'mean')

In [39]:
normalized_matrix.shape

(28260, 108)

# Map Gene Symbols To Up-to-date Approved Gene Symbols

In [40]:
mf.mapgenesymbols(normalized_matrix)

Progeres: 99%  28201 Out of 28260   

In [41]:
# Inspect the frame that was just passed to mf.mapgenesymbols; `matrix` is the
# unfiltered, un-normalized frame and is not touched by this section.
normalized_matrix.shape

# Merge Duplicate Genes By Rows

In [42]:
normalized_matrix = mf.merge(normalized_matrix, 'row', 'mean')

In [43]:
normalized_matrix.shape

(23169, 108)

# Save Filtered Matrix

In [44]:
# Write the filtered, normalized matrix as a gzip-compressed TSV stamped with
# the current year and month (YYYY_MM). The extension is .gz so it matches the
# gzip compression actually used (a gzip stream named .zip misleads zip tools
# and extension-based compression inference).
filename = '~/./Documents/Harmonizome/ARCHS4/Output/archs4_tissue_matrix_filltered_%s.tsv.gz' % datetime.date.today().strftime('%Y_%m')
normalized_matrix.to_csv(filename, sep='\t', compression='gzip')

# Create Tertiary Matrix

In [45]:
tertiary_matrix = mf.createTertiaryMarix(normalized_matrix)

Progeres: 100%  108 Out of 108   

In [46]:
tertiary_matrix.head()

Unnamed: 0_level_0,ADIPOSE (BULK TISSUE),ADRENAL GLAND,ALPHA CELL,ALVEOLAR CELL TYPE II,ALVEOLAR MACROPHAGE,AMNIOTIC FLUID,ASTROCYTE,ATRIUM,BASAL CELL,BETA CELL,...,SUPERIOR FRONTAL GYRUS,TESTIS (BULK TISSUE),THYMUS (BULK TISSUE),THYROID (BULK TISSUE),TLYMPHOCYTE,TRACHEA (BULK TISSUE),VALVE,VASCULAR SMOOTH MUSCLE,VENTRICLE,WHARTONS JELLY
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A1BG,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A1CF,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A2M,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,1.0,0.0
A2ML1,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
A2MP1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


# Save Tertiary Matrix

In [47]:
# Write the tertiary matrix as a gzip-compressed TSV stamped with the current
# year and month (YYYY_MM). The extension is .gz so it matches the gzip
# compression actually used (a gzip stream named .zip misleads zip tools and
# extension-based compression inference).
filename = '~/./Documents/Harmonizome/ARCHS4/Output/archs4_tissue_tertiary_matrix_%s.tsv.gz' % datetime.date.today().strftime('%Y_%m')
tertiary_matrix.to_csv(filename, sep='\t', compression='gzip')

#### Path to output files

In [48]:
path = '/Users/moshesilverstein/Documents/Harmonizome/ARCHS4/Output/'

# Create Up Gene Set Library

In [49]:
name = 'archs4_tissue_gene_up_set'

In [50]:
mf.createUpGeneSetLib(normalized_matrix, path, name)

Progeres: 100%  108 Out of 108   

# Create Down Gene Set Library

In [52]:
name = 'archs4_tissue_gene_down_set'

In [53]:
mf.createDownGeneSetLib(normalized_matrix, path, name)

Progeres: 100%  108 Out of 108   

# Create Up Attribute Library

In [54]:
name = 'archs4_tissue_attribute_up_set'

In [55]:
mf.createUpAttributeSetLib(tertiary_matrix, path, name)

Progeres: 100%  23169 Out of 23169   

# Create Down Attribute Library

In [56]:
name = 'archs4_tissue_attribute_down_set'

In [57]:
mf.createDownAttributeSetLib(tertiary_matrix, path, name)

Progeres: 100%  23169 Out of 23169   

# Create Gene Similarity Matrix

In [58]:
gene_similarity_matix = mf.createSimilarityMatrix(normalized_matrix, 'cosine')

In [59]:
gene_similarity_matix.head()

index,A1BG,A1CF,A2M,A2ML1,A2MP1,A3GALT2,A4GALT,A4GNT,AAAS,AACS,...,ZWILCH,ZWINT,ZXDA,ZXDB,ZXDC,ZYG11A,ZYG11B,ZYX,ZZEF1,ZZZ3
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A1BG,1.0,0.672171,0.207036,-0.058123,-0.010649,-0.021419,-0.14348,0.089566,0.111268,0.096429,...,-0.007573,0.013799,-0.178479,0.096395,0.013083,0.12424,-0.090784,-0.035394,-0.171055,-0.094701
A1CF,0.672171,1.0,0.113603,-0.043191,-0.077302,-0.046565,-0.156881,0.095818,-0.037113,0.153261,...,-0.071209,-0.047596,-0.130969,0.199071,0.072584,0.104794,-0.119189,-0.139822,0.122622,0.026036
A2M,0.207036,0.113603,1.0,-0.050936,-0.002689,-0.077225,0.101411,0.116864,-0.056305,-0.126552,...,-0.314093,-0.22103,-0.064823,-0.104333,0.097675,-0.230689,0.040998,0.07723,0.153744,-0.229208
A2ML1,-0.058123,-0.043191,-0.050936,1.0,0.061358,-0.032232,0.105175,-0.017789,-0.001333,0.029846,...,-0.070858,-0.072212,0.085783,0.137574,0.089242,-0.033137,-0.029402,-0.079595,0.049261,0.122823
A2MP1,-0.010649,-0.077302,-0.002689,0.061358,1.0,-0.039241,0.081773,-0.055501,-0.012608,0.088613,...,-0.096248,-0.069835,0.075643,0.034786,0.188475,0.06294,-0.076712,-0.157378,0.096808,0.022541


# Save Gene Similarity Matrix 

In [60]:
# Write the gene similarity matrix as a gzip-compressed TSV stamped with the
# current year and month (YYYY_MM). The path is relative to the notebook's
# working directory. The extension is .gz so it matches the gzip compression
# actually used (a gzip stream named .zip misleads zip tools and
# extension-based compression inference).
filename = 'Output/archs4_tissue_gene_similarity_matix_%s.tsv.gz' % datetime.date.today().strftime('%Y_%m')
gene_similarity_matix.to_csv(filename, sep='\t', compression='gzip')

# Create Attribute Similarity matrix

In [61]:
attribute_similarity_matix = mf.createSimilarityMatrix(normalized_matrix.T, 'cosine')

In [62]:
attribute_similarity_matix.head()

Unnamed: 0,ADIPOSE (BULK TISSUE),ADRENAL GLAND,ALPHA CELL,ALVEOLAR CELL TYPE II,ALVEOLAR MACROPHAGE,AMNIOTIC FLUID,ASTROCYTE,ATRIUM,BASAL CELL,BETA CELL,...,SUPERIOR FRONTAL GYRUS,TESTIS (BULK TISSUE),THYMUS (BULK TISSUE),THYROID (BULK TISSUE),TLYMPHOCYTE,TRACHEA (BULK TISSUE),VALVE,VASCULAR SMOOTH MUSCLE,VENTRICLE,WHARTONS JELLY
ADIPOSE (BULK TISSUE),1.0,0.140132,-0.0153,-0.009833,0.048859,0.049832,0.207686,0.252345,0.071373,0.02755,...,0.085214,0.035777,-0.001794,0.184176,-0.216132,0.108436,0.358305,0.227074,0.287865,0.097166
ADRENAL GLAND,0.140132,1.0,0.07655,0.021021,0.064642,0.034227,0.120628,0.13388,0.05586,0.120456,...,0.056904,0.090125,0.024559,0.176387,-0.13914,0.083207,0.100382,0.103203,0.111508,0.081343
ALPHA CELL,-0.0153,0.07655,1.0,0.083025,0.035614,0.127987,0.034586,0.04676,0.047318,0.713479,...,0.114232,0.091285,0.012206,0.157543,-0.181161,0.025794,-0.022764,0.010647,0.046466,-0.039002
ALVEOLAR CELL TYPE II,-0.009833,0.021021,0.083025,1.0,0.056189,0.06474,0.006488,0.013331,0.180918,0.062327,...,-0.064224,0.017245,-0.015949,0.114191,-0.049787,0.090903,-0.050713,0.034364,0.001978,0.091283
ALVEOLAR MACROPHAGE,0.048859,0.064642,0.035614,0.056189,1.0,0.024868,0.006764,0.012762,0.065226,0.05226,...,-0.071215,0.020691,0.042011,0.10698,-0.090124,0.060575,-0.004661,0.043279,0.007078,0.089992


# Save Attribute Similarity Matrix

In [63]:
# Write the attribute similarity matrix as a gzip-compressed TSV stamped with
# the current year and month (YYYY_MM). The path is relative to the notebook's
# working directory. The extension is .gz so it matches the gzip compression
# actually used (a gzip stream named .zip misleads zip tools and
# extension-based compression inference).
filename = 'Output/archs4_tissue_attribute_similarity_matix_%s.tsv.gz' % datetime.date.today().strftime('%Y_%m')
attribute_similarity_matix.to_csv(filename, sep='\t', compression='gzip')

# Create Gene List

In [64]:
gene_list = mf.createGeneList(normalized_matrix)

Progeres: 100%  23169 Out of 23169   

In [65]:
gene_list.head()

Unnamed: 0,GeneSym,GeneID
0,A1BG,1
1,A1CF,29974
2,A2M,2
3,A2ML1,144568
4,A2MP1,3


In [66]:
gene_list.shape

(23169, 2)

# Save Gene List

In [67]:
# Write the gene list (without the DataFrame index) as a gzip-compressed TSV
# stamped with the current year and month (YYYY_MM). The path is relative to
# the notebook's working directory. The extension is .gz so it matches the
# gzip compression actually used (a gzip stream named .zip misleads zip tools
# and extension-based compression inference).
filename = 'Output/archs4_tissue_gene_list_%s.tsv.gz' % datetime.date.today().strftime('%Y_%m')
gene_list.to_csv(filename, sep='\t', index=False, compression='gzip')

# Create Attribute List 

In [68]:
attribute_list = mf.createAttributeList(normalized_matrix)

In [69]:
attribute_list.head()

Unnamed: 0,Attributes
0,ADIPOSE (BULK TISSUE)
1,ADRENAL GLAND
2,ALPHA CELL
3,ALVEOLAR CELL TYPE II
4,ALVEOLAR MACROPHAGE


In [70]:
attribute_list.shape

(108, 1)

# Save Attribute List

In [71]:
# Write the attribute list (without the DataFrame index) as a gzip-compressed
# TSV stamped with the current year and month (YYYY_MM). The path is relative
# to the notebook's working directory. The extension is .gz so it matches the
# gzip compression actually used (a gzip stream named .zip misleads zip tools
# and extension-based compression inference).
filename = 'Output/archs4_tissue_attribute_list_%s.tsv.gz' % datetime.date.today().strftime('%Y_%m')
attribute_list.to_csv(filename, sep='\t', index=False, compression='gzip')

# Create Gene-Attribute Edge List

In [72]:
path = '/Users/moshesilverstein/Documents/Harmonizome/ARCHS4/Output/'

In [73]:
name = 'archs4_tissue_gene_attribute_edge_list'

In [74]:
mf.createGeneAttributeEdgeList(tertiary_matrix, gene_list, path, name)

Progeres: 100%  108 Out of 108   

 The number of statisticaly relevent gene-attribute associations is: 500364
