# Guide to Pharmacology (Chemical Ligands of Receptors)

Author: Moshe Silverstein <br/>
Date: 8-17 <br/>
Data Source: http://www.guidetopharmacology.org/download.jsp

In [1]:
import sys, datetime, os
import numpy as np
import pandas as pd
import importlib
import untility_functions as uf
%matplotlib inline

In [2]:
importlib.reload(uf)

<module 'untility_functions' from '/Users/moshesilverstein/Documents/Harmonizome/Guide to Pharmacology/untility_functions.py'>

# Load Data

In [3]:
df = pd.read_csv('Input/interactions.csv', low_memory=False)

In [4]:
df.head()

Unnamed: 0,target,target_id,target_gene_symbol,target_uniprot,target_ligand,target_ligand_id,target_ligand_gene_symbol,target_ligand_uniprot,target_ligand_pubchem_sid,target_species,...,affinity_low,original_affinity_units,original_affinity_low_nm,original_affinity_median_nm,original_affinity_high_nm,original_affinity_relation,assay_description,receptor_site,ligand_context,pubmed_id
0,12S-LOX,1387.0,ALOX12,P18054,,,,,,Human,...,,IC50,,340.0,,=,,,,24393039
1,15-LOX-1,1388.0,ALOX15,P16050,,,,,,Human,...,,Kd,,3900.0,,=,Determined by surface plasmon\r\nresonance (SPR).,,,26290290
2,15-LOX-1,1388.0,ALOX15,P16050,,,,,,Human,...,,Ki,,10.0,,<,,,,20866075
3,15-LOX-2,1389.0,ALOX15B,O15296,,,,,,Human,...,,IC50,,51.0,,=,,,,17656086
4,3-phosphoinositide dependent protein kinase 1,1519.0,PDPK1,O15530,,,,,,Human,...,,IC50,,33.0,,=,,,,11896604


In [5]:
df.shape

(17191, 34)

In [6]:
df.columns

Index(['target', 'target_id', 'target_gene_symbol', 'target_uniprot',
       'target_ligand', 'target_ligand_id', 'target_ligand_gene_symbol',
       'target_ligand_uniprot', 'target_ligand_pubchem_sid', 'target_species',
       'ligand', 'ligand_id', 'ligand_gene_symbol', 'ligand_species',
       'ligand_pubchem_sid', 'type', 'action', 'action_comment', 'endogenous',
       'primary_target', 'concentration_range', 'affinity_units',
       'affinity_high', 'affinity_median', 'affinity_low',
       'original_affinity_units', 'original_affinity_low_nm',
       'original_affinity_median_nm', 'original_affinity_high_nm',
       'original_affinity_relation', 'assay_description', 'receptor_site',
       'ligand_context', 'pubmed_id'],
      dtype='object')

In [7]:
df['ligand_gene_symbol'].head()

0    NaN
1    NaN
2    NaN
3    NaN
4    NaN
Name: ligand_gene_symbol, dtype: object

In [8]:
len(df['ligand_gene_symbol'].unique())

362

# Get Relevant Data

In [9]:
df = df[df['target_species'].isin(['Human', 'Mouse', 'Rat'])] # Grap only selective species of target
df = df[df['target_species'].isin(['Human', 'Mouse|Rat', 
                                   'Human|Mouse|Rat', 
                                   'Human|Rat',
                                  'Mouse',
                                  'Rat'])] # Grap only selective species of ligand

In [10]:
df = df[['target_gene_symbol', 'ligand']]

In [11]:
df.dropna(how='any', inplace=True)

In [12]:
df.head()

Unnamed: 0,target_gene_symbol,ligand
0,ALOX12,ML355
1,ALOX15,PKUMDL_MH_1001
2,ALOX15,compound 34 [PMID: 20866075]
3,ALOX15B,compound 21n [PMID: 17656086]
4,PDPK1,7-hydroxystaurosporine


In [13]:
df.shape

(16789, 2)

# Map Gene Symbols To Up-to-date Approved Gene Symbols

In [17]:
df.set_index('target_gene_symbol', inplace=True)

In [18]:
uf.mapgenesymbols(df)

Progeres: 100%  16789 Out of 16789   

# Drop Duplicates

In [19]:
df.reset_index(inplace=True)

In [20]:
df.drop_duplicates(inplace=True)

In [21]:
df.shape

(13759, 2)

# Create Binary Matrix

In [22]:
binary_matrix = uf.createBinaryMatix(df)

Progeres: 100%  1577 Out of 1577   

In [23]:
binary_matrix.head()

Unnamed: 0,"[Ava<sup>9,10</sup>,Ava<sup>14,15</sup>]-Ac-hMCH<sub>6-16</sub>-NH2",anlotinib,latanoprost (isopropyl ester),I-CBP112,DSLET,surinabant,fMet-Leu-Phe-Glu,ASN04421891,compound 39a [PMID: 15634016],TMP269,...,urocortin 3,cenderitide,PMX53,octanol,bazedoxifene,roxindole,ALRT 1550,ginkgolide X,D-serine,ACDPP
GJB2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
KCNH6,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
TEK,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
SLC40A1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
RORA,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [24]:
binary_matrix.shape

(1577, 7087)

# Save Binary Matrix

In [26]:
filename = '~/./Documents/Harmonizome/Guide to Pharmacology/Output/gp_chem_binary_matrix_%s.tsv.zip'% str(datetime.date.today())[0:7].replace('-', '_')
binary_matrix.to_csv(filename, sep='\t', compression='gzip')

# Create Gene Set Library

In [27]:
path = '/Users/moshesilverstein/Documents/Harmonizome/Guide to Pharmacology/Output/'

In [28]:
name = 'gp_chem_gene_set'

In [29]:
uf.createUpGeneSetLib(binary_matrix, path, name)

Progeres: 100%  7087 Out of 7087   

# Create Attribute Library

In [30]:
path = '/Users/moshesilverstein/Documents/Harmonizome/Guide to Pharmacology/Output/'

In [31]:
name = 'gp_chem_attribute_set'

In [32]:
uf.createUpAttributeSetLib(binary_matrix, path, name)

Progeres: 100%  1577 Out of 1577   

# Create Gene Similarity Matrix

In [33]:
gene_similarity_matix = uf.createSimilarityMatrix(binary_matrix, 'jaccard')

In [34]:
gene_similarity_matix.head()

Unnamed: 0,GJB2,KCNH6,TEK,SLC40A1,RORA,AKT2,SLC5A3,GABRR3,KDM4A,BMP1,...,PKD2,CFB,KCNK9,BRSK2,AQP5,IDH1,SLC6A4,HDAC2,ACHE,SLC1A2
GJB2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
KCNH6,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
TEK,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
SLC40A1,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
RORA,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Save Gene Similarity Matrix

In [35]:
filename = '~/./Documents/Harmonizome/Guide to Pharmacology/Output/gp_chem_gene_similarity_matix_%s.tsv.zip'% str(datetime.date.today())[0:7].replace('-', '_')
gene_similarity_matix.to_csv(filename, sep='\t', compression='gzip')

# Create Attribute Similarity matrix

In [36]:
attribute_similarity_matix = uf.createSimilarityMatrix(binary_matrix.T, 'jaccard')

In [37]:
attribute_similarity_matix.head()

Unnamed: 0,"[Ava<sup>9,10</sup>,Ava<sup>14,15</sup>]-Ac-hMCH<sub>6-16</sub>-NH2",anlotinib,latanoprost (isopropyl ester),I-CBP112,DSLET,surinabant,fMet-Leu-Phe-Glu,ASN04421891,compound 39a [PMID: 15634016],TMP269,...,urocortin 3,cenderitide,PMX53,octanol,bazedoxifene,roxindole,ALRT 1550,ginkgolide X,D-serine,ACDPP
"[Ava<sup>9,10</sup>,Ava<sup>14,15</sup>]-Ac-hMCH<sub>6-16</sub>-NH2",1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
anlotinib,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
latanoprost (isopropyl ester),0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
I-CBP112,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
DSLET,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Save Attribute Similarity Matrix

In [38]:
filename = '~/./Documents/Harmonizome/Guide to Pharmacology/Output/gp_chem_attribute_similarity_matix_%s.tsv.zip'% str(datetime.date.today())[0:7].replace('-', '_')
attribute_similarity_matix.to_csv(filename, sep='\t', compression='gzip')

# Create Gene List

In [39]:
gene_list = uf.createGeneList(binary_matrix)

Progeres: 100%  1577 Out of 1577   

In [40]:
gene_list.head()

Unnamed: 0,GeneSym,GeneID
0,GJB2,2706
1,KCNH6,81033
2,TEK,7010
3,SLC40A1,30061
4,RORA,6095


In [41]:
gene_list.shape

(1577, 2)

# Save Gene List

In [42]:
filename = '~/./Documents/Harmonizome/Guide to Pharmacology/Output/gp_chem_gene_list_%s.tsv.zip'% str(datetime.date.today())[0:7].replace('-', '_')
gene_list.to_csv(filename, sep='\t', index=False, compression='gzip')

# Create Attribute List

In [43]:
attribute_list = uf.createAttributeList(binary_matrix)

In [44]:
attribute_list.head()

Unnamed: 0,Attributes
0,"[Ava<sup>9,10</sup>,Ava<sup>14,15</sup>]-Ac-hM..."
1,anlotinib
2,latanoprost (isopropyl ester)
3,I-CBP112
4,DSLET


In [45]:
attribute_list.shape

(7087, 1)

# Save Attribute List

In [46]:
filename = '~/./Documents/Harmonizome/Guide to Pharmacology/Output/gp_chem_attribute_list_%s.tsv.zip'% str(datetime.date.today())[0:7].replace('-', '_')
attribute_list.to_csv(filename, sep='\t', index=False, compression='gzip')

# Create Gene-Attribute Edge List

In [47]:
path = '/Users/moshesilverstein/Documents/Harmonizome/Guide to Pharmacology/Output/'

In [48]:
name = 'gp_chem_gene_attribute_edge_list'

In [49]:
uf.createGeneAttributeEdgeList(binary_matrix, gene_list, path, name)

Progeres: 100%  7087 Out of 7087   

 The number of statisticaly relevent gene-attribute associations is: 13759
