# Guide to Pharmacology (Protein Ligands of Receptors)

Author: Moshe Silverstein <br/>
Date: 8-17 <br/>
Data Source: http://www.guidetopharmacology.org/download.jsp

In [1]:
import sys, datetime, os
import numpy as np
import pandas as pd
import importlib
import my_functions as mf
%matplotlib inline

In [2]:
importlib.reload(mf)

<module 'my_functions' from '/Users/moshesilverstein/Documents/Harmonizome/Guide to Pharmacology/my_functions.py'>

# Load Data

In [3]:
df = pd.read_csv('Input/interactions.csv', low_memory=False)

In [4]:
df.head()

Unnamed: 0,target,target_id,target_gene_symbol,target_uniprot,target_ligand,target_ligand_id,target_ligand_gene_symbol,target_ligand_uniprot,target_ligand_pubchem_sid,target_species,...,affinity_low,original_affinity_units,original_affinity_low_nm,original_affinity_median_nm,original_affinity_high_nm,original_affinity_relation,assay_description,receptor_site,ligand_context,pubmed_id
0,12S-LOX,1387.0,ALOX12,P18054,,,,,,Human,...,,IC50,,340.0,,=,,,,24393039
1,15-LOX-1,1388.0,ALOX15,P16050,,,,,,Human,...,,Kd,,3900.0,,=,Determined by surface plasmon\r\nresonance (SPR).,,,26290290
2,15-LOX-1,1388.0,ALOX15,P16050,,,,,,Human,...,,Ki,,10.0,,<,,,,20866075
3,15-LOX-2,1389.0,ALOX15B,O15296,,,,,,Human,...,,IC50,,51.0,,=,,,,17656086
4,3-phosphoinositide dependent protein kinase 1,1519.0,PDPK1,O15530,,,,,,Human,...,,IC50,,33.0,,=,,,,11896604


In [5]:
df.shape

(17191, 34)

# Get Relevant Data 

In [6]:
df = df[df['target_species'].isin(['Human', 'Mouse', 'Rat'])] # Grap only selective species of target
df = df[df['target_species'].isin(['Human', 'Mouse|Rat', 
                                   'Human|Mouse|Rat', 
                                   'Human|Rat',
                                  'Mouse',
                                  'Rat'])] # Grap only selective species of ligand

In [7]:
df = df[['target_gene_symbol', 'ligand_gene_symbol']]

In [8]:
df.dropna(how='any', inplace=True)

In [9]:
df_interactions = pd.DataFrame()

for i, index in enumerate(df.index):
    
    progressPercent = ((i+1)/len(df.index))*100

    sys.stdout.write("Progeres: %d%%  %d Out of %d   \r" % (progressPercent, (i+1), len(df.index)))
    sys.stdout.flush()
    
    
    lst2 = df.ix[index, 'ligand_gene_symbol'].split('|')
    lst1 = [df.ix[index, 'target_gene_symbol']]*(len(lst2))
    temp = pd.DataFrame()
    temp['Ligand'] = lst2
    temp['Target'] = lst1
    df_interactions = pd.concat([df_interactions, temp]) 

Progeres: 100%  1070 Out of 1070   

In [10]:
df_interactions.head()

Unnamed: 0,Ligand,Target
0,TNFSF9,TNFRSF9
0,ALOX5AP,ALOX5
0,Adm,Ackr3
0,CXCL12,ACKR3
0,CXCL11,ACKR3


In [11]:
df_interactions.shape

(1769, 2)

# Map Gene Symbols To Up-to-date Approved Gene Symbols

In [12]:
df_interactions.set_index('Target', inplace=True)

In [13]:
mf.mapgenesymbols(df_interactions)

Progeres: 100%  1769 Out of 1769   

In [14]:
df_interactions.reset_index(inplace=True)

In [15]:
df_interactions.set_index('Ligand', inplace=True)

In [16]:
mf.mapgenesymbols(df_interactions)

Progeres: 100%  1587 Out of 1587   

# Drop Duplicates

In [17]:
df_interactions.reset_index(inplace=True)

In [18]:
df_interactions.drop_duplicates(inplace=True)

In [19]:
df_interactions.shape

(427, 2)

# Create Binary Matrix

In [20]:
binary_matrix = mf.createBinaryMatix(df_interactions)

Progeres: 100%  224 Out of 224   

In [21]:
binary_matrix.head()

Unnamed: 0,SSTR1,RXFP2,FSHR,NPY4R,NPR3,OXTR,CXCR1,TSHR,ACKR4,TACR1,...,GALR2,LEPR,MC5R,PDE1A,AGTR1,NPR1,FLT1,NTSR2,RXFP4,IL10RA
CCL16,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
IGF2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
NPB,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ADM2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
CD40LG,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [22]:
binary_matrix.shape

(224, 196)

# Save Binary Matrix

In [23]:
filename = '~/./Documents/Harmonizome/Guide to Pharmacology/Output/gp_protein_binary_matrix_%s.tsv.zip'% str(datetime.date.today())[0:7].replace('-', '_')
binary_matrix.to_csv(filename, sep='\t', compression='gzip')

# Create Gene Set Library

In [24]:
path = '/Users/moshesilverstein/Documents/Harmonizome/Guide to Pharmacology/Output/'

In [25]:
name = 'gp_protein_gene_set'

In [26]:
mf.createUpGeneSetLib(binary_matrix, path, name)

Progeres: 100%  196 Out of 196   

# Create Attribute Library

In [27]:
path = '/Users/moshesilverstein/Documents/Harmonizome/Guide to Pharmacology/Output/'

In [28]:
name = 'gp_protein_attribute_set'

In [29]:
mf.createUpAttributeSetLib(binary_matrix, path, name)

Progeres: 100%  224 Out of 224   

# Create Gene Similarity Matrix

In [30]:
gene_similarity_matix = mf.createSimilarityMatrix(binary_matrix, 'jaccard')

In [31]:
gene_similarity_matix.head()

Unnamed: 0,CCL16,IGF2,NPB,ADM2,CD40LG,HEBP1,CCL17,CGB3,CCK,CCL1,...,CD274,CXCL8,EDN3,CXCL17,GAL,RSPO1,CAMP,POMC,GNRH2,IL10
CCL16,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
IGF2,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
NPB,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ADM2,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CD40LG,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Save Gene Similarity Matrix

In [32]:
filename = '~/./Documents/Harmonizome/Guide to Pharmacology/Output/gp_protein_gene_similarity_matix_%s.tsv.zip'% str(datetime.date.today())[0:7].replace('-', '_')
gene_similarity_matix.to_csv(filename, sep='\t', compression='gzip')

# Create Attribute Similarity matrix

In [33]:
attribute_similarity_matix = mf.createSimilarityMatrix(binary_matrix.T, 'jaccard')

In [34]:
attribute_similarity_matix.head()

Unnamed: 0,SSTR1,RXFP2,FSHR,NPY4R,NPR3,OXTR,CXCR1,TSHR,ACKR4,TACR1,...,GALR2,LEPR,MC5R,PDE1A,AGTR1,NPR1,FLT1,NTSR2,RXFP4,IL10RA
SSTR1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
RXFP2,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0
FSHR,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
NPY4R,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
NPR3,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Save Attribute Similarity Matrix

In [35]:
filename = '~/./Documents/Harmonizome/Guide to Pharmacology/Output/gp_protein_attribute_similarity_matix_%s.tsv.zip'% str(datetime.date.today())[0:7].replace('-', '_')
attribute_similarity_matix.to_csv(filename, sep='\t', compression='gzip')

# Create Gene List

In [36]:
gene_list = mf.createGeneList(binary_matrix)

Progeres: 100%  224 Out of 224   

In [37]:
gene_list.head()

Unnamed: 0,GeneSym,GeneID
0,CCL16,6360
1,IGF2,3481
2,NPB,256933
3,ADM2,79924
4,CD40LG,959


In [38]:
gene_list.shape

(224, 2)

# Save Gene List

In [39]:
filename = '~/./Documents/Harmonizome/Guide to Pharmacology/Output/gp_protein_gene_list_%s.tsv.zip'% str(datetime.date.today())[0:7].replace('-', '_')
gene_list.to_csv(filename, sep='\t', index=False, compression='gzip')

# Create Attribute List

In [40]:
attribute_list = mf.createAttributeList(binary_matrix)

In [41]:
attribute_list.head()

Unnamed: 0,Attributes
0,SSTR1
1,RXFP2
2,FSHR
3,NPY4R
4,NPR3


In [42]:
attribute_list.shape

(196, 1)

# Save Attribute List

In [43]:
filename = '~/./Documents/Harmonizome/Guide to Pharmacology/Output/gp_protein_attribute_list_%s.tsv.zip'% str(datetime.date.today())[0:7].replace('-', '_')
attribute_list.to_csv(filename, sep='\t', index=False, compression='gzip')

# Create Gene-Attribute Edge List

In [44]:
path = '/Users/moshesilverstein/Documents/Harmonizome/Guide to Pharmacology/Output/'

In [45]:
name = 'gp_protein_gene_attribute_edge_list'

In [46]:
mf.createGeneAttributeEdgeList(binary_matrix, gene_list, path, name)

Progeres: 100%  196 Out of 196   

 The number of statisticaly relevent gene-attribute associations is: 427
