 # Guide to Pharmacology (Protein and Chemical Ligands of Receptors)

 Author: Moshe Silverstein <br/>
 Date: 8-17 <br/>
 Data Source: http://www.guidetopharmacology.org/download.jsp

 Reviewer: Charles Dai <br>
 Updated: 6-20

In [1]:
# appyter init
# The lambda gives appyter a callable that returns this notebook's globals().
# NOTE(review): presumably this is what enables the %%appyter cell magics used
# in later cells — confirm against the appyter documentation.
from appyter import magic
magic.init(lambda _=globals: _())

In [2]:
import sys
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

import harmonizome.utility_functions as uf
import harmonizome.lookup as lookup

In [3]:
# from clustergrammer_widget import *
# net = Network(clustergrammer_widget)

In [4]:
%load_ext autoreload
%autoreload 2

 ### Python Version

In [5]:
sys.version

'3.8.0 (default, Oct 28 2019, 16:14:01) \n[GCC 8.3.0]'

 # Initialization

 ### Choose Interaction

In [9]:
%%appyter code_eval
{% set ligand = ChoiceField(
    name='ligands',
    label='Ligands of receptors',
    choices={
        'Protein Ligands': "'Protein'",
        'Chemical Ligands': "'Chemical'"
    },
    default='Protein Ligands',
    section='data'
) %}

 ### Load Mapping Dictionaries

In [37]:
# symbol_lookup is passed to uf.mapgenesymbols and geneid_lookup to
# uf.createGeneList later in this notebook.
# NOTE(review): lookup.get_lookups() is defined outside this notebook; the
# recorded run shows it gathering 3 sources — presumably remote downloads, confirm.
symbol_lookup, geneid_lookup = lookup.get_lookups()

Gathering sources: 100%|██████████| 3/3 [00:09<00:00,  3.29s/it]


 ### Output Path

In [10]:
%%appyter code_exec

output_name = 'guide_to_pharm_' + {{ligand}}.lower()

path = 'Output/Guide-to-Pharm-' + {{ligand}}
if not os.path.exists(path):
    os.makedirs(path)

```python
output_name = 'guide_to_pharm_' + 'Protein'.lower()
path = 'Output/Guide-to-Pharm-' + 'Protein'
if not os.path.exists(path):
    os.makedirs(path)
```

In [11]:
%%appyter hide_code
{% do SectionField(
    name='data',
    title='Load Data',
    subtitle='Upload Files from the Guide to Pharmacology Interactions Data Set',
) %}

 # Load Data

In [29]:
%%appyter code_exec

# Only these four columns are read from the interactions file. usecols selects
# columns but does not reorder them, so the frame keeps the file's column order.
headers = ['target_species', 'ligand_gene_symbol', 'target_gene_symbol', 
            'ligand']
# FileField is an appyter template field; it renders to the path of the chosen
# CSV file (default: Input/Guide-to-Pharm/interactions.csv).
df = pd.read_csv({{FileField(
    constraint='.*\.csv$',
    name='interactions', 
    label='Interaction Data', 
    default='Input/Guide-to-Pharm/interactions.csv',
    section='data')
}}, usecols=headers)

```python
headers = ['target_species', 'ligand_gene_symbol', 'target_gene_symbol',
            'ligand']
df = pd.read_csv('Input/Guide-to-Pharm/interactions.csv', usecols=headers)
```

In [30]:
df.head()

Unnamed: 0,target_gene_symbol,target_species,ligand,ligand_gene_symbol
0,ALOX12,Human,ML355,
1,ALOX15,Human,PKUMDL_MH_1001,
2,ALOX15,Human,compound 34 [PMID: 20866075],
3,ALOX15,Human,ML351,
4,ALOX15B,Human,compound 21n [PMID: 17656086],


In [31]:
df.shape

(19775, 4)

 # Pre-process Data

 ## Get Relevant Data

In [32]:
# Get only certain species
# The pattern is a regex alternation, so any species string containing
# 'Human', 'Mouse' or 'Rat' is kept.
species_pattern = '|'.join(['Human', 'Mouse', 'Rat'])
# na=False counts rows with a missing species as non-matching; without it the
# mask would contain NaN and boolean indexing would raise.
df = df[df['target_species'].str.contains(species_pattern, na=False)]

In [33]:
%%appyter code_exec

if {{ligand}} == 'Protein':
    df = df[['target_gene_symbol', 'ligand_gene_symbol']]
elif {{ligand}} == 'Chemical':
    df = df[['target_gene_symbol', 'ligand']]

```python
if 'Protein' == 'Protein':
    df = df[['target_gene_symbol', 'ligand_gene_symbol']]
elif 'Protein' == 'Chemical':
    df = df[['target_gene_symbol', 'ligand']]
```

In [34]:
# Expand duplicate gene names
# A target cell may list several genes separated by '|'; rows with any missing
# value are dropped first, then each listed gene gets its own row and becomes
# the index.
df = df.dropna()
df = df.assign(target_gene_symbol=df['target_gene_symbol'].str.split('|'))
df = df.explode('target_gene_symbol').set_index('target_gene_symbol')

In [35]:
# Remove duplicate names of attribute
# When the ligand column lists several '|'-separated names, keep only the first.
attribute_names = df.iloc[:, 0]
df.iloc[:, 0] = attribute_names.map(lambda name: name.partition('|')[0])
df.head()

Unnamed: 0_level_0,ligand_gene_symbol
target_gene_symbol,Unnamed: 1_level_1
TNFRSF9,TNFSF9
ALOX5,ALOX5AP
Ackr3,Adm
Ackr3,Calca
ACKR3,CXCL12


 # Filter Data

 ## Map Gene Symbols to Up-to-date Approved Gene Symbols

In [38]:
%%appyter code_exec
if {{ligand}} == 'protein':
    df = df.reset_index().set_index('ligand_gene_symbol')
    df = uf.mapgenesymbols(df, symbol_lookup)
    df = df.reset_index().set_index('target_gene_symbol')
df = uf.mapgenesymbols(df, symbol_lookup)
df.shape

```python
if 'Protein' == 'protein':
    df = df.reset_index().set_index('ligand_gene_symbol')
    df = uf.mapgenesymbols(df, symbol_lookup)
    df = df.reset_index().set_index('target_gene_symbol')
df = uf.mapgenesymbols(df, symbol_lookup)
df.shape
```

100%|██████████| 1330/1330 [00:00<00:00, 405173.18it/s]


 # Analyze Data

 ## Create Binary Matrix

In [None]:
# Build the matrix that every later step in this notebook works from.
# NOTE(review): uf.createBinaryMatrix is defined outside this notebook; presumably
# rows are target genes, columns are ligands and 1 marks an interaction — confirm.
binary_matrix = uf.createBinaryMatrix(df)
binary_matrix.head()

In [None]:
binary_matrix.shape

In [None]:
# Save the matrix using `path` and the base name '<output_name>_binary_matrix',
# requesting uint8 as the dtype.
# NOTE(review): 'npz' presumably selects a NumPy .npz archive — confirm in uf.saveData.
uf.saveData(binary_matrix, path, output_name + '_binary_matrix', 
            compression='npz', dtype=np.uint8)

 ## Create Gene List

In [None]:
# Derive the gene table from the matrix together with geneid_lookup.
# NOTE(review): presumably one row per gene with its gene ID attached; the exact
# columns are set by uf.createGeneList, which is not visible here — confirm.
gene_list = uf.createGeneList(binary_matrix, geneid_lookup)
gene_list.head()

In [None]:
gene_list.shape

In [None]:
# Save the gene list with ext='tsv' and gzip compression.
# NOTE(review): index=False presumably keeps the row index out of the file —
# confirm how uf.saveData forwards it.
uf.saveData(gene_list, path, output_name + '_gene_list',
            ext='tsv', compression='gzip', index=False)

 ## Create Attribute List

In [None]:
# Derive the attribute (ligand) table from the matrix.
# NOTE(review): presumably one row per matrix column; the exact layout is set by
# uf.createAttributeList, which is not visible here — confirm.
attribute_list = uf.createAttributeList(binary_matrix)
attribute_list.head()

In [None]:
attribute_list.shape

In [None]:
# Save the attribute list with ext='tsv' and gzip compression. Unlike the gene
# list, no index=False is passed here.
# NOTE(review): presumably the index carries the attribute names and is meant to
# be written — confirm.
uf.saveData(attribute_list, path, output_name + '_attribute_list',
            ext='tsv', compression='gzip')

 ## Create Gene and Attribute Set Libraries

In [None]:
# Passes the matrix, `path` and the base name '<output_name>_gene_up_set'.
# NOTE(review): presumably writes a gene set library file under `path`; the file
# format and set orientation are defined in uf.createUpGeneSetLib — confirm.
uf.createUpGeneSetLib(binary_matrix, path, output_name + '_gene_up_set')

In [None]:
# Passes the matrix, `path` and the base name '<output_name>_attribute_up_set'.
# NOTE(review): presumably writes an attribute set library file under `path`; the
# format is defined in uf.createUpAttributeSetLib — confirm.
uf.createUpAttributeSetLib(binary_matrix, path, 
                           output_name + '_attribute_up_set')

 ## Create Attribute Similarity Matrix

In [None]:
# The matrix is transposed before the call so that attributes, rather than
# genes, are the items compared; 'jaccard' is the requested metric.
# NOTE(review): assumes uf.createSimilarityMatrix compares rows, and that
# sparse=True only changes the internal representation — confirm.
attribute_similarity_matrix = uf.createSimilarityMatrix(binary_matrix.T, 'jaccard', sparse=True)
attribute_similarity_matrix.head()

In [None]:
# Save the attribute similarity matrix, requesting float32 as the dtype.
# NOTE(review): symmetric=True presumably lets uf.saveData store only one
# triangle of the matrix — confirm.
uf.saveData(attribute_similarity_matrix, path,
            output_name + '_attribute_similarity_matrix', 
            compression='npz', symmetric=True, dtype=np.float32)

In [None]:
# net.load_df(attribute_similarity_matrix.iloc[:,:].copy())
# net.filter_N_top('row', rank_type='sum', N_top=300)
# net.cluster()
# net.widget()

 ## Create Gene Similarity Matrix

In [None]:
# Same call as for attributes but on the untransposed matrix, so genes are the
# items compared; 'jaccard' is the requested metric.
# NOTE(review): assumes uf.createSimilarityMatrix compares rows — confirm.
gene_similarity_matrix = uf.createSimilarityMatrix(binary_matrix, 'jaccard', sparse=True)
gene_similarity_matrix.head()

In [None]:
# Save the gene similarity matrix with the same options as the attribute one
# (npz, symmetric=True, float32).
# NOTE(review): symmetric=True presumably lets uf.saveData store only one
# triangle of the matrix — confirm.
uf.saveData(gene_similarity_matrix, path, 
            output_name + '_gene_similarity_matrix',
            compression='npz', symmetric=True, dtype=np.float32)

 ## Create Gene-Attribute Edge List

In [None]:
# Passes the matrix together with the attribute and gene tables built earlier in
# this notebook, plus `path` and the base name '<output_name>_gene_attribute_edge_list'.
# NOTE(review): presumably writes one gene-attribute pair per non-zero matrix
# entry; the format is defined in uf.createGeneAttributeEdgeList — confirm.
uf.createGeneAttributeEdgeList(binary_matrix, attribute_list, gene_list, 
                               path, output_name + '_gene_attribute_edge_list')

 # Create Downloadable Save File

In [None]:
# NOTE(review): presumably bundles everything under `path` into the
# output_archive.zip referenced by the download link in this notebook — confirm
# in uf.createArchive.
uf.createArchive(path)

 ### Link to download output files: [click here](./output_archive.zip)