# Project IronFox MK I

This project is a sequel to Project Fire Fox. It documents the creation of the function that will be used later in querying the database for the purpose of rendering the information on the website.

In [1]:
#calling library
from kinase_declarative import *
from sqlalchemy import create_engine, or_
from sqlalchemy.orm import sessionmaker
from pprint import pprint

In [2]:
#create engine and bine the engine
engine = create_engine("sqlite:///kinase_database.db")
Base.metadata.bind = engine

In [3]:
#create a session object
session = sessionmaker(bind=engine)
s = session()

# Intermediate kinase results page

This section documents the creation of functions for the following page.

![kinase_intermediate_results_page.png](attachment:kinase_intermediate_results_page.png)

In [8]:
kinase_input = "AKT3_HUMAN"
kinase_query = s.query(KinaseGeneMeta).join(KinaseGeneName).filter(or_(KinaseGeneName.gene_alias==kinase_input, KinaseGeneMeta.uniprot_entry==kinase_input,\
                                   KinaseGeneMeta.uniprot_number==kinase_input, KinaseGeneMeta.protein_name==kinase_input)).all()
for row in kinase_query:
    print(row.to_dict())

{'protein_name': 'RAC-gamma serine/threonine-protein kinase', 'uniprot_number': 'Q9Y243', 'uniprot_entry': 'AKT3_HUMAN', 'gene_name': 'AKT3', 'kinase_family': 'AGC Ser/Thr protein kinase family'}


In [34]:
def get_gene_protein_name(kinase_input):
    """
    Returns a list of dictionary.
    In the dictionary, there are gene name and protein name.
    Returns empty list when no match is found.
    >> kin = "AKT"
    >> get_gene_protein_name(kin)
    [{'Gene Name': 'AKT', 'Protein Name': 'RAC-alpha serine/threonine-protein kinase'}, 
    {'Gene Name': 'AKT', 'Protein Name': 'RAC-beta serine/threonine-protein kinase'}, 
    {'Gene Name': 'AKT', 'Protein Name': 'RAC-gamma serine/threonine-protein kinase'}]
    """
    like_kin = "%{}%".format(kinase_input)
    tmp = []
    kinase_query = s.query(KinaseGeneMeta).join(KinaseGeneName).filter(or_(KinaseGeneName.gene_alias.like(like_kin), KinaseGeneMeta.uniprot_entry.like(like_kin),\
                                   KinaseGeneMeta.uniprot_number.like(like_kin), KinaseGeneMeta.protein_name.like(like_kin))).all()
    for row in kinase_query:
        results = {}
        results["Gene_Name"] = row.to_dict()["gene_name"]
        results["Protein_Name"] = row.to_dict()["protein_name"]
        tmp.append(results)
    return tmp
kin = "AKT"
uni_num = "Q9Y243"
pprint(get_gene_protein_name(kin))
print(get_gene_protein_name(uni_num))

[{'Gene Name': 'AKT1',
  'Protein Name': 'RAC-alpha serine/threonine-protein kinase'},
 {'Gene Name': 'AKT2',
  'Protein Name': 'RAC-beta serine/threonine-protein kinase'},
 {'Gene Name': 'AKT3',
  'Protein Name': 'RAC-gamma serine/threonine-protein kinase'}]
[{'Gene Name': 'AKT3', 'Protein Name': 'RAC-gamma serine/threonine-protein kinase'}]


# Individual kinase page

The following section documents the creation of function for individual kinase. There will be multiple functions for this section.

![individual_kinase_page.png](attachment:individual_kinase_page.png)

## Function to return gene name, family, protein name, uniprot entry, uniprot number.

In [38]:
kinase = "AKT3"
kinase = s.query(KinaseGeneMeta).filter(KinaseGeneMeta.gene_name==kinase).one()
kinase.to_dict()

{'protein_name': 'RAC-gamma serine/threonine-protein kinase',
 'uniprot_number': 'Q9Y243',
 'uniprot_entry': 'AKT3_HUMAN',
 'gene_name': 'AKT3',
 'kinase_family': 'AGC Ser/Thr protein kinase family'}

In [43]:
def get_gene_metadata_from_gene(kinase_str):
    """
    Takes in a gene name as a string then output a dictionary.
    >> get_gene_metadata_from_gene("MAPK1")
    {'gene_name': 'MAPK1', 
    'kinase_family': 'CMGC Ser/Thr protein kinase family',
    'protein_name': 'Mitogen-activated protein kinase 1',
    'uniprot_entry': 'MK01_HUMAN',
    'uniprot_number': 'P28482'}
    """
    kinase_obj = s.query(KinaseGeneMeta).filter(KinaseGeneMeta.gene_name==kinase_str).one()
    return kinase_obj.to_dict()
pprint(get_gene_metadata("MAPK1"))

{'gene_name': 'MAPK1',
 'kinase_family': 'CMGC Ser/Thr protein kinase family',
 'protein_name': 'Mitogen-activated protein kinase 1',
 'uniprot_entry': 'MK01_HUMAN',
 'uniprot_number': 'P28482'}


## Function to return subcellular location of kinase

In [45]:
kinase = "ALPK1"
kinase_query = s.query(KinaseSubcellularLocation).join(KinaseGeneName).filter(KinaseGeneName.gene_alias==kinase).all()
for row in kinase_query:
    print(row.subcellular_location)

Cytosol
Cytoplasm


In [8]:
def get_subcellular_location_from_gene(kinase_gene):
    """
    Returns a list of dictionary.
    The dictionary has the gene as the key and the subcellular location in the list.
    >> get_subcellular_location_from_gene('MAPK1')
    {'Gene_Name': 'MAPK1', 'Subcellular_Locations': ['Cytoplasm', 'Cytoskeleton', 'Membrane', 'Nucleus', 'Caveola', 
    'Microtubule Organizing Center', 'Spindle', 'Plasma Membrane', 'Mitotic Spindle', 'Focal Adhesion', 'Cytosol', 
    'Golgi Apparatus', 'Late Endosome', 'Early Endosome', 'Mitochondrion', 'Azurophil Granule Lumen', 'Nucleoplasm', 
    'Extracellular Region', 'Cell', 'Pseudopodium', 'Perikaryon', 'Protein-Containing Complex', 'Dendrite Cytoplasm', 
    'Axon', 'Postsynaptic Density']}
    """
    tmp = []
    results = {}
    results["Gene_Name"] = kinase_gene
    kinase_query = s.query(KinaseSubcellularLocation).join(KinaseGeneName).filter(KinaseGeneName.gene_alias==kinase_gene).all()
    for row in kinase_query:
        tmp.append(row.subcellular_location)
    results["Subcellular_Locations"] = tmp
    return results
print(get_subcellular_location_from_gene('MAPK1'))

{'Gene_Name': 'MAPK1', 'Subcellular_Locations': ['Cytoplasm', 'Cytoskeleton', 'Membrane', 'Nucleus', 'Caveola', 'Microtubule Organizing Center', 'Spindle', 'Plasma Membrane', 'Mitotic Spindle', 'Focal Adhesion', 'Cytosol', 'Golgi Apparatus', 'Late Endosome', 'Early Endosome', 'Mitochondrion', 'Azurophil Granule Lumen', 'Nucleoplasm', 'Extracellular Region', 'Cell', 'Pseudopodium', 'Perikaryon', 'Protein-Containing Complex', 'Dendrite Cytoplasm', 'Axon', 'Postsynaptic Density']}


## Function to return the inhibitors from a kinase

In [49]:
def get_inhibitors_from_gene(kinase_gene):
    """
    Take a string and return a list of dictionaries.
    Returns empty list if there are no inhibitors.
    >> get_inhibitors_from_gene("SGK1")
    ['GSK650394A', 'SGK-Sanofi-14i','SGK1-Sanofi-14g', 'SGK1-Sanofi-14h', 'SGK1-Sanofi-14n']
    """
    results = []
    kinase_query = s.query(KinaseGeneName).filter(KinaseGeneName.gene_alias==kinase).one()
    for inhibitor in kinase_query.inhibitors:
        results.append(inhibitor.inhibitor)
    return results
kinase = "SGK1"
get_inhibitors_from_gene(kinase)

['GSK650394A',
 'SGK-Sanofi-14i',
 'SGK1-Sanofi-14g',
 'SGK1-Sanofi-14h',
 'SGK1-Sanofi-14n']

## Function to return substrates and phosphosites from a kinase

In [4]:
kinase_gene = "JAK2"
kinase_obj = s.query(KinaseGeneName).filter(KinaseGeneName.gene_alias==kinase_gene).one()
for phosphosite in kinase_obj.phosphosites:
    pprint(phosphosite.substrate.substrate_name)

'ARHGEF1'
'CCR2'
'JAK2'
'JAK2'
'JAK2'


In [5]:
tmp = {}
kinase_gene = "JAK2"
kinase_obj = s.query(KinaseGeneName).filter(KinaseGeneName.gene_alias==kinase_gene).one()
for phosphosite in kinase_obj.phosphosites:
#     tmp = {}
    gene = phosphosite.substrate.substrate_name
    print(gene)
    print(phosphosite.to_dict())
    if gene in tmp:
        tmp[gene].append(phosphosite.to_dict())
    else:
        tmp[gene] = [phosphosite.to_dict()]
pprint(tmp)

ARHGEF1
{'phosphosite': 'Y738', 'chromosome': 19, 'karyotype_band': 'q13.2', 'strand': 1, 'start_position': 41904999, 'end_position': 41905001, 'neighbouring_sequences': 'WDQEAQIyELVAQTV'}
CCR2
{'phosphosite': 'Y139', 'chromosome': 3, 'karyotype_band': 'p21.31', 'strand': 1, 'start_position': 46357942, 'end_position': 46357944, 'neighbouring_sequences': 'ILLTIDRyLAIVHAV'}
JAK2
{'phosphosite': 'Y570', 'chromosome': 9, 'karyotype_band': 'p24.1', 'strand': 1, 'start_position': 5072558, 'end_position': 5072560, 'neighbouring_sequences': 'VRREVGDyGQLHETE'}
JAK2
{'phosphosite': 'Y119', 'chromosome': 9, 'karyotype_band': 'p24.1', 'strand': 1, 'start_position': 5044407, 'end_position': 5044409, 'neighbouring_sequences': 'VLYRIRFyFPRWYCS'}
JAK2
{'phosphosite': 'S523', 'chromosome': 9, 'karyotype_band': 'p24.1', 'strand': 1, 'start_position': 5069978, 'end_position': 5069980, 'neighbouring_sequences': 'GVSDVPtsPTLQRPT'}
{'ARHGEF1': [{'chromosome': 19,
              'end_position': 41905001,
    

In [9]:
def get_substrates_phosphosites_from_gene(kinase_gene):
    """
    Takes in a gene name of a kinase and return a dictionary of dictionaries.
    In each dictionary (inner), the key is the substrate name; the value is a list of dictionary containing the metadata
    of phosphosites.
    >> get_substrates_phosphosites_from_gene("JAK2")
    {'ARHGEF1': [{'phosphosite': 'Y738', 'chromosome': 19, 'karyotype_band': 'q13.2', 'strand': 1, 'start_position': 41904999, 
    'end_position': 41905001, 'neighbouring_sequences': 'WDQEAQIyELVAQTV'}],...}
    """
    tmp = {}
    kinase_gene = "JAK2"
    kinase_obj = s.query(KinaseGeneName).filter(KinaseGeneName.gene_alias==kinase_gene).one()
    for phosphosite in kinase_obj.phosphosites:
        gene = phosphosite.substrate.substrate_name
        if gene in tmp:
            tmp[gene].append(phosphosite.to_dict())
        else:
            tmp[gene] = [phosphosite.to_dict()]
    return tmp
print(get_substrates_phosphosites_from_gene("JAK2"))

{'ARHGEF1': [{'phosphosite': 'Y738', 'chromosome': 19, 'karyotype_band': 'q13.2', 'strand': 1, 'start_position': 41904999, 'end_position': 41905001, 'neighbouring_sequences': 'WDQEAQIyELVAQTV'}], 'CCR2': [{'phosphosite': 'Y139', 'chromosome': 3, 'karyotype_band': 'p21.31', 'strand': 1, 'start_position': 46357942, 'end_position': 46357944, 'neighbouring_sequences': 'ILLTIDRyLAIVHAV'}], 'JAK2': [{'phosphosite': 'Y570', 'chromosome': 9, 'karyotype_band': 'p24.1', 'strand': 1, 'start_position': 5072558, 'end_position': 5072560, 'neighbouring_sequences': 'VRREVGDyGQLHETE'}, {'phosphosite': 'Y119', 'chromosome': 9, 'karyotype_band': 'p24.1', 'strand': 1, 'start_position': 5044407, 'end_position': 5044409, 'neighbouring_sequences': 'VLYRIRFyFPRWYCS'}, {'phosphosite': 'S523', 'chromosome': 9, 'karyotype_band': 'p24.1', 'strand': 1, 'start_position': 5069978, 'end_position': 5069980, 'neighbouring_sequences': 'GVSDVPtsPTLQRPT'}]}


# Returning kinase for the substrate-phospho-inhibitor analysis page

In [11]:
#making a dummy test list
import pandas as pd
sub_pho_df = pd.read_csv("new_clean_human_kinase_substrates.csv", index_col=0)
sub_pho_df.head()

Unnamed: 0,GENE,KINASE,KIN_ACC_ID,SUBSTRATE,SUB_GENE_ID,SUB_ACC_ID,SUB_GENE,SUB_MOD_RSD,SITE_GRP_ID,SITE_+/-7_AA,DOMAIN,SUB_ENTRY_NAME,KIN_ENTRY_NAME
0,EIF2AK1,HRI,Q9BQI3,eIF2-alpha,1965.0,P05198,EIF2S1,S52,447635,MILLsELsRRRIRsI,S1,IF2A_HUMAN,E2AK1_HUMAN
1,EIF2AK1,HRI,Q9BQI3,eIF2-alpha,1965.0,P05198,EIF2S1,S49,450210,IEGMILLsELsRRRI,S1,IF2A_HUMAN,E2AK1_HUMAN
2,PRKCD,PKCD,Q05655,HDAC5,10014.0,Q9UQL6,HDAC5,S259,447995,FPLRkTAsEPNLKVR,,HDAC5_HUMAN,KPCD_HUMAN
3,PRKCD,PKCD,Q05655,PTPRA iso2,5786.0,P18433,PTPRA,S204,447612,PLLARSPsTNRKYPP,,PTPRA_HUMAN,KPCD_HUMAN
4,PRKCD,PKCD,Q05655,hnRNP K,3190.0,P61978,HNRNPK,S302,457408,GrGGrGGsrArNLPL,,HNRPK_HUMAN,KPCD_HUMAN


In [13]:
#pd.concat([df1['c'], df2['c']], axis=1, keys=['df1', 'df2'])
dummy_df = pd.concat([sub_pho_df["SUB_GENE"], sub_pho_df["SUB_MOD_RSD"]], axis=1, keys=["substrate", "phosphosite"])
dummy_df.head()

Unnamed: 0,substrate,phosphosite
0,EIF2S1,S52
1,EIF2S1,S49
2,HDAC5,S259
3,PTPRA,S204
4,HNRNPK,S302


In [20]:
for i in len(dummy_df):
    print(dummy_df.iloc[i])

TypeError: 'int' object is not iterable

In [None]:
area_dict = lake.set_index('area').T.to_dict('records')[0]
area