In [1]:
from regraph import plot_graph

from kami import KamiCorpus
from kami.data_structures.entities import *
from kami.data_structures.interactions import *
from kami.data_structures.definitions import *
from kami.exporters.kappa import generate_kappa, KappaInitialCondition

In [2]:
# Create an empty KAMI corpus.
# The id passed here ("EGFR_signalling") shows up as the prefix of the
# nugget ids returned when interactions are added further down.
corpus = KamiCorpus("EGFR_signalling")

In [3]:
# Create an interaction object

# EGFR protoform, identified by the accession "P00533"
# (serialized as 'uniprotid' by to_json()).
egfr = Protoform("P00533")
# Kinase region (start=712, end=979) carrying the state test activity=True.
kinase = Region(
    name="Protein kinase",
    start=712,
    end=979,
    states=[State("activity", True)])

# Actor = the EGFR protoform acting through its kinase region.
egfr_kinase = RegionActor(
    protoform=egfr,
    region=kinase)

# Modification where enzyme and substrate are both EGFR: the enzyme side acts
# through the active kinase region, the target is residue Y1092 whose
# "phosphorylation" state is tested as False, and `value=True` is the new value.
# NOTE(review): exact semantics of `value`/`rate` are defined by KAMI — confirm
# against the library documentation.
interaction1 = LigandModification(
    enzyme=egfr_kinase,
    substrate=egfr,
    target= Residue(
        "Y", 1092,
        state=State("phosphorylation", False)),
    value=True,
    rate=1,
    desc="Phosphorylation of EGFR homodimer")

In [4]:
# Display the dictionary (JSON-like) serialization of the interaction.
interaction1.to_json()

{'desc': 'Phosphorylation of EGFR homodimer',
 'enzyme': {'data': {'protoform': {'bound_to': [],
    'regions': [],
    'residues': [],
    'sites': [],
    'states': [],
    'unbound_from': [],
    'uniprotid': 'P00533'},
   'region': {'bound_to': [],
    'end': 979,
    'name': 'Protein kinase',
    'residues': [],
    'sites': [],
    'start': 712,
    'states': [{'name': 'activity', 'test': True}],
    'unbound_from': []}},
  'type': 'RegionActor'},
 'enzyme_bnd_subactor': 'protoform',
 'rate': 1,
 'substrate': {'data': {'bound_to': [],
   'regions': [],
   'residues': [],
   'sites': [],
   'states': [],
   'unbound_from': [],
   'uniprotid': 'P00533'},
  'type': 'Protoform'},
 'substrate_bnd_subactor': 'protoform',
 'target': {'data': {'aa': ['Y'],
   'loc': 1092,
   'state': {'name': 'phosphorylation', 'test': False}},
  'type': 'Residue'},
 'type': 'LigandModification',
 'value': True}

In [5]:
# Aggregate the interaction object to the corpus.
# The returned nugget id is kept so the nugget can be looked up afterwards.
nugget1_id = corpus.add_interaction(interaction1)



Loading InterPro Data version 66


In [6]:
# Access the newly created nugget
nugget = corpus.get_nugget(nugget1_id)
print(corpus.get_nugget_desc(nugget1_id))
# Maps every nugget node to the action graph (AG) node it is identified with.
nugget_identification = corpus.get_nugget_typing(nugget1_id)
print("Nugget nodes: ")
# Maps AG nodes to their meta-model (MM) types. It does not change inside the
# loop, so it is fetched once instead of once per nugget node.
action_graph_typing = corpus.get_action_graph_typing()
for n in nugget.nodes():
    ag_node = nugget_identification[n]
    print("-> '{}' - identified as '{}' in the AG, typed as '{}' in the MM".format(
        n, ag_node, action_graph_typing[ag_node]))

Phosphorylation of EGFR homodimer
Nugget nodes: 
-> 'P00533' - identified as 'P00533_P00533_1' in the AG, typed as 'protoform' in the MM
-> 'P00533_region_Protein kinase_712_979' - identified as 'P00533_region_Protein kinase_712_979' in the AG, typed as 'region' in the MM
-> 'P00533_region_Protein kinase_712_979_activity' - identified as 'P00533_region_Protein kinase_712_979_activity' in the AG, typed as 'state' in the MM
-> 'P00533_1' - identified as 'P00533_P00533_1' in the AG, typed as 'protoform' in the MM
-> 'mod' - identified as 'mod' in the AG, typed as 'mod' in the MM
-> 'P00533_1_Y1092' - identified as 'P00533_1_Y1092' in the AG, typed as 'residue' in the MM
-> 'P00533_1_Y1092_phosphorylation' - identified as 'P00533_1_Y1092_phosphorylation' in the AG, typed as 'state' in the MM
-> 'is_bnd' - identified as 'is_bnd' in the AG, typed as 'bnd' in the MM


In [7]:
# Manually add a new protoform.
# The value returned by add_protoform is used below as the id of the node
# the new components get attached to.
new_protoform_node = corpus.add_protoform(Protoform("P62993"))
print(new_protoform_node)

# Access protoforms (displays the protoform nodes currently in the corpus)
corpus.protoforms()

P62993


['P00533_P00533_1', 'P62993']

In [8]:
# Manually add new components to an arbitrary protoform

# Attach a site named "New site" to the protoform node created above,
# then list the sites attached to that node.
corpus.add_site(Site("New site"), new_protoform_node)
print(corpus.get_attached_sites(new_protoform_node))

['P62993_site_New site']


In [9]:
# GRB2 acting through its SH2 region.
grb2 = Protoform("P62993")
grb2_sh2 = RegionActor(
    protoform=grb2,
    region=Region(name="SH2"))

# SHC1 acting through a "pY" site that contains Y317 with phosphorylation=True.
shc1 = Protoform("P29353")
shc1_pY = SiteActor(
    protoform=shc1,
    site=Site(
        name="pY",
        residues=[Residue("Y", 317, State("phosphorylation", True))]))
# NOTE(review): this rebinds `interaction1`, which previously held the
# LigandModification created earlier in the notebook; re-running earlier cells
# after this one will see a different object under the same name.
interaction1 = Binding(grb2_sh2, shc1_pY)

# Same GRB2 SH2 actor, but with residue tests at position 90
# (S with test=True, D with test=False).
grb2_sh2_with_residues = RegionActor(
    protoform=grb2,
    region=Region(
        name="SH2",
        residues=[
            Residue("S", 90, test=True),
            Residue("D", 90, test=False)]))

# EGFR acting through a "pY" site that contains Y1092 with phosphorylation=True.
# It is not used by any interaction in this cell; it is referenced later by an
# initial condition.
egfr_pY = SiteActor(
    protoform=egfr,
    site=Site(
        name="pY",
        residues=[Residue("Y", 1092, State("phosphorylation", True))]))

# NOTE(review): this binds to `shc1_pY`, not to the `egfr_pY` actor defined
# just above — confirm that SHC1 is the intended partner here.
interaction2 = Binding(grb2_sh2_with_residues, shc1_pY)

In [10]:
# Aggregate both binding interactions; the displayed value is the list of
# ids of the nuggets created for them.
corpus.add_interactions([interaction1, interaction2])

['EGFR_signalling_nugget_2', 'EGFR_signalling_nugget_3']

In [11]:
# List the state nodes attached to the EGFR kinase region node of the action graph.
corpus.get_attached_states('P00533_region_Protein kinase_712_979')

['P00533_region_Protein kinase_712_979_activity']

In [14]:
# Create a protein definition for GRB2

# Reference protoform: an SH2 region with two residue tests at position 90
# (S with test=True, D with test=False).
protoform = Protoform(
    "P62993",
    regions=[Region(
        name="SH2",
        residues=[
            Residue("S", 90, test=True),
            Residue("D", 90, test=False)])])

# Gene products (variants) of the protoform:
#   "Ash-L" - residue S at position 90
#   "S90D"  - residue D at position 90
#   "Grb3"  - the SH2 region is listed among the removed components
ashl = Product("Ash-L", residues=[Residue("S", 90)])
s90d = Product("S90D", residues=[Residue("D", 90)])
grb3 = Product("Grb3", removed_components={"regions": [Region("SH2")]})

grb2_definition = Definition(protoform, [ashl, s90d, grb3])

In [15]:
# Build the rule generated from the GRB2 definition together with its instance
# in the corpus action graph (typed by the action graph typing).
rule, instance = grb2_definition.generate_rule(corpus.action_graph, corpus.get_action_graph_typing())

In [16]:
# Instantiate the corpus with the GRB2 definition; the resulting model is
# given the id "EGFR_signalling_GRB2".
grb_variants_model = corpus.instantiate("EGFR_signalling_GRB2", [grb2_definition])

In [18]:
# The following initial condition specifies:
# 150 molecules of the canonical EGFR protein (no PTMs, bounds or activity)
# 75 molecules of the EGFR protein with active kinase
# 30 molecules of the EGFR protein with phosphorylated Y1092
# 30 molecules of the EGFR protein bound to the SH2 domain of Ash-L through its pY site
# 30 instances of the EGFR protein dimers
#
# `kinase` (the region with activity=True) and `grb2` are reused from earlier cells.
egfr_initial = KappaInitialCondition(
    canonical_protein=Protein(Protoform("P00533")),
    canonical_count=150,
    # Each entry is a (component, count) pair.
    stateful_components=[
        (kinase, 75),
        (Residue("Y", 1092, state=State("phosphorylation", True)), 30),
        (Site(name="pY", bound_to=[
            RegionActor(protoform=grb2, region=Region(name="SH2"), variant_name="Ash-L")
        ]), 30)
    ],
    # Each entry is a (bound partner, count) pair.
    bounds=[
        (Protein(Protoform("P00533")), 30),
    ])

In [19]:
# The following initial conditions specify:
# 200 molecules of the canonical Ash-L (no PTMs, bounds or activity)
# 40 molecules of Ash-L bound to the pY site of SHC1
#
# `shc1_pY` and `egfr_pY` are the site actors defined in an earlier cell.
ashl_initial = KappaInitialCondition(
    canonical_protein=Protein(Protoform("P62993"), "Ash-L"),
    canonical_count=200,
    stateful_components=[
        (Region(name="SH2", bound_to=[shc1_pY]), 40)
    ])

# 20 mutant molecules S90D
# 10 molecules of S90D bound to the pY site of EGFR
s90d_initial = KappaInitialCondition(
    canonical_protein=Protein(Protoform("P62993"), "S90D"),
    canonical_count=20,
    stateful_components=[
        (Region(name="SH2", bound_to=[egfr_pY]), 10)
    ])

# 70 molecules of the splice variant Grb3
# (no stateful components are given for this variant)
grb3_initial = KappaInitialCondition(
    canonical_protein=Protein(Protoform("P62993"), "Grb3"),
    canonical_count=70)

In [20]:
# The following initial condition specifies:
# 100 molecules of the canonical SHC1 protein (no PTMs, bounds or activity)
# 30 molecules of the SHC1 protein phosphorylated at Y317
shc1_initial = KappaInitialCondition(
    canonical_protein=Protein(Protoform("P29353")),
    canonical_count=100,
    # Single (component, count) pair: Y317 with phosphorylation=True.
    stateful_components=[
        (Residue("Y", 317, state=State("phosphorylation", True)), 30)
    ],
)

In [21]:
# Export the instantiated model to Kappa, passing the initial conditions
# built in the previous cells (EGFR, the three GRB2 variants and SHC1).
kappa = generate_kappa(
    grb_variants_model, 
    initial_conditions=[
        egfr_initial,
        ashl_initial,
        s90d_initial,
        grb3_initial,
        shc1_initial
    ]
)

{'P00533': [{'P00533_P00533_1': None}, 'EGFR'], 'P62993': [{'P62993': 'Ash-L', 'P629931': 'Grb3', 'P629932': 'S90D'}, 'GRB2'], 'P29353': [{'P29353': None}, 'SHC1']}
Agent UP:  P00533
	 Name:  EGFR
	 Variants:  {'P00533_P00533_1': 'variant_1'}
	 Kami sites:  {}
	 Direct bnd sites:  {'is_bnd': ('site', [])}
	 Region bnd sites:  {}
Agent UP:  P62993
	 Name:  GRB2
	 Variants:  {'P62993': 'Ash_L', 'P629931': 'Grb3', 'P629932': 'S90D'}
	 Kami sites:  {'P62993_site_New site2': 'Ash_L_site_New_site', 'P62993_site_New site1': 'Grb3_site_New_site', 'P62993_site_New site': 'S90D_site_New_site'}
	 Direct bnd sites:  {}
	 Region bnd sites:  {('region_21', 'P62993_region_SH2_bnd_P29353_site_pY_P62993_region_SH2_bnd_P29353_site_pY_1'): ('Ash_L_SH2_site', ['region_2', 'P29353_site_pY']), ('region_2', 'P62993_region_SH2_bnd_P29353_site_pY_P62993_region_SH2_bnd_P29353_site_pY_1'): ('S90D_SH2_site', ['P29353_site_pY', 'region_21'])}
Agent UP:  P29353
	 Name:  SHC1
	 Variants:  {'P29353': 'variant_1'}
	 K

In [22]:
# Show the generated Kappa script.
print(kappa)

// Automatically generated from KAMI-model 'EGFR_signalling_GRB2' 13-02-2020 13:46:50

// Signatures

%agent: EGFR(activity{0 1},phosphorylation{0 1},('site', []))
%agent: GRB2(variant{Ash_L Grb3 S90D},Ash_L_site_New_site,Grb3_site_New_site,S90D_site_New_site,('Ash_L_SH2_site', ['region_2', 'P29353_site_pY']),('S90D_SH2_site', ['P29353_site_pY', 'region_21']))
%agent: SHC1(phosphorylation{0 1},site_pY)

// Rules 

'rule 1' EGFR(activity{1}), EGFR(phosphorylation) -> EGFR(activity{1}), EGFR(phosphorylation{1})  @ 1  //Phosphorylation of EGFR homodimer

'rule 2' GRB2(variant{S90D},S90D_SH2_site[.]), SHC1(phosphorylation{1},site_pY[.]) -> GRB2(variant{S90D},S90D_SH2_site[1]), SHC1(phosphorylation{1},site_pY[1])

'rule 3' GRB2(variant{Grb3},), SHC1(phosphorylation{1},site_pY[.]) -> GRB2(variant{Grb3},), SHC1(phosphorylation{1},site_pY[1])

'rule 4' GRB2(variant{Ash_L},Ash_L_SH2_site[.]), SHC1(phosphorylation{1},site_pY[.]) -> GRB2(variant{Ash_L},Ash_L_SH2_site[1]), SHC1(phosphorylation{1},

In [92]:
from regraph.utils import keys_by_value

from kami.aggregation.identifiers import EntityIdentifier


def _normalize_variant_name(name):
    return name.replace(" ", "_").replace(
        ",", "_").replace("/", "_").replace("-", "_")

def _generate_isoforms_from_corpus(corpus, definition_rules):
    """Generate agents: an agent per protoform (gene)."""
    isoforms = {}
    for protoform in corpus.protoforms():
        uniprot_id = corpus.get_uniprot(protoform)
        hgnc_symbol = corpus.get_hgnc_symbol(protoform)

        isoforms[uniprot_id] = dict()
        isoforms[uniprot_id]["hgnc_symbol"] = hgnc_symbol
        isoforms[uniprot_id]["ref_node"] = protoform
        isoforms[uniprot_id]["variants"] = dict()

        for rule, instance in definition_rules:
            # If the protoform will be instantiated
            if protoform in instance.values():
                lhs_protoform = keys_by_value(instance, protoform)[0]
                p_protoforms = keys_by_value(rule.p_lhs, lhs_protoform)
                # Retrieve variants and their names from the intantiation rule
                variants = []
                for p_protoform in p_protoforms:
                    rhs_node_attrs = rule.rhs.get_node(rule.p_rhs[p_protoform])
                    variant_name = None
                    if "variant_name" in rhs_node_attrs:
                        variant_name = _normalize_variant_name(list(rhs_node_attrs["variant_name"])[0])
                    variants.append(variant_name)
                i = 1
                for variant in variants:
                    if variant is None:
                        isoforms[uniprot_id]["variants"][
                            "variant_{}".format(i)] = None
                        i += 1
                    else:
                        isoforms[uniprot_id]["variants"][_normalize_variant_name(variant)] = None
            else:
                isoforms[uniprot_id]["variants"]["variant_1"] = protoform
    return isoforms


def _generate_isoforms_from_model(model):
    """Collect isoform data from a model: one entry per UniProt id.

    Every protein node of the model is registered as a variant of the entry
    for its UniProt id.  Proteins without a variant name are called
    ``variant_<k>``, where ``k`` is the number of variants already recorded
    for that id plus one.  ``"ref_node"`` is always ``None`` here.
    """
    isoforms = {}
    for protein_node in model.proteins():
        uniprot_id = model.get_uniprot(protein_node)
        variant_name = model.get_variant_name(protein_node)
        hgnc_symbol = model.get_hgnc_symbol(protein_node)

        entry = isoforms.get(uniprot_id)
        if entry is None:
            # First protein seen for this UniProt id
            entry = {
                "ref_node": None,
                "hgnc_symbol": hgnc_symbol,
                "variants": {},
            }
            isoforms[uniprot_id] = entry
        else:
            entry["hgnc_symbol"] = hgnc_symbol

        if variant_name is None:
            variant_name = "variant_{}".format(len(entry["variants"]) + 1)
        entry["variants"][_normalize_variant_name(variant_name)] = protein_node
    return isoforms

def _generate_agents(identifier, isoforms):
    # Generate agent names and variants
    agents = {}
    for isoform, data in isoforms.items():
        if data["hgnc_symbol"] is not None:
            agent_name = data["hgnc_symbol"]
        else:
            agent_name = isoform
        agents[isoform] = {}
        agents[isoform]["agent_name"] = agent_name
        agents[isoform]["ref_node"] = data["ref_node"]
        agents[isoform]["variants"] = data["variants"]

        # Generate stateful sites
        agents[isoform]["stateful_sites"] = {}
        agents[isoform]["kami_sites"] = {}
        agents[isoform]["direct_bnd_sites"] = {}
        agents[isoform]["region_bnd_sites"] = {}
        for protein, variant_name in data["variants"].items():
            # Find stateful sites (a Kappa site per every distinct state)
            prefix = ""
            if len(data["variants"]) > 1:
                prefix = variant_name + "_"
                
            _generate_stateful_sites()
            states = identifier.get_attached_states(protein)
            for s in states:
                state_name = list(get_node(identifier.graph, s)["name"])[0]
                site_name = generate_new_element_id(
                    agents[isoform]["stateful_sites"].values(),
                    prefix + "{}".format(state_name))
                agents[isoform]["stateful_sites"][s] = site_name

    
    # Generate direct binding sites
    # Generate binding through kami sites
    # Generate binding through kami regions
    return agents

def generate_from_corpus(corpus, definitions,
                         initial_conditions=None,
                         default_concentation=100,
                         default_bnd_rate=None,
                         default_brk_rate=None,
                         default_mod_rate=None):
    """Generate Kappa script from KAMI corpus.

    Work in progress: currently only the agent data derived from the corpus
    and the given definitions is printed; nothing is returned and the
    initial conditions, default concentration and default rates are unused.
    """
    # One (instantiation rule, instance) pair per definition
    definition_rules = []
    for definition in definitions:
        rule, instance = definition.generate_rule(
            corpus.action_graph, corpus.get_action_graph_typing())
        definition_rules.append((rule, instance))

    # Entity identifier working on the corpus action graph
    identifier = EntityIdentifier(
        corpus.action_graph,
        corpus.get_action_graph_typing(),
        immediate=False)

    isoforms = _generate_isoforms_from_corpus(corpus, definition_rules)
    agents = _generate_agents(identifier, isoforms)
    print(agents)


def generate_from_model(model, initial_conditions=None,
                        default_concentation=100):
    """Generate Kappa script from KAMI model.

    Work in progress: currently only the agent data derived from the model
    is printed; nothing is returned.
    """
    if initial_conditions is None:
        initial_conditions = []

    # Default rates come straight from the model (None when the model has
    # none); they are read here but not used yet.
    default_bnd_rate = model.default_bnd_rate
    default_brk_rate = model.default_brk_rate
    default_mod_rate = model.default_mod_rate

    # Entity identifier working on the model action graph
    identifier = EntityIdentifier(
        model.action_graph,
        model.get_action_graph_typing(),
        immediate=False)

    isoforms = _generate_isoforms_from_model(model)
    agents = _generate_agents(identifier, isoforms)
    print(agents)

In [93]:
# Try the prototype corpus-based generator on the corpus with the GRB2 definition.
generate_from_corpus(corpus, [grb2_definition])

GraphError: Node 'variant_1' does not exist in the graph

In [None]:
# Try the prototype model-based generator on the instantiated GRB2 model.
generate_from_model(grb_variants_model)