In [1]:
import cell2cell as c2c
import pandas as pd

import obonet

  import pandas.util.testing as pdt


# Load Data

**Lists of Ligand-Receptor Pairs**

In [2]:
# Load the curated ligand-receptor pair table (Excel workbook); it serves as
# the background set in the analysis below.
all_lr = pd.read_excel('../Data/PPI-Networks/Celegans-Curated-LR-pairs.xlsx')

In [3]:
# Load the GA-selected ligand-receptor pairs (CSV); this is the list that is
# compared against the curated background below.
ga_lr = pd.read_csv('../Data/PPI-Networks/Celegans-GA-Selected-LR-pairs.csv')

In [4]:
# Quick sanity check: (rows, columns) of the curated ligand-receptor table.
all_lr.shape

(245, 9)

**Gene-Phenotype Ontology associations in C. elegans**

In [5]:
# Parse the phenotype ontology OBO file (release WS276 per the file name).
# Term names are looked up later through phenotype.nodes[<term id>]['name'].
phenotype = obonet.read_obo('../Data/Onthology/phenotype_ontology.WS276.obo')

In [6]:
# Load the gene-to-phenotype association file with cell2cell's GO-annotation
# loader. The resulting table is accessed below through its Gene, GO and Name
# columns (here the GO column holds phenotype-ontology term IDs, since they
# are used as keys into the phenotype ontology).
po_annot = c2c.io.load_go_annotations('../Data/Onthology/phenotype_association.WS276.wb.gz')

Opening GO annotations from ../Data/Onthology/phenotype_association.WS276.wb.gz
../Data/Onthology/phenotype_association.WS276.wb.gz was correctly loaded


# Analysis

In [7]:
# Distinct identifiers appearing in either the ligand or the receptor column
# of the curated table: the background gene set.
all_genes = list(set(all_lr[['Ligand_WB', 'Receptor_WB']].to_numpy().ravel()))

In [8]:
# Distinct identifiers appearing in either the ligand or the receptor column
# of the GA-selected table: the gene set being tested.
ga_genes = list(set(ga_lr[['Ligand_WB', 'Receptor_WB']].to_numpy().ravel()))

In [9]:
# Keep only the phenotype annotations whose gene belongs to the background set.
all_annot = po_annot[po_annot['Gene'].isin(all_genes)]

In [10]:
# Keep only the phenotype annotations whose gene belongs to the GA-selected set.
ga_annot = po_annot[po_annot['Gene'].isin(ga_genes)]

In [11]:
# Attach the human-readable ontology term name for every annotation row.
# all_annot is a filtered selection of po_annot, so writing a column into it
# in place raises pandas' SettingWithCopyWarning; .assign() returns a new
# DataFrame with the extra column instead, which is unambiguous and also keeps
# this cell idempotent on re-run.
all_annot = all_annot.assign(
    Onthology=all_annot['GO'].apply(lambda term_id: phenotype.nodes[term_id]['name'])
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [12]:
# Attach the human-readable ontology term name for every annotation row.
# ga_annot is a filtered selection of po_annot, so writing a column into it
# in place raises pandas' SettingWithCopyWarning; .assign() returns a new
# DataFrame with the extra column instead, which is unambiguous and also keeps
# this cell idempotent on re-run.
ga_annot = ga_annot.assign(
    Onthology=ga_annot['GO'].apply(lambda term_id: phenotype.nodes[term_id]['name'])
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [13]:
# Phrases searched for inside the ontology term names; each one is tested
# separately for over-/under-representation in the loop below.
tests = [
    'morphology phenotype',
    'cell migration',
]

In [14]:
# For every phenotype keyword, compare how frequently it is annotated among
# the GA-selected genes versus the curated background, then run cell2cell's
# Fisher representation test on the resulting counts.
for key_word in tests:
    # Keywords are literal phrases, so match them as plain substrings
    # (regex=False) rather than as regular expressions; na=False makes rows
    # with a missing term name count as non-matching instead of putting NaN
    # into the boolean mask.
    all_hits = all_annot['Onthology'].str.contains(key_word, regex=False, na=False)
    ga_hits = ga_annot['Onthology'].str.contains(key_word, regex=False, na=False)

    # Unique gene names carrying at least one matching phenotype term.
    all_test = all_annot.loc[all_hits].Name.unique()
    ga_test = ga_annot.loc[ga_hits].Name.unique()

    print('PHENOTYPE: {}'.format(key_word))
    print('Expected ratio in background: {}'.format(len(all_test) / len(all_genes)))
    print('Observed ratio in GA-selection: {}'.format(len(ga_test) / len(ga_genes)))
    print(ga_test)

    # Counts are passed as: GA-selection size, GA-selection hits,
    # background size, background hits.
    test = c2c.stats.enrichment.fisher_representation(len(ga_genes), len(ga_test), len(all_genes), len(all_test))
    # Index 0 of the result is reported as the depletion test and index 1 as
    # the enrichment test.
    print('Depletion - Odds: {}; P-value: {}'.format(test['odds'][0], test['pval'][0]))
    print('Enrichment - Odds: {}; P-value: {}'.format(test['odds'][1], test['pval'][1]))
    print('')

PHENOTYPE: morphology phenotype
Expected ratio in background: 0.04736842105263158
Observed ratio in GA-selection: 0.1016949152542373
['daf-2' 'lin-17' 'lin-18' 'lin-44' 'mom-2' 'epi-1']
Depletion - Odds: 4.830188679245283; P-value: 0.9956272998197243
Enrichment - Odds: 4.830188679245283; P-value: 0.026860302232248286

PHENOTYPE: cell migration
Expected ratio in background: 0.21578947368421053
Observed ratio in GA-selection: 0.3559322033898305
['cam-1' 'cfz-2' 'cwn-1' 'dbl-1' 'epi-1' 'lag-2' 'let-2' 'lin-12' 'lin-17'
 'lin-44' 'mab-20' 'mig-1' 'mom-2' 'pat-3' 'unc-5' 'unc-6' 'unc-129'
 'ddr-1' 'ina-1' 'nid-1' 'rig-6']
Depletion - Odds: 3.067105263157895; P-value: 0.9994379045167932
Enrichment - Odds: 3.067105263157895; P-value: 0.0019113679915507085

