## Kirchner et al. 2019 - Figure S5C 

Load libraries

In [2]:
import pandas as pd
import numpy as np

from tqdm import tqdm

import matplotlib.pyplot as plt

Load the compartment database downloaded from ***

In [4]:
# Read the tab-separated compartment annotations, supplying the column names
# explicitly.
db = pd.read_csv(
    './human_compartment_knowledge_full.tsv',
    sep='\t',
    names=[
        'Ensembl', 'Gene', 'GO_term', 'Compartment',
        'Database', 'Evidence_code', 'Confidence_score',
    ],
)

# Keep only the high-confidence annotations (confidence score of 3 or more).
db_hc = db.loc[db['Confidence_score'].ge(3)]

  This is separate from the ipykernel package so we can avoid doing imports until


Load human motif database

In [5]:
# Human motif database, read from a CSV file in the working directory.
human_db = pd.read_csv(filepath_or_buffer='./human_db.csv')

Assign each protein (recorded by its first gene name) to one list according to its motif types, with priority canonical > phosphorylation-generated > acetylation-generated

In [None]:
# Sorted unique protein identifiers found in the motif database.
list_proteins = np.unique(human_db['ID'])

# Gene names of the proteins, split by the highest-priority motif type each
# protein carries (Canonical > Phosphorylation > Acetylation).
cano = []
phospho = []
acetyl = []

# Group the table by protein once. Re-filtering the whole table with a boolean
# mask for every protein (twice per iteration) costs proteins x rows.
rows_by_protein = human_db.groupby('ID')

for protein, rows in tqdm(rows_by_protein, total=rows_by_protein.ngroups):

    # Distinct motif types annotated for this protein.
    motifs = set(rows['motif_type'])

    # First space-separated token of the first "Gene names" entry, taking the
    # entries in the sorted order returned by np.unique.
    gene_name = str(np.unique(rows['Gene names'])[0]).split(' ')[0]

    # The branches are tested in priority order, so each `elif` already implies
    # that the higher-priority motif types are absent.
    if 'Canonical' in motifs:
        cano.append(gene_name)
    elif 'Phosphorylation' in motifs:
        phospho.append(gene_name)
    elif 'Acetylation' in motifs:
        acetyl.append(gene_name)
    else:
        # None of the three expected motif types: report the protein.
        print("Problem with: {}".format(protein))

List of relevant compartments for quantification and plots

In [6]:
# Compartments to quantify and plot, in plotting order (first entry is drawn
# at y = 0).
relevant_compartments = [
    'Cytoskeleton',
    'Extracellular space',
    'Peroxisome',
    'Mitochondrion',
    'Nucleus',
    'Cytosol',
    'Endoplasmic reticulum',
    'Endosome',
    'Lysosome',
    'Plasma membrane',
]

### Figure

In [None]:
# Map each relevant compartment to the sorted array of unique gene names that
# carry a high-confidence annotation for that compartment.
prot_per_cmp = {
    compartment: np.unique(db_hc[db_hc['Compartment'] == compartment]['Gene'])
    for compartment in relevant_compartments
}

Results are stored in a dict so that contingency tables can be built and $\chi^2$ statistics computed

In [None]:
import matplotlib.patches as mpatches

# Per-compartment counts of genes in each motif category; exported afterwards.
results_for_stats = dict()

# Segment colours and legend labels, in stacking order (left to right).
colors = ['#EEEE00', '#009ACD', '#66CD00', 'gray']
labels = ['canonical', 'phosphorylation', 'acetylation', 'no motif']

# Percentages drawn for the 'Full proteome' reference bar, in the same order
# as `labels`.
# NOTE(review): these values are hard-coded, not computed in this notebook --
# confirm their origin before reusing the figure with other data.
full_proteome_percent = [45.98, 20.31, 9.17, 24.54]

# Set copies of the gene-name lists: membership tests in the loop below are
# then constant-time instead of a linear scan of a list for every gene.
cano_genes = set(cano)
phospho_genes = set(phospho)
acetyl_genes = set(acetyl)


def _stacked_barh(axis, y, widths):
    """Draw one horizontal stacked bar on *axis* at row *y*.

    *widths* holds one segment width per entry of `colors`; each segment
    starts where the previous one ended.
    """
    left = 0.0
    for width, color in zip(widths, colors):
        axis.barh(y=y, width=width, color=color, left=left)
        left += width


fig, ax = plt.subplots(figsize=(7, 3.5))
for side in ('right', 'top', 'left'):
    ax.spines[side].set_visible(False)

for i, c in enumerate(relevant_compartments):

    total_in_compartment = len(prot_per_cmp[c])
    nb_cano = 0
    nb_phospho = 0
    nb_acetyl = 0

    # Each gene is counted once, in the highest-priority category it belongs to.
    for prot in prot_per_cmp[c]:
        if prot in cano_genes:
            nb_cano += 1
        elif prot in phospho_genes:
            nb_phospho += 1
        elif prot in acetyl_genes:
            nb_acetyl += 1

    results_for_stats[c] = {
        'canonical': nb_cano,
        'phospho': nb_phospho,
        'acetyl': nb_acetyl,
        'no_motif': total_in_compartment - (nb_cano + nb_phospho + nb_acetyl),
        'total_in_compartment': total_in_compartment,
    }

    if total_in_compartment == 0:
        # No gene annotated for this compartment: percentages are undefined
        # (division by zero), so the counts are recorded but no bar is drawn.
        continue

    percent_cano = nb_cano / total_in_compartment * 100
    percent_phospho = nb_phospho / total_in_compartment * 100
    percent_acetyl = nb_acetyl / total_in_compartment * 100
    percent_other = 100 - (percent_cano + percent_phospho + percent_acetyl)

    _stacked_barh(ax, i, [percent_cano, percent_phospho,
                          percent_acetyl, percent_other])

# Reference bar for the full proteome, drawn above the compartment bars.
_stacked_barh(ax, len(relevant_compartments), full_proteome_percent)

y_labels = relevant_compartments + ['Full proteome']

ax.set_yticks(np.arange(len(y_labels)))
ax.set_yticklabels(y_labels)
ax.set_xlim(0, 100)

# The bars carry no labels, so the legend is built from explicit patches.
patches = [mpatches.Patch(color=color, label=label)
           for color, label in zip(colors, labels)]

ax.legend(handles=patches, bbox_to_anchor=[1, 1])

ax.set_xlabel('% of proteins')

fig.tight_layout()

fig.savefig('./compartment.png', dpi=300)

plt.show()

Export result table

In [None]:
# Build a table from the nested results dict (one column per compartment, one
# row per category) and write it to a CSV file.
export = pd.DataFrame(data=results_for_stats)
export.to_csv(path_or_buf='./results_compartments.csv')