# Estimate Amino Acid Composition

This tutorial walks you through the general usage of ACE.

In [3]:
import pandas as pd
from ace import Estimator


In [9]:
# Step 1. Load proteomics data laid out like the template described here.
#   Column 1 ('Protein')  : the protein name in FASTA format.
#   Column 2 ('Sequence') : the corresponding peptide sequence.
#   Remaining columns     : any number of food sample replicates, one column
#                           per replicate name; the values are the
#                           quantitative abundances.
#   Open `proteomics_example.csv` (the file read in this cell) for details.

data = pd.read_csv('proteomics_example.csv')

# Pull out one example record (position 1, i.e. the second row) and the
# column names, then print both.
second_row = data.iloc[1]
column_names = data.columns.tolist()
print(second_row)
print("Columns:", column_names)

Protein      sp|A0JNC0|TMOD1_BOVIN Tropomodulin-1 OS=Bos ta...
Sequence                                         EGLNSVIKPTQYK
19.raw                                                5.42e+11
20.raw                                                     NaN
21.raw                                                     NaN
B96_1.raw                                                  NaN
B96_2.raw                                                  NaN
B96_3.raw                                                  NaN
Name: 1, dtype: object
Columns: ['Protein', 'Sequence', '19.raw', '20.raw', '21.raw', 'B96_1.raw', 'B96_2.raw', 'B96_3.raw']


In [11]:
# Step 2. Estimate the amino acid composition.

# Preprocessing options passed to the Estimator constructor as keyword
# arguments. The per-option notes are carried over from the tutorial.
estimator_options = dict(
    # If true, impute zeroes.
    # NOTE(review): the name says "remove" while the note says "impute" --
    # confirm the exact behaviour against the ACE documentation.
    remove_zeroes=True,
    # If true, remove duplicates.
    remove_duplicates=True,
    # If true, merge substring sequences.
    merge_substr=True,
    # If specified, remove proteins with the patterns.
    filters=['CRAP'],
)
estimator = Estimator(**estimator_options)

# estimate() yields two results, unpacked in order: the amino acid
# composition table and the protein quantitation index (PQI).
estimation_results = estimator.estimate(data)
amino_acid_composition, pqi = estimation_results
print(amino_acid_composition)


     19.raw    20.raw    21.raw  B96_1.raw  B96_2.raw  B96_3.raw
A  0.088549  0.089449  0.088617   0.088188   0.089322   0.088964
B  0.000000  0.000000  0.000000   0.000000   0.000000   0.000000
C  0.012975  0.012861  0.012886   0.013905   0.013670   0.013614
D  0.058136  0.057692  0.058035   0.056830   0.057262   0.057459
E  0.093800  0.092193  0.093204   0.093645   0.094100   0.093912
F  0.034486  0.035496  0.035384   0.035760   0.035326   0.035929
G  0.066752  0.068216  0.067461   0.067719   0.066020   0.066093
H  0.025982  0.026718  0.026071   0.024536   0.025034   0.024634
I  0.056462  0.056024  0.056468   0.052740   0.053711   0.053300
K  0.084501  0.083720  0.084531   0.086503   0.087159   0.086688
L  0.084223  0.084740  0.083765   0.086283   0.086615   0.086092
M  0.029824  0.030643  0.029914   0.027756   0.028306   0.028237
N  0.036324  0.036537  0.036602   0.038238   0.038088   0.038192
P  0.042442  0.041991  0.042946   0.042682   0.040516   0.041253
Q  0.037924  0.038697  0.

In [12]:
# Protein quantitation index (PQI): the second value returned by
# `estimator.estimate(data)` in Step 2.
print(pqi)


                                                         19.raw       20.raw  \
sp|A0JNC0|TMOD1_BOVIN Tropomodulin-1 OS=Bos tau...  1.81623e+11     8.69e+08   
sp|A0JNJ5|MYL1_BOVIN Myosin light chain 1/3, sk...  2.50129e+12   2.3926e+12   
sp|A2I7M9|SPA32_BOVIN Serpin A3-2 OS=Bos taurus...     1.22e+08     2.84e+09   
sp|A2I7N1|SPA35_BOVIN Serpin A3-5 OS=Bos taurus...     2.52e+09   4.6197e+09   
sp|A2I7N3|SPA37_BOVIN Serpin A3-7 OS=Bos taurus...          NaN      5.3e+09   
...                                                         ...          ...   
tr|Q29RI2|Q29RI2_BOVIN Phosphorylase kinase cat...     4.75e+09  8.20667e+09   
tr|Q94DL4|Q94DL4_ORYSJ Os01g0964133 protein OS=...     2.46e+11     4.56e+11   
tr|Q148C2|Q148C2_BOVIN Troponin C type 2 (Fast)...  5.18667e+10  5.43667e+10   
tr|Q148D3|Q148D3_BOVIN Fumarate hydratase OS=Bo...  6.87333e+09  9.65333e+09   
tr|Q148H2|Q148H2_BOVIN Myosin light chain 6B OS...  3.48067e+11  3.19033e+11   

                                       