In [1]:
# Print the current date and time as a timestamp for this notebook run
!date

Wed Aug  7 16:46:19 PDT 2019


### Notebook to process PEAKS 8.5 SPIDER runs for December 2018 UWPR samples.
### Starting with the sample relevant to the de novo paper: sample 378 (100 m sinking particles).
### Exported the results of PEAKS 8.5 searches into Git ETNP 2017 data directory.
### Database-identified peptides are in `running#-peaks#-spider-peptide.csv` files, and de novo-only peptides are in `running#-peaks#-spider-dno.csv` files.
### SPIDER modifications are only in `peptide` files.

In [2]:
# List the files exported for sample 378 before any processing
! ls /Users/meganduffy/Documents/git-repos/2017-etnp/data/uwpr-dec2018/UWPR-Dec2018-SPIDER/378_etnp2017_100m_trap-SPIDER82-20181214/

378-spider-DB-search-psm.csv      378-spider82-protein-peptides.csv
378-spider82-dno.csv              378-spider82-proteins.csv
378-spider82-peptide.csv          378-spider82-proteins.fasta


### Starting with sample 378: ETNP 2017 P2 100 m trap

In [3]:
import os

In [4]:
# Show the directory this notebook is running from
os.getcwd()

'/Users/meganduffy/Documents/git-repos/2017-etnp/notebooks'

In [5]:
import pandas as pd

In [6]:
# Record the pandas version used for this run
pd.__version__

'0.18.1'

In [7]:
# Load the SPIDER peptide export for sample 378 into a DataFrame
spider_peptide_csv = "/Users/meganduffy/Documents/git-repos/2017-etnp/data/uwpr-dec2018/UWPR-Dec2018-SPIDER/378_etnp2017_100m_trap-SPIDER82-20181214/378-spider82-peptide.csv"
data = pd.read_csv(spider_peptide_csv)

In [8]:
# Preview the first rows of the loaded table to check the columns
data.head()

Unnamed: 0,Peptide,-10lgP,Mass,Length,ppm,m/z,RT,Area 378,Fraction,Scan,Source File,#Spec,#Spec 378,Accession,PTM,AScore
0,LPQVEGTGGDVQPSQDLVR,102.52,1994.0068,19,0.2,998.0109,54.03,63800000.0,14,15243,20181214_378_etnp2017_100m_trap.raw,4,4,54036848,,
1,SC(+57.02)AAAGTEC(+57.02)LISGWGNTK(+28.03),88.26,1909.8662,18,0.6,955.9409,65.06,10400000.0,14,18301,20181214_378_etnp2017_100m_trap.raw,2,2,|#CONTAM#TRYP_PIG|,Carbamidomethylation; Dimethylation(KR),C2:Carbamidomethylation:1000.00;C9:Carbamidome...
2,VIGQNEAVDAVSNAIR,74.03,1654.8638,16,0.7,828.4398,64.54,41700000.0,14,18159,20181214_378_etnp2017_100m_trap.raw,6,6,54036848,,
3,AIQQQIENPLAQQILSGELVPGK,70.68,2473.354,23,0.2,825.4587,86.7,774000.0,14,24111,20181214_378_etnp2017_100m_trap.raw,2,2,54036848,,
4,LPQ(+.98)VEGTGGDVQPSQDLVR,66.56,1994.9908,19,4.7,998.5074,55.04,515000.0,14,15528,20181214_378_etnp2017_100m_trap.raw,1,1,54036848,Deamidation (NQ),Q3:Deamidation (NQ):113.83


In [9]:
# Reduce the export to a single-column DataFrame holding the peptide sequences
peptide_columns = ["Peptide"]
pep = data[peptide_columns]

In [10]:
# Preview the peptide-only frame
pep.head()

Unnamed: 0,Peptide
0,LPQVEGTGGDVQPSQDLVR
1,SC(+57.02)AAAGTEC(+57.02)LISGWGNTK(+28.03)
2,VIGQNEAVDAVSNAIR
3,AIQQQIENPLAQQILSGELVPGK
4,LPQ(+.98)VEGTGGDVQPSQDLVR


In [11]:
# Export the peptide-only frame to a new csv file.
# header=False and index=False keep the 'Peptide' column label and the row index out of the file.

peps_csv_path = "/Users/meganduffy/Documents/git-repos/2017-etnp/data/uwpr-dec2018/UWPR-Dec2018-SPIDER/378_etnp2017_100m_trap-SPIDER82-20181214/378-peaks82-spider-peps.csv"
pep.to_csv(peps_csv_path, index=False, header=False)

In [12]:
# Export the same peptide-only frame again, this time with a .txt extension.
# header=False and index=False keep the 'Peptide' column label and the row index out of the file.

peps_txt_path = "/Users/meganduffy/Documents/git-repos/2017-etnp/data/uwpr-dec2018/UWPR-Dec2018-SPIDER/378_etnp2017_100m_trap-SPIDER82-20181214/378-peaks82-spider-peps.txt"
pep.to_csv(peps_txt_path, index=False, header=False)

In [13]:
# Check the first lines of the exported peptide list
!head /Users/meganduffy/Documents/git-repos/2017-etnp/data/uwpr-dec2018/UWPR-Dec2018-SPIDER/378_etnp2017_100m_trap-SPIDER82-20181214/378-peaks82-spider-peps.txt

LPQVEGTGGDVQPSQDLVR
SC(+57.02)AAAGTEC(+57.02)LISGWGNTK(+28.03)
VIGQNEAVDAVSNAIR
AIQQQIENPLAQQILSGELVPGK
LPQ(+.98)VEGTGGDVQPSQDLVR
LPQVEGTGGD(+21.98)VQPSQDLVR
LPQVEGTGGD(+53.92)VQPSQDLVR
LPQVEGTGGD(+17.03)VQPSQDLVR
NNPVLIGEPGVGK
N(+.98)NPVLIGEPGVGK


In [14]:
# Extracts the text inside each pair of parentheses (the modification mass shifts)
# and saves it, one entry per line, as a new file.
# awk splits every line on "(" and ")", so the even-numbered fields are the parenthesized contents.

!awk -F "[()]" '{ for (i=2; i<NF; i+=2) print $i }' /Users/meganduffy/Documents/git-repos/2017-etnp/data/uwpr-dec2018/UWPR-Dec2018-SPIDER/378_etnp2017_100m_trap-SPIDER82-20181214/378-peaks82-spider-peps.txt > /Users/meganduffy/Documents/git-repos/2017-etnp/data/uwpr-dec2018/UWPR-Dec2018-SPIDER/378_etnp2017_100m_trap-SPIDER82-20181214/378-peaks82-spider-mods.txt

In [15]:
# How does the modifications text file look?
# Each line should be one mass shift taken from inside a pair of parentheses.

!head /Users/meganduffy/Documents/git-repos/2017-etnp/data/uwpr-dec2018/UWPR-Dec2018-SPIDER/378_etnp2017_100m_trap-SPIDER82-20181214/378-peaks82-spider-mods.txt 

+57.02
+57.02
+28.03
+.98
+21.98
+53.92
+17.03
+.98
-18.01
+21.98


In [16]:
# How many modifications for this sample?
# The mods file has one line per parenthesized mass shift, so this counts
# modification occurrences, not distinct modified peptides.

!wc -l /Users/meganduffy/Documents/git-repos/2017-etnp/data/uwpr-dec2018/UWPR-Dec2018-SPIDER/378_etnp2017_100m_trap-SPIDER82-20181214/378-peaks82-spider-mods.txt

     102 /Users/meganduffy/Documents/git-repos/2017-etnp/data/uwpr-dec2018/UWPR-Dec2018-SPIDER/378_etnp2017_100m_trap-SPIDER82-20181214/378-peaks82-spider-mods.txt


In [17]:
# Compared with how many total peptides?
# The peptide file has one peptide per line, so the line count is the peptide count.

!wc -l /Users/meganduffy/Documents/git-repos/2017-etnp/data/uwpr-dec2018/UWPR-Dec2018-SPIDER/378_etnp2017_100m_trap-SPIDER82-20181214/378-peaks82-spider-peps.txt

     393 /Users/meganduffy/Documents/git-repos/2017-etnp/data/uwpr-dec2018/UWPR-Dec2018-SPIDER/378_etnp2017_100m_trap-SPIDER82-20181214/378-peaks82-spider-peps.txt


In [18]:
# Ratio of modification occurrences to peptides for this sample (a fraction, not a percentage).
# 102 is the line count of the mods file and 393 is the line count of the peptide file.
# The mods file holds one line per parenthesized modification, so a peptide carrying
# several modifications is counted several times: this is modifications per peptide,
# not the fraction of peptides that are modified.
# NOTE(review): the PTM column reports 305 blank rows; if `data` has 393 rows, that
# would mean 88 modified peptides — TODO confirm and recompute from the DataFrame.

102/393

0.2595419847328244

In [19]:
# Keep only the PTM column (as a single-column DataFrame)
ptm = data[["PTM"]]

In [20]:
# Preview the PTM-only frame; blank entries are rows with no PTM value
ptm.head()

Unnamed: 0,PTM
0,
1,Carbamidomethylation; Dimethylation(KR)
2,
3,
4,Deamidation (NQ)


In [21]:
# Count the missing (null) entries in the PTM column
missing_ptm_counts = ptm.isnull().sum()
print(missing_ptm_counts)

PTM    305
dtype: int64


In [22]:
# Drop every row whose PTM entry is missing, leaving only annotated rows
ptm_svelte = ptm.dropna(axis=0, how="any")

In [23]:
# Export the non-blank PTM annotations to a new txt file.
# header=False and index=False keep the 'PTM' column label and the row index out of the file.

ptm_txt_path = "/Users/meganduffy/Documents/git-repos/2017-etnp/data/uwpr-dec2018/UWPR-Dec2018-SPIDER/378_etnp2017_100m_trap-SPIDER82-20181214/378-peaks82-spider-ptm.txt"
ptm_svelte.to_csv(ptm_txt_path, index=False, header=False)

In [24]:
# Check the first lines of the exported PTM annotations
!head /Users/meganduffy/Documents/git-repos/2017-etnp/data/uwpr-dec2018/UWPR-Dec2018-SPIDER/378_etnp2017_100m_trap-SPIDER82-20181214/378-peaks82-spider-ptm.txt

Carbamidomethylation; Dimethylation(KR)
Deamidation (NQ)
Sodium adduct
Replacement of 2 protons by iron
Replacement of proton with ammonium ion
Deamidation (NQ)
Dehydration
Sodium adduct
Deamidation (NQ)
Sodium adduct


In [25]:
# List the sample directory again to confirm the new peps/mods/ptm files were written
!ls /Users/meganduffy/Documents/git-repos/2017-etnp/data/uwpr-dec2018/UWPR-Dec2018-SPIDER/378_etnp2017_100m_trap-SPIDER82-20181214/

378-peaks82-spider-mods.txt       378-spider82-dno.csv
378-peaks82-spider-peps.csv       378-spider82-peptide.csv
378-peaks82-spider-peps.txt       378-spider82-protein-peptides.csv
378-peaks82-spider-ptm.txt        378-spider82-proteins.csv
378-spider-DB-search-psm.csv      378-spider82-proteins.fasta
