# Manduca sexta Proteomics

Two datasets: developmental stages of hemolymph,  
and cell membranes of fat body and hemocytes.

## Read in files

###  set up files for analysis

* fMemBand: cell membrane of hemocytes and fat body; each gel band
* fMemGroup: cell membrane of hemocytes and fat body; each group
* fDevBand: developmental stages of hemolymph; each gel band
* fDevGroup: developmental stages of hemolymph; each group

In [6]:
# Cell membrane of hemocytes and fat body: per-gel-band and per-group tables.
fMemBand, fMemGroup = (
    "X:\\Insects\\2017Ms_fatbody_hemocyte_membrane\\raw_32group\\combined\\txt\\proteinGroups.txt",
    "X:\\Insects\\2017Ms_fatbody_hemocyte_membrane\\raw_32group\\combined\\txt_4groupWIthPi\\proteinGroups.txt",
)

# Developmental stages of hemolymph: per-gel-band and per-group tables.
fDevBand, fDevGroup = (
    "X:\\Insects\\1520\\1520_A\\1520_A_adult\\combined\\txt_band\\proteinGroups_band_develop.txt",
    "X:\\Insects\\1520\\1520_A\\1520_A_adult\\combined\\txt_group\\proteinGroups_group_develop.txt",
)

###  read in files as dataframe

In [7]:
import pandas as pd
import numpy as np
from Bio import SeqIO

In [8]:
def _load_protein_groups(path):
    """Read one tab-separated proteinGroups table into a DataFrame."""
    # low_memory=False makes pandas parse the file in one pass rather than
    # in chunks, so each column gets a single inferred dtype.
    return pd.read_csv(path, sep="\t", low_memory=False)


# One DataFrame per input file, loaded in the same order as the paths above.
dfMemBand = _load_protein_groups(fMemBand)
dfMemGroup = _load_protein_groups(fMemGroup)
dfDevBand = _load_protein_groups(fDevBand)
dfDevGroup = _load_protein_groups(fDevGroup)

## check files

###  check shape and columns

In [9]:
# Report (rows, columns) for each loaded table, one line per table.
for _label, _frame in (
    ("dfMemBand", dfMemBand),
    ("dfMemGroup", dfMemGroup),
    ("dfDevBand", dfDevBand),
    ("dfDevGroup", dfDevGroup),
):
    print(_label, _frame.shape)

dfMemBand (4419, 332)
dfMemGroup (4419, 80)
dfDevBand (1887, 906)
dfDevGroup (1887, 150)


###  row 100 of dfMemBand

In [19]:
# Dump the row labelled 100 of dfMemBand, one column per line:
# column position, column name, then the cell value.
_row_label = 100
for _pos, _col in enumerate(dfMemBand.columns):
    _cell = dfMemBand.loc[_row_label, _col]
    print(_pos, "\t\t", _col, "\t\t\t\t", _cell)

0 		 Protein IDs 				 Msex2.00140-RA;MCOT.C00176.2.0.OPO5P
1 		 Majority protein IDs 				 Msex2.00140-RA;MCOT.C00176.2.0.OPO5P
2 		 Peptide counts (all) 				 2;2
3 		 Peptide counts (razor+unique) 				 2;2
4 		 Peptide counts (unique) 				 2;2
5 		 Fasta headers 				 ;
6 		 Number of proteins 				 2
7 		 Peptides 				 2
8 		 Razor + unique peptides 				 2
9 		 Unique peptides 				 2
10 		 Peptides FP1 				 0
11 		 Peptides FP2 				 0
12 		 Peptides FP3 				 0
13 		 Peptides FP4 				 0
14 		 Peptides FP5 				 1
15 		 Peptides FP6 				 0
16 		 Peptides FP7 				 0
17 		 Peptides FP8 				 0
18 		 Peptides FS1 				 0
19 		 Peptides FS2 				 0
20 		 Peptides FS3 				 0
21 		 Peptides FS4 				 0
22 		 Peptides FS5 				 0
23 		 Peptides FS6 				 2
24 		 Peptides FS7 				 0
25 		 Peptides FS8 				 0
26 		 Peptides HP1 				 0
27 		 Peptides HP2 				 0
28 		 Peptides HP3 				 0
29 		 Peptides HP4 				 0
30 		 Peptides HP5 				 0
31 		 Peptides HP6 				 0
32 		 Peptides HP7 				 0
33 		 Peptides H

### row 100 of dfMemGroup

In [20]:
# Dump the row labelled 100 of dfMemGroup, one column per line:
# column position, column name, then the cell value.
_row_label = 100
for _pos, _col in enumerate(dfMemGroup.columns):
    _cell = dfMemGroup.loc[_row_label, _col]
    print(_pos, "\t\t", _col, "\t\t\t\t", _cell)

0 		 Protein IDs 				 Msex2.00140-RA;MCOT.C00176.2.0.OPO5P
1 		 Majority protein IDs 				 Msex2.00140-RA;MCOT.C00176.2.0.OPO5P
2 		 Peptide counts (all) 				 2;2
3 		 Peptide counts (razor+unique) 				 2;2
4 		 Peptide counts (unique) 				 2;2
5 		 Fasta headers 				 ;
6 		 Number of proteins 				 2
7 		 Peptides 				 2
8 		 Razor + unique peptides 				 2
9 		 Unique peptides 				 2
10 		 Peptides FP 				 1
11 		 Peptides FS 				 2
12 		 Peptides HP 				 0
13 		 Peptides HS 				 0
14 		 Razor + unique peptides FP 				 1
15 		 Razor + unique peptides FS 				 2
16 		 Razor + unique peptides HP 				 0
17 		 Razor + unique peptides HS 				 0
18 		 Unique peptides FP 				 1
19 		 Unique peptides FS 				 2
20 		 Unique peptides HP 				 0
21 		 Unique peptides HS 				 0
22 		 Sequence coverage [%] 				 9.0
23 		 Unique + razor sequence coverage [%] 				 9.0
24 		 Unique sequence coverage [%] 				 9.0
25 		 Mol. weight [kDa] 				 31.826
26 		 Sequence length 				 277
27 		 Sequence lengths 			

## filter

### remove the last 14 columns

In [21]:
def _without_trailing_columns(frame):
    """Return `frame` with its last 14 columns sliced off (positional)."""
    n_trailing = 14
    # Stop index is computed from the column count, exactly as a plain
    # `shape[1] - 14` slice bound; rows are left untouched.
    stop = frame.shape[1] - n_trailing
    return frame.iloc[:, :stop]


# filter1: every table minus its 14 right-most columns.
dfMemBand_filter1 = _without_trailing_columns(dfMemBand)
dfMemGroup_filter1 = _without_trailing_columns(dfMemGroup)
dfDevBand_filter1 = _without_trailing_columns(dfDevBand)
dfDevGroup_filter1 = _without_trailing_columns(dfDevGroup)

### remove contaminant (CON__) and reverse (REV__) proteins

In [27]:
def f_filter(x):
    """Return True when a row should be kept, False when it should be dropped.

    A row is dropped when the first five characters of its first field are
    "CON__" or "REV__"; every other row is kept.

    Parameters
    ----------
    x : pandas.Series or sequence
        One table row, as passed by ``DataFrame.apply(..., axis=1)``. Only
        the first field (by position) is inspected; it must be sliceable.

    Returns
    -------
    bool
    """
    # Fetch the first field by position. On a row Series whose index holds
    # column names, plain ``x[0]`` depends on the integer-as-position
    # fallback that newer pandas deprecates, so use ``.iloc`` when present
    # and fall back to ordinary indexing for lists/tuples.
    first = x.iloc[0] if hasattr(x, "iloc") else x[0]
    return first[:5] not in ("CON__", "REV__")

In [30]:
def _keep_passing_rows(frame):
    """Return the rows of `frame` for which `f_filter` evaluates to True."""
    # axis=1 hands each row to f_filter, giving one boolean per row.
    row_mask = frame.apply(f_filter, axis=1)
    return frame[row_mask]


# filter2: filter1 tables restricted to the rows accepted by f_filter.
dfMemBand_filter2 = _keep_passing_rows(dfMemBand_filter1)
dfMemGroup_filter2 = _keep_passing_rows(dfMemGroup_filter1)
dfDevBand_filter2 = _keep_passing_rows(dfDevBand_filter1)
dfDevGroup_filter2 = _keep_passing_rows(dfDevGroup_filter1)

## save files

### save filter1

In [31]:
# Output directory; the trailing separator lets file names be appended directly.
folder = "C:\\Users\\ATPs\\OneDrive\\Lab\\works\\2017DmSPSPH\\20180166ManducaMassSpectDevelopMembrane\\"

# Write each filter1 table to its own CSV inside `folder`.
for _frame, _csv_name in (
    (dfMemBand_filter1, "20180118MsMemBand_filter1.csv"),
    (dfMemGroup_filter1, "20180118MsMemGroup_filter1.csv"),
    (dfDevBand_filter1, "20180118MsDevBand_filter1.csv"),
    (dfDevGroup_filter1, "20180118MsDevGroup_filter1.csv"),
):
    _frame.to_csv(folder + _csv_name)

### save filter2

In [32]:
# Write each filter2 table to its own CSV inside `folder`.
for _frame, _csv_name in (
    (dfMemBand_filter2, "20180118MsMemBand_filter2.csv"),
    (dfMemGroup_filter2, "20180118MsMemGroup_filter2.csv"),
    (dfDevBand_filter2, "20180118MsDevBand_filter2.csv"),
    (dfDevGroup_filter2, "20180118MsDevGroup_filter2.csv"),
):
    _frame.to_csv(folder + _csv_name)