In [None]:
# Ch09-5 - Proteomics Analysis

In [None]:
## Install Packages ##
! pip install biopython matplotlib pandas seaborn pyteomics

In [None]:
# Import Libraries 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from Bio.SeqUtils.ProtParam import ProteinAnalysis
from pyteomics import parser, mass

In [None]:
# Sample protein sequence used throughout the notebook, labelled as
# P53 (TP53 tumor suppressor). The fragments below are concatenated in order
# into one single-letter amino-acid string.
# NOTE(review): at 143 residues this looks shorter than full-length human p53
# (commonly reported as 393 aa) - presumably an abbreviated demo sequence;
# confirm against the intended reference entry.
protein_sequence = "".join([
    "MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPG",
    "PDEAPRMPEAAPPVAPAPAAPTPAAPAPAPSWPLSSSVPSQKTYQGSYGFRLGFLHSGT",
    "GFVKVGQSTSRHKKLMFKTEGPDSD",
])

In [None]:
# Compute whole-protein physicochemical properties with Biopython's
# ProteinAnalysis wrapper around the sequence string.
protein = ProteinAnalysis(protein_sequence)

# Per-residue counts (kept in the namespace; not printed in this cell).
amino_acid_composition = protein.count_amino_acids()

# Scalar summaries reported below.
molecular_weight = protein.molecular_weight()
hydrophobicity = protein.gravy()  # GRAVY score, as labelled in the printout
isoelectric_point = protein.isoelectric_point()

# Emit the three summary lines, each rounded to two decimals.
print(
    f"Protein Molecular Weight: {molecular_weight:.2f} Da",
    f"Protein Hydrophobicity (GRAVY): {hydrophobicity:.2f}",
    f"Protein Isoelectric Point (pI): {isoelectric_point:.2f}",
    sep="\n",
)

In [None]:
# In-silico trypsin digestion: cleave the sequence with pyteomics' ExPASy
# trypsin rule, materialize the resulting peptides as a list, and order them
# alphabetically so downstream cells see a deterministic ordering.
peptides = list(parser.cleave(protein_sequence, parser.expasy_rules['trypsin']))
peptides.sort()

In [None]:
# Compute one mass per peptide with pyteomics, keeping the same order as
# `peptides` so the two lists can be zipped into table columns later.
peptide_masses = list(
    map(lambda seq: mass.calculate_mass(sequence=seq), peptides)
)

In [None]:
# Tabulate each peptide next to its computed mass, then keep only the rows
# whose mass is strictly greater than 500 Da so very small fragments are
# dropped. The original row index is preserved (no reset).
df = (
    pd.DataFrame({'Peptide': peptides, 'Mass (Da)': peptide_masses})
    .loc[lambda frame: frame['Mass (Da)'] > 500]
)

In [None]:
# Show a heading followed by the first ten rows of the filtered peptide table.
# Rows follow the table's existing order; no ranking is applied here.
print("\nTop 10 Peptides:", df.head(10), sep="\n")

In [None]:
# Visualization: histogram (30 bins) of the filtered peptide masses with a
# KDE curve overlaid. Labels and title are applied through the Axes object
# that seaborn returns rather than through pyplot's implicit current axes.
plt.figure(figsize=(10, 5))
sns.histplot(df['Mass (Da)'], bins=30, kde=True, color="blue").set(
    xlabel="Peptide Mass (Da)",
    ylabel="Frequency",
    title="Peptide Mass Distribution (Trypsin Digest)",
)
plt.show()

In [None]:
## End of Notebook ##